fix: add missing request handler for rss feeds (#2006)

* fix: add missing request handler for rss feeds

* fix: ci issues
This commit is contained in:
Meier Lukas
2025-01-22 18:23:49 +01:00
committed by GitHub
parent b413e2ec7d
commit 99c8e5b609
9 changed files with 186 additions and 159 deletions

View File

@@ -1,12 +1,36 @@
import type { RssFeed } from "@homarr/cron-jobs";
import { createItemChannel } from "@homarr/redis";
import { rssFeedsRequestHandler } from "@homarr/request-handler/rss-feeds";
import { z } from "@homarr/validation";
import { createOneItemMiddleware } from "../../middlewares/item";
import { createTRPCRouter, publicProcedure } from "../../trpc";
export const rssFeedRouter = createTRPCRouter({
getFeeds: publicProcedure.unstable_concat(createOneItemMiddleware("rssFeed")).query(async ({ input }) => {
const channel = createItemChannel<RssFeed[]>(input.itemId);
return await channel.getAsync();
}),
/**
 * Returns the newest entries across all requested feeds.
 *
 * Input: `urls` (feed URLs to read) and `maximumAmountPosts` (upper bound on returned entries).
 * Every URL is resolved through `rssFeedsRequestHandler` with `forceUpdate: false`.
 * The entries of all feeds are merged, ordered newest first and only then truncated,
 * so the limit keeps the most recent posts overall instead of whatever the first feeds delivered.
 */
getFeeds: publicProcedure
  .input(
    z.object({
      urls: z.array(z.string()),
      maximumAmountPosts: z.number(),
    }),
  )
  .query(async ({ input }) => {
    const rssFeeds = await Promise.all(
      input.urls.map(async (url) => {
        const innerHandler = rssFeedsRequestHandler.handler({
          url,
          count: input.maximumAmountPosts,
        });
        return await innerHandler.getCachedOrUpdatedDataAsync({
          forceUpdate: false,
        });
      }),
    );

    return (
      rssFeeds
        // flatMap produces a fresh array, so the in-place sort below does not touch the handler results.
        .flatMap((rssFeed) => rssFeed.data.entries)
        .sort((entryA, entryB) => {
          const timeA = entryA.published ? new Date(entryA.published).getTime() : Number.NaN;
          const timeB = entryB.published ? new Date(entryB.published).getTime() : Number.NaN;
          const hasDateA = !Number.isNaN(timeA);
          const hasDateB = !Number.isNaN(timeB);

          if (hasDateA && hasDateB) {
            return timeB - timeA;
          }
          // Entries without a usable date go after dated ones; this keeps the comparator
          // consistent, which "return 0 whenever a date is missing" was not.
          if (hasDateA) {
            return -1;
          }
          if (hasDateB) {
            return 1;
          }
          return 0;
        })
        // Truncate after sorting: slicing first would cut off newer posts of later feeds.
        .slice(0, input.maximumAmountPosts)
    );
  }),
});

View File

@@ -22,7 +22,6 @@
},
"prettier": "@homarr/prettier-config",
"dependencies": {
"@extractus/feed-extractor": "^7.1.3",
"@homarr/analytics": "workspace:^0.1.0",
"@homarr/auth": "workspace:^0.1.0",
"@homarr/common": "workspace:^0.1.0",

View File

@@ -11,7 +11,6 @@ import { mediaServerJob } from "./jobs/integrations/media-server";
import { mediaTranscodingJob } from "./jobs/integrations/media-transcoding";
import { minecraftServerStatusJob } from "./jobs/minecraft-server-status";
import { pingJob } from "./jobs/ping";
import type { RssFeed } from "./jobs/rss-feeds";
import { rssFeedsJob } from "./jobs/rss-feeds";
import { sessionCleanupJob } from "./jobs/session-cleanup";
import { updateCheckerJob } from "./jobs/update-checker";
@@ -38,4 +37,3 @@ export const jobGroup = createCronJobGroup({
});
export type JobGroupKeys = ReturnType<(typeof jobGroup)["getKeys"]>[number];
export type { RssFeed };

View File

@@ -1,139 +1,36 @@
import type { FeedData, FeedEntry } from "@extractus/feed-extractor";
import { extract } from "@extractus/feed-extractor";
import SuperJSON from "superjson";
import type { Modify } from "@homarr/common/types";
import { EVERY_5_MINUTES } from "@homarr/cron-jobs-core/expressions";
import { db, eq } from "@homarr/db";
import { items } from "@homarr/db/schema";
import { logger } from "@homarr/log";
import { createItemChannel } from "@homarr/redis";
import { z } from "@homarr/validation";
// This import is done that way to avoid circular dependencies.
import { rssFeedsRequestHandler } from "@homarr/request-handler/rss-feeds";
import type { WidgetComponentProps } from "../../../widgets";
import { createCronJob } from "../lib";
/**
* Cron job registered under the name "rssFeeds" with the EVERY_5_MINUTES expression.
* The callback loads every item whose kind is "rssFeed" and refreshes the feeds configured in its options.
*
* NOTE(review): this span comes from a diff view and interleaves two revisions of the callback body:
* one that extracts each feed and publishes the result to the item's channel, and one that
* force-updates `rssFeedsRequestHandler` per feed URL. Confirm which lines belong to the
* revision you are working on before relying on the comments below.
*/
export const rssFeedsJob = createCronJob("rssFeeds", EVERY_5_MINUTES).withCallback(async () => {
const itemsForIntegration = await db.query.items.findMany({
const rssItems = await db.query.items.findMany({
where: eq(items.kind, "rssFeed"),
});
for (const item of itemsForIntegration) {
const options = SuperJSON.parse<WidgetComponentProps<"rssFeed">["options"]>(item.options);
// Item options are stored serialized; parse them into the rssFeed widget's option shape.
const itemOptions = rssItems.map((item) => SuperJSON.parse<WidgetComponentProps<"rssFeed">["options"]>(item.options));
const feeds = await Promise.all(
options.feedUrls.map(async (feedUrl) => ({
feedUrl,
feed: (await extract(feedUrl, {
getExtraEntryFields: (feedEntry) => {
const media = attemptGetImageFromEntry(feedUrl, feedEntry);
if (!media) {
return {};
}
return {
enclosure: media,
};
},
})) as ExtendedFeedData,
})),
);
const channel = createItemChannel<RssFeed[]>(item.id);
await channel.publishAndUpdateLastStateAsync(feeds);
for (const option of itemOptions) {
// Use the configured post limit; any value that is not a number falls back to 100.
const maxAmountPosts = typeof option.maximumAmountPosts === "number" ? option.maximumAmountPosts : 100;
for (const url of option.feedUrls) {
// Errors are caught per URL and only logged, so one failing feed does not stop the remaining ones.
try {
const innerHandler = rssFeedsRequestHandler.handler({
url,
count: maxAmountPosts,
});
await innerHandler.getCachedOrUpdatedDataAsync({
forceUpdate: true,
});
} catch (error) {
logger.error("Failed to update RSS feed", { url, error });
}
}
}
});
/**
 * Resolves an image URL for a feed entry.
 * The structured media properties are tried first; only when none of them yields
 * a URL is the serialized entry searched as a fallback.
 *
 * @param feedUrl URL of the feed the entry belongs to (forwarded to the fallback search)
 * @param entry raw feed entry object
 * @returns the image URL, or `null` when nothing was found
 */
const attemptGetImageFromEntry = (feedUrl: string, entry: object) =>
  getFirstMediaProperty(entry) ?? getImageFromStringAsFallback(feedUrl, JSON.stringify(entry));
/**
 * Last-resort image lookup: scans the given text for the first absolute http(s) URL
 * that ends in a known image file extension.
 *
 * @param feedUrl URL of the feed the content belongs to; only used in the debug message
 * @param content text to search (the caller passes the JSON-serialized feed entry)
 * @returns the first matching image URL, or `null` when there is none
 */
const getImageFromStringAsFallback = (feedUrl: string, content: string) => {
  // The URL body may not contain quotes, backslashes or angle brackets. Serialized JSON has
  // no whitespace between fields, so a plain `\S+?` would run across field boundaries and
  // glue unrelated values together (e.g. `https://site/post","image":"cover.png`).
  const regex = /https?:\/\/[^\s"'<>\\]+?\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff)/i;
  const result = regex.exec(content);
  if (result == null) {
    return null;
  }

  // `exec` without the global flag yields a single match, so report the URL itself
  // rather than `result.length` (which only counts capture groups).
  console.debug(
    `Falling back to regex image search for '${feedUrl}'. Found '${result[0]}' in content: ${content}`,
  );
  return result[0];
};
/**
 * Property paths, in order of preference, under which a feed entry may carry its image URL.
 * Each path is walked key by key starting at the entry object.
 */
const mediaProperties = [{ path: ["enclosure", "@_url"] }, { path: ["media:content", "@_url"] }];
/**
 * The RSS and Atom standards are poorly adhered to in most of the web.
 * We want to show pretty background images on the posts and therefore need to extract
 * the enclosure (aka. media images). This function walks each path listed in
 * `mediaProperties` through the entry and returns the first value that is a valid URL.
 *
 * When a path is only partly present, the deepest value that was reached is validated,
 * so a URL string stored directly under the first key is accepted as well.
 *
 * @param feedObject The object to scan for.
 * @returns the value of the first path that resolves to a valid URL, or `null` if none does
 */
const getFirstMediaProperty = (feedObject: object) => {
  for (const mediaProperty of mediaProperties) {
    let valueAtPath: unknown = feedObject;

    for (const key of mediaProperty.path) {
      // Primitives and null have no nested properties to descend into;
      // calling Object.entries on null/undefined would throw.
      if (typeof valueAtPath !== "object" || valueAtPath === null) {
        break;
      }

      const propertyEntry = Object.entries(valueAtPath).find(([entryKey]) => entryKey === key);
      if (!propertyEntry) {
        break;
      }

      valueAtPath = propertyEntry[1];
    }

    const validationResult = z.string().url().safeParse(valueAtPath);
    if (!validationResult.success) {
      continue;
    }

    logger.debug(`Found an image in the feed entry: ${validationResult.data}`);
    return validationResult.data;
  }

  return null;
};
/**
* A feed entry as delivered by the feed extractor, extended with our custom properties.
* This interface adds properties on top of the default ones.
*/
interface ExtendedFeedEntry extends FeedEntry {
// Image URL for the entry; set from the value returned by getExtraEntryFields when an image was found.
enclosure?: string;
}
/**
 * We extend the feed with custom properties.
 * This type re-declares `entries` of the default feed data as a list of our extended entries.
 * NOTE(review): `Modify` comes from @homarr/common/types; presumably it overrides the listed
 * keys of the first type with the second one - confirm against its definition.
 */
type ExtendedFeedData = Modify<
  FeedData,
  {
    // A feed carries a list of entries, not a single one.
    entries?: ExtendedFeedEntry[];
  }
>;
/**
* Pairs a feed URL with the data extracted for it.
*/
export interface RssFeed {
// URL the feed was requested from.
feedUrl: string;
// Extracted feed data, including the extended entries.
feed: ExtendedFeedData;
}

View File

@@ -22,6 +22,7 @@
},
"prettier": "@homarr/prettier-config",
"dependencies": {
"@extractus/feed-extractor": "7.1.3",
"@homarr/common": "workspace:^0.1.0",
"@homarr/db": "workspace:^0.1.0",
"@homarr/definitions": "workspace:^0.1.0",

View File

@@ -0,0 +1,127 @@
import type { FeedData, FeedEntry } from "@extractus/feed-extractor";
import { extract } from "@extractus/feed-extractor";
import dayjs from "dayjs";
import { z } from "zod";
import type { Modify } from "@homarr/common/types";
import { logger } from "@homarr/log";
import { createCachedWidgetRequestHandler } from "./lib/cached-widget-request-handler";
/**
 * Cached request handler for a single RSS/Atom feed (widget kind "rssFeed", query key "rssFeedList").
 * A request extracts the feed at `url`, attaches an `enclosure` image URL to every entry
 * for which one can be found, and keeps at most `count` entries.
 * Results are cached for 5 minutes.
 */
export const rssFeedsRequestHandler = createCachedWidgetRequestHandler({
  queryKey: "rssFeedList",
  widgetKind: "rssFeed",
  cacheDuration: dayjs.duration(5, "minutes"),
  async requestAsync(input: { url: string; count: number }) {
    const { url, count } = input;

    const feed = (await extract(url, {
      // Only add the extra field when an image was actually found.
      getExtraEntryFields: (feedEntry) => {
        const enclosure = attemptGetImageFromEntry(url, feedEntry);
        return enclosure ? { enclosure } : {};
      },
    })) as ExtendedFeedData;

    // A feed without entries is normalized to an empty list.
    const limitedEntries = feed.entries?.slice(0, count) ?? [];
    return { ...feed, entries: limitedEntries };
  },
});
/**
 * Resolves an image URL for a feed entry.
 * The structured media properties are tried first; only when none of them yields
 * a URL is the serialized entry searched as a fallback.
 *
 * @param feedUrl URL of the feed the entry belongs to (forwarded to the fallback search)
 * @param entry raw feed entry object
 * @returns the image URL, or `null` when nothing was found
 */
const attemptGetImageFromEntry = (feedUrl: string, entry: object) =>
  getFirstMediaProperty(entry) ?? getImageFromStringAsFallback(feedUrl, JSON.stringify(entry));
/**
 * Last-resort image lookup: scans the given text for the first absolute http(s) URL
 * that ends in a known image file extension.
 *
 * @param feedUrl URL of the feed the content belongs to; only used in the debug message
 * @param content text to search (the caller passes the JSON-serialized feed entry)
 * @returns the first matching image URL, or `null` when there is none
 */
const getImageFromStringAsFallback = (feedUrl: string, content: string) => {
  // The URL body may not contain quotes, backslashes or angle brackets. Serialized JSON has
  // no whitespace between fields, so a plain `\S+?` would run across field boundaries and
  // glue unrelated values together (e.g. `https://site/post","image":"cover.png`).
  const regex = /https?:\/\/[^\s"'<>\\]+?\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff)/i;
  const result = regex.exec(content);
  if (result == null) {
    return null;
  }

  // `exec` without the global flag yields a single match, so report the URL itself
  // rather than `result.length` (which only counts capture groups).
  console.debug(
    `Falling back to regex image search for '${feedUrl}'. Found '${result[0]}' in content: ${content}`,
  );
  return result[0];
};
/**
 * Property paths, in order of preference, under which a feed entry may carry its image URL.
 * Each path is walked key by key starting at the entry object.
 */
const mediaProperties = [{ path: ["enclosure", "@_url"] }, { path: ["media:content", "@_url"] }];
/**
 * The RSS and Atom standards are poorly adhered to in most of the web.
 * We want to show pretty background images on the posts and therefore need to extract
 * the enclosure (aka. media images). This function walks each path listed in
 * `mediaProperties` through the entry and returns the first value that is a valid URL.
 *
 * When a path is only partly present, the deepest value that was reached is validated,
 * so a URL string stored directly under the first key is accepted as well.
 *
 * @param feedObject The object to scan for.
 * @returns the value of the first path that resolves to a valid URL, or `null` if none does
 */
const getFirstMediaProperty = (feedObject: object) => {
  for (const mediaProperty of mediaProperties) {
    let valueAtPath: unknown = feedObject;

    for (const key of mediaProperty.path) {
      // Primitives and null have no nested properties to descend into;
      // calling Object.entries on null/undefined would throw.
      if (typeof valueAtPath !== "object" || valueAtPath === null) {
        break;
      }

      const propertyEntry = Object.entries(valueAtPath).find(([entryKey]) => entryKey === key);
      if (!propertyEntry) {
        break;
      }

      valueAtPath = propertyEntry[1];
    }

    const validationResult = z.string().url().safeParse(valueAtPath);
    if (!validationResult.success) {
      continue;
    }

    logger.debug(`Found an image in the feed entry: ${validationResult.data}`);
    return validationResult.data;
  }

  return null;
};
/**
* A feed entry as delivered by the feed extractor, extended with our custom properties.
* This interface adds properties on top of the default ones.
*/
interface ExtendedFeedEntry extends FeedEntry {
// Image URL for the entry; set from the value returned by getExtraEntryFields when an image was found.
enclosure?: string;
}
/**
* We extend the feed with custom properties.
* This type re-declares `entries` of the default feed data as a list of our extended entries.
* NOTE(review): `Modify` comes from @homarr/common/types; presumably it overrides the listed
* keys of the first type with the second one - confirm against its definition.
*/
type ExtendedFeedData = Modify<
FeedData,
{
// Entries of the feed, each optionally carrying an `enclosure` image URL.
entries?: ExtendedFeedEntry[];
}
>;
/**
* Pairs a feed URL with the data extracted for it.
*/
export interface RssFeed {
// URL the feed was requested from.
feedUrl: string;
// Extracted feed data, including the extended entries.
feed: ExtendedFeedData;
}

View File

@@ -26,7 +26,6 @@
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/sortable": "^10.0.0",
"@extractus/feed-extractor": "^7.1.3",
"@homarr/api": "workspace:^0.1.0",
"@homarr/auth": "workspace:^0.1.0",
"@homarr/common": "workspace:^0.1.0",

View File

@@ -9,41 +9,26 @@ import { clientApi } from "@homarr/api/client";
import type { WidgetComponentProps } from "../definition";
import classes from "./component.module.scss";
export default function RssFeed({ options, itemId }: WidgetComponentProps<"rssFeed">) {
const [rssFeeds] = clientApi.widget.rssFeed.getFeeds.useSuspenseQuery(
export default function RssFeed({ options }: WidgetComponentProps<"rssFeed">) {
const [feedEntries] = clientApi.widget.rssFeed.getFeeds.useSuspenseQuery(
{
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
itemId: itemId!,
urls: options.feedUrls,
maximumAmountPosts: typeof options.maximumAmountPosts === "number" ? options.maximumAmountPosts : 100,
},
{
refetchOnMount: false,
refetchOnWindowFocus: false,
refetchOnReconnect: false,
retry: false,
select(data) {
return data?.data ?? [];
},
},
);
const entries = rssFeeds
.filter((feedGroup) => feedGroup.feed.entries !== undefined)
.flatMap((feedGroup) => feedGroup.feed.entries)
.filter((entry) => entry !== undefined)
.sort((entryA, entryB) => {
if (!entryA.published || !entryB.published) {
return -1;
}
return new Date(entryB.published).getTime() - new Date(entryA.published).getTime();
})
.slice(0, options.maximumAmountPosts as number);
const languageDir = options.enableRtl ? "RTL" : "LTR";
return (
<ScrollArea className="scroll-area-w100" w="100%" p="4cqmin">
<Stack w={"100%"} gap="4cqmin">
{entries.map((feedEntry) => (
{feedEntries.map((feedEntry) => (
<Card
key={feedEntry.id}
withBorder

9
pnpm-lock.yaml generated
View File

@@ -831,9 +831,6 @@ importers:
packages/cron-jobs:
dependencies:
'@extractus/feed-extractor':
specifier: ^7.1.3
version: 7.1.3
'@homarr/analytics':
specifier: workspace:^0.1.0
version: link:../analytics
@@ -1479,6 +1476,9 @@ importers:
packages/request-handler:
dependencies:
'@extractus/feed-extractor':
specifier: 7.1.3
version: 7.1.3
'@homarr/common':
specifier: workspace:^0.1.0
version: link:../common
@@ -1770,9 +1770,6 @@ importers:
'@dnd-kit/sortable':
specifier: ^10.0.0
version: 10.0.0(@dnd-kit/core@6.3.1(react-dom@19.0.0(react@19.0.0))(react@19.0.0))(react@19.0.0)
'@extractus/feed-extractor':
specifier: ^7.1.3
version: 7.1.3
'@homarr/api':
specifier: workspace:^0.1.0
version: link:../api