From 99c8e5b609b17dfe443b35bfff2b883845dc62fe Mon Sep 17 00:00:00 2001 From: Meier Lukas Date: Wed, 22 Jan 2025 18:23:49 +0100 Subject: [PATCH] fix: add missing request handler for rss feeds (#2006) * fix: add missing request handler for rss feeds * fix: ci issues --- packages/api/src/router/widgets/rssFeed.ts | 38 +++++- packages/cron-jobs/package.json | 1 - packages/cron-jobs/src/index.ts | 2 - packages/cron-jobs/src/jobs/rss-feeds.ts | 141 +++------------------ packages/request-handler/package.json | 1 + packages/request-handler/src/rss-feeds.ts | 127 +++++++++++++++++++ packages/widgets/package.json | 1 - packages/widgets/src/rssFeed/component.tsx | 25 +--- pnpm-lock.yaml | 9 +- 9 files changed, 186 insertions(+), 159 deletions(-) create mode 100644 packages/request-handler/src/rss-feeds.ts diff --git a/packages/api/src/router/widgets/rssFeed.ts b/packages/api/src/router/widgets/rssFeed.ts index e17cb9815..448f1f576 100644 --- a/packages/api/src/router/widgets/rssFeed.ts +++ b/packages/api/src/router/widgets/rssFeed.ts @@ -1,12 +1,36 @@ -import type { RssFeed } from "@homarr/cron-jobs"; -import { createItemChannel } from "@homarr/redis"; +import { rssFeedsRequestHandler } from "@homarr/request-handler/rss-feeds"; +import { z } from "@homarr/validation"; -import { createOneItemMiddleware } from "../../middlewares/item"; import { createTRPCRouter, publicProcedure } from "../../trpc"; export const rssFeedRouter = createTRPCRouter({ - getFeeds: publicProcedure.unstable_concat(createOneItemMiddleware("rssFeed")).query(async ({ input }) => { - const channel = createItemChannel(input.itemId); - return await channel.getAsync(); - }), + getFeeds: publicProcedure + .input( + z.object({ + urls: z.array(z.string()), + maximumAmountPosts: z.number(), + }), + ) + .query(async ({ input }) => { + const rssFeeds = await Promise.all( + input.urls.map(async (url) => { + const innerHandler = rssFeedsRequestHandler.handler({ + url, + count: input.maximumAmountPosts, + }); + return await innerHandler.getCachedOrUpdatedDataAsync({ + forceUpdate: false, + }); + }), + ); + + return rssFeeds + .flatMap((rssFeed) => rssFeed.data.entries) + .slice(0, input.maximumAmountPosts) + .sort((entryA, entryB) => { + return entryA.published && entryB.published + ? new Date(entryB.published).getTime() - new Date(entryA.published).getTime() + : 0; + }); + }), }); diff --git a/packages/cron-jobs/package.json b/packages/cron-jobs/package.json index 217ff58e5..25c6b5af2 100644 --- a/packages/cron-jobs/package.json +++ b/packages/cron-jobs/package.json @@ -22,7 +22,6 @@ }, "prettier": "@homarr/prettier-config", "dependencies": { - "@extractus/feed-extractor": "^7.1.3", "@homarr/analytics": "workspace:^0.1.0", "@homarr/auth": "workspace:^0.1.0", "@homarr/common": "workspace:^0.1.0", diff --git a/packages/cron-jobs/src/index.ts b/packages/cron-jobs/src/index.ts index 50dc00702..fe8e1b94e 100644 --- a/packages/cron-jobs/src/index.ts +++ b/packages/cron-jobs/src/index.ts @@ -11,7 +11,6 @@ import { mediaServerJob } from "./jobs/integrations/media-server"; import { mediaTranscodingJob } from "./jobs/integrations/media-transcoding"; import { minecraftServerStatusJob } from "./jobs/minecraft-server-status"; import { pingJob } from "./jobs/ping"; -import type { RssFeed } from "./jobs/rss-feeds"; import { rssFeedsJob } from "./jobs/rss-feeds"; import { sessionCleanupJob } from "./jobs/session-cleanup"; import { updateCheckerJob } from "./jobs/update-checker"; @@ -38,4 +37,3 @@ export const jobGroup = createCronJobGroup({ }); export type JobGroupKeys = ReturnType<(typeof jobGroup)["getKeys"]>[number]; -export type { RssFeed }; diff --git a/packages/cron-jobs/src/jobs/rss-feeds.ts b/packages/cron-jobs/src/jobs/rss-feeds.ts index 4c7f2d720..167fe9048 100644 --- a/packages/cron-jobs/src/jobs/rss-feeds.ts +++ b/packages/cron-jobs/src/jobs/rss-feeds.ts @@ -1,139 +1,36 @@ -import type { FeedData, FeedEntry } from "@extractus/feed-extractor"; -import { extract } from "@extractus/feed-extractor"; import SuperJSON from "superjson"; -import type { Modify } from "@homarr/common/types"; import { EVERY_5_MINUTES } from "@homarr/cron-jobs-core/expressions"; import { db, eq } from "@homarr/db"; import { items } from "@homarr/db/schema"; import { logger } from "@homarr/log"; -import { createItemChannel } from "@homarr/redis"; -import { z } from "@homarr/validation"; - // This import is done that way to avoid circular dependencies. +import { rssFeedsRequestHandler } from "@homarr/request-handler/rss-feeds"; + import type { WidgetComponentProps } from "../../../widgets"; import { createCronJob } from "../lib"; export const rssFeedsJob = createCronJob("rssFeeds", EVERY_5_MINUTES).withCallback(async () => { - const itemsForIntegration = await db.query.items.findMany({ + const rssItems = await db.query.items.findMany({ where: eq(items.kind, "rssFeed"), }); - for (const item of itemsForIntegration) { - const options = SuperJSON.parse["options"]>(item.options); + const itemOptions = rssItems.map((item) => SuperJSON.parse["options"]>(item.options)); - const feeds = await Promise.all( - options.feedUrls.map(async (feedUrl) => ({ - feedUrl, - feed: (await extract(feedUrl, { - getExtraEntryFields: (feedEntry) => { - const media = attemptGetImageFromEntry(feedUrl, feedEntry); - if (!media) { - return {}; - } - return { - enclosure: media, - }; - }, - })) as ExtendedFeedData, - })), - ); - - const channel = createItemChannel(item.id); - await channel.publishAndUpdateLastStateAsync(feeds); + for (const option of itemOptions) { + const maxAmountPosts = typeof option.maximumAmountPosts === "number" ? option.maximumAmountPosts : 100; + for (const url of option.feedUrls) { + try { + const innerHandler = rssFeedsRequestHandler.handler({ + url, + count: maxAmountPosts, + }); + await innerHandler.getCachedOrUpdatedDataAsync({ + forceUpdate: true, + }); + } catch (error) { + logger.error("Failed to update RSS feed", { url, error }); + } + } } }); - -const attemptGetImageFromEntry = (feedUrl: string, entry: object) => { - const media = getFirstMediaProperty(entry); - if (media !== null) { - return media; - } - return getImageFromStringAsFallback(feedUrl, JSON.stringify(entry)); -}; - -const getImageFromStringAsFallback = (feedUrl: string, content: string) => { - const regex = /https?:\/\/\S+?\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff)/i; - const result = regex.exec(content); - - if (result == null) { - return null; - } - - console.debug( - `Falling back to regex image search for '${feedUrl}'. Found ${result.length} matches in content: ${content}`, - ); - return result[0]; -}; - -const mediaProperties = [ - { - path: ["enclosure", "@_url"], - }, - { - path: ["media:content", "@_url"], - }, -]; - -/** - * The RSS and Atom standards are poorly adhered to in most of the web. - * We want to show pretty background images on the posts and therefore need to extract - * the enclosure (aka. media images). This function uses the dynamic properties defined above - * to search through the possible paths and detect valid image URLs. - * @param feedObject The object to scan for. - * @returns the value of the first path that is found within the object - */ -const getFirstMediaProperty = (feedObject: object) => { - for (const mediaProperty of mediaProperties) { - let propertyIndex = 0; - let objectAtPath: object = feedObject; - while (propertyIndex < mediaProperty.path.length) { - const key = mediaProperty.path[propertyIndex]; - if (key === undefined) { - break; - } - const propertyEntries = Object.entries(objectAtPath); - const propertyEntry = propertyEntries.find(([entryKey]) => entryKey === key); - if (!propertyEntry) { - break; - } - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - const [_, propertyEntryValue] = propertyEntry; - objectAtPath = propertyEntryValue as object; - propertyIndex++; - } - - const validationResult = z.string().url().safeParse(objectAtPath); - if (!validationResult.success) { - continue; - } - - logger.debug(`Found an image in the feed entry: ${validationResult.data}`); - return validationResult.data; - } - return null; -}; - -/** - * We extend the feed with custom properties. - * This interface adds properties on top of the default ones. - */ -interface ExtendedFeedEntry extends FeedEntry { - enclosure?: string; -} - -/** - * We extend the feed with custom properties. - * This interface omits the default entries with our custom definition. - */ -type ExtendedFeedData = Modify< - FeedData, - { - entries?: ExtendedFeedEntry; - } ->; - -export interface RssFeed { - feedUrl: string; - feed: ExtendedFeedData; -} diff --git a/packages/request-handler/package.json b/packages/request-handler/package.json index 36a21b66d..b85124ddd 100644 --- a/packages/request-handler/package.json +++ b/packages/request-handler/package.json @@ -22,6 +22,7 @@ }, "prettier": "@homarr/prettier-config", "dependencies": { + "@extractus/feed-extractor": "7.1.3", "@homarr/common": "workspace:^0.1.0", "@homarr/db": "workspace:^0.1.0", "@homarr/definitions": "workspace:^0.1.0", diff --git a/packages/request-handler/src/rss-feeds.ts b/packages/request-handler/src/rss-feeds.ts new file mode 100644 index 000000000..7a565cb5f --- /dev/null +++ b/packages/request-handler/src/rss-feeds.ts @@ -0,0 +1,127 @@ +import type { FeedData, FeedEntry } from "@extractus/feed-extractor"; +import { extract } from "@extractus/feed-extractor"; +import dayjs from "dayjs"; +import { z } from "zod"; + +import type { Modify } from "@homarr/common/types"; +import { logger } from "@homarr/log"; + +import { createCachedWidgetRequestHandler } from "./lib/cached-widget-request-handler"; + +export const rssFeedsRequestHandler = createCachedWidgetRequestHandler({ + queryKey: "rssFeedList", + widgetKind: "rssFeed", + async requestAsync(input: { url: string; count: number }) { + const result = (await extract(input.url, { + getExtraEntryFields: (feedEntry) => { + const media = attemptGetImageFromEntry(input.url, feedEntry); + if (!media) { + return {}; + } + return { + enclosure: media, + }; + }, + })) as ExtendedFeedData; + + return { + ...result, + entries: result.entries?.slice(0, input.count) ?? [], + }; + }, + cacheDuration: dayjs.duration(5, "minutes"), +}); + +const attemptGetImageFromEntry = (feedUrl: string, entry: object) => { + const media = getFirstMediaProperty(entry); + if (media !== null) { + return media; + } + return getImageFromStringAsFallback(feedUrl, JSON.stringify(entry)); +}; + +const getImageFromStringAsFallback = (feedUrl: string, content: string) => { + const regex = /https?:\/\/\S+?\.(jpg|jpeg|png|gif|bmp|svg|webp|tiff)/i; + const result = regex.exec(content); + + if (result == null) { + return null; + } + + console.debug( + `Falling back to regex image search for '${feedUrl}'. Found ${result.length} matches in content: ${content}`, + ); + return result[0]; +}; + +const mediaProperties = [ + { + path: ["enclosure", "@_url"], + }, + { + path: ["media:content", "@_url"], + }, +]; + +/** + * The RSS and Atom standards are poorly adhered to in most of the web. + * We want to show pretty background images on the posts and therefore need to extract + * the enclosure (aka. media images). This function uses the dynamic properties defined above + * to search through the possible paths and detect valid image URLs. + * @param feedObject The object to scan for. + * @returns the value of the first path that is found within the object + */ +const getFirstMediaProperty = (feedObject: object) => { + for (const mediaProperty of mediaProperties) { + let propertyIndex = 0; + let objectAtPath: object = feedObject; + while (propertyIndex < mediaProperty.path.length) { + const key = mediaProperty.path[propertyIndex]; + if (key === undefined) { + break; + } + const propertyEntries = Object.entries(objectAtPath); + const propertyEntry = propertyEntries.find(([entryKey]) => entryKey === key); + if (!propertyEntry) { + break; + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + const [_, propertyEntryValue] = propertyEntry; + objectAtPath = propertyEntryValue as object; + propertyIndex++; + } + + const validationResult = z.string().url().safeParse(objectAtPath); + if (!validationResult.success) { + continue; + } + + logger.debug(`Found an image in the feed entry: ${validationResult.data}`); + return validationResult.data; + } + return null; +}; + +/** + * We extend the feed with custom properties. + * This interface adds properties on top of the default ones. + */ +interface ExtendedFeedEntry extends FeedEntry { + enclosure?: string; +} + +/** + * We extend the feed with custom properties. + * This interface omits the default entries with our custom definition. + */ +type ExtendedFeedData = Modify< + FeedData, + { + entries?: ExtendedFeedEntry[]; + } +>; + +export interface RssFeed { + feedUrl: string; + feed: ExtendedFeedData; +} diff --git a/packages/widgets/package.json b/packages/widgets/package.json index d65d5d150..1123275a1 100644 --- a/packages/widgets/package.json +++ b/packages/widgets/package.json @@ -26,7 +26,6 @@ "dependencies": { "@dnd-kit/core": "^6.3.1", "@dnd-kit/sortable": "^10.0.0", - "@extractus/feed-extractor": "^7.1.3", "@homarr/api": "workspace:^0.1.0", "@homarr/auth": "workspace:^0.1.0", "@homarr/common": "workspace:^0.1.0", diff --git a/packages/widgets/src/rssFeed/component.tsx b/packages/widgets/src/rssFeed/component.tsx index a765503df..b5f1f57ab 100644 --- a/packages/widgets/src/rssFeed/component.tsx +++ b/packages/widgets/src/rssFeed/component.tsx @@ -9,41 +9,26 @@ import { clientApi } from "@homarr/api/client"; import type { WidgetComponentProps } from "../definition"; import classes from "./component.module.scss"; -export default function RssFeed({ options, itemId }: WidgetComponentProps<"rssFeed">) { - const [rssFeeds] = clientApi.widget.rssFeed.getFeeds.useSuspenseQuery( +export default function RssFeed({ options }: WidgetComponentProps<"rssFeed">) { + const [feedEntries] = clientApi.widget.rssFeed.getFeeds.useSuspenseQuery( { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - itemId: itemId!, + urls: options.feedUrls, + maximumAmountPosts: typeof options.maximumAmountPosts === "number" ? options.maximumAmountPosts : 100, }, { refetchOnMount: false, refetchOnWindowFocus: false, refetchOnReconnect: false, retry: false, - select(data) { - return data?.data ?? []; - }, }, ); - const entries = rssFeeds - .filter((feedGroup) => feedGroup.feed.entries !== undefined) - .flatMap((feedGroup) => feedGroup.feed.entries) - .filter((entry) => entry !== undefined) - .sort((entryA, entryB) => { - if (!entryA.published || !entryB.published) { - return -1; - } - return new Date(entryB.published).getTime() - new Date(entryA.published).getTime(); - }) - .slice(0, options.maximumAmountPosts as number); - const languageDir = options.enableRtl ? "RTL" : "LTR"; return ( - {entries.map((feedEntry) => ( + {feedEntries.map((feedEntry) => (