From b88c14407f132b3facc36dbe6cdd8783513f7f34 Mon Sep 17 00:00:00 2001
From: Derrick Hammer
Date: Mon, 18 Dec 2023 01:47:08 -0500
Subject: [PATCH] fix: add the siteUpdateReceived back in

---
 app/routes/api.events.siteUpdateReceived.ts | 158 +++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 app/routes/api.events.siteUpdateReceived.ts

diff --git a/app/routes/api.events.siteUpdateReceived.ts b/app/routes/api.events.siteUpdateReceived.ts
new file mode 100644
index 0000000..a050058
--- /dev/null
+++ b/app/routes/api.events.siteUpdateReceived.ts
@@ -0,0 +1,158 @@
+import {
+  json,
+  LoaderFunction,
+  ActionFunction,
+  ActionFunctionArgs,
+} from "@remix-run/node";
+import { S5Client } from "@lumeweb/s5-js";
+import xml2js from "xml2js";
+import { prisma } from "@/lib/prisma";
+import * as cheerio from "cheerio";
+import slugify from "slugify";
+import path from "path";
+
+/** Maps a site-relative file path to the CID of that file's content. */
+type SitePaths = Record<string, { cid: string }>;
+
+/** Own-property check that does not depend on the object's prototype chain. */
+function hasOwn(obj: object, key: string): boolean {
+  return Object.prototype.hasOwnProperty.call(obj, key);
+}
+
+/**
+ * Read a required, non-empty string field from the submitted form data.
+ *
+ * @throws Response with status 400 when the field is absent, empty, or a file.
+ */
+function requireField(formData: FormData, name: string): string {
+  const value = formData.get(name);
+  if (typeof value !== "string" || value === "") {
+    throw new Response(`Missing required field: ${name}`, { status: 400 });
+  }
+  return value;
+}
+
+/**
+ * Resolve a URL pathname to a key of `paths`.
+ *
+ * The pathname is normalized and one leading and one trailing slash are
+ * removed. If the result is not a key of `paths` it is treated as a directory
+ * and each entry of `tryFiles` is tried inside it, in order; the first hit
+ * wins. When nothing matches, the stripped pathname is returned unchanged.
+ */
+function resolveSitePath(
+  pathname: string,
+  paths: SitePaths,
+  tryFiles: readonly string[]
+): string {
+  // URL paths always use "/", so use the POSIX functions on every host OS.
+  const stripped = path.posix.normalize(pathname).replace(/^\/|\/$/g, "");
+  if (hasOwn(paths, stripped)) {
+    return stripped;
+  }
+
+  for (const file of tryFiles) {
+    const candidate = path.posix.join(stripped, file);
+    if (hasOwn(paths, candidate)) {
+      return candidate;
+    }
+  }
+
+  return stripped;
+}
+
+/**
+ * Action function for POST requests announcing that a site was updated.
+ *
+ * Reads the form fields `cid` and `site`, fetches the metadata for `cid`,
+ * downloads the `sitemap.xml` listed in it, resolves every sitemap URL to a
+ * file in the metadata's path map, and creates an `article` record for each
+ * resolved file whose CID is not stored yet.
+ *
+ * @returns A JSON response with the number of articles created.
+ * @throws Response with status 400 when `cid` or `site` is missing, or 404
+ *   when the metadata lists no `sitemap.xml`.
+ */
+export async function action({ request }: ActionFunctionArgs) {
+  const client = new S5Client("https://s5.web3portal.com");
+  const formData = await request.formData();
+  const siteCid = requireField(formData, "cid");
+  const siteKey = slugify(requireField(formData, "site"));
+
+  const meta = (await client.getMetadata(siteCid)) as any;
+  const fileMeta = meta?.metadata as any;
+  const paths: SitePaths = fileMeta?.paths ?? {};
+  const tryFiles: string[] = fileMeta?.tryFiles ?? [];
+
+  if (!hasOwn(paths, "sitemap.xml")) {
+    throw new Response("Sitemap not found", { status: 404 });
+  }
+
+  // Decode the download to text first, as is done for article content below,
+  // so the XML parser is never handed raw bytes.
+  const sitemapXml = Buffer.from(
+    await client.downloadData(paths["sitemap.xml"].cid)
+  ).toString();
+  const sitemap = await xml2js.parseStringPromise(sitemapXml);
+
+  // A sitemap without any <url> element parses without a `url` array.
+  const entries: any[] = sitemap?.urlset?.url ?? [];
+
+  const articles: { url: string; cid: string; pathname: string }[] = [];
+  for (const entry of entries) {
+    const url: unknown = entry?.loc?.[0];
+    if (typeof url !== "string") {
+      continue;
+    }
+
+    let pathname: string;
+    try {
+      pathname = new URL(url).pathname;
+    } catch {
+      // One malformed <loc> should not abort the whole update.
+      console.warn(`Skipping invalid sitemap URL: ${url}`);
+      continue;
+    }
+
+    // Entries that do not resolve to a known file have no content to index.
+    const cid = paths[resolveSitePath(pathname, paths, tryFiles)]?.cid;
+    if (cid) {
+      articles.push({ url, cid, pathname });
+    }
+  }
+
+  let created = 0;
+  for (const { url, cid, pathname } of articles) {
+    const exists = await prisma.article.findUnique({
+      where: { cid },
+    });
+    if (exists) {
+      continue;
+    }
+
+    // Fetch and parse the content using CID
+    const contentData = Buffer.from(await client.downloadData(cid)).toString();
+    const $ = cheerio.load(contentData);
+    // Only the first <title> is the document title; later ones (e.g. inside
+    // inline SVG) would otherwise be concatenated onto it.
+    const title = $("title").first().text().trim();
+    const now = new Date();
+
+    // Insert a new record into the database
+    await prisma.article.create({
+      data: {
+        title,
+        url,
+        cid,
+        createdAt: now,
+        updatedAt: now,
+        slug: slugify(pathname),
+        siteKey,
+      },
+    });
+    created += 1;
+  }
+
+  // A Remix action must return a value; returning nothing is a runtime error.
+  return json({ success: true, created });
+}