From b88c14407f132b3facc36dbe6cdd8783513f7f34 Mon Sep 17 00:00:00 2001
From: Derrick Hammer <derrick@derrickhammer.com>
Date: Mon, 18 Dec 2023 01:47:08 -0500
Subject: [PATCH] fix: add the siteUpdateReceived back in

---
 app/routes/api.events.siteUpdateReceived.ts | 96 +++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 app/routes/api.events.siteUpdateReceived.ts

diff --git a/app/routes/api.events.siteUpdateReceived.ts b/app/routes/api.events.siteUpdateReceived.ts
new file mode 100644
index 0000000..a050058
--- /dev/null
+++ b/app/routes/api.events.siteUpdateReceived.ts
@@ -0,0 +1,96 @@
+import {
+  json,
+  LoaderFunction,
+  ActionFunction,
+  ActionFunctionArgs,
+} from "@remix-run/node";
+import { S5Client } from "@lumeweb/s5-js";
+import xml2js from "xml2js";
+import { prisma } from "@/lib/prisma";
+import * as cheerio from "cheerio";
+import slugify from "slugify";
+import path from "path";
+
+/**
+ * Remix action for POST requests: indexes an updated S5 site's pages.
+ *
+ * Reads `cid` and `site` from the form body, loads the site metadata from
+ * the S5 portal, maps every sitemap.xml URL to a file CID and creates an
+ * `article` row for each CID that is not stored yet.
+ *
+ * @param request - Incoming request whose form data carries `cid` and `site`.
+ * @returns JSON `{ created }` with the number of rows inserted.
+ * @throws Response 400 when `cid` or `site` is missing, 404 without a sitemap.
+ */
+export async function action({ request }: ActionFunctionArgs) {
+  const form = await request.formData();
+  const siteCid = form.get("cid");
+  const site = form.get("site");
+  // Reject bad input up front instead of failing inside the client/slugify.
+  if (typeof siteCid !== "string" || typeof site !== "string") {
+    throw new Response("Missing cid or site", { status: 400 });
+  }
+
+  const client = new S5Client("https://s5.web3portal.com");
+  const meta = (await client.getMetadata(siteCid)) as any;
+  const fileMeta = meta?.metadata ?? {};
+  const paths: Record<string, { cid: string }> = fileMeta.paths ?? {};
+  const tryFiles: string[] = fileMeta.tryFiles ?? [];
+  // Own-property check: keys inherited from Object.prototype are not files.
+  const hasFile = (file: string) =>
+    Object.prototype.hasOwnProperty.call(paths, file);
+
+  if (!hasFile("sitemap.xml")) {
+    throw new Response("Sitemap not found", { status: 404 });
+  }
+
+  // Decode to text first, the same way page content is decoded below.
+  const sitemapXml = Buffer.from(
+    await client.downloadData(paths["sitemap.xml"].cid)
+  ).toString();
+  const sitemap = await xml2js.parseStringPromise(sitemapXml);
+
+  let created = 0;
+  // An empty <urlset> parses without a `url` array, hence the fallback.
+  for (const urlEntry of sitemap?.urlset?.url ?? []) {
+    const url = urlEntry?.loc?.[0];
+    let rawPath: string;
+    try {
+      rawPath = new URL(url).pathname;
+    } catch {
+      continue; // skip entries whose <loc> is not a valid absolute URL
+    }
+
+    // Strip the outer slashes to match keys such as "sitemap.xml"; posix
+    // helpers keep "/" separators regardless of the host platform.
+    let filePath = path.posix.normalize(rawPath).replace(/^\/|\/$/g, "");
+    if (!hasFile(filePath)) {
+      // Not a file itself: treat it as a directory and look for its index.
+      const index = tryFiles
+        .map((file) => path.posix.join(filePath, file))
+        .find(hasFile);
+      filePath = index ?? filePath;
+    }
+    const cid = hasFile(filePath) ? paths[filePath].cid : undefined;
+    if (!cid) continue; // URL does not map to any uploaded file
+    const exists = await prisma.article.findUnique({ where: { cid } });
+    if (exists) continue;
+
+    // Fetch the page via its CID and use its <title> as the article title.
+    const html = Buffer.from(await client.downloadData(cid)).toString();
+    const title = cheerio.load(html)("title").text();
+    await prisma.article.create({
+      data: {
+        title,
+        url,
+        cid,
+        createdAt: new Date(),
+        updatedAt: new Date(),
+        slug: slugify(rawPath),
+        siteKey: slugify(site),
+      },
+    });
+    created += 1;
+  }
+  return json({ created });
+}