bitcointranscripts · 0tuedon · Aug 8, 2024 · Aug 8, 2024 · Aug 8, 2024 · Aug 12, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
-
+# editor
+.vscode
 # dependencies
 /node_modules
 /.pnp
@@ -15,6 +16,9 @@
 
 # production
 /build
+/public/categories.json
+/public/types-data.json
+/public/tag-data.json
 
 # misc
 .DS_Store

diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "public/bitcoin-transcript"]
 	path = public/bitcoin-transcript
-	url = git@github.com:bitcointranscripts/bitcointranscripts.git
+	url = https://github.com/bitcointranscripts/bitcointranscripts.git
diff --git a/contentlayer.config.ts b/contentlayer.config.ts
@@ -1,84 +1,129 @@
-import * as fs from "fs";
-import { defineDocumentType, makeSource } from "contentlayer2/source-files";
-import { TranscriptFields } from "./src/types/index";
-
-const path = `${process.cwd()}/public/bitcoin-transcript`;
-
-const generateDocuTypes = () => {
-  const getFolders = fs.readdirSync(path, "utf-8");
-
-  // getFolders is sliced @6 to remove files which are not folders like .gitignore, readme.md , .github etc. We only want to read files from the folders that contain transcripts.
-  const folders = getFolders.slice(6).filter((item) => item !== "twitter_handles.json");
-
-  // firstLetterRegex: ** /(^\w{1})|(\s+\w{1})/g ** gets the first letter of the split folder name and capitalizes the first letter of the each word. This is to create a camelCase naming Convention for the generated folders
-  const firstLetterRegex = /(^\w{1})|(\s+\w{1})/g;
-
-  const DefinedDocumentTypes = folders.map((name) => {
-    const slugifyName = name
-      .split("-")
-      .map((text) => text.replace(firstLetterRegex, (text) => text.toUpperCase()))
-      .join("");
+import { createSlug } from "./src/utils";
+import { defineDocumentType, defineNestedType, makeSource } from "contentlayer2/source-files";
+import { writeFileSync } from "fs";
+import path from "path";
+import { Transcript as ContentTranscriptType } from "./.contentlayer/generated/types";
+
+/**
+ * Nested type describing one entry of a transcript's resource list:
+ * a `title` string paired with a `url` string. Neither field is marked
+ * as required here.
+ */
+const Resources = defineNestedType(() => {
+  return {
+    name: "Resources",
+    fields: {
+      title: {
+        type: "string",
+      },
+      url: {
+        type: "string",
+      },
+    },
+  };
+});
+
+/**
+ * Count how often each tag occurs across all transcripts and write the
+ * totals to `./public/tag-data.json`.
+ *
+ * Every entry of a transcript's `tags` list is passed through `createSlug`,
+ * and the counts are keyed by the resulting slug. Transcripts without a
+ * `tags` list contribute nothing.
+ *
+ * @param allTranscripts - documents whose `tags` lists are tallied
+ */
+function createTagCount(allTranscripts: ContentTranscriptType[]) {
+  // Use a prototype-less object so that slugs such as "constructor",
+  // "toString" or "__proto__" are treated as ordinary keys instead of
+  // colliding with members inherited from Object.prototype.
+  const tagCount: Record<string, number> = Object.create(null);
+
+  for (const file of allTranscripts) {
+    for (const tag of file.tags ?? []) {
+      const formattedTag = createSlug(tag);
+      tagCount[formattedTag] = (tagCount[formattedTag] ?? 0) + 1;
+    }
+  }
+
+  // The relative path is resolved against the current working directory.
+  writeFileSync("./public/tag-data.json", JSON.stringify(tagCount));
+}
+
+/**
+ * Count how often each relevant type occurs across all transcripts and write
+ * the totals to `./public/types-data.json`.
+ *
+ * Types are read from each transcript's `categories` list and passed through
+ * `createSlug`; only slugs present in the `relevantTypes` allow-list are
+ * counted. Transcripts without a `categories` list contribute nothing.
+ *
+ * @param allTranscripts - documents whose `categories` lists are tallied
+ */
+const createTypesCount = (allTranscripts: ContentTranscriptType[]) => {
+  const typesAndCount: Record<string, number> = {};
+  // Allow-list of type slugs; any category whose slug is not listed is ignored.
+  const relevantTypes = [
+    "video",
+    "core-dev-tech",
+    "podcast",
+    "conference",
+    "meeting",
+    "club",
+    "meetup",
+    "hackathon",
+    "workshop",
+    "residency",
+    "developer-tools",
+  ];
 
-    return defineDocumentType(() => ({
-      name: slugifyName,
-      filePathPattern: `${name}/**/*.md`,
-      contentType: "markdown",
-      fields: TranscriptFields,
-    }));
+  allTranscripts.forEach((transcript) => {
+    if (transcript.categories) {
+      transcript.categories.forEach((type: string) => {
+        const formattedType = createSlug(type);
+        // Count only allow-listed slugs; start at 1 on first sight, then increment.
+        if (relevantTypes.includes(formattedType)) {
+          if (formattedType in typesAndCount) {
+            typesAndCount[formattedType] += 1;
+          } else {
+            typesAndCount[formattedType] = 1;
+          }
+        }
+      });
+    }
   });
 
-  return { DefinedDocumentTypes };
+  // The relative path is resolved against the current working directory.
+  writeFileSync("./public/types-data.json", JSON.stringify(typesAndCount));
 };
 
-const generateExcludedPaths = () => {
-  const otherPaths = [
+/**
+ * The single Contentlayer document type used for every transcript: it matches
+ * any `.md` file at any depth below the content directory and treats the body
+ * as markdown.
+ *
+ * Only `title` is required. Several front-matter keys are declared twice under
+ * slightly different spellings (`transcript_by` / `Transcript_by`, `tag` /
+ * `tags`, `aditional_resources` / `additional_resources`) - presumably to
+ * tolerate inconsistent front matter in the source files; confirm before
+ * removing any of them.
+ *
+ * Computed fields:
+ * - `url`: the document's flattened path prefixed with `/`.
+ * - `slugAsParams`: the flattened path split on `/` into its segments.
+ */
+export const Transcript = defineDocumentType(() => ({
+  name: "Transcript",
+  filePathPattern: `**/*.md`,
+  contentType: "markdown",
+  fields: {
+    title: { type: "string", required: true },
+    speakers: { type: "list", of: { type: "string" } },
+    date: { type: "date" },
+    transcript_by: { type: "string" },
+    Transcript_by: { type: "string" },
+    categories: { type: "list", of: { type: "string" } },
+    tag: { type: "list", of: { type: "string" } },
+    tags: { type: "list", of: { type: "string" } },
+    media: { type: "string" },
+    translation_by: { type: "string" },
+    episode: { type: "number" },
+    aliases: { type: "list", of: { type: "string" } },
+    video: { type: "string" },
+    hosts: { type: "list", of: { type: "string" } },
+    source: { type: "string" },
+    transcription_coverage: { type: "string" },
+    summary: { type: "string" },
+    needs: { type: "string" },
+    aditional_resources: { type: "list", of: Resources },
+    additional_resources: { type: "list", of: Resources },
+    weight: { type: "number" },
+  },
+  computedFields: {
+    url: {
+      type: "string",
+      resolve: (doc) => `/${doc._raw.flattenedPath}`,
+    },
+    slugAsParams: {
+      // The resolver returns the array produced by split("/"), so the field
+      // is declared as a list rather than a string.
+      type: "list",
+      resolve: (doc) => doc._raw.flattenedPath.split("/"),
+    },
+  },
+}));
+
+/**
+ * Contentlayer source configuration.
+ *
+ * Content is read from `<cwd>/public/bitcoin-transcript`, every document is
+ * typed as `Transcript`, and the paths listed in `contentDirExclude` are
+ * skipped. The `onSuccess` hook (presumably invoked by Contentlayer once
+ * generation succeeds - confirm against the library docs) rewrites the tag
+ * and type count JSON files.
+ */
+export default makeSource({
+  contentDirPath: path.join(process.cwd(), "public", "bitcoin-transcript"),
+  documentTypes: [Transcript],
+  contentDirExclude: [
     ".github",
     ".gitignore",
     "LICENSE.md",
     "README.md",
+    "STYLE.md",
     "twitter_handles.json",
     ".json",
     "2018-08-17-richard-bondi-bitcoin-cli-regtest.es.md",
-  ];
-
-  const getFolders = fs.readdirSync(path, "utf-8");
-  const folders = getFolders.slice(6).filter((item) => item !== "twitter_handles.json");
-
-  const indexFiles: string[] = [];
-  const indexFilesInFolders: string[] = [];
-
-  for (let i = 0; i < folders.length; i++) {
-    const name = folders[i];
-    const files = fs.readdirSync(`${path}/${folders[i]}`);
-
-    // isNumRegex: ** /^-?\d+$/ ** checks if file name is a digit. We're using it to check for folders that have transcripts grouped in years.
-    const isNumRegex = /^-?\d+$/;
-    const isDirectory = files.filter((num: string) => isNumRegex.test(num));
-
-    if (isDirectory.length) {
-      isDirectory.map((year) => {
-        const text = `${name}/${year}/_index.md ${name}/${year}/_index.zh.md ${name}/${year}/_index.es.md`;
-        indexFilesInFolders.push(...text.split(" "));
-      });
-    }
-  }
-
-  folders.map((folder) => {
-    const text = `${folder}/_index.md ${folder}/_index.zh.md ${folder}/_index.es.md`;
-
-    indexFiles.push(...text.split(" "));
-  });
-
-  const foldersToExclude = [...otherPaths];
-
-  return { foldersToExclude };
-};
-
-const { DefinedDocumentTypes } = generateDocuTypes();
-const { foldersToExclude } = generateExcludedPaths();
-
-export default makeSource({
-  contentDirPath: "public/bitcoin-transcript/",
-  documentTypes: DefinedDocumentTypes,
-  contentDirExclude: foldersToExclude,
+  ],
+  // Load the generated documents, then refresh the tag and type count JSON files.
+  onSuccess: async (importData) => {
+    const { allDocuments } = await importData();
+    createTagCount(allDocuments);
+    createTypesCount(allDocuments);
+  },
 });