From bfbe35618e8a85afd9e2ede2f5524b40b7620953 Mon Sep 17 00:00:00 2001 From: kouloumos Date: Fri, 22 Nov 2024 16:35:54 +0200 Subject: [PATCH] refactor(contentlayer): simplify topics processing and clarify terminology - terminology change to refer to "topics" instead of "categories" - Simplify data processing to do a single pass and use a single structure - Consolidate JSON generation into cleaner functions --- contentlayer.config.ts | 136 ++++++++++-------- package.json | 4 +- .../{fetchCategories.js => fetchTopics.js} | 6 +- 3 files changed, 79 insertions(+), 67 deletions(-) rename scripts/{fetchCategories.js => fetchTopics.js} (62%) diff --git a/contentlayer.config.ts b/contentlayer.config.ts index edef387..2591ea8 100644 --- a/contentlayer.config.ts +++ b/contentlayer.config.ts @@ -19,7 +19,7 @@ const Resources = defineNestedType(() => ({ url: { type: "string" }, }, })); -export interface CategoryInfo { +export interface Topic { title: string; slug: string; optech_url: string; @@ -28,6 +28,14 @@ export interface CategoryInfo { excerpt: string; } +// The full processed topic we use internally +interface ProcessedTopic { + name: string; // Display name (from topic.title or original tag) + slug: string; // Slugified identifier + count: number; // Number of occurrences + categories: string[]; // List of categories it belongs to +} + interface TagInfo { name: string; slug: string; @@ -55,91 +63,95 @@ const getTranscriptAliases = (allTranscripts: ContentTranscriptType[]) => { fs.writeFileSync("./public/aliases.json", JSON.stringify(aliases)); }; -const getCategories = () => { - const filePath = path.join(process.cwd(), "public", "categories.json"); +const getTopics = () => { + const filePath = path.join(process.cwd(), "public", "topics.json"); const fileContents = fs.readFileSync(filePath, "utf8"); return JSON.parse(fileContents); }; - -function generateTopicsCounts(transcripts: ContentTranscriptType[]) { - const categories: CategoryInfo[] = getCategories(); - const 
topicsMap = new Map(); - const categoryMap = new Map(); - const topicsByCategory: { [category: string]: TopicsData[] } = {}; - const uncategorizedTopics = new Set(); - - // Initialize category map and category arrays - categories.forEach((cat) => { - cat.categories.forEach((category) => { - if (!topicsByCategory[category]) { - topicsByCategory[category] = []; - } - }); - categoryMap.set(createSlug(cat.slug), cat); - cat.aliases?.forEach((alias) => categoryMap.set(alias, cat)); +function buildTopicsMap(transcripts: ContentTranscriptType[], topics: Topic[]): Map { + // Create topics lookup map (includes aliases) + const topicsLookup = new Map(); + topics.forEach(topic => { + topicsLookup.set(topic.slug, topic); + topic.aliases?.forEach(alias => topicsLookup.set(alias, topic)); }); - // Process all transcripts to build topic counts and names - transcripts.forEach((transcript) => { - transcript.tags?.forEach((tag) => { + // Build the main topics map + const processedTopics = new Map(); + + // Process all transcripts + transcripts.forEach(transcript => { + transcript.tags?.forEach(tag => { const slug = createSlug(tag); - - if (!topicsMap.has(slug)) { - // Get the proper name from categories if it exists - const categoryInfo = categoryMap.get(slug); - const name = categoryInfo ? 
categoryInfo.title : tag; - - topicsMap.set(slug, { - name, + const topic = topicsLookup.get(slug); + + if (!processedTopics.has(slug)) { + processedTopics.set(slug, { + name: topic?.title || tag, slug, - count: 1 + count: 1, + categories: topic?.categories || ["Miscellaneous"], }); } else { - const topicInfo = topicsMap.get(slug)!; - topicInfo.count += 1; + const processed = processedTopics.get(slug)!; + processed.count += 1; } }); }); - // Organize topics into categories - topicsMap.forEach((topicInfo, slug) => { - const categoryInfo = categoryMap.get(slug); - - if (categoryInfo) { - categoryInfo.categories.forEach((category) => { - topicsByCategory[category].push(topicInfo); - }); - } else { - uncategorizedTopics.add(slug); - } - }); + return processedTopics; +} - // Add miscellaneous category - if (uncategorizedTopics.size > 0) { - topicsByCategory["Miscellaneous"] = Array.from(uncategorizedTopics) - .map(slug => topicsMap.get(slug)!) - .sort((a, b) => a.name.localeCompare(b.name)); +function generateAlphabeticalList(processedTopics: Map): TopicsData[] { + const result: TopicsData[] = []; + // The categories property is not needed for this list, so we drop it + for (const { name, slug, count } of processedTopics.values()) { + result.push({ name, slug, count }); } + return result.sort((a, b) => a.name.localeCompare(b.name)); +} + +function generateCategorizedList(processedTopics: Map): Record { + const categorizedTopics: Record = {}; + + Array.from(processedTopics.values()).forEach(({ name, slug, count, categories }) => { + categories.forEach(category => { + if (!categorizedTopics[category]) { + categorizedTopics[category] = []; + } + categorizedTopics[category].push({ name, slug, count }); + }); + }); // Sort topics within each category - Object.keys(topicsByCategory).forEach((category) => { - topicsByCategory[category].sort((a, b) => a.name.localeCompare(b.name)); + Object.values(categorizedTopics).forEach(topics => { + topics.sort((a, b) =>
a.name.localeCompare(b.name)); }); - // Create alphabetical list of all topics - const allTopicsArray = Array.from(topicsMap.values()) - .sort((a, b) => a.name.localeCompare(b.name)); + return categorizedTopics; +} + +function generateTopicsCounts(transcripts: ContentTranscriptType[]) { + // Get topics + const topics = getTopics(); - // Write both JSON files + // Build the primary data structure + const processedTopics = buildTopicsMap(transcripts, topics); + + // Generate both output formats + const alphabeticalList = generateAlphabeticalList(processedTopics); + const categorizedList = generateCategorizedList(processedTopics); + + // Write output files fs.writeFileSync( - "./public/topics-by-category-counts.json", - JSON.stringify(topicsByCategory, null, 2) + "./public/topics-counts.json", + JSON.stringify(alphabeticalList, null, 2) ); - + fs.writeFileSync( - "./public/topics-counts.json", - JSON.stringify(allTopicsArray, null, 2) + "./public/topics-by-category-counts.json", + JSON.stringify(categorizedList, null, 2) ); } diff --git a/package.json b/package.json index f58ed55..f4c4e17 100644 --- a/package.json +++ b/package.json @@ -4,9 +4,9 @@ "private": true, "scripts": { "dev": "next dev", - "fetch-categories": "node scripts/fetchCategories.js", + "fetch-topics": "node scripts/fetchTopics.js", "submodules:update": "git submodule update --init && git submodule update --remote", - "build": "npm run submodules:update && npm run fetch-categories && next build", + "build": "npm run submodules:update && npm run fetch-topics && next build", "start": "next start", "lint": "next lint" }, diff --git a/scripts/fetchCategories.js b/scripts/fetchTopics.js similarity index 62% rename from scripts/fetchCategories.js rename to scripts/fetchTopics.js index 34edf27..73fddf4 100644 --- a/scripts/fetchCategories.js +++ b/scripts/fetchTopics.js @@ -3,7 +3,7 @@ const path = require("path"); const https = require("https"); const url = "https://bitcoinops.org/topics.json"; -const 
outputPath = path.join(__dirname, "..", "public", "categories.json"); +const outputPath = path.join(__dirname, "..", "public", "topics.json"); https .get(url, (res) => { @@ -15,9 +15,9 @@ https res.on("end", () => { fs.writeFileSync(outputPath, data); - console.log("Categories data has been fetched and saved to public folder."); + console.log("Topics data has been fetched and saved to public folder."); }); }) .on("error", (err) => { - console.error("Error fetching categories:", err.message); + console.error("Error fetching topics:", err.message); });