diff --git a/.gitignore b/.gitignore index d55e5e4..7db8e0f 100644 --- a/.gitignore +++ b/.gitignore @@ -21,9 +21,9 @@ /build /public/categories.json /public/types-data.json -/public/tag-data.json +/public/topics-by-category-counts.json /public/aliases.json -/public/topics-data.json +/public/topics-counts.json /public/speaker-data.json /public/source-count-data.json /public/sources-data.json diff --git a/contentlayer.config.ts b/contentlayer.config.ts index 69bce5b..1434acc 100644 --- a/contentlayer.config.ts +++ b/contentlayer.config.ts @@ -19,7 +19,7 @@ const Resources = defineNestedType(() => ({ url: { type: "string" }, }, })); -export interface CategoryInfo { +export interface Topic { title: string; slug: string; optech_url: string; @@ -28,6 +28,14 @@ export interface CategoryInfo { excerpt: string; } +// The full processed topic we use internally +interface ProcessedTopic { + name: string; // Display name (from topic.title or original tag) + slug: string; // Slugified identifier + count: number; // Number of occurrences + categories: string[]; // List of categories it belongs to +} + interface TagInfo { name: string; slug: string; @@ -38,25 +46,6 @@ interface ContentTree { [key: string]: ContentTree | ContentTranscriptType[]; } -/** - * Count the occurrences of all tags across transcripts and write to json file - */ -function createTagCount(allTranscripts: ContentTranscriptType[]): { - tagCounts: Record; -} { - const tagCounts: Record = {}; - - for (const file of allTranscripts) { - if (!file.tags) continue; - - for (const tag of file.tags) { - const formattedTag = createSlug(tag); - tagCounts[formattedTag] = (tagCounts[formattedTag] || 0) + 1; - } - } - - return { tagCounts }; -} const getTranscriptAliases = (allTranscripts: ContentTranscriptType[]) => { const aliases: Record = {}; @@ -74,105 +63,106 @@ const getTranscriptAliases = (allTranscripts: ContentTranscriptType[]) => { fs.writeFileSync("./public/aliases.json", JSON.stringify(aliases)); }; -const getCategories = () => { - const filePath = path.join(process.cwd(), "public", "categories.json"); +const getTopics = () => { + const filePath = path.join(process.cwd(), "public", "topics.json"); const fileContents = fs.readFileSync(filePath, "utf8"); return JSON.parse(fileContents); }; -function organizeTags(transcripts: ContentTranscriptType[]) { - const categories: CategoryInfo[] = getCategories(); - const { tagCounts } = createTagCount(transcripts); +function buildTopicsMap(transcripts: ContentTranscriptType[], topics: Topic[]): Map { + // Create topics lookup map (includes aliases) + const topicsLookup = new Map(); + topics.forEach(topic => { + topicsLookup.set(topic.slug, topic); + topic.aliases?.forEach(alias => topicsLookup.set(alias, topic)); + }); - const tagsByCategory: { [category: string]: TagInfo[] } = {}; - const tagsWithoutCategory = new Set(); - const categorizedTags = new Set(); + // Build the main topics map + const processedTopics = new Map(); - // Create a map for faster category lookup - const categoryMap = new Map(); + // Process all transcripts + transcripts.forEach(transcript => { + transcript.tags?.forEach(tag => { + const slug = createSlug(tag); + const topic = topicsLookup.get(slug); - categories.forEach((cat) => { - cat.categories.forEach((category) => { - if (!tagsByCategory[category]) { - tagsByCategory[category] = []; + if (!processedTopics.has(slug)) { + processedTopics.set(slug, { + name: topic?.title || tag, + slug, + count: 1, + categories: topic?.categories || ["Miscellaneous"], + }); + } else { + const processed = processedTopics.get(slug)!; + processed.count += 1; } }); - categoryMap.set(createSlug(cat.slug), cat); - cat.aliases?.forEach((alias) => categoryMap.set(alias, cat)); }); - // Process all tags at once - const allTags = new Set( - transcripts.flatMap( - (transcript) => transcript.tags?.map((tag) => tag) || [] - ) - ); - - allTags.forEach((tag) => { - const catInfo = categoryMap.get(tag); - if (catInfo) { - catInfo.categories.forEach((category) => { - if (!tagsByCategory[category].some((t) => t.slug === tag)) { - tagsByCategory[category].push({ - name: catInfo.title, - slug: tag, - count: tagCounts[tag] || 0, - }); - } - }); - categorizedTags.add(tag); - } else { - tagsWithoutCategory.add(tag); - } - }); + return processedTopics; +} - // Add "Miscellaneous" category with remaining uncategorized tags - if (tagsWithoutCategory.size > 0) { - tagsByCategory["Miscellaneous"] = Array.from(tagsWithoutCategory).map( - (tag) => ({ - name: tag, - slug: tag, - count: tagCounts[tag] || 0, - }) - ); +function generateAlphabeticalList(processedTopics: Map): TopicsData[] { + const result: TopicsData[] = []; + // The categories property is not needed for this list, so we drop it + for (const { name, slug, count } of processedTopics.values()) { + result.push({ name, slug, count }); } + return result.sort((a, b) => a.name.localeCompare(b.name)); +} - // Sort tags alphabetically within each category - Object.keys(tagsByCategory).forEach((category) => { - tagsByCategory[category].sort((a, b) => a.name.localeCompare(b.name)); +function generateCategorizedList(processedTopics: Map): Record { + const categorizedTopics: Record = {}; + + Array.from(processedTopics.values()).forEach(({ name, slug, count, categories }) => { + categories.forEach(category => { + if (!categorizedTopics[category]) { + categorizedTopics[category] = []; + } + + // Check if topic name contains category name and ends with "(Miscellaneous)" + const modifiedName = name.includes(category) && name.endsWith("(Miscellaneous)") + ? "Miscellaneous" + : name; + + categorizedTopics[category].push({ name: modifiedName, slug, count }); + }); + }); + + // Sort topics within each category + Object.values(categorizedTopics).forEach(topics => { + topics.sort((a, b) => { + if (a.name == "Miscellaneous") return 1; + if (b.name == "Miscellaneous") return -1; + return a.name.localeCompare(b.name) + }); }); - fs.writeFileSync("./public/tag-data.json", JSON.stringify(tagsByCategory)); - return { tagsByCategory, tagsWithoutCategory }; + return categorizedTopics; } -function organizeTopics(transcripts: ContentTranscriptType[]) { - const slugTopics: any = {}; - const topicsArray: TopicsData[] = []; +function generateTopicsCounts(transcripts: ContentTranscriptType[]) { + // Get topics + const topics = getTopics(); - transcripts.forEach((transcript) => { - const slugTags = transcript.tags?.map((tag) => ({ - slug: createSlug(tag), - name: tag, - })); + // Build the primary data structure + const processedTopics = buildTopicsMap(transcripts, topics); - slugTags?.forEach(({ slug, name }) => { - if (slugTopics[slug] !== undefined) { - const index = slugTopics[slug]; - topicsArray[index].count += 1; - } else { - const topicsLength = topicsArray.length; - slugTopics[slug] = topicsLength; - topicsArray[topicsLength] = { - slug, - name, - count: 1, - }; - } - }); - }); + // Generate both output formats + const alphabeticalList = generateAlphabeticalList(processedTopics); + const categorizedList = generateCategorizedList(processedTopics); - fs.writeFileSync("./public/topics-data.json", JSON.stringify(topicsArray)); + // Write output files + fs.writeFileSync( + "./public/topics-counts.json", + JSON.stringify(alphabeticalList, null, 2) + ); + + fs.writeFileSync( + "./public/topics-by-category-counts.json", + JSON.stringify(categorizedList, null, 2) + ); } function createSpeakers(transcripts: ContentTranscriptType[]) { @@ -468,13 +458,11 @@ export default makeSource({ "STYLE.md", "twitter_handles.json", ".json", - "2018-08-17-richard-bondi-bitcoin-cli-regtest.es.md", ], onSuccess: async (importData) => { const { allTranscripts, allSources } = await importData(); - organizeTags(allTranscripts); + generateTopicsCounts(allTranscripts); createTypesCount(allTranscripts, allSources); - organizeTopics(allTranscripts); getTranscriptAliases(allTranscripts); createSpeakers(allTranscripts); generateSourcesCount(allTranscripts, allSources); diff --git a/package.json b/package.json index f58ed55..f4c4e17 100644 --- a/package.json +++ b/package.json @@ -4,9 +4,9 @@ "private": true, "scripts": { "dev": "next dev", - "fetch-categories": "node scripts/fetchCategories.js", + "fetch-topics": "node scripts/fetchTopics.js", "submodules:update": "git submodule update --init && git submodule update --remote", - "build": "npm run submodules:update && npm run fetch-categories && next build", + "build": "npm run submodules:update && npm run fetch-topics && next build", "start": "next start", "lint": "next lint" }, diff --git a/scripts/fetchCategories.js b/scripts/fetchTopics.js similarity index 50% rename from scripts/fetchCategories.js rename to scripts/fetchTopics.js index 34edf27..259e9a5 100644 --- a/scripts/fetchCategories.js +++ b/scripts/fetchTopics.js @@ -2,8 +2,8 @@ const fs = require("fs"); const path = require("path"); const https = require("https"); -const url = "https://bitcoinops.org/topics.json"; -const outputPath = path.join(__dirname, "..", "public", "categories.json"); +const url = "https://raw.githubusercontent.com/bitcoinsearch/topics-index/refs/heads/main/topics.json"; +const outputPath = path.join(__dirname, "..", "public", "topics.json"); https .get(url, (res) => { @@ -15,9 +15,9 @@ https res.on("end", () => { fs.writeFileSync(outputPath, data); - console.log("Categories data has been fetched and saved to public folder."); + console.log("Topics data has been fetched and saved to public folder."); }); }) .on("error", (err) => { - console.error("Error fetching categories:", err.message); + console.error("Error fetching topics:", err.message); }); diff --git a/src/app/(explore)/categories/page.tsx b/src/app/(explore)/categories/page.tsx index 7e07944..d9bbd67 100644 --- a/src/app/(explore)/categories/page.tsx +++ b/src/app/(explore)/categories/page.tsx @@ -1,6 +1,6 @@ import React from "react"; import TranscriptContentPage from "@/components/explore/TranscriptContentPage"; -import allCategoriesTopic from "@/public/tag-data.json"; +import allCategoriesTopic from "@/public/topics-by-category-counts.json"; const CategoriesPage = () => { diff --git a/src/app/(explore)/topics/page.tsx b/src/app/(explore)/topics/page.tsx index 3422e15..d807f83 100644 --- a/src/app/(explore)/topics/page.tsx +++ b/src/app/(explore)/topics/page.tsx @@ -1,9 +1,8 @@ import React from "react"; import TranscriptContentPage from "@/components/explore/TranscriptContentPage"; -import allTopics from "@/public/topics-data.json"; +import allTopics from "@/public/topics-counts.json"; const TopicsPage = () => { - return (
diff --git a/src/components/landing-page/explore-transcripts/ExploreTranscripts.tsx b/src/components/landing-page/explore-transcripts/ExploreTranscripts.tsx index 440a328..5eb3f7c 100644 --- a/src/components/landing-page/explore-transcripts/ExploreTranscripts.tsx +++ b/src/components/landing-page/explore-transcripts/ExploreTranscripts.tsx @@ -5,7 +5,7 @@ import Wrapper from "@/components/layout/Wrapper"; import ExploreTranscriptClient from "./ExploreTranscriptClient"; function getTags() { - const filePath = path.join(process.cwd(), "public", "tag-data.json"); + const filePath = path.join(process.cwd(), "public", "topics-by-category-counts.json"); const fileContents = fs.readFileSync(filePath, "utf8"); return JSON.parse(fileContents); }