Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix multidimensional data pages baking logic #4478

Merged
merged 1 commit into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 12 additions & 29 deletions baker/GrapherBaker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import { renderToHtmlPage } from "../baker/siteRenderers.js"
import {
excludeUndefined,
urlToSlug,
without,
uniq,
keyBy,
compact,
Expand All @@ -17,7 +16,6 @@ import {
BAKED_GRAPHER_URL,
} from "../settings/serverSettings.js"
import * as db from "../db/db.js"
import { glob } from "glob"
import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js"
import {
getPostIdFromSlug,
Expand Down Expand Up @@ -50,9 +48,10 @@ import {
import { getAllImages } from "../db/model/Image.js"
import { logErrorAndMaybeCaptureInSentry } from "../serverUtils/errorLog.js"

import { getTagToSlugMap } from "./GrapherBakingUtils.js"
import { deleteOldGraphers, getTagToSlugMap } from "./GrapherBakingUtils.js"
import { knexRaw } from "../db/db.js"
import { getRelatedChartsForVariable } from "../db/model/Chart.js"
import { getAllMultiDimDataPageSlugs } from "../db/model/MultiDimDataPage.js"
import pMap from "p-map"

const renderDatapageIfApplicable = async (
Expand Down Expand Up @@ -286,28 +285,6 @@ const bakeGrapherPage = async (
imageMetadataDictionary
)
)
console.log(outPath)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We output the slug in the progress bar already, this doesn't add anything useful on top of that.

}

const deleteOldGraphers = async (bakedSiteDir: string, newSlugs: string[]) => {
// Delete any that are missing from the database
const oldSlugs = glob
.sync(`${bakedSiteDir}/grapher/*.html`)
.map((slug) =>
slug.replace(`${bakedSiteDir}/grapher/`, "").replace(".html", "")
)
const toRemove = without(oldSlugs, ...newSlugs)
// do not delete grapher slugs redirected to explorers
.filter((slug) => !isPathRedirectedToExplorer(`/grapher/${slug}`))
for (const slug of toRemove) {
const path = `${bakedSiteDir}/grapher/${slug}.html`
console.log(`DELETING ${path}`)
fs.unlink(path, (err) =>
err
? console.error(`Error deleting ${path}`, err)
: console.log(`Deleted ${path}`)
)
}
}

export interface BakeSingleGrapherChartArguments {
Expand Down Expand Up @@ -363,7 +340,6 @@ export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
ORDER BY cc.slug ASC`
)

const newSlugs = chartsToBake.map((row) => row.slug)
await fs.mkdirp(bakedSiteDir + "/grapher")

// Prefetch imageMetadata instead of each grapher page fetching
Expand All @@ -382,7 +358,7 @@ export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
}))

const progressBar = new ProgressBar(
"bake grapher page [:bar] :current/:total :elapseds :rate/s :etas :name\n",
"bake grapher page [:bar] :current/:total :elapseds :rate/s :name\n",
{
width: 20,
total: chartsToBake.length + 1,
Expand All @@ -401,11 +377,18 @@ export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
async (knex) => await bakeSingleGrapherChart(job, knex),
db.TransactionCloseMode.KeepOpen
)
progressBar.tick({ name: `slug ${job.slug}` })
progressBar.tick({ name: job.slug })
},
{ concurrency: 10 }
)

await deleteOldGraphers(bakedSiteDir, excludeUndefined(newSlugs))
// Multi-dim data pages are baked into the same directory as graphers
// and they are handled separately.
const multiDimSlugs = await getAllMultiDimDataPageSlugs(knex)
const newSlugs = excludeUndefined([
...chartsToBake.map((row) => row.slug),
...multiDimSlugs,
])
await deleteOldGraphers(bakedSiteDir, newSlugs)
progressBar.tick({ name: `✅ Deleted old graphers` })
}
27 changes: 26 additions & 1 deletion baker/GrapherBakingUtils.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import fs from "fs-extra"
import { glob } from "glob"
import * as lodash from "lodash"

import * as db from "../db/db.js"
import md5 from "md5"
import { DbPlainTag, Url } from "@ourworldindata/utils"
import { DbPlainTag, Url, without } from "@ourworldindata/utils"
import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js"

// Splits a grapher URL like https://ourworldindata.org/grapher/soil-lifespans?tab=chart
// into its slug (soil-lifespans) and queryStr (?tab=chart)
Expand Down Expand Up @@ -74,3 +77,25 @@ export async function getSlugForTopicTag(

return slug
}

export async function deleteOldGraphers(
bakedSiteDir: string,
newSlugs: string[]
) {
// Delete any that are missing from the database
const oldSlugs = glob
.sync(`${bakedSiteDir}/grapher/*.html`)
.map((slug) =>
slug.replace(`${bakedSiteDir}/grapher/`, "").replace(".html", "")
)
const toRemove = without(oldSlugs, ...newSlugs)
// do not delete grapher slugs redirected to explorers
.filter((slug) => !isPathRedirectedToExplorer(`/grapher/${slug}`))
for (const slug of toRemove) {
const path = `${bakedSiteDir}/grapher/${slug}.html`
console.log(`DELETING ${path}`)
fs.unlink(path, (err) => {
if (err) console.error(`Error deleting ${path}`, err)
})
}
}
18 changes: 17 additions & 1 deletion baker/MultiDimBaker.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import fs from "fs-extra"
import path from "path"
import ProgressBar from "progress"
import {
ImageMetadata,
MultiDimDataPageConfigPreProcessed,
Expand All @@ -23,7 +24,7 @@ import {
BAKED_BASE_URL,
BAKED_GRAPHER_URL,
} from "../settings/serverSettings.js"
import { getTagToSlugMap } from "./GrapherBakingUtils.js"
import { deleteOldGraphers, getTagToSlugMap } from "./GrapherBakingUtils.js"
import { getVariableMetadata } from "../db/model/Variable.js"
import pMap from "p-map"
import {
Expand All @@ -32,6 +33,7 @@ import {
resolveFaqsForVariable,
} from "./DatapageHelpers.js"
import { logErrorAndMaybeCaptureInSentry } from "../serverUtils/errorLog.js"
import { getAllPublishedChartSlugs } from "../db/model/Chart.js"
import {
getAllMultiDimDataPages,
getMultiDimDataPageBySlug,
Expand Down Expand Up @@ -225,6 +227,14 @@ export const bakeAllMultiDimDataPages = async (
imageMetadata: Record<string, ImageMetadata>
) => {
const multiDimsBySlug = await getAllMultiDimDataPages(knex)
const progressBar = new ProgressBar(
"bake multi-dim page [:bar] :current/:total :elapseds :rate/s :name\n",
{
width: 20,
total: multiDimsBySlug.size + 1,
renderThrottle: 0,
}
)
for (const [slug, row] of multiDimsBySlug.entries()) {
await bakeMultiDimDataPage(
knex,
Expand All @@ -233,5 +243,11 @@ export const bakeAllMultiDimDataPages = async (
row.config,
imageMetadata
)
progressBar.tick({ name: slug })
}
const publishedSlugs = multiDimsBySlug.keys()
const chartSlugs = await getAllPublishedChartSlugs(knex)
const newSlugs = [...publishedSlugs, ...chartSlugs]
await deleteOldGraphers(bakedSiteDir, newSlugs)
progressBar.tick({ name: `✅ Deleted old multi-dim pages` })
}
17 changes: 17 additions & 0 deletions db/model/Chart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -616,3 +616,20 @@ export const getRedirectsByChartId = async (
ORDER BY id ASC`,
[chartId]
)

export async function getAllPublishedChartSlugs(
knex: db.KnexReadonlyTransaction
): Promise<string[]> {
const rows = await db.knexRaw<{
slug: string
}>(
knex,
`-- sql
SELECT cc.slug
FROM charts c
JOIN chart_configs cc ON c.configId = cc.id
WHERE cc.full->>"$.isPublished" = "true"
`
)
return rows.map((row) => row.slug)
}
9 changes: 9 additions & 0 deletions db/model/MultiDimDataPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ export async function getAllLinkedPublishedMultiDimDataPages(
return rows.map(enrichRow)
}

export async function getAllMultiDimDataPageSlugs(
knex: KnexReadonlyTransaction
): Promise<string[]> {
const rows = await knex<DbPlainMultiDimDataPage>(
MultiDimDataPagesTableName
).select("slug")
return rows.map((row) => row.slug)
}

export const getMultiDimDataPageBySlug = async (
knex: KnexReadonlyTransaction,
slug: string,
Expand Down
18 changes: 17 additions & 1 deletion devTools/tsconfigs/tsconfig.base.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,23 @@
"declaration": true,
"declarationMap": true,

"lib": ["dom", "dom.iterable", "es2020", "es2021", "es2022", "es2023"],
// To get newer APIs like Set.prototype.intersection(), we need to use
// ESNext lib on Node 22 and current version of TypeScript (5.7.2).
// However, using the ESNext option doesn't work with
// @types/[email protected], which we are currently pinned to, to fix an
// unrelated bug with types in Cloudflare Functions, thus we only use
// ESNext.Collection.
// https://github.com/microsoft/TypeScript/issues/59919
// https://developers.cloudflare.com/workers/languages/typescript/#transitive-loading-of-typesnode-overrides-cloudflareworkers-types
"lib": [
"dom",
"dom.iterable",
"es2020",
"es2021",
"es2022",
"es2023",
"ESNext.Collection"
],
// Using es2022 as a `target` caused the following error in wrangler:
// "Uncaught TypeError: PointVector is not a constructor".
// This seems to be related to a change in how classes are compiled in
Expand Down
Loading