-
-
Notifications
You must be signed in to change notification settings - Fork 229
/
Copy pathGrapherBaker.tsx
394 lines (356 loc) · 12.3 KB
/
GrapherBaker.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
import { GrapherPage } from "../site/GrapherPage.js"
import { DataPageV2 } from "../site/DataPageV2.js"
import { renderToHtmlPage } from "../baker/siteRenderers.js"
import {
excludeUndefined,
urlToSlug,
uniq,
keyBy,
compact,
mergeGrapherConfigs,
} from "@ourworldindata/utils"
import fs from "fs-extra"
import * as lodash from "lodash"
import {
BAKED_BASE_URL,
BAKED_GRAPHER_URL,
} from "../settings/serverSettings.js"
import * as db from "../db/db.js"
import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js"
import {
getPostIdFromSlug,
getPostRelatedCharts,
getRelatedArticles,
getRelatedResearchAndWritingForVariables,
} from "../db/model/Post.js"
import {
GrapherInterface,
DimensionProperty,
OwidVariableWithSource,
OwidChartDimensionInterface,
FaqEntryData,
ImageMetadata,
DbPlainChart,
DbRawChartConfig,
DbEnrichedImage,
} from "@ourworldindata/types"
import ProgressBar from "progress"
import {
getMergedGrapherConfigForVariable,
getVariableOfDatapageIfApplicable,
} from "../db/model/Variable.js"
import {
fetchAndParseFaqs,
getDatapageDataV2,
getPrimaryTopic,
resolveFaqsForVariable,
} from "./DatapageHelpers.js"
import { getAllImages } from "../db/model/Image.js"
import { logErrorAndMaybeCaptureInSentry } from "../serverUtils/errorLog.js"
import { deleteOldGraphers, getTagToSlugMap } from "./GrapherBakingUtils.js"
import { knexRaw } from "../db/db.js"
import { getRelatedChartsForVariable } from "../db/model/Chart.js"
import { getAllMultiDimDataPageSlugs } from "../db/model/MultiDimDataPage.js"
import pMap from "p-map"
/**
 * Renders a data page for the given grapher config, or resolves to
 * `undefined` when `getVariableOfDatapageIfApplicable` yields no variable
 * for it.
 *
 * @param grapher - chart config the page is rendered for
 * @param isPreviewing - forwarded unchanged to `renderDataPageV2`
 * @param knex - readonly transaction used for all lookups
 * @param imageMetadataDictionary - optional image metadata keyed by
 *   filename; loaded here when the caller does not provide it
 */
const renderDatapageIfApplicable = async (
    grapher: GrapherInterface,
    isPreviewing: boolean,
    knex: db.KnexReadonlyTransaction,
    imageMetadataDictionary?: Record<string, DbEnrichedImage>
) => {
    const datapageVariable = await getVariableOfDatapageIfApplicable(grapher)
    if (!datapageVariable) return undefined

    // The bulk bake passes a prefetched dictionary so that images are not
    // fetched once per chart. Other entry points (e.g. the mockSiteRouter)
    // arrive here without one, and this is the point where both paths meet,
    // so the images are loaded on the fly in that case.
    let imagesByFilename = imageMetadataDictionary
    if (!imagesByFilename) {
        const allImages = await getAllImages(knex)
        imagesByFilename = keyBy(allImages, "filename")
    }

    const { id, metadata } = datapageVariable
    return await renderDataPageV2(
        {
            variableId: id,
            variableMetadata: metadata,
            isPreviewing,
            useIndicatorGrapherConfigs: false,
            pageGrapher: grapher,
            imageMetadataDictionary: imagesByFilename,
        },
        knex
    )
}
/**
 * Produces the HTML for a chart: the data page when one can be rendered
 * for the grapher config, and the plain grapher page otherwise.
 *
 * The data page is always rendered with `isPreviewing` set to `false`.
 */
export const renderDataPageOrGrapherPage = async (
    grapher: GrapherInterface,
    knex: db.KnexReadonlyTransaction,
    imageMetadataDictionary?: Record<string, DbEnrichedImage>
): Promise<string> => {
    const datapageHtml = await renderDatapageIfApplicable(
        grapher,
        false,
        knex,
        imageMetadataDictionary
    )
    // Fall back to the grapher page when no data page was produced
    return datapageHtml ? datapageHtml : renderGrapherPage(grapher, knex)
}
/**
 * Renders the HTML of a `DataPageV2` for a single variable.
 *
 * The function gathers everything the page component needs: the grapher
 * config, the FAQ entries referenced by the variable metadata, the datapage
 * data, related charts, related research and the image metadata for the
 * related research thumbnails.
 *
 * The caller's `pageGrapher` object is never modified; when a default
 * dimension has to be injected, it is added to a shallow copy.
 *
 * @param variableId - id of the variable the page is about
 * @param variableMetadata - metadata of that variable
 * @param isPreviewing - forwarded to the FAQ fetching and to the page component
 * @param useIndicatorGrapherConfigs - when true, the merged grapher config of
 *   the variable is used as the base and `pageGrapher` is merged on top of it
 * @param pageGrapher - chart config of the page, if any
 * @param imageMetadataDictionary - image metadata keyed by filename; only the
 *   entries used by the related research are passed on to the page
 * @param knex - readonly transaction used for all lookups
 * @returns the result of `renderToHtmlPage` for the assembled page
 */
export async function renderDataPageV2(
    {
        variableId,
        variableMetadata,
        isPreviewing,
        useIndicatorGrapherConfigs,
        pageGrapher,
        imageMetadataDictionary = {},
    }: {
        variableId: number
        variableMetadata: OwidVariableWithSource
        isPreviewing: boolean
        useIndicatorGrapherConfigs: boolean
        pageGrapher?: GrapherInterface
        imageMetadataDictionary?: Record<string, ImageMetadata>
    },
    knex: db.KnexReadonlyTransaction
) {
    // Only merge the grapher config on the indicator if the caller tells us to do so -
    // this is true for preview pages for datapages on the indicator level but false
    // if we are on Grapher pages. Once we have a good way in the grapher admin for how
    // to use indicator level defaults, we should reconsider how this works here.
    // The indicator-level config is only fetched when it is actually merged.
    let baseGrapher: GrapherInterface = pageGrapher ?? {}
    if (useIndicatorGrapherConfigs) {
        const grapherConfigForVariable =
            await getMergedGrapherConfigForVariable(knex, variableId)
        baseGrapher = mergeGrapherConfigs(
            grapherConfigForVariable ?? {},
            pageGrapher ?? {}
        )
    }

    const faqDocIds = compact(
        uniq(variableMetadata.presentation?.faqs?.map((faq) => faq.gdocId))
    )
    const faqGdocs = await fetchAndParseFaqs(knex, faqDocIds, { isPreviewing })
    const { resolvedFaqs, errors: faqResolveErrors } = resolveFaqsForVariable(
        faqGdocs,
        variableMetadata
    )
    // FAQ resolution problems are reported but do not abort the rendering
    for (const error of faqResolveErrors) {
        await logErrorAndMaybeCaptureInSentry(
            new Error(
                `Data page error in finding FAQs for variable ${variableId}: ${error.error}`
            )
        )
    }
    const faqEntries: FaqEntryData = {
        faqs: resolvedFaqs?.flatMap((faq) => faq.enrichedFaq.content) ?? [],
    }

    // If we are rendering this in the context of an indicator page preview or similar,
    // then the chart config might be entirely empty. Make sure that dimensions is
    // set to the variableId as a Y variable in these cases. This is done on a
    // copy so that the config object passed in by the caller stays untouched.
    const needsDefaultDimension =
        !baseGrapher.dimensions ||
        (baseGrapher.dimensions as OwidChartDimensionInterface[]).length === 0
    const defaultDimensions: OwidChartDimensionInterface[] = [
        {
            variableId: variableId,
            property: DimensionProperty.y,
            display: variableMetadata.display,
        },
    ]
    const grapher: GrapherInterface = needsDefaultDimension
        ? { ...baseGrapher, dimensions: defaultDimensions }
        : baseGrapher

    const datapageData = await getDatapageDataV2(variableMetadata, grapher)

    const firstTopicTag = datapageData.topicTagsLinks?.[0]
    datapageData.primaryTopic = await getPrimaryTopic(knex, firstTopicTag)

    // Get the charts this variable is being used in (aka "related charts")
    // and exclude the current chart to avoid duplicates
    datapageData.allCharts = await getRelatedChartsForVariable(
        knex,
        variableId,
        "id" in grapher ? [grapher.id as number] : []
    )

    datapageData.relatedResearch =
        await getRelatedResearchAndWritingForVariables(knex, [variableId])

    // Only hand the page the image metadata its related research refers to
    const relatedResearchFilenames = datapageData.relatedResearch
        .map((r) => r.imageUrl)
        .filter((f): f is string => !!f)
    const imageMetadata = lodash.pick(
        imageMetadataDictionary,
        uniq(relatedResearchFilenames)
    )

    const tagToSlugMap = await getTagToSlugMap(knex)

    return renderToHtmlPage(
        <DataPageV2
            grapher={grapher}
            datapageData={datapageData}
            baseUrl={BAKED_BASE_URL}
            baseGrapherUrl={BAKED_GRAPHER_URL}
            isPreviewing={isPreviewing}
            imageMetadata={imageMetadata}
            faqEntries={faqEntries}
            tagToSlugMap={tagToSlugMap}
        />
    )
}
/**
 * Preview counterpart of renderDataPageOrGrapherPage(): the data page is
 * rendered with `isPreviewing` set to `true` and without a prefetched image
 * metadata dictionary. Falls back to the grapher page when no data page is
 * produced.
 */
export const renderPreviewDataPageOrGrapherPage = async (
    grapher: GrapherInterface,
    knex: db.KnexReadonlyTransaction
) => {
    const previewDatapageHtml = await renderDatapageIfApplicable(
        grapher,
        true,
        knex
    )
    return previewDatapageHtml
        ? previewDatapageHtml
        : renderGrapherPage(grapher, knex)
}
/**
 * Renders the plain grapher page for a chart config.
 *
 * Related charts are looked up through the post identified by the slug of
 * `grapher.originUrl`; related articles are looked up by `grapher.id`.
 * Either lookup is skipped (leaving the value `undefined`) when its key is
 * missing.
 */
const renderGrapherPage = async (
    grapher: GrapherInterface,
    knex: db.KnexReadonlyTransaction
) => {
    const originPostSlug = urlToSlug(grapher.originUrl || "") as
        | string
        | undefined

    // TODO: update this to use gdocs posts
    const originPostId = originPostSlug
        ? await getPostIdFromSlug(knex, originPostSlug)
        : undefined

    const relatedCharts = originPostId
        ? await getPostRelatedCharts(knex, originPostId)
        : undefined

    const chartId = grapher.id
    const relatedArticles = chartId
        ? await getRelatedArticles(knex, chartId)
        : undefined

    const page = (
        <GrapherPage
            grapher={grapher}
            relatedCharts={relatedCharts}
            relatedArticles={relatedArticles}
            baseUrl={BAKED_BASE_URL}
            baseGrapherUrl={BAKED_GRAPHER_URL}
        />
    )
    return renderToHtmlPage(page)
}
/**
 * Renders the page for one chart and writes it to
 * `<bakedSiteDir>/grapher/<slug>.html`.
 *
 * @param bakedSiteDir - root directory of the baked site
 * @param imageMetadataDictionary - prefetched image metadata keyed by
 *   filename, passed through to the renderer
 * @param grapher - chart config; its `slug` determines the file name
 * @param knex - readonly transaction used while rendering
 */
const bakeGrapherPage = async (
    bakedSiteDir: string,
    imageMetadataDictionary: Record<string, DbEnrichedImage>,
    grapher: GrapherInterface,
    knex: db.KnexReadonlyTransaction
) => {
    // Always bake the html for every chart; it's cheap to do so
    const html = await renderDataPageOrGrapherPage(
        grapher,
        knex,
        imageMetadataDictionary
    )
    const targetFile = `${bakedSiteDir}/grapher/${grapher.slug}.html`
    await fs.writeFile(targetFile, html)
}
/**
 * Describes one baking job as consumed by `bakeSingleGrapherChart`.
 */
export interface BakeSingleGrapherChartArguments {
// Chart id; assigned as `id` onto the parsed grapher config before rendering
id: number
// JSON-serialized grapher config; parsed with `JSON.parse` by the baker
config: string
// Root directory of the baked site; pages go into its `grapher` subfolder
bakedSiteDir: string
// Chart slug as selected alongside the config; used as the progress bar label
slug: string
// Prefetched image metadata keyed by filename, shared by all jobs of one bake
imageMetadataDictionary: Record<string, DbEnrichedImage>
}
/**
 * Bakes the page for a single chart described by `args`.
 *
 * The serialized config is parsed and given the job's chart id. Charts whose
 * `/grapher/<slug>` path is redirected to an explorer are skipped.
 *
 * @returns `args` once the page has been written, or `undefined` when the
 *   chart was skipped because of an explorer redirect
 */
export const bakeSingleGrapherChart = async (
    args: BakeSingleGrapherChartArguments,
    knex: db.KnexReadonlyTransaction
) => {
    const { id, config, bakedSiteDir, imageMetadataDictionary } = args

    const grapher: GrapherInterface = JSON.parse(config)
    grapher.id = id

    // Avoid baking paths that have an Explorer redirect.
    // Redirects take precedence.
    const isRedirected = isPathRedirectedToExplorer(`/grapher/${grapher.slug}`)
    if (isRedirected) {
        console.log(`⏩ ${grapher.slug} redirects to explorer`)
        return
    }

    await bakeGrapherPage(bakedSiteDir, imageMetadataDictionary, grapher, knex)
    return args
}
/**
 * Bakes the pages of all published charts into `<bakedSiteDir>/grapher` and
 * afterwards removes baked graphers whose slug is neither a published chart
 * nor a multi-dim data page.
 *
 * @param bakedSiteDir - root directory of the baked site
 * @param knex - readonly transaction used for the chart, image and
 *   multi-dim lookups; every chart itself is baked in its own transaction
 */
export const bakeAllChangedGrapherPagesAndDeleteRemovedGraphers = async (
    bakedSiteDir: string,
    knex: db.KnexReadonlyTransaction
) => {
    const chartsToBake = await knexRaw<
        Pick<DbPlainChart, "id"> & {
            config: DbRawChartConfig["full"]
            slug: string
        }
    >(
        knex,
        `-- sql
SELECT
c.id,
cc.full as config,
cc.slug
FROM charts c
JOIN chart_configs cc ON c.configId = cc.id
WHERE JSON_EXTRACT(cc.full, "$.isPublished")=true
ORDER BY cc.slug ASC`
    )

    await fs.mkdirp(`${bakedSiteDir}/grapher`)

    // Prefetch imageMetadata instead of each grapher page fetching
    // individually. imageMetadata is used by the google docs powering rich
    // text (including images) in data pages.
    const allImages = await getAllImages(knex)
    const imageMetadataDictionary = keyBy(allImages, "filename")

    const jobs: BakeSingleGrapherChartArguments[] = chartsToBake.map(
        ({ id, config, slug }) => ({
            id,
            config,
            bakedSiteDir,
            slug,
            imageMetadataDictionary,
        })
    )

    // One tick per chart plus one for the final clean-up step
    const totalTicks = chartsToBake.length + 1
    const progressBar = new ProgressBar(
        "bake grapher page [:bar] :current/:total :elapseds :rate/s :name\n",
        {
            width: 20,
            total: totalTicks,
            renderThrottle: 0,
        }
    )

    // Charts are baked concurrently (up to 10 at a time). Each job gets its
    // own transaction so that several database connections can be used.
    // Read-write consistency is not a concern here, thankfully.
    // NOTE(review): a read-write transaction is opened although the baker
    // only asks for a readonly one - confirm whether this is intended.
    const bakeJob = async (job: BakeSingleGrapherChartArguments) => {
        await db.knexReadWriteTransaction(
            (trx) => bakeSingleGrapherChart(job, trx),
            db.TransactionCloseMode.KeepOpen
        )
        progressBar.tick({ name: job.slug })
    }
    await pMap(jobs, bakeJob, { concurrency: 10 })

    // Multi-dim data pages are baked into the same directory as graphers
    // and they are handled separately.
    const multiDimSlugs = await getAllMultiDimDataPageSlugs(knex)
    const chartSlugs = chartsToBake.map((chart) => chart.slug)
    const newSlugs = excludeUndefined([...chartSlugs, ...multiDimSlugs])
    await deleteOldGraphers(bakedSiteDir, newSlugs)
    progressBar.tick({ name: `✅ Deleted old graphers` })
}