Skip to content

Commit

Permalink
🔨 add tooling to get pageview data into local mysql
Browse files Browse the repository at this point in the history
  • Loading branch information
danyx23 committed Oct 13, 2023
1 parent daff095 commit 67d90b4
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 13 deletions.
31 changes: 18 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,24 @@ help:
@echo 'Available commands:'
@echo
@echo ' GRAPHER ONLY'
@echo ' make up start dev environment via docker-compose and tmux'
@echo ' make down stop any services still running'
@echo ' make refresh (while up) download a new grapher snapshot and update MySQL'
@echo ' make migrate (while up) run any outstanding db migrations'
@echo ' make test run full suite (except db tests) of CI checks including unit tests'
@echo ' make dbtest run db test suite that needs a running mysql db'
@echo ' make svgtest compare current rendering against reference SVGs'
@echo ' make up start dev environment via docker-compose and tmux'
@echo ' make down stop any services still running'
@echo ' make refresh (while up) download a new grapher snapshot and update MySQL'
@echo ' make refresh.pageviews (while up) download and load pageviews from the private datasette instance'
@echo ' make migrate (while up) run any outstanding db migrations'
@echo ' make test run full suite (except db tests) of CI checks including unit tests'
@echo ' make dbtest run db test suite that needs a running mysql db'
@echo ' make svgtest compare current rendering against reference SVGs'
@echo
@echo ' GRAPHER + WORDPRESS (staff-only)'
@echo ' make up.full start dev environment via docker-compose and tmux'
@echo ' make down.full stop any services still running'
@echo ' make refresh.wp download a new wordpress snapshot and update MySQL'
@echo ' make refresh.full do a full MySQL update of both wordpress and grapher'
@echo ' make up.full start dev environment via docker-compose and tmux'
@echo ' make down.full stop any services still running'
@echo ' make refresh.wp download a new wordpress snapshot and update MySQL'
@echo ' make refresh.full do a full MySQL update of both wordpress and grapher'
@echo
@echo ' OPS (staff-only)'
@echo ' make deploy Deploy your local site to production'
@echo ' make stage Deploy your local site to staging'
@echo ' make deploy Deploy your local site to production'
@echo ' make stage Deploy your local site to staging'
@echo

up: export DEBUG = 'knex:query'
Expand Down Expand Up @@ -132,6 +133,10 @@ refresh:
@echo '==> Updating grapher database'
@. ./.env && DATA_FOLDER=tmp-downloads ./devTools/docker/refresh-grapher-data.sh

# Download and load pageviews from the private datasette instance (see `make help`).
# Runs `yarn`, then `yarn buildTsc` (presumably the TypeScript build that produces
# ./itsJustJavascript — confirm), then the `refreshPageviews` package script.
# The three steps are chained with `&&`, so a failing step stops the rest.
refresh.pageviews:
@echo '==> Refreshing pageviews'
yarn && yarn buildTsc && yarn refreshPageviews

refresh.wp:
@echo '==> Downloading wordpress data'
./devTools/docker/download-wordpress-mysql.sh
Expand Down
47 changes: 47 additions & 0 deletions db/refreshPageviewsFromDatasette.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
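// refreshPageviewsFromDatasette.ts — reloads the local MySQL `pageviews` table from the private datasette CSV export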
import fetch from "node-fetch"
import Papa from "papaparse"
import * as db from "./db.js"

/**
 * Downloads the pageviews CSV export from the private datasette instance and
 * replaces the contents of the local `pageviews` table with it.
 *
 * Rows that do not have exactly the expected number of fields are dropped
 * before inserting.
 *
 * @throws Error if the HTTP request fails, if the CSV has parse errors, or if
 * no valid rows remain. In all of these cases the existing table contents are
 * left untouched, because the table is only truncated once there is data to
 * insert.
 */
async function downloadAndInsertCSV(): Promise<void> {
    const csvUrl = "http://datasette-private/owid/pageviews.csv?_size=max"
    // Number of fields a parsed row must have to be inserted.
    const expectedColumnCount = 5

    const response = await fetch(csvUrl)
    if (!response.ok) {
        throw new Error(`Failed to fetch CSV: ${response.statusText}`)
    }

    const csvText = await response.text()
    const parsedData = Papa.parse(csvText, {
        header: true,
        // A trailing newline would otherwise be parsed as a one-field row and
        // reported as an error; skipping empty lines lets us treat *any*
        // reported error as a real one instead of tolerating the first.
        skipEmptyLines: true,
    })

    if (parsedData.errors.length > 0) {
        console.error("Errors while parsing CSV:", parsedData.errors)
        // Throw rather than return so the failure propagates to the caller
        // instead of looking like a successful run.
        throw new Error(
            `Failed to parse CSV: ${parsedData.errors.length} error(s)`
        )
    }

    const onlyValidRows = parsedData.data.filter(
        (row): row is Record<string, unknown> =>
            typeof row === "object" &&
            row !== null &&
            Object.keys(row).length === expectedColumnCount
    )

    console.log("Parsed CSV data:", onlyValidRows.length, "rows")
    console.log("Columns:", parsedData.meta.fields)

    // Never wipe the existing table when there is nothing to replace it with.
    if (onlyValidRows.length === 0) {
        throw new Error(
            "No valid rows found in CSV; leaving pageviews table unchanged"
        )
    }

    // NOTE(review): truncate + insert is not atomic — if the insert fails the
    // table is left empty. Acceptable for a local dev refresh; re-run to fix.
    await db.knexRaw("TRUNCATE TABLE pageviews")

    await db.knexInstance().batchInsert("pageviews", onlyValidRows)
    console.log("CSV data inserted successfully!")
}

/**
 * Script entry point: runs the pageviews refresh and always closes the
 * database connections afterwards.
 *
 * Any error is logged and turned into a non-zero process exit code so that
 * callers such as `make refresh.pageviews` can detect the failure.
 */
const main = async (): Promise<void> => {
    try {
        await downloadAndInsertCSV()
    } catch (e) {
        console.error(e)
        // Set the exit code instead of calling process.exit() so the
        // `finally` block below still gets to close the connections.
        process.exitCode = 1
    } finally {
        await db.closeTypeOrmAndKnexConnections()
    }
}

// `void` marks the promise as intentionally not awaited at module top level.
void main()
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"fixPrettierChanged": "yarn pretty-quick --pattern \"**/*.{tsx,ts,jsx,js,json,md,html,css,scss,yml}\"",
"runRegionsUpdater": "node --enable-source-maps ./itsJustJavascript/devTools/regionsUpdater/update.js",
"runDbMigrations": "yarn typeorm migration:run -d itsJustJavascript/db/dataSource.js",
"refreshPageviews": "node --enable-source-maps ./itsJustJavascript/db/refreshPageviewsFromDatasette.js",
"revertLastDbMigration": "yarn typeorm migration:revert -d itsJustJavascript/db/dataSource.js",
"runPostUpdateHook": "node --enable-source-maps ./itsJustJavascript/baker/postUpdatedHook.js",
"startAdminServer": "node --enable-source-maps ./itsJustJavascript/adminSiteServer/app.js",
Expand Down

0 comments on commit 67d90b4

Please sign in to comment.