Skip to content

Commit

Permalink
✨ Migrate to using cms caching, filters, and downloads
Browse files Browse the repository at this point in the history
The original implementation relied heavily on lots of self-managed
indexes, but `@jitl/notion-api` provides much of this out-of-the-box.
Some speed improvements could be made, but overall, the use of caching
may be useful in the future -- as I'd like to allow for dynamic block
creation to avoid needing to manually assign templates.
  • Loading branch information
jmuchovej committed Apr 20, 2022
1 parent 2e50c7a commit 5c87df1
Show file tree
Hide file tree
Showing 10 changed files with 542 additions and 449 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"dependencies": {
"@citation-js/core": "^0.5.4",
"@citation-js/plugin-bibtex": "^0.5.6",
"@jitl/notion-api": "^0.1.2",
"@jitl/notion-api": "0.2.1",
"@notionhq/client": "^1.0.4",
"@oclif/core": "^1.7.0",
"@types/lodash": "^4.14.180",
Expand Down
24 changes: 11 additions & 13 deletions src/commands/articles/clean.ts
Original file line number Diff line number Diff line change
@@ -1,31 +1,29 @@
import {removeEmptyRelationOrMultiSelects} from "../../notion";
import {ArticlesDB} from "../../config";
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from '../../base';
import {ArticlesDB} from "../../config"
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from "../../base"
import {archiveEmptyFilters, ArticleCMS, createCMS} from "../../notion-cms"

/**
 * Command that cleans up the Articles database (see `summary` and
 * `description` below).
 *
 * NOTE(review): this span comes from a rendered diff, so removed and added
 * versions of some lines appear together (the static `args`/`flags`/`examples`
 * declarations and the two archive calls in `run`) — confirm against the
 * actual file before relying on it.
 */
export default class ArticlesClean extends BaseCommand {
static summary: string = `Cleans up your Articles Database.`

static description: string = `1. Removes dangling articles without authors.`

// Arguments, flags and examples are taken unchanged from BaseCommand.
static args: BaseArgTypes = BaseCommand.args;
static flags: BaseFlagTypes = BaseCommand.flags;
static examples: string[] = BaseCommand.examples;
static args: BaseArgTypes = BaseCommand.args
static flags: BaseFlagTypes = BaseCommand.flags
static examples: string[] = BaseCommand.examples

/**
 * Parses the command line, then hands an "authors is empty" filter for the
 * articles CMS to `archiveEmptyFilters`.
 */
public async run(): Promise<void> {
await this.parse(ArticlesClean)

const articles: ArticlesDB = <ArticlesDB>this.appConfig.databases.articles
// CMS handle for the "articles" database, built from the CLI config, app config and Notion client.
const articlesCMS: ArticleCMS = createCMS<ArticlesDB>(
this.config, this.appConfig, this.notion, "articles",
)

this.log(`Removing Articles with no Authors.`)
await archivePapersWithNoAuthors(this, articles, this.appConfig.authorType)
// Filter matching articles whose `authors` property is empty.
const noAuthorsFilter = articlesCMS.filter.authors.is_empty(true)
// presumably archives every page matching the filter — confirm in notion-cms.
await archiveEmptyFilters(this, articlesCMS, noAuthorsFilter)
this.log()
this.log()

// TODO implement deduplication
}
}

/**
 * Forwards the articles database's ID and author property reference to
 * `removeEmptyRelationOrMultiSelects`.
 *
 * @param CLI - Command instance passed through to the helper.
 * @param articlesDB - Articles database config; `databaseID` and `authorRef` are read.
 * @param propType - Property type passed through to the helper unchanged.
 */
const archivePapersWithNoAuthors = async (CLI: BaseCommand, articlesDB: ArticlesDB, propType: string): Promise<void> => {
  await removeEmptyRelationOrMultiSelects(
    CLI,
    articlesDB.databaseID,
    articlesDB.authorRef,
    propType,
  )
}
147 changes: 111 additions & 36 deletions src/commands/articles/sync.ts
Original file line number Diff line number Diff line change
@@ -1,58 +1,133 @@
import _ from "lodash"
import {BibTeXForNotion} from "../../bibtex";
import {ArticlesDB, AuthorsDB} from "../../config"
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from "../../base"
import {
BibTeXToNotion,
initArticleDB,
prepareBibTeXForNotion
} from "../../models/article"
import {initAuthorDB} from "../../models/author"
import {createEntries, diff, updateEntries} from "../../notion"
import {ArticlesDB, AuthorsDB} from "../../config";
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from '../../base';
ArticleCMS,
ArticlePage,
AuthorCMS,
batchEntries,
createCMS,
Relation,
} from "../../notion-cms"
import _ from "lodash"
import {BibTeXToNotion, NotionArticle} from "../../models/article"
import {richTextAsPlainText} from "@jitl/notion-api"
import {performance} from "perf_hooks"

/**
 * Command that syncs the Articles database with the local BibTeX file (see
 * `summary` and `description` below).
 *
 * NOTE(review): this span comes from a rendered diff, so removed and added
 * lines are interleaved (duplicate static declarations, two `toCreate` /
 * `toUpdate` definitions, unbalanced `while` / `_.map` bodies) — confirm
 * against the actual file before relying on it.
 */
export default class ArticlesSync extends BaseCommand {
static summary: string = `Syncs your Articles Database with the local BibTeX file.`

static description: string = `Strictly creates or updates articles based on the ID assigned by Paperpile.`

// Arguments, flags and examples are taken unchanged from BaseCommand.
static args: BaseArgTypes = BaseCommand.args;
static flags: BaseFlagTypes = BaseCommand.flags;
static examples: string[] = BaseCommand.examples;
static args: BaseArgTypes = BaseCommand.args
static flags: BaseFlagTypes = BaseCommand.flags
static examples: string[] = BaseCommand.examples

/**
 * Builds one Notion payload per BibTeX entry, sorts the payloads into creates
 * and updates depending on whether a page with the same ID was found, and
 * sends both lists through `batchEntries`.
 */
public async run(): Promise<void> {
await this.parse(ArticlesSync)

const articlesDB: ArticlesDB = <ArticlesDB>this.appConfig.databases.articles
const authorsDB: AuthorsDB = <AuthorsDB>this.appConfig.databases.authors
const articleCMS: ArticleCMS = createCMS<ArticlesDB>(this.config, this.appConfig, this.notion, "articles")

const {notion: articles} = await initArticleDB(articlesDB.databaseID, this.config.cacheDir)
// The author CMS only exists when the app config reports an author database.
let authorCMS: AuthorCMS | undefined
if (this.appConfig.hasAuthorDB) {
authorCMS = createCMS<AuthorsDB>(this.config, this.appConfig, this.notion, "authors")
}

const {authorIndex} = await initAuthorDB(authorsDB.databaseID, this.config.cacheDir)
const Status = this.appConfig.status.states
// Parent reference attached to every page-creation payload.
const parent = {
database_id: this.appConfig.databases.articles.databaseID,
}

const BibTeX = _.chain(this.BibTeX).reduce(
(obj: BibTeXForNotion, bib: any, key: string) => {
obj[key] = prepareBibTeXForNotion(bib, authorIndex, this.appConfig)
return obj
}, {}
).value()
// Payloads collected by the loop below: updates carry a page id, creates carry the parent database.
const toUpdate: { page_id: string, properties: NotionArticle }[] = []
const toCreate: { parent: typeof parent, properties: NotionArticle }[] = []

this.log(`Found ${_.keys(BibTeX).length} articles in BibTeX and ${_.keys(articles).length} on Notion...`)
const {toCreate, toUpdate} = diff(_.keys(BibTeX), _.keys(articles))
// Pages found for the BibTeX keys, indexed by their plain-text ID.
const existingPages = await fetchDB(this.BibTeX, articleCMS)

let notionCreates = _.map(toCreate, (ID: string) => {
return BibTeX[ID]
})
while (notionCreates.length > 0) {
notionCreates = await createEntries(this, notionCreates, BibTeXToNotion, articlesDB.databaseID)
let counter: number = 0
let startTime: number = performance.now(),
endTime: number = performance.now()
for await (const [ID, article] of _.entries(this.BibTeX)) {
// Translate the entry's status through the configured states; a nil status becomes undefined.
article.status = _.isNil(article.status) ? undefined : Status[article.status]

let {authors} = article
// Author names are only resolved through fetchAuthors when an author CMS is available.
if (authors && this.appConfig.hasAuthorDB && authorCMS) {
authors = await fetchAuthors(authors, authorCMS)
}
// Drop falsy entries from the author list.
article.authors = authors?.filter((a: any) => a)

const properties: NotionArticle = BibTeXToNotion(this.appConfig, article)

// An entry with a matching page becomes an update; otherwise it becomes a create.
const page: ArticlePage | undefined = existingPages[ID]
if (page) {
toUpdate.push({page_id: (<ArticlePage>page).content.id, properties})
} else {
toCreate.push({parent, properties})
}

// Print the elapsed time (in minutes) every 100 entries, including the first since counter starts at 0.
if (counter % 100 == 0) {
endTime = performance.now()
const time: string = `${(endTime - startTime) / 1000 / 60}min`
console.log(`Cumulative time: ~${time}.`)
}

counter++
}

let notionUpdates = _.map(toUpdate, (ID: string) => {
const update = BibTeX[ID]
const {pageID} = articles[ID as string][0].frontmatter
return {pageID, ...update}
// Each collected create payload is passed to the Notion client's pages.create via batchEntries.
await batchEntries(this, toCreate, async (entry: typeof toCreate[0]) => {
await articleCMS.config.notion.pages.create(entry)
})
while (notionUpdates.length > 0) {
notionUpdates = await updateEntries(this, notionUpdates, BibTeXToNotion)

// Each collected update payload is passed to the Notion client's pages.update via batchEntries.
await batchEntries(this, toUpdate, async (entry: typeof toUpdate[0]) => {
await articleCMS.config.notion.pages.update(entry)
})
}
}

/** Article pages indexed by a string key (`fetchDB` uses each page's plain-text ID). */
type FetchedArticleDB = Record<string, ArticlePage>

/**
 * Fetches the pages whose ID matches one of the given BibTeX keys.
 *
 * Keys are queried in batches; each batch is sent as a single `or` filter made
 * of `ID equals <key>` conditions.
 *
 * @param BibTeX - Object whose keys are the IDs to look up.
 * @param cms - CMS used to build the filters and run the queries.
 * @returns Pages indexed by the plain-text value of their `ID` property. If
 *   several pages share an ID, the last one yielded wins.
 */
const fetchDB = async (BibTeX: any, cms: ArticleCMS): Promise<FetchedArticleDB> => {
  // NOTE(review): 100 presumably keeps each compound filter within the API's limits — confirm.
  const batchSize = 100
  const db: FetchedArticleDB = {}

  // The chunks are a plain array, so a synchronous loop is enough; only the query itself is async.
  for (const batch of _.chunk(_.keys(BibTeX), batchSize)) {
    const filter = cms.filter.or(
      ...batch.map((id: string) => cms.filter.ID.equals(id)),
    )
    for await (const page of cms.query({filter})) {
      db[richTextAsPlainText(page.frontmatter.ID)] = page
    }
  }

  return db
}

/**
 * Resolves author names to relation entries, ordered like the input list.
 *
 * Queries the author CMS for pages whose `name` equals, or whose `aliases`
 * contains, any of the given names. Each returned page is matched back to the
 * first position in `authors` that equals its name or one of its
 * `;`-separated aliases (after trimming). Pages that match no input name
 * exactly are ignored.
 *
 * @param authors - Author names in the order they should appear.
 * @param cms - CMS used to build the filter and run the query.
 * @returns One `{id}` per matched page, sorted by the position of the matched
 *   name in `authors`. Empty when `authors` is empty or nothing matches.
 */
const fetchAuthors = async (authors: string[], cms: AuthorCMS): Promise<Relation["relation"]> => {
  // Nothing to look up, so avoid sending an `or` filter with no conditions.
  if (authors.length === 0) {
    return []
  }

  const filter = cms.filter.or(
    ...authors.map((author: string) => cms.filter.or(
      cms.filter.name.equals(author), cms.filter.aliases.contains(author),
    )),
  )

  // Keep each page id together with the input position it matched, so the
  // result can be ordered by input position rather than by query order.
  const matches: { position: number, id: string }[] = []
  for await (const author of cms.query({filter: filter})) {
    const {content: {id}, frontmatter} = author
    const name: string = richTextAsPlainText(frontmatter.name)
    const aliases: string = richTextAsPlainText(frontmatter.aliases)

    const position: number | undefined = [name, ...aliases.split(";")]
      .map((alias: string): number => authors.indexOf(alias.trim()))
      .find((n: number): boolean => n > -1)

    if (position !== undefined) {
      matches.push({position, id})
    }
  }

  // Array.prototype.sort is stable, so pages matching the same position keep query order.
  return matches
    .sort((a, b) => a.position - b.position)
    .map(({id}) => ({id}))
}
Loading

0 comments on commit 5c87df1

Please sign in to comment.