-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Migrate to using `cms` caching, filters, and downloads
The original implementation relied heavily on lots of self-managed indexes, but `@jitl/notion-api` provides much of this out-of-the-box. Some speed improvements could be made, but overall, the use of caching may be useful in the future -- as I'd like to allow for dynamic block creation to avoid needing to manually assign templates.
- Loading branch information
Showing
10 changed files
with
542 additions
and
449 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,29 @@ | ||
import {removeEmptyRelationOrMultiSelects} from "../../notion"; | ||
import {ArticlesDB} from "../../config"; | ||
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from '../../base'; | ||
import {ArticlesDB} from "../../config" | ||
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from "../../base" | ||
import {archiveEmptyFilters, ArticleCMS, createCMS} from "../../notion-cms" | ||
|
||
/**
 * CLI command that cleans up the Articles database. Its only step at present
 * deals with articles that have no authors; deduplication is still a TODO.
 *
 * NOTE(review): this listing is a rendered diff, so removed and added rows
 * appear together without +/- markers (e.g. `args`/`flags`/`examples` are
 * declared twice, and both the `archivePapersWithNoAuthors` call and the
 * `archiveEmptyFilters` call are present). Read it as before/after, not as
 * one compilable class body.
 */
export default class ArticlesClean extends BaseCommand { | ||
static summary: string = `Cleans up your Articles Database.` | ||
|
||
static description: string = `1. Removes dangling articles without authors.` | ||
|
||
static args: BaseArgTypes = BaseCommand.args; | ||
static flags: BaseFlagTypes = BaseCommand.flags; | ||
static examples: string[] = BaseCommand.examples; | ||
static args: BaseArgTypes = BaseCommand.args | ||
static flags: BaseFlagTypes = BaseCommand.flags | ||
static examples: string[] = BaseCommand.examples | ||
|
||
public async run(): Promise<void> { | ||
await this.parse(ArticlesClean) | ||
|
||
const articles: ArticlesDB = <ArticlesDB>this.appConfig.databases.articles | ||
const articlesCMS: ArticleCMS = createCMS<ArticlesDB>( | ||
this.config, this.appConfig, this.notion, "articles", | ||
) | ||
|
||
this.log(`Removing Articles with no Authors.`) | ||
await archivePapersWithNoAuthors(this, articles, this.appConfig.authorType) | ||
// Builds an "authors is empty" filter on the articles CMS and hands it to archiveEmptyFilters.
const noAuthorsFilter = articlesCMS.filter.authors.is_empty(true) | ||
await archiveEmptyFilters(this, articlesCMS, noAuthorsFilter) | ||
this.log() | ||
this.log() | ||
|
||
// TODO implement deduplication | ||
} | ||
} | ||
|
||
/**
 * Delegates to `removeEmptyRelationOrMultiSelects` for the articles database,
 * targeting its author property.
 *
 * @param CLI - The running command, forwarded unchanged to the helper.
 * @param articlesDB - Articles database config; its `databaseID` and `authorRef` are forwarded.
 * @param propType - Property type of the author reference, forwarded unchanged.
 */
const archivePapersWithNoAuthors = async (CLI: BaseCommand, articlesDB: ArticlesDB, propType: string): Promise<void> => {
  await removeEmptyRelationOrMultiSelects(
    CLI,
    articlesDB.databaseID,
    articlesDB.authorRef,
    propType,
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,58 +1,133 @@ | ||
import _ from "lodash" | ||
import {BibTeXForNotion} from "../../bibtex"; | ||
import {ArticlesDB, AuthorsDB} from "../../config" | ||
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from "../../base" | ||
import { | ||
BibTeXToNotion, | ||
initArticleDB, | ||
prepareBibTeXForNotion | ||
} from "../../models/article" | ||
import {initAuthorDB} from "../../models/author" | ||
import {createEntries, diff, updateEntries} from "../../notion" | ||
import {ArticlesDB, AuthorsDB} from "../../config"; | ||
import BaseCommand, {BaseArgTypes, BaseFlagTypes} from '../../base'; | ||
ArticleCMS, | ||
ArticlePage, | ||
AuthorCMS, | ||
batchEntries, | ||
createCMS, | ||
Relation, | ||
} from "../../notion-cms" | ||
import _ from "lodash" | ||
import {BibTeXToNotion, NotionArticle} from "../../models/article" | ||
import {richTextAsPlainText} from "@jitl/notion-api" | ||
import {performance} from "perf_hooks" | ||
|
||
/**
 * CLI command that syncs the Articles database on Notion with the local
 * BibTeX file: each BibTeX entry is converted to Notion properties and queued
 * either as an update (a page with that ID already exists) or as a create.
 *
 * NOTE(review): this listing is a rendered diff, so removed and added rows
 * appear together without +/- markers (e.g. `args`/`flags`/`examples` and
 * `toCreate`/`toUpdate` are declared twice, and the old `while` loops sit
 * next to the new `batchEntries` calls). Read it as before/after, not as one
 * compilable class body.
 */
export default class ArticlesSync extends BaseCommand { | ||
static summary: string = `Syncs your Articles Database with the local BibTeX file.` | ||
|
||
static description: string = `Strictly creates or updates articles based on the ID assigned by Paperpile.` | ||
|
||
static args: BaseArgTypes = BaseCommand.args; | ||
static flags: BaseFlagTypes = BaseCommand.flags; | ||
static examples: string[] = BaseCommand.examples; | ||
static args: BaseArgTypes = BaseCommand.args | ||
static flags: BaseFlagTypes = BaseCommand.flags | ||
static examples: string[] = BaseCommand.examples | ||
|
||
public async run(): Promise<void> { | ||
await this.parse(ArticlesSync) | ||
|
||
const articlesDB: ArticlesDB = <ArticlesDB>this.appConfig.databases.articles | ||
const authorsDB: AuthorsDB = <AuthorsDB>this.appConfig.databases.authors | ||
const articleCMS: ArticleCMS = createCMS<ArticlesDB>(this.config, this.appConfig, this.notion, "articles") | ||
|
||
const {notion: articles} = await initArticleDB(articlesDB.databaseID, this.config.cacheDir) | ||
// The authors CMS is only created when the app config reports an authors database.
let authorCMS: AuthorCMS | undefined | ||
if (this.appConfig.hasAuthorDB) { | ||
authorCMS = createCMS<AuthorsDB>(this.config, this.appConfig, this.notion, "authors") | ||
} | ||
|
||
const {authorIndex} = await initAuthorDB(authorsDB.databaseID, this.config.cacheDir) | ||
const Status = this.appConfig.status.states | ||
const parent = { | ||
database_id: this.appConfig.databases.articles.databaseID, | ||
} | ||
|
||
const BibTeX = _.chain(this.BibTeX).reduce( | ||
(obj: BibTeXForNotion, bib: any, key: string) => { | ||
obj[key] = prepareBibTeXForNotion(bib, authorIndex, this.appConfig) | ||
return obj | ||
}, {} | ||
).value() | ||
const toUpdate: { page_id: string, properties: NotionArticle }[] = [] | ||
const toCreate: { parent: typeof parent, properties: NotionArticle }[] = [] | ||
|
||
this.log(`Found ${_.keys(BibTeX).length} articles in BibTeX and ${_.keys(articles).length} on Notion...`) | ||
const {toCreate, toUpdate} = diff(_.keys(BibTeX), _.keys(articles)) | ||
// Fetch the pages that already exist on Notion for the BibTeX IDs, keyed by ID.
const existingPages = await fetchDB(this.BibTeX, articleCMS) | ||
|
||
let notionCreates = _.map(toCreate, (ID: string) => { | ||
return BibTeX[ID] | ||
}) | ||
while (notionCreates.length > 0) { | ||
notionCreates = await createEntries(this, notionCreates, BibTeXToNotion, articlesDB.databaseID) | ||
let counter: number = 0 | ||
let startTime: number = performance.now(), | ||
endTime: number = performance.now() | ||
for await (const [ID, article] of _.entries(this.BibTeX)) { | ||
// NOTE(review): this overwrites `status` and `authors` on the entries of this.BibTeX in place.
article.status = _.isNil(article.status) ? undefined : Status[article.status] | ||
|
||
let {authors} = article | ||
if (authors && this.appConfig.hasAuthorDB && authorCMS) { | ||
authors = await fetchAuthors(authors, authorCMS) | ||
} | ||
// Drop falsy entries from the author list before conversion.
article.authors = authors?.filter((a: any) => a) | ||
|
||
const properties: NotionArticle = BibTeXToNotion(this.appConfig, article) | ||
|
||
// An ID with an existing page is queued as an update; anything else is queued for creation.
const page: ArticlePage | undefined = existingPages[ID] | ||
if (page) { | ||
toUpdate.push({page_id: (<ArticlePage>page).content.id, properties}) | ||
} else { | ||
toCreate.push({parent, properties}) | ||
} | ||
|
||
// Every 100th entry (including the first, when counter is 0) print the minutes elapsed
// since the loop began; startTime is never reset, so the figure is cumulative.
if (counter % 100 == 0) { | ||
endTime = performance.now() | ||
const time: string = `${(endTime - startTime) / 1000 / 60}min` | ||
console.log(`Cumulative time: ~${time}.`) | ||
} | ||
|
||
counter++ | ||
} | ||
|
||
let notionUpdates = _.map(toUpdate, (ID: string) => { | ||
const update = BibTeX[ID] | ||
const {pageID} = articles[ID as string][0].frontmatter | ||
return {pageID, ...update} | ||
await batchEntries(this, toCreate, async (entry: typeof toCreate[0]) => { | ||
await articleCMS.config.notion.pages.create(entry) | ||
}) | ||
while (notionUpdates.length > 0) { | ||
notionUpdates = await updateEntries(this, notionUpdates, BibTeXToNotion) | ||
|
||
await batchEntries(this, toUpdate, async (entry: typeof toUpdate[0]) => { | ||
await articleCMS.config.notion.pages.update(entry) | ||
}) | ||
} | ||
} | ||
|
||
/** Article pages fetched from Notion, keyed by the plain-text value of each page's `ID` property. */
type FetchedArticleDB = Record<string, ArticlePage>
|
||
/**
 * Fetches the Notion article pages whose `ID` property matches a key of
 * `BibTeX` and indexes them by that ID.
 *
 * @param BibTeX - Object whose keys are the article IDs to look up; the values are not read here.
 * @param cms - Articles CMS used to build the filters and run the queries.
 * @returns A map from plain-text article ID to the page returned for it. IDs
 *   with no matching page are simply absent; if several pages share an ID the
 *   last one returned wins.
 */
const fetchDB = async (BibTeX: any, cms: ArticleCMS): Promise<FetchedArticleDB> => {
  const db: FetchedArticleDB = {}

  // Look the IDs up 100 at a time so each compound `or` filter stays bounded in size.
  const chunks = _.chain(BibTeX).keys().chunk(100).value()

  // `chunks` is a plain array, so an ordinary `for...of` is enough; only the
  // query itself is an async iterable.
  for (const batch of chunks) {
    const filter = cms.filter.or(
      ...batch.map((id: string) => cms.filter.ID.equals(id)),
    )
    for await (const page of cms.query({filter})) {
      const ID = richTextAsPlainText(page.frontmatter.ID)
      db[ID] = page
    }
  }

  return db
}
|
||
/**
 * Resolves author names to Notion relation entries, ordered by where each
 * author appears in `authors`.
 *
 * An author page matches when its name, or one of its `;`-separated aliases
 * (trimmed), is exactly equal to an entry of `authors`. Pages returned by the
 * query that match no entry exactly are skipped, and names with no matching
 * page contribute nothing to the result.
 *
 * @param authors - Author names in the order they should appear on the article.
 * @param cms - Authors CMS used to build the filter and run the query.
 * @returns One `{id}` relation per matched author page, sorted by the matched
 *   name's position in `authors`.
 */
const fetchAuthors = async (authors: string[], cms: AuthorCMS): Promise<Relation["relation"]> => {
  // Nothing to resolve; skip the query rather than send an empty compound filter.
  if (authors.length === 0) {
    return []
  }

  const filter = cms.filter.or(
    ...authors.map((author: string) => cms.filter.or(
      cms.filter.name.equals(author), cms.filter.aliases.contains(author),
    )),
  )

  // Keep each page id together with its position in `authors`. Pages arrive in
  // query order, which need not match the requested order, so the position
  // must be used as a sort key and never as an index into the collected list.
  const matches: { id: string, position: number }[] = []
  for await (const author of cms.query({filter})) {
    const {content: {id}, frontmatter} = author
    const name: string = richTextAsPlainText(frontmatter.name)
    const aliases: string = richTextAsPlainText(frontmatter.aliases)

    // First candidate (name, then aliases) that appears verbatim in `authors`.
    const position: number | undefined = [name, ...aliases.split(";")]
      .map((alias: string): number => authors.indexOf(alias.trim()))
      .find((n: number): boolean => n > -1)

    if (position !== undefined) {
      matches.push({id, position})
    }
  }

  return matches
    .sort((a, b) => a.position - b.position)
    .map(({id}) => ({id}))
}
Oops, something went wrong.