Skip to content

Commit

Permalink
✨ Add color formatting; centralize repeated CRUD
Browse files Browse the repository at this point in the history
Drop `tqdm`. Now colorfully prints status for each row (e.g. created,
updated, etc.). Fixed status extraction error.
  • Loading branch information
jmuchovej committed Aug 24, 2021
1 parent c68af31 commit 157f1d9
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 91 deletions.
135 changes: 72 additions & 63 deletions paperpile_notion/commands.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,34 @@
import shutil
import traceback
import itertools
from typing import Dict
import shutil
from pathlib import Path

import click
from click import Context

from tqdm import tqdm
import numpy as np
import pandas as pd
from ruamel.yaml import YAML

from click import Context
from notion.client import NotionClient
from notion.collection import CollectionRowBlock
from notion.block import CollectionViewPageBlock

from ruamel.yaml import YAML

from paperpile_notion.preprocessing import (
extract_status,
extract_fields_methods,
)
from paperpile_notion import crud as CRUD
from paperpile_notion.preprocessing import extract_fields_methods, extract_status


@click.group(invoke_without_command=True)
@click.help_option("-h", "--help")
@click.option("-t", "--token", "token", help="Your Notion API token.", envvar=["NOTION_TOKEN_V2", "TOKEN"])
@click.option(
"-t",
"--token",
"token",
help="Your Notion API token.",
envvar=["NOTION_TOKEN_V2", "TOKEN"],
)
@click.pass_context
def cli(ctx: Context, token: str) -> None:
client_kwargs = {}
if not token:
client_kwargs["email"] = click.prompt("Your Notion email addresss")
client_kwargs["password"] = click.prompt("Your Notion password", hide_input=True)
client_kwargs["password"] = click.prompt(
"Your Notion password", hide_input=True
)
else:
client_kwargs["token_v2"] = token
# TODO support integration tokens, BLOCK NotionClient doesn't support them
Expand All @@ -42,9 +38,12 @@ def cli(ctx: Context, token: str) -> None:


@cli.command()
@click.option("-r", "--refs", "references",
@click.option(
"-r",
"--refs",
"references",
required=True,
help="The JSON file exported from Paperpile."
help="The JSON file exported from Paperpile.",
)
@click.pass_context
def update_db(ctx: click.Context, references: str) -> None:
Expand All @@ -54,11 +53,19 @@ def update_db(ctx: click.Context, references: str) -> None:


@cli.command()
@click.option("-r", "--refs", "references",
@click.option(
"-r",
"--refs",
"references",
required=True,
help="The JSON file exported from Paperpile."
help="The JSON file exported from Paperpile.",
)
@click.option(
"--no-authors",
type=bool,
help="Don't update your Author's database.",
default=False,
)
@click.option("--no-authors", type=bool, help="Don't update your Author's database.", default=False)
@click.pass_context
def update_article_db(ctx: click.Context, references: str, no_authors: bool) -> None:
"""Updates your Article's database, optionally syncing/updating your Author's
Expand All @@ -74,45 +81,49 @@ def update_article_db(ctx: click.Context, references: str, no_authors: bool) ->
authorCV = notion.get_block(config["blocks"]["authors"]).collection

assert references.endswith(".json")
df = pd.read_json(references)[[
"_id", "title", "author", "abstract",
"labelsNamed", "foldersNamed",
"journalfull", "journal", "kind",
]]

df[["fields", "methods"]] = pd.DataFrame(df["labelsNamed"].apply(
extract_fields_methods, config=config["fields-methods"]
).tolist())

status_col = config["status"].get("col" , "foldersNamed")
df = pd.read_json(references)[
[
"_id",
"title",
"author",
"abstract",
"labelsNamed",
"foldersNamed",
"journalfull",
"journal",
"kind",
]
]

df[["fields", "methods"]] = pd.DataFrame(
df["labelsNamed"]
.apply(extract_fields_methods, config=config["fields-methods"])
.tolist()
)

status_col = config["status"].get("col", "foldersNamed")
df["status"] = df[status_col].apply(extract_status, config=config["status"])
df["author"] = df["author"].apply(lambda x: x if type(x) == list else [])

print(f"Found {len(df)} papers in {references} and {len(articleCV.get_rows())}")

pbar = tqdm(desc="Updating/Creating Articles", total=len(df))
for idx, row in df.iterrows():
try:
entry = CRUD.article(row, articleCV, authorCV)
except Exception as e:
tqdm.write(row.title)
# tqdm.write(str(traceback.format_exc()))
tqdm.write(str(e))
import pdb; pdb.set_trace()
finally:
pbar.update(1)
df["author"] = df["author"].apply(lambda x: x if isinstance(x, list) else [])

CRUD.dispatch(
df,
fn=CRUD.article,
CVs=[articleCV, authorCV],
desc="Updating/Creating Articles",
)


@cli.command()
@click.option("-r", "--refs", "references",
@click.option(
"-r",
"--refs",
"references",
required=True,
help="The JSON file exported from Paperpile."
help="The JSON file exported from Paperpile.",
)
@click.pass_context
def update_author_db(ctx: click.Context, references: str) -> None:
"""Strictly updates the your Author's database in Notion.
"""
"""Strictly updates the your Author's database in Notion."""
notion = ctx.obj["notion"]
config = ctx.obj["config"]

Expand All @@ -128,16 +139,14 @@ def update_author_db(ctx: click.Context, references: str) -> None:
df = pd.DataFrame(df.tolist())
# df["orcid"] = pd.fillna(df["orcid"], "")

pbar = tqdm(desc="Updating/Creating Authors", total=len(df))
for idx, row in df.iterrows():
try:
entry = CRUD.author(row, authorCV)
except Exception as e:
tqdm.write(row.title)
tqdm.write(str(traceback.format_exc()))
tqdm.write(str(e))
finally:
pbar.update(1)
CRUD.dispatch(
df,
fn=CRUD.author,
CVs=[
authorCV,
],
desc="Updating/Creating Authors",
)


@cli.command("edit-config")
Expand Down
118 changes: 99 additions & 19 deletions paperpile_notion/crud.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,84 @@
import traceback
from typing import Any, Callable, List, Tuple

import click
import emojis
import pandas as pd
from pandas.core.indexes.base import Index
from notion.block import CollectionViewPageBlock as CV


def _update(old: Any, new: Any, states: List[str]) -> Tuple[Any, bool]:
try:
diff = hash(old) != hash(new)
except TypeError:
diff = hash(f"{list(old)}") != hash(f"{list(new)}")
return new, states + ["update" if diff else "skip"]


def _convert(states: List[str], title: str) -> List[str]:
    """Turn a row's collected states into styled text fragments for printing.

    Args:
        states: The overall state first (``"update"``, ``"create"``,
            ``"failed"`` or ``"skip"``), followed by one ``"update"`` or
            ``"skip"`` per field that was written. This list is not mutated.
        title: The row's title, shown underlined after the status label.

    Returns:
        A two-element list: the colored status label and the underlined title.

    Raises:
        IndexError: If ``states`` is empty.
        KeyError: If the overall state has no associated style.
    """
    styles = {
        "update": click.style("Updated:", fg="blue", bold=True),
        "skip": click.style("Skipped:", fg="yellow", bold=True),
        "create": click.style("Created:", fg="green", bold=True),
        "failed": click.style("Failed:", fg="red", bold=True),
    }

    overall, per_field = states[0], states[1:]
    # Only downgrade to "skip" when there ARE per-field states and every one
    # of them is a skip. Without the non-empty check a lone ["failed"] would
    # be reported as "Skipped:".
    if per_field and all(state == "skip" for state in per_field):
        overall = "skip"

    return [styles[overall], click.style(str(title), underline=True)]


def author(row: pd.Series, authorCV: CV) -> None:
try:
entry = authorCV.get_rows(search=row["_id"])[0]
states = ["update"]
except IndexError:
entry = authorCV.add_row()
states = ["create"]

entry.id = row["_id"]
entry.id, states = _update(entry.id, row["_id"], states)
try:
entry.title = f"{row['first']} {row['last']}"
title = f"{row['first']} {row['last']}"
except KeyError:
entry.title = row["formatted"]
title = row["formatted"]
entry.title, states = _update(entry.title, title, states)

entry.disciplines += [emojis.encode(":lab_coat: Research")]

return entry
return entry, _convert(states, entry.title)


def article(row: pd.Series, articleCV: CV, authorCV: CV = None) -> None:
try:
assert type(row["_id"]) != float, f"{row['title']} doesn't have an ID."
assert not isinstance(row["_id"], float), f"{row['title']} doesn't have an ID."
except AssertionError:
return None
return None, _convert(["failed"], row["title"])

try:
entry = articleCV.get_rows(search=row["_id"])[0]
states = ["update"]
except IndexError:
entry = articleCV.add_row()
states = ["create"]

entry.id = row["_id"]
entry.title = row["title"]
entry.url = f"https://paperpile.com/app/p/{entry.id}"
entry.id, states = _update(entry.id, row["_id"], states)
entry.title, states = _update(entry.title, row["title"], states)
entry.url, states = _update(
entry.url, f"https://paperpile.com/app/p/{entry.id}", states
)

entry.labels = row["labelsNamed"]
entry.folders = row["foldersNamed"]
entry.methods = [m["name"] for m in row["methods"]]
entry.fields = [f["name"] for f in row["fields"]]
entry.labels, states = _update(entry.labels, row["labelsNamed"], states)
entry.folders, states = _update(entry.folders, row["foldersNamed"], states)
entry.methods, states = _update(
entry.methods, [m["name"] for m in row["methods"]], states
)
entry.fields, states = _update(
entry.fields, [f["name"] for f in row["fields"]], states
)

entry.status = emojis.encode(row["status"]["name"])
entry.status, states = _update(
entry.status, emojis.encode(row["status"]["name"]), states
)

if authorCV:
author_ls = []
Expand All @@ -49,8 +87,50 @@ def article(row: pd.Series, articleCV: CV, authorCV: CV = None) -> None:
author_ls += [authorCV.get_rows(search=author_["_id"])[0]]
except IndexError:
author_ls += [author(author_, authorCV)]
entry.authors = author_ls
else:
entry.authors = [i["formatted"] for i in row["author"]]

return entry
author_ls = [i["formatted"] for i in row["author"]]
entry.authors, states = _update(entry.authors, author_ls, states)

return entry, _convert(states, entry.title)


def dispatch(df: pd.DataFrame, fn: Callable, CVs: List, desc: str = "") -> None:
    """Apply ``fn`` to every row of ``df`` and print a colored status per row.

    Args:
        df: The rows to sync.
        fn: Called as ``fn(row, *CVs)``; expected to return an
            ``(entry, fragments)`` pair where ``fragments`` is a list of
            strings to print for the row.
        CVs: Positional arguments forwarded to ``fn``. ``CVs[0].get_rows()``
            is also used for the summary count printed before the loop.
        desc: Currently unused; kept so existing callers keep working.

    An exception raised by ``fn`` for one row is printed together with its
    traceback and does not stop the remaining rows from being processed.
    """
    from click import style as s

    summary = [
        "Found",
        # Stringify the counts explicitly rather than relying on
        # click.style to do it.
        s(str(len(df)), bold=True),
        f"{fn.__name__}s in JSON and",
        s(str(len(CVs[0].get_rows())), bold=True),
        "on Notion.",
    ]
    click.echo(" ".join(click.style(part, fg="magenta") for part in summary))

    for idx, row in df.iterrows():
        states = []
        try:
            _, states = fn(row, *CVs)
        except Exception as e:
            # Not every frame has a "title" column, so look the label up
            # defensively; failing here would abort the whole run from inside
            # the error handler.
            label = row.get("title", row.get("formatted", idx))
            report = [
                s("Exception:", fg="red", bold=True),
                f"{e} raised at",
                s(str(label), underline=True),
            ]
            click.echo(" ".join(report))
            click.echo(traceback.format_exc())
        finally:
            click.echo(" " + " ".join(states))

    click.secho(emojis.encode(":confetti_ball:: Done!"), fg="cyan", blink=True)
13 changes: 6 additions & 7 deletions paperpile_notion/preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from os import stat
from typing import List, Dict
from typing import Dict, List

import pandas as pd

from notion.collection import NotionSelect


Expand All @@ -21,32 +20,32 @@ def extract_fields_methods(labels: List, config: Dict) -> List:
try:
field = config["fields"][field]["name"]
except (TypeError, KeyError) as e:
pass
pass

fields.append({"name": field, "color": fcolor})

try:
mcolor = config["methods"][method]["color"]
except (TypeError, KeyError) as e:
mcolor = fcolor

try:
method = config["methods"][method]["name"]
except (TypeError, KeyError) as e:
pass

methods.append({"name": method, "color": mcolor})

return {"fields": fields, "methods": methods}


def extract_status(row: List, config: Dict) -> List:
prefix = config["prefix"]
state = next(filter(lambda x: x.startswith(prefix), row), None)
state = next(filter(lambda x: x.startswith(prefix), row), None)
try:
assert state
status = config["states"][state.replace(prefix, "")]
except (KeyError, AssertionError):
return {"name": ":question: Unknown", "color": "default"}
else:
return {"name": status["name"], "color": status["color"]}
return {"name": status["name"], "color": status.get("color", "default")}
Loading

0 comments on commit 157f1d9

Please sign in to comment.