-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpaperstack.py
112 lines (93 loc) · 3.85 KB
/
paperstack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import argparse
import asyncio
import os
from datetime import datetime
from arxiv_utils import fill_papers_with_arxiv, search_arxiv_as_paper
from notion_utils import (
get_notion_client,
get_papers_from_notion,
write_papers_to_notion,
)
from openai_utils import (
get_focus_label_from_abstract,
get_openai_client,
summarize_abstract_with_openai,
)
from scholar_utils import get_recommended_arxiv_ids_from_semantic_scholar
# Default arXiv full-text search query: OR'd quoted phrases targeting the
# adversarial-ML / LLM-security literature. Override with --arxiv-search-query.
ARXIV_SEARCH = """\
"adversarial attacks" OR "language model attacks" OR "LLM vulnerabilities" OR \
"AI security" OR "machine learning security" OR "jailbreak" OR "bypassing AI"\
"""
async def main():
    """Sync the paper database in Notion.

    Pipeline: read existing papers from Notion, fill missing metadata from
    arXiv, optionally discover new papers (arXiv search and/or Semantic
    Scholar recommendations), generate summaries and focus labels with
    OpenAI, then write every changed paper back to Notion.

    Credentials default to the NOTION_TOKEN, NOTION_DATABASE_ID and
    OPENAI_API_TOKEN environment variables; see the CLI flags below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--notion-token",
        type=str,
        default=os.environ.get("NOTION_TOKEN"),
        help="Notion token",
    )
    parser.add_argument(
        "--database-id",
        type=str,
        default=os.environ.get("NOTION_DATABASE_ID"),
        help="Notion database id",
    )
    parser.add_argument(
        "--openai-token",
        type=str,
        default=os.environ.get("OPENAI_API_TOKEN"),
        help="OpenAI token",
    )
    parser.add_argument("--arxiv-search-query", type=str, default=ARXIV_SEARCH)
    parser.add_argument("--search-arxiv", action="store_true", default=False)
    parser.add_argument("--search-semantic-scholar", action="store_true", default=False)
    args = parser.parse_args()

    print("[+] Paperstack")

    notion_client = get_notion_client(args.notion_token)
    openai_client = get_openai_client(args.openai_token)

    print(f" |- Getting papers from Notion [{args.database_id}]")
    papers = await get_papers_from_notion(notion_client, args.database_id)
    print(f" |- {len(papers)} existing papers")

    # Papers published before this cutoff are marked explored so they are not
    # fed back into Semantic Scholar recommendations below.
    # Hoisted out of the loop: the cutoff is loop-invariant.
    explored_cutoff = datetime.fromisoformat("2024-07-01 00:00:00+00:00")
    for paper in papers:
        if paper.published < explored_cutoff:
            paper.explored = True

    # Generator instead of a list inside all() — no throwaway list.
    if not all(p.has_arxiv_props() for p in papers):
        print(" |- Filling in missing data from arXiv")
        papers = fill_papers_with_arxiv(papers)

    if args.search_arxiv:
        print(" |- Searching arXiv for new papers")
        # Set for O(1) membership tests; also track titles we append so a
        # duplicate title within the search results is not added twice.
        existing_titles = {paper.title for paper in papers}
        for searched_paper in search_arxiv_as_paper(
            args.arxiv_search_query, max_results=50
        ):
            if searched_paper.title not in existing_titles:
                print(f" |- {searched_paper.title[:50]}...")
                papers.append(searched_paper)
                existing_titles.add(searched_paper.title)

    if args.search_semantic_scholar:
        to_explore = [p for p in papers if not p.explored]
        if to_explore:
            print(" |- Getting related papers from Semantic Scholar")
            recommended_papers = get_recommended_arxiv_ids_from_semantic_scholar(to_explore)
            papers.extend(fill_papers_with_arxiv(recommended_papers))
            print(f" |- {len(recommended_papers)} new papers")
        else:
            print(" |- All papers have been explored")

    # Only papers that have an abstract but no summary yet are sent to OpenAI.
    if not all(paper.summary for paper in papers):
        print(" |- Building summaries with OpenAI")
        for paper in (p for p in papers if not p.summary and p.abstract):
            print(f" |- {paper.title[:50]}...")
            paper.summary = summarize_abstract_with_openai(
                openai_client, paper.abstract
            )

    if not all(paper.focus for paper in papers):
        print(" |- Assigning focus labels with OpenAI")
        for paper in (p for p in papers if not p.focus and p.abstract):
            paper.focus = get_focus_label_from_abstract(openai_client, paper.abstract)
            print(f" |- {paper.focus}")

    # Persist only papers that actually changed during this run.
    to_write = [p for p in papers if p.has_changed()]
    if to_write:
        print(f" |- Writing {len(to_write)} updates back to Notion")
        await write_papers_to_notion(notion_client, args.database_id, to_write)

    print("[+] Done!")
# Script entry point: run the async pipeline to completion.
if __name__ == "__main__":
    asyncio.run(main())