-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathscholar_utils.py
60 lines (46 loc) · 1.56 KB
/
scholar_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from semanticscholar import SemanticScholar # type: ignore
from tqdm import tqdm # type: ignore
from _types import Paper
# Module-level Semantic Scholar API client, constructed with the library's
# default arguments and used by the recommendation lookup defined below.
client = SemanticScholar()
def get_recommended_arxiv_ids_from_semantic_scholar(
    papers: list[Paper], max_results: int = 10, min_year: int = 2018
) -> list[Paper]:
    """Return arXiv papers that Semantic Scholar recommends for ``papers``.

    Each input paper that has both a ``url`` and an ``arxiv_id`` is used as a
    seed for ``client.get_recommended_papers``. Seeds that were queried
    successfully get ``paper.explored = True`` set on them (the input objects
    are mutated). The combined recommendations are then filtered and
    converted into new ``Paper`` objects.

    A recommendation is dropped when it:
      * has no arXiv id in its ``externalIds``,
      * duplicates the arXiv id of an already accepted recommendation,
      * has the same title as one of the input ``papers``,
      * has no publication year, or a year earlier than ``min_year``.

    Args:
        papers: Seed papers; also used to exclude already-known titles.
        max_results: Maximum number of papers returned. Each seed requests
            ``max_results * 2`` recommendations (presumably to leave headroom
            for the filtering step).
        min_year: Earliest publication year accepted.

    Returns:
        At most ``max_results`` recommended papers, in the order they were
        received, each with ``title``, an arxiv.org ``url`` and ``abstract``.
    """
    results: list[dict] = []
    for paper in tqdm(papers):
        # Only papers carrying both a URL and an arXiv id are usable seeds.
        if not paper.url or not paper.arxiv_id:
            continue
        try:
            results.extend(
                client.get_recommended_papers(
                    f"arXiv:{paper.arxiv_id}", limit=max_results * 2
                )
            )
            paper.explored = True
        except Exception as e:
            # Best effort: a failing lookup for one seed must not abort the
            # remaining seeds, so report it and move on.
            print(f"[!] {e}")

    # Build the lookup sets once instead of re-scanning lists per result.
    known_titles = {p.title for p in papers}
    seen_arxiv_ids = set()

    filtered: list[dict] = []
    for result in results:
        # externalIds may be missing/None for some records.
        external_ids = result["externalIds"] or {}
        if "ArXiv" not in external_ids:
            continue
        arxiv_id = external_ids["ArXiv"]
        if arxiv_id in seen_arxiv_ids:
            continue
        if result["title"] in known_titles:
            continue
        # The year can be None; such records cannot satisfy the min_year
        # requirement, and comparing None with an int would raise TypeError.
        year = result["year"]
        if year is None or year < min_year:
            continue
        seen_arxiv_ids.add(arxiv_id)
        filtered.append(result)

    # TODO: Sort by something important

    # Only materialise the papers that are actually returned.
    return [
        Paper(
            title=result["title"],
            url=f'https://arxiv.org/abs/{result["externalIds"]["ArXiv"]}',
            abstract=result["abstract"],
        )
        for result in filtered[:max_results]
    ]