-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstreamlit_app.py
135 lines (116 loc) · 4.78 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
import pandas as pd
import gzip
import json
import datetime
import os
import re
# Directory holding the data files: load_latest_date() lists it, and the
# script below reads "<date>.resp.json" and "<date>.json.gz" from it.
temp_data_dir = "./data"
def remove_parentheses_content(s):
    """Return *s* with every parenthesized segment removed.

    A segment runs from an opening parenthesis to the first closing
    parenthesis that follows it; both parentheses are removed together with
    the text between them. Surrounding whitespace is left untouched, and an
    opening parenthesis with no closing partner is kept as-is.
    """
    parenthesized = re.compile(r"\([^)]*\)")
    stripped = parenthesized.sub("", s)
    return stripped
def load_latest_date(data_dir=None):
    """Return the latest date prefix among the ``.json`` files in a directory.

    Every file name ending in ``.json`` contributes the part of its name
    before the first ``.``; the largest of those prefixes under plain string
    comparison is returned.

    Args:
        data_dir: Directory to scan. Defaults to the module-level
            ``temp_data_dir`` when omitted.

    Returns:
        The greatest file-name prefix, as a string.

    Raises:
        ValueError: If the directory contains no ``.json`` files.
    """
    if data_dir is None:
        data_dir = temp_data_dir
    file_date_list = [
        file.split(".")[0]
        for file in os.listdir(data_dir)
        if file.endswith(".json")
    ]
    if not file_date_list:
        # max() on an empty list raises an unhelpful ValueError; keep the
        # exception type but say what is actually missing.
        raise ValueError(f"No .json files found in {data_dir!r}")
    return max(file_date_list)
if __name__ == "__main__":
    date = load_latest_date()

    # Load judgement: the file is read line by line, each line being a JSON
    # object with an "id" and a "judgement" entry.
    judgement_file = os.path.join(temp_data_dir, f"{date}.resp.json")
    judgement_results = {}
    with open(judgement_file, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines instead of failing in json.loads.
                continue
            paper = json.loads(line)
            judgement_results[paper["id"]] = paper["judgement"]

    # Load paper information: a gzip-compressed, UTF-8 JSON object keyed by
    # paper id.
    paper_file = os.path.join(temp_data_dir, f"{date}.json.gz")
    with gzip.open(paper_file, "rt", encoding="utf-8") as f:
        paper_dict = json.load(f)

    # Combine the paper info and judgement, and strip parenthesized text
    # from each title.
    for paper_id, paper_info in paper_dict.items():
        paper_info["judgement"] = judgement_results[paper_id]
        paper_info["title"] = remove_parentheses_content(paper_info["title"])

    relevance_threshold = 0.7

    # Find relevant papers: group them by topic and remember their ids.
    # Judgement entries that are not dicts are ignored.
    relevant_papers = {}
    relevant_paper_ids = set()
    for paper_id, paper_info in paper_dict.items():
        for topic, relevance in paper_info["judgement"].items():
            if (
                isinstance(relevance, dict)
                and relevance["relevance"] > relevance_threshold
            ):
                relevant_paper_ids.add(paper_id)
                relevant_papers.setdefault(topic, []).append(
                    {"paper_id": paper_id, "relevance": relevance}
                )

    st.title(f"Arxiv paper digest on {date}")
    relevant_paper_tab, irrelevant_paper_tab = st.tabs(
        [
            f"Relevant papers ({len(relevant_paper_ids)})",
            f"Irrelevant papers ({len(paper_dict) - len(relevant_paper_ids)})",
        ]
    )

    with relevant_paper_tab:
        # One section per topic; a paper above the threshold for several
        # topics is listed under each of them.
        for topic, papers in relevant_papers.items():
            st.markdown(f"# {topic}")
            for paper in papers:
                paper_info = paper_dict[paper["paper_id"]]
                paper_title = paper_info["title"]
                paper_url = paper_info["url"]
                st.markdown(f"### [{paper_title}]({paper_url})")
                st.markdown(
                    f"{paper['relevance']['relevance']} || {paper['relevance']['reason']}"
                )
                st.markdown(f"{', '.join(paper_info['authors'])}")
                with st.expander("Abstract"):
                    st.markdown(paper_info["abstract"])

    with irrelevant_paper_tab:
        for paper_id, paper_info in paper_dict.items():
            if paper_id in relevant_paper_ids:
                continue
            paper_title = paper_info["title"]
            paper_url = paper_info["url"]
            st.markdown(f"### [{paper_title}]({paper_url})")
            for topic, relevance in paper_info["judgement"].items():
                # Same dict guard as the relevant-paper scan above, so a
                # non-dict judgement entry is skipped rather than raising.
                if isinstance(relevance, dict) and relevance["relevance"] > 0:
                    st.markdown(
                        f"{relevance['relevance']} || {topic} || {relevance['reason']}"
                    )
            st.markdown(f"{', '.join(paper_info['authors'])}")
            with st.expander("Abstract"):
                st.markdown(paper_info["abstract"])
# tab1, tab2 = st.tabs(
# [
# f"Paper Worth Reading ({len(merged_df)})",
# f"Paper Not Worth Reading ({len(other_papers_df)})",
# ]
# )
# for tab, df in zip([tab1, tab2], [merged_df, other_papers_df]):
# with tab:
# for index, col in df.iterrows():
# st.markdown(
# f"### [{remove_parentheses_content(col['title'])}]({col['url']})"
# )
# reasons = col["reason"].split(" || ")
# for reason in reasons:
# st.markdown(reason)
# st.markdown(f"{col['authors']}")
# with st.expander("Abstract"):
# st.markdown(col["abstract"])
# with tab2:
# st.markdown(f"# Paper Not Worth Reading for {date}")
# for index, col in other_papers_df.iterrows():
# st.markdown(f"### {col['title']}")
# st.markdown(f"{col['authors']}")
# st.markdown(col["url"])
# st.markdown(f"**ChatGPT comment**")
# st.markdown(col["reason"])
# st.markdown(f"**Abstract**")
# st.markdown(col["abstract"])