From 19aaa4ec5905251dbeb65d31da2ba505b8a48939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GO=CE=A0ZO?= <37483725+GONZOsint@users.noreply.github.com> Date: Tue, 9 Apr 2024 15:30:40 +0200 Subject: [PATCH] Add files via upload --- feat.py | 471 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 471 insertions(+) create mode 100644 feat.py diff --git a/feat.py b/feat.py new file mode 100644 index 0000000..b917e8c --- /dev/null +++ b/feat.py @@ -0,0 +1,471 @@ +import os +from factcheckexplorer import FactCheckLib +from dash import Dash, dcc, html, Input, Output, callback, State, dash_table +import dash_bootstrap_components as dbc +import plotly.express as px +import pandas as pd +import ast +import time +import dash_cytoscape as cyto + +app = Dash(__name__, external_stylesheets=[dbc.themes.SANDSTONE]) + +layout = { + 'name': 'cose', + 'idealEdgeLength': 350, + 'nodeOverlap': 10, + 'refresh': 20, + 'fit': True, + 'padding': 30, + 'randomize': False, + 'componentSpacing': 100, + 'nodeRepulsion': 800000, + 'edgeElasticity': 100, + 'nestingFactor': 20, +} + +stylesheet = [ + {'selector': 'node', + 'style': {'content': 'data(label)', 'text-valign': 'center', 'text-halign': 'center', 'font-size': '10px', + 'font-family': 'Helvetica'}}, + {'selector': 'node.source', + 'style': {'background-color': '#636efa', 'color': '#000000', 'width': '50px', 'height': '50px', + 'border-color': '#4e57c6', 'border-width': 2, 'shape': 'ellipse'}}, + {'selector': 'node.tag', + 'style': {'background-color': '#ef553b', 'color': '#000000', 'width': '40px', 'height': '40px', + 'border-color': '#bc422e', 'border-width': 2, 'shape': 'ellipse'}}, + {'selector': 'edge', + 'style': {'curve-style': 'bezier', 'width': 2, 'line-color': '#ABB2B9', 'target-arrow-color': '#ABB2B9', + 'target-arrow-shape': 'triangle'}}, + {'selector': 'core', 'style': {'background-color': '#F8F9F9', 'font-family': 'Helvetica'}} +] + +global df, csv_filename + +def create_info_card(title, icon_class, body_id): + return dbc.Card( + [ + dbc.CardHeader(html.Span([html.I(className=icon_class), " ", title]), className="fw-bold"), + dbc.CardBody(id=body_id, className="text-center", style={'font-size': '20px', 'font-weight': 'bold'}) + ], className="h-100 shadow-sm" + ) + +@app.callback( + [Output(f"collapse-{chart_id}", "is_open") for chart_id in [ + "verdict-chart", "tags-chart", "claims-timeline", "sources-bar-chart", "network-graph"]], + [Input(f"toggle-{chart_id}", "value") for chart_id in [ + "verdict-chart", "tags-chart", "claims-timeline", "sources-bar-chart", "network-graph"]] +) +def toggle_collapse(*values): + return [1 in value for value in values] + +@app.callback(Output("download-csv", "data"), Input("btn-download-csv", "n_clicks"), prevent_initial_call=True) +def generate_csv(n_clicks): + if n_clicks and 'df' in globals() and not df.empty: + return dcc.send_data_frame(df.to_csv, filename=csv_filename) + + +@app.callback( + [ + Output("verdict-pie-chart", "figure"), + Output("tags-bar-chart", "figure"), + Output("claims-timeline", "figure"), + Output("sources-bar-chart", "figure"), + Output("panel-search-query", "children"), + Output("panel-num-results", "children"), + Output("panel-unique-sources", "children"), + Output("panel-unique-tags", "children"), + Output("factcheck-table", "columns"), + Output("factcheck-table", "data"), + Output("network-graph", "elements") + ], + [ + Input("search-button", "n_clicks"), + State("query-input", "value"), + State("language-input", "value"), + State("num-results-input", "value"), + State("graph-checkbox", "value") + ], + prevent_initial_call=True +) +def update_charts(n_clicks, query, language, num_results, graph_checkbox): + global df, csv_filename + if n_clicks < 1 or not query: + empty_fig = px.scatter(title="Waiting for data...") + # Return empty_fig for each graph and suitable placeholders for text outputs + return empty_fig, empty_fig, empty_fig, empty_fig, "N/A", "0 Results", "0 Unique Sources", "0 Unique Tags" + + df = pd.DataFrame() + + try: + csv_filename = f"{query.replace(' ', '_').lower() + '_' + str(time.time()).replace('.', '')}.csv" + fact_check_lib = FactCheckLib(query=query, language=language or 'all', num_results=num_results or 100, + csv_filename=csv_filename) + fact_check_lib.process() + df = pd.read_csv(csv_filename, encoding='utf-8') + except Exception as e: + print(f"Error processing FactCheckLib: {e}") + error_fig = px.scatter(title="Error fetching data") + return error_fig, error_fig, error_fig, error_fig, query, "Error", "Error", "Error" + + df['Tags'] = df['Tags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x) + tags_df = df.explode('Tags') + + def normalize_text(text): + text = text.rstrip('.') + text = text.lower() + return text + + df['Verdict'] = df['Verdict'].apply(normalize_text) \ + .str.replace(r"falso", r"false", regex=False) \ + .str.replace(r"fake", r"false", regex=False) \ + .str.replace(r"falsa", r"false", regex=False) \ + .str.replace(r"verdadero", r"true", regex=False) \ + .str.replace(r"c'est faux", r"false", regex=False) \ + .str.replace(r"doğru", r"true", regex=False) \ + .str.replace(r"dogru", r"true", regex=False) \ + .str.replace(r"doğruluk payı vardır", r"half true", regex=False) \ + .str.replace(r"errado", r"false", regex=False) \ + .str.replace(r"মিথ্যা", r"false", regex=False) \ + .str.replace(r"অসত্য", r"false", regex=False) \ + .str.replace(r"fals", r"false", regex=False) \ + .str.replace(r"falsch", r"false", regex=False) \ + .str.replace(r"false content/false", r"false", regex=False) \ + .str.replace(r"false context/false", r"false", regex=False) \ + .str.replace(r"falso!", r"false", regex=False) \ + .str.replace(r"faux", r"false", regex=False) \ + .str.replace(r"mostly true", r"half true", regex=False) \ + .str.replace(r"partialmente falso", r"mostly false", regex=False) \ + .str.replace(r"misleading/partly false", r"mostly false", regex=False) \ + .str.replace(r"Çok YanlÄ±ÅŞ", r"false", regex=False) \ + .str.replace(r"incorrect", r"false", regex=False) \ + .str.replace(r"مضلل", r"false", regex=False) \ + .str.replace(r"نادرست", r"false", regex=False) \ + .str.replace(r"زائف", r"false", regex=False) \ + .str.replace(r"錯誤", r"false", regex=False) \ + .str.replace(r"部分錯誤", r"false", regex=False) \ + .str.replace(r"pants on fire", r"false", regex=False) \ + .str.replace(r"four pinocchios", r"false", regex=False) \ + .str.replace(r"three pinocchios", r"mostly false", regex=False) \ + .str.replace(r"falsee", r"false", regex=False) \ + .str.replace(r"неверно", r"false", regex=False) \ + .str.replace(r"правильно", r"true", regex=False) \ + .str.replace(r"помилковий", r"false", regex=False) \ + .str.replace(r"вірно", r"true", regex=False) \ + .str.replace(r"錯誤的", r"false", regex=False) \ + .str.replace(r"正確的", r"true", regex=False) \ + .str.replace(r"錯誤な", r"false", regex=False) \ + .str.replace(r"正しい", r"true", regex=False) \ + .str.replace(r"incorrecto", r"false", regex=False) \ + .str.replace(r"notizia false", r"false", regex=False) \ + .str.replace(r"c'eri quasi", r"half true", regex=False) \ + .str.replace(r"pinocchio andante", r"false", regex=False) \ + .str.replace(r"notizia vera", r"true", regex=False) \ + .str.replace(r"vera", r"true", regex=False) \ + .str.replace(r"vero", r"true", regex=False) \ + .str.replace(r"cierto", r"true", regex=False) \ + .str.replace(r"engañoso", r"mostly false", regex=False) \ + .str.replace(r"es falso", r"false", regex=False) \ + .str.replace(r"scam", r"false", regex=False) \ + .str.replace(r"enganoso", r"false", regex=False) \ + .str.replace(r"falsz", r"false", regex=False) \ + .str.replace(r"falsekt", r"false", regex=False) \ + .str.replace(r"falsekt", r"false", regex=False) \ + .str.replace(r"misleidend", r"misleading", regex=False) \ + .str.replace(r"trompeur", r"false", regex=False) \ + .str.replace(r"yanlış", r"false", regex=False) \ + .str.replace(r"es false", r"false", regex=False) \ + .str.replace(r"correct attribution", r"true", regex=False) \ + .str.replace(r"correct", r"true", regex=False) \ + .str.replace(r"delimično netačno", r"mostly false", regex=False) \ + .str.replace(r"enganador", r"mostly false", regex=False) \ + .str.replace(r"epätosi", r"false", regex=False) \ + .str.replace(r"fałsz", r"false", regex=False) \ + + verdict_counts = df['Verdict'].value_counts(normalize=True) * 100 + small_verdicts = verdict_counts[verdict_counts < 2].index + df['Verdict Grouped'] = df['Verdict'].apply(lambda x: 'other' if x in small_verdicts else x) + + verdict_fig = px.pie(df, names='Verdict Grouped', title='Verdict Distribution') + verdict_fig.update_traces(textinfo='percent+label') + + tags_fig = px.bar(tags_df['Tags'].value_counts().reset_index(), x='index', y='Tags', + title='Tags Volume', labels={'index': 'Tag', 'Tags': 'Count'}) + tags_fig.update_layout(xaxis_title="Tag", yaxis_title="Count") + + df['Review Publication Date'] = pd.to_datetime(df['Review Publication Date']) + timeline_fig = px.scatter(df, x='Review Publication Date', y='Verdict Grouped', color='Verdict Grouped', + title='Timeline of Claims', labels={'Review Publication Date': 'Date'}) + timeline_fig.update_layout(xaxis_title="Date", yaxis_title="Verdict") + + sources_counts = df['Source Name'].value_counts().reset_index() + sources_fig = px.bar(sources_counts, x='Source Name', y='index', orientation='h', + labels={'index': 'Source', 'Source Name': 'Number of Checks'}, + title='Source Volume', text_auto='.2s') + sources_fig.update_layout(xaxis_title="Number of Checks", yaxis_title="Source", + font=dict(family="Roboto, sans-serif", size=12, color="#333")) + + search_query_display = query if query else "Not specified" + num_results_display = f"{len(df)} Results" + unique_sources_display = f"{df['Source Name'].nunique()} Unique Sources" + + df['Tags'] = df['Tags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else x) + unique_tags_display = f"{df.explode('Tags')['Tags'].nunique()} Unique Tags" + + if 'Tags' in df.columns: + df['Tags'] = df['Tags'].apply( + lambda tags_list: ', '.join(tags_list) if isinstance(tags_list, list) else tags_list) + + columns = [{"name": col, "id": col} for col in df.columns] + + data = df.to_dict('records') + + if 'ON' in graph_checkbox: + def process_tags(tags): + if isinstance(tags, str): + return [tag.strip() for tag in tags.split(',')] + elif isinstance(tags, list): + return tags + return [] + + df['Tags'] = df['Tags'].apply(process_tags) + + nodes = [{'data': {'id': src, 'label': src}, 'classes': 'source'} for src in df['Source Name'].unique()] + nodes += [{'data': {'id': tag, 'label': tag}, 'classes': 'tag'} for tag in + set().union(*(df['Tags'].dropna()))] # Assumes 'Tags' are lists + + added_edges = set() + + edges = [] + for _, row in df.iterrows(): + src = row['Source Name'] + tags = row['Tags'] if isinstance(row['Tags'], list) else [] + for tag in tags: + # Create a unique identifier for each potential edge + edge_identifier = (src, tag) + # Check if this edge has already been added + if edge_identifier not in added_edges: + edges.append({ + 'data': {'source': src, 'target': tag} + }) + # Mark this edge as added + added_edges.add(edge_identifier) + + network_elements = nodes + edges + else: + network_elements = [] + + try: + os.remove(csv_filename) + except Exception as e: + print(f"Could not remove CSV file: {e}") + + return verdict_fig, tags_fig, timeline_fig, sources_fig, search_query_display, num_results_display, unique_sources_display, unique_tags_display, columns, data, network_elements, + + +app.layout = dbc.Container(fluid=True, children=[ + dbc.Row(dbc.Col(html.Img(src='/assets/FEAT.png', style={'maxHeight': '250px'}), className="text-center", width=12), + justify="center"), + html.Hr(), + html.H2("Search", className="mb-3 mt-4", style={'font-family': 'monospace'}), + dbc.Row([ + dbc.Col(dcc.Input(id="query-input", type="text", placeholder="Enter a query...", className="form-control mb-2", + debounce=True), width=3, style={'font-family': 'monospace'}), + dbc.Col(dcc.Input(id="language-input", type="text", placeholder="Language (default: all)", + className="form-control mb-2", debounce=True), width=2, style={'font-family': 'monospace'}), + dbc.Tooltip( + "Use ISO 639-1 language codes (e.g., 'en' for English, 'es' for Spanish).", + target="language-input", + placement="top" + ), + dbc.Col(dcc.Input(id="num-results-input", type="number", placeholder="# Results (default: 100)", + className="form-control mb-2", debounce=True), width=2, style={'font-family': 'monospace'}), + dbc.Tooltip( + "Max: 10.000", + target="num-results-input", + placement="top" + ), + dbc.Col( + [ + dbc.Checklist( + options=[ + {"label": " Generate Graph", "value": "ON"}, + ], + value=[], + id="graph-checkbox", + switch=True, + className="mb-2", + ), + dbc.Tooltip( + "Enabling this option will generate a network graph of sources and tags. " + "Be cautious with large datasets as it might slow down the response.", + target="graph-checkbox", + placement="right" + ), + ], + width={"size": 2, "offset": 1}, + style={'font-family': 'monospace'} + ), + dbc.Col(html.Button("Search", id="search-button", n_clicks=0, className="btn btn-primary me-2"), width=1, + style={'font-family': 'monospace', 'background-color': '636efa'}), + dbc.Col( + html.Button( + "Download CSV", + id="btn-download-csv", + n_clicks=0, + className="btn", + style={ + 'font-family': 'monospace', + 'background-color': '#00cc96', + 'color': '#FFFFFF', + 'border': 'none' + } + ), + width=1 + ), + dcc.Download(id="download-csv"), + ], justify="start"), + + html.Hr(), + dbc.Row([ + dbc.Col(create_info_card("Search Query", "fas fa-search", "panel-search-query"), width=3, + style={'font-family': 'monospace'}), + dbc.Col(create_info_card("Number of Results", "fas fa-sort-numeric-up", "panel-num-results"), width=3, + style={'font-family': 'monospace'}), + dbc.Col(create_info_card("Unique Sources", "fas fa-broadcast-tower", "panel-unique-sources"), width=3, + style={'font-family': 'monospace'}), + dbc.Col(create_info_card("Unique Tags", "fas fa-tags", "panel-unique-tags"), width=3, + style={'font-family': 'monospace'}), + ], className="mb-4 g-4"), + + html.Hr(), + html.H2("Analytics", className="mb-3", style={'font-family': 'monospace'}), + dbc.Row([ + dbc.Col([ + dbc.Checklist( + options=[{"label": " Show Verdict Distribution", "value": 1}], + value=[1], + id="toggle-verdict-chart", + switch=True, + ), + dbc.Collapse( + dcc.Loading(dcc.Graph(id="verdict-pie-chart")), + id="collapse-verdict-chart", + is_open=True + ), + ], width=6, style={"border-right": "2px solid #dee2e6"}), + + dbc.Col([ + dbc.Checklist( + options=[{"label": " Show Tags Distribution", "value": 1}], + value=[1], + id="toggle-tags-chart", + switch=True, + ), + dbc.Collapse( + dcc.Loading(dcc.Graph(id="tags-bar-chart")), + id="collapse-tags-chart", + is_open=True + ), + ], width=6, style={"border-right": "2px solid #dee2e6"}), + + ], className="mb-4"), + + html.Hr(), + dbc.Row([ + dbc.Col([ + dbc.Checklist( + options=[{"label": " Show Claims Timeline", "value": 1}], + value=[1], + id="toggle-claims-timeline", + switch=True, + ), + dbc.Collapse( + dcc.Loading(dcc.Graph(id="claims-timeline")), + id="collapse-claims-timeline", + is_open=True + ), + ], width=6, style={"border-right": "2px solid #dee2e6"}), + + dbc.Col([ + dbc.Checklist( + options=[{"label": " Show Sources Distribution", "value": 1}], + value=[1], + id="toggle-sources-bar-chart", + switch=True, + ), + dbc.Collapse( + dcc.Loading(dcc.Graph(id="sources-bar-chart")), + id="collapse-sources-bar-chart", + is_open=True + ), + ], width=6, style={"border-right": "2px solid #dee2e6"}), + + ], className="mb-4"), + html.Hr(), + html.P("Source > Node graph", className="mb-3", style={'font-family': 'monospace'}), + dbc.Row( + + dbc.Col([ + dbc.Checklist( + options=[{"label": " Show Sources Distribution", "value": 1}], + value=[0], + id="toggle-network-graph", + switch=True, + ), + dbc.Collapse( + + cyto.Cytoscape( + id='network-graph', + layout=layout, + style={'width': '100%', 'height': '400px'}, + elements=[], + stylesheet=stylesheet + ), + id="collapse-network-graph", + ), + ], width=12, className="mb-4", style={"border-right": "2px solid #dee2e6"}), + ), + + html.Hr(), + html.H2("Fact Check Details", className="mb-3", style={'font-family': 'monospace'}), + dbc.Row([ + dbc.Col(dash_table.DataTable( + id='factcheck-table', + columns=[], + data=[], + filter_action="native", + sort_action="native", + page_action="native", + page_size=10, + style_table={'overflowX': 'auto'}, + style_cell={ + 'height': 'auto', + 'minWidth': '80px', 'width': '120px', 'maxWidth': '180px', + 'whiteSpace': 'normal', + 'overflow': 'hidden', + 'textOverflow': 'ellipsis', + 'maxHeight': '60px', + 'textAlign': 'left' + }, + style_cell_conditional=[ + {'if': {'column_id': c}, + 'textAlign': 'left'} for c in ['column1', 'column2'] + ], + style_data_conditional=[ + { + 'if': {'row_index': 'odd'}, + 'backgroundColor': 'rgb(248, 248, 248)' + }, + ], + style_header={ + 'fontWeight': 'bold', + 'textAlign': 'center' + }, + ), width=12), + ]), +]) + + +if __name__ == "__main__": + app.run_server()