diff --git a/count-march-submissions/submissions-in-2016.png b/count-march-submissions/submissions-in-2016.png index 9d1c0c3..7015d95 100644 Binary files a/count-march-submissions/submissions-in-2016.png and b/count-march-submissions/submissions-in-2016.png differ diff --git a/count-march-submissions/submissions-in-2017.png b/count-march-submissions/submissions-in-2017.png index e733a98..4073f5a 100644 Binary files a/count-march-submissions/submissions-in-2017.png and b/count-march-submissions/submissions-in-2017.png differ diff --git a/count-march-submissions/submissions-in-2018.png b/count-march-submissions/submissions-in-2018.png index 27d0a80..6f43085 100644 Binary files a/count-march-submissions/submissions-in-2018.png and b/count-march-submissions/submissions-in-2018.png differ diff --git a/count-march-submissions/submissions-in-2019.png b/count-march-submissions/submissions-in-2019.png index 9f0d524..25cfe4c 100644 Binary files a/count-march-submissions/submissions-in-2019.png and b/count-march-submissions/submissions-in-2019.png differ diff --git a/count-march-submissions/submissions-in-2020.png b/count-march-submissions/submissions-in-2020.png index 75494fc..465a5a5 100644 Binary files a/count-march-submissions/submissions-in-2020.png and b/count-march-submissions/submissions-in-2020.png differ diff --git a/count-march-submissions/submissions-in-2021.png b/count-march-submissions/submissions-in-2021.png index 1338d2f..5435fd4 100644 Binary files a/count-march-submissions/submissions-in-2021.png and b/count-march-submissions/submissions-in-2021.png differ diff --git a/count-march-submissions/submissions-in-2022.png b/count-march-submissions/submissions-in-2022.png index 5e1b669..0c30f48 100644 Binary files a/count-march-submissions/submissions-in-2022.png and b/count-march-submissions/submissions-in-2022.png differ diff --git a/count-march-submissions/submissions-in-2023.png b/count-march-submissions/submissions-in-2023.png index 378b1ea..eb6799a 100644 Binary files a/count-march-submissions/submissions-in-2023.png and b/count-march-submissions/submissions-in-2023.png differ diff --git a/count-march-submissions/submissions-in-2024.png b/count-march-submissions/submissions-in-2024.png new file mode 100644 index 0000000..8d8b8af Binary files /dev/null and b/count-march-submissions/submissions-in-2024.png differ diff --git a/duplicate-ueis/.gitignore b/duplicate-ueis/.gitignore new file mode 100644 index 0000000..5f8bb43 --- /dev/null +++ b/duplicate-ueis/.gitignore @@ -0,0 +1 @@ +*pdf diff --git a/duplicate-ueis/main.py b/duplicate-ueis/main.py new file mode 100644 index 0000000..8d87869 --- /dev/null +++ b/duplicate-ueis/main.py @@ -0,0 +1,69 @@ +import requests +import os +from collections import defaultdict +from math import floor +from pprint import pprint + +continuing = True + +all_results = [] +start = 0 +limit = 20000 +while continuing: + req = requests.get("https://api.fac.gov/general", + params = { + "select": "auditee_uei,audit_year", + "audit_year": "eq.2023", + "offset": start + }, + headers = { + "x-api-key": os.getenv("API_GOV_KEY") + } + ) + if req.json() == []: + continuing = False + else: + all_results = all_results + req.json() + start += limit + +dups = defaultdict(int) + +print(f"Found: {len(req.json())}") +for rec in all_results: + key = rec["auditee_uei"] + "-" + rec["audit_year"] + dups[key] += 1 + +# This produces something like: +# +# ABC-2023: 3 +# DEF-2023: 1 +# XYZ-2023: 9 +# ... + +resub = defaultdict(int) +for k, v in dups.items(): + resub[v] += 1 + +# This now counts how many of each count: + +# 1: 28323 +# 2: 260 +# 3: 41 +# 4: 1 +# 5: 1 +# 9: 2 + +for k, v in dups.items(): + if v > 2: + print(f"{k}: {v}") + + +for k, v in sorted(resub.items(), key=lambda kv: kv[0]): + print(f"{k} {'re' if k > 1 else ''}submission{'s' if k > 1 else ''}: {v}") + +resub_count = 0 +for k, v in dups.items(): + if v > 1: + resub_count += 1 + +print(f"total: {len(dups)} resub: {resub_count}") \ No newline at end of file diff --git a/findings-by-aln/main.py b/findings-by-aln/main.py index 6c5b926..c89a07a 100644 --- a/findings-by-aln/main.py +++ b/findings-by-aln/main.py @@ -371,19 +371,20 @@ def main(acceptance_date, clean, omit_generals, omit_findings, omit_awards, repo a1 = time.time() t1 = time.time() - wb = fac.to_xlsx() - - rm(path_based_on_ext(workbook_filename)) - wb.save(path_based_on_ext(workbook_filename)) - - DailyMetadata.create( - date_retrieved=today(), - queries_used=get_query_count(), - time_elapsed=t1-t0, - time_general=g1-g0, - time_findings=f1-f0, - time_awards=a1-a0, - ) + try: + wb = fac.to_xlsx() + rm(path_based_on_ext(workbook_filename)) + wb.save(path_based_on_ext(workbook_filename)) + DailyMetadata.create( + date_retrieved=today(), + queries_used=get_query_count(), + time_elapsed=t1-t0, + time_general=g1-g0, + time_findings=f1-f0, + time_awards=a1-a0, + ) + except: + print(f"{acceptance_date} NO FINDINGS, NO WORKBOOK") if __name__ in "__main__": diff --git a/notebook-example/three/.gitignore b/notebook-example/three/.gitignore index eb35bfe..06b90f2 100644 --- a/notebook-example/three/.gitignore +++ b/notebook-example/three/.gitignore @@ -1,2 +1,3 @@ .venv dist/ +*.db diff --git a/notebook-example/three/jupyter-lite.json b/notebook-example/three/jupyter-lite.json new file mode 100644 index 0000000..e45005c --- /dev/null +++ b/notebook-example/three/jupyter-lite.json @@ -0,0 +1,8 @@ +{ + "jupyter-lite-schema-version": 0, + "jupyter-config-data": { + "appName": "FAC Labs", + "collaborative": false, + "exposeAppInBrowser": true + } + } \ No newline at end of file diff --git a/notebook-example/three/jupyter_lite_config.json b/notebook-example/three/jupyter_lite_config.json index 52846e5..ac0fad9 100644 --- a/notebook-example/three/jupyter_lite_config.json +++ b/notebook-example/three/jupyter_lite_config.json @@ -1,6 +1,26 @@ { - "LiteBuildConfig": { - "contents": ["notebooks"], - "output_dir": "dist" + "LiteBuildConfig": { + "contents": ["notebooks"], + "output_dir": "dist", + "extra_file_types": { + "xlsx": { + "name": "xlsx", + "extensions": [".xlsx"], + "mimeTypes": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"], + "fileFormat": "application" + }, + "docx": { + "name": "docx", + "extensions": [".docx"], + "mimeTypes": ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], + "fileFormat": "application" + }, + "pdf": { + "name": "pdf", + "extensions": [".pdf"], + "mimeTypes": ["application/pdf"], + "fileFormat": "application" + } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/notebook-example/three/notebooks/findings_by_aln.ipynb b/notebook-example/three/notebooks/findings_by_aln.ipynb index 29d28b1..d6d5f14 100644 --- a/notebook-example/three/notebooks/findings_by_aln.ipynb +++ b/notebook-example/three/notebooks/findings_by_aln.ipynb @@ -6,7 +6,17 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install pyodide-http requests peewee openpyxl pandas sqlite3\n", + "# Import external libraries first.\n", + "%pip install pyodide-http requests peewee openpyxl pandas sqlite3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# After those load, import local libraries.\n", "from libraries import findings_by_aln as fba\n", "from fac import FAC_API_KEY" ] diff --git a/notebook-example/three/notebooks/libraries/findings_by_aln.py b/notebook-example/three/notebooks/libraries/findings_by_aln.py index 0408b8a..aa44334 100644 --- a/notebook-example/three/notebooks/libraries/findings_by_aln.py +++ b/notebook-example/three/notebooks/libraries/findings_by_aln.py @@ -144,7 +144,7 @@ def general(self, report_id=None): # Now, populate with the findings. This tells us which we need, and # which to remove. def findings(self, report_id=None): - print("FINDINGS") + print("FINDINGS: ", end="") # console = Console() # We should only do things where we have not fetched. if report_id: @@ -152,7 +152,7 @@ def findings(self, report_id=None): else: gq = DailyGenerals.select().where(DailyGenerals.findings_count.is_null()) for dg in gq: - print(f"\tfindings {dg.report_id}") + # print(f"\tfindings {dg.report_id}") jres = fetch_from_api("findings", { "report_id": op("eq", dg.report_id) }) @@ -176,8 +176,7 @@ def findings(self, report_id=None): & (DailyFindings.reference_number == res["reference_number"]))) if dfq.exists(): for df in dfq: - print( - f"\tUpdating {dg.report_id} {res['award_reference']} {res['reference_number']}") + # print(f"\tUpdating {dg.report_id} {res['award_reference']} {res['reference_number']}") (df .update(**res) .where((DailyFindings.report_id == dg.report_id) @@ -185,15 +184,16 @@ def findings(self, report_id=None): & (DailyFindings.reference_number == res["reference_number"])) .execute()) else: - print( - f"\tCreating {dg.report_id} {res['award_reference']} {res['reference_number']}") + #print(f"\tCreating {dg.report_id} {res['award_reference']} {res['reference_number']}") + print(".", end="") DailyFindings.create(**res) dg.date_retrieved = today() dg.findings_count = len(jres) dg.save() + print() def awards(self, report_id=None): - print("AWARDS") + print("AWARDS: ", end="") # console = Console() if report_id: @@ -230,7 +230,8 @@ def awards(self, report_id=None): del res[k] res = convert_bools(res) # Update the row in question - print(f"\tUpdating awards for {df.report_id} {df.award_reference} {df.reference_number}") + #print(f"\tUpdating awards for {df.report_id} {df.award_reference} {df.reference_number}") + print(".", end="") (df .update(**res) .where((DailyFindings.report_id == dg.report_id) @@ -239,6 +240,7 @@ def awards(self, report_id=None): .execute()) dg.awards_count = awards_count dg.save() + print() def _add_sheets(self, wb, iter, query): # get_unique_agency_numbers() diff --git a/notebook-example/three/notebooks/libraries/findings_util.py b/notebook-example/three/notebooks/libraries/findings_util.py index 477053f..5e6504d 100644 --- a/notebook-example/three/notebooks/libraries/findings_util.py +++ b/notebook-example/three/notebooks/libraries/findings_util.py @@ -43,7 +43,7 @@ def fetch_from_api(table, payload): params=payload,) jres = res.json() if len(jres) == 0: - print(f"No results found for {table}") + pass return jres diff --git a/notebook-example/three/requirements.txt b/notebook-example/three/requirements.txt index 71432d5..a549b52 100644 --- a/notebook-example/three/requirements.txt +++ b/notebook-example/three/requirements.txt @@ -9,24 +9,24 @@ # requests # Core modules (mandatory) -jupyterlite-core==0.3.0 -jupyterlab~=4.1.6 -notebook~=7.1.2 +jupyterlite-core +jupyterlab +notebook # Python kernel (optional) -jupyterlite-pyodide-kernel==0.3.2 +jupyterlite-pyodide-kernel # JavaScript kernel (optional) -jupyterlite-javascript-kernel==0.3.0 +jupyterlite-javascript-kernel # P5 kernel (optional) -jupyterlite-p5-kernel==0.1.0 +jupyterlite-p5-kernel # JupyterLab: Fasta file renderer (optional) -jupyterlab-fasta>=3.3.0,<4 +jupyterlab-fasta # JupyterLab: Geojson file renderer (optional) -jupyterlab-geojson>=3.4.0,<4 +jupyterlab-geojson # JupyterLab: guided tour (optional) # TODO: re-enable after https://github.com/jupyterlab-contrib/jupyterlab-tour/issues/82 # jupyterlab-tour @@ -36,18 +36,18 @@ jupyterlab-night jupyterlab_miami_nights # Python: ipywidget library for Jupyter notebooks (optional) -ipywidgets>=8.1.1,<9 +ipywidgets # Python: ipyevents library for Jupyter notebooks (optional) -ipyevents>=2.0.1 +ipyevents # Python: interative Matplotlib library for Jupyter notebooks (optional) -ipympl>=0.8.2 +ipympl # Python: ipycanvas library for Jupyter notebooks (optional) -ipycanvas>=0.9.1 +ipycanvas # Python: ipyleaflet library for Jupyter notebooks (optional) ipyleaflet # Python: plotting libraries (optional) -plotly>=5,<6 +plotly bqplot # Language packs @@ -58,5 +58,4 @@ jupyterlab-language-pack-vi-VN jupyterlab-language-pack-fr-FR openpyxl -pysqlite3 peewee