diff --git a/notebook-example/three/.gitignore b/notebook-example/three/.gitignore
new file mode 100644
index 0000000..eb35bfe
--- /dev/null
+++ b/notebook-example/three/.gitignore
@@ -0,0 +1,2 @@
+.venv
+dist/
diff --git a/notebook-example/three/build.sh b/notebook-example/three/build.sh
new file mode 100755
index 0000000..f0ccc0c
--- /dev/null
+++ b/notebook-example/three/build.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+set -e
+
+DIST=dist
+FILES=notebooks
+
+# https://stackoverflow.com/a/76208774
+
+# We don't want anything lingering from a previous environment.
+# The active environment affects the build, so use the venv
+# created here for the build itself.
+# conda deactivate >/dev/null 2>&1
+# deactivate >/dev/null 2>&1
+
+echo "Removing venv"
+rm -rf .venv
+sleep 2
+echo "Creating clean venv"
+python3 -m venv .venv
+source .venv/bin/activate
+
+if [[ "$VIRTUAL_ENV" != "" ]]
+then
+ INVENV=1
+ echo "In environment $VIRTUAL_ENV"
+ sleep 2
+else
+ INVENV=0
+fi
+
+pip install --upgrade pip
+pip install --no-cache-dir -r requirements.txt
+
+echo "Cleaning up for install"
+rm -f *.db
+rm -rf $DIST
+rm -rf $FILES/__pycache__
+
+sleep 1
+
+echo "Launching build"
+sleep 1
+jupyter lite init
+jupyter lite build
+
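+# Set RUN=1 to serve the built site locally after the build, e.g.:
+#
+#   RUN=1 ./build.sh
+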
+if [[ $RUN = 1 ]]; then
+ pushd $DIST
+ python -m http.server 8080
+ popd
+fi
\ No newline at end of file
diff --git a/notebook-example/three/jupyter_lite_config.json b/notebook-example/three/jupyter_lite_config.json
new file mode 100644
index 0000000..52846e5
--- /dev/null
+++ b/notebook-example/three/jupyter_lite_config.json
@@ -0,0 +1,6 @@
+{
+ "LiteBuildConfig": {
+ "contents": ["notebooks"],
+ "output_dir": "dist"
+ }
+}
\ No newline at end of file
diff --git a/notebook-example/three/notebooks/README.md b/notebook-example/three/notebooks/README.md
new file mode 100644
index 0000000..565cbc8
--- /dev/null
+++ b/notebook-example/three/notebooks/README.md
@@ -0,0 +1 @@
+Bye.
\ No newline at end of file
diff --git a/notebook-example/three/notebooks/api_key.py b/notebook-example/three/notebooks/api_key.py
new file mode 100644
index 0000000..4b7ca60
--- /dev/null
+++ b/notebook-example/three/notebooks/api_key.py
@@ -0,0 +1,12 @@
+##############################################
+# To obtain a key for use with the FAC.
+#
+# 1. Visit https://api.data.gov/signup/
+# 2. Enter your name and email address.
+# 3. You will receive a key via email.
+# 4. Copy that key.
+# 5. Paste the key between the quotes below,
+#    being careful not to add spaces before
+#    or after the key.
+
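+# Alternatively, when running the notebooks locally, you can leave
+# this empty and set the FAC_API_KEY environment variable instead;
+# fac.py will pick it up.
+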
+FAC_API_KEY = ""
diff --git a/notebook-example/three/notebooks/fac.py b/notebook-example/three/notebooks/fac.py
new file mode 100644
index 0000000..eec4ef9
--- /dev/null
+++ b/notebook-example/three/notebooks/fac.py
@@ -0,0 +1,19 @@
+import os
+import pyodide_http
+import requests
+pyodide_http.patch_all()
+
+from api_key import FAC_API_KEY
+
+##############################################
+# This overrides the FAC_API_KEY variable
+# so that we can test the notebooks locally.
+# It has no effect in the browser environment.
+if os.getenv("FAC_API_KEY") not in [None, ""]:
+ FAC_API_KEY = os.getenv("FAC_API_KEY")
+
+def get(endpoint, params=None):
+    # Avoid a mutable default argument; default to a one-row query.
+    if params is None:
+        params = {"limit": 1}
+    return requests.get(f"https://api.fac.gov/{endpoint}",
+                        params=params)
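+
+# Example usage (a sketch; mirrors the query in sum_alns.ipynb):
+#
+#   r = get("general", params={"limit": 1, "api_key": FAC_API_KEY})
+#   print(r.json())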
diff --git a/notebook-example/three/notebooks/files/treasury.alns b/notebook-example/three/notebooks/files/treasury.alns
new file mode 100644
index 0000000..a7e30e0
--- /dev/null
+++ b/notebook-example/three/notebooks/files/treasury.alns
@@ -0,0 +1,23 @@
+21.003
+21.004
+21.006
+21.008
+21.009
+21.010
+21.011
+21.012
+21.014
+21.015
+21.016
+21.017
+21.018
+21.019
+21.020
+21.021
+21.023
+21.024
+21.027
+21.028
+21.029
+21.031
+21.032
\ No newline at end of file
diff --git a/notebook-example/three/notebooks/findings_by_aln.ipynb b/notebook-example/three/notebooks/findings_by_aln.ipynb
new file mode 100644
index 0000000..29d28b1
--- /dev/null
+++ b/notebook-example/three/notebooks/findings_by_aln.ipynb
@@ -0,0 +1,43 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install pyodide-http requests peewee openpyxl pandas sqlite3\n",
+ "from libraries import findings_by_aln as fba\n",
+ "from fac import FAC_API_KEY"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Findings per ALN\n",
+ "\n",
+ "There was a 5-day period where search was not available. During that time, we published Excel workbooks that provided Federal users a set of spreadsheets that tracked findings on a per-ALN basis."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set the API key for the library\n",
+ "fba.set_api_key(FAC_API_KEY)\n",
+ "# Changing the date generates a different workbook in the `xlsx` directory\n",
+ "fba.findings_by_aln(\"2024-06-17\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook-example/three/notebooks/libraries/__init__.py b/notebook-example/three/notebooks/libraries/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/notebook-example/three/notebooks/libraries/aln.py b/notebook-example/three/notebooks/libraries/aln.py
new file mode 100644
index 0000000..32444dd
--- /dev/null
+++ b/notebook-example/three/notebooks/libraries/aln.py
@@ -0,0 +1,133 @@
+import re
+import logging
+
+class ALN:
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("accuracy_alns")
+
+ def __init__(self, agency, program=None):
+ self.agency = agency
+ self.program = program
+
+ def __repr__(self):
+ if self.program:
+ return f"{self.agency}.{self.program}"
+ else:
+ return f"{self.agency}"
+
+ def __str__(self):
+ return self.__repr__()
+
+ def __eq__(self, other):
+ return (self.agency == other.agency
+ and self.program == other.program)
+
+ def __hash__(self):
+ return hash(str(self))
+
+    def streq(self, string_aln):
+        parts = string_aln.split(".")
+        if len(parts) < 2:
+            return False
+        return (self.agency == parts[0]
+                and self.program == parts[1])
+
+ def is_valid(self):
+ return (
+ self.is_all_numeric()
+ or self.is_u_program()
+ or self.is_rd_program()
+ or self.is_gsa_migration()
+ or self.is_alpha_program()
+ )
+
+    def category(self):
+ if self.is_all_numeric():
+ return "NUMERIC"
+ elif self.is_u_program():
+ return "U"
+ elif self.is_rd_program():
+ return "RD"
+ elif self.is_alpha_program():
+ return "ALPHA"
+ elif self.is_gsa_migration():
+ return "GSA"
+ else:
+            ALN.logger.warning(f"UNK ALN: {self.agency} {self.program}")
+ return "UNK"
+
+ def is_numeric_agency(self):
+ try:
+ int(self.agency)
+ return True
+        except (TypeError, ValueError):
+ return False
+
+ def is_numeric_program(self):
+ try:
+ int(self.program)
+ return True
+        except (TypeError, ValueError):
+ return False
+
+ def is_all_numeric(self):
+ return self.is_numeric_agency() and self.is_numeric_program()
+
+    def is_u_program(self):
+        return self.is_numeric_agency() and bool(re.match(r"^U[0-9]{2}$", self.program or ""))
+
+    def is_rd_program(self):
+        return self.is_numeric_agency() and bool(re.match(r"^RD([0-9]{1})?$", self.program or ""))
+
+    def is_gsa_migration(self):
+        return self.is_numeric_agency() and bool(re.match(r"^GSA_MIGRATION$", self.program or ""))
+
+    def is_alpha_program(self):
+        return (
+            self.is_numeric_agency()
+            and bool(re.match(r"^[0-9]{2}$", self.agency))
+            and bool(re.match(r"^[0-9]{3}([A-Z])?$", self.program or "")))
+
+######################################
+# TESTS
+######################################
+
+def test_is_numeric_agency():
+ assert ALN("10", "ABC").is_numeric_agency() is True
+ assert ALN("AB", "ABC").is_numeric_agency() is False
+
+numeric_programs_valid = [
+ ALN("10", "123"),
+ ALN("10", "000")
+]
+numeric_programs_invalid = [
+ ALN("10", "ABC"),
+ ALN("AB", "ABC")
+]
+def test_is_numeric_program():
+ for aln in numeric_programs_valid:
+ assert aln.is_numeric_program() is True
+ for aln in numeric_programs_invalid:
+ assert aln.is_numeric_program() is False
+
+rd_alns_valid = [
+ ALN("93", "RD"),
+ ALN("93", "RD1")
+]
+def test_is_rd_program():
+ for aln in rd_alns_valid:
+ assert aln.is_rd_program() is True
+
+def test_validity():
+ for aln in rd_alns_valid:
+ assert aln.is_valid() is True
+ for aln in numeric_programs_valid:
+ assert aln.is_valid() is True
+ for aln in numeric_programs_invalid:
+ assert aln.is_valid() is False
+ assert ALN("11", "123").is_valid() is True
+ assert ALN("92", "RD1").is_valid() is True
+ assert ALN("92", "RD").is_valid() is True
+ assert ALN("92", "RDX").is_valid() is False
+ assert ALN("84", "483A").is_valid() is True
+ assert ALN("84", "483AB").is_valid() is False
+ assert ALN("84", "48A").is_valid() is False
+ assert ALN("21", "U23").is_valid() is True
+ assert ALN("45", "GSA_MIGRATION").is_valid() is True
diff --git a/notebook-example/three/notebooks/libraries/findings_by_aln.py b/notebook-example/three/notebooks/libraries/findings_by_aln.py
new file mode 100644
index 0000000..0408b8a
--- /dev/null
+++ b/notebook-example/three/notebooks/libraries/findings_by_aln.py
@@ -0,0 +1,394 @@
+import time
+from openpyxl import Workbook
+from types import SimpleNamespace
+from openpyxl.styles import PatternFill
+from playhouse.shortcuts import model_to_dict
+# from rich.table import Table
+# from rich.console import Console
+# from rich import print
+
+
+from libraries.findings_models import (
+ DailyGenerals,
+ DailyFindings,
+ DailyMetadata,
+ get_unique_agency_numbers,
+ get_unique_cog_overs,
+ setup_database
+)
+
+import libraries.findings_util as findings_util
+
+from libraries.findings_util import (
+ op,
+ string_to_datetime,
+ fetch_from_api,
+ today,
+ get_query_count,
+ rm,
+ path_based_on_ext,
+ adjust_columns,
+ cog_over,
+ convert_bools,
+)
+
+from libraries.findings_const import (
+ FAC_API_BASE
+)
+
+import logging
+logger = logging.getLogger(__name__)
+
+# https://stackoverflow.com/questions/17755996/how-to-make-a-list-as-the-default-value-for-a-dictionary
+
+# A result is a single award for a single day.
+
+def set_api_key(key):
+ findings_util.set_api_key(key)
+
+
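+# Only these columns from each API response are kept when rows are
+# written into the local cache; all other keys are deleted.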
+findings_fields_to_keep = set([
+ "report_id",
+ "auditee_name",
+ "auditee_uei",
+ "cog_over",
+ "award_reference",
+ "reference_number",
+ "is_modified_opinion",
+ "is_other_matters",
+ "is_material_weakness",
+ "is_significant_deficiency",
+ "is_other_findings",
+ "is_questioned_costs",
+ "is_repeat_finding",
+ "prior_finding_ref_numbers",
+])
+
+awards_fields_to_keep = set([
+ "report_id",
+ "reference_number",
+ "award_reference",
+ "auditee_name",
+ "aln",
+ "cog_over",
+ "federal_program_name",
+ "amount_expended",
+ "is_direct",
+ "is_major",
+ "is_passthrough_award",
+ "passthrough_amount",
+])
+
+yes_fill = PatternFill(start_color="FFD700",
+ end_color="FFD700", fill_type="solid")
+
+
+class QParam():
+ def __init__(self, date):
+ self.date = date
+
+
+class Result():
+ def __init__(self, d):
+ self.data = SimpleNamespace(**d)
+
+ def add(self, key, value):
+ self.data[key] = value
+
+ def __str__(self):
+ return f"{self.data.report_id}"
+
+ def __repr__(self):
+ return self.__str__()
+
+
+class FAC():
+ # Takes a list of parameter objects.
+ def __init__(self, params):
+ # TODO: Remove any dates we have already run and
+ # cached locally.
+ self.params = params
+ self.results = []
+
+ # Fetch the general results.
+ # Must start here.
+ def general(self, report_id=None):
+ po: QParam
+
+ for po in self.params:
+ payload = {
+ "fac_accepted_date": op("eq", po.date),
+ "select": ",".join([
+ "report_id",
+ "auditee_name",
+ "cognizant_agency",
+ "oversight_agency",
+ "auditee_uei",
+ ])
+ }
+
+ jres = fetch_from_api("general", payload)
+
+ for res in jres:
+ if DailyGenerals.select().where(DailyGenerals.report_id == res["report_id"]):
+ logger.debug(f"Skipping {res['report_id']}")
+ else:
+ d = {"report_id": res["report_id"],
+ "date": po.date,
+ "auditee_name": res["auditee_name"],
+ "cog_over": cog_over(res["cognizant_agency"], res["oversight_agency"]),
+ "auditee_uei": res["auditee_uei"],
+ }
+ self.results.append(DailyGenerals.create(**d))
+
+ # Now, populate with the findings. This tells us which we need, and
+ # which to remove.
+ def findings(self, report_id=None):
+ print("FINDINGS")
+ # console = Console()
+ # We should only do things where we have not fetched.
+ if report_id:
+ gq = DailyGenerals.select().where(DailyGenerals.report_id == report_id)
+ else:
+ gq = DailyGenerals.select().where(DailyGenerals.findings_count.is_null())
+ for dg in gq:
+ print(f"\tfindings {dg.report_id}")
+ jres = fetch_from_api("findings", {
+ "report_id": op("eq", dg.report_id)
+ })
+ for res in jres:
+ ## print(f"res {res}")
+ res["cog_over"] = dg.cog_over
+ res = res | model_to_dict(dg)
+ # We only need a subset of the keys
+ # that come back from the API query.
+ to_delete = set(res.keys()).difference(findings_fields_to_keep)
+ for k in to_delete:
+ del res[k]
+ # Make sure booleans are booleans...
+ # Peewee does not treat 'N' as False.
+ res = convert_bools(res)
+ # console.log(res)
+ dfq = (DailyFindings
+ .select()
+ .where((DailyFindings.report_id == dg.report_id)
+ & (DailyFindings.award_reference == res["award_reference"])
+ & (DailyFindings.reference_number == res["reference_number"])))
+ if dfq.exists():
+ for df in dfq:
+ print(
+ f"\tUpdating {dg.report_id} {res['award_reference']} {res['reference_number']}")
+ (df
+ .update(**res)
+ .where((DailyFindings.report_id == dg.report_id)
+ & (DailyFindings.award_reference == res["award_reference"])
+ & (DailyFindings.reference_number == res["reference_number"]))
+ .execute())
+ else:
+ print(
+ f"\tCreating {dg.report_id} {res['award_reference']} {res['reference_number']}")
+ DailyFindings.create(**res)
+ dg.date_retrieved = today()
+ dg.findings_count = len(jres)
+ dg.save()
+
+ def awards(self, report_id=None):
+ print("AWARDS")
+ # console = Console()
+
+ if report_id:
+ gq = DailyGenerals.select().where(DailyGenerals.report_id == report_id)
+ else:
+ gq = DailyGenerals.select().where(DailyGenerals.awards_count.is_null())
+ for dg in gq:
+ # For each general
+ dfq = (DailyFindings
+ .select()
+ .where(DailyFindings.report_id == dg.report_id))
+ awards_count = 0
+ # We already have findings loaded.
+ # These are the awards that we care about
+ for df in dfq:
+ # Now, for each row we find, we need to
+ # look up more award info.
+ jres = fetch_from_api("federal_awards", {
+ "report_id": op("eq", dg.report_id),
+ "award_reference": op("eq", df.award_reference)
+ })
+ awards_count += 1
+ # What comes back are federal awards results
+ for res in jres:
+ # Update the appropriate record.
+ res["aln"] = (res["federal_agency_prefix"] +
+ "." + res["federal_award_extension"])
+ # We only need a subset of the keys
+ # that come back from the API query.
+ res = res | model_to_dict(dg)
+ to_delete = set(res.keys()).difference(
+ awards_fields_to_keep)
+ for k in to_delete:
+ del res[k]
+ res = convert_bools(res)
+ # Update the row in question
+ print(f"\tUpdating awards for {df.report_id} {df.award_reference} {df.reference_number}")
+ (df
+ .update(**res)
+ .where((DailyFindings.report_id == dg.report_id)
+ & (DailyFindings.award_reference == df.award_reference)
+ & (DailyFindings.reference_number == df.reference_number))
+ .execute())
+ dg.awards_count = awards_count
+ dg.save()
+
+    def _add_sheets(self, wb, iter_values, query):
+        for iter_value in iter_values:
+ ws = wb.create_sheet(f"{iter_value}")
+ # Put headers on the sheets
+ for obj in query(iter_value):
+ as_d = model_to_dict(obj)
+ ws.append(list(as_d.keys()))
+ break
+ # Now the values.
+ for obj in query(iter_value):
+ as_d = model_to_dict(obj)
+ ws.append(list(as_d.values()))
+ adjust_columns(ws)
+
+ def _cleanup_sheet(self, ws):
+ boolean_columns = ["K", "L", "M", "O", "P", "Q", "R", "S", "T", "U"]
+        # Tries to go through a sheet and
+        # 1. Hyperlink all the report ids,
+        # 2. Clean up all the booleans.
+        # The columns are hard-coded to the order
+        # they appear in when dumped into the sheet.
+ try:
+ report_ids = []
+ for cell in ws["B"]:
+ if ("GSAFAC" in cell.value) or ("CENSUS" in cell.value):
+ report_ids.append(cell.value)
+ cell.hyperlink = f"https://app.fac.gov/dissemination/report/pdf/{cell.value}"
+ else:
+ pass
+ for ndx, cell in enumerate(ws["C"][1:]):
+ cell.hyperlink = f"https://app.fac.gov/dissemination/summary/{report_ids[ndx]}"
+ for bool_column in boolean_columns:
+ for cell in ws[bool_column]:
+ if cell.value == 1:
+ cell.value = "YES"
+ elif cell.value == 0:
+ cell.value = "NO"
+ else:
+ pass
+ for bool_column in boolean_columns:
+ for cell in ws[bool_column]:
+ if cell.value == "YES":
+ cell.fill = yes_fill
+        except Exception:
+            # Sheets missing the expected columns are left as-is.
+            pass
+
+ def _remove_default_sheet(self, wb):
+ # Try removing the default sheet.
+ try:
+ del wb['Sheet']
+        except KeyError:
+            pass
+
+ def to_xlsx(self):
+ print("TO XLSX")
+ wb = Workbook()
+ self._add_sheets(
+ wb,
+ get_unique_agency_numbers(),
+ lambda iter_value:
+            (DailyFindings
+             .select()
+             .where(DailyFindings.aln.startswith(iter_value)))
+ )
+ self._add_sheets(
+ wb,
+ get_unique_cog_overs(),
+ lambda iter_value:
+            (DailyFindings
+             .select()
+             .where(DailyFindings.cog_over == iter_value))
+ )
+
+ # Hyperlink the report IDs
+ for sheet in wb.worksheets:
+ self._cleanup_sheet(sheet)
+
+ self._remove_default_sheet(wb)
+
+ return wb
+
+
+# @click.command()
+# @click.argument('acceptance_date', default="2024-03-02")
+# @click.option("--clean", is_flag=True, show_default=True, default=False,)
+# @click.option("--omit-generals", is_flag=True, show_default=True, default=False,)
+# @click.option("--omit-findings", is_flag=True, show_default=True, default=False,)
+# @click.option("--omit-awards", is_flag=True, show_default=True, default=False,)
+# @click.option("--report-id", default=None,)
+def findings_by_aln(acceptance_date,
+ clean=True,
+ omit_generals=False,
+ omit_findings=False,
+ omit_awards=False,
+ report_id=None):
+ acceptance_date = string_to_datetime(acceptance_date)
+ db_filename = f"{acceptance_date.strftime('%Y-%m-%d')}.sqlite"
+ workbook_filename = f"{acceptance_date.strftime('%Y-%m-%d')}-findings.xlsx"
+ # Possibly remove work products
+ # If we're only running part of the generation, then
+ # do not clean things. That's an error on the user's part.
+ if clean and (all(map(lambda v: not v, [omit_generals, omit_findings, omit_awards]))):
+ rm(path_based_on_ext(db_filename))
+
+ setup_database(db_filename)
+
+ qparams = []
+ qparams.append(QParam(acceptance_date.date()))
+ fac = FAC(qparams)
+
+ g0 = g1 = 0
+ f0 = f1 = 0
+ a0 = a1 = 0
+
+ t0 = time.time()
+ if omit_generals:
+ print("Skipping general generation")
+ else:
+ g0 = time.time()
+ fac.general(report_id=report_id)
+ g1 = time.time()
+
+ if omit_findings:
+ print("Skipping findings generation")
+ else:
+ f0 = time.time()
+ fac.findings(report_id=report_id)
+ f1 = time.time()
+
+ if omit_awards:
+ print("Skipping award generation")
+ else:
+ a0 = time.time()
+ fac.awards(report_id=report_id)
+ a1 = time.time()
+ t1 = time.time()
+
+ try:
+ wb = fac.to_xlsx()
+ rm(path_based_on_ext(workbook_filename))
+ wb.save(path_based_on_ext(workbook_filename))
+ DailyMetadata.create(
+ date_retrieved=today(),
+ queries_used=get_query_count(),
+ time_elapsed=t1-t0,
+ time_general=g1-g0,
+ time_findings=f1-f0,
+ time_awards=a1-a0,
+ )
+    except Exception as e:
+        print(f"{acceptance_date} NO FINDINGS, NO WORKBOOK ({e})")
diff --git a/notebook-example/three/notebooks/libraries/findings_const.py b/notebook-example/three/notebooks/libraries/findings_const.py
new file mode 100644
index 0000000..53c7168
--- /dev/null
+++ b/notebook-example/three/notebooks/libraries/findings_const.py
@@ -0,0 +1,3 @@
+FAC_API_BASE = "https://api.fac.gov"
+MAX_RESULTS = 4_000_000
+STEP_SIZE = 20000
diff --git a/notebook-example/three/notebooks/libraries/findings_models.py b/notebook-example/three/notebooks/libraries/findings_models.py
new file mode 100644
index 0000000..552b8f3
--- /dev/null
+++ b/notebook-example/three/notebooks/libraries/findings_models.py
@@ -0,0 +1,79 @@
+from peewee import *
+from libraries.findings_util import (
+ path_based_on_ext
+)
+
+proxy = DatabaseProxy() # Create a proxy for our db.
+
+# We're going to need to cache things.
+# So, a local DB makes sense.
+# The table design...
+# It will pull from General, Findings, and Federal Awards
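+#
+# A DatabaseProxy lets the models below be declared before a concrete
+# database exists; setup_database() at the bottom binds the proxy to a
+# real SQLite database at runtime.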
+
+class DailyMetadata(Model):
+ date_retrieved = DateField(null=True)
+ queries_used = IntegerField(null=True)
+ time_elapsed = IntegerField(null=True)
+ time_general = IntegerField(null=True)
+ time_findings = IntegerField(null=True)
+ time_awards = IntegerField(null=True)
+ class Meta:
+ database = proxy
+
+class DailyGenerals(Model):
+ report_id = TextField(unique=True) # PK
+ auditee_name = TextField()
+ auditee_uei = TextField()
+ date = DateField()
+ date_retrieved = DateField(null=True)
+ findings_count = IntegerField(null=True)
+ awards_count = IntegerField(null=True)
+ cog_over = TextField(null=True)
+ class Meta:
+ database = proxy
+
+
+class DailyFindings(Model):
+ report_id = TextField()
+ auditee_name = TextField()
+ auditee_uei = TextField()
+ award_reference = TextField(null=True)
+ reference_number = TextField(null=True)
+ aln = TextField(null=True)
+ cog_over = TextField(null=True)
+ federal_program_name = TextField(null=True)
+ amount_expended = IntegerField(null=True)
+ is_direct = BooleanField(null=True)
+ is_major = BooleanField(null=True)
+ is_passthrough_award = BooleanField(null=True)
+ passthrough_amount = IntegerField(null=True)
+ is_modified_opinion = BooleanField(null=True)
+ is_other_matters = BooleanField(null=True)
+ is_material_weakness = BooleanField(null=True)
+ is_significant_deficiency = BooleanField(null=True)
+ is_other_findings = BooleanField(null=True)
+ is_questioned_costs = BooleanField(null=True)
+ is_repeat_finding = BooleanField(null=True)
+ prior_finding_ref_numbers = TextField(null=True)
+
+ class Meta:
+ database = proxy
+
+
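+# For example, findings with ALNs "21.010" and "84.483A" yield
+# agency numbers ["21", "84"].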
+def get_unique_agency_numbers():
+ ans = set()
+ for df in DailyFindings.select():
+ ans.add(df.aln.split(".")[0])
+ return sorted(list(ans))
+
+def get_unique_cog_overs():
+ cogs = set()
+ for df in DailyFindings.select():
+ cogs.add(df.cog_over)
+ return sorted(list(cogs))
+
+def setup_database(filename):
+    # Set up the SQLite database behind the proxy. The database file
+    # lands in a directory named for its extension (e.g. sqlite/),
+    # matching the path that findings_by_aln removes when cleaning.
+    db = SqliteDatabase(path_based_on_ext(filename))
+    proxy.initialize(db)
+    db.create_tables([DailyMetadata, DailyGenerals, DailyFindings])
diff --git a/notebook-example/three/notebooks/libraries/findings_util.py b/notebook-example/three/notebooks/libraries/findings_util.py
new file mode 100644
index 0000000..477053f
--- /dev/null
+++ b/notebook-example/three/notebooks/libraries/findings_util.py
@@ -0,0 +1,93 @@
+from datetime import datetime
+import os
+import requests
+
+from libraries.findings_const import (
+ FAC_API_BASE
+)
+import logging
+logger = logging.getLogger(__name__)
+
+API_KEY = ""
+def set_api_key(key):
+ global API_KEY
+ API_KEY = key
+
+def get_api_key():
+ global API_KEY
+ return API_KEY
+
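+# Builds a filter value in the syntax the FAC API expects;
+# e.g. op("eq", "2023-06-28") returns "eq.2023-06-28".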
+def op(op, value):
+ return f"{op}.{value}"
+
+
+def string_to_datetime(strdate):
+ parts = strdate.split("-")
+ return datetime(int(parts[0]), int(parts[1]), int(parts[2]))
+
+def today():
+ return datetime.now().strftime('%Y-%m-%d')
+
+query_count = 0
+
+def get_query_count():
+ global query_count
+ return query_count
+
+def fetch_from_api(table, payload):
+ global query_count
+ query_count += 1
+ payload = payload | {"api_key": get_api_key()}
+
+ res = requests.get(f"{FAC_API_BASE}/{table}",
+ params=payload,)
+ jres = res.json()
+ if len(jres) == 0:
+ print(f"No results found for {table}")
+ return jres
+
+
+def rm(filename):
+ try:
+ os.remove(filename)
+ except FileNotFoundError:
+ pass
+
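+# For example, path_based_on_ext("2024-06-17-findings.xlsx") returns
+# "xlsx/2024-06-17-findings.xlsx", creating xlsx/ if needed.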
+def path_based_on_ext(the_file):
+    filename, file_extension = os.path.splitext(the_file)
+    # Create the output directory, named after the extension without
+    # its leading dot, if it does not already exist.
+    os.makedirs(file_extension[1:], exist_ok=True)
+    return os.path.join(file_extension[1:], f"{filename}{file_extension}")
+
+
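+# Normalize Y/N-style strings to real booleans, since peewee's
+# BooleanField does not treat 'N' as False. For example,
+# convert_bools({"is_major": "Y", "is_direct": "N"}) returns
+# {"is_major": True, "is_direct": False}.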
+def convert_bools(res):
+ for k in res.keys():
+ if res[k] in ['Y', "TRUE", "T", "YES"]:
+ res[k] = True
+ elif res[k] in ["N", "NO", "FALSE", "F"]:
+ res[k] = False
+ return res
+
+
+def adjust_columns(ws):
+ for col in ws.columns:
+ max_length = 0
+ column = col[0].column_letter # Get the column name
+ for cell in col:
+ try: # Necessary to avoid error on empty cells
+ if len(str(cell.value)) > max_length:
+ max_length = len(str(cell.value))
+            except Exception:
+ pass
+ adjusted_width = (max_length + 2) * 1.2
+ ws.column_dimensions[column].width = adjusted_width
+ return ws
+
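+# Prefer the cognizant agency when present; e.g. cog_over("93", None)
+# returns "COG-93", while cog_over(None, "10") returns "OVER-10".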
+def cog_over(c, o):
+ if c:
+ return f"COG-{c}"
+ else:
+ return f"OVER-{o}"
+
diff --git a/notebook-example/three/notebooks/libraries/sum_over_alns.py b/notebook-example/three/notebooks/libraries/sum_over_alns.py
new file mode 100644
index 0000000..8696578
--- /dev/null
+++ b/notebook-example/three/notebooks/libraries/sum_over_alns.py
@@ -0,0 +1,296 @@
+import os
+import requests
+import sys
+import datetime
+#from alive_progress import alive_bar
+from .aln import ALN
+import pandas as pd
+from openpyxl import Workbook
+from openpyxl.utils.dataframe import dataframe_to_rows
+import time
+
+# https://stackoverflow.com/questions/17755996/how-to-make-a-list-as-the-default-value-for-a-dictionary
+from collections import defaultdict
+from pprint import pprint
+
+FAC_API_BASE = "https://api.fac.gov"
+# This change hard-overrides using the local data.
+# This involves leaving out some audits, but it is faster,
+# and avoids key limit issues while testing.
+# FAC_API_BASE = "http://localhost:3000"
+# FAC_API_KEY = os.getenv("API_GOV_KEY")
+MAX_RESULTS = 4_000_000
+STEP_SIZE = 20000
+
+# Basic headers; intended for use locally as well as remotely.
+def BASE_HEADERS(api_key):
+ return {
+ "X-API-Key": api_key
+ }
+
+
+def load_aln_list(fname):
+ alns = set()
+ with open(fname, 'r') as fp:
+ for line in fp:
+ line = line.strip()
+ parts = line.split(".")
+ if len(parts) == 1:
+ alns.add(ALN(parts[0]))
+ else:
+ alns.add(ALN(parts[0], parts[1]))
+ return list(alns)
+
+
+def op(op, value):
+ return f"{op}.{value}"
+
+
+def string_to_datetime(strdate):
+ parts = strdate.split("-")
+ return datetime.datetime(int(parts[0]), int(parts[1]), int(parts[2]))
+
+
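+# Cache report_id -> fac_accepted_date so each report only costs one
+# query against the /general endpoint.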
+memoize_dates = {}
+
+
+def get_date(report_id, api_key=""):
+ if memoize_dates.get(report_id, False):
+ return string_to_datetime(memoize_dates.get(report_id))
+ payload = {
+ "report_id": op("eq", report_id),
+ "select": ",".join(["report_id", "fac_accepted_date"]),
+ "api_key": api_key,
+ }
+ res = requests.get(f"{FAC_API_BASE}/general",
+ params=payload,
+ headers=BASE_HEADERS(api_key))
+ jres = res.json()
+ if len(jres) == 0:
+ print(f"NO DATE FOUND FOR {report_id}")
+ sys.exit()
+ the_date = jres[0]["fac_accepted_date"]
+ memoize_dates[report_id] = the_date
+ the_date = string_to_datetime(the_date)
+ return the_date
+
+
+def calculate_for_aln(aln,
+ audit_year="2023",
+ before_acceptance="2023-06-28",
+ api_key=""):
+ # What report IDs does this ALN appear in?
+ # aln : report_id
+ aln_to_report_ids = defaultdict(list)
+ # What is the total direct amount on that ALN?
+ # aln : total
+ aln_to_total = defaultdict(lambda: 0)
+ # How many times do we see this ALN?
+ # aln : count
+ aln_to_count = defaultdict(lambda: 0)
+ aln_dates = defaultdict(list)
+ before_acceptance = string_to_datetime(before_acceptance)
+
+ # We begin by finding this ALN in the federal_awards table
+ payload = {
+ "limit": STEP_SIZE - 1,
+ "federal_agency_prefix": op("eq", aln.agency),
+ "audit_year": op("eq", audit_year),
+ "is_direct": op("eq", "Y"),
+ "select": ",".join(["report_id", "amount_expended", "is_direct", "federal_agency_prefix", "federal_award_extension"]),
+ "api_key": api_key,
+ }
+ # If they included a program, and not just an agency number...
+ if aln.program:
+ payload["federal_award_extension"] = op("eq", aln.program)
+
+ url = f"{FAC_API_BASE}/federal_awards"
+
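+    # Page through the results STEP_SIZE rows at a time, advancing
+    # `offset` until the API returns an empty (or short) page.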
+ for start in range(0, MAX_RESULTS, STEP_SIZE):
+ payload["offset"] = start
+
+ res = requests.get(url,
+ params=payload)
+ jres = res.json()
+ len_jres = len(jres)
+ # print(f"[{payload['offset']} -> {payload['offset'] + (STEP_SIZE-1)}] Retrieved {len_jres} results...")
+ if jres == []:
+ break
+ elif "code" in jres:
+ print("ERROR: ")
+ pprint(jres)
+ break
+ else:
+ for r in jres:
+ this_date = get_date(r["report_id"], api_key)
+ r["fac_accepted_date"] = this_date
+ if this_date < before_acceptance:
+ aln_to_report_ids[aln].append(r["report_id"])
+ aln_to_count[aln] = aln_to_count.get(aln, 0) + 1
+ aln_dates[aln].append(this_date)
+ if r["is_direct"] == "Y":
+ aln_to_total[aln] = aln_to_total.get(
+ aln, 0) + r["amount_expended"]
+ if len_jres < STEP_SIZE:
+ break
+
+ # return (str(aln), aln_to_report_ids, aln_to_total, aln_to_count)
+ return Results(audit_year, str(aln), aln_to_report_ids[aln], aln_to_total[aln], aln_to_count[aln])
+
+
+def fac_weight_fun(reports, awards, dollars):
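+    # Weighted blend of report share, award share, and dollar share
+    # (percentages). For example, fac_weight_fun(10.0, 20.0, 5.0) =
+    # (0.485 * 10.0) + (0.485 * 20.0) + (0.03 * 5.0) = 14.7.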
+ v = (0.485 * reports) + (0.485 * awards) + (0.03 * dollars)
+ return round(v, 3)
+
+
+class Results():
+ def __init__(self, audit_year, aln, report_ids, total_dollars, award_count):
+ self.audit_year = audit_year
+ self.aln = aln
+ self.report_ids = set(report_ids)
+ self.total_dollars = total_dollars
+ self.award_count = award_count
+
+ def __str__(self):
+ return f"{self.aln} rids: {len(self.report_ids)} $: {self.total_dollars} awards: {self.award_count}"
+
+ def __repr__(self):
+ return self.__str__()
+
+ def to_csv(self):
+ return f"{self.audit_year},{self.aln},{len(self.report_ids)},{self.award_count},{self.total_dollars}"
+
+
+class ResultSummary():
+ def __init__(self, agency_number):
+ self.agency_number = agency_number
+ self.results = defaultdict(list)
+ self.alns = defaultdict(list)
+ self.report_counts = defaultdict(list)
+ self.award_counts = defaultdict(list)
+ self.total_dollars = defaultdict(list)
+ self.pct_of_reports = defaultdict(list)
+ self.pct_of_awards = defaultdict(list)
+ self.pct_of_dollars = defaultdict(list)
+ self.fac_weights = defaultdict(list)
+
+ def add_result(self, audit_year, r):
+ self.results[audit_year].append(r)
+
+ def prep_report(self):
+ for ay, rs in self.results.items():
+ for r in rs:
+ self.alns[ay].append(r.aln)
+ self.report_counts[ay].append(len(r.report_ids))
+ self.award_counts[ay].append(r.award_count)
+ self.total_dollars[ay].append(int(r.total_dollars))
+ try:
+ self.pct_of_reports[ay] = list(
+ map(lambda n: round(n / sum(self.report_counts[ay])*100, 3), self.report_counts[ay]))
+ self.pct_of_awards[ay] = list(
+ map(lambda n: round(n / sum(self.award_counts[ay])*100, 3), self.award_counts[ay]))
+ self.pct_of_dollars[ay] = list(
+ map(lambda n: round(n / sum(self.total_dollars[ay])*100, 3), self.total_dollars[ay]))
+ self.fac_weights[ay] = list(map(fac_weight_fun,
+ self.pct_of_reports[ay],
+ self.pct_of_awards[ay],
+ self.pct_of_dollars[ay]))
+            except Exception:
+                print("REPORT COUNTS", self.report_counts)
+                print("AWARD COUNTS", self.award_counts)
+                print("TOTAL DOLLARS", self.total_dollars)
+
+ def report_as_xlsx(self):
+ self.prep_report()
+ wb = Workbook()
+ ws = wb.create_sheet("Overview")
+ df = pd.DataFrame({
+ "note": [
+ "All values rounded to 3 places.",
+ "FAC weight is (0.485 * pct_rpt) + (0.485 * pct_awd) + (0.03 * pct_$)",
+ "FAC weight can be used for estimating opdiv contribution, if desired.",
+ ]
+ })
+ for r in dataframe_to_rows(df, index=True, header=True):
+ ws.append(r)
+
+ for ay, _ in self.results.items():
+ ws = wb.create_sheet(f"AY{ay}")
+ df = pd.DataFrame(
+ {
+ "aln": self.alns[ay],
+ "report_count": self.report_counts[ay],
+ "award_count": self.award_counts[ay],
+ "total_dollars": self.total_dollars[ay],
+ "pct_of_reports": self.pct_of_reports[ay],
+ "pct_of_awards": self.pct_of_awards[ay],
+ "pct_of_dollars": self.pct_of_dollars[ay],
+ "fac_weight": self.fac_weights[ay]
+ }
+ )
+ for r in dataframe_to_rows(df, index=True, header=True):
+ ws.append(r)
+ del wb['Sheet']
+ wb.save(f"agency-{self.agency_number}-distribution.xlsx")
+
+
+def get_alns_by_agency_number(audit_year, agency_number, api_key="NO_API_KEY"):
+ payload = {
+ "federal_agency_prefix": op("eq", agency_number),
+ "select": "federal_award_extension",
+ "audit_year": op("eq", audit_year),
+ }
+ url = f"{FAC_API_BASE}/federal_awards"
+ all_alns = set()
+
+ for start in range(0, MAX_RESULTS, STEP_SIZE):
+ payload = payload | {
+ "offset": start,
+ "api_key": api_key,
+ }
+ res = requests.get(url,
+ params=payload,
+ )
+ jres = res.json()
+ if jres == []:
+ break
+ elif "code" in jres:
+ print("ERROR: ")
+ pprint(jres)
+ break
+        else:
+            for r in jres:
+                all_alns.add(ALN(agency_number, r["federal_award_extension"]))
+            # Don't bother with another call if we had fewer than the max.
+            if len(jres) < STEP_SIZE:
+                break
+
+ return all_alns
+
+# @click.command()
+# @click.argument('list_of_alns')
+# @click.option('--audit-years', default="2023", help='Audit year')
+# @click.option('--before-acceptance', default="2023-06-28", help="Acceptance date")
+# @click.option("--distinct-alns-for-agency", default=None, help="Each distinct aln under an agency number.")
+def sum_over_alns(list_of_alns, audit_years, before_acceptance, distinct_alns_for_agency, api_key):
+ RS = ResultSummary(distinct_alns_for_agency)
+
+ for audit_year in list(map(lambda y: int(y), audit_years.split(","))):
+ if distinct_alns_for_agency:
+ alns = get_alns_by_agency_number(
+ audit_year,
+ distinct_alns_for_agency,
+ api_key
+ )
+ else:
+ alns = load_aln_list(list_of_alns)
+ for ndx, aln in enumerate(sorted(alns, key=lambda a: f"{a.agency}.{a.program}")):
+ print(f"Calculating for {aln} [{ndx + 1} of {len(alns)}]")
+ result = calculate_for_aln(aln,
+ audit_year=audit_year,
+ before_acceptance=before_acceptance,
+ api_key=api_key,
+ )
+ print(result)
+ RS.add_result(audit_year, result)
+
+ RS.report_as_xlsx()
diff --git a/notebook-example/three/notebooks/sum_alns.ipynb b/notebook-example/three/notebooks/sum_alns.ipynb
new file mode 100644
index 0000000..b1b718f
--- /dev/null
+++ b/notebook-example/three/notebooks/sum_alns.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First, install packages required by the notebook\n",
+ "%pip install pyodide-http requests pandas openpyxl\n",
+ "# Import the FAC support library\n",
+ "import fac\n",
+ "# Import the code for this demo\n",
+ "from libraries import sum_over_alns as soa"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Does the API work\n",
+ "\n",
+ "The first thing we'll check is if the API works.\n",
+ "\n",
+ "If it does, we'll get back one record from the `general` API endpoint.\n",
+ "\n",
+ "If not, it will throw some kind of error.\n",
+ "\n",
+ "This means you should 1) obtain an API key, and 2) copy the key you receieve into the file `api_key.py`. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "payload = { \"api_key\": fac.FAC_API_KEY }\n",
+ "query = payload | { \"limit\": 1 }\n",
+ "r = fac.get(\"general\", params = query)\n",
+ "print(r.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Calculate the dollars per ALN \n",
+ "\n",
+ "This demonstrates a calculation using the FAC API. \n",
+ "\n",
+ "Given a list of ALNs, it:\n",
+ "\n",
+ "1. Looks up all awards with those ALNs, and\n",
+ "2. Adds up the direct funding on those awards.\n",
+ "\n",
+ "In this example, we're using a list of ALNs from Treasury. To test it with another list, you could create a file called `my_agency.alns` in the `files` folder, and enter one ALN per line. Then, change the code below to use the file `my_agency.alns` instead of `treasury.alns`. You can also change the year and date to explore other points in history.\n",
+ "\n",
+ "Note this does take a while."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "soa.sum_over_alns(\"files/treasury.alns\", \"2023\", \"2024-06-19\", None, api_key=fac.FAC_API_KEY)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook-example/three/notebooks/traffic_per_week.ipynb b/notebook-example/three/notebooks/traffic_per_week.ipynb
new file mode 100644
index 0000000..e69de29
diff --git a/notebook-example/three/notebooks/util/clear_local_storage.ipynb b/notebook-example/three/notebooks/util/clear_local_storage.ipynb
new file mode 100644
index 0000000..9eaea50
--- /dev/null
+++ b/notebook-example/three/notebooks/util/clear_local_storage.ipynb
@@ -0,0 +1,59 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Clear local storage\n",
+ "\n",
+ "Found [here](https://github.com/jupyterlite/jupyterlite/issues/407#issuecomment-1353088447)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from IPython.display import display, HTML\n",
+ "display(HTML(\"\"\"\n",
+ "\n",
+ "\n",
+ "\"\"\"))\n",
+ " "
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook-example/three/notebooks/xlsx/README.md b/notebook-example/three/notebooks/xlsx/README.md
new file mode 100644
index 0000000..17c4201
--- /dev/null
+++ b/notebook-example/three/notebooks/xlsx/README.md
@@ -0,0 +1 @@
+This is for output files from demonstration scripts.
diff --git a/notebook-example/three/overrides.json b/notebook-example/three/overrides.json
new file mode 100644
index 0000000..0bbb9d9
--- /dev/null
+++ b/notebook-example/three/overrides.json
@@ -0,0 +1,14 @@
+{
+ "@jupyterlab/notebook-extension:panel": {
+ "toolbar": [
+ {
+ "name": "download",
+ "label": "Download",
+ "args": {},
+ "command": "docmanager:download",
+ "icon": "ui-components:download",
+ "rank": 50
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/notebook-example/three/requirements.txt b/notebook-example/three/requirements.txt
new file mode 100644
index 0000000..71432d5
--- /dev/null
+++ b/notebook-example/three/requirements.txt
@@ -0,0 +1,62 @@
+# jupyterlite-core
+# jupyterlite-pyodide-kernel
+# libarchive-c
+# jupyter_server
+# jupyterlab_server
+# jupyter-datatables
+# jupyterlab-filesystem-access
+# pyodide-http
+# requests
+
+# Core modules (mandatory)
+jupyterlite-core==0.3.0
+jupyterlab~=4.1.6
+notebook~=7.1.2
+
+
+# Python kernel (optional)
+jupyterlite-pyodide-kernel==0.3.2
+
+# JavaScript kernel (optional)
+jupyterlite-javascript-kernel==0.3.0
+
+# P5 kernel (optional)
+jupyterlite-p5-kernel==0.1.0
+
+# JupyterLab: Fasta file renderer (optional)
+jupyterlab-fasta>=3.3.0,<4
+# JupyterLab: Geojson file renderer (optional)
+jupyterlab-geojson>=3.4.0,<4
+# JupyterLab: guided tour (optional)
+# TODO: re-enable after https://github.com/jupyterlab-contrib/jupyterlab-tour/issues/82
+# jupyterlab-tour
+# JupyterLab: dark theme
+jupyterlab-night
+# JupyterLab: Miami nights theme (optional)
+jupyterlab_miami_nights
+
+# Python: ipywidget library for Jupyter notebooks (optional)
+ipywidgets>=8.1.1,<9
+# Python: ipyevents library for Jupyter notebooks (optional)
+ipyevents>=2.0.1
+# Python: interactive Matplotlib library for Jupyter notebooks (optional)
+ipympl>=0.8.2
+# Python: ipycanvas library for Jupyter notebooks (optional)
+ipycanvas>=0.9.1
+# Python: ipyleaflet library for Jupyter notebooks (optional)
+ipyleaflet
+
+# Python: plotting libraries (optional)
+plotly>=5,<6
+bqplot
+
+# Language packs
+# https://github.com/jupyterlab/language-packs/tree/main/language-packs
+jupyterlab-language-pack-es-ES
+jupyterlab-language-pack-zh-CN
+jupyterlab-language-pack-vi-VN
+jupyterlab-language-pack-fr-FR
+
+openpyxl
+pysqlite3
+peewee