diff --git a/major-programs/.gitignore b/major-programs/.gitignore new file mode 100644 index 0000000..01b1675 --- /dev/null +++ b/major-programs/.gitignore @@ -0,0 +1,2 @@ +*.json +.~lock* \ No newline at end of file diff --git a/major-programs/aln_counts_by_year.xlsx b/major-programs/aln_counts_by_year.xlsx new file mode 100644 index 0000000..06a963e Binary files /dev/null and b/major-programs/aln_counts_by_year.xlsx differ diff --git a/major-programs/major-programs.ipynb b/major-programs/major-programs.ipynb new file mode 100644 index 0000000..286eed6 --- /dev/null +++ b/major-programs/major-programs.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Major programs\n", + "\n", + "To assist in understanding what major programs should be prioritized for inclusion or removal in the Compliance Supplement, it would be nice to see major program counts by ALN.\n", + "\n", + "In short:\n", + "\n", + "1. For each year, look at every federal award made.\n", + "2. 
# Load our config.
# It has three keys:
#   * YEARS, an array of years (2023, 2022, etc.)
#   * FAC_API, the base URL for the analysis
#   * FAC_API_KEY, the API key.
# Use a context manager so the file handle is closed as soon as the JSON has
# been parsed instead of lingering until garbage collection.
with open("config.json", "r") as config_file:
    config = json.load(config_file)
# Number of HTTP requests issued so far, keyed by endpoint name.
query_counts = defaultdict(int)


def fetch_all(base, endpoint, params, *, page_size=20000, session=None,
              api_key=None):
    """Fetch every row of a paginated result set.

    Requests ``{base}/{endpoint}`` repeatedly with ``offset``/``limit`` query
    parameters until the server returns an empty page, and records each
    request in ``query_counts[endpoint]``.

    Args:
        base: Base URL of the API.
        endpoint: Endpoint name appended to ``base``; also the key used in
            ``query_counts``.
        params: Extra query parameters (filters). Not modified.
        page_size: Number of rows requested per page.
        session: Optional object exposing ``get(url, params=..., headers=...)``
            (for example a ``requests.Session``). Defaults to the ``requests``
            module.
        api_key: Value for the ``x-api-key`` header. Defaults to
            ``config["FAC_API_KEY"]``.

    Returns:
        A list with the rows of all pages, in the order received. If a request
        fails or the server answers with something other than a list of rows
        (e.g. an error object), fetching stops and the rows gathered so far
        are returned.
    """
    http = requests if session is None else session
    key = config["FAC_API_KEY"] if api_key is None else api_key
    url = f"{base}/{endpoint}"
    headers = {"x-api-key": key}

    results = []
    offset = 0
    while True:
        # ``limit`` is a row count, not an end index: it must stay constant.
        # (It used to be ``offset + inc - 1``, which grew on every iteration
        # and was one row short on the first page.)
        # NOTE(review): assumes PostgREST-style offset/limit semantics, as
        # suggested by the ``eq.`` filter syntax used by callers - confirm.
        page_params = params | {"offset": offset, "limit": page_size}
        res = http.get(url, params=page_params, headers=headers)
        query_counts[endpoint] += 1

        # Check the HTTP status before trying to decode the body.
        if not res.ok:
            break
        page = res.json()
        # An error object (dict) or an empty list both end the pagination.
        if not isinstance(page, list) or not page:
            break

        results.extend(page)
        # Advance by what was actually received so that a server-side row cap
        # smaller than ``page_size`` cannot make us skip rows.
        offset += len(page)
    return results
# We'll use a dictionary of dictionaries:
# yearly_aln_counts[year][aln] -> number of awards seen with that ALN.
yearly_aln_counts = defaultdict(lambda: defaultdict(int))

for year, awards in awards_by_year.items():
    for award in awards:
        # The ALN is the agency prefix and the award extension joined by ".".
        ALN = award["federal_agency_prefix"] + "." + award["federal_award_extension"]
        yearly_aln_counts[year][ALN] += 1


# For output we need one row per ALN with one value per year.

def get_aln_by_year(aln, year):
    """Return how many awards carried ``aln`` in ``year`` (0 when none did).

    Uses ``.get`` on the outer mapping so that asking about a year that has
    no data does not insert an empty entry into ``yearly_aln_counts``.
    """
    return yearly_aln_counts.get(year, {}).get(aln, 0)


def get_all_alns():
    """Return the set of every ALN that appears in any year."""
    alns = set()  # do not shadow the ``all`` builtin
    for aln_counts in yearly_aln_counts.values():
        alns.update(aln_counts)
    return alns


unique_alns = sorted(get_all_alns())
# Create a workbook and grab its default sheet.
wb = openpyxl.Workbook()
sheet = wb.active

# Add the data.
# The header row should tell us what we're looking at: the two halves of the
# ALN followed by one column per configured year.
sheet.append(["Prefix", "Extension"] + config["YEARS"])

for aln in unique_alns:
    # Split on the first "." only, so an extension that itself contains a
    # dot is kept whole instead of being truncated.
    pfix, _, ext = aln.partition(".")
    values = [get_aln_by_year(aln, year) for year in config["YEARS"]]
    sheet.append([pfix, ext] + values)

wb.save("aln_counts_by_year.xlsx")

# Metadata: query_counts is keyed by endpoint name (not by year).
for endpoint, count in query_counts.items():
    print(f"Queries used in {endpoint}: {count}")
def awards(self, report_id=None): .execute()) dg.awards_count = awards_count dg.save() - print() def _add_sheets(self, wb, iter, query): # get_unique_agency_numbers() @@ -371,6 +369,7 @@ def findings_by_aln(acceptance_date, f0 = time.time() fac.findings(report_id=report_id) f1 = time.time() + print() if omit_awards: print("Skipping award generation") @@ -378,6 +377,7 @@ def findings_by_aln(acceptance_date, a0 = time.time() fac.awards(report_id=report_id) a1 = time.time() + print() t1 = time.time() try: @@ -392,5 +392,9 @@ def findings_by_aln(acceptance_date, time_findings=f1-f0, time_awards=a1-a0, ) + print(f"Queries used: {get_query_count()}") + print(f"Time elapsed: {t1-t0}") + print(f"Findings search time: {f1-f0}") + print(f"Awards search time: {a1-a0}") except: print(f"{acceptance_date} NO FINDINGS, NO WORKBOOK")