Counting ALNs...

GSA-TTS · Jun 28, 2024 · fbe87fa · fbe87fa
1 parent 90e79b7
commit fbe87fa
Show file tree

Hide file tree

Showing 4 changed files with 266 additions and 2 deletions.
diff --git a/major-programs/.gitignore b/major-programs/.gitignore
@@ -0,0 +1,2 @@
+*.json
+.~lock*
diff --git a/major-programs/aln_counts_by_year.xlsx b/major-programs/aln_counts_by_year.xlsx
diff --git a/major-programs/major-programs.ipynb b/major-programs/major-programs.ipynb
@@ -0,0 +1,258 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Major programs\n",
+    "\n",
+    "To assist in understanding what major programs should be prioritized for inclusion or removal in the Compliance Supplement, it would be nice to see major program counts by ALN.\n",
+    "\n",
+    "In short:\n",
+    "\n",
+    "1. For each year, look at every federal award made.\n",
+    "2. For each award, get the ALN, and add one.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: openpyxl in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (3.1.4)\n",
+      "Requirement already satisfied: et-xmlfile in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from openpyxl) (1.1.0)\n",
+      "Note: you may need to restart the kernel to use updated packages.\n",
+      "Requirement already satisfied: requests in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (2.32.3)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (2.2.2)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (3.7)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (2024.6.2)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (3.3.2)\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install openpyxl\n",
+    "%pip install requests\n",
+    "import json\n",
+    "import openpyxl\n",
+    "import os\n",
+    "import requests\n",
+    "\n",
+    "from collections import defaultdict\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load our config\n",
+    "# It has three keys: \n",
+    "#  * YEARS, an array of years (2023, 2022, etc.)\n",
+    "#  * FAC_API, the base URL for the analysis\n",
+    "#  * FAC_API_KEY, the API key.\n",
+    "config = json.load(open(\"config.json\", \"r\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fetch_all\n",
+    "\n",
+    "A helper to fetch all of a result set.\n",
+    "\n",
+    "Counts queries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_counts = defaultdict(int)\n",
+    "\n",
+    "def fetch_all(base, endpoint, params):\n",
+    "    fetching = True\n",
+    "    results = []\n",
+    "    offset = 0\n",
+    "    inc = 20000\n",
+    "    while fetching:\n",
+    "        params = params | {\n",
+    "            \"offset\": offset,\n",
+    "            \"limit\": ((offset+inc)-1)\n",
+    "        }\n",
+    "        res = requests.get(f\"{base}/{endpoint}\",\n",
+    "                           params=params,\n",
+    "                           headers={\n",
+    "                               \"x-api-key\": config[\"FAC_API_KEY\"]\n",
+    "                           }\n",
+    "                           )\n",
+    "        query_counts[endpoint] += 1\n",
+    "        resj = res.json()\n",
+    "        # print(f\"{offset} {len(resj)}\")\n",
+    "        if not res or \"code\" in resj or len(resj) == 0:\n",
+    "            fetching = False\n",
+    "            break\n",
+    "        else:\n",
+    "            results += resj\n",
+    "            offset += inc\n",
+    "    return results\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# For each year, get all of the federal awards.\n",
+    "awards_by_year = dict()\n",
+    "for year in config[\"YEARS\"]:\n",
+    "    awards = fetch_all(config[\"FAC_API\"], \"federal_awards\", \n",
+    "                       {\n",
+    "                           \"audit_year\": f\"eq.{year}\"\n",
+    "                        })\n",
+    "    awards_by_year[year] = awards"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Do some counting\n",
+    "\n",
+    "Now that we grabbed everything, we can do some counting.\n",
+    "\n",
+    "We want to count the incidence of each ALN, and keep it separate by year."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We'll use a dictionary of dictionaries\n",
+    "yearly_aln_counts = defaultdict(lambda: defaultdict(int))\n",
+    "\n",
+    "for year, awards in awards_by_year.items():\n",
+    "    for award in awards:\n",
+    "        ALN = award[\"federal_agency_prefix\"] + \".\" + award[\"federal_award_extension\"]\n",
+    "        yearly_aln_counts[year][ALN] += 1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Restructure the data\n",
+    "\n",
+    "The data is now structured incorrectly. That was an easy way to count, but not structured appropriately for output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# I need ALN, then one value per year, as a row.\n",
+    "\n",
+    "def get_aln_by_year(aln, year):\n",
+    "    return yearly_aln_counts[year].get(aln, 0)\n",
+    "\n",
+    "def get_all_alns():\n",
+    "    all = set()\n",
+    "    for _, aln_counts in yearly_aln_counts.items():\n",
+    "        all.update(list(aln_counts.keys()))\n",
+    "    return all\n",
+    "\n",
+    "unique_alns = sorted(list(get_all_alns()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Make a spreadsheet\n",
+    "\n",
+    "Now, we'll take the data and produce a spreadsheet."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a workbook\n",
+    "wb = openpyxl.Workbook()\n",
+    "# Grab the default sheet\n",
+    "sheet = wb.active\n",
+    "\n",
+    "# Add the data.\n",
+    "# The header row should tell us what we're looking at\n",
+    "sheet.append([\"Prefix\", \"Extension\"] + config[\"YEARS\"])\n",
+    "\n",
+    "for aln in unique_alns:\n",
+    "    pfix = aln.split(\".\")[0]\n",
+    "    ext = aln.split(\".\")[1]\n",
+    "    values = []\n",
+    "    for year in config[\"YEARS\"]:\n",
+    "        values.append(get_aln_by_year(aln, year))\n",
+    "    args = [pfix, ext] + values\n",
+    "    sheet.append(args)\n",
+    "\n",
+    "wb.save(\"aln_counts_by_year.xlsx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Queries used in federal_awards: 177\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Metadata\n",
+    "for year, count in query_counts.items():\n",
+    "    print(f\"Queries used in {year}: {count}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook-example/three/notebooks/libraries/findings_by_aln.py b/notebook-example/three/notebooks/libraries/findings_by_aln.py
@@ -190,7 +190,6 @@ def findings(self, report_id=None):
             dg.date_retrieved = today()
             dg.findings_count = len(jres)
             dg.save()
-            print()
 
     def awards(self, report_id=None):
         print("AWARDS: ", end="")
@@ -240,7 +239,6 @@ def awards(self, report_id=None):
                      .execute())
         dg.awards_count = awards_count
         dg.save()
-        print()
 
     def _add_sheets(self, wb, iter, query):
         # get_unique_agency_numbers()
@@ -371,13 +369,15 @@ def findings_by_aln(acceptance_date,
         f0 = time.time()
         fac.findings(report_id=report_id)
         f1 = time.time()
+        print()
 
     if omit_awards:
         print("Skipping award generation")
     else:
         a0 = time.time()
         fac.awards(report_id=report_id)
         a1 = time.time()
+        print()
     t1 = time.time()
 
     try:
@@ -392,5 +392,9 @@ def findings_by_aln(acceptance_date,
             time_findings=f1-f0,
             time_awards=a1-a0,
         )
+        print(f"Queries used: {get_query_count()}")
+        print(f"Time elapsed: {t1-t0}")
+        print(f"Findings search time: {f1-f0}")
+        print(f"Awards search time: {a1-a0}")
     except:
         print(f"{acceptance_date} NO FINDINGS, NO WORKBOOK")