Skip to content

Commit

Permalink
Counting ALNs...
Browse files Browse the repository at this point in the history
  • Loading branch information
jadudm committed Jun 28, 2024
1 parent 90e79b7 commit fbe87fa
Show file tree
Hide file tree
Showing 4 changed files with 266 additions and 2 deletions.
2 changes: 2 additions & 0 deletions major-programs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.json
.~lock*
Binary file added major-programs/aln_counts_by_year.xlsx
Binary file not shown.
258 changes: 258 additions & 0 deletions major-programs/major-programs.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Major programs\n",
"\n",
"To help decide which major programs should be prioritized for inclusion in, or removal from, the Compliance Supplement, it would be useful to see major program counts by ALN (Assistance Listing Number).\n",
"\n",
"In short:\n",
"\n",
"1. For each year, look at every federal award made.\n",
"2. For each award, get the ALN, and add one to that ALN's count for the year.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: openpyxl in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (3.1.4)\n",
"Requirement already satisfied: et-xmlfile in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from openpyxl) (1.1.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: requests in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (2.32.3)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (2.2.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (3.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (2024.6.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (3.3.2)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install openpyxl\n",
"%pip install requests\n",
"import json\n",
"import openpyxl\n",
"import os\n",
"import requests\n",
"\n",
"from collections import defaultdict\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load our config from config.json.\n",
"# It has three keys:\n",
"# * YEARS, an array of years (2023, 2022, etc.)\n",
"# * FAC_API, the base URL of the API to query\n",
"# * FAC_API_KEY, the API key.\n",
"# A context manager is used so the file handle is closed once parsed.\n",
"with open(\"config.json\", \"r\") as config_file:\n",
"    config = json.load(config_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## fetch_all\n",
"\n",
"A helper that pages through an endpoint and returns the complete result set.\n",
"\n",
"It also counts the queries made against each endpoint."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"query_counts = defaultdict(int)\n",
"\n",
"def fetch_all(base, endpoint, params, page_size=20000):\n",
"    \"\"\"Fetch every row of a paged result set from `{base}/{endpoint}`.\n",
"\n",
"    Pages are requested `page_size` rows at a time until the API returns\n",
"    an error or an empty page. Fetching stops quietly in either case, so\n",
"    after an error the rows gathered so far are still returned. Every\n",
"    request is tallied in `query_counts[endpoint]`.\n",
"\n",
"    Args:\n",
"        base: Base URL of the API.\n",
"        endpoint: Endpoint name, appended to `base`.\n",
"        params: Query parameters sent with every request (not modified).\n",
"        page_size: Number of rows to ask for per request.\n",
"\n",
"    Returns:\n",
"        A list of the rows from every page, in the order fetched.\n",
"    \"\"\"\n",
"    results = []\n",
"    offset = 0\n",
"    while True:\n",
"        # `limit` is a row count, not an end index. Asking for\n",
"        # (offset + page_size - 1) rows drops the last row of the first page\n",
"        # and over-requests on every later page.\n",
"        page_params = params | {\"offset\": offset, \"limit\": page_size}\n",
"        res = requests.get(\n",
"            f\"{base}/{endpoint}\",\n",
"            params=page_params,\n",
"            headers={\"x-api-key\": config[\"FAC_API_KEY\"]},\n",
"        )\n",
"        query_counts[endpoint] += 1\n",
"        # A Response is falsy when the HTTP status is an error; check that\n",
"        # before parsing so a non-JSON error body cannot raise here.\n",
"        if not res:\n",
"            break\n",
"        page = res.json()\n",
"        # A payload with a \"code\" key is treated as an error; an empty page\n",
"        # means there is nothing left to fetch.\n",
"        if \"code\" in page or len(page) == 0:\n",
"            break\n",
"        results += page\n",
"        offset += page_size\n",
"    return results\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Pull the complete list of federal awards for every configured year.\n",
"awards_by_year = {}\n",
"for audit_year in config[\"YEARS\"]:\n",
"    year_filter = {\"audit_year\": f\"eq.{audit_year}\"}\n",
"    awards_by_year[audit_year] = fetch_all(config[\"FAC_API\"], \"federal_awards\", year_filter)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Do some counting\n",
"\n",
"Now that we grabbed everything, we can do some counting.\n",
"\n",
"We want to count the incidence of each ALN, and keep it separate by year."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Nested counters: yearly_aln_counts[year][aln] is the number of awards seen.\n",
"yearly_aln_counts = defaultdict(lambda: defaultdict(int))\n",
"\n",
"for audit_year, year_awards in awards_by_year.items():\n",
"    for award in year_awards:\n",
"        # The ALN is the agency prefix and the award extension joined by a dot.\n",
"        prefix = award[\"federal_agency_prefix\"]\n",
"        extension = award[\"federal_award_extension\"]\n",
"        yearly_aln_counts[audit_year][prefix + \".\" + extension] += 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Restructure the data\n",
"\n",
"The data is keyed by year and then by ALN. That made counting easy, but it is the wrong shape for output, which needs one row per ALN with one value per year."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# I need ALN, then one value per year, as a row.\n",
"\n",
"def get_aln_by_year(aln, year):\n",
"    \"\"\"Return how many awards were counted for `aln` in `year` (0 if none).\n",
"\n",
"    Uses `.get` on both levels so that looking up a year that was never\n",
"    counted does not insert an empty entry into `yearly_aln_counts`.\n",
"    \"\"\"\n",
"    return yearly_aln_counts.get(year, {}).get(aln, 0)\n",
"\n",
"def get_all_alns():\n",
"    \"\"\"Return the set of every ALN counted in at least one year.\"\"\"\n",
"    # Named `alns` rather than `all` so the builtin `all()` is not shadowed.\n",
"    alns = set()\n",
"    for aln_counts in yearly_aln_counts.values():\n",
"        # Updating a set from a dict adds its keys; no list copy is needed.\n",
"        alns.update(aln_counts)\n",
"    return alns\n",
"\n",
"unique_alns = sorted(get_all_alns())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make a spreadsheet\n",
"\n",
"Now, we'll take the data and produce a spreadsheet."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Create a workbook and grab its default sheet.\n",
"wb = openpyxl.Workbook()\n",
"sheet = wb.active\n",
"\n",
"# Header row: the two halves of the ALN, then one column per year.\n",
"sheet.append([\"Prefix\", \"Extension\"] + config[\"YEARS\"])\n",
"\n",
"# One row per ALN, holding that ALN's count for each configured year.\n",
"for aln in unique_alns:\n",
"    # Split only on the first \".\" so an extension that itself contains a\n",
"    # \".\" is kept whole instead of being truncated.\n",
"    prefix, extension = aln.split(\".\", 1)\n",
"    yearly_counts = [get_aln_by_year(aln, year) for year in config[\"YEARS\"]]\n",
"    sheet.append([prefix, extension] + yearly_counts)\n",
"\n",
"wb.save(\"aln_counts_by_year.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Queries used in federal_awards: 177\n"
]
}
],
"source": [
"# Metadata: how many API queries were made against each endpoint.\n",
"for endpoint, query_total in query_counts.items():\n",
"    print(f\"Queries used in {endpoint}: {query_total}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
8 changes: 6 additions & 2 deletions notebook-example/three/notebooks/libraries/findings_by_aln.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ def findings(self, report_id=None):
dg.date_retrieved = today()
dg.findings_count = len(jres)
dg.save()
print()

def awards(self, report_id=None):
print("AWARDS: ", end="")
Expand Down Expand Up @@ -240,7 +239,6 @@ def awards(self, report_id=None):
.execute())
dg.awards_count = awards_count
dg.save()
print()

def _add_sheets(self, wb, iter, query):
# get_unique_agency_numbers()
Expand Down Expand Up @@ -371,13 +369,15 @@ def findings_by_aln(acceptance_date,
f0 = time.time()
fac.findings(report_id=report_id)
f1 = time.time()
print()

if omit_awards:
print("Skipping award generation")
else:
a0 = time.time()
fac.awards(report_id=report_id)
a1 = time.time()
print()
t1 = time.time()

try:
Expand All @@ -392,5 +392,9 @@ def findings_by_aln(acceptance_date,
time_findings=f1-f0,
time_awards=a1-a0,
)
print(f"Queries used: {get_query_count()}")
print(f"Time elapsed: {t1-t0}")
print(f"Findings search time: {f1-f0}")
print(f"Awards search time: {a1-a0}")
except:
print(f"{acceptance_date} NO FINDINGS, NO WORKBOOK")

0 comments on commit fbe87fa

Please sign in to comment.