-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
266 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.json | ||
.~lock* |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,258 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Major programs\n", | ||
"\n", | ||
"To assist in understanding what major programs should be prioritized for inclusion or removal in the Compliance Supplement, it would be nice to see major program counts by ALN.\n", | ||
"\n", | ||
"In short:\n", | ||
"\n", | ||
"1. For each year, look at every federal award made.\n", | ||
"2. For each award, get the ALN, and add one.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Requirement already satisfied: openpyxl in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (3.1.4)\n", | ||
"Requirement already satisfied: et-xmlfile in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from openpyxl) (1.1.0)\n", | ||
"Note: you may need to restart the kernel to use updated packages.\n", | ||
"Requirement already satisfied: requests in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (2.32.3)\n", | ||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (2.2.2)\n", | ||
"Requirement already satisfied: idna<4,>=2.5 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (3.7)\n", | ||
"Requirement already satisfied: certifi>=2017.4.17 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (2024.6.2)\n", | ||
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/jadudm/git/ansible/venv/lib/python3.10/site-packages (from requests) (3.3.2)\n", | ||
"Note: you may need to restart the kernel to use updated packages.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%pip install openpyxl\n", | ||
"%pip install requests\n", | ||
"import json\n", | ||
"import openpyxl\n", | ||
"import os\n", | ||
"import requests\n", | ||
"\n", | ||
"from collections import defaultdict\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load our config\n", | ||
"# It has three keys: \n", | ||
"# * YEARS, an array of years (2023, 2022, etc.)\n", | ||
"# * FAC_API, the base URL for the analysis\n", | ||
"# * FAC_API_KEY, the API key.\n", | ||
"config = json.load(open(\"config.json\", \"r\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## fetch_all\n", | ||
"\n", | ||
"A helper to fetch all of a result set.\n", | ||
"\n", | ||
"Counts queries." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"query_counts = defaultdict(int)\n", | ||
"\n", | ||
"def fetch_all(base, endpoint, params):\n", | ||
" fetching = True\n", | ||
" results = []\n", | ||
" offset = 0\n", | ||
" inc = 20000\n", | ||
" while fetching:\n", | ||
" params = params | {\n", | ||
" \"offset\": offset,\n", | ||
" \"limit\": ((offset+inc)-1)\n", | ||
" }\n", | ||
" res = requests.get(f\"{base}/{endpoint}\",\n", | ||
" params=params,\n", | ||
" headers={\n", | ||
" \"x-api-key\": config[\"FAC_API_KEY\"]\n", | ||
" }\n", | ||
" )\n", | ||
" query_counts[endpoint] += 1\n", | ||
" resj = res.json()\n", | ||
" # print(f\"{offset} {len(resj)}\")\n", | ||
" if not res or \"code\" in resj or len(resj) == 0:\n", | ||
" fetching = False\n", | ||
" break\n", | ||
" else:\n", | ||
" results += resj\n", | ||
" offset += inc\n", | ||
" return results\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# For each year, get all of the federal awards.\n", | ||
"awards_by_year = dict()\n", | ||
"for year in config[\"YEARS\"]:\n", | ||
" awards = fetch_all(config[\"FAC_API\"], \"federal_awards\", \n", | ||
" {\n", | ||
" \"audit_year\": f\"eq.{year}\"\n", | ||
" })\n", | ||
" awards_by_year[year] = awards" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Do some counting\n", | ||
"\n", | ||
"Now that we grabbed everything, we can do some counting.\n", | ||
"\n", | ||
"We want to count the incidence of each ALN, and keep it separate by year." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# We'll use a dictionary of dictionaries\n", | ||
"yearly_aln_counts = defaultdict(lambda: defaultdict(int))\n", | ||
"\n", | ||
"for year, awards in awards_by_year.items():\n", | ||
" for award in awards:\n", | ||
" ALN = award[\"federal_agency_prefix\"] + \".\" + award[\"federal_award_extension\"]\n", | ||
" yearly_aln_counts[year][ALN] += 1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Restructure the data\n", | ||
"\n", | ||
"The data is now structured incorrectly. That was an easy way to count, but not structured appropriately for output." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# I need ALN, then one value per year, as a row.\n", | ||
"\n", | ||
"def get_aln_by_year(aln, year):\n", | ||
" return yearly_aln_counts[year].get(aln, 0)\n", | ||
"\n", | ||
"def get_all_alns():\n", | ||
" all = set()\n", | ||
" for _, aln_counts in yearly_aln_counts.items():\n", | ||
" all.update(list(aln_counts.keys()))\n", | ||
" return all\n", | ||
"\n", | ||
"unique_alns = sorted(list(get_all_alns()))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Make a spreadsheet\n", | ||
"\n", | ||
"Now, we'll take the data and produce a spreadsheet." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Create a workbook\n", | ||
"wb = openpyxl.Workbook()\n", | ||
"# Grab the default sheet\n", | ||
"sheet = wb.active\n", | ||
"\n", | ||
"# Add the data.\n", | ||
"# The header row should tell us what we're looking at\n", | ||
"sheet.append([\"Prefix\", \"Extension\"] + config[\"YEARS\"])\n", | ||
"\n", | ||
"for aln in unique_alns:\n", | ||
" pfix = aln.split(\".\")[0]\n", | ||
" ext = aln.split(\".\")[1]\n", | ||
" values = []\n", | ||
" for year in config[\"YEARS\"]:\n", | ||
" values.append(get_aln_by_year(aln, year))\n", | ||
" args = [pfix, ext] + values\n", | ||
" sheet.append(args)\n", | ||
"\n", | ||
"wb.save(\"aln_counts_by_year.xlsx\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Queries used in federal_awards: 177\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Metadata\n", | ||
"for year, count in query_counts.items():\n", | ||
" print(f\"Queries used in {year}: {count}\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters