Merge pull request #111 from the-scouts/reports-pt-3

the-scouts · Aug 23, 2021 · 7bd89be · 7bd89be
2 parents 727e3f5 + 97bdef7
commit 7bd89be
Show file tree

Hide file tree

Showing 3 changed files with 114 additions and 134 deletions.
diff --git a/compass/core/_scrapers/member_profile.py b/compass/core/_scrapers/member_profile.py
@@ -46,7 +46,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Collection
-    from collections.abc import Iterator
+    from collections.abc import Iterable
 
     from compass.core.util.client import Client
 
@@ -192,7 +192,7 @@ def get_personal_tab(client: Client, membership_number: int, /) -> ci.MemberDeta
 
     """
     tree = _get_member_profile_tab(client, membership_number, "Personal")
-    details: dict[str, Union[None, int, str, datetime.date, ci.AddressData, dict[str, str]]] = dict()
+    details: dict[str, Union[None, int, str, datetime.date, ci.AddressData, dict[str, str]]] = {}
 
     # ### Extractors
     # ## Core:
@@ -415,7 +415,7 @@ def _membership_duration(dates: Collection[tuple[datetime.date, datetime.date]])
     return round(membership_duration_days / 365.2425, 3)  # Leap year except thrice per 400 years.
 
 
-def _reduce_date_list(dates: Collection[tuple[datetime.date, datetime.date]]) -> Iterator[tuple[datetime.date, datetime.date]]:
+def _reduce_date_list(dates: Collection[tuple[datetime.date, datetime.date]]) -> Iterable[tuple[datetime.date, datetime.date]]:
     """Reduce list of start and end dates to disjoint ranges.
 
     Iterate through date pairs and get longest consecutive date ranges. Returns
@@ -599,7 +599,7 @@ def _process_role_data(role: html.HtmlElement) -> tuple[int, dict[str, Union[Non
     """Parses a personal learning plan from a LXML row element containing data."""
     child_nodes = list(role)
 
-    role_data: dict[str, Union[None, str, int, datetime.date]] = dict()
+    role_data: dict[str, Union[None, str, int, datetime.date]] = {}
     role_number = int(role.get("data-ng_mrn"))
     role_data["role_number"] = role_number
     role_data["role_title"] = child_nodes[0].text_content()
@@ -648,8 +648,8 @@ def _compile_ongoing_learning(training_plps: TYPES_TRAINING_PLPS, tree: html.Htm
 
     """
     # Handle GDPR (Get latest GDPR date)
-    training_ogl: TYPES_TRAINING_OGL = dict()
-    gdpr_generator: Iterator[datetime.date] = (
+    training_ogl: TYPES_TRAINING_OGL = {}
+    gdpr_generator = (
         module["validated_date"]
         for plp in training_plps.values()
         for module in plp

diff --git a/compass/core/_scrapers/reports.py b/compass/core/_scrapers/reports.py
@@ -1,9 +1,8 @@
 from __future__ import annotations
 
-from pathlib import Path
 import re
 import time
-from typing import Literal, TYPE_CHECKING
+from typing import cast, Literal, TYPE_CHECKING
 
 from lxml import html
 import requests
@@ -12,74 +11,80 @@
 from compass.core.logger import logger
 from compass.core.settings import Settings
 from compass.core.util import auth_header
-from compass.core.util import context_managers
 
 if TYPE_CHECKING:
     from compass.core.util.auth_header import TYPE_AUTH_IDS
     from compass.core.util.client import Client
 
 # TODO move to schema.reports if created
 # TODO remove location from start, to keep list small
-_report_types: dict[str, int] = {
-    # group reports
-    "Group Appointments Report": 59,
-    # district reports
-    "District Appointments Report": 50,
-    "District Member Directory Report": 51,
-    # "District Member Directory 18 To 25 Years": ,
-    "District Permits Report": 70,
-    "District Disclosure Report": 78,
-    "District Training Report": 79,
-    "District Awards Report": 94,
-    "District Disclosure Management Report": 102,
-    # county reports
-    "County/Area/Region Appointments Report": 48,
-    "County/Area/Region Member Directory Report": 49,
-    "County/Area/Region Member Directory 18 To 25 Years": 53,
-    "County/Area/Region Permits Report": 69,
-    "County/Area/Region Disclosure Report": 77,
-    "County/Area/Region Training Report": 80,
-    "County/Area/Region Awards Report": 95,
-    "County Disclosure Management Report": 101,
-    # region reports
-    "Region Member Directory": 37,
-    "Region Appointments Report": 52,
-    "Region Permit Report": 72,
-    "Region Disclosure Report": 76,
-    "Region Training Report": 84,
-    "Region Disclosure Management Report": 100,
+_report_ids_appointments: dict[ci.TYPES_UNIT_LEVELS, int] = {  # unit level -> Compass report number (sent as pReportNumber)
+    "Group": 59,
+    "District": 50,
+    "County": 48,
+    "Region": 52,
+}
+_report_ids_member_directory: dict[ci.TYPES_UNIT_LEVELS, int] = {
+    "District": 51,
+    "County": 49,
+    "Region": 37,
+}
+_report_ids_18_25_member_directory: dict[ci.TYPES_UNIT_LEVELS, int] = {  # only a County-level report number is listed
+    "County": 53,
+}
+_report_ids_permits: dict[ci.TYPES_UNIT_LEVELS, int] = {
+    "District": 70,
+    "County": 69,
+    "Region": 72,
+}
+_report_ids_disclosure: dict[ci.TYPES_UNIT_LEVELS, int] = {
+    "District": 78,
+    "County": 77,
+    "Region": 76,
+}
+_report_ids_training: dict[ci.TYPES_UNIT_LEVELS, int] = {
+    "District": 79,
+    "County": 80,
+    "Region": 84,
+}
+_report_ids_awards: dict[ci.TYPES_UNIT_LEVELS, int] = {  # no Region-level awards report number is listed
+    "District": 94,
+    "County": 95,
+}
+_report_ids_disclosure_management: dict[ci.TYPES_UNIT_LEVELS, int] = {
+    "District": 102,
+    "County": 101,
+    "Region": 100,
 }
 TYPES_REPORTS = Literal[
-    # group
-    "Group Appointments Report",
-    # district
-    "District Appointments Report",
-    "District Member Directory Report",
-    "District Permits Report",
-    "District Disclosure Report",
-    "District Training Report",
-    "District Awards Report",
-    "District Disclosure Management Report",
-    # county
-    "County/Area/Region Appointments Report",
-    "County/Area/Region Member Directory Report",
-    "County/Area/Region Member Directory 18 To 25 Years",
-    "County/Area/Region Permits Report",
-    "County/Area/Region Disclosure Report",
-    "County/Area/Region Training Report",
-    "County/Area/Region Awards Report",
-    "County Disclosure Management Report",
-    # region
-    "Region Member Directory",
-    "Region Appointments Report",
-    "Region Permit Report",
-    "Region Disclosure Report",
-    "Region Training Report",
-    "Region Disclosure Management Report",
+    "Appointments Report",
+    "Member Directory Report",
+    "18-25 Member Directory Report",
+    "Permits Report",
+    "Disclosure Report",
+    "Training Report",
+    "Awards Report",
+    "Disclosure Management Report",
 ]
+_report_ids: dict[TYPES_REPORTS, dict[ci.TYPES_UNIT_LEVELS, int]] = {  # report type -> {unit level -> report number}; export_report rejects levels missing from the inner dict
+    "Appointments Report": _report_ids_appointments,
+    "Member Directory Report": _report_ids_member_directory,
+    "18-25 Member Directory Report": _report_ids_18_25_member_directory,
+    "Permits Report": _report_ids_permits,
+    "Disclosure Report": _report_ids_disclosure,
+    "Training Report": _report_ids_training,
+    "Awards Report": _report_ids_awards,
+    "Disclosure Management Report": _report_ids_disclosure_management,
+}
 
 
-def export_report(client: Client, auth_ids: TYPE_AUTH_IDS, report_type: TYPES_REPORTS, stream: bool = False) -> bytes:
+def export_report(
+    client: Client,
+    report_type: TYPES_REPORTS,
+    hierarchy_level: ci.TYPES_HIERARCHY_LEVELS,
+    auth_ids: TYPE_AUTH_IDS,
+    stream: bool = False,
+) -> str:
     """Exports report as CSV from Compass.
 
     See `Reports.get_report` for an overview of the export process
@@ -99,10 +104,14 @@ def export_report(client: Client, auth_ids: TYPE_AUTH_IDS, report_type: TYPES_RE
             reports a HTTP 5XX status code
 
     """
-    if report_type not in _report_types:
-        types = [*_report_types.keys()]
+    if report_type not in _report_ids:
+        types = [*_report_ids]
         raise ci.CompassReportError(f"{report_type} is not a valid report type. Valid report types are {types}") from None
-    report_number = _report_types[report_type]
+    report_level_map = _report_ids[report_type]
+    if hierarchy_level not in report_level_map:
+        raise ci.CompassReportError(f"Requested report does not exist for hierarchy level: {hierarchy_level}.")
+    hierarchy_level = cast(ci.TYPES_UNIT_LEVELS, hierarchy_level)
+    report_number = report_level_map[hierarchy_level]
 
     # Get token for report type & role running said report:
     run_report_url = _get_report_token(client, auth_ids, report_number)
@@ -114,13 +123,14 @@ def export_report(client: Client, auth_ids: TYPE_AUTH_IDS, report_type: TYPES_RE
     # Update form data & set location selection:
     _update_form_data(client, report_page, run_report_url, report_number)
 
-    # Export the report:
+    # Get report export URL:
     logger.info("Exporting report")
     export_url = _extract_report_export_url(report_page.decode("UTF-8"))
 
-    time_string = time.strftime("%Y-%m-%d %H-%M-%S")  # colons are illegal on windows
-    filename = f"Compass Export - {report_type} - {time_string}.csv"
-    csv_export = _download_report(client, export_url, streaming=stream, filename=filename)
+    # Download report to CSV:
+    start = time.time()
+    csv_export = _download_report(client, export_url, streaming=stream)
+    logger.debug(f"Downloading took {time.time() - start:.2f}s")
 
     # start = time.time()
     # TODO TRAINING REPORT ETC.
@@ -142,16 +152,15 @@ def export_report(client: Client, auth_ids: TYPE_AUTH_IDS, report_type: TYPES_RE
 
 
 def _get_report_token(client: Client, auth_ids: TYPE_AUTH_IDS, report_number: int) -> str:
-    params = {
-        "pReportNumber": str(report_number),
-        "pMemberRoleNumber": str(auth_ids[1]),  # auth IDs are membership number, role number, 'jk'
-    }
     logger.debug("Getting report token")
     response = auth_header.auth_header_get(
         auth_ids,
         client,
         f"{Settings.web_service_path}/ReportToken",
-        params=params,
+        params={
+            "pReportNumber": str(report_number),
+            "pMemberRoleNumber": str(auth_ids[1]),  # auth IDs are membership number, role number, 'jk'
+        },
     )
     _error_status(response)
 
@@ -174,26 +183,27 @@ def _update_form_data(client: Client, report_page: bytes, run_report: str, repor
     form_data = {el.name: el.value for el in tree.forms[0].inputs if el.get("type") not in {"checkbox", "image"}}
 
     # Appointments Reports
-    if report_number == 52:
+    if report_number in {48, 52}:  # County, Region
         form_data = _form_data_appointments(form_data, tree)
 
     # Compass does user-agent sniffing in reports!!! This does seem to be the
     # only place that *requires* a Mozilla/5 type UA.
     # Including the MicrosoftAjax pair lets us check errors quickly. In reality
     # we don't care about the output of this POST, just that it doesn't fail.
-    report = client.post(
+    updated_report_page = client.post(
         run_report,
         data=form_data,
         headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "X-MicrosoftAjax": "Delta=true"},
     )
 
     # Check error state
-    _error_status(report, msg="Updating report locations failed!")
-    if "compass.scouts.org.uk%2fError.aspx|" in report.text:
+    _error_status(updated_report_page, msg="Updating report locations failed!")
+    if "compass.scouts.org.uk%2fError.aspx|" in updated_report_page.text:
         raise ci.CompassReportError("Compass Error!")
 
 
 def _form_data_appointments(form_data: dict[str, str], tree: html.HtmlElement) -> dict[str, str | None]:
+    """Select all units/locations."""
     additional_form_data = {
         "ReportViewer1$ctl10": "ltr",
         "ReportViewer1$ctl11": "standards",
@@ -205,34 +215,15 @@ def _form_data_appointments(form_data: dict[str, str], tree: html.HtmlElement) -
         "__ASYNCPOST": "true",
     }  # TODO this may not be needed. Test.
 
-    # ReportViewer1$ctl04$ctl05$txtValue - County Label
-    # ReportViewer1$ctl04$ctl07$txtValue - District Label
-    # ReportViewer1$ctl04$ctl09$txtValue - Role Statuses
-    # ReportViewer1$ctl04$ctl15$txtValue - Columns Label
-
-    numbered_counties = _parse_drop_down_list(tree, "ReportViewer1_ctl04_ctl05_divDropDown")  # Counties
-    numbered_districts = _parse_drop_down_list(tree, "ReportViewer1_ctl04_ctl07_divDropDown")  # Districts
-    numbered_role_statuses = _parse_drop_down_list(tree, "ReportViewer1_ctl04_ctl09_divDropDown")  # Role Statuses
-    numbered_column_names = _parse_drop_down_list(tree, "ReportViewer1_ctl04_ctl15_divDropDown")  # Report Fields
-
-    # # Export regional roles only
-    # form_data["ReportViewer1$ctl04$ctl05$txtValue"] = "Regional Roles"
-    # form_data["ReportViewer1$ctl04$ctl05$divDropDown$ctl01$HiddenIndices"] = "0"
-
-    # Export all districts
-    form_data["ReportViewer1$ctl04$ctl05$txtValue"] = ", ".join(numbered_counties.values())
-    form_data["ReportViewer1$ctl04$ctl05$divDropDown$ctl01$HiddenIndices"] = ",".join(numbered_counties.keys())
-    form_data["ReportViewer1$ctl04$ctl07$txtValue"] = ", ".join(numbered_districts.values())
-    form_data["ReportViewer1$ctl04$ctl07$divDropDown$ctl01$HiddenIndices"] = ",".join(numbered_districts.keys())
-
-    # TODO this may not be needed. Test.
-    # update text values of role statuses and column names from default indices
-    form_data["ReportViewer1$ctl04$ctl09$txtValue"] = _get_defaults_labels(
-        form_data, "ReportViewer1$ctl04$ctl09$divDropDown$ctl01$HiddenIndices", numbered_role_statuses
-    )
-    form_data["ReportViewer1$ctl04$ctl15$txtValue"] = _get_defaults_labels(
-        form_data, "ReportViewer1$ctl04$ctl15$divDropDown$ctl01$HiddenIndices", numbered_column_names
-    )
+    # report level - 1 (e.g. county -> district)
+    numbered_levels_children = _parse_drop_down_list(tree, "ReportViewer1_ctl04_ctl05_divDropDown")
+    form_data["ReportViewer1$ctl04$ctl05$txtValue"] = ", ".join(numbered_levels_children.values())
+    form_data["ReportViewer1$ctl04$ctl05$divDropDown$ctl01$HiddenIndices"] = ",".join(numbered_levels_children.keys())
+
+    # report level - 2 (e.g. county -> group)
+    numbered_levels_grandchildren = _parse_drop_down_list(tree, "ReportViewer1_ctl04_ctl07_divDropDown")
+    form_data["ReportViewer1$ctl04$ctl07$txtValue"] = ", ".join(numbered_levels_grandchildren.values())
+    form_data["ReportViewer1$ctl04$ctl07$divDropDown$ctl01$HiddenIndices"] = ",".join(numbered_levels_grandchildren.keys())
 
     return form_data | additional_form_data
 
@@ -242,33 +233,21 @@ def _extract_report_export_url(report_page: str) -> str:
     cut = report_page[start:].removeprefix('ExportUrlBase":"')
     end = cut.index('"')
     full_url = cut[:end].encode().decode("unicode-escape")
-    return f"{full_url}CSV"
+    return f"{Settings.base_url}/{full_url}CSV"
 
 
-def _download_report(client: Client, url_path: str, streaming: bool, filename: str | None = None) -> bytes:
-    start = time.time()
-    url = f"{Settings.base_url}/{url_path}"
-
-    # actually do the download
-    if streaming:
-        csv_export = b""
-        with client.get(url, stream=True) as r:
-            _error_status(r)
-            for chunk in r.iter_content(chunk_size=None):  # Chunk size == 1MiB
-                csv_export += chunk
-    else:
-        csv_export = client.get(url).content
-
-    logger.debug(f"Exporting took {time.time() - start:.2f}s")
-
-    # maybe save to disk
-    if filename is not None:
-        logger.info("Saving report")
-        with context_managers.filesystem_guard("Unable to write report export"):
-            Path(filename).write_bytes(csv_export)
-        logger.info("Report Saved")
+def _download_report(client: Client, url: str, streaming: bool) -> str:
+    """Download the report export from `url` and return it as text; `streaming` selects a chunked download."""
+    if streaming:
+        with client.get(url, stream=True) as response:
+            _error_status(response)
+            # chunk_size=None yields chunks in whatever sizes they arrive; join once instead of repeated `+=`
+            raw = b"".join(response.iter_content(chunk_size=None))
+    else:
+        response = client.get(url)
+        _error_status(response)  # same status check as the streaming path, which this branch used to skip
+        raw = response.content
 
-    return csv_export
+    return raw.decode("utf-8-sig")  # report is returned with Byte Order Mark
 
 
 def _error_status(response: requests.Response, /, msg: str = "Request to Compass failed!") -> None:

diff --git a/compass/core/reports.py b/compass/core/reports.py
@@ -10,8 +10,9 @@ def __init__(self, session: ci.Logon):
         """Constructor for Reports."""
         self.auth_ids = session.membership_number, session.role_number, session._jk
         self.client = session._client
+        self.hierarchy_level = session.hierarchy.level
 
-    def get_report(self, report_type: TYPES_REPORTS) -> bytes:
+    def get_report(self, report_type: TYPES_REPORTS) -> str:
         """Exports report as CSV from Compass.
 
         Exporting a report is of course surprisingly complicated. The process
@@ -53,4 +54,4 @@ def get_report(self, report_type: TYPES_REPORTS) -> bytes:
                 reports a HTTP 5XX status code
 
         """
-        return export_report(self.client, self.auth_ids, report_type, stream=False)
+        return export_report(self.client, report_type, self.hierarchy_level, self.auth_ids, stream=False)