Skip to content

Commit

Permalink
feat(acms parser): Adds a parser class to handle attachment pages.
Browse files Browse the repository at this point in the history
  • Loading branch information
ERosendo committed Nov 20, 2023
1 parent 6ffc533 commit b92755d
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
2 changes: 2 additions & 0 deletions juriscraper/pacer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .acms_attachment_page import ACMSAttachmentPage
from .acms_docket import ACMSDocketReport
from .appellate_attachment_page import AppellateAttachmentPage
from .appellate_docket import AppellateDocketReport
Expand All @@ -19,6 +20,7 @@
from .rss_feeds import PacerRssFeed

__all__ = [
ACMSAttachmentPage,
ACMSDocketReport,
AppellateDocketReport,
AttachmentPage,
Expand Down
88 changes: 88 additions & 0 deletions juriscraper/pacer/acms_attachment_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import json
import pprint
import sys
from typing import Dict

from ..lib.log_tools import make_default_logger
from .reports import BaseReport

logger = make_default_logger()


class ACMSAttachmentPage(BaseReport):
"""Parse ACMS attachment pages' JSON."""

def __init__(self, court_id, pacer_session=None):
super().__init__(court_id, pacer_session)

def _parse_text(self, text):
"""Store the ACMS JSON
This does not, in fact, actually *parse* the data, it
stores it for subsequent parsing, which happens in
data().
:param text: A unicode object
:return: None
"""
self._acms_json = json.loads(text)

@property
def data(self) -> Dict:
"""Extract relevant information from the JSON payload
provided by the extension.
:return: a dict containing the following fields:
- pacer_doc_id: The id of the main document.
- pacer_case_id: The pacer case id for this case.
- entry_number - The value of the row we came from.
- attachments: A list of attached items with the following fields:
- attachment_number: The attachment number.
- description: A description of the item.
- page_count: The number of pages for the attachment.
- pacer_doc_id: The pacer doc id for the attachment (a str).
- acms_document_guid: The GUID of the document in ACMS.
"""
case_details = self._acms_json["caseDetails"]
docket_entry = self._acms_json["docketEntry"]
result = {
"pacer_doc_id": docket_entry["docketEntryId"],
"pacer_case_id": case_details["caseId"],
"entry_number": docket_entry["entryNumber"],
"attachments": [],
}

for row in self._acms_json["docketEntryDocuments"]:
result["attachments"].append(
{
"attachment_number": int(row["documentNumber"]),
"description": row["name"],
"page_count": row["billablePages"],
"pacer_doc_id": docket_entry["docketEntryId"],
"acms_document_guid": row["docketDocumentDetailsId"],
}
)

return result


def main():
if len(sys.argv) != 2:
print(
"Usage: python -m juriscraper.pacer.acms_attachment_page filepath"
)
print("Please provide a path to a file to parse.")
sys.exit(1)
# Court ID is only needed for querying.
report = ACMSAttachmentPage("cand")
filepath = sys.argv[1]
print(f"Parsing JSON file at {filepath}")
with open(filepath) as f:
text = f.read()
report._parse_text(text)
pprint.pprint(report.data, indent=2)


if __name__ == "__main__":
main()

0 comments on commit b92755d

Please sign in to comment.