From 510ac094a1f3fe435b087e07c16cae6c2c5036d7 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Tue, 17 Dec 2024 11:11:29 -0500 Subject: [PATCH] fix(okla): skip row with no docket number Solves #1275 --- juriscraper/opinions/united_states/state/okla.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/juriscraper/opinions/united_states/state/okla.py b/juriscraper/opinions/united_states/state/okla.py index 609acb81b..18f98b9c5 100644 --- a/juriscraper/opinions/united_states/state/okla.py +++ b/juriscraper/opinions/united_states/state/okla.py @@ -9,6 +9,7 @@ from lxml import html +from juriscraper.AbstractSite import logger from juriscraper.lib.html_utils import strip_bad_html_tags_insecure from juriscraper.OpinionSiteLinear import OpinionSiteLinear @@ -25,17 +26,18 @@ def _process_html(self): for row in self.html.xpath(".//li[@class='decision']"): name, citation = row.xpath(".//a/text()") url = row.xpath(".//a/@href")[0] - date_filed_raw = row.xpath(".//span[@class='decidedDate']/text()")[ - 0 - ].strip() - docket_number_raw = row.xpath( - ".//span[@class='caseNumber']/text()" - )[0].strip() + date_filed_raw = row.xpath(".//span[@class='decidedDate']/text()") summary = row.xpath(".//p[@class='summaryParagraph']/text()")[0] + docket = row.xpath(".//span[@class='caseNumber']/text()") + if not docket: + logger.debug("Skipping row without docket number") + continue + docket_number_raw = docket[0].strip() + self.cases.append( { - "date": date_filed_raw.split()[1], + "date": date_filed_raw[0].strip().split()[1], "name": name, "docket": docket_number_raw.split()[1], "citation": citation,