diff --git a/juriscraper/OpinionSite.py b/juriscraper/OpinionSite.py index 975dcbc01..4ad70adbc 100644 --- a/juriscraper/OpinionSite.py +++ b/juriscraper/OpinionSite.py @@ -138,7 +138,7 @@ def _get_other_dates(self): def _get_attorneys(self): return None - def extract_from_text(self, scraped_text): + def extract_from_text(self, scraped_text: str) -> dict: """Pass scraped text into function and return data as a dictionary :param opinion_text: Text of scraped content diff --git a/juriscraper/opinions/united_states/state/nd.py b/juriscraper/opinions/united_states/state/nd.py index 539f17954..f06e600d7 100644 --- a/juriscraper/opinions/united_states/state/nd.py +++ b/juriscraper/opinions/united_states/state/nd.py @@ -115,12 +115,11 @@ def extract_from_text(self, scraped_text: str) -> dict: of models field - value pairs """ metadata = {} - regex = r"(?P20\d{2})\s(?PND)\s(?P\d+)" + regex = r"20\d{2}\sND\s\d+" citation_match = re.search(regex, scraped_text[:1000]) if citation_match: - # type 8 is a neutral citation in Courtlistener - metadata["Citation"] = {**citation_match.groupdict(), "type": 8} + metadata["Citation"] = citation_match.group(0) # Most times, paragraphs are enumerated. The data we are interested # in is in a few lines before the first paragraph diff --git a/juriscraper/opinions/united_states/state/nytrial.py b/juriscraper/opinions/united_states/state/nytrial.py index de5141d5b..8810c7151 100644 --- a/juriscraper/opinions/united_states/state/nytrial.py +++ b/juriscraper/opinions/united_states/state/nytrial.py @@ -129,8 +129,8 @@ def extract_from_text(self, scraped_text: str) -> Dict[str, Any]: pattern = r"
\s?(.*)\r?\n|Docket Number:\s?(.+)" docket_number = self.match(scraped_text, pattern) - pattern = r"\[(?P\d+) (?PMisc 3d) (?P.+)\]" - cite_match = re.search(pattern, scraped_text[:2000]) + regex_citation = r"(?<=\[)\d+ Misc 3d .+(?=\])" + cite_match = re.search(regex_citation, scraped_text[:2000]) # Only for .htm links full_case = None @@ -150,8 +150,7 @@ def extract_from_text(self, scraped_text: str) -> Dict[str, Any]: "author_str": normalize_judge_string(judge)[0] } if cite_match: - metadata["Citation"] = cite_match.groupdict("") - metadata["Citation"]["type"] = 2 # 'State' type in courtlistener + metadata["Citation"] = cite_match.group(0) if full_case: full_case = harmonize(full_case) metadata["Docket"]["case_name_full"] = full_case diff --git a/juriscraper/opinions/united_states/state/pasuperct.py b/juriscraper/opinions/united_states/state/pasuperct.py index f8fbd4d45..4f4138854 100644 --- a/juriscraper/opinions/united_states/state/pasuperct.py +++ b/juriscraper/opinions/united_states/state/pasuperct.py @@ -66,12 +66,8 @@ def extract_from_text(self, scraped_text: str) -> Dict: Not all scraped opinions have them """ - neutral_citation_regex = ( - r"(?P\d{4}) (?PPA Super) (?P\d+)" - ) + neutral_citation_regex = r"\d{4} PA Super \d+" if cite_match := re.search(neutral_citation_regex, scraped_text[:200]): - cite_data = cite_match.groupdict() - cite_data["type"] = 8 # Neutral citation - return {"Citation": cite_data} + return {"Citation": cite_match.group(0)} return {} diff --git a/juriscraper/opinions/united_states/state/vt.py b/juriscraper/opinions/united_states/state/vt.py index 0bed1239b..b2662ac93 100644 --- a/juriscraper/opinions/united_states/state/vt.py +++ b/juriscraper/opinions/united_states/state/vt.py @@ -115,11 +115,8 @@ def set_url( self.url = f"{self.base_url}?{urlencode(params)}" def extract_from_text(self, scraped_text: str): - match = re.search( - r"(?P\d{4}) (?PVT) (?P\d+)", - scraped_text[:1000], - ) + match = re.search(r"\d{4} VT \d+", scraped_text[:1000]) if match: - return {"Citation": {"type": 8, **match.groupdict()}} + return {"Citation": match.group(0)} return {} diff --git a/juriscraper/opinions/united_states/state/wis.py b/juriscraper/opinions/united_states/state/wis.py index 04765747b..b47b1a17e 100644 --- a/juriscraper/opinions/united_states/state/wis.py +++ b/juriscraper/opinions/united_states/state/wis.py @@ -17,9 +17,7 @@ def __init__(self, *args, **kwargs): self.base_url = "https://www.wicourts.gov/supreme/scopin.jsp" self.status = "Published" self.set_url() - self.cite_regex = ( - r"(?P20\d{2})\s(?PWI)\s(?P\d+)" - ) + self.cite_regex = r"20\d{2}\sWI\s\d+" self.make_backscrape_iterable(kwargs) def set_url( @@ -73,10 +71,9 @@ def extract_from_text(self, scraped_text: str) -> dict: :return: date filed """ first_line = scraped_text[:100].splitlines()[0] - match = re.search(self.cite_regex, first_line) + if match := re.search(self.cite_regex, first_line): + return {"Citation": match.group(0)} - if match: - return {"Citation": {**match.groupdict(), "type": 8}} return {} def _download_backwards(self, dates: Tuple[date]) -> None: diff --git a/juriscraper/opinions/united_states/state/wisctapp.py b/juriscraper/opinions/united_states/state/wisctapp.py index 5bb105388..3fdea8941 100644 --- a/juriscraper/opinions/united_states/state/wisctapp.py +++ b/juriscraper/opinions/united_states/state/wisctapp.py @@ -10,9 +10,7 @@ def __init__(self, *args, **kwargs): self.court_id = self.__module__ self.base_url = "https://www.wicourts.gov/other/appeals/caopin.jsp" self.set_url() - self.cite_regex = ( - r"(?P20\d{2})\s(?PWI App)\s(?P\d+)" - ) + self.cite_regex = r"20\d{2}\sWI App\s\d+" def combine_opinions(self, url: str, docket_number: str) -> bool: """Combine duplicate opinions in self.cases diff --git a/tests/local/test_ScraperExtractFromTextTest.py b/tests/local/test_ScraperExtractFromTextTest.py index 2e55825e8..bc2a27122 100644 --- a/tests/local/test_ScraperExtractFromTextTest.py +++ b/tests/local/test_ScraperExtractFromTextTest.py @@ -145,12 +145,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": '1125 Morris Avenue Realty LLC, Plaintiff(s), against Title Issues Agency LLC, MARTIN E. KOFMAN, STEVEN LOWENTHAL, ESQ., and LOWENTHAL PC, "JOHN DOE," "JANE DOE," "ABC CORPORATION," AND "XYZ CORPORATION," Defendant(s).', }, "Opinion": {"author_str": "Fidel E. Gomez"}, - "Citation": { - "volume": "81", - "reporter": "Misc 3d", - "page": "1215(A)", - "type": 2, - }, + "Citation": "81 Misc 3d 1215(A)", "OpinionCluster": { "case_name_full": '1125 Morris Avenue Realty LLC, Plaintiff(s), against Title Issues Agency LLC, MARTIN E. KOFMAN, STEVEN LOWENTHAL, ESQ., and LOWENTHAL PC, "JOHN DOE," "JANE DOE," "ABC CORPORATION," AND "XYZ CORPORATION," Defendant(s).' }, @@ -225,12 +220,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "The People of the State of New York, against J.S., Adolescent Offender.", }, "Opinion": {"author_str": "Conrad D. Singer"}, - "Citation": { - "volume": "66", - "reporter": "Misc 3d", - "page": "1213(A)", - "type": 2, - }, + "Citation": "66 Misc 3d 1213(A)", "OpinionCluster": { "case_name_full": "The People of the State of New York, against J.S., Adolescent Offender." }, @@ -264,12 +254,7 @@ class ScraperExtractFromText(unittest.TestCase): "OpinionCluster": { "case_name_full": '201 East 164th Street Associates, LLC, against Pastora Calderon & ROSA IDALIA ABDELNOUR, "JOHN DOE" & "JANE DOE" A/K/A DUNIA GOMEZ Respondents-Undertenants.' }, - "Citation": { - "volume": "81", - "reporter": "Misc 3d", - "page": "1211(A)", - "type": 2, - }, + "Citation": "81 Misc 3d 1211(A)", }, ), ], @@ -283,12 +268,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "Probate Proceeding, Will of Pia Jeong Yoon, a/k/a PIA JEONG AE YOON, a/k/a PIA J. YOON, a/k/a JEONG YOON, a/k/a JEONG AE YOON", }, "Opinion": {"author_str": "Peter J. Kelly"}, - "Citation": { - "volume": "78", - "reporter": "Misc 3d", - "page": "1203(A)", - "type": 2, - }, + "Citation": "78 Misc 3d 1203(A)", "OpinionCluster": { "case_name_full": "Probate Proceeding, Will of Pia Jeong Yoon, a/k/a PIA JEONG AE YOON, a/k/a PIA J. YOON, a/k/a JEONG YOON, a/k/a JEONG AE YOON" }, @@ -313,12 +293,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "In the Matter of a Proceeding Under Article 6 of the Family Court Act Robyn C., against William M. J. (Deceased) and EVA JANE P.", }, "Opinion": {"author_str": "Javier E. Vargas"}, - "Citation": { - "volume": "66", - "reporter": "Misc 3d", - "page": "1210(A)", - "type": 2, - }, + "Citation": "66 Misc 3d 1210(A)", "OpinionCluster": { "case_name_full": "In the Matter of a Proceeding Under Article 6 of the Family Court Act Robyn C., against William M. J. (Deceased) and EVA JANE P." }, @@ -333,12 +308,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "In the Matter of a Proceeding for Support Under Article 4 of the Family Court Act Michelle B., against Thomas Y.", }, "Opinion": {"author_str": "Javier E. Vargas"}, - "Citation": { - "volume": "73", - "reporter": "Misc 3d", - "page": "1238(A)", - "type": 2, - }, + "Citation": "73 Misc 3d 1238(A)", "OpinionCluster": { "case_name_full": "In the Matter of a Proceeding for Support Under Article 4 of the Family Court Act Michelle B., against Thomas Y." }, @@ -355,12 +325,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "The People of the State of New York against Amela Hot", }, "Opinion": {"author_str": "Donald Leo"}, - "Citation": { - "volume": "58", - "reporter": "Misc 3d", - "page": "1215(A)", - "type": 2, - }, + "Citation": "58 Misc 3d 1215(A)", "OpinionCluster": { "case_name_full": "The People of the State of New York against Amela Hot" }, @@ -375,12 +340,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "The People of the State of New York, against James Smith", }, "Opinion": {"author_str": "Althea E. Drysdale"}, - "Citation": { - "volume": "59", - "reporter": "Misc 3d", - "page": "1211(A)", - "type": 2, - }, + "Citation": "59 Misc 3d 1211(A)", "OpinionCluster": { "case_name_full": "The People of the State of New York, against James Smith" }, @@ -408,12 +368,7 @@ class ScraperExtractFromText(unittest.TestCase): "OpinionCluster": { "case_name_full": "Bernardo Martinaj, against State of New York" }, - "Citation": { - "volume": "78", - "reporter": "Misc 3d", - "page": "1211(A)", - "type": 2, - }, + "Citation": "78 Misc 3d 1211(A)", }, ), ], @@ -427,12 +382,7 @@ class ScraperExtractFromText(unittest.TestCase): "case_name_full": "Alistair Sims, against Lance Frantz Regis A/K/A LANCE REGIS A/K/A LANCE F. REGIS A/K/A FRANTZ L. REGISTRE A/K/A REGISTRE FRANTZ A/K/A VANCE REGIS A/K/A REGIS LANCE A/K/A REGIS L. FRANTZ, Defendant(s).", }, "Opinion": {"author_str": "Michael A. Montesano"}, - "Citation": { - "volume": "81", - "reporter": "Misc 3d", - "page": "1210(A)", - "type": 2, - }, + "Citation": "81 Misc 3d 1210(A)", "OpinionCluster": { "case_name_full": "Alistair Sims, against Lance Frantz Regis A/K/A LANCE REGIS A/K/A LANCE F. REGIS A/K/A FRANTZ L. REGISTRE A/K/A REGISTRE FRANTZ A/K/A VANCE REGIS A/K/A REGIS LANCE A/K/A REGIS L. FRANTZ, Defendant(s)." }, @@ -477,12 +427,7 @@ class ScraperExtractFromText(unittest.TestCase): # https://www.courtlistener.com/api/rest/v3/opinions/10473075/ """IN THE SUPREME COURT\n STATE OF NORTH DAKOTA\n\n 2024 ND 143\n\nRonald Wayne Wootan, Petitioner and Appellant\n v.\nState of North Dakota, Respondent and Appellee\n\n No. 20240025\n\nAppeal from the District Court of Rolette County, Northeast Judicial District,\nthe Honorable Anthony S. Benson, Judge.\n\nAFFIRMED.\n\nPer Curiam.\n\nKiara C. Kraus-Parr, Grand Forks, ND, for petitioner and appellant.\n\nBrian D. Grosinger, State’s Attorney, Rolla, ND, for respondent and appellee.\n\f Wootan v. State\n No. 20240025\n\nPer Curiam.\n\n Ronald Wootan appeals from an order denying his postconviction relief\napplication entered after the district court held an evidentiary hearing on\nremand. See Wootan v. State,""", { - "Citation": { - "volume": "2024", - "reporter": "ND", - "page": "143", - "type": 8, - }, + "Citation": "2024 ND 143", }, ), ( @@ -490,12 +435,7 @@ class ScraperExtractFromText(unittest.TestCase): # https://www.courtlistener.com/api/rest/v3/opinions/10473085/ """IN THE SUPREME COURT\n STATE OF NORTH DAKOTA\n\n 2024 ND 141\n\nRenae Irene Gerszewski, Petitioner and Appellee\n v.\nConrad Keith Rostvet, Respondent and Appellant\n\n\n\n No. 20230361\n\n\n\nConrad Keith Rostvet, Petitioner and Appellant\n v.\nRenae Irene Gerszewski, Respondent and Appellee\n\n\n\n No. 20230362\n\n\n\nConrad Rostvet, Petitioner and Appellant\n v.\nWayne Gerszewski, Respondent and Appellee\n\n\n\n No. 20230363\n\n\n\nAppeal from the District Court of Walsh County, Northeast Judicial District, the Honorable\nBarbara L. Whelan, Judge.\n\fAFFIRMED.\n\nOpinion of the Court by Tufte, Justice.\n\nSamuel A. Gereszek, Grand Forks, N.D., for appellees.\n\nTimothy C. Lamb, Grand Forks, N.D., for appellant.\n\f Gerszewski v. Rostvet\n Nos. 20230361–20230363\n\nTufte, Justice.\n\n[¶1] Conrad Rostvet appeals from a district court’s order""", { - "Citation": { - "volume": "2024", - "reporter": "ND", - "page": "141", - "type": 8, - }, + "Citation": "2024 ND 141", "OpinionCluster": {"case_name": "Gerszewski v. Rostvet"}, "Docket": { "case_name": "Gerszewski v. Rostvet", @@ -509,12 +449,7 @@ class ScraperExtractFromText(unittest.TestCase): # https://www.wicourts.gov/sc/opinion/DisplayDocument.pdf?content=pdf&seqNo=669658 """2023 WI 50\nS C W\nUPREME OURT OF ISCONSIN\nCASE NO.: 2021AP938-CR\nCOMPLETE TITLE: State of Wisconsin,""", { - "Citation": { - "volume": "2023", - "reporter": "WI", - "page": "50", - "type": 8, - }, + "Citation": "2023 WI 50", }, ) ], @@ -523,12 +458,7 @@ class ScraperExtractFromText(unittest.TestCase): # https://www.wicourts.gov/ca/opinion/DisplayDocument.pdf?content=pdf&seqNo=799325 """2024 WI App 36\nCOURT OF APPEALS OF WISCONSIN\nPUBLISHED OPINION""", { - "Citation": { - "volume": "2024", - "reporter": "WI App", - "page": "36", - "type": 8, - }, + "Citation": "2024 WI App 36", }, ) ], @@ -600,12 +530,7 @@ class ScraperExtractFromText(unittest.TestCase): # https://www.courtlistener.com/api/rest/v3/opinions/10566596/ """NOTICE: This opinion is subject to motions for reargument under V.R.A.P. 40 as well as formal\nrevision before publication in the Vermont Reports. Readers are requested to notify the Reporter\nof Decisions by email at: JUD.Reporter@vtcourts.gov or by mail at: Vermont Supreme Court, 109\nState Street, Montpelier, Vermont 05609-0801, of any errors in order that corrections may be made\nbefore this opinion goes to press.\n\n\n 2024 VT 52\n\n No. 23-AP-226\n\nState of Vermont """, { - "Citation": { - "volume": "2024", - "reporter": "VT", - "page": "52", - "type": 8, - } + "Citation": "2024 VT 52", }, ) ], @@ -614,12 +539,7 @@ class ScraperExtractFromText(unittest.TestCase): # https://www.courtlistener.com/api/rest/v3/clusters/7854285/ """NOTICE: This opinion is subject to motions for reargument under V.R.A.P. 40 as well as formal\nrevision before publication in the Vermont Reports. Readers are requested to notify the Reporter\nof Decisions by email at: JUD.Reporter@vermont.gov or by mail at: Vermont Supreme Court, 109\nState Street, Montpelier, Vermont 05609-0801, of any errors in order that corrections may be made\nbefore this opinion goes to press.\n\n\n 2022 VT 35\n\n No. 2021-059\n\nState of Vermont Supreme Court\n\n On Appeal from\n v. Superior Court, Chittenden Unit,\n Criminal Division\n\nRandy F. Therrien """, { - "Citation": { - "volume": "2022", - "reporter": "VT", - "page": "35", - "type": 8, - } + "Citation": "2022 VT 35", }, ) ], @@ -696,12 +616,7 @@ class ScraperExtractFromText(unittest.TestCase): ( "J-A13044-21\n\n 2021 PA Super 113\n\n\n COMMONWEALTH OF PENNSYLVANIA : IN THE SUPERIOR COURT OF\n : PENNSYLVANIA\n :\n ", { - "Citation": { - "volume": "2021", - "reporter": "PA Super", - "page": "113", - "type": 8, - } + "Citation": "2021 PA Super 113", }, ) ],