Skip to content

Commit

Permalink
Merge pull request #778 from freelawproject/update-conn-appeals
Browse files Browse the repository at this point in the history
feat(connappct): Fix Conn App Ct
  • Loading branch information
flooie authored Nov 20, 2023
2 parents 8530b4e + 26cbb75 commit 2951180
Show file tree
Hide file tree
Showing 4 changed files with 2,318 additions and 447 deletions.
1 change: 0 additions & 1 deletion juriscraper/opinions/united_states/state/conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import re
from datetime import date

from juriscraper.lib.string_utils import normalize_dashes
from juriscraper.OpinionSiteLinear import OpinionSiteLinear


Expand Down
34 changes: 30 additions & 4 deletions juriscraper/opinions/united_states/state/connappct.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,41 @@
Author: Asadullah Baig<[email protected]>
Date created: 2014-07-11
History:
- 2022-02-02, satsuki-chan: Fixed docket and name separator, changed super class to OpinionSiteLinear
- 2022-02-02, satsuki-chan: Updated to OpinionSiteLinear
- 2023-11-20, William Palin: Updated
"""

from juriscraper.opinions.united_states.state import conn
from datetime import date

from juriscraper.OpinionSiteLinear import OpinionSiteLinear

class Site(conn.Site):

class Site(OpinionSiteLinear):
    """Scraper for the Connecticut Appellate Court opinion archive page.

    The archive page for the current year is fetched and every ``<li>``
    inside a ``<ul>`` is turned into one entry of ``self.cases``.
    """

    def __init__(self, *args, **kwargs):
        """Configure the court id, the archive URL and the default status."""
        super().__init__(*args, **kwargs)
        self.court_id = self.__module__
        # The archive file name embeds the two-digit current year.
        self.year = date.today().strftime("%y")
        self.url = f"http://www.jud.ct.gov/external/supapp/archiveAROap{self.year}.htm"
        self.docket_regex = r"AC\d+"
        self.status = "Published"

    def _process_html(self) -> None:
        """Process the html and extract out the opinions

        Each ``<ul>`` is treated as the list of opinions for one date. The
        date is read from the nearest ``<b>`` element preceding the list:
        the last whitespace-separated token of its text, with the final
        character dropped (presumably a trailing colon -- confirm against
        the live page).

        Lists and items that do not look like opinion entries (an empty
        ``<ul>``, no preceding ``<b>``, an ``<li>`` without a link) are
        skipped rather than raising ``IndexError`` and aborting the scrape.

        :return: None
        """
        for date_section in self.html.xpath("//ul"):
            # Use len() explicitly; element truthiness is deprecated in lxml.
            if len(date_section) == 0:
                continue

            headings = date_section[0].xpath(".//preceding::b[1]")
            if not headings:
                continue

            tokens = headings[0].text_content().split()
            if not tokens:
                continue
            # Named date_filed so the imported ``date`` class is not shadowed.
            date_filed = tokens[-1][:-1]

            for li in date_section.xpath(".//li"):
                links = li.xpath(".//a")
                if not links:
                    continue
                link = links[0]

                # The link text is the docket; the rest of the item text is
                # the case name.
                docket = link.text_content()
                name = li.text_content().replace(docket, "").strip()
                self.cases.append(
                    {
                        "date": date_filed,
                        "url": link.get("href"),
                        "docket": docket,
                        "name": name,
                    }
                )
Loading

0 comments on commit 2951180

Please sign in to comment.