Merge pull request #778 from freelawproject/update-conn-appeals

feat(connappct): Fix Conn App Ct
freelawproject · Nov 20, 2023 · 2951180 · 2951180
2 parents 8530b4e + 26cbb75
commit 2951180
Show file tree

Hide file tree

Showing 4 changed files with 2,318 additions and 447 deletions.
diff --git a/juriscraper/opinions/united_states/state/conn.py b/juriscraper/opinions/united_states/state/conn.py
@@ -16,7 +16,6 @@
 import re
 from datetime import date
 
-from juriscraper.lib.string_utils import normalize_dashes
 from juriscraper.OpinionSiteLinear import OpinionSiteLinear
 
 

diff --git a/juriscraper/opinions/united_states/state/connappct.py b/juriscraper/opinions/united_states/state/connappct.py
@@ -4,15 +4,41 @@
 Author: Asadullah Baig<[email protected]>
 Date created: 2014-07-11
 History:
-    - 2022-02-02, satsuki-chan: Fixed docket and name separator, changed super class to OpinionSiteLinear
+    - 2022-02-02, satsuki-chan: Updated to OpinionSiteLinear
+    - 2023-11-20, William Palin: Updated to subclass OpinionSiteLinear directly instead of conn.Site
 """
 
-from juriscraper.opinions.united_states.state import conn
+from datetime import date
 
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear
 
-class Site(conn.Site):
+
+class Site(OpinionSiteLinear):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.court_id = self.__module__
+        self.year = date.today().strftime("%y")
         self.url = f"http://www.jud.ct.gov/external/supapp/archiveAROap{self.year}.htm"
-        self.docket_regex = r"AC\d+"
+        self.status = "Published"
+
+    def _process_html(self) -> None:
+        """Process the HTML and extract the opinions
+
+        :return: None
+        """
+        for date_section in self.html.xpath("//ul"):
+            b = date_section[0].xpath(".//preceding::b[1]")[0]
+            date = b.text_content().strip().split()[-1][:-1]
+            for li in date_section.xpath(".//li"):
+                link = li.xpath(".//a")[0]
+                link_text = link.text_content()
+                docket = link.text_content()
+                name = li.text_content().replace(link_text, "").strip()
+                self.cases.append(
+                    {
+                        "date": date,
+                        "url": link.get("href"),
+                        "docket": docket,
+                        "name": name,
+                    }
+                )