diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b40483f..158a621 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,14 +1,14 @@ name: Lint -on: [push, pull_request] +on: [pull_request] jobs: lint: runs-on: ubuntu-latest steps: - name: Check out repository - uses: actions/checkout@v3 - - uses: actions/cache@v3 + uses: actions/checkout@v4 + - uses: actions/cache@v4 name: Configure npm caching with: path: ~/.npm diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 831fcf4..e8f1928 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,18 +1,20 @@ name: Test -on: [push, pull_request] +on: [pull_request] jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: - python-version: "3.10" - - uses: actions/cache@v2 + python-version: "3.12" + - name: Install packages to support building lxml from source + run: sudo apt-get update && sudo apt-get install -y python3.12-dev libxml2-dev libxslt-dev + - uses: actions/cache@v4 name: Configure pip caching with: path: ~/.cache/pip diff --git a/README.md b/README.md index 606b126..ccd924b 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ and export results as CSV or JSON reports. 
Create a Python virtual environment and install required packages: ``` -python3.6 -m venv venv +python3.12 -m venv venv source venv/bin/activate pip install -r requirements/base.txt ``` @@ -139,7 +139,7 @@ yarn build Create a Python virtual environment and install required packages: ``` -python3.6 -m venv venv +python3.12 -m venv venv source venv/bin/activate pip install -r requirements/base.txt ``` @@ -269,9 +269,8 @@ fab configure -H crawler The `configure` command: -- Installs Node, Yarn, and Git -- Installs a modern version of SQLite -- Installs Python 3 +- Installs Node and Git +- Installs Python 3.12 ### Deploying the application diff --git a/crawler/wpull_plugin.py b/crawler/wpull_plugin.py index 26d194c..171bc61 100644 --- a/crawler/wpull_plugin.py +++ b/crawler/wpull_plugin.py @@ -1,4 +1,3 @@ -import asyncio import logging import re from urllib import parse @@ -39,9 +38,8 @@ def patch_wpull_connection(): """Use wait_timeout instead of close_timeout for readline.""" - @asyncio.coroutine - def readline(self): - data = yield from self.run_network_operation( + async def readline(self): + data = await self.run_network_operation( self.reader.readline(), wait_timeout=self._timeout, name="Readline" ) return data diff --git a/fabfile.py b/fabfile.py index 60927f2..0c3da95 100644 --- a/fabfile.py +++ b/fabfile.py @@ -16,10 +16,6 @@ NODE_VERSION = "20" -SQLITE_VERSION = "3390200" -SQLITE_BASENAME = f"sqlite-autoconf-{SQLITE_VERSION}" -SQLITE_INSTALL_ROOT = f"{DEPLOY_ROOT}/{SQLITE_BASENAME}" - SOURCE_PARENT = f"{DEPLOY_ROOT}/cfpb" SOURCE_REPO = "https://github.com/cfpb/website-indexer.git" SOURCE_DIRNAME = "website-indexer" @@ -62,6 +58,12 @@ def configure(conn): # Install git to be able to clone source code repository. conn.sudo("yum install -y git") + # Install Python 3.12. + conn.sudo("yum install -y python3.12") + + # Install libraries needed to build lxml from source. 
+ conn.sudo("yum install -y python3.12-devel libxml2-devel libxslt-devel") + # Set up deploy root and grant permissions to deploy user. conn.sudo(f"mkdir -p {DEPLOY_ROOT}") conn.sudo(f"chown -R {conn.user}:{conn.user} {DEPLOY_ROOT}") @@ -85,7 +87,7 @@ def deploy(conn): with conn.cd(SOURCE_ROOT): conn.sudo("corepack enable") conn.run("yarn && yarn build") - conn.run("python3 -m venv venv") + conn.run("python3.12 -m venv venv") with conn.prefix("source venv/bin/activate"): conn.run("pip install -r requirements/base.txt") diff --git a/requirements/base.txt b/requirements/base.txt index cc5e6c2..74a17da 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -8,13 +8,8 @@ django-filter==21.1 django-modelcluster==5.3 djangorestframework==3.13.1 djangorestframework-csv==2.1.1 -lxml==4.9.1 whitenoise==5.3.0 -wpull==2.0.1 -# wpull doesn't set upper bounds for some of its requirements, -# so we need to specify these manually: -# See https://github.com/ArchiveTeam/wpull/blob/v2.0.1/requirements.txt -html5lib==0.9999999 -sqlalchemy==1.0.12 -tornado==4.5.3 +# Ensure libxml2 is loaded dynamically; see +# https://html5-parser.readthedocs.io/en/latest/#unix +wpull@https://github.com/ArchiveTeam/ludios_wpull/archive/refs/tags/5.0.3.tar.gz --no-binary=lxml