From 791d7d9f0c90cd8922eec8bfaaaf8b1e757f30c5 Mon Sep 17 00:00:00 2001 From: Adam Buckingham Date: Tue, 24 Oct 2023 10:04:33 -0400 Subject: [PATCH] Getting scraper py310 to deploy properly and work --- .dockerignore | 6 +++++- Dockerfile | 6 ++---- conf/config.yml | 4 +++- crontab | 2 +- pyproject.toml | 11 +++++------ setup.cfg | 35 +++++++++++++++++++++++------------ 6 files changed, 39 insertions(+), 25 deletions(-) diff --git a/.dockerignore b/.dockerignore index e0d839d..e64139c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -15,4 +15,8 @@ env/ venv/ .venv* crontab-test -.vscode/ \ No newline at end of file +.vscode/ + +cf* +bin* +tests* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 05a73b2..9de6a1a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,6 @@ ENV SUPERCRONIC_URL=https://github.com/albertcrowley/supercronic/releases/downlo SUPERCRONIC=supercronic-linux-x86 \ SUPERCRONIC_SHA1SUM=2b5144dee1af0dc07c372c3c45026dd42af81226 -RUN pip install --upgrade pip ADD requirements.txt . RUN apt-get update && apt-get install -y \ @@ -43,8 +42,6 @@ RUN apt-get update && apt-get install -y \ #clean up the apt cache && rm -rf /var/lib/apt/lists/* -RUN pip install --upgrade pip -RUN pip install -r requirements.txt --no-cache-dir RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ && echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list \ @@ -54,10 +51,11 @@ RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key && unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/ ADD . . -RUN pip install . +RUN pip install --upgrade pip && pip install -e . #see https://docs.cloudfoundry.org/devguide/deploy-apps/push-docker.html COPY ./conf/passwd /etc/passwd +COPY ./conf /usr/local/conf ENTRYPOINT ["supercronic"] diff --git a/conf/config.yml b/conf/config.yml index 74e1a73..9d248b9 100644 --- a/conf/config.yml +++ b/conf/config.yml @@ -5,4 +5,6 @@ client: from_date: "yesterday" to_date: "yesterday" database: - update_old: True \ No newline at end of file + update_old: True +prediction: + model_name: "clf_ajbuckingham_roc_auc.pkl" \ No newline at end of file diff --git a/crontab b/crontab index 9587891..7f71717 100644 --- a/crontab +++ b/crontab @@ -1,2 +1,2 @@ # Run every day at 5 AM Eastern Standard Time -0 10 * * * /usr/local/bin/python3.6 main.py +0 10 * * * /usr/local/bin/fbo_scraper diff --git a/pyproject.toml b/pyproject.toml index b9f5893..56aecf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,10 +3,9 @@ requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" [tool.pytest.ini_options] -addopts = "--cov=fbo_scraper" -testpaths = [ - "tests", -] +addopts = "--cov=src" +testpaths = ["tests"] +pythonpath = "src" [tool.mypy] mypy_path = "src" @@ -63,8 +62,8 @@ line-length = 88 # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -# Assume Python 3.7 minimum. -target-version = "py37" +# Assume Python 3.10 minimum. +target-version = "py310" [tool.ruff.mccabe] # Unlike Flake8, default to a complexity level of 10. diff --git a/setup.cfg b/setup.cfg index e690e52..633d3ae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,14 +9,10 @@ platforms = unix, linux, osx classifiers = Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 [options] -packages = - fbo_scraper +packages = find: install_requires = addict dill @@ -28,30 +24,45 @@ install_requires = python-json-logger PyYAML requests - scikit-learn + scikit-learn==1.2.2 scipy sqlalchemy sqlalchemy_utils textract urllib3 wget -python_requires = >=3.7 +python_requires = >=3.10 package_dir = =src -zip_safe = no +zip_safe = False +include_package_data = True + [options.extras_require] testing = + fpdf + python-docx pytest pytest-cov mypy + requests_mock + reportlab ruff - tox + +[options.package_data] +mypkg = + *.pkl + + +[options.packages.find] +where=src +exclude = + cf* + bin* + docs* + [options.entry_points] console_scripts = fbo_scraper = fbo_scraper.main:actual_main -[options.package_data] -fbo_scraper = py.typed -