From 37c8166d8cf0141b2ead041b91c1d65d5900a8fc Mon Sep 17 00:00:00 2001 From: Steven Chen Date: Tue, 6 Dec 2022 10:56:00 -0800 Subject: [PATCH] NR Duplicates Report#14287 Signed-off-by: Steven Chen --- .github/workflows/nr-duplicates-report-cd.yml | 103 ++++++ .github/workflows/nr-duplicates-report-ci.yml | 90 +++++ jobs/nr-duplicates-report/Dockerfile | 35 ++ jobs/nr-duplicates-report/Makefile | 150 ++++++++ jobs/nr-duplicates-report/README.md | 78 +++++ jobs/nr-duplicates-report/config.py | 29 ++ .../daily/nr-duplicates.ipynb | 330 ++++++++++++++++++ jobs/nr-duplicates-report/devops/vaults.json | 10 + jobs/nr-duplicates-report/logging.conf | 28 ++ jobs/nr-duplicates-report/notebookreport.py | 136 ++++++++ jobs/nr-duplicates-report/openshift/Readme.md | 7 + .../openshift/templates/bc.yaml | 122 +++++++ .../openshift/templates/cronjob.yaml | 201 +++++++++++ jobs/nr-duplicates-report/requirements.txt | 38 ++ .../nr-duplicates-report/requirements/dev.txt | 13 + .../requirements/prod.txt | 43 +++ jobs/nr-duplicates-report/run.sh | 4 + jobs/nr-duplicates-report/setup.cfg | 14 + jobs/nr-duplicates-report/setup.py | 22 ++ .../tests/unit/__init__.py | 1 + .../tests/unit/conftest.py | 20 ++ .../tests/unit/test_job.py | 48 +++ jobs/nr-duplicates-report/util/__init__.py | 0 jobs/nr-duplicates-report/util/logging.py | 16 + 24 files changed, 1538 insertions(+) create mode 100644 .github/workflows/nr-duplicates-report-cd.yml create mode 100644 .github/workflows/nr-duplicates-report-ci.yml create mode 100644 jobs/nr-duplicates-report/Dockerfile create mode 100644 jobs/nr-duplicates-report/Makefile create mode 100644 jobs/nr-duplicates-report/README.md create mode 100644 jobs/nr-duplicates-report/config.py create mode 100644 jobs/nr-duplicates-report/daily/nr-duplicates.ipynb create mode 100644 jobs/nr-duplicates-report/devops/vaults.json create mode 100644 jobs/nr-duplicates-report/logging.conf create mode 100644 jobs/nr-duplicates-report/notebookreport.py create mode 100644 
jobs/nr-duplicates-report/openshift/Readme.md create mode 100644 jobs/nr-duplicates-report/openshift/templates/bc.yaml create mode 100644 jobs/nr-duplicates-report/openshift/templates/cronjob.yaml create mode 100644 jobs/nr-duplicates-report/requirements.txt create mode 100644 jobs/nr-duplicates-report/requirements/dev.txt create mode 100644 jobs/nr-duplicates-report/requirements/prod.txt create mode 100755 jobs/nr-duplicates-report/run.sh create mode 100644 jobs/nr-duplicates-report/setup.cfg create mode 100644 jobs/nr-duplicates-report/setup.py create mode 100644 jobs/nr-duplicates-report/tests/unit/__init__.py create mode 100644 jobs/nr-duplicates-report/tests/unit/conftest.py create mode 100644 jobs/nr-duplicates-report/tests/unit/test_job.py create mode 100644 jobs/nr-duplicates-report/util/__init__.py create mode 100644 jobs/nr-duplicates-report/util/logging.py diff --git a/.github/workflows/nr-duplicates-report-cd.yml b/.github/workflows/nr-duplicates-report-cd.yml new file mode 100644 index 000000000..b7a42040c --- /dev/null +++ b/.github/workflows/nr-duplicates-report-cd.yml @@ -0,0 +1,103 @@ +name: Namex Duplicates Report Job CD + +on: + push: + branches: + - main + paths: + - "jobs/nr-duplicates-report/**" + workflow_dispatch: + inputs: + environment: + description: "Environment (dev/test/prod)" + required: true + default: "dev" + +defaults: + run: + shell: bash + working-directory: ./jobs/nr-duplicates-report + +env: + APP_NAME: "nr-duplicates-report" + TAG_NAME: "dev" + +jobs: + nr-duplicates-report-cd-by-push: + runs-on: ubuntu-20.04 + + if: github.event_name == 'push' && github.repository == 'bcgov/namex' + environment: + name: "dev" + + steps: + - uses: actions/checkout@v2 + + - name: Login Openshift + shell: bash + run: | + oc login --server=${{secrets.OPENSHIFT4_LOGIN_REGISTRY}} --token=${{secrets.OPENSHIFT4_SA_TOKEN}} + + - name: CD Flow + shell: bash + env: + OPS_REPOSITORY: ${{ secrets.OPS_REPOSITORY }} + OPENSHIFT_DOCKER_REGISTRY: ${{ 
secrets.OPENSHIFT4_DOCKER_REGISTRY }} + OPENSHIFT_SA_NAME: ${{ secrets.OPENSHIFT4_SA_NAME }} + OPENSHIFT_SA_TOKEN: ${{ secrets.OPENSHIFT4_SA_TOKEN }} + OPENSHIFT_REPOSITORY: ${{ secrets.OPENSHIFT4_REPOSITORY }} + TAG_NAME: ${{ env.TAG_NAME }} + run: | + make cd + + - name: Rocket.Chat Notification + uses: RocketChat/Rocket.Chat.GitHub.Action.Notification@master + if: failure() + with: + type: ${{ job.status }} + job_name: "*NR Duplicates Report Job Built and Deployed to ${{env.TAG_NAME}}*" + channel: "#registries-bot" + url: ${{ secrets.ROCKETCHAT_WEBHOOK }} + commit: true + token: ${{ secrets.GITHUB_TOKEN }} + + nr-duplicates-report-cd-by-dispatch: + runs-on: ubuntu-20.04 + + if: github.event_name == 'workflow_dispatch' && github.repository == 'bcgov/namex' + environment: + name: "${{ github.event.inputs.environment }}" + + steps: + - uses: actions/checkout@v2 + - name: Set env by input + run: | + echo "TAG_NAME=${{ github.event.inputs.environment }}" >> $GITHUB_ENV + + - name: Login Openshift + shell: bash + run: | + oc login --server=${{secrets.OPENSHIFT4_LOGIN_REGISTRY}} --token=${{secrets.OPENSHIFT4_SA_TOKEN}} + + - name: CD Flow + shell: bash + env: + OPS_REPOSITORY: ${{ secrets.OPS_REPOSITORY }} + OPENSHIFT_DOCKER_REGISTRY: ${{ secrets.OPENSHIFT4_DOCKER_REGISTRY }} + OPENSHIFT_SA_NAME: ${{ secrets.OPENSHIFT4_SA_NAME }} + OPENSHIFT_SA_TOKEN: ${{ secrets.OPENSHIFT4_SA_TOKEN }} + OPENSHIFT_REPOSITORY: ${{ secrets.OPENSHIFT4_REPOSITORY }} + TAG_NAME: ${{ env.TAG_NAME }} + run: | + make cd + + - name: Rocket.Chat Notification + uses: RocketChat/Rocket.Chat.GitHub.Action.Notification@master + if: failure() + with: + type: ${{ job.status }} + job_name: "*NR Duplicates Report Job Built and Deployed to ${{env.TAG_NAME}}*" + channel: "#registries-bot" + url: ${{ secrets.ROCKETCHAT_WEBHOOK }} + commit: true + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/nr-duplicates-report-ci.yml b/.github/workflows/nr-duplicates-report-ci.yml new file mode 100644 
# CI for the NR duplicates report job: lint, (placeholder) test and a no-cache
# Docker build, run on pull requests that touch jobs/nr-duplicates-report/.
name: NR Duplicates Report Job CI

on:
  pull_request:
    types: [assigned, synchronize]
    paths:
      - "jobs/nr-duplicates-report/**"

defaults:
  run:
    shell: bash
    working-directory: ./jobs/nr-duplicates-report

jobs:
  # Gate job: every other job `needs` it, so nothing runs outside bcgov/namex.
  setup-job:
    runs-on: ubuntu-20.04

    if: github.repository == 'bcgov/namex'

    steps:
      - uses: actions/checkout@v2
      - run: "true"

  linting:
    needs: setup-job
    runs-on: ubuntu-20.04

    strategy:
      matrix:
        python-version: [3.8]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          make setup
      - name: Lint with pylint
        id: pylint
        run: |
          make pylint
      - name: Lint with flake8
        id: flake8
        run: |
          make flake8

  testing:
    needs: setup-job
    env:
      PG_USER: postgres
      PG_PASSWORD: postgres
      PG_DB_NAME: postgres
      PG_HOST: localhost
      PG_PORT: 5432

    runs-on: ubuntu-20.04

    # The steps below read matrix.python-version; without this matrix the
    # expression is empty and setup-python receives no version to pin.
    strategy:
      matrix:
        python-version: [3.8]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          make setup
      # NOTE(review): the test command is commented out, so this step is
      # currently a no-op and the job only proves that `make setup` works.
      - name: Test with pytest
        id: test
        run: |
          # make test
      #- name: Upload coverage to Codecov
      #  uses: codecov/codecov-action@v1
      #  with:
      #    file: ./queue_services/entity-pay/coverage.xml
      #    flags: entitypay
      #    name: codecov-entity-pay
      #    fail_ci_if_error: true

  build-check:
    needs: setup-job
    runs-on: ubuntu-20.04

    steps:
      - uses: actions/checkout@v2
      - name: build to check strictness
        id: build
        run: |
          make build-nc
+FROM python:3.8.5-buster + +ARG VCS_REF="missing" +ARG BUILD_DATE="missing" + +ENV VCS_REF=${VCS_REF} +ENV BUILD_DATE=${BUILD_DATE} + +LABEL org.label-schema.vcs-ref=${VCS_REF} \ + org.label-schema.build-date=${BUILD_DATE} + +USER root + +# Create working directory +RUN mkdir /opt/app-root && chmod 755 /opt/app-root +RUN mkdir /opt/app-root/data && chmod 777 /opt/app-root/data +WORKDIR /opt/app-root + +# Install the requirements +COPY ./requirements.txt . + +#RUN pip install --upgrade pip +RUN pip install pip==20.3.3 +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +USER 1001 + +# Set Python path +ENV PYTHONPATH=/opt/app-root/src + +EXPOSE 8080 + +CMD [ "python", "/opt/app-root/notebookreport.py" ] diff --git a/jobs/nr-duplicates-report/Makefile b/jobs/nr-duplicates-report/Makefile new file mode 100644 index 000000000..74c141edd --- /dev/null +++ b/jobs/nr-duplicates-report/Makefile @@ -0,0 +1,150 @@ +.PHONY: license +.PHONY: setup +.PHONY: ci cd +.PHONY: db run + +MKFILE_PATH:=$(abspath $(lastword $(MAKEFILE_LIST))) +CURRENT_ABS_DIR:=$(patsubst %/,%,$(dir $(MKFILE_PATH))) + +PROJECT_NAME:=nr-duplicates-report +DOCKER_NAME:=nr-duplicates-report + +################################################################################# +# COMMANDS -- license # +################################################################################# +license: ## Verify source code license headers. + ./scripts/verify_license_headers.sh $(CURRENT_ABS_DIR)/src $(CURRENT_ABS_DIR)/tests + +################################################################################# +# COMMANDS -- Setup # +################################################################################# +setup: install install-dev ## Setup the project + +clean: clean-build clean-pyc clean-test ## Clean the project + rm -rf venv/ + +clean-build: ## Clean build files + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . 
-name '*.egg' -exec rm -fr {} + + +clean-pyc: ## Clean cache files + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . -name '__pycache__' -exec rm -fr {} + + +clean-test: ## clean test files + find . -name '.pytest_cache' -exec rm -fr {} + + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + +build-req: clean ## Upgrade requirements + test -f venv/bin/activate || python3.8 -m venv $(CURRENT_ABS_DIR)/venv ;\ + . venv/bin/activate ;\ + pip install pip==21.1.2 ;\ + pip install -Ur requirements/prod.txt ;\ + pip freeze | sort > requirements.txt + +install: clean ## Install python virtrual environment + test -f venv/bin/activate || python3.8 -m venv $(CURRENT_ABS_DIR)/venv ;\ + . venv/bin/activate ;\ + pip install pip==21.1.2 ;\ + pip install -Ur requirements.txt + +install-dev: ## Install local application + . venv/bin/activate ; \ + pip install -Ur requirements/dev.txt; \ + pip install -e . + +################################################################################# +# COMMANDS - CI # +################################################################################# +ci: pylint flake8 test ## CI flow + +pylint: ## Linting with pylint + . venv/bin/activate && pylint --rcfile=setup.cfg notebookreport.py + +flake8: ## Linting with flake8 + . venv/bin/activate && flake8 notebookreport.py + +lint: pylint flake8 ## run all lint type scripts + +test: ## Unit testing + . 
venv/bin/activate && pytest + +mac-cov: local-test ## Run the coverage report and display in a browser window (mac) + open -a "Google Chrome" htmlcov/index.html + +################################################################################# +# COMMANDS - CD +# expects the terminal to be docker login +# expects export OPENSHIFT_DOCKER_REGISTRY="" +# expects export OPENSHIFT_SA_NAME="$(oc whoami)" +# expects export OPENSHIFT_SA_TOKEN="$(oc whoami -t)" +# expects export OPENSHIFT_REPOSITORY="" +# expects export TAG_NAME="dev/test/prod" +# expects export OPS_REPOSITORY="" # +################################################################################# +cd: ## CD flow +ifeq ($(TAG_NAME), test) +cd: update-env + oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):dev $(DOCKER_NAME):$(TAG_NAME) +else ifeq ($(TAG_NAME), prod) +cd: update-env + oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):$(TAG_NAME) $(DOCKER_NAME):$(TAG_NAME)-$(shell date +%F) + oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):test $(DOCKER_NAME):$(TAG_NAME) +else +TAG_NAME=dev +cd: build update-env tag +endif + +build: ## Build the docker container + docker build . -t $(DOCKER_NAME) \ + --build-arg VCS_REF=$(shell git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ + +build-nc: ## Build the docker container without caching + docker build --no-cache -t $(DOCKER_NAME) . 
+ +REGISTRY_IMAGE=$(OPENSHIFT_DOCKER_REGISTRY)/$(OPENSHIFT_REPOSITORY)-tools/$(DOCKER_NAME) +push: #build ## Push the docker container to the registry & tag latest + @echo "$(OPENSHIFT_SA_TOKEN)" | docker login $(OPENSHIFT_DOCKER_REGISTRY) -u $(OPENSHIFT_SA_NAME) --password-stdin ;\ + docker tag $(DOCKER_NAME) $(REGISTRY_IMAGE):latest ;\ + docker push $(REGISTRY_IMAGE):latest + +VAULTS=`cat devops/vaults.json` +update-env: ## Update env from 1pass + oc -n "$(OPS_REPOSITORY)-$(TAG_NAME)" exec "dc/vault-service-$(TAG_NAME)" -- ./scripts/1pass.sh \ + -m "secret" \ + -e "$(TAG_NAME)" \ + -a "$(DOCKER_NAME)-$(TAG_NAME)" \ + -n "$(OPENSHIFT_REPOSITORY)-$(TAG_NAME)" \ + -v "$(VAULTS)" \ + -r "false" \ + -f "false" + +tag: push ## tag image + oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):latest $(DOCKER_NAME):$(TAG_NAME) + +################################################################################# +# COMMANDS - Local # +################################################################################# +run: db ## Run the project in local + . venv/bin/activate && python -m flask run -p 5000 + +db: ## Update the local database + . 
venv/bin/activate && python -m manage.py db upgrade + +################################################################################# +# Self Documenting Commands # +################################################################################# +.PHONY: help + +.DEFAULT_GOAL := help + +help: + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/jobs/nr-duplicates-report/README.md b/jobs/nr-duplicates-report/README.md new file mode 100644 index 000000000..f9967241f --- /dev/null +++ b/jobs/nr-duplicates-report/README.md @@ -0,0 +1,78 @@ +# Notebook Report + +Generate notebook report + +## Development Environment + +Follow the instructions of the [Development Readme](https://github.com/bcgov/entity/blob/master/docs/development.md) +to setup your local development environment. + +## Development Setup + +1. Follow the [instructions](https://github.com/bcgov/entity/blob/master/docs/setup-forking-workflow.md) to checkout the project from GitHub. +2. Open the nr-duplicates-report directory in VS Code to treat it as a project (or WSL project). To prevent version clashes, set up a virtual environment to install the Python packages used by this project. +3. Run `make setup` to set up the virtual environment and install libraries. + +## Running Notebook Report + +1. Run `. venv/bin/activate` to change to `venv` environment. +2. Run notebook with `python notebookreport.py` + +## Running Unit Tests + +1. Run `python -m pytest` or `pytest` command. + +### Important: Please remember to do "git update-index --add --chmod=+x run.sh" before run.sh is committed to GitHub for the first time. +### Build API - can be done in VS Code + +1. Login to openshift + + ```sh + oc login xxxxxxx + ``` + +2. switch to tools namespace + + ```sh + oc project f2b77c-tools + ``` + +3. 
class Config:
    """Settings for the NR duplicates report job, read from the environment.

    Every value is resolved once, while this class body executes. Unset
    variables fall back to an empty string, except ``PG_PORT`` which falls
    back to ``'5432'``.
    """

    # Directory that contains this module.
    PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
    APP_FILE = os.environ.get('APP_FILE', '')

    # E-mail settings
    SENDER_EMAIL = os.environ.get('SENDER_EMAIL', '')
    ERROR_EMAIL_RECIPIENTS = os.environ.get('ERROR_EMAIL_RECIPIENTS', '')
    REPORT_RECIPIENTS = os.environ.get('REPORT_RECIPIENTS', '')
    EMAIL_SMTP = os.environ.get('EMAIL_SMTP', '')
    ENVIRONMENT = os.environ.get('ENVIRONMENT', '')

    # POSTGRESQL
    PG_USER = os.environ.get('PG_USER', '')
    PG_PASSWORD = os.environ.get('PG_PASSWORD', '')
    PG_NAME = os.environ.get('PG_DB_NAME', '')
    PG_HOST = os.environ.get('PG_HOST', '')
    PG_PORT = os.environ.get('PG_PORT', '5432')

    # int() makes a non-numeric PG_PORT fail here, at class-definition time,
    # rather than producing a malformed connection URI.
    SQLALCHEMY_DATABASE_URI = (
        f'postgresql://{PG_USER}:{PG_PASSWORD}@{PG_HOST}:{int(PG_PORT)}/{PG_NAME}'
    )
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "is_executing": false, + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "namex_db = 'postgresql://' + \\\n", + " os.getenv('PG_USER', '') + \":\" + os.getenv('PG_PASSWORD', '') +'@' + \\\n", + " os.getenv('PG_HOST', '') + ':' + os.getenv('PG_PORT', '5432') + '/' + os.getenv('PG_DB_NAME', '');\n", + "\n", + "%sql $namex_db\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pay_db = 'postgresql://' + \\\n", + " os.getenv('PAY_USER', '') + \":\" + os.getenv('PAY_PASSWORD', '') +'@' + \\\n", + " os.getenv('PAY_HOST', '') + ':' + os.getenv('PAY_PORT', '5432') + '/' + os.getenv('PAY_DB_NAME', '');\n", + "\n", + "%sql $pay_db" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "Simplest query to run to ensure our libraries are loaded and our DB connection is working" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "is_executing": false, + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%%sql $namex_db\n", + "select now() AT TIME ZONE 'PST' as current_date" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "Daily totals for specified date: Following query, 'current_date - 0' means today, 'current_date - 1' means yesterday, 'current_date - 2' means the day before yesterday..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the number of days we want the report to be run over." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "number_of_days_nr=int(os.getenv('NUMBER_OF_DAYS_NR', '1')) \n", + "report_start_date=datetime.strftime(datetime.now()-timedelta(number_of_days_nr), '%Y-%m-%d')\n", + "\n", + "number_of_days_payment=int(os.getenv('NUMBER_OF_DAYS_PAYMENT', '1')) \n", + "payments_start_date=datetime.strftime(datetime.now()-timedelta(number_of_days_payment), '%Y-%m-%d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## get all duplicate names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sql $namex_db name_requests <<\n", + "select distinct\n", + "r.id, r.nr_num, r.priority_cd as priority, r.state_cd as nr_state,r.submitted_date,r.source,r.previous_request_id as resubmit,\n", + "n.name,\n", + "a.first_name||' '||a.last_name as customer_name, a.phone_number, a.email_address\n", + "from requests r, names n, applicants a\n", + "where r.id = n.nr_id\n", + "and r.id = a.nr_id\n", + "and r.submitted_date::date >= :report_start_date\n", + "and r.state_cd <> 'PENDING_DELETION'\n", + "and r.nr_num not like 'NR L%'\n", + "and\n", + "n.choice=1\n", + "and\n", + "n.name in (\n", + "\n", + "select \n", + "n.name\n", + "from requests r, names n\n", + "where r.id = n.nr_id\n", + "and\n", + "r.submitted_date::date >= :report_start_date\n", + "-- and r.state_cd in ('DRAFT','HOLD','PENDING_PAYMENT','CANCELLED')\n", + "-- and r.state_cd in ('DRAFT','HOLD','PENDING_PAYMENT')\n", + "and r.state_cd not in ('PENDING_DELETION')\n", + "--and n.choice=1\n", + "group by n.name\n", + "having count(n.name) > 1\n", + ")\n", + "order by n.name\n", + ";" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nr_frame = name_requests.DataFrame()\n", + "filename = os.path.join(os.getcwd(), r'data/')+'nr_duplicates_namex_' + report_start_date 
+'.csv'\n", + "\n", + "with open(filename, 'w') as f:\n", + " if not nr_frame.empty:\n", + " nr_frame.to_csv(f, sep=',', encoding='utf-8', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## get all payments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sql $pay_db paid <<\n", + "SELECT i.business_identifier, \n", + " i.id invoice_id, \n", + " i.created_on,\n", + " ir.invoice_number, \n", + " i.invoice_status_code invoice_status, \n", + " p.payment_status_code pay_status, \n", + " i.total, \n", + " i.paid, \n", + " r.receipt_number \n", + "FROM invoices i \n", + " LEFT OUTER JOIN invoice_references ir \n", + " ON ir.invoice_id = i.id \n", + " LEFT OUTER JOIN payments p \n", + " ON p.invoice_number = ir.invoice_number \n", + " LEFT OUTER JOIN receipts r \n", + " ON r.invoice_id = i.id \n", + "WHERE \n", + " created_on >=:payments_start_date\n", + " and i.invoice_status_code = 'PAID'\n", + " and i.business_identifier like 'NR%'\n", + " and i.paid <> 101.5\n", + "ORDER BY invoice_id ASC;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "paid_frame = paid.DataFrame()\n", + "if not paid_frame.empty: \n", + " paid_frame['nr_num']=paid_frame['business_identifier']\n", + "\n", + "payment_filename = os.path.join(os.getcwd(), r'data/')+'nr_duplicates_payment_' + payments_start_date +'.csv'\n", + "\n", + "with open(payment_filename, 'w') as f:\n", + " if paid_frame.empty:\n", + " writer = csv.writer(f)\n", + " writer.writerow(('No Data Retrieved','')) \n", + " else:\n", + " paid_frame.to_csv(f, sep=',', encoding='utf-8', index=False) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Merge the Duplicate Names with Payment information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not nr_frame.empty and 
not paid_frame.empty: \n", + " result_frame = pd.merge(nr_frame, paid_frame, how='left', on=['nr_num'])\n", + " result_frame=result_frame.drop(['id','business_identifier','created_on','invoice_number','total','receipt_number'], axis=1) \n", + "elif not nr_frame.empty:\n", + " result_frame = nr_frame.drop(['id'], axis=1)\n", + "else: \n", + " result_frame = pd.DataFrame([])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "merged_filename = os.path.join(os.getcwd(), r'data/')+'nr_duplicates_' + report_start_date +'.csv'\n", + "\n", + "with open(merged_filename, 'w') as f:\n", + " if result_frame.empty:\n", + " writer = csv.writer(f)\n", + " writer.writerow(('No Data Retrieved','')) \n", + " else: \n", + " result_frame.to_csv(f, sep=',', encoding='utf-8', index=False)" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "Python 3.10.4 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + }, + "vscode": { + "interpreter": { + "hash": "fcb35bce15c55b4cacb5112e543368f86c7f98ed17acd45e6841ee83ed1df6e3" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/jobs/nr-duplicates-report/devops/vaults.json b/jobs/nr-duplicates-report/devops/vaults.json new file mode 100644 index 000000000..9d1f03d34 --- /dev/null +++ b/jobs/nr-duplicates-report/devops/vaults.json @@ -0,0 +1,10 @@ +[ + { + "vault": "namex", + "application": [ + "postgres-namex", + "postgres-pay", + "nr-duplicates-report" + ] + } +] \ No newline at end of file diff --git a/jobs/nr-duplicates-report/logging.conf 
def create_app(config=Config):
    """Create the Flask app, load *config* into it and push an app context.

    Args:
        config: Object whose settings are loaded with
            ``app.config.from_object``.

    Returns:
        The configured ``Flask`` application.
    """
    app = Flask(__name__)
    app.config.from_object(config)
    # The context is pushed and not popped here, so ``current_app`` keeps
    # resolving to this app after the function returns.
    app.app_context().push()
    current_app.logger.debug(
        'created the Flask App and pushed the App Context')

    return app


def findfiles(directory, pattern):
    """Yield the paths of entries in *directory* whose name matches *pattern*.

    Each name is lower-cased before it is compared, so *pattern* should be
    written in lower case.

    Args:
        directory: Directory to list (not searched recursively).
        pattern: ``fnmatch``-style pattern, e.g. ``'*.ipynb'``.

    Yields:
        ``directory`` joined with each matching entry name.
    """
    for filename in os.listdir(directory):
        if fnmatch.fnmatch(filename.lower(), pattern):
            yield os.path.join(directory, filename)


def _parse_recipients(recipients):
    """Split a recipient setting such as ``[a@x.ca, b@x.ca]`` into addresses.

    Surrounding square brackets are dropped, the text is split on commas
    (with or without a following space) and blank entries are discarded.
    """
    candidates = (address.strip()
                  for address in recipients.strip('][').split(','))
    return [address for address in candidates if address]


def send_email(emailtype, errormessage):
    """Send the report e-mail, or an error notification.

    Args:
        emailtype: ``'ERROR'`` sends *errormessage* to
            ``ERROR_EMAIL_RECIPIENTS``; any other value sends the report CSV
            to ``DAILY_REPORT_RECIPIENTS``.
        errormessage: Text placed in the body of an error notification;
            ignored for the report e-mail.

    Raises:
        ValueError: If the recipient setting contains no address.
        OSError: If the report CSV cannot be read.
        smtplib.SMTPException: If the message cannot be delivered.
    """
    number_of_days_nr = int(os.getenv('NUMBER_OF_DAYS_NR', '1'))
    # Same expression the notebook uses for report_start_date, so the file
    # name built below matches the CSV the notebook wrote.
    date = datetime.strftime(
        datetime.now()-timedelta(number_of_days_nr), '%Y-%m-%d')
    ext = ''
    if os.getenv('ENVIRONMENT', '') != 'prod':
        ext = ' on ' + os.getenv('ENVIRONMENT', '')

    message = MIMEMultipart()
    sender_email = os.getenv('SENDER_EMAIL', '')

    if emailtype == 'ERROR':
        subject = 'NR Duplicates Report Error Notification on ' + date + ext
        recipients = os.getenv('ERROR_EMAIL_RECIPIENTS', '')
        message.attach(MIMEText('ERROR!!! \n' + errormessage, 'plain'))
    else:
        subject = 'NR Duplicates Report ' + date + ext
        filename = 'nr_duplicates_' + date + '.csv'
        recipients = os.getenv('DAILY_REPORT_RECIPIENTS', '')
        # Add body to email
        message.attach(MIMEText('Please see attached.', 'plain'))

        # Only the report carries an attachment; an error notification has
        # no file to open.
        with open(os.path.join(os.getcwd(), 'data', filename), 'rb') as attachment:
            # Add file as application/octet-stream
            # Email client can usually download this automatically as attachment
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())

        # Encode file in ASCII characters to send by email
        encoders.encode_base64(part)

        # Add header as key/value pair to attachment part
        part.add_header(
            'Content-Disposition',
            f'attachment; filename= {filename}',
        )

        # Add attachment to message
        message.attach(part)

    message['Subject'] = subject

    email_list = _parse_recipients(recipients)
    if not email_list:
        # Fail before opening a connection instead of handing the server an
        # empty or blank recipient.
        raise ValueError('No email recipients configured for: ' + subject)
    logging.info('Email recipients list is: %s', email_list)

    # The context manager sends QUIT and closes the socket even when
    # sendmail raises.
    with smtplib.SMTP(os.getenv('EMAIL_SMTP', '')) as server:
        server.sendmail(sender_email, email_list, message.as_string())
    logging.info(
        'Email with subject "%s" has been sent successfully!', subject)


def processnotebooks(notebookdirectory, data_dir):
    """Execute the duplicates notebook and e-mail the outcome.

    Args:
        notebookdirectory: Directory containing ``nr-duplicates.ipynb``.
        data_dir: Directory that receives the executed copy of the notebook.

    Returns:
        ``True`` when the notebook ran and the report e-mail was sent,
        ``False`` when a failure was caught and an error e-mail was sent
        instead. A failure to send that error e-mail propagates.
    """
    status = False
    logging.info('NR Duplicates Report start processing directory: %s',
                 notebookdirectory)
    # os.path.join works whether or not data_dir ends with a separator.
    executed_notebook = os.path.join(data_dir, 'temp.ipynb')

    try:
        pm.execute_notebook(os.path.join(notebookdirectory, 'nr-duplicates.ipynb'),
                            executed_notebook, parameters=None)

        # send email to receivers and remove files/directories which we don't want to keep
        send_email('', '')
        os.remove(executed_notebook)
        status = True
    except Exception:  # noqa: B902
        logging.exception(
            'NR Duplicates Report Error processing %s.', notebookdirectory)
        send_email('ERROR', traceback.format_exc())
    return status


if __name__ == '__main__':
    start_time = datetime.utcnow()

    # The notebook writes its CSV files to ./data, so create it up front.
    temp_dir = os.path.join(os.getcwd(), r'data/')
    os.makedirs(temp_dir, exist_ok=True)

    # NOTE(review): the returned status is not reflected in the exit code, so
    # the job exits 0 even when the report failed. Confirm against the
    # cronjob's backoff settings before switching to a non-zero exit.
    processnotebooks('daily', temp_dir)

    end_time = datetime.utcnow()
    logging.info('job - jupyter notebook report completed in: %s',
                 end_time - start_time)
    sys.exit()
template.openshift.io/v1 +kind: Template +metadata: + labels: + name: ${NAME} + name: ${NAME}-build +objects: +- apiVersion: v1 + kind: ImageStream + metadata: + name: ${NAME} + labels: + name: ${NAME} +- apiVersion: v1 + kind: BuildConfig + metadata: + name: ${NAME} + labels: + name: ${NAME} + spec: + output: + to: + kind: ImageStreamTag + name: ${NAME}:${OUTPUT_IMAGE_TAG} + resources: + limits: + cpu: ${CPU_LIMIT} + memory: ${MEMORY_LIMIT} + requests: + cpu: ${CPU_REQUEST} + memory: ${MEMORY_REQUEST} + runPolicy: Serial + source: + contextDir: ${SOURCE_CONTEXT_DIR} + git: + ref: ${GIT_REF} + uri: ${GIT_REPO_URL} + dockerfile: | + FROM docker-remote.artifacts.developer.gov.bc.ca/python:3.8.6-buster + USER root + + # Create working directory + RUN mkdir /opt/app-root && chmod 755 /opt/app-root + RUN mkdir /opt/app-root/data && chmod 777 /opt/app-root/data + WORKDIR /opt/app-root + + # Install the requirements + COPY ./requirements.txt . + + #RUN pip install --upgrade pip + RUN pip install pip==20.1.1 + RUN pip install --no-cache-dir -r requirements.txt + + COPY . . + + USER 1001 + + # Set Python path + ENV PYTHONPATH=/opt/app-root/src + + EXPOSE 8080 + + CMD [ "python", "/opt/app-root/notebookreport.py" ] + type: Git + strategy: + type: Docker + dockerStrategy: + pullSecret: + name: artifactory-creds + + triggers: + - type: ConfigChange +parameters: +- description: | + The name assigned to all of the objects defined in this template. + You should keep this as default unless you know what you're doing. + displayName: Name + name: NAME + required: true + value: nr-duplicates-report +- description: | + The URL to your Git repo; don't use this default unless + you're just experimenting. + displayName: Git Repo URL + name: GIT_REPO_URL + required: true + value: https://github.com/bcgov/namex.git +- description: The git reference or branch. + displayName: Git Reference + name: GIT_REF + required: true + value: main +- description: The source context directory. 
+ displayName: Source Context Directory + name: SOURCE_CONTEXT_DIR + required: false + value: jobs/nr-duplicates-report +- description: The tag given to the built image. + displayName: Output Image Tag + name: OUTPUT_IMAGE_TAG + required: true + value: latest +- description: The resources CPU limit (in cores) for this build. + displayName: Resources CPU Limit + name: CPU_LIMIT + required: true + value: "2" +- description: The resources Memory limit (in Mi, Gi, etc) for this build. + displayName: Resources Memory Limit + name: MEMORY_LIMIT + required: true + value: 2Gi +- description: The resources CPU request (in cores) for this build. + displayName: Resources CPU Request + name: CPU_REQUEST + required: true + value: "1" +- description: The resources Memory request (in Mi, Gi, etc) for this build. + displayName: Resources Memory Request + name: MEMORY_REQUEST + required: true + value: 2Gi diff --git a/jobs/nr-duplicates-report/openshift/templates/cronjob.yaml b/jobs/nr-duplicates-report/openshift/templates/cronjob.yaml new file mode 100644 index 000000000..e5ec889a9 --- /dev/null +++ b/jobs/nr-duplicates-report/openshift/templates/cronjob.yaml @@ -0,0 +1,201 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + name: ${NAME} + name: ${NAME}-cronjob +objects: +- kind: "CronJob" + apiVersion: "batch/v1beta1" + metadata: + name: "${NAME}-${TAG}" + labels: + name: "${NAME}" + environment: "${TAG}" + role: "${ROLE}" + spec: + schedule: "${SCHEDULE}" + concurrencyPolicy: "Forbid" + successfulJobsHistoryLimit: "${{SUCCESS_JOBS_HISTORY_LIMIT}}" + failedJobsHistoryLimit: "${{FAILED_JOBS_HISTORY_LIMIT}}" + jobTemplate: + metadata: + labels: + name: "${NAME}" + environment: "${TAG}" + role: "${ROLE}" + spec: + backoffLimit: ${{JOB_BACKOFF_LIMIT}} + template: + metadata: + labels: + name: "${NAME}" + environment: "${TAG}" + role: "${ROLE}" + spec: + containers: + - name: "${NAME}-${TAG}" + image: "${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/${NAME}:${TAG}" + 
imagePullPolicy: Always + command: + - /bin/sh + - -c + - cd /opt/app-root; ./run.sh + env: + - name: PG_USER + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: DATABASE_USERNAME + - name: PG_PASSWORD + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: DATABASE_PASSWORD + - name: PG_DB_NAME + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: DATABASE_NAME + - name: PG_HOST + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: DATABASE_HOST + - name: PG_PORT + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: DATABASE_PORT + - name: PG_USER + - name: PAY_USER + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: PAY_USER + - name: PAY_PASSWORD + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: PAY_PASSWORD + - name: PAY_DB_NAME + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: PAY_DB_NAME + - name: PAY_HOST + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: PAY_HOST + - name: PAY_PORT + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: PAY_PORT + - name: APP_FILE + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: APP_FILE + - name: SENDER_EMAIL + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: SENDER_EMAIL + - name: ERROR_EMAIL_RECIPIENTS + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: ERROR_EMAIL_RECIPIENTS + - name: DAILY_REPORT_RECIPIENTS + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: DAILY_REPORT_RECIPIENTS + - name: EMAIL_SMTP + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: EMAIL_SMTP + - name: NUMBER_OF_DAYS_NR + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: NUMBER_OF_DAYS_NR + - name: NUMBER_OF_DAYS_PAYMENT + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: NUMBER_OF_DAYS_PAYMENT + - name: ENVIRONMENT + valueFrom: + secretKeyRef: + name: ${NAME}-${TAG}-secret + key: ENVIRONMENT + restartPolicy: "Never" + 
terminationGracePeriodSeconds: 30 + activeDeadlineSeconds: 1600 + dnsPolicy: "ClusterFirst" +parameters: + + - name: NAME + displayName: Name + description: The name assigned to all of the OpenShift resources associated to the server instance. + required: true + value: nr-duplicates-report + + - name: TAG + displayName: Environment TAG name + description: The TAG name for this environment, e.g., dev, test, prod + value: dev + required: true + + - name: ROLE + displayName: Role + description: Role + required: true + value: job + + - name: NAMESPACE + displayName: Namespace Name + description: The base namespace name for the project. + required: true + value: f2b77c + + - name: IMAGE_NAMESPACE + displayName: Image Namespace + required: true + description: The namespace of the OpenShift project containing the imagestream for the application. + value: f2b77c-tools + + - name: IMAGE_REGISTRY + displayName: Image Registry + required: true + description: The image registry of the OpenShift project. 
+ value: image-registry.openshift-image-registry.svc:5000 + + - name: "SCHEDULE" + displayName: "Cron Schedule" + description: "Cron Schedule to Execute the Job (using local cluster system TZ)" + value: "30 14 * * *" + required: true + + - name: "SUCCESS_JOBS_HISTORY_LIMIT" + displayName: "Successful Job History Limit" + description: "The number of successful jobs that will be retained" + value: "5" + required: true + + - name: "FAILED_JOBS_HISTORY_LIMIT" + displayName: "Failed Job History Limit" + description: "The number of failed jobs that will be retained" + value: "2" + required: true + + - name: "JOB_BACKOFF_LIMIT" + displayName: "Job Backoff Limit" + description: "The number of attempts to try for a successful job outcome" + value: "0" + required: false + diff --git a/jobs/nr-duplicates-report/requirements.txt b/jobs/nr-duplicates-report/requirements.txt new file mode 100644 index 000000000..7ee082ee9 --- /dev/null +++ b/jobs/nr-duplicates-report/requirements.txt @@ -0,0 +1,38 @@ +jupyter +SQLAlchemy==1.3.16 +psycopg2-binary==2.8.5 +ipython-sql +simplejson +pandas +matplotlib +spacy +papermill +schedule +attrs==19.2.0 +future==0.18.2 +Jinja2==3.0 +markupsafe==2.0.1 +python-dateutil==2.8.2 +pytz==2020.1 +importlib-metadata==3.6.0 +itsdangerous==2.0.1 +jsonschema==3.2.0 +more-itertools==7.2.0 +six==1.14.0 +pyrsistent==0.16.0 +zipp==3.1.0 + +Flask==1.1.2 +pytest +Click==7.1.2 +python-dotenv==0.13.0 +requests==2.23.0 +marshmallow==2.20.5 +Werkzeug==0.16.1 +certifi==2020.4.5.1 +urllib3==1.25.9 +idna==2.9 +pylint +pylint-flask +pep8 +autopep8 diff --git a/jobs/nr-duplicates-report/requirements/dev.txt b/jobs/nr-duplicates-report/requirements/dev.txt new file mode 100644 index 000000000..ddfc0e591 --- /dev/null +++ b/jobs/nr-duplicates-report/requirements/dev.txt @@ -0,0 +1,13 @@ +# Everything the developer needs outside of the production requirements + +# Testing +pytest + +# Lint and code style +flake8<6.0 +flake8-blind-except +flake8-debugger +flake8-docstrings 
+flake8-isort +flake8-quotes +pep8-naming diff --git a/jobs/nr-duplicates-report/requirements/prod.txt b/jobs/nr-duplicates-report/requirements/prod.txt new file mode 100644 index 000000000..b8892f7a5 --- /dev/null +++ b/jobs/nr-duplicates-report/requirements/prod.txt @@ -0,0 +1,43 @@ +jupyter +SQLAlchemy==1.3.16 +psycopg2-binary==2.8.5 +ipython-sql +simplejson +pandas +matplotlib +spacy +papermill +schedule +attrs==19.2.0 +future==0.18.2 +Jinja2==2.11.2 +markupsafe==2.0.1 +python-dateutil==2.8.1 +pytz==2019.3 +importlib-metadata==1.6.0 +itsdangerous==2.0.1 +jsonschema==3.2.0 +more-itertools==7.2.0 +six==1.14.0 +pyrsistent==0.16.0 +zipp==3.1.0 + +Flask==1.1.2 +pytest +Click==7.1.2 +python-dotenv==0.13.0 +requests==2.23.0 +marshmallow==2.20.5 +Werkzeug==0.16.1 +certifi==2020.4.5.1 +urllib3==1.25.9 +idna==2.9 +pylint +pylint-flask +pep8 +autopep8 + + + + + diff --git a/jobs/nr-duplicates-report/run.sh b/jobs/nr-duplicates-report/run.sh new file mode 100755 index 000000000..c7d725432 --- /dev/null +++ b/jobs/nr-duplicates-report/run.sh @@ -0,0 +1,4 @@ +#! 
/bin/sh +cd /opt/app-root +echo 'run notebookreport to generate Jupyter Notebook Report' +python notebookreport.py diff --git a/jobs/nr-duplicates-report/setup.cfg b/jobs/nr-duplicates-report/setup.cfg new file mode 100644 index 000000000..4eb5d82e2 --- /dev/null +++ b/jobs/nr-duplicates-report/setup.cfg @@ -0,0 +1,14 @@ +[flake8] +exclude = .git,*migrations* +max-line-length = 120 +docstring-min-length=10 +per-file-ignores = + */__init__.py:F401 + +[pylint] +ignore=migrations,test +max-line-length=120 +notes=FIXME,XXX,TODO +ignored-modules=flask_sqlalchemy,sqlalchemy,SQLAlchemy,alembic,scoped_session +ignored-classes=scoped_session +disable=C0301,W0511,W0703,R0801,R0902,R0401 diff --git a/jobs/nr-duplicates-report/setup.py b/jobs/nr-duplicates-report/setup.py new file mode 100644 index 000000000..7d3762486 --- /dev/null +++ b/jobs/nr-duplicates-report/setup.py @@ -0,0 +1,22 @@ +# Copyright © 2019 Province of British Columbia. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _can_connect(env_prefix=''):
    """Report whether a PostgreSQL connection can be opened.

    Connection settings are read from ``<env_prefix>PG_USER``,
    ``<env_prefix>PG_PASSWORD``, ``<env_prefix>PG_HOST``,
    ``<env_prefix>PG_PORT`` and ``<env_prefix>PG_DB_NAME``.

    Returns:
        True when the connection and a cursor could be created, else False.
    """
    connection = None
    try:
        connection = psycopg2.connect(user=os.getenv(env_prefix + 'PG_USER', ''),
                                      password=os.getenv(env_prefix + 'PG_PASSWORD', ''),
                                      host=os.getenv(env_prefix + 'PG_HOST', ''),
                                      port=os.getenv(env_prefix + 'PG_PORT', '5432'),
                                      database=os.getenv(env_prefix + 'PG_DB_NAME', ''))
        connection.cursor()
        return True
    except Exception:  # noqa: B902
        return False
    finally:
        # Close the connection so the test run does not leak database sessions.
        if connection is not None:
            connection.close()


def test_connection_failed():
    """Test connection failed for the notebook."""
    # The FAKE_PG_* variables are read with empty defaults here.
    assert _can_connect('FAKE_') is False


def test_connection_succeed():
    """Test connection succeed for the notebook report."""
    assert _can_connect() is True


def test_daily_notebook_report():
    """Test daily notebook report."""
    # processnotebooks needs the directory for the executed notebook; use the
    # same <cwd>/data/ location as the script entry point and create it first.
    data_dir = os.path.join(os.getcwd(), r'data/')
    os.makedirs(data_dir, exist_ok=True)

    status = processnotebooks('daily', data_dir)

    assert status is True