From 37c8166d8cf0141b2ead041b91c1d65d5900a8fc Mon Sep 17 00:00:00 2001
From: Steven Chen <steven.chen@gov.bc.ca>
Date: Tue, 6 Dec 2022 10:56:00 -0800
Subject: [PATCH] NR Duplicates Report#14287

Signed-off-by: Steven Chen <steven.chen@gov.bc.ca>
---
 .github/workflows/nr-duplicates-report-cd.yml | 103 ++++++
 .github/workflows/nr-duplicates-report-ci.yml |  90 +++++
 jobs/nr-duplicates-report/Dockerfile          |  35 ++
 jobs/nr-duplicates-report/Makefile            | 150 ++++++++
 jobs/nr-duplicates-report/README.md           |  78 +++++
 jobs/nr-duplicates-report/config.py           |  29 ++
 .../daily/nr-duplicates.ipynb                 | 330 ++++++++++++++++++
 jobs/nr-duplicates-report/devops/vaults.json  |  10 +
 jobs/nr-duplicates-report/logging.conf        |  28 ++
 jobs/nr-duplicates-report/notebookreport.py   | 136 ++++++++
 jobs/nr-duplicates-report/openshift/Readme.md |   7 +
 .../openshift/templates/bc.yaml               | 122 +++++++
 .../openshift/templates/cronjob.yaml          | 201 +++++++++++
 jobs/nr-duplicates-report/requirements.txt    |  38 ++
 .../nr-duplicates-report/requirements/dev.txt |  13 +
 .../requirements/prod.txt                     |  43 +++
 jobs/nr-duplicates-report/run.sh              |   4 +
 jobs/nr-duplicates-report/setup.cfg           |  14 +
 jobs/nr-duplicates-report/setup.py            |  22 ++
 .../tests/unit/__init__.py                    |   1 +
 .../tests/unit/conftest.py                    |  20 ++
 .../tests/unit/test_job.py                    |  48 +++
 jobs/nr-duplicates-report/util/__init__.py    |   0
 jobs/nr-duplicates-report/util/logging.py     |  16 +
 24 files changed, 1538 insertions(+)
 create mode 100644 .github/workflows/nr-duplicates-report-cd.yml
 create mode 100644 .github/workflows/nr-duplicates-report-ci.yml
 create mode 100644 jobs/nr-duplicates-report/Dockerfile
 create mode 100644 jobs/nr-duplicates-report/Makefile
 create mode 100644 jobs/nr-duplicates-report/README.md
 create mode 100644 jobs/nr-duplicates-report/config.py
 create mode 100644 jobs/nr-duplicates-report/daily/nr-duplicates.ipynb
 create mode 100644 jobs/nr-duplicates-report/devops/vaults.json
 create mode 100644 jobs/nr-duplicates-report/logging.conf
 create mode 100644 jobs/nr-duplicates-report/notebookreport.py
 create mode 100644 jobs/nr-duplicates-report/openshift/Readme.md
 create mode 100644 jobs/nr-duplicates-report/openshift/templates/bc.yaml
 create mode 100644 jobs/nr-duplicates-report/openshift/templates/cronjob.yaml
 create mode 100644 jobs/nr-duplicates-report/requirements.txt
 create mode 100644 jobs/nr-duplicates-report/requirements/dev.txt
 create mode 100644 jobs/nr-duplicates-report/requirements/prod.txt
 create mode 100755 jobs/nr-duplicates-report/run.sh
 create mode 100644 jobs/nr-duplicates-report/setup.cfg
 create mode 100644 jobs/nr-duplicates-report/setup.py
 create mode 100644 jobs/nr-duplicates-report/tests/unit/__init__.py
 create mode 100644 jobs/nr-duplicates-report/tests/unit/conftest.py
 create mode 100644 jobs/nr-duplicates-report/tests/unit/test_job.py
 create mode 100644 jobs/nr-duplicates-report/util/__init__.py
 create mode 100644 jobs/nr-duplicates-report/util/logging.py

diff --git a/.github/workflows/nr-duplicates-report-cd.yml b/.github/workflows/nr-duplicates-report-cd.yml
new file mode 100644
index 000000000..b7a42040c
--- /dev/null
+++ b/.github/workflows/nr-duplicates-report-cd.yml
@@ -0,0 +1,103 @@
+name: Namex Duplicates Report Job CD
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - "jobs/nr-duplicates-report/**"
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: "Environment (dev/test/prod)"
+        required: true
+        default: "dev"
+
+defaults:
+  run:
+    shell: bash
+    working-directory: ./jobs/nr-duplicates-report
+
+env:
+  APP_NAME: "nr-duplicates-report"
+  TAG_NAME: "dev"
+
+jobs:
+  nr-duplicates-report-cd-by-push:
+    runs-on: ubuntu-20.04
+
+    if: github.event_name == 'push' && github.repository == 'bcgov/namex'
+    environment:
+      name: "dev"
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Login Openshift
+        shell: bash
+        run: |
+          oc login --server=${{secrets.OPENSHIFT4_LOGIN_REGISTRY}} --token=${{secrets.OPENSHIFT4_SA_TOKEN}}
+
+      - name: CD Flow
+        shell: bash
+        env:
+          OPS_REPOSITORY: ${{ secrets.OPS_REPOSITORY }}
+          OPENSHIFT_DOCKER_REGISTRY: ${{ secrets.OPENSHIFT4_DOCKER_REGISTRY }}
+          OPENSHIFT_SA_NAME: ${{ secrets.OPENSHIFT4_SA_NAME }}
+          OPENSHIFT_SA_TOKEN: ${{ secrets.OPENSHIFT4_SA_TOKEN }}
+          OPENSHIFT_REPOSITORY: ${{ secrets.OPENSHIFT4_REPOSITORY }}
+          TAG_NAME: ${{ env.TAG_NAME }}
+        run: |
+          make cd
+
+      - name: Rocket.Chat Notification
+        uses: RocketChat/Rocket.Chat.GitHub.Action.Notification@master
+        if: failure()
+        with:
+          type: ${{ job.status }}
+          job_name: "*NR Duplicates Report Job Built and Deployed to ${{env.TAG_NAME}}*"
+          channel: "#registries-bot"
+          url: ${{ secrets.ROCKETCHAT_WEBHOOK }}
+          commit: true
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+  nr-duplicates-report-cd-by-dispatch:
+    runs-on: ubuntu-20.04
+
+    if: github.event_name == 'workflow_dispatch' && github.repository == 'bcgov/namex'
+    environment:
+      name: "${{ github.event.inputs.environment }}"
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set env by input
+        run: |
+          echo "TAG_NAME=${{ github.event.inputs.environment }}" >> $GITHUB_ENV
+
+      - name: Login Openshift
+        shell: bash
+        run: |
+          oc login --server=${{secrets.OPENSHIFT4_LOGIN_REGISTRY}} --token=${{secrets.OPENSHIFT4_SA_TOKEN}}
+
+      - name: CD Flow
+        shell: bash
+        env:
+          OPS_REPOSITORY: ${{ secrets.OPS_REPOSITORY }}
+          OPENSHIFT_DOCKER_REGISTRY: ${{ secrets.OPENSHIFT4_DOCKER_REGISTRY }}
+          OPENSHIFT_SA_NAME: ${{ secrets.OPENSHIFT4_SA_NAME }}
+          OPENSHIFT_SA_TOKEN: ${{ secrets.OPENSHIFT4_SA_TOKEN }}
+          OPENSHIFT_REPOSITORY: ${{ secrets.OPENSHIFT4_REPOSITORY }}
+          TAG_NAME: ${{ env.TAG_NAME }}
+        run: |
+          make cd
+
+      - name: Rocket.Chat Notification
+        uses: RocketChat/Rocket.Chat.GitHub.Action.Notification@master
+        if: failure()
+        with:
+          type: ${{ job.status }}
+          job_name: "*NR Duplicates Report Job Built and Deployed to ${{env.TAG_NAME}}*"
+          channel: "#registries-bot"
+          url: ${{ secrets.ROCKETCHAT_WEBHOOK }}
+          commit: true
+          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/nr-duplicates-report-ci.yml b/.github/workflows/nr-duplicates-report-ci.yml
new file mode 100644
index 000000000..90daad648
--- /dev/null
+++ b/.github/workflows/nr-duplicates-report-ci.yml
@@ -0,0 +1,90 @@
+name: NR Duplicates Report Job CI
+
+on:
+  pull_request:
+    types: [assigned, synchronize]
+    paths:
+      - "jobs/nr-duplicates-report/**"
+
+defaults:
+  run:
+    shell: bash
+    working-directory: ./jobs/nr-duplicates-report
+
+jobs:
+  setup-job:
+    runs-on: ubuntu-20.04
+
+    if: github.repository == 'bcgov/namex'
+
+    steps:
+      - uses: actions/checkout@v2
+      - run: "true"
+
+  linting:
+    needs: setup-job
+    runs-on: ubuntu-20.04
+
+    strategy:
+      matrix:
+        python-version: [3.8]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          make setup
+      - name: Lint with pylint
+        id: pylint
+        run: |
+          make pylint
+      - name: Lint with flake8
+        id: flake8
+        run: |
+          make flake8
+
+  testing:
+    needs: setup-job
+    env:
+      PG_USER: postgres
+      PG_PASSWORD: postgres
+      PG_DB_NAME: postgres
+      PG_HOST: localhost
+      PG_PORT: 5432      
+
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v1
+        with:
+          python-version: "3.8"  # this job defines no matrix, so pin the version the linting job uses
+      - name: Install dependencies
+        run: |
+          make setup
+      - name: Test with pytest
+        id: test
+        run: |
+      #    make test
+      #- name: Upload coverage to Codecov
+      #  uses: codecov/codecov-action@v1
+      #  with:
+      #    file: ./queue_services/entity-pay/coverage.xml
+      #    flags: entitypay
+      #    name: codecov-entity-pay
+      #    fail_ci_if_error: true
+
+  build-check:
+    needs: setup-job
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: build to check strictness
+        id: build
+        run: |
+          make build-nc
diff --git a/jobs/nr-duplicates-report/Dockerfile b/jobs/nr-duplicates-report/Dockerfile
new file mode 100644
index 000000000..1122a9977
--- /dev/null
+++ b/jobs/nr-duplicates-report/Dockerfile
@@ -0,0 +1,35 @@
+FROM python:3.8.5-buster
+
+ARG VCS_REF="missing"
+ARG BUILD_DATE="missing"
+
+ENV VCS_REF=${VCS_REF}
+ENV BUILD_DATE=${BUILD_DATE}
+
+LABEL org.label-schema.vcs-ref=${VCS_REF} \
+    org.label-schema.build-date=${BUILD_DATE}
+    
+USER root
+
+# Create working directory
+RUN mkdir /opt/app-root && chmod 755 /opt/app-root
+RUN mkdir /opt/app-root/data && chmod 777 /opt/app-root/data
+WORKDIR /opt/app-root
+
+# Install the requirements
+COPY ./requirements.txt .
+
+#RUN pip install --upgrade pip
+RUN pip install pip==20.3.3
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+USER 1001
+
+# Set Python path
+ENV PYTHONPATH=/opt/app-root/src
+
+EXPOSE 8080
+
+CMD [ "python", "/opt/app-root/notebookreport.py" ]
diff --git a/jobs/nr-duplicates-report/Makefile b/jobs/nr-duplicates-report/Makefile
new file mode 100644
index 000000000..74c141edd
--- /dev/null
+++ b/jobs/nr-duplicates-report/Makefile
@@ -0,0 +1,150 @@
+.PHONY: license
+.PHONY: setup
+.PHONY: ci cd
+.PHONY: db run
+
+MKFILE_PATH:=$(abspath $(lastword $(MAKEFILE_LIST)))
+CURRENT_ABS_DIR:=$(patsubst %/,%,$(dir $(MKFILE_PATH)))
+
+PROJECT_NAME:=nr-duplicates-report
+DOCKER_NAME:=nr-duplicates-report
+
+#################################################################################
+# COMMANDS -- license                                                           #
+#################################################################################
+license: ## Verify source code license headers.
+	./scripts/verify_license_headers.sh $(CURRENT_ABS_DIR)/src $(CURRENT_ABS_DIR)/tests
+
+#################################################################################
+# COMMANDS -- Setup                                                             #
+#################################################################################
+setup: install install-dev ## Setup the project
+
+clean: clean-build clean-pyc clean-test ## Clean the project
+	rm -rf venv/
+
+clean-build: ## Clean build files
+	rm -fr build/
+	rm -fr dist/
+	rm -fr .eggs/
+	find . -name '*.egg-info' -exec rm -fr {} +
+	find . -name '*.egg' -exec rm -fr {} +
+
+clean-pyc: ## Clean cache files
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
+	find . -name '__pycache__' -exec rm -fr {} +
+
+clean-test: ## clean test files
+	find . -name '.pytest_cache' -exec rm -fr {} +
+	rm -fr .tox/
+	rm -f .coverage
+	rm -fr htmlcov/
+
+build-req: clean ## Upgrade requirements
+	test -f venv/bin/activate || python3.8 -m venv  $(CURRENT_ABS_DIR)/venv ;\
+	. venv/bin/activate ;\
+	pip install pip==21.1.2 ;\
+	pip install -Ur requirements/prod.txt ;\
+	pip freeze | sort > requirements.txt
+
+install: clean ## Install python virtual environment
+	test -f venv/bin/activate || python3.8 -m venv  $(CURRENT_ABS_DIR)/venv ;\
+	. venv/bin/activate ;\
+	pip install pip==21.1.2 ;\
+	pip install -Ur requirements.txt
+
+install-dev: ## Install local application
+	. venv/bin/activate ; \
+	pip install -Ur requirements/dev.txt; \
+	pip install -e .
+
+#################################################################################
+# COMMANDS - CI                                                                 #
+#################################################################################
+ci: pylint flake8 test ## CI flow
+
+pylint: ## Linting with pylint
+	. venv/bin/activate && pylint --rcfile=setup.cfg  notebookreport.py
+
+flake8: ## Linting with flake8
+	. venv/bin/activate && flake8 notebookreport.py
+
+lint: pylint flake8 ## run all lint type scripts
+
+test: ## Unit testing
+	. venv/bin/activate && pytest
+
+mac-cov: test ## Run the tests, then open the HTML coverage report in a browser window (mac)
+	open -a "Google Chrome" htmlcov/index.html
+
+#################################################################################
+# COMMANDS - CD
+# expects the terminal to be docker login
+# expects export OPENSHIFT_DOCKER_REGISTRY=""
+# expects export OPENSHIFT_SA_NAME="$(oc whoami)"
+# expects export OPENSHIFT_SA_TOKEN="$(oc whoami -t)"
+# expects export OPENSHIFT_REPOSITORY=""
+# expects export TAG_NAME="dev/test/prod"
+# expects export OPS_REPOSITORY=""                                                        #
+#################################################################################
+cd: ## CD flow
+ifeq ($(TAG_NAME), test)
+cd: update-env
+	oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):dev $(DOCKER_NAME):$(TAG_NAME)
+else ifeq ($(TAG_NAME), prod)
+cd: update-env
+	oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):$(TAG_NAME) $(DOCKER_NAME):$(TAG_NAME)-$(shell date +%F)
+	oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):test $(DOCKER_NAME):$(TAG_NAME)
+else
+TAG_NAME=dev
+cd: build update-env tag
+endif
+
+build: ## Build the docker container
+	docker build . -t $(DOCKER_NAME) \
+		--build-arg VCS_REF=$(shell git rev-parse --short HEAD) \
+		--build-arg BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \
+
+build-nc: ## Build the docker container without caching
+	docker build --no-cache -t $(DOCKER_NAME) .
+
+REGISTRY_IMAGE=$(OPENSHIFT_DOCKER_REGISTRY)/$(OPENSHIFT_REPOSITORY)-tools/$(DOCKER_NAME)
+push: #build ## Push the docker container to the registry & tag latest
+	@echo "$(OPENSHIFT_SA_TOKEN)" | docker login $(OPENSHIFT_DOCKER_REGISTRY) -u $(OPENSHIFT_SA_NAME) --password-stdin ;\
+    docker tag $(DOCKER_NAME) $(REGISTRY_IMAGE):latest ;\
+    docker push $(REGISTRY_IMAGE):latest
+
+VAULTS=`cat devops/vaults.json`
+update-env: ## Update env from 1pass
+	oc -n "$(OPS_REPOSITORY)-$(TAG_NAME)" exec "dc/vault-service-$(TAG_NAME)" -- ./scripts/1pass.sh \
+		-m "secret" \
+		-e "$(TAG_NAME)" \
+		-a "$(DOCKER_NAME)-$(TAG_NAME)" \
+		-n "$(OPENSHIFT_REPOSITORY)-$(TAG_NAME)" \
+		-v "$(VAULTS)" \
+		-r "false" \
+		-f "false"
+
+tag: push ## tag image
+	oc -n "$(OPENSHIFT_REPOSITORY)-tools" tag $(DOCKER_NAME):latest $(DOCKER_NAME):$(TAG_NAME)
+
+#################################################################################
+# COMMANDS - Local                                                              #
+#################################################################################
+run: db ## Run the project in local
+	. venv/bin/activate && python -m flask run -p 5000
+
+db: ## Update the local database
+	. venv/bin/activate && python -m manage.py db upgrade
+
+#################################################################################
+# Self Documenting Commands                                                     #
+#################################################################################
+.PHONY: help
+
+.DEFAULT_GOAL := help
+
+help:
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
diff --git a/jobs/nr-duplicates-report/README.md b/jobs/nr-duplicates-report/README.md
new file mode 100644
index 000000000..f9967241f
--- /dev/null
+++ b/jobs/nr-duplicates-report/README.md
@@ -0,0 +1,78 @@
+# Notebook Report
+
+Generates the NR duplicates report by running Jupyter notebooks with papermill.
+
+## Development Environment
+
+Follow the instructions of the [Development Readme](https://github.com/bcgov/entity/blob/master/docs/development.md)
+to setup your local development environment.
+
+## Development Setup
+
+1. Follow the [instructions](https://github.com/bcgov/entity/blob/master/docs/setup-forking-workflow.md) to checkout the project from GitHub.
+2. Open the nr-duplicates-report directory in VS Code to treat it as a project (or WSL project). To prevent version clashes, set up a virtual environment to install the Python packages used by this project.
+3. Run `make setup` to set up the virtual environment and install libraries.
+
+## Running Notebook Report
+
+1. Run `. venv/bin/activate` to change to `venv` environment.
+2. Run notebook with `python notebookreport.py`
+
+## Running Unit Tests
+
+1. Run `python -m pytest` or `pytest` command.
+
+### Important: Remember to run `git update-index --add --chmod=+x run.sh` before run.sh is committed to GitHub for the first time.
+### Build API - can be done in VS Code
+
+1. Login to openshift
+
+   ```sh
+   oc login xxxxxxx
+   ```
+
+2. switch to tools namespace
+
+   ```sh
+   oc project f2b77c-tools
+   ```
+
+3. Create build image
+
+   ```sh
+   oc process -f openshift/templates/bc.yaml \
+	  -p GIT_REPO_URL=https://github.com/bcgov/namex.git \
+	  -p GIT_REF=main \
+	  -o yaml \
+   | oc apply -f - -n f2b77c-tools     
+   ```
+
+4. Check the build log in the console: Administrator => Builds => Builds => click image 'nr-duplicates-report' => Logs
+
+5. Tag image to dev: 'oc tag nr-duplicates-report:latest nr-duplicates-report:dev'
+
+
+### Create cron
+
+1. Login to openshift
+
+   ```sh
+   oc login xxxxxxx
+   ```
+
+2. switch to dev namespace
+
+   ```sh
+   oc project f2b77c-dev
+   ```
+
+3. Create cron
+   ### Please remember that SCHEDULE is in UTC, which is 8 hours ahead of PST (7 hours ahead of PDT)
+   ```sh
+   oc process -f openshift/templates/cronjob.yaml \
+     -p TAG=dev \
+     -p SCHEDULE="30 14 * * *" \
+     -o yaml \
+     | oc apply -f - -n f2b77c-dev
+   ```
+4. Create a job to run and test it: 'oc create job nr-duplicates-report-dev-1 --from=cronjob/nr-duplicates-report-dev -n f2b77c-dev'
diff --git a/jobs/nr-duplicates-report/config.py b/jobs/nr-duplicates-report/config.py
new file mode 100644
index 000000000..dbc14e96e
--- /dev/null
+++ b/jobs/nr-duplicates-report/config.py
@@ -0,0 +1,29 @@
+import os
+from dotenv import load_dotenv, find_dotenv
+
+# Load environment variables from a .env file, if find_dotenv() locates one.
+load_dotenv(find_dotenv())
+
+
+class Config(object):  # Job settings, read from environment variables when this module is imported.
+    PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
+    APP_FILE = os.getenv('APP_FILE', '')
+    SENDER_EMAIL = os.getenv('SENDER_EMAIL', '')
+    ERROR_EMAIL_RECIPIENTS = os.getenv('ERROR_EMAIL_RECIPIENTS', '')
+    REPORT_RECIPIENTS = os.getenv('REPORT_RECIPIENTS', '')
+    EMAIL_SMTP = os.getenv('EMAIL_SMTP', '')
+    ENVIRONMENT = os.getenv('ENVIRONMENT', '')
+
+    # PostgreSQL connection settings; note that PG_NAME is read from the PG_DB_NAME variable.
+    PG_USER = os.getenv('PG_USER', '')
+    PG_PASSWORD = os.getenv('PG_PASSWORD', '')
+    PG_NAME = os.getenv('PG_DB_NAME', '')
+    PG_HOST = os.getenv('PG_HOST', '')
+    PG_PORT = os.getenv('PG_PORT', '5432')
+    SQLALCHEMY_DATABASE_URI = 'postgresql://{user}:{password}@{host}:{port}/{name}'.format(
+        user=PG_USER,
+        password=PG_PASSWORD,  # NOTE(review): not URL-escaped; special characters may break the URI
+        host=PG_HOST,
+        port=int(PG_PORT),  # int() raises ValueError if PG_PORT is not numeric
+        name=PG_NAME,
+    )
diff --git a/jobs/nr-duplicates-report/daily/nr-duplicates.ipynb b/jobs/nr-duplicates-report/daily/nr-duplicates.ipynb
new file mode 100644
index 000000000..5ae6682b6
--- /dev/null
+++ b/jobs/nr-duplicates-report/daily/nr-duplicates.ipynb
@@ -0,0 +1,330 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "# NR Duplicates Report"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "We need to load in these libraries into our notebook in order to query, load, manipulate and view the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "is_executing": false,
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "import csv\n",
+    "from datetime import datetime, timedelta\n",
+    "\n",
+    "%load_ext sql\n",
+    "%config SqlMagic.displaylimit = 5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "This will create the connection to the database and prep the jupyter magic for SQL..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "is_executing": false,
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "namex_db = 'postgresql://' + \\\n",
+    "                os.getenv('PG_USER', '') + \":\" + os.getenv('PG_PASSWORD', '') +'@' + \\\n",
+    "                os.getenv('PG_HOST', '') + ':' + os.getenv('PG_PORT', '5432') + '/' + os.getenv('PG_DB_NAME', '');\n",
+    "\n",
+    "%sql $namex_db\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pay_db = 'postgresql://' + \\\n",
+    "                os.getenv('PAY_USER', '') + \":\" + os.getenv('PAY_PASSWORD', '') +'@' + \\\n",
+    "                os.getenv('PAY_HOST', '') + ':' + os.getenv('PAY_PORT', '5432') + '/' + os.getenv('PAY_DB_NAME', '');\n",
+    "\n",
+    "%sql $pay_db"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "Simplest query to run to ensure our libraries are loaded and our DB connection is working"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "is_executing": false,
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%sql $namex_db\n",
+    "select now() AT TIME ZONE 'PST' as current_date"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false
+   },
+   "source": [
+    "Daily totals for specified date: Following query, 'current_date - 0' means today, 'current_date - 1' means yesterday, 'current_date - 2' means the day before yesterday..."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set the number of days we want the report to be run over."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "number_of_days_nr=int(os.getenv('NUMBER_OF_DAYS_NR', '1')) \n",
+    "report_start_date=datetime.strftime(datetime.now()-timedelta(number_of_days_nr), '%Y-%m-%d')\n",
+    "\n",
+    "number_of_days_payment=int(os.getenv('NUMBER_OF_DAYS_PAYMENT', '1'))  \n",
+    "payments_start_date=datetime.strftime(datetime.now()-timedelta(number_of_days_payment), '%Y-%m-%d')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## get all duplicate names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%sql $namex_db name_requests  <<\n",
+    "select distinct\n",
+    "r.id, r.nr_num, r.priority_cd as priority, r.state_cd as nr_state,r.submitted_date,r.source,r.previous_request_id as resubmit,\n",
+    "n.name,\n",
+    "a.first_name||' '||a.last_name as customer_name, a.phone_number, a.email_address\n",
+    "from requests r, names n, applicants a\n",
+    "where r.id = n.nr_id\n",
+    "and r.id = a.nr_id\n",
+    "and r.submitted_date::date >= :report_start_date\n",
+    "and r.state_cd <> 'PENDING_DELETION'\n",
+    "and r.nr_num not like 'NR L%'\n",
+    "and\n",
+    "n.choice=1\n",
+    "and\n",
+    "n.name in (\n",
+    "\n",
+    "select \n",
+    "n.name\n",
+    "from requests r, names n\n",
+    "where r.id = n.nr_id\n",
+    "and\n",
+    "r.submitted_date::date >= :report_start_date\n",
+    "-- and r.state_cd in ('DRAFT','HOLD','PENDING_PAYMENT','CANCELLED')\n",
+    "-- and r.state_cd in ('DRAFT','HOLD','PENDING_PAYMENT')\n",
+    "and r.state_cd not in ('PENDING_DELETION')\n",
+    "--and n.choice=1\n",
+    "group by n.name\n",
+    "having count(n.name) > 1\n",
+    ")\n",
+    "order by n.name\n",
+    ";"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nr_frame = name_requests.DataFrame()\n",
+    "filename = os.path.join(os.getcwd(), r'data/')+'nr_duplicates_namex_' + report_start_date +'.csv'\n",
+    "\n",
+    "with open(filename, 'w') as f:\n",
+    "    if not nr_frame.empty:\n",
+    "        nr_frame.to_csv(f, sep=',', encoding='utf-8', index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## get all payments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%sql $pay_db paid  <<\n",
+    "SELECT i.business_identifier, \n",
+    "       i.id                  invoice_id, \n",
+    "       i.created_on,\n",
+    "       ir.invoice_number, \n",
+    "       i.invoice_status_code invoice_status, \n",
+    "       p.payment_status_code pay_status, \n",
+    "       i.total, \n",
+    "       i.paid, \n",
+    "       r.receipt_number \n",
+    "FROM   invoices i \n",
+    "       LEFT OUTER JOIN invoice_references ir \n",
+    "                    ON ir.invoice_id = i.id \n",
+    "       LEFT OUTER JOIN payments p \n",
+    "                    ON p.invoice_number = ir.invoice_number \n",
+    "       LEFT OUTER JOIN receipts r \n",
+    "                    ON r.invoice_id = i.id \n",
+    "WHERE \n",
+    "  created_on >=:payments_start_date\n",
+    "  and i.invoice_status_code = 'PAID'\n",
+    "  and i.business_identifier like 'NR%'\n",
+    "  and i.paid <> 101.5\n",
+    "ORDER  BY invoice_id ASC;"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "paid_frame = paid.DataFrame()\n",
+    "if not paid_frame.empty:    \n",
+    "    paid_frame['nr_num']=paid_frame['business_identifier']\n",
+    "\n",
+    "payment_filename = os.path.join(os.getcwd(), r'data/')+'nr_duplicates_payment_' + payments_start_date +'.csv'\n",
+    "\n",
+    "with open(payment_filename, 'w') as f:\n",
+    "    if paid_frame.empty:\n",
+    "        writer = csv.writer(f)\n",
+    "        writer.writerow(('No Data Retrieved',''))        \n",
+    "    else:\n",
+    "        paid_frame.to_csv(f, sep=',', encoding='utf-8', index=False)    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Merge the Duplicate Names with Payment information"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not nr_frame.empty and not paid_frame.empty:  \n",
+    "    result_frame = pd.merge(nr_frame, paid_frame, how='left', on=['nr_num'])\n",
+    "    result_frame=result_frame.drop(['id','business_identifier','created_on','invoice_number','total','receipt_number'], axis=1)      \n",
+    "elif not nr_frame.empty:\n",
+    "    result_frame = nr_frame.drop(['id'], axis=1)\n",
+    "else: \n",
+    "    result_frame = pd.DataFrame([])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "merged_filename = os.path.join(os.getcwd(), r'data/')+'nr_duplicates_' + report_start_date +'.csv'\n",
+    "\n",
+    "with open(merged_filename, 'w') as f:\n",
+    "    if result_frame.empty:\n",
+    "        writer = csv.writer(f)\n",
+    "        writer.writerow(('No Data Retrieved','')) \n",
+    "    else: \n",
+    "        result_frame.to_csv(f, sep=',', encoding='utf-8', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Tags",
+  "kernelspec": {
+   "display_name": "Python 3.10.4 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "pycharm": {
+   "stem_cell": {
+    "cell_type": "raw",
+    "metadata": {
+     "collapsed": false
+    },
+    "source": []
+   }
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "fcb35bce15c55b4cacb5112e543368f86c7f98ed17acd45e6841ee83ed1df6e3"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/jobs/nr-duplicates-report/devops/vaults.json b/jobs/nr-duplicates-report/devops/vaults.json
new file mode 100644
index 000000000..9d1f03d34
--- /dev/null
+++ b/jobs/nr-duplicates-report/devops/vaults.json
@@ -0,0 +1,10 @@
+[
+    {
+        "vault": "namex",
+        "application": [
+            "postgres-namex",
+            "postgres-pay",
+            "nr-duplicates-report"
+        ]
+    }
+]
\ No newline at end of file
diff --git a/jobs/nr-duplicates-report/logging.conf b/jobs/nr-duplicates-report/logging.conf
new file mode 100644
index 000000000..ffc1a01e3
--- /dev/null
+++ b/jobs/nr-duplicates-report/logging.conf
@@ -0,0 +1,28 @@
+[loggers]
+keys=root,api
+
+[handlers]
+keys=console
+
+[formatters]
+keys=simple
+
+[logger_root]
+level=DEBUG
+handlers=console
+
+[logger_api]
+level=DEBUG
+handlers=console
+qualname=api
+propagate=0
+
+[handler_console]
+class=StreamHandler
+level=DEBUG
+formatter=simple
+args=(sys.stdout,)
+
+[formatter_simple]
+format=%(asctime)s - %(name)s - %(levelname)s in %(module)s:%(filename)s:%(lineno)d - %(funcName)s: %(message)s
+datefmt=
\ No newline at end of file
diff --git a/jobs/nr-duplicates-report/notebookreport.py b/jobs/nr-duplicates-report/notebookreport.py
new file mode 100644
index 000000000..212b5467f
--- /dev/null
+++ b/jobs/nr-duplicates-report/notebookreport.py
@@ -0,0 +1,136 @@
+"""s2i based launch script to run the notebook."""
+import fnmatch
+import logging
+import os
+import smtplib
+import sys
+import traceback
+from datetime import datetime, timedelta
+from email import encoders
+from email.mime.base import MIMEBase
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+
+import papermill as pm
+from flask import Flask, current_app
+
+from config import Config
+from util.logging import setup_logging
+
+setup_logging(os.path.join(os.path.abspath(os.path.dirname(
+    __file__)), 'logging.conf'))  # configure logging first, before any code below emits a record
+
+# Notebook Scheduler
+# ---------------------------------------
+# This script helps with the automated processing of Jupyter Notebooks via
+# papermill (https://github.com/nteract/papermill/)
+
+
+def create_app(config=Config):
+    """Build the Flask app from ``config`` and push its application context."""
+    flask_app = Flask(__name__)
+    flask_app.config.from_object(config)
+    # The context is pushed without a matching pop, so ``current_app`` stays
+    # bound after this call returns.
+    flask_app.app_context().push()
+    current_app.logger.debug('created the Flask App and pushed the App Context')
+    return flask_app
+
+
+def findfiles(directory, pattern):
+    """Yield the path of each entry in ``directory`` whose lowercased name matches ``pattern``."""
+    # Only the entry name is lowercased; ``pattern`` is handed to fnmatch as given.
+    matching = (entry for entry in os.listdir(directory)
+                if fnmatch.fnmatch(entry.lower(), pattern))
+    yield from (os.path.join(directory, entry) for entry in matching)
+
+
+def send_email(emailtype, errormessage):
+    """Email the daily duplicates CSV, or an error notice when ``emailtype`` is 'ERROR'.
+
+    ``errormessage`` is used only for the error notice, as the text after
+    'ERROR!!!'. Any other ``emailtype`` attaches data/nr_duplicates_<date>.csv from
+    the current working directory, where <date> is today minus NUMBER_OF_DAYS_NR
+    days (default 1). SENDER_EMAIL, EMAIL_SMTP, ENVIRONMENT and the recipient
+    lists (ERROR_EMAIL_RECIPIENTS / DAILY_REPORT_RECIPIENTS) come from the environment.
+
+    Raises OSError if the CSV cannot be read and smtplib errors if delivery fails.
+    """
+    number_of_days_nr = int(os.getenv('NUMBER_OF_DAYS_NR', '1'))
+    date = datetime.strftime(
+        datetime.now()-timedelta(number_of_days_nr), '%Y-%m-%d')
+    environment = os.getenv('ENVIRONMENT', '')
+    # Tag the subject with the environment everywhere except production.
+    ext = '' if environment == 'prod' else ' on ' + environment
+
+    message = MIMEMultipart()
+    sender_email = os.getenv('SENDER_EMAIL', '')
+
+    if emailtype == 'ERROR':
+        subject = 'NR Duplicates Report Error Notification on ' + date + ext
+        recipients = os.getenv('ERROR_EMAIL_RECIPIENTS', '')
+        message.attach(MIMEText('ERROR!!! \n' + errormessage, 'plain'))
+    else:
+        subject = 'NR Duplicates Report ' + date + ext
+        filename = 'nr_duplicates_' + date + '.csv'
+        recipients = os.getenv('DAILY_REPORT_RECIPIENTS', '')
+        message.attach(MIMEText('Please see attached.', 'plain'))
+
+        # Attach the CSV as application/octet-stream so mail clients offer it
+        # as a download; base64 keeps the payload ASCII-safe.
+        part = MIMEBase('application', 'octet-stream')
+        with open(os.path.join(os.getcwd(), 'data', filename), 'rb') as attachment:
+            part.set_payload(attachment.read())
+        encoders.encode_base64(part)
+        part.add_header('Content-Disposition', f'attachment; filename= {filename}')
+        message.attach(part)
+
+    message['Subject'] = subject
+    # Accept '[a@x, b@y]' as well as 'a@x,b@y': drop the brackets, split on
+    # commas and discard blanks so spacing differences cannot merge addresses.
+    email_list = [
+        address.strip() for address in recipients.strip('][').split(',') if address.strip()
+    ]
+    logging.info('Email recipients list is: %s', email_list)
+    # The context manager sends QUIT and closes the socket even if sendmail raises.
+    with smtplib.SMTP(os.getenv('EMAIL_SMTP', '')) as server:
+        server.sendmail(sender_email, email_list, message.as_string())
+    logging.info(
+        'Email with subject \"%s\" has been sent successfully!', subject)
+
+
+def processnotebooks(notebookdirectory, data_dir=None):
+    """Run ``nr-duplicates.ipynb`` from ``notebookdirectory`` and email the report.
+
+    The executed copy is written to ``data_dir``/temp.ipynb (default: ./data). Returns True on success;
+    on failure logs it, sends the error email (whose own errors propagate) and returns False.
+    """
+    status = False
+    output_path = os.path.join(data_dir or os.path.join(os.getcwd(), 'data'), 'temp.ipynb')
+    logging.info('NR Duplicates Report start processing directory: %s', notebookdirectory)
+    try:
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        pm.execute_notebook(os.path.join(notebookdirectory, 'nr-duplicates.ipynb'), output_path, parameters=None)
+        send_email('', '')  # any type other than 'ERROR' sends the report mail
+        os.remove(output_path)  # the executed copy is only a by-product
+        status = True
+    except Exception:  # noqa: B902
+        logging.exception('NR Duplicates Report Error processing %s.', notebookdirectory)
+        send_email('ERROR', traceback.format_exc())
+    return status
+
+
+if __name__ == '__main__':
+    # Script entry point: run the daily notebook report once and exit.
+    start_time = datetime.utcnow()
+
+    temp_dir = os.path.join(os.getcwd(), r'data/')
+    os.makedirs(temp_dir, exist_ok=True)
+
+    succeeded = processnotebooks('daily', temp_dir)
+
+    end_time = datetime.utcnow()
+    logging.info('job - jupyter notebook report completed in: %s',
+                 end_time - start_time)
+    # Exit non-zero when the report failed so the run is not recorded as successful.
+    sys.exit(0 if succeeded else 1)
diff --git a/jobs/nr-duplicates-report/openshift/Readme.md b/jobs/nr-duplicates-report/openshift/Readme.md
new file mode 100644
index 000000000..af020295a
--- /dev/null
+++ b/jobs/nr-duplicates-report/openshift/Readme.md
@@ -0,0 +1,7 @@
+# buildconfig
+oc process -f openshift/templates/bc.yaml -o yaml | oc apply -f - -n f2b77c-tools
+# cronjob
+oc process -f openshift/templates/cronjob.yaml -o yaml | oc apply -f - -n f2b77c-dev
+oc process -f openshift/templates/cronjob.yaml -p TAG=test -o yaml | oc apply -f - -n f2b77c-test
+oc process -f openshift/templates/cronjob.yaml -p TAG=prod -o yaml | oc apply -f - -n f2b77c-prod
+
diff --git a/jobs/nr-duplicates-report/openshift/templates/bc.yaml b/jobs/nr-duplicates-report/openshift/templates/bc.yaml
new file mode 100644
index 000000000..33d503260
--- /dev/null
+++ b/jobs/nr-duplicates-report/openshift/templates/bc.yaml
@@ -0,0 +1,122 @@
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  labels:
+    name: ${NAME}
+  name: ${NAME}-build
+objects:
+- apiVersion: v1
+  kind: ImageStream
+  metadata:
+    name: ${NAME}
+    labels:
+      name: ${NAME}
+- apiVersion: v1
+  kind: BuildConfig
+  metadata:
+    name: ${NAME}
+    labels:
+      name: ${NAME}
+  spec:
+    output:
+      to:
+        kind: ImageStreamTag
+        name: ${NAME}:${OUTPUT_IMAGE_TAG}
+    resources:
+      limits:
+        cpu: ${CPU_LIMIT}
+        memory: ${MEMORY_LIMIT}
+      requests:
+        cpu: ${CPU_REQUEST}
+        memory: ${MEMORY_REQUEST}
+    runPolicy: Serial
+    source:
+      contextDir: ${SOURCE_CONTEXT_DIR}
+      git:
+        ref: ${GIT_REF}
+        uri: ${GIT_REPO_URL}
+      dockerfile: |
+        FROM docker-remote.artifacts.developer.gov.bc.ca/python:3.8.6-buster
+        USER root
+
+        # Create working directory
+        RUN mkdir /opt/app-root && chmod 755 /opt/app-root
+        RUN mkdir /opt/app-root/data && chmod 777 /opt/app-root/data
+        WORKDIR /opt/app-root
+
+        # Install the requirements
+        COPY ./requirements.txt .
+
+        #RUN pip install --upgrade pip
+        RUN pip install pip==20.1.1
+        RUN pip install --no-cache-dir -r requirements.txt
+
+        COPY . .
+
+        USER 1001
+
+        # Set Python path
+        ENV PYTHONPATH=/opt/app-root/src
+
+        EXPOSE 8080
+
+        CMD [ "python", "/opt/app-root/notebookreport.py" ]
+      type: Git
+    strategy:
+      type: Docker
+      dockerStrategy:
+        pullSecret:
+          name: artifactory-creds
+
+    triggers:
+    - type: ConfigChange
+parameters:
+- description: |
+    The name assigned to all of the objects defined in this template.
+    You should keep this as default unless you know what you're doing.
+  displayName: Name
+  name: NAME
+  required: true
+  value: nr-duplicates-report
+- description: |
+    The URL to your Git repo; don't use this default unless
+    you're just experimenting.
+  displayName: Git Repo URL
+  name: GIT_REPO_URL
+  required: true
+  value: https://github.com/bcgov/namex.git
+- description: The git reference or branch.
+  displayName: Git Reference
+  name: GIT_REF
+  required: true
+  value: main
+- description: The source context directory.
+  displayName: Source Context Directory
+  name: SOURCE_CONTEXT_DIR
+  required: false
+  value: jobs/nr-duplicates-report
+- description: The tag given to the built image.
+  displayName: Output Image Tag
+  name: OUTPUT_IMAGE_TAG
+  required: true
+  value: latest
+- description: The resources CPU limit (in cores) for this build.
+  displayName: Resources CPU Limit
+  name: CPU_LIMIT
+  required: true
+  value: "2"
+- description: The resources Memory limit (in Mi, Gi, etc) for this build.
+  displayName: Resources Memory Limit
+  name: MEMORY_LIMIT
+  required: true
+  value: 2Gi
+- description: The resources CPU request (in cores) for this build.
+  displayName: Resources CPU Request
+  name: CPU_REQUEST
+  required: true
+  value: "1"
+- description: The resources Memory request (in Mi, Gi, etc) for this build.
+  displayName: Resources Memory Request
+  name: MEMORY_REQUEST
+  required: true
+  value: 2Gi
diff --git a/jobs/nr-duplicates-report/openshift/templates/cronjob.yaml b/jobs/nr-duplicates-report/openshift/templates/cronjob.yaml
new file mode 100644
index 000000000..e5ec889a9
--- /dev/null
+++ b/jobs/nr-duplicates-report/openshift/templates/cronjob.yaml
@@ -0,0 +1,201 @@
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  labels:
+    name: ${NAME}
+  name: ${NAME}-cronjob
+objects:
+- kind: "CronJob"
+  apiVersion: "batch/v1beta1"
+  metadata:
+    name: "${NAME}-${TAG}"
+    labels:
+      name: "${NAME}"
+      environment: "${TAG}"
+      role: "${ROLE}"
+  spec:
+    schedule: "${SCHEDULE}"
+    concurrencyPolicy: "Forbid"
+    successfulJobsHistoryLimit: "${{SUCCESS_JOBS_HISTORY_LIMIT}}"
+    failedJobsHistoryLimit: "${{FAILED_JOBS_HISTORY_LIMIT}}"
+    jobTemplate:
+      metadata:
+        labels:
+          name: "${NAME}"
+          environment: "${TAG}"
+          role: "${ROLE}"
+      spec:
+        backoffLimit: ${{JOB_BACKOFF_LIMIT}}
+        template:
+          metadata:
+            labels:
+              name: "${NAME}"
+              environment: "${TAG}"
+              role: "${ROLE}"
+          spec:
+            containers:
+              - name: "${NAME}-${TAG}"
+                image: "${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/${NAME}:${TAG}"
+                imagePullPolicy: Always
+                command:
+                  - /bin/sh
+                  - -c
+                  - cd /opt/app-root; ./run.sh
+                env:
+                  - name: PG_USER
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: DATABASE_USERNAME
+                  - name: PG_PASSWORD
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: DATABASE_PASSWORD
+                  - name: PG_DB_NAME
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: DATABASE_NAME
+                  - name: PG_HOST
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: DATABASE_HOST
+                  - name: PG_PORT
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: DATABASE_PORT
+                  # Pay database connection settings
+                  - name: PAY_USER
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: PAY_USER
+                  - name: PAY_PASSWORD
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: PAY_PASSWORD
+                  - name: PAY_DB_NAME
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: PAY_DB_NAME
+                  - name: PAY_HOST
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: PAY_HOST
+                  - name: PAY_PORT
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: PAY_PORT      
+                  - name: APP_FILE
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: APP_FILE
+                  - name: SENDER_EMAIL
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: SENDER_EMAIL
+                  - name: ERROR_EMAIL_RECIPIENTS
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: ERROR_EMAIL_RECIPIENTS
+                  - name: DAILY_REPORT_RECIPIENTS
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: DAILY_REPORT_RECIPIENTS
+                  - name: EMAIL_SMTP
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: EMAIL_SMTP
+                  - name: NUMBER_OF_DAYS_NR
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: NUMBER_OF_DAYS_NR
+                  - name: NUMBER_OF_DAYS_PAYMENT
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: NUMBER_OF_DAYS_PAYMENT
+                  - name: ENVIRONMENT
+                    valueFrom:
+                      secretKeyRef:
+                        name: ${NAME}-${TAG}-secret
+                        key: ENVIRONMENT  
+            restartPolicy: "Never"
+            terminationGracePeriodSeconds: 30
+            activeDeadlineSeconds: 1600
+            dnsPolicy: "ClusterFirst"
+parameters: 
+
+  - name: NAME
+    displayName: Name
+    description: The name assigned to all of the OpenShift resources associated to the server instance.
+    required: true
+    value: nr-duplicates-report
+
+  - name: TAG
+    displayName: Environment TAG name
+    description: The TAG name for this environment, e.g., dev, test, prod
+    value: dev
+    required: true
+
+  - name: ROLE
+    displayName: Role
+    description: Role
+    required: true
+    value: job
+
+  - name: NAMESPACE
+    displayName: Namespace Name
+    description: The base namespace name for the project.
+    required: true
+    value: f2b77c
+
+  - name: IMAGE_NAMESPACE
+    displayName: Image Namespace
+    required: true
+    description: The namespace of the OpenShift project containing the imagestream for the application.
+    value: f2b77c-tools
+
+  - name: IMAGE_REGISTRY
+    displayName: Image Registry
+    required: true
+    description: The image registry of the OpenShift project.
+    value: image-registry.openshift-image-registry.svc:5000
+
+  - name: "SCHEDULE"
+    displayName: "Cron Schedule"
+    description: "Cron Schedule to Execute the Job (using local cluster system TZ)"
+    value: "30 14 * * *"
+    required: true
+
+  - name: "SUCCESS_JOBS_HISTORY_LIMIT"
+    displayName: "Successful Job History Limit"
+    description: "The number of successful jobs that will be retained"
+    value: "5"
+    required: true
+
+  - name: "FAILED_JOBS_HISTORY_LIMIT"
+    displayName: "Failed Job History Limit"
+    description: "The number of failed jobs that will be retained"
+    value: "2"
+    required: true
+
+  - name: "JOB_BACKOFF_LIMIT"
+    displayName: "Job Backoff Limit"
+    description: "The number of attempts to try for a successful job outcome"
+    value: "0"
+    required: false
+
diff --git a/jobs/nr-duplicates-report/requirements.txt b/jobs/nr-duplicates-report/requirements.txt
new file mode 100644
index 000000000..7ee082ee9
--- /dev/null
+++ b/jobs/nr-duplicates-report/requirements.txt
@@ -0,0 +1,38 @@
+jupyter
+SQLAlchemy==1.3.16
+psycopg2-binary==2.8.5
+ipython-sql
+simplejson
+pandas
+matplotlib
+spacy
+papermill
+schedule
+attrs==19.2.0
+future==0.18.2
+Jinja2==3.0
+markupsafe==2.0.1
+python-dateutil==2.8.2
+pytz==2020.1
+importlib-metadata==3.6.0
+itsdangerous==2.0.1
+jsonschema==3.2.0
+more-itertools==7.2.0
+six==1.14.0
+pyrsistent==0.16.0
+zipp==3.1.0
+
+Flask==1.1.2
+pytest
+Click==7.1.2
+python-dotenv==0.13.0
+requests==2.23.0
+marshmallow==2.20.5
+Werkzeug==0.16.1
+certifi==2020.4.5.1
+urllib3==1.25.9
+idna==2.9
+pylint
+pylint-flask
+pep8
+autopep8
diff --git a/jobs/nr-duplicates-report/requirements/dev.txt b/jobs/nr-duplicates-report/requirements/dev.txt
new file mode 100644
index 000000000..ddfc0e591
--- /dev/null
+++ b/jobs/nr-duplicates-report/requirements/dev.txt
@@ -0,0 +1,13 @@
+# Everything the developer needs outside of the production requirements
+
+# Testing
+pytest
+
+# Lint and code style
+flake8<6.0
+flake8-blind-except
+flake8-debugger
+flake8-docstrings
+flake8-isort
+flake8-quotes
+pep8-naming
diff --git a/jobs/nr-duplicates-report/requirements/prod.txt b/jobs/nr-duplicates-report/requirements/prod.txt
new file mode 100644
index 000000000..b8892f7a5
--- /dev/null
+++ b/jobs/nr-duplicates-report/requirements/prod.txt
@@ -0,0 +1,43 @@
+jupyter
+SQLAlchemy==1.3.16
+psycopg2-binary==2.8.5
+ipython-sql
+simplejson
+pandas
+matplotlib
+spacy
+papermill
+schedule
+attrs==19.2.0
+future==0.18.2
+Jinja2==2.11.2
+markupsafe==2.0.1
+python-dateutil==2.8.1
+pytz==2019.3
+importlib-metadata==1.6.0
+itsdangerous==2.0.1
+jsonschema==3.2.0
+more-itertools==7.2.0
+six==1.14.0
+pyrsistent==0.16.0
+zipp==3.1.0
+
+Flask==1.1.2
+pytest
+Click==7.1.2
+python-dotenv==0.13.0
+requests==2.23.0
+marshmallow==2.20.5
+Werkzeug==0.16.1
+certifi==2020.4.5.1
+urllib3==1.25.9
+idna==2.9
+pylint
+pylint-flask
+pep8
+autopep8
+
+
+
+
+
diff --git a/jobs/nr-duplicates-report/run.sh b/jobs/nr-duplicates-report/run.sh
new file mode 100755
index 000000000..c7d725432
--- /dev/null
+++ b/jobs/nr-duplicates-report/run.sh
@@ -0,0 +1,4 @@
+#! /bin/sh
+cd /opt/app-root
+echo 'run notebookreport to generate Jupyter Notebook Report'
+python notebookreport.py
diff --git a/jobs/nr-duplicates-report/setup.cfg b/jobs/nr-duplicates-report/setup.cfg
new file mode 100644
index 000000000..4eb5d82e2
--- /dev/null
+++ b/jobs/nr-duplicates-report/setup.cfg
@@ -0,0 +1,14 @@
+[flake8]
+exclude = .git,*migrations*
+max-line-length = 120
+docstring-min-length=10
+per-file-ignores =
+    */__init__.py:F401
+
+[pylint]
+ignore=migrations,test
+max-line-length=120
+notes=FIXME,XXX,TODO
+ignored-modules=flask_sqlalchemy,sqlalchemy,SQLAlchemy,alembic,scoped_session
+ignored-classes=scoped_session
+disable=C0301,W0511,W0703,R0801,R0902,R0401
diff --git a/jobs/nr-duplicates-report/setup.py b/jobs/nr-duplicates-report/setup.py
new file mode 100644
index 000000000..7d3762486
--- /dev/null
+++ b/jobs/nr-duplicates-report/setup.py
@@ -0,0 +1,22 @@
+# Copyright © 2019 Province of British Columbia.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Installer and setup for this module."""
+
+from setuptools import find_packages, setup
+
+
+# Packages are discovered by find_packages() rather than listed by hand.
+setup(
+    name='nr-duplicates-report', packages=find_packages(),
+)
diff --git a/jobs/nr-duplicates-report/tests/unit/__init__.py b/jobs/nr-duplicates-report/tests/unit/__init__.py
new file mode 100644
index 000000000..76a6f8981
--- /dev/null
+++ b/jobs/nr-duplicates-report/tests/unit/__init__.py
@@ -0,0 +1 @@
+"""init for the notebook."""
diff --git a/jobs/nr-duplicates-report/tests/unit/conftest.py b/jobs/nr-duplicates-report/tests/unit/conftest.py
new file mode 100644
index 000000000..866977756
--- /dev/null
+++ b/jobs/nr-duplicates-report/tests/unit/conftest.py
@@ -0,0 +1,20 @@
+"""conftest for the notebook."""
+import pytest
+
+from config import Config
+from notebookreport import create_app
+
+
+@pytest.fixture(scope='session')
+def app(request):
+    """Create the Flask application once and share it across the test session."""
+    # ``request`` is accepted but not consulted; the app is always built from Config.
+    flask_app = create_app(Config)
+    return flask_app
+
+
+@pytest.fixture(scope='session')
+def client_ctx(app):
+    """Yield a Flask test client for ``app``, kept open for the whole test session."""
+    with app.test_client() as test_client:
+        yield test_client
diff --git a/jobs/nr-duplicates-report/tests/unit/test_job.py b/jobs/nr-duplicates-report/tests/unit/test_job.py
new file mode 100644
index 000000000..508cd6bbf
--- /dev/null
+++ b/jobs/nr-duplicates-report/tests/unit/test_job.py
@@ -0,0 +1,48 @@
+"""test job for the notebook."""
+import os
+
+import psycopg2
+
+from notebookreport import processnotebooks
+
+
+def test_connection_failed():
+    """Connecting with the FAKE_PG_* environment settings is expected to fail."""
+    settings = {
+        'user': os.getenv('FAKE_PG_USER', ''),
+        'password': os.getenv('FAKE_PG_PASSWORD', ''),
+        'host': os.getenv('FAKE_PG_HOST', ''),
+        'port': os.getenv('FAKE_PG_PORT', '5432'),
+        'database': os.getenv('FAKE_PG_DB_NAME', ''),
+    }
+    try:
+        psycopg2.connect(**settings).cursor()
+        connected = True  # only reached when the connection was accepted
+    except Exception:  # noqa: B902
+        connected = False
+
+    assert connected is False
+
+
+def test_connection_succeed():
+    """Connect with the PG_* environment settings and open a cursor.
+
+    Connection errors propagate so a failing run reports the real psycopg2
+    error rather than a bare assertion failure.
+    """
+    connection = psycopg2.connect(user=os.getenv('PG_USER', ''),
+                                  password=os.getenv('PG_PASSWORD', ''),
+                                  host=os.getenv('PG_HOST', ''),
+                                  port=os.getenv('PG_PORT', '5432'),
+                                  database=os.getenv('PG_DB_NAME', ''))
+    try:
+        assert connection.cursor() is not None
+    finally:
+        connection.close()  # always release the connection, pass or fail
+
+
+def test_daily_notebook_report():
+    """Run the daily notebook end to end; processnotebooks must report success."""
+    data_dir = os.path.join(os.getcwd(), 'data/')  # same directory the job's __main__ block uses
+    os.makedirs(data_dir, exist_ok=True)
+    assert processnotebooks('daily', data_dir) is True
diff --git a/jobs/nr-duplicates-report/util/__init__.py b/jobs/nr-duplicates-report/util/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/jobs/nr-duplicates-report/util/logging.py b/jobs/nr-duplicates-report/util/logging.py
new file mode 100644
index 000000000..0bd53d163
--- /dev/null
+++ b/jobs/nr-duplicates-report/util/logging.py
@@ -0,0 +1,16 @@
+# setup logging - important to set it up first
+from config import Config
+from os import path
+import logging.config
+import sys
+
+
+def setup_logging(conf='logging.conf'):
+    """Load logging config ``conf`` (joined onto Config.PROJECT_ROOT); report the outcome on stderr."""
+    log_file_path = path.join(Config.PROJECT_ROOT, conf)
+    if not path.isfile(log_file_path):
+        # Nothing to load: the logging configuration is left as it was.
+        print('Unable to configure logging, attempted conf:{}'.format(log_file_path), file=sys.stderr)
+        return
+    logging.config.fileConfig(log_file_path)
+    print('Configure logging, from conf:{}'.format(log_file_path), file=sys.stderr)