diff --git a/.bandit.yml b/.bandit.yml new file mode 100644 index 0000000..2b618f6 --- /dev/null +++ b/.bandit.yml @@ -0,0 +1,13 @@ +--- +# Configuration file for the Bandit python security scanner +# https://bandit.readthedocs.io/en/latest/config.html + +# Tests are first included by `tests`, and then excluded by `skips`. +# If `tests` is empty, all tests are considered included. + +tests: +# - B101 +# - B102 + +skips: + - B101 # skip "assert used" check since assertions are required in pytests diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..92ff826 --- /dev/null +++ b/.flake8 @@ -0,0 +1,25 @@ +[flake8] +max-line-length = 80 +# Select (turn on) +# * Complexity violations reported by mccabe (C) - +# http://flake8.pycqa.org/en/latest/user/error-codes.html#error-violation-codes +# * Documentation conventions compliance reported by pydocstyle (D) - +# http://www.pydocstyle.org/en/stable/error_codes.html +# * Default errors and warnings reported by pycodestyle (E and W) - +# https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes +# * Default errors reported by pyflakes (F) - +# http://flake8.pycqa.org/en/latest/glossary.html#term-pyflakes +# * Default warnings reported by flake8-bugbear (B) - +# https://github.com/PyCQA/flake8-bugbear#list-of-warnings +# * The B950 flake8-bugbear opinionated warning - +# https://github.com/PyCQA/flake8-bugbear#opinionated-warnings +select = C,D,E,F,W,B,B950 +# Ignore flake8's default warning about maximum line length, which has +# a hard stop at the configured value. Instead we use +# flake8-bugbear's B950, which allows up to 10% overage. +# +# Also ignore flake8's warning about line breaks before binary +# operators. It no longer agrees with PEP8. See, for example, here: +# https://github.com/ambv/black/issues/21. Guido agrees here: +# https://github.com/python/peps/commit/c59c4376ad233a62ca4b3a6060c81368bd21e85b. 
+ignore = E501,W503 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..ab07ea9 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,10 @@ +# Each line is a file pattern followed by one or more owners. + +# These owners will be the default owners for everything in the +# repo. Unless a later match takes precedence, these owners will be +# requested for review when someone opens a pull request. +* @dav3r @jsf9k @mcdonnnj + +# These folks own any files in the .github directory at the root of +# the repository and any of its subdirectories. +/.github/ @dav3r @felddy @jsf9k @mcdonnnj diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..fa93c02 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +--- + +version: 2 +updates: + - package-ecosystem: "docker" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/lineage.yml b/.github/lineage.yml new file mode 100644 index 0000000..b10c80c --- /dev/null +++ b/.github/lineage.yml @@ -0,0 +1,6 @@ +--- +version: "1" + +lineage: + skeleton: + remote-url: https://github.com/cisagov/skeleton-docker.git diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..dbb6cc1 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,350 @@ +--- +name: build + +on: + push: + branches: + - '**' + tags: + - 'v*.*.*' + pull_request: + schedule: + - cron: '0 10 * * *' # everyday at 10am + repository_dispatch: + # Respond to rebuild requests. 
See: https://github.com/cisagov/action-apb/ + types: [apb] + workflow_dispatch: + inputs: + remote-shell: + description: "Debug with remote shell" + required: true + default: false + image-tag: + description: "Tag to apply to pushed images" + required: true + default: dispatch + +env: + BUILDX_CACHE_DIR: ~/.cache/buildx + IMAGE_NAME: cisagov/gatherer + PIP_CACHE_DIR: ~/.cache/pip + PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,\ + linux/arm64,linux/ppc64le,linux/s390x" + PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit + +jobs: + lint: + # Checks out the source and runs pre-commit hooks. Detects coding errors + # and style deviations. + name: "Lint sources" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - id: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - uses: actions/cache@v2 + env: + BASE_CACHE_KEY: "${{ github.job }}-${{ runner.os }}-\ + py${{ steps.setup-python.outputs.python-version }}-" + with: + path: | + ${{ env.PIP_CACHE_DIR }} + ${{ env.PRE_COMMIT_CACHE_DIR }} + key: "${{ env.BASE_CACHE_KEY }}\ + ${{ hashFiles('**/requirements-test.txt') }}-\ + ${{ hashFiles('**/requirements.txt') }}-\ + ${{ hashFiles('**/.pre-commit-config.yaml') }}" + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade --requirement requirements-test.txt + - name: Set up pre-commit hook environments + run: pre-commit install-hooks + - name: Run pre-commit on all files + run: pre-commit run --all-files + + prepare: + # Calculates and publishes outputs that are used by other jobs. + # + # Outputs: + # created: + # The current date-time in RFC3339 format. + # repometa: + # The json metadata describing this repository. + # source_version: + # The source version as reported by the `bump_version.sh show` command. + # tags: + # A comma separated list of Docker tags to be applied to the images on + # DockerHub. 
The tags will vary depending on: + # - The event that triggered the build. + # - The branch the build is based upon. + # - The git tag the build is based upon. + # + # When a build is based on a git tag of the form `v*.*.*` the image will + # be tagged on DockerHub with multiple levels of version specificity. + # For example, a git tag of `v1.2.3+a` will generate Docker tags of + # `:1.2.3_a`, `:1.2.3`, `:1.2`, `:1`, and `:latest`. + # + # Builds targeting the default branch will be tagged with `:edge`. + # + # Builds from other branches will be tagged with the branch name. Solidi + # (`/` characters - commonly known as slashes) in branch names are + # replaced with hyphen-minuses (`-` characters) in the Docker tag. For + # more information about the solidus see these links: + # * https://www.compart.com/en/unicode/U+002F + # * https://en.wikipedia.org/wiki/Slash_(punctuation)#Encoding + # + # Builds triggered by a push event are tagged with a short hash in the + # form: sha-12345678 + # + # Builds triggered by a pull request are tagged with the pull request + # number in the form pr-123. + # + # Builds triggered using the GitHub GUI (workflow_dispatch) are tagged + # with the value specified by the user. + # + # Scheduled builds are tagged with `:nightly`. 
+ name: "Prepare build variables" + runs-on: ubuntu-latest + outputs: + created: ${{ steps.prep.outputs.created }} + repometa: ${{ steps.repo.outputs.result }} + source_version: ${{ steps.prep.outputs.source_version }} + tags: ${{ steps.prep.outputs.tags }} + steps: + - uses: actions/checkout@v2 + - name: Gather repository metadata + id: repo + uses: actions/github-script@v3 + with: + script: | + const repo = await github.repos.get(context.repo) + return repo.data + - name: Calculate output values + id: prep + run: | + VERSION=noop + SEMVER="^v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?$" + if [ "${{ github.event_name }}" = "schedule" ]; then + VERSION=nightly + elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + VERSION=${{ github.event.inputs.image-tag }} + elif [[ $GITHUB_REF == refs/tags/* ]]; then + VERSION=${GITHUB_REF#refs/tags/} + elif [[ $GITHUB_REF == refs/heads/* ]]; then + VERSION=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') + if [ "${{ github.event.repository.default_branch }}" = "$VERSION" ]; + then + VERSION=edge + fi + elif [[ $GITHUB_REF == refs/pull/* ]]; then + VERSION=pr-${{ github.event.number }} + fi + if [[ $VERSION =~ $SEMVER ]]; then + VERSION_NO_V=${VERSION#v} + MAJOR="${BASH_REMATCH[1]}" + MINOR="${BASH_REMATCH[2]}" + PATCH="${BASH_REMATCH[3]}" + TAGS="${IMAGE_NAME}:${VERSION_NO_V//+/_},${IMAGE_NAME}:${MAJOR}.${MINOR}.${PATCH},${IMAGE_NAME}:${MAJOR}.${MINOR},${IMAGE_NAME}:${MAJOR},${IMAGE_NAME}:latest" + else + TAGS="${IMAGE_NAME}:${VERSION}" + fi + if [ "${{ github.event_name }}" = "push" ]; then + TAGS="${TAGS},${IMAGE_NAME}:sha-${GITHUB_SHA::8}" + fi + echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') + echo ::set-output name=source_version::$(./bump_version.sh show) + echo ::set-output name=tags::${TAGS} + echo tags=${TAGS} + - name: Setup debug session 
remote shell + uses: mxschmitt/action-tmate@v3 + if: github.event.inputs.remote-shell == 'true' + + build: + # Builds a single test image for the native platform. This image is saved + # as an artifact and loaded by the test job. + name: "Build test image" + runs-on: ubuntu-latest + needs: [prepare] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Cache Docker layers + uses: actions/cache@v2 + env: + BASE_CACHE_KEY: buildx-${{ runner.os }}- + with: + path: ${{ env.BUILDX_CACHE_DIR }} + key: ${{ env.BASE_CACHE_KEY }}${{ github.sha }} + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Create dist directory + run: mkdir -p dist + - name: Build image + id: docker_build + uses: docker/build-push-action@v2 + with: + build-args: | + VERSION=${{ needs.prepare.outputs.source_version }} + cache-from: type=local,src=${{ env.BUILDX_CACHE_DIR }} + cache-to: type=local,dest=${{ env.BUILDX_CACHE_DIR }} + context: . 
+ file: ./Dockerfile + outputs: type=docker,dest=dist/image.tar + tags: ${{ env.IMAGE_NAME }}:latest # not to be pushed + # For a list of pre-defined annotation keys and value types see: + # https://github.com/opencontainers/image-spec/blob/master/annotations.md + labels: "\ + org.opencontainers.image.created=${{ + needs.prepare.outputs.created }} + + org.opencontainers.image.description=${{ + fromJson(needs.prepare.outputs.repometa).description }} + + org.opencontainers.image.licenses=${{ + fromJson(needs.prepare.outputs.repometa).license.spdx_id }} + + org.opencontainers.image.revision=${{ github.sha }} + + org.opencontainers.image.source=${{ + fromJson(needs.prepare.outputs.repometa).clone_url }} + + org.opencontainers.image.title=${{ + fromJson(needs.prepare.outputs.repometa).name }} + + org.opencontainers.image.url=${{ + fromJson(needs.prepare.outputs.repometa).html_url }} + + org.opencontainers.image.version=${{ + needs.prepare.outputs.source_version }}" + - name: Compress image + run: gzip dist/image.tar + - name: Upload artifacts + uses: actions/upload-artifact@v2 + with: + name: dist + path: dist + + test: + # Executes tests on the single-platform image created in the "build" job. 
+ name: "Test image" + runs-on: ubuntu-latest + needs: [build] + steps: + - uses: actions/checkout@v2 + - id: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - name: Cache testing environments + uses: actions/cache@v2 + env: + BASE_CACHE_KEY: "${{ github.job }}-${{ runner.os }}-\ + py${{ steps.setup-python.outputs.python-version }}-" + with: + path: ${{ env.PIP_CACHE_DIR }} + key: "${{ env.BASE_CACHE_KEY }}\ + ${{ hashFiles('**/requirements-test.txt') }}-\ + ${{ hashFiles('**/requirements.txt') }}" + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --upgrade --requirement requirements-test.txt + - name: Download docker image artifact + uses: actions/download-artifact@v2 + with: + name: dist + path: dist + - name: Load docker image + run: docker load < dist/image.tar.gz + - name: Run tests + env: + RELEASE_TAG: ${{ github.event.release.tag_name }} + run: pytest --runslow + + build-push-all: + # Builds the final set of images for each of the platforms listed in + # PLATFORMS environment variable. These images are tagged with the Docker + # tags calculated in the "prepare" job and pushed to DockerHub. The + # contents of README.md is pushed as the image's description. This job is + # skipped when the triggering event is a pull request. 
+ name: "Build and push all platforms" + runs-on: ubuntu-latest + needs: [lint, prepare, test] + if: github.event_name != 'pull_request' + steps: + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Checkout + uses: actions/checkout@v2 + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Cache Docker layers + uses: actions/cache@v2 + env: + BASE_CACHE_KEY: buildx-${{ runner.os }}- + with: + path: ${{ env.BUILDX_CACHE_DIR }} + key: ${{ env.BASE_CACHE_KEY }}${{ github.sha }} + restore-keys: | + ${{ env.BASE_CACHE_KEY }} + - name: Create cross-platform support Dockerfile-x + run: ./buildx-dockerfile.sh + - name: Build and push platform images to Docker Hub + id: docker_build + uses: docker/build-push-action@v2 + with: + build-args: | + VERSION=${{ needs.prepare.outputs.source_version }} + cache-from: type=local,src=${{ env.BUILDX_CACHE_DIR }} + cache-to: type=local,dest=${{ env.BUILDX_CACHE_DIR }} + context: . 
+ file: ./Dockerfile-x + platforms: ${{ env.PLATFORMS }} + push: true + tags: ${{ needs.prepare.outputs.tags }} + # For a list of pre-defined annotation keys and value types see: + # https://github.com/opencontainers/image-spec/blob/master/annotations.md + labels: "\ + org.opencontainers.image.created=${{ + needs.prepare.outputs.created }} + + org.opencontainers.image.description=${{ + fromJson(needs.prepare.outputs.repometa).description }} + + org.opencontainers.image.licenses=${{ + fromJson(needs.prepare.outputs.repometa).license.spdx_id }} + + org.opencontainers.image.revision=${{ github.sha }} + + org.opencontainers.image.source=${{ + fromJson(needs.prepare.outputs.repometa).clone_url }} + + org.opencontainers.image.title=${{ + fromJson(needs.prepare.outputs.repometa).name }} + + org.opencontainers.image.url=${{ + fromJson(needs.prepare.outputs.repometa).html_url }} + + org.opencontainers.image.version=${{ + needs.prepare.outputs.source_version }}" + - name: Publish README.md to Docker Hub + env: + DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + run: ./push_readme.sh diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..127ef08 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,66 @@ +--- + +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. 
+name: "CodeQL" + +on: + push: + pull_request: + # The branches below must be a subset of the branches above + branches: [develop] + schedule: + - cron: '0 21 * * 6' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + # Override automatic language detection by changing the below list + # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', + # 'python'] + language: ['python'] + # Learn more... + # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a + # config file. By default, queries listed here will override any + # specified in a config file. Prefix the list here with "+" to use + # these queries and those in the config file. queries: + # ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or + # Java). If this step fails, then you should remove it and run the build + # manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹī¸ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl + + # ✏ī¸ If the Autobuild fails above, remove it and uncomment the following + # three lines and modify them (or add more) to build your code if your + # project uses a compiled language + + # - run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index cdb93cd..bceb4ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ +__pycache__ +.mypy_cache +.pytest_cache .python-version +Dockerfile-x diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..46d45f3 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,10 @@ +[settings] +combine_star=true +force_sort_within_sections=true + +import_heading_stdlib=Standard Python Libraries +import_heading_thirdparty=Third-Party Libraries +import_heading_firstparty=cisagov Libraries + +# Run isort under the black profile to align with our other Python linting +profile=black diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000..8950263 --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,8 @@ +--- +extraction: + python: + python_setup: + version: 3 + requirements_files: + - requirements-test.txt + setup_py: false diff --git a/.mdl_config.json b/.mdl_config.json new file mode 100644 index 0000000..7a6f3f8 --- /dev/null +++ b/.mdl_config.json @@ -0,0 +1,10 @@ +{ + "MD013": { + "code_blocks": false, + "tables": false + }, + "MD024": { + "allow_different_nesting": true + }, + "default": true +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..bc4a0de --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,145 @@ +--- +default_language_version: + # force all unspecified python hooks to run python3 + python: python3 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + - id: check-toml + - id: 
check-xml + - id: debug-statements + - id: detect-aws-credentials + args: + - --allow-missing-credentials + - id: detect-private-key + exclude: src/secrets/privkey.pem + - id: end-of-file-fixer + exclude: files/(issue|motd) + - id: mixed-line-ending + args: + - --fix=lf + - id: pretty-format-json + args: + - --autofix + - id: requirements-txt-fixer + - id: trailing-whitespace + + # Text file hooks + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.26.0 + hooks: + - id: markdownlint + args: + - --config=.mdl_config.json + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.2.1 + hooks: + - id: prettier + - repo: https://github.com/adrienverge/yamllint + rev: v1.26.0 + hooks: + - id: yamllint + args: + - --strict + + # Shell script hooks + - repo: https://github.com/lovesegfault/beautysh + rev: 6.0.1 + hooks: + - id: beautysh + args: + - --indent-size + - '2' + - repo: https://github.com/detailyang/pre-commit-shell + rev: 1.0.5 + hooks: + - id: shell-lint + + # Python hooks + - repo: https://github.com/PyCQA/bandit + rev: 1.7.0 + hooks: + - id: bandit + name: bandit (tests tree) + files: tests + args: + - --config=.bandit.yml + # Run bandit everything but tests directory + - repo: https://github.com/PyCQA/bandit + rev: 1.7.0 + hooks: + - id: bandit + name: bandit (everything else) + exclude: tests + - repo: https://github.com/python/black + rev: 20.8b1 + hooks: + - id: black + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + additional_dependencies: + - flake8-docstrings + - repo: https://github.com/PyCQA/isort + rev: 5.7.0 + hooks: + - id: isort + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.800 + hooks: + - id: mypy + - repo: https://github.com/asottile/pyupgrade + rev: v2.10.0 + hooks: + - id: pyupgrade + + # Ansible hooks + - repo: https://github.com/ansible-community/ansible-lint + # This is intentionally being held back because of issues in v5 per + # 
https://github.com/cisagov/skeleton-ansible-role/issues/69 + rev: v4.3.7 + hooks: + - id: ansible-lint + # files: molecule/default/playbook.yml + + # Terraform hooks + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.45.0 + hooks: + - id: terraform_fmt + # There are ongoing issues with how this command works. This issue + # documents the core issue: + # https://github.com/hashicorp/terraform/issues/21408 + # We have seen issues primarily with proxy providers and Terraform code + # that uses remote state. The PR + # https://github.com/hashicorp/terraform/pull/24887 + # has been approved and is part of the 0.13 release to resolve the issue + # with remote states. + # The PR + # https://github.com/hashicorp/terraform/pull/24896 + # is a proposed fix to deal with `terraform validate` with proxy + # providers (among other configurations). + # We have decided to disable the terraform_validate hook until the issues + # above have been resolved, which we hope will be with the release of + # Terraform 0.13. 
+ # - id: terraform_validate + + # Docker hooks + - repo: https://github.com/IamTheFij/docker-pre-commit + rev: v2.0.0 + hooks: + - id: docker-compose-check + + # Packer hooks + - repo: https://github.com/cisagov/pre-commit-packer + rev: v0.0.2 + hooks: + - id: packer_validate + - id: packer_fmt diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..738d402 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,7 @@ +# Already being linted by pretty-format-json +*.json +# Already being linted by mdl +*.md +# Already being linted by yamllint +*.yaml +*.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 488b301..0000000 --- a/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -sudo: required - -services: - - docker - -env: - global: - - IMAGE_NAME=cisagov/gatherer - - DOCKER_USER=jsf9k - - secure: "zIyHkDg9thZRegsFjQDTSqpuqkybIQSrQarUfsXJLJR1rAm7s1qeDT1I3AlNyY8x4RgY5PreFqRM3ebsFpHSUmXFS0ECiYVO6aJQ409Hx5wUhbEN1hpwocVJ8iC4tEf7/xv5Lu8LTUlD5/wwEhkcgs5p5OTFfEXu9iZGPQT7lZQMYlch855DvzGNr0TON/biPQ3QK70QtcyJyLsIErQbSkPw7SvhuPrY/HOW/CqbgkVkCqQQL9/M8FfwUzV/iIftAWFU2+vEN2leNmqI69CLEToyhljVK80uMoNJtC3NCeVVnaLhLtLObdASPt1+MIngKe/wxhciNXALAxLr87+MeeouYj/VrF34DZa3+qRcwby7nfTZIBFqpJ4ne+Nf63XXtbhNIkEL43kPILCu2nx1EHvlOCVYOJV4dKYxpAmF+/DERxJDQH/ZL0ltAb5j8nac1HdB/AnrKEhMvSZiOUT1lx1y2x4rQeThOYb9+Qaxejukxcuq0ykreJa7hxSiZ5o4hXlX24PAq3awqDSHk5GHBA6WaBQifuOoYIqypOm4JfvzfzXHYSmWSCZCG6NBKSX8iTIrHhQ8Anhc60zZwXudfhKzeUxa8HTh4jHtGI2dJxON1ajoxWnUovKuNPtfvl+yIjAN9cU4KazFnMu0jnJMxGubhWmAieHM5aeF4MlUQRw=" - -script: - - bash travis_scripts/build_docker_image.sh - -deploy: - - provider: script - script: bash travis_scripts/deploy_to_docker_hub.sh - on: - tags: true diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..7ed00eb --- /dev/null +++ b/.yamllint @@ -0,0 +1,7 @@ +--- +extends: default + +rules: + # yamllint doesn't like when we use yes and no for true and false, + # but that's pretty standard in Ansible. 
+ truthy: disable diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..df396a2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,141 @@ +# Welcome # + +We're so glad you're thinking about contributing to this open source +project! If you're unsure or afraid of anything, just ask or submit +the issue or pull request anyway. The worst that can happen is that +you'll be politely asked to change something. We appreciate any sort +of contribution, and don't want a wall of rules to get in the way of +that. + +Before contributing, we encourage you to read our CONTRIBUTING policy +(you are here), our [LICENSE](LICENSE), and our [README](README.md), +all of which should be in this repository. + +## Issues ## + +If you want to report a bug or request a new feature, the most direct +method is to [create an +issue](https://github.com/cisagov/gatherer/issues) in this +repository. We recommend that you first search through existing +issues (both open and closed) to check if your particular issue has +already been reported. If it has then you might want to add a comment +to the existing issue. If it hasn't then feel free to create a new +one. + +## Pull requests ## + +If you choose to [submit a pull +request](https://github.com/cisagov/gatherer/pulls), you will +notice that our continuous integration (CI) system runs a fairly +extensive set of linters and syntax checkers. Your pull request may +fail these checks, and that's OK. If you want you can stop there and +wait for us to make the necessary corrections to ensure your code +passes the CI checks. + +If you want to make the changes yourself, or if you want to become a +regular contributor, then you will want to set up +[pre-commit](https://pre-commit.com/) on your local machine. Once you +do that, the CI checks will run locally before you even write your +commit message. This speeds up your development cycle considerably. 
+ +### Setting up pre-commit ### + +There are a few ways to do this, but we prefer to use +[`pyenv`](https://github.com/pyenv/pyenv) and +[`pyenv-virtualenv`](https://github.com/pyenv/pyenv-virtualenv) to +create and manage a Python virtual environment specific to this +project. + +If you already have `pyenv` and `pyenv-virtualenv` configured you can +take advantage of the `setup-env` tool in this repo to automate the +entire environment configuration process. + +```console +./setup-env +``` + +Otherwise, follow the steps below to manually configure your +environment. + +#### Installing and using `pyenv` and `pyenv-virtualenv` #### + +On the Mac, we recommend installing [brew](https://brew.sh/). Then +installation is as simple as `brew install pyenv pyenv-virtualenv` and +adding this to your profile: + +```bash +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" +``` + +For Linux, Windows Subsystem for Linux (WSL), or on the Mac (if you +don't want to use `brew`) you can use +[pyenv/pyenv-installer](https://github.com/pyenv/pyenv-installer) to +install the necessary tools. Before running this ensure that you have +installed the prerequisites for your platform according to the +[`pyenv` wiki +page](https://github.com/pyenv/pyenv/wiki/common-build-problems). + +On WSL you should treat your platform as whatever Linux distribution +you've chosen to install. + +Once you have installed `pyenv` you will need to add the following +lines to your `.bashrc`: + +```bash +export PATH="$PATH:$HOME/.pyenv/bin" +eval "$(pyenv init -)" +eval "$(pyenv virtualenv-init -)" +``` + +If you are using a shell other than `bash` you should follow the +instructions that the `pyenv-installer` script outputs. + +You will need to reload your shell for these changes to take effect so +you can begin to use `pyenv`. + +For a list of Python versions that are already installed and ready to +use with `pyenv`, use the command `pyenv versions`. 
To see a list of +the Python versions available to be installed and used with `pyenv` +use the command `pyenv install --list`. You can read more +[here](https://github.com/pyenv/pyenv/blob/master/COMMANDS.md) about +the many things that `pyenv` can do. See +[here](https://github.com/pyenv/pyenv-virtualenv#usage) for the +additional capabilities that pyenv-virtualenv adds to the `pyenv` +command. + +#### Creating the Python virtual environment #### + +Once `pyenv` and `pyenv-virtualenv` are installed on your system, you +can create and configure the Python virtual environment with these +commands: + +```console +cd gatherer +pyenv virtualenv gatherer +pyenv local gatherer +pip install --requirement requirements-dev.txt +``` + +#### Installing the pre-commit hook #### + +Now setting up pre-commit is as simple as: + +```console +pre-commit install +``` + +At this point the pre-commit checks will run against any files that +you attempt to commit. If you want to run the checks against the +entire repo, just execute `pre-commit run --all-files`. + +## Public domain ## + +This project is in the public domain within the United States, and +copyright and related rights in the work worldwide are waived through +the [CC0 1.0 Universal public domain +dedication](https://creativecommons.org/publicdomain/zero/1.0/). + +All contributions to this project will be released under the CC0 +dedication. By submitting a pull request, you are agreeing to comply +with this waiver of copyright interest. diff --git a/Dockerfile b/Dockerfile old mode 100755 new mode 100644 index 4979dea..14cb514 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,34 @@ +ARG VERSION=unspecified + +FROM python:3.6-slim-buster + +ARG VERSION + +# For a list of pre-defined annotation keys and value types see: +# https://github.com/opencontainers/image-spec/blob/master/annotations.md +# Note: Additional labels are added by the build workflow. 
+LABEL org.opencontainers.image.authors="jeremy.frasier@cisa.dhs.gov" +LABEL org.opencontainers.image.vendor="Cybersecurity and Infrastructure Security Agency" + ### -# Install everything we need +# Setup the user and its home directory ### -FROM python:3.6-slim-buster AS install -LABEL maintainer="jeremy.frasier@trio.dhs.gov" -LABEL organization="CISA Cyber Assessments" -LABEL url="https://github.com/cisagov/gatherer" -ENV HOME=/home/gatherer -ENV USER=gatherer +ARG CISA_GID=421 +ARG CISA_UID=${CISA_GID} +ENV CISA_USER="cisa" +ENV CISA_GROUP=${CISA_USER} +ENV CISA_HOME="/home/cisa" + +### +# Create unprivileged user +### +RUN groupadd --system --gid ${CISA_GID} ${CISA_GROUP} +RUN useradd --system --uid ${CISA_UID} --gid ${CISA_GROUP} --comment "${CISA_USER} user" ${CISA_USER} + +### +# Install everything we need +### ### # Dependencies @@ -43,9 +64,9 @@ RUN pip install --no-cache-dir --upgrade pip setuptools # Install domain-scan ### RUN git clone https://github.com/18F/domain-scan \ - ${HOME}/domain-scan/ + ${CISA_HOME}/domain-scan/ RUN pip install --no-cache-dir --upgrade \ - --requirement ${HOME}/domain-scan/requirements.txt + --requirement ${CISA_HOME}/domain-scan/requirements.txt ### # Install some dependencies for scripts/fed_hostnames.py @@ -67,33 +88,20 @@ RUN rm -rf /var/lib/apt/lists/* ### -# Setup the user and its home directory -### -FROM install AS setup_user - -### -# Create unprivileged user +# Setup working directory and entrypoint ### -RUN groupadd -r $USER -RUN useradd -r -c "$USER user" -g $USER $USER # Put this just before we change users because the copy (and every # step after it) will always be rerun by docker, but we need to be # root for the chown command. -COPY . 
$HOME -RUN chown -R ${USER}:${USER} $HOME - - -### -# Setup working directory and entrypoint -### -FROM setup_user AS final +COPY src ${CISA_HOME} +RUN chown -R ${CISA_USER}:${CISA_GROUP} ${CISA_HOME} ### # Prepare to Run ### # Right now we need to be root at runtime in order to create files in -# /home/shared -# USER ${USER}:${USER} -WORKDIR $HOME +# ${CISA_HOME}/shared +# USER ${CISA_USER}:${CISA_GROUP} +WORKDIR ${CISA_HOME} ENTRYPOINT ["./gather-domains.sh"] diff --git a/LICENSE.md b/LICENSE similarity index 100% rename from LICENSE.md rename to LICENSE diff --git a/README.md b/README.md index 14b3253..dc34422 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,14 @@ -# NCATS Domain Gatherer :notebook: :file_folder: # +# Gatherer # -[![Build Status](https://travis-ci.com/cisagov/gatherer.svg?branch=develop)](https://travis-ci.com/cisagov/gatherer) +[![GitHub Build Status](https://github.com/cisagov/gatherer/workflows/build/badge.svg)](https://github.com/cisagov/gatherer/actions) +[![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/gatherer.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/gatherer/alerts/) +[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cisagov/gatherer.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/gatherer/context:python) + +## Docker Image ## + +[![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/gatherer)](https://hub.docker.com/r/cisagov/gatherer) +[![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/gatherer)](https://hub.docker.com/r/cisagov/gatherer) +[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/gatherer/tags) This is a Docker container that uses [domain-scan](https://github.com/18F/domain-scan) to gather domains as @@ -11,9 +19,58 @@ a precursor to scanning by 
[pshtt](https://github.com/cisagov/pshtt), This Docker container is intended to be run via [orchestrator](https://github.com/cisagov/orchestrator). +__N.B.:__ The secrets in the `src/secrets` directory are only used +when testing via the `docker-compose.yml` composition. Normally this +Docker container is run via the Docker composition in +[cisagov/orchestrator](https://github.com/cisagov/orchestrator), which +expects the secrets in a different location. + +## Usage ## + +### Install ### + +Pull `cisagov/gatherer` from the Docker repository: + + docker pull cisagov/gatherer + +Or build `cisagov/gatherer` from source: + + git clone https://github.com/cisagov/gatherer.git + cd gatherer + docker-compose build --build-arg VERSION=0.0.1 + +### Run ### + + docker-compose run --rm gatherer + +## Ports ## + +This container exposes no ports. + +## Environment Variables ## + +This container supports no environment variables. + +## Secrets ## + +| Filename | Purpose | +|---------------|----------------------| +| database_creds.yml | Cyber Hygiene database credentials in [this format](https://github.com/cisagov/mongo-db-from-config#usage) | + +## Volumes ## + +| Mount point | Purpose | +|-------------|----------------| +| /home/cisa/shared | Output | + +## Contributing ## + +We welcome contributions! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for +details. + ## License ## -This project is in the worldwide [public domain](LICENSE.md). +This project is in the worldwide [public domain](LICENSE). 
This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through diff --git a/buildx-dockerfile.sh b/buildx-dockerfile.sh new file mode 100755 index 0000000..46710e9 --- /dev/null +++ b/buildx-dockerfile.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Create a Dockerfile suitable for a multi-platform build using buildx +# See: https://docs.docker.com/buildx/working-with-buildx/ + +set -o nounset +set -o errexit +set -o pipefail + +DOCKERFILE=Dockerfile +DOCKERFILEX=Dockerfile-x + +# We don't want this expression to expand. +# shellcheck disable=SC2016 +sed 's/^FROM /FROM --platform=$TARGETPLATFORM /g' < $DOCKERFILE > $DOCKERFILEX diff --git a/bump_version.sh b/bump_version.sh index 3c66e73..1d47c62 100755 --- a/bump_version.sh +++ b/bump_version.sh @@ -1,39 +1,47 @@ -#/usr/bin/env bash +#!/usr/bin/env bash # bump_version.sh (show|major|minor|patch|prerelease|build) -VERSION_FILE=version.txt +set -o nounset +set -o errexit +set -o pipefail + +VERSION_FILE=src/version.txt HELP_INFORMATION="bump_version.sh (show|major|minor|patch|prerelease|build|finalize)" old_version=$(cat $VERSION_FILE) -if [[ $# -ne 1 ]] +if [ $# -ne 1 ] then - echo $HELP_INFORMATION + echo "$HELP_INFORMATION" else - case $1 in - major|minor|patch|prerelease|build) - new_version=$(python -c "import semver; print(semver.bump_$1('$old_version'))") - echo Changing version from $old_version to $new_version - sed -i "s/$old_version/$new_version/" $VERSION_FILE - git add $VERSION_FILE - git commit -m"Bumped version from $old_version to $new_version" - git push - ;; - finalize) - new_version=$(python -c "import semver; print(semver.finalize_version('$old_version'))") - echo Changing version from $old_version to $new_version - sed -i "s/$old_version/$new_version/" $VERSION_FILE - git add $VERSION_FILE - git commit -m"Bumped version from $old_version to $new_version" - git push - ;; - show) - echo $old_version - ;; - *) - echo 
$HELP_INFORMATION - ;; - esac + case $1 in + major|minor|patch|prerelease|build) + new_version=$(python -c "import semver; print(semver.bump_$1('$old_version'))") + echo Changing version from "$old_version" to "$new_version" + tmp_file=/tmp/version.$$ + sed "s/$old_version/$new_version/" $VERSION_FILE > $tmp_file + mv $tmp_file $VERSION_FILE + git add $VERSION_FILE + git commit -m"Bumping version from $old_version to $new_version" + git push + ;; + finalize) + new_version=$(python -c "import semver; print(semver.finalize_version('$old_version'))") + echo Changing version from "$old_version" to "$new_version" + tmp_file=/tmp/version.$$ + sed "s/$old_version/$new_version/" $VERSION_FILE > $tmp_file + mv $tmp_file $VERSION_FILE + git add $VERSION_FILE + git commit -m"Bumping version from $old_version to $new_version" + git push + ;; + show) + echo "$old_version" + ;; + *) + echo "$HELP_INFORMATION" + ;; + esac fi diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..535c637 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +--- +version: "3.7" + +# This docker-compose file is used to build the container + +secrets: + database_creds: + file: ./src/secrets/database_creds.yml + +services: + gatherer: + # Run the container normally + build: + # VERSION must be specified on the command line: + # e.g., --build-arg VERSION=0.0.1 + context: . 
+ dockerfile: Dockerfile + depends_on: + - redis + image: cisagov/gatherer + init: true + restart: "no" + secrets: + - source: database_creds + target: database_creds.yml + redis: + image: redis:alpine diff --git a/push_readme.sh b/push_readme.sh new file mode 100755 index 0000000..12e2340 --- /dev/null +++ b/push_readme.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# Push the README.md file to the docker hub repository + +# Requires the following environment variables to be set: +# DOCKER_PASSWORD, DOCKER_USERNAME, IMAGE_NAME + +set -o nounset +set -o errexit +set -o pipefail + +echo "Logging in and requesting JWT..." +token=$(curl --silent --request POST \ + --header "Content-Type: application/json" \ + --data \ + '{"username": "'"$DOCKER_USERNAME"'", "password": "'"$DOCKER_PASSWORD"'"}' \ + https://hub.docker.com/v2/users/login/ | jq --raw-output .token) + +echo "Pushing README file..." +code=$(jq --null-input --arg msg "$(&2 + exit 1 + ;; + *) # preserve positional arguments + PARAMS="$PARAMS $1" + shift + ;; + esac +done + +# set positional arguments in their proper place +eval set -- "$PARAMS" + +# Check to see if pyenv is installed +if [ -z "$(command -v pyenv)" ] || [ -z "$(command -v pyenv-virtualenv)" ]; then + echo "pyenv and pyenv-virtualenv are required." + if [[ "$OSTYPE" == "darwin"* ]]; then + cat << 'END_OF_LINE' + + On the Mac, we recommend installing brew, https://brew.sh/. Then installation + is as simple as `brew install pyenv pyenv-virtualenv` and adding this to your + profile: + + eval "$(pyenv init -)" + eval "$(pyenv virtualenv-init -)" + +END_OF_LINE + + fi + cat << 'END_OF_LINE' + For Linux, Windows Subsystem for Linux (WSL), or on the Mac (if you don't want + to use "brew") you can use https://github.com/pyenv/pyenv-installer to install + the necessary tools. Before running this ensure that you have installed the + prerequisites for your platform according to the pyenv wiki page, + https://github.com/pyenv/pyenv/wiki/common-build-problems. 
+ + On WSL you should treat your platform as whatever Linux distribution you've + chosen to install. + + Once you have installed "pyenv" you will need to add the following lines to + your ".bashrc": + + export PATH="$PATH:$HOME/.pyenv/bin" + eval "$(pyenv init -)" + eval "$(pyenv virtualenv-init -)" +END_OF_LINE + exit 1 +fi + +set +o nounset +# Determine the virtual environment name +if [ "$1" ]; then + # Use the user-provided environment name + env_name=$1 +else + # Set the environment name to the last part of the working directory. + env_name=${PWD##*/} +fi +set -o nounset + +# Remove any lingering local configuration. +if [ $FORCE -ne 0 ]; then + rm -f .python-version + pyenv virtualenv-delete --force "${env_name}" || true +elif [[ -f .python-version ]]; then + cat << 'END_OF_LINE' + An existing .python-version file was found. Either remove this file yourself + or re-run with --force option to have it deleted along with the associated + virtual environment. + + rm .python-version + +END_OF_LINE + exit 1 +fi + +# Create a new virtual environment for this project +if ! pyenv virtualenv "${env_name}"; then + cat << END_OF_LINE + An existing virtual environment named $env_name was found. Either delete this + environment yourself or re-run with --force option to have it deleted. + + pyenv virtualenv-delete ${env_name} + +END_OF_LINE + exit 1 +fi + +# Set the local application-specific Python version(s) by writing the +# version name to a file named `.python-version'. +pyenv local "${env_name}" + +# Upgrade pip and friends +python3 -m pip install --upgrade pip setuptools wheel + +# Find a requirements file (if possible) and install +for req_file in "requirements-dev.txt" "requirements-test.txt" "requirements.txt"; do + if [[ -f $req_file ]]; then + pip install --requirement $req_file + break + fi +done + +# Install git pre-commit hooks now or later. 
+pre-commit install ${INSTALL_HOOKS:+"--install-hooks"} + +# Setup git remotes from lineage configuration +# This could fail if the remotes are already setup, but that is ok. +set +o errexit + +eval "$(python3 << 'END_OF_LINE' +from pathlib import Path +import yaml +import sys + +LINEAGE_CONFIG = Path(".github/lineage.yml") + +if not LINEAGE_CONFIG.exists(): + print("No lineage configuration found.", file=sys.stderr) + sys.exit(0) + +with LINEAGE_CONFIG.open("r") as f: + lineage = yaml.safe_load(stream=f) + +if lineage["version"] == "1": + for parent_name, v in lineage["lineage"].items(): + remote_url = v["remote-url"] + print(f"git remote add {parent_name} {remote_url};") + print(f"git remote set-url --push {parent_name} no_push;") +else: + print(f'Unsupported lineage version: {lineage["version"]}', file=sys.stderr) +END_OF_LINE +)" + +# Qapla +echo "Success!" diff --git a/src/fed_hostnames.py b/src/fed_hostnames.py new file mode 100755 index 0000000..1cc0055 --- /dev/null +++ b/src/fed_hostnames.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python + +"""Output a list of all detected Federal hostnames. + +Usage: + COMMAND_NAME [--db-creds-file=FILENAME] [--debug] [--output-file=FILENAME] + COMMAND_NAME (-h | --help) + COMMAND_NAME --version + +Options: + -h --help Show this screen + --version Show version + --db-creds-file=FILENAME A YAML file containing the CYHY database + credentials. + [default: /run/secrets/database_creds.yml] + -d --debug A Boolean value indicating whether the output + should include debugging messages or not. + -o --output-file=FILENAME The name of the output file. 
+ [default: fed_hostnames.csv] + +""" + +# Standard Python Libraries +import logging +import re + +# Third-Party Libraries +from docopt import docopt +from mongo_db_from_config import db_from_config +import pymongo.errors +import yaml + +# The ports that are most commonly used by public-facing web servers +WebServerPorts = {80, 280, 443, 591, 593, 832, 8080, 8888, 4443, 8443, 9443, 10443} + +# The ports that are most commonly used by mail servers +MailServerPorts = {25, 110, 143, 465, 587, 993, 995, 2525} + + +def get_all_descendants(database, owner): + """Return all (non-retired) descendents of the Cyber Hygiene parent. + + Parameters + ---------- + db : MongoDatabase + The Mongo database from which Cyber Hygiene customer data can + be retrieved. + + parent : str + The Cyber Hygiene parent for which all descendents are desired. + + Returns + ------- + list of str: The descendents of the Cyber Hygiene parent. + """ + current_request = database.requests.find_one({"_id": owner}) + if not current_request: + raise ValueError(owner + " has no request document") + + descendants = [] + if current_request.get("children"): + for child in current_request["children"]: + if not database.requests.find_one({"_id": child}).get("retired"): + descendants.append(child) + descendants += get_all_descendants(database, child) + + return descendants + + +def main(): + """Output a list of all detected Federal hostnames.""" + global __doc__ + __doc__ = re.sub("COMMAND_NAME", __file__, __doc__) + args = docopt(__doc__, version="v0.0.1") + + # Set up logging + log_level = logging.WARNING + if args["--debug"]: + log_level = logging.DEBUG + logging.basicConfig( + format="%(asctime)-15s %(levelname)s %(message)s", level=log_level + ) + + db_creds_file = args["--db-creds-file"] + try: + db = db_from_config(db_creds_file) + except OSError: + logging.critical( + "Database configuration file {} does not exist".format(db_creds_file), + exc_info=True, + ) + return 1 + except yaml.YAMLError: + 
logging.critical( + "Database configuration file {} does not contain valid YAML".format( + db_creds_file + ), + exc_info=True, + ) + return 1 + except KeyError: + logging.critical( + "Database configuration file {} does not contain the expected keys".format( + db_creds_file + ), + exc_info=True, + ) + return 1 + except pymongo.errors.ConnectionError: + logging.critical( + "Unable to connect to the database server in {}".format(db_creds_file), + exc_info=True, + ) + return 1 + except pymongo.errors.InvalidName: + logging.critical( + "The database in {} does not exist".format(db_creds_file), exc_info=True + ) + return 1 + + # Get all Federal organizations + fed_orgs = get_all_descendants(db, "FEDERAL") + logging.debug("Federal orgs are {}".format(fed_orgs)) + + # Get all Federal hosts with open ports that indicate a possible web or + # email server (latest scan only)... + potential_web_or_email_server_ips = { + i["ip_int"] + for i in db.port_scans.find( + { + "latest": True, + "owner": {"$in": fed_orgs}, + "port": {"$in": list(WebServerPorts | MailServerPorts)}, + }, + {"_id": False, "ip_int": True}, + ) + } + # ...of these, get all Federal hosts with a detected hostname (latest scan + # only) + fed_hosts = db.host_scans.find( + { + "latest": True, + "ip_int": {"$in": list(potential_web_or_email_server_ips)}, + "owner": {"$in": fed_orgs}, + "hostname": {"$ne": None}, + }, + {"_id": False, "hostname": True, "owner": True}, + ) + + with open(args["--output-file"], "w") as file: + for host in fed_hosts: + file.write("{},{}\n".format(host["hostname"], host["owner"])) + logging.debug("Federal host {}".format(host)) + + +if __name__ == "__main__": + main() diff --git a/gather-domains.sh b/src/gather-domains.sh similarity index 71% rename from gather-domains.sh rename to src/gather-domains.sh index e9e6108..87fe535 100755 --- a/gather-domains.sh +++ b/src/gather-domains.sh @@ -3,19 +3,19 @@ # Gather hostnames and do any necessary scrubbing of the data. 
### -HOME_DIR=/home/gatherer +HOME_DIR=/home/cisa OUTPUT_DIR=$HOME_DIR/shared/artifacts # Create the output directory, if necessary if [ ! -d $OUTPUT_DIR ] then - mkdir $OUTPUT_DIR + mkdir $OUTPUT_DIR fi ### # Grab any extra Federal hostnames that CYHY knows about ### -scripts/fed_hostnames.py --output-file=$OUTPUT_DIR/cyhy_fed_hostnames.csv +./fed_hostnames.py --output-file=$OUTPUT_DIR/cyhy_fed_hostnames.csv ### # We need a copy of current-federal since we want to add and remove @@ -25,14 +25,14 @@ scripts/fed_hostnames.py --output-file=$OUTPUT_DIR/cyhy_fed_hostnames.csv # here. ### wget https://raw.githubusercontent.com/cisagov/dotgov-data/main/current-federal.csv \ - -O $OUTPUT_DIR/current-federal.csv + -O $OUTPUT_DIR/current-federal.csv ### # Grab our online list of extra, non-.gov domains that the # corresponding stakeholder has requested we scan. We have verified # that the stakeholder controls these domains. ### wget https://raw.githubusercontent.com/cisagov/scan-target-data/develop/current-federal-non-dotgov.csv \ - -O $OUTPUT_DIR/current-federal-non-dotgov.csv + -O $OUTPUT_DIR/current-federal-non-dotgov.csv ### # Concatenate current-federal.csv with the list of extra, non-.gov # domains. @@ -41,10 +41,10 @@ wget https://raw.githubusercontent.com/cisagov/scan-target-data/develop/current- # before the concatenation. ### tail -n +2 $OUTPUT_DIR/current-federal-non-dotgov.csv > \ - /tmp/current-federal-non-dotgov.csv + /tmp/current-federal-non-dotgov.csv cat $OUTPUT_DIR/current-federal.csv \ - /tmp/current-federal-non-dotgov.csv > \ - $OUTPUT_DIR/current-federal_modified.csv + /tmp/current-federal-non-dotgov.csv > \ + $OUTPUT_DIR/current-federal_modified.csv ### # Remove the FED.US domain. This is really a top-level domain, # analogous to .gov or .com. It is only present in current-federal as @@ -58,7 +58,7 @@ sed -i '/^FED\.US,.*/d' $OUTPUT_DIR/current-federal_modified.csv # Also remove all other domains that belong to the judicial branch. 
### sed -i '/[^,]*,[^,]*,U\.S\. Courts,/d;/[^,]*,[^,]*,The Supreme Court,/d' \ - $OUTPUT_DIR/current-federal_modified.csv + $OUTPUT_DIR/current-federal_modified.csv ### # Remove all domains that belong to the legislative branch, with the # exception of the House of Representatives (HOR). HOR specifically @@ -70,7 +70,7 @@ sed -i '/[^,]*,[^,]*,U\.S\. Courts,/d;/[^,]*,[^,]*,The Supreme Court,/d' \ # (Congress)" in current-federal. ### sed -i '/[^,]*,[^,]*,Library of Congress,/d;/[^,]*,[^,]*,Government Publishing Office,/d;/[^,]*,[^,]*,Congressional Office of Compliance,/d;/[^,]*,[^,]*,Stennis Center for Public Service,/d;/[^,]*,[^,]*,U.S. Capitol Police,/d;/[^,]*,[^,]*,Architect of the Capitol,/d' \ - $OUTPUT_DIR/current-federal_modified.csv + $OUTPUT_DIR/current-federal_modified.csv ### # Gather hostnames using GSA/data, analytics.usa.gov, Censys, EOT, @@ -87,16 +87,16 @@ sed -i '/[^,]*,[^,]*,Library of Congress,/d;/[^,]*,[^,]*,Government Publishing O # include/current-federal-non-dotgov.csv ### $HOME_DIR/domain-scan/gather current_federal,analytics_usa_gov,censys_snapshot,rapid,eot_2012,eot_2016,cyhy,other \ - --suffix=.gov,.edu,.com,.net,.org,.us --ignore-www --include-parents \ - --parents=$OUTPUT_DIR/current-federal_modified.csv \ - --current_federal=$OUTPUT_DIR/current-federal_modified.csv \ - --analytics_usa_gov=https://analytics.usa.gov/data/live/sites.csv \ - --censys_snapshot=https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/censys-federal-snapshot.csv \ - --rapid=https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/rdns-federal-snapshot.csv \ - --eot_2012=https://raw.githubusercontent.com/cisagov/scan-target-data/develop/eot-2012.csv \ - --eot_2016=https://raw.githubusercontent.com/cisagov/scan-target-data/develop/eot-2016.csv \ - --cyhy=$OUTPUT_DIR/cyhy_fed_hostnames.csv \ - --other=https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/other-websites.csv + --suffix=.gov,.edu,.com,.net,.org,.us --ignore-www 
--include-parents \ + --parents=$OUTPUT_DIR/current-federal_modified.csv \ + --current_federal=$OUTPUT_DIR/current-federal_modified.csv \ + --analytics_usa_gov=https://analytics.usa.gov/data/live/sites.csv \ + --censys_snapshot=https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/censys-federal-snapshot.csv \ + --rapid=https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/rdns-federal-snapshot.csv \ + --eot_2012=https://raw.githubusercontent.com/cisagov/scan-target-data/develop/eot-2012.csv \ + --eot_2016=https://raw.githubusercontent.com/cisagov/scan-target-data/develop/eot-2016.csv \ + --cyhy=$OUTPUT_DIR/cyhy_fed_hostnames.csv \ + --other=https://raw.githubusercontent.com/GSA/data/master/dotgov-websites/other-websites.csv cp results/gathered.csv gathered.csv cp results/gathered.csv $OUTPUT_DIR/gathered.csv diff --git a/src/secrets/database_creds.yml b/src/secrets/database_creds.yml new file mode 100644 index 0000000..1ce60fa --- /dev/null +++ b/src/secrets/database_creds.yml @@ -0,0 +1,6 @@ +--- +version: '1' + +database: + name: cyhy + uri: mongodb://readonly:the_password@cyhy.example.com:27017/cyhy diff --git a/src/version.txt b/src/version.txt new file mode 100644 index 0000000..88c5fb8 --- /dev/null +++ b/src/version.txt @@ -0,0 +1 @@ +1.4.0 diff --git a/tag.sh b/tag.sh index f72316d..fbb93cc 100755 --- a/tag.sh +++ b/tag.sh @@ -2,4 +2,4 @@ version=$(./bump_version.sh show) -git tag v$version && git push --tags +git tag v"$version" && git push --tags diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..847765e --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,45 @@ +"""pytest plugin configuration. 
+ +https://docs.pytest.org/en/latest/writing_plugins.html#conftest-py-plugins +""" +# Third-Party Libraries +import pytest + +MAIN_SERVICE_NAME = "gatherer" +VERSION_SERVICE_NAME = f"{MAIN_SERVICE_NAME}-version" + + +@pytest.fixture(scope="session") +def main_container(dockerc): + """Return the main container from the docker composition.""" + # find the container by name even if it is stopped already + return dockerc.containers(service_names=[MAIN_SERVICE_NAME], stopped=True)[0] + + +# See #57 +# @pytest.fixture(scope="session") +# def version_container(dockerc): +# """Return the version container from the docker composition. + +# The version container should just output the version of its underlying contents. +# """ +# # find the container by name even if it is stopped already +# return dockerc.containers(service_names=[VERSION_SERVICE_NAME], stopped=True)[0] + + +def pytest_addoption(parser): + """Add new commandline options to pytest.""" + parser.addoption( + "--runslow", action="store_true", default=False, help="run slow tests" + ) + + +def pytest_collection_modifyitems(config, items): + """Modify collected tests based on custom marks and commandline options.""" + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) diff --git a/tests/container_test.py b/tests/container_test.py new file mode 100644 index 0000000..e19fa5c --- /dev/null +++ b/tests/container_test.py @@ -0,0 +1,101 @@ +#!/usr/bin/env pytest -vs +"""Tests for example container.""" +# TODO: Make container tests functional +# See https://github.com/cisagov/gatherer/issues/57 + +# Standard Python Libraries +import os + +# import time + +# import pytest + +ENV_VAR = "ECHO_MESSAGE" +ENV_VAR_VAL = "Hello World from docker-compose!" 
+READY_MESSAGE = "This is a debug message" +SECRET_QUOTE = ( + "There are no secrets better kept than the secrets everybody guesses." # nosec +) +RELEASE_TAG = os.getenv("RELEASE_TAG") +VERSION_FILE = "src/version.txt" + + +def test_container_count(dockerc): + """Verify the test composition and container.""" + # stopped parameter allows non-running containers in results + assert ( + len(dockerc.containers(stopped=True)) == 2 + ), "Wrong number of containers were started." + + +# See #57 +# def test_wait_for_ready(main_container): +# """Wait for container to be ready.""" +# TIMEOUT = 10 +# for i in range(TIMEOUT): +# if READY_MESSAGE in main_container.logs().decode("utf-8"): +# break +# time.sleep(1) +# else: +# raise Exception( +# f"Container does not seem ready. " +# f'Expected "{READY_MESSAGE}" in the log within {TIMEOUT} seconds.' +# ) + + +# See #57 +# def test_wait_for_exits(main_container, version_container): +# """Wait for containers to exit.""" +# assert main_container.wait() == 0, "Container service (main) did not exit cleanly" +# assert ( +# version_container.wait() == 0 +# ), "Container service (version) did not exit cleanly" + + +# See #57 +# def test_output(main_container): +# """Verify the container had the correct output.""" +# main_container.wait() # make sure container exited if running test isolated +# log_output = main_container.logs().decode("utf-8") +# assert SECRET_QUOTE in log_output, "Secret not found in log output." 
+ + +# See #57 +# @pytest.mark.skipif( +# RELEASE_TAG in [None, ""], reason="this is not a release (RELEASE_TAG not set)" +# ) +# def test_release_version(): +# """Verify that release tag version agrees with the module version.""" +# pkg_vars = {} +# with open(VERSION_FILE) as f: +# exec(f.read(), pkg_vars) # nosec +# project_version = pkg_vars["__version__"] +# assert ( +# RELEASE_TAG == f"v{project_version}" +# ), "RELEASE_TAG does not match the project version" + + +# See #57 +# def test_log_version(version_container): +# """Verify the container outputs the correct version to the logs.""" +# version_container.wait() # make sure container exited if running test isolated +# log_output = version_container.logs().decode("utf-8").strip() +# pkg_vars = {} +# with open(VERSION_FILE) as f: +# exec(f.read(), pkg_vars) # nosec +# project_version = pkg_vars["__version__"] +# assert ( +# log_output == project_version +# ), f"Container version output to log does not match project version file {VERSION_FILE}" + + +# See #57 +# def test_container_version_label_matches(version_container): +# """Verify the container version label is the correct version.""" +# pkg_vars = {} +# with open(VERSION_FILE) as f: +# exec(f.read(), pkg_vars) # nosec +# project_version = pkg_vars["__version__"] +# assert ( +# version_container.labels["org.opencontainers.image.version"] == project_version +# ), "Dockerfile version label does not match project version" diff --git a/travis_scripts/build_docker_image.sh b/travis_scripts/build_docker_image.sh deleted file mode 100644 index 9b8b3b5..0000000 --- a/travis_scripts/build_docker_image.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -set -o nounset -set -o errexit -set -o pipefail - -# semver uses a plus character for the build number (if present). -# This is invalid for a Docker tag, so we replace it with a minus. -version=$(./bump_version.sh show|sed "s/+/-/") -docker build -t "$IMAGE_NAME":"$version" . 
diff --git a/travis_scripts/deploy_to_docker_hub.sh b/travis_scripts/deploy_to_docker_hub.sh deleted file mode 100644 index cb04d01..0000000 --- a/travis_scripts/deploy_to_docker_hub.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -set -o nounset -set -o errexit -set -o pipefail - -echo "$DOCKER_PW" | docker login -u "$DOCKER_USER" --password-stdin -# semver uses a plus character for the build number (if present). -# This is invalid for a Docker tag, so we replace it with a minus. -version=$(./bump_version.sh show|sed "s/+/-/") -docker push "$IMAGE_NAME":"$version" diff --git a/version.txt b/version.txt deleted file mode 100644 index 3a3cd8c..0000000 --- a/version.txt +++ /dev/null @@ -1 +0,0 @@ -1.3.1