From e29376986cdc6d89272e603b857a60d4e16df253 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Fri, 22 Jul 2022 16:49:34 +0000 Subject: [PATCH] init --- .dockerignore | 22 +++++ .gitignore | 84 ++++++++++++++++++ CONTRIBUTING.md | 25 ++++++ Dockerfile | 22 +++++ LICENSE | 25 ++++++ README.md | 32 +++++++ pipelines/azure-build.yaml | 30 +++++++ service_manifest.yml | 90 +++++++++++++++++++ virustotal.py | 174 +++++++++++++++++++++++++++++++++++++ 9 files changed, 504 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 CONTRIBUTING.md create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 pipelines/azure-build.yaml create mode 100644 service_manifest.yml create mode 100644 virustotal.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a764cee --- /dev/null +++ b/.dockerignore @@ -0,0 +1,22 @@ +Dockerfile +.idea +.git + +pipelines +venv +env +test +tests +exemples +docs + +pip-log.txt +pip-delete-this-directory.txt +.tox +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +*.log diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c07ba8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,84 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# IDE files +.pydevproject +.python-version +.idea + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Cython debug symbols +cython_debug/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..6fb6aeb --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,25 @@ +# Assemblyline contributing guide + +This guide covers the basics of how to contribute to the Assemblyline project. + +Python code should follow the PEP8 guidelines defined here: [PEP8 Guidelines](https://www.python.org/dev/peps/pep-0008/). + +## Tell us what you want to build/fix +Before you start coding anything you should connect with the [Assemblyline community](https://groups.google.com/d/forum/cse-cst-assemblyline) to make sure no one else is working on the same thing and that whatever you are going to build still fits with the vision of the system. + +## Git workflow + +- Clone the repo to your own account +- Checkout and pull the latest commits from the master branch +- Make a branch +- Work in any way you like and make sure your changes actually work +- When you're satisfied with your changes, create a pull request to the main assemblyline repo + +#### Transfer your service repo +If you've worked on a new service that you want to be included in the default service selection you'll have to transfer the repo into our control. + +#### You are not allowed to merge: + +Even if you try to merge in your pull request, you will be denied. Only a few people in our team are allowed to merge code into our repositories. 
+ +We check for new pull requests every day and will merge them in once they have been approved by someone in our team. \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f2d5c2d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +ARG branch=latest +FROM cccs/assemblyline-v4-service-base:$branch + +ENV SERVICE_PATH virustotal.VirusTotal + +USER root +RUN pip install vt-py + +# Switch to assemblyline user +USER assemblyline + +# Copy VirusTotalDynamic service code +WORKDIR /opt/al_service +COPY . . + +# Patch version in manifest +ARG version=4.0.0.dev1 +USER root +RUN sed -i -e "s/\$SERVICE_TAG/$version/g" service_manifest.yml + +# Switch to assemblyline user +USER assemblyline diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..aa2157b --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +MIT License + +Copyright (c) 2022 Canadian Centre for Cyber Security + +Copyright title to all 3rd party software distributed with Assemblyline (AL) is +held by the respective copyright holders as noted in those files. Users are asked to +read the 3rd Party Licenses referenced with those assets. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..17b5498 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# VirusTotal Service + +This Assemblyline service checks (and optionally submits) files/URLs to VirusTotal for analysis. + +**NOTE**: This service **requires** you to have your own API key (Paid or Free). It is **not** preinstalled during a default installation. + +## Execution + +This service will actually submit the file to VirusTotal for analysis over the v3 REST API. + +Because the file leaves the Assemblyline infrastructure, if selected by the user, it will prompt the user and notify them that their file or metadata related to their file will leave our system. + +## Configuration +---- +### Service Configuration +|Name|Description| +|:---:|:---| +|api_key|Global VirusTotal API key for the system to use if the submitter doesn't provide their own| +|proxy|Proxy to connect to VirusTotal with| +|av_config|Configuration block that tells the service to ignore/remap certain AV verdicts from the File Report. See [Service Manifest](./service_manifest.yml) for more details.| + +### Submission Parameters +|Name|Description| +|:---:|:---| +|api_key|Individual VirusTotal API key| +|dynamic_submit|Instructs the service to submit to VirusTotal if there is no existing report about the submission| +|relationships|A list of comma-separated relationships that we want to get about the submission| +|download_evtx|Have the service download EVTX from sandbox analyses. (`deep_scan` required)| +|download_pcap|Have the service download PCAP from sandbox analyses. 
(`deep_scan` required)| + +Note: For operations like `download_evtx` & `download_pcap`, the `deep_scan` flag is required as it entails more API calls to +retrieve additional reports to get a full picture of the analysis done by VirusTotal. diff --git a/pipelines/azure-build.yaml b/pipelines/azure-build.yaml new file mode 100644 index 0000000..151f0e3 --- /dev/null +++ b/pipelines/azure-build.yaml @@ -0,0 +1,30 @@ +name: build + +trigger: + tags: + include: ["v*"] +pr: none + +pool: + vmImage: "ubuntu-20.04" + +stages: + - stage: deploy + jobs: + - job: deploy + displayName: Deploy containers to dockerhub + variables: + - group: deployment-information + steps: + - task: Docker@2 + displayName: Login to docker hub + inputs: + command: login + containerRegistry: dockerhub + - script: | + set -xv # Echo commands before they are run + export TAG=${BUILD_SOURCEBRANCH#"refs/tags/v"} + if [[ "$TAG" == *stable* ]]; then export BUILD_TYPE=stable; else export BUILD_TYPE=latest; fi + docker build --build-arg version=$TAG --build-arg branch=$BUILD_TYPE -t cccs/assemblyline-service-virustotal:$TAG -t cccs/assemblyline-service-virustotal:$BUILD_TYPE . + docker push cccs/assemblyline-service-virustotal --all-tags + displayName: Deploy to Docker Hub diff --git a/service_manifest.yml b/service_manifest.yml new file mode 100644 index 0000000..b0ea3ea --- /dev/null +++ b/service_manifest.yml @@ -0,0 +1,90 @@ +name: VirusTotal +version: $SERVICE_TAG +description: > + This service checks (and optionally submits) files/URLs to VirusTotal for analysis. 
+ +accepts: .* +rejects: empty|metadata/.* + +stage: CORE +category: External + +file_required: true +timeout: 600 +disable_cache: false + +enabled: false +is_external: true +licence_count: 0 + +uses_metadata: true +privileged: true + +config: + api_key: "" + proxy: "" + av_config: + term_blocklist: ["Antiy-AVL", "APEX", "Jiangmin", "not-a-virus"] # Ignore results based on presence of term in signature combination + revised_sig_score_map: # Remap scoring based on signature combination + TACHYON.Suspicious/XOX.Obfus.Gen.2: 100 + # The following should be added to the system-wide safelist + # Ikarus.Trojan-Downloader.MSWord.Agent: 0 + # Ikarus.Trojan-Downloader.VBA.Agent: 0 + # NANOAV.Exploit.Xml.CVE-2017-0199.equmby: 0 + # TACHYON.Suspicious/XOX.Obfus.Gen.3: 0 + # Vir.IT eXplorer.Office.VBA_Macro_Heur: 0 + # Vir.IT eXplorer.W97M/Downloader.AB: 0 + revised_kw_score_map: # Remap scoring based on keyword in an AV result + adware: 100 + +submission_params: + - default: "" + name: api_key + type: str + value: "" + - default: false + name: dynamic_submit + type: bool + value: false + - default: "contacted_ips,contacted_domains,contacted_urls,behaviours" + name: relationships + type: str + value: "contacted_ips,contacted_domains,contacted_urls,behaviours" + - default: false + name: download_evtx + type: bool + value: false + - default: false + name: download_pcap + type: bool + value: false + +heuristics: + - heur_id: 1 + name: File is infected + score: 1000 + filetype: "*" + description: Antivirus detection found. 
import json
import os
import time
from base64 import b64encode, urlsafe_b64encode
from vt import Client, APIError

from assemblyline_v4_service.common.base import ServiceBase
from assemblyline_v4_service.common.request import ServiceRequest
from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT
from assemblyline_v4_service.common.virustotal.common.processing import AVResultsProcessor
from assemblyline_v4_service.common.virustotal.file import v3 as parse_file_report
from assemblyline_v4_service.common.virustotal.url import v3 as parse_url_report
from assemblyline_v4_service.common.virustotal.ip_domain import v3 as parse_network_report
from assemblyline_v4_service.common.virustotal.behaviour import v3 as parse_sandbox_report


# Maximum number of submission attempts made when VirusTotal reports an exceeded quota
MAX_RETRY = 3


class VirusTotal(ServiceBase):
    """Assemblyline service that checks (and optionally submits) files/URLs against VirusTotal."""

    def __init__(self, config=None):
        """Set up the safelist interface and the AV result processor from the service config.

        Args:
            config: Service configuration passed through to ``ServiceBase``.
        """
        super().__init__(config)
        self.client = None
        self.safelist_interface = self.get_api_interface().get_safelist

        # Flatten every safelisted AV signature list into a single list
        sig_safelist = []
        for match_list in self.safelist_interface(["av.virus_name"]).get('match', {}).values():
            sig_safelist.extend(match_list)

        av_config = self.config["av_config"]
        self.processor = AVResultsProcessor(
            av_config['term_blocklist'],
            av_config['revised_sig_score_map'],
            av_config['revised_kw_score_map'],
            sig_safelist
        )

    def start(self):
        """Log that the service has started."""
        self.log.debug("VirusTotal service started")

    def execute(self, request: ServiceRequest):
        """Look up the submitted URL or file on VirusTotal and attach the parsed report to the request.

        Args:
            request: The service request being processed.

        Raises:
            ValueError: If no usable API key is available to build the VirusTotal client.
            APIError: If VirusTotal returns an error that is not handled by ``common_scan``.
        """
        # Ensure we can actually create a client connection
        try:
            # Submitter's API key should be used first, global is a fallback if configured
            self.client = Client(apikey=request.get_param("api_key") or self.config.get("api_key"),
                                 proxy=self.config.get('proxy') or None)
        except ValueError:
            self.log.error("No API key found for VirusTotal")
            raise

        try:
            result = Result()
            dynamic_submit = request.get_param('dynamic_submit')
            submitted_url = request.task.metadata.get('submitted_url', None)

            if submitted_url and request.task.depth == 0:
                # VirusTotal URL identifiers are the URL-safe base64 of the URL *without* padding
                url_id = urlsafe_b64encode(submitted_url.encode()).decode().strip("=")
                response = self.common_scan(type="url", sample=submitted_url, id=url_id,
                                            dynamic_submit=dynamic_submit)
            else:
                relationships = request.get_param('relationships') or ""
                wants_sandbox_files = request.get_param('download_evtx') or request.get_param('download_pcap')
                if wants_sandbox_files and 'behaviours' not in relationships:
                    # Requesting to download Sandbox files but relationship wasn't specified in request
                    relationships = f"{relationships},behaviours" if relationships else "behaviours"

                # Keep the handle open only for the duration of the scan so it is always released
                with open(request.file_path, 'rb') as sample:
                    response = self.common_scan(type="file", sample=sample,
                                                # ID with relationship params
                                                id=f"{request.sha256}?relationships={relationships}",
                                                dynamic_submit=dynamic_submit)

            result_section = self.analyze_response(response, request)
            if result_section:
                result.add_section(result_section)

            request.result = result
        finally:
            # Release the underlying HTTP session; a new client is created on every execution
            self.client.close()
            self.client = None

    @staticmethod
    def _relationship_title(relationship: str) -> str:
        """Turn a relationship name such as ``contacted_ips`` into a display title (``Contacted IPs``)."""
        return relationship.title().replace('_', " ").replace('Ip', 'IP').replace('Url', 'URL').replace('Itw', 'ITW')

    def _download_sandbox_files(self, response: dict, request: ServiceRequest):
        """Download the EVTX/PCAP of a sandbox report, when requested, and add them as extracted files."""
        # VirusTotal places object fields under 'attributes'; the top level is kept as a fallback
        attributes = response.get('attributes') or {}
        for downloadable_file in ['evtx', 'pcap']:
            available = attributes.get(f'has_{downloadable_file}', response.get(f'has_{downloadable_file}'))
            if not (request.get_param(f'download_{downloadable_file}') and available):
                continue

            # Download file and append for other services to analyze
            dest_path = os.path.join(self.working_directory, f"{response['id']}_{downloadable_file}")
            with open(dest_path, 'wb') as fh:
                fh.write(self.client.get(f'/file_behaviours/{response["id"]}/{downloadable_file}').read())

            sandbox_name = attributes.get('sandbox_name', response.get('sandbox_name', response['id']))
            request.add_extracted(
                dest_path, downloadable_file,
                description=f"{downloadable_file.upper()} from {sandbox_name}")

    def analyze_response(self, response: dict, request: ServiceRequest):
        """Convert a VirusTotal object into a result section, dispatching on the object's ``type``.

        Args:
            response: A VirusTotal API object (``file``, ``url``, ``domain``, ``ip_address`` or
                ``file_behaviour``); may be empty/None.
            request: The service request being processed.

        Returns:
            The section produced by the matching report parser, or None when there is no response
            or the object type is not handled.
        """
        if not response:
            return None

        report_type = response["type"]
        result_section = None
        if report_type == "file":
            result_section = parse_file_report(response, request.file_name, self.processor)

            # Get as much information as we can about other related objects (entails more API requests)
            relationships_section = ResultSection('Related Objects', parent=result_section, auto_collapse=True)
            relationships = response.get('relationships', {})
            if request.deep_scan:
                # Only concerned with relationships that contain content (minimize API calls needed)
                for relationship in [k for k, v in relationships.items() if v.get('data')]:
                    interim_section = ResultSection(title_text=self._relationship_title(relationship),
                                                    parent=relationships_section)
                    for analysis in self.client.get_json(f'/files/{request.sha256}/{relationship}?limit=40')['data']:
                        subsection = self.analyze_response(analysis, request)
                        if subsection:
                            interim_section.add_subsection(subsection)
            else:
                # Create a section that tags known relationships but don't assess them for scoring purposes
                for relationship, content in relationships.items():
                    data = content.get('data')
                    if not data:
                        continue
                    if isinstance(data, dict):
                        # To-one relationships carry a single object rather than a list
                        data = [data]

                    body = [d['id'] for d in data]
                    tag_type = data[0]['type'] if data[0]['type'] != 'ip_address' else 'ip'
                    if tag_type == 'file_behaviour':
                        # Place holder in case we want to fetch sandbox files
                        continue

                    # NOTE(review): the tag name is derived from the VirusTotal object type; confirm that
                    # every resulting 'network.static.<type>' is a tag type Assemblyline accepts.
                    # The section attaches itself to its parent on construction
                    ResultSection(title_text=self._relationship_title(relationship), body=", ".join(body),
                                  body_format=BODY_FORMAT.TEXT, parent=relationships_section,
                                  tags={f'network.static.{tag_type}': body}, auto_collapse=True)

        elif report_type == "url":
            result_section = parse_url_report(response)
        elif report_type in ["domain", "ip_address"]:
            result_section = parse_network_report(response)
        elif report_type == "file_behaviour":
            result_section = parse_sandbox_report(response)
            self._download_sandbox_files(response, request)

        return result_section

    def common_scan(self, type: str, sample, id, dynamic_submit):
        """Fetch the existing VirusTotal report for a sample, optionally submitting it when unknown.

        Args:
            type: Kind of object to look up, ``"file"`` or ``"url"``.
            sample: Binary file object (for files) or URL string (for URLs) used for submission.
            id: VirusTotal identifier (optionally with query parameters) used for the lookup.
            dynamic_submit: Whether the sample may be submitted when VirusTotal has no report.

        Returns:
            The report's ``data`` object, the submission result as a dict, or None when nothing
            is available.

        Raises:
            APIError: If the lookup fails for any reason other than the sample being unknown.
        """
        try:
            # Sample already submitted to VT, return existing report
            return self.client.get_json(f'/{type}s/{id}')['data']
        except APIError as e:
            if e.code != "NotFoundError":
                # Raise Exception for unknown handling to be fixed later
                raise
            # Sample not known to VT, proceed with submitting to VT if allowed

        # Only submit to VT if requested by the submitter
        if not dynamic_submit:
            return None

        # NOTE(review): scan_file/scan_url return an analysis object, whose type analyze_response
        # does not handle — confirm whether the finished report should be re-fetched here.
        for attempt in range(1, MAX_RETRY + 1):
            try:
                if type == 'file':
                    if hasattr(sample, 'seek'):
                        # A previous attempt may have consumed the stream
                        sample.seek(0)
                    return self.client.scan_file(sample, wait_for_completion=True).to_dict()
                return self.client.scan_url(sample, wait_for_completion=True).to_dict()
            except APIError as e:
                if e.code == "NotFoundError":
                    self.log.warning(f"VirusTotal has nothing on this {type}.")
                    return None
                if e.code != "QuotaExceededError":
                    self.log.error(e)
                    return None
                if attempt < MAX_RETRY:
                    self.log.warning("Quota Exceeded. Trying again in 60s.")
                    time.sleep(60)
                else:
                    self.log.warning(f"Quota Exceeded. Giving up after {MAX_RETRY} attempts.")

        return None