Skip to content

Commit

Permalink
Bugfix: allow TLS certificate validation to be disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
whitfieldsdad committed Feb 5, 2024
1 parent 0ba36c0 commit 4215a66
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 14 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This repository contains a lightning-fast [Python 3 module](epss) and a series o
- Idempotently download daily sets of EPSS scores<sub>1</sub> in JSON, JSONL, CSV, or [Apache Parquet](https://parquet.apache.org/)<sub>2</sub> format
- Explore EPSS scores using [Polars](https://pola.rs/), a lightning-fast dataframe library written in Rust
- Optionally drop unchanged scores
- Optionally disable TLS certificate validation when downloading scores (e.g. to support environments where TLS man-in-the-middle (MitM) inspection is performed)
- [Easily](examples/get-scores-as-polars-dataframe.py) [switch](examples/get-changed-scores-as-polars-dataframe.py) between different versions<sub>3</sub> of the [EPSS model](https://www.first.org/epss/model)

<sub>1. By default, EPSS scores will be downloaded from 2023-03-07 onward, as this is the date when the outputs of EPSS v3 (v2023.03.01) were first published.</sub>
Expand Down
11 changes: 7 additions & 4 deletions epss/cli.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import sys
from typing import Optional, Tuple
from typing import Optional
from epss.constants import *
from epss.client import PolarsClient as Client, Query
from epss.client import PolarsClient as Client
from epss.json_encoder import JSONEncoder
from epss import util
import requests.packages
import polars as pl
import logging
import click
Expand Down Expand Up @@ -48,6 +48,9 @@ def main(
level = logging.DEBUG if verbose else logging.INFO
logging.basicConfig(level=level, format='%(asctime)s %(levelname)s %(name)s %(message)s')

if verify_tls is False:
requests.packages.urllib3.disable_warnings()

if include_all_scores:
include_v1_scores = True
include_v2_scores = True
Expand All @@ -65,7 +68,7 @@ def main(


@main.command('scores')
@click.option('--workdir', '-w', required=True, help='Work directory')
@click.option('--workdir', '-w', default=SCORES_BY_DATE_WORKDIR, show_default=True, help='Work directory')
@click.option('--min-date', '-a', show_default=True, help='Minimum date')
@click.option('--date', '-d', help='Date')
@click.option('--max-date', '-b', help='Maximum date')
Expand Down
15 changes: 6 additions & 9 deletions epss/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
import itertools
import os
import re
import sys
from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
from typing import Any, Iterable, Iterator, Optional, Tuple, Union

import requests
from epss import util
from epss.constants import DEFAULT_FILE_FORMAT, TIME, V1_RELEASE_DATE, V2_RELEASE_DATE, V3_RELEASE_DATE
import polars as pl
import pandas as pd
import concurrent.futures

import logging
Expand Down Expand Up @@ -176,7 +174,7 @@ def download_scores_by_date(self, workdir: str, date: TIME):
logger.debug("Scores for %s have already been downloaded: %s", date.isoformat(), path)
return

url = get_download_url(date)
url = get_download_url(date, verify_tls=self.verify_tls)
logger.debug('Downloading scores for %s: %s -> %s', date.isoformat(), url, path)

response = requests.get(url, verify=self.verify_tls, stream=True)
Expand All @@ -201,7 +199,6 @@ class PolarsClient(BaseClient):
"""
A client for working with EPSS scores using Polars DataFrames.
"""
# TODO
def get_scores(
self,
workdir: str,
Expand Down Expand Up @@ -290,7 +287,7 @@ def iter_urls(

min_date, max_date = self.get_date_range(min_date, max_date)
for date in self.iter_dates(min_date, max_date):
yield get_download_url(date)
yield get_download_url(date, verify_tls=self.verify_tls)


def get_file_path(workdir: str, file_format: str, key: Union[datetime.date, str]) -> str:
Expand All @@ -307,7 +304,7 @@ def get_file_path(workdir: str, file_format: str, key: Union[datetime.date, str]
return os.path.join(workdir, f'{key}.{file_format}')


def get_download_url(date: Optional[TIME] = None) -> str:
def get_download_url(date: Optional[TIME] = None, verify_tls: bool = True) -> str:
"""
Returns the URL for downloading EPSS scores for the specified date.
Expand All @@ -319,7 +316,7 @@ def get_download_url(date: Optional[TIME] = None) -> str:
- https://epss.cyentia.com/epss_scores-2024-01-01.csv.gz
"""
date = util.parse_date(date) if date else get_max_date()
date = util.parse_date(date) if date else get_max_date(verify_tls=verify_tls)
return f"https://epss.cyentia.com/epss_scores-{date.isoformat()}.csv.gz"


Expand Down Expand Up @@ -404,7 +401,7 @@ def get_max_date(
Returns the latest publication date for EPSS scores under the specified model version constraints.
"""
if include_v3_scores:
return get_epss_v3_max_date()
return get_epss_v3_max_date(verify_tls=verify_tls)
elif include_v2_scores:
return get_epss_v2_max_date()
elif include_v1_scores:
Expand Down
3 changes: 2 additions & 1 deletion epss/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import tempfile

# Working directories (rooted in the system temp directory)
CACHE_DIR = os.path.join(tempfile.gettempdir(), '476c9b0d-79c6-4b7e-a31a-e18cec3d6444')
WORKDIR = os.path.join(tempfile.gettempdir(), '476c9b0d-79c6-4b7e-a31a-e18cec3d6444', 'epss')
SCORES_BY_DATE_WORKDIR = os.path.join(WORKDIR, 'scores-by-date')

# Release dates
V1_RELEASE_DATE = '2021-04-14'
Expand Down

0 comments on commit 4215a66

Please sign in to comment.