Skip to content
This repository has been archived by the owner on Jan 8, 2025. It is now read-only.

Commit

Permalink
RC 0.4.1 (#256)
Browse files Browse the repository at this point in the history
Co-authored-by: Matthew Printz <[email protected]>
Co-authored-by: Daniel Chang <[email protected]>
Co-authored-by: dchang <[email protected]>
  • Loading branch information
4 people authored Jun 21, 2023
1 parent 6fe12b3 commit 5ada4ca
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 52 deletions.
5 changes: 3 additions & 2 deletions api.env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ SQL_DB=askem
DKG_URL=http://34.230.33.149
DKG_API_PORT=8771
DKG_DESC_PORT=8772
AWS_REGION=us-east-1
AWS_ACCESS_KEY_ID=miniouser
AWS_SECRET_ACCESS_KEY=miniopass
DATASET_STORAGE_BASE_URL=s3://datasets/
STORAGE_HOST=http://minio:9000
S3_BUCKET=askem-staging-data-service
S3_DATASET_PATH=datasets
S3_RESULTS_PATH=simulations
OPENAI_KEY=sk
NEO4J_AUTH=neo4j/password
Expand All @@ -32,4 +33,4 @@ KIBANA_PORT=5601
MEM_LIMIT=1073741824
#COMPOSE_PROJECT_NAME=tds-elasticsearch
SEED_DATA=false
ES_INDEX_PREFIX=tds_
4 changes: 2 additions & 2 deletions migrate/scripts/file_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
S3_BUCKET = os.getenv("S3_BUCKET")
S3_DATASET_PATH = os.getenv("S3_DATASET_PATH")
S3_RESULT_PATH = os.getenv("S3_RESULT_PATH")
S3_RESULTS_PATH = os.getenv("S3_RESULTS_PATH")

migrate_dir = Path(os.path.dirname(__file__))
file_dir = f"{migrate_dir.parent}/seeds/files"
Expand All @@ -26,7 +26,7 @@

s3_paths = {
"datasets": S3_DATASET_PATH,
"simulations": S3_RESULT_PATH,
"simulations": S3_RESULTS_PATH,
}


Expand Down
42 changes: 26 additions & 16 deletions tds/lib/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,45 @@ def s3_client():
"""
Function sets up an S3 client based on env settings.
"""
s3_opts = {
"config": boto3.session.Config(signature_version="s3v4"),
}
if settings.STORAGE_HOST:
s3_ = boto3.client(
"s3",
endpoint_url=settings.STORAGE_HOST,
aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
aws_session_token=None,
config=boto3.session.Config(signature_version="s3v4"),
verify=False,
)
s3_opts["endpoint_url"] = settings.STORAGE_HOST
else:
s3_ = boto3.client("s3")
# @TODO: Deprecate this and use AWS_DEFAULT_REGION in ENV.
s3_opts["region_name"] = settings.AWS_REGION

s3_ = boto3.client("s3", **s3_opts)

return s3_


def get_file_path(entity_id: str | int, file_name: str) -> str:
def get_file_path(
entity_id: str | int, file_name: str, path: str = settings.S3_RESULTS_PATH
) -> str:
"""
Function builds a file path for s3.
"""
return os.path.join(settings.S3_RESULT_PATH, str(entity_id), file_name)
return os.path.join(path, str(entity_id), file_name)


def get_presigned_url(entity_id: str | int, file_name: str, method: str):
def get_presigned_url(
entity_id: str | int,
file_name: str,
method: str,
path: str = settings.S3_RESULTS_PATH,
):
"""
Function generates a presigned URL for the HMI client.
"""
s3_ = s3_client()
s3_key = get_file_path(entity_id, file_name)
return s3_.generate_presigned_url(
ClientMethod=method, Params={"Bucket": settings.S3_BUCKET, "Key": s3_key}
s3_key = get_file_path(entity_id=entity_id, file_name=file_name, path=path)

presigned_url = s3_.generate_presigned_url(
ClientMethod=method,
Params={"Bucket": settings.S3_BUCKET, "Key": s3_key},
ExpiresIn=1500,
)

return presigned_url
13 changes: 10 additions & 3 deletions tds/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@
TDS Utilities.
"""

import typing

from pydantic import BaseModel

_PATCHABLE_MODELS: typing.Dict[BaseModel, BaseModel] = {}


def patchable(model: BaseModel) -> BaseModel:
"""
Create a fully optional version of a model for use with PATCH
"""
model_name = f"Patchable{model.__name__}"
if model_name in _PATCHABLE_MODELS:
return _PATCHABLE_MODELS[model_name]

# Create new class that inherits from passed in class
class PatchableModel(model):
...
PatchableModel = type(model_name, (model,), {}) # pylint: disable=invalid-name

# Update the fields to be optional
for field_def in PatchableModel.__fields__.values():
field_def.required = False

_PATCHABLE_MODELS[model_name] = PatchableModel
return PatchableModel
31 changes: 11 additions & 20 deletions tds/modules/dataset/controller.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
"""
TDS Dataset
"""
import os.path
from logging import Logger

import boto3
from elasticsearch import NotFoundError
from fastapi import APIRouter, HTTPException, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

from tds.db.elasticsearch import es_client
from tds.lib.s3 import get_presigned_url
from tds.lib.utils import patchable
from tds.modules.dataset.model import Dataset
from tds.modules.dataset.response import dataset_response
Expand All @@ -21,18 +20,6 @@
logger = Logger(__name__)
es_index = Dataset.index

if settings.STORAGE_HOST:
s3 = boto3.client(
"s3",
endpoint_url=settings.STORAGE_HOST,
aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
aws_session_token=None,
config=boto3.session.Config(signature_version="s3v4"),
verify=False,
)
else:
s3 = boto3.client("s3")
es = es_client()


Expand Down Expand Up @@ -152,9 +139,11 @@ def dataset_upload_url(dataset_id: str | int, filename: str) -> JSONResponse:
Generates a pre-signed url to allow a user to upload to a secure S3 bucket
without end-user authentication.
"""
s3_key = os.path.join(settings.S3_DATASET_PATH, str(dataset_id), filename)
put_url = s3.generate_presigned_url(
ClientMethod="put_object", Params={"Bucket": settings.S3_BUCKET, "Key": s3_key}
put_url = get_presigned_url(
entity_id=dataset_id,
file_name=filename,
method="put_object",
path=settings.S3_DATASET_PATH,
)
return JSONResponse(
status_code=status.HTTP_200_OK,
Expand All @@ -171,9 +160,11 @@ def dataset_download_url(dataset_id: str | int, filename: str) -> JSONResponse:
Generates a pre-signed url to allow a user to download from a secure S3 bucket
without the bucket being public or end-user authentication.
"""
s3_key = os.path.join(settings.S3_DATASET_PATH, str(dataset_id), filename)
get_url = s3.generate_presigned_url(
ClientMethod="get_object", Params={"Bucket": settings.S3_BUCKET, "Key": s3_key}
get_url = get_presigned_url(
entity_id=dataset_id,
file_name=filename,
method="get_object",
path=settings.S3_DATASET_PATH,
)
return JSONResponse(
status_code=status.HTTP_200_OK,
Expand Down
11 changes: 7 additions & 4 deletions tds/modules/dataset/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,19 @@ class DatasetColumn(BaseModel):
default=ColumnTypes.UNKNOWN,
description=f"Datatype. One of: {', '.join(ColumnTypes)}",
)
description: Optional[str] = Field(
description="(Optional) Textual description of the dataset column.",
)
format_str: Optional[str] = Field(
description="(Optional) String that describes the formatting of the value",
)
annotations: dict[str, List[str]] = Field(
annotations: List[str] = Field(
description="Column annotations from the MIT data profiling tool",
)
metadata: Optional[dict[str, Any]] = Field(
description="(Optional) Unformatted metadata about the dataset",
)
grounding: Optional[dict[str, Grounding]] = Field(
grounding: Optional[Grounding] = Field(
description=(
"(Optional) Grounding of ontological concepts related to the column"
),
Expand All @@ -80,7 +83,7 @@ class Dataset(TdsModel):
description="Display/human name for the dataset",
)
description: Optional[str] = Field(
description="(Optional) Texual description of the dataset",
description="(Optional) Textual description of the dataset",
)
data_source_date: Optional[datetime] = Field(
description="(Optional) The date the data was created."
Expand All @@ -100,7 +103,7 @@ class Dataset(TdsModel):
source: Optional[str] = Field(
description="(Optional) Source of dataset",
)
grounding: Optional[dict[str, Grounding]] = Field(
grounding: Optional[Grounding] = Field(
description=(
"(Optional) Grounding of ontological concepts related to the dataset as"
" a whole"
Expand Down
2 changes: 1 addition & 1 deletion tds/modules/model_configuration/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def model_configuration_put(
raise HTTPException(
status_code=422, detail="ID in request URL and in payload must match."
)
res = payload.save(model_configuration_id)
res = payload.save()
logger.info("model_configuration updated: %s", model_configuration_id)
return JSONResponse(
status_code=status.HTTP_200_OK,
Expand Down
11 changes: 9 additions & 2 deletions tds/modules/simulation/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from tds.modules.simulation.model import Simulation
from tds.modules.simulation.response import SimulationResponse, simulation_response
from tds.operation import create, delete, retrieve, update
from tds.settings import settings

simulation_router = APIRouter()
logger = Logger(__name__)
Expand Down Expand Up @@ -153,7 +154,10 @@ def run_result_upload_url(simulation_id: str, filename: str) -> JSONResponse:
without end-user authentication.
"""
put_url = get_presigned_url(
entity_id=simulation_id, file_name=filename, method="put_object"
entity_id=simulation_id,
file_name=filename,
method="put_object",
path=settings.S3_RESULTS_PATH,
)
return JSONResponse(
content={
Expand All @@ -170,7 +174,10 @@ def run_result_download_url(simulation_id: str, filename: str) -> JSONResponse:
without the bucket being public or end-user authentication.
"""
get_url = get_presigned_url(
entity_id=simulation_id, file_name=filename, method="get_object"
entity_id=simulation_id,
file_name=filename,
method="get_object",
path=settings.S3_RESULTS_PATH,
)
return JSONResponse(
content={
Expand Down
2 changes: 1 addition & 1 deletion tds/modules/simulation/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Simulation(TdsModel):
_index = "simulation"
engine: SimulationEngine
type: SimulationType
status: SimulationStatus = Field(default="queued")
status: Optional[SimulationStatus] = Field(default="queued")
execution_payload: ExecutionPayload
start_time: Optional[datetime]
completed_time: Optional[datetime]
Expand Down
3 changes: 2 additions & 1 deletion tds/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ class Settings(BaseSettings):
ES_PASSWORD: str = ""
ES_INDEX_PREFIX: str = "tds_"
S3_DATASET_PATH: str = ""
S3_RESULT_PATH: str = ""
S3_RESULTS_PATH: str = ""
S3_BUCKET: str = ""
STORAGE_HOST: Optional[str] = None
AWS_REGION: Optional[str] = None
AWS_ACCESS_KEY_ID: Optional[str] = None
AWS_SECRET_ACCESS_KEY: Optional[str] = None

Expand Down

0 comments on commit 5ada4ca

Please sign in to comment.