Skip to content

Commit

Permalink
Merge pull request #9 from nimh-dsst/add-work-in-doc
Browse files Browse the repository at this point in the history
Generate work for document
  • Loading branch information
joshlawrimore authored Nov 25, 2024
2 parents d9f0afb + c7525b7 commit 0ab28e0
Show file tree
Hide file tree
Showing 20 changed files with 369 additions and 155 deletions.
16 changes: 12 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,24 @@ jobs:
- name: Install dependencies
run: |
pip install uv
uv pip install --system -r <(uv pip compile --all-extras pyproject.toml)
pip install .[ci]
- name: Start Docker stack
run: |
cp .mockenv .env
docker compose -f .docker/postgres-compose.yaml up -d
- name: Run tests
run: pytest tests
- name: Run tests with coverage
run: |
coverage run -m pytest -s tests
coverage report
coverage xml
- name: Stop Docker stack
if: always()
run: docker compose -f compose.yaml -f compose.development.override.yaml down
run: docker compose -f .docker/postgres-compose.yaml down

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: coverage.xml
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ _version.py
pdfs/*
test-pdf/*.pdf
dsst_etl.egg-info/*
uv.lock
uv.lock
.coverage
6 changes: 6 additions & 0 deletions .mockenv
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@ POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=pdx
POSTGRES_DB_UNIT_TEST=pdx

# requests will be rejected without user-agent
USER_AGENT=

# metapub; get key from https://pubmed.ncbi.nlm.nih.gov for faster requests
NCBI_API_KEY=

S3_BUCKET_NAME=osm-pdf-uploads
HOSTNAME=localhost
USERNAME=quang
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ docker compose -f .docker/postgres-compose.yaml down -v
pre-commit install
# run the pre-commit hooks on all files
pre-commit run -all
pre-commit run --all-files
# run the tests
pytest
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""update non-null of document in works table
Revision ID: 4a908d10b459
Revises: 360c65a62392
Create Date: 2024-11-21 16:40:56.966690
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '4a908d10b459'
down_revision: Union[str, None] = '360c65a62392'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Make the document references of the ``works`` table nullable.

    Alters ``initial_document_id`` and then ``primary_document_id`` (both
    INTEGER columns) so that each accepts NULL.
    """
    for column_name in ("initial_document_id", "primary_document_id"):
        op.alter_column(
            "works",
            column_name,
            existing_type=sa.INTEGER(),
            nullable=True,
        )


def downgrade() -> None:
    """Make the document references of the ``works`` table non-nullable.

    Alters ``primary_document_id`` and then ``initial_document_id`` (both
    INTEGER columns) back to NOT NULL.

    NOTE(review): presumably the database rejects this while any row still
    holds NULL in either column - confirm and backfill before downgrading.
    """
    for column_name in ("primary_document_id", "initial_document_id"):
        op.alter_column(
            "works",
            column_name,
            existing_type=sa.INTEGER(),
            nullable=False,
        )
35 changes: 27 additions & 8 deletions dsst_etl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,45 @@
DSST ETL Package
"""

import logging
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

load_dotenv()
from .config import config

logger = logging.getLogger(__name__)


def get_db_url():
database_url = (
"postgresql://"
f"{os.environ['POSTGRES_USER']}"
f":{os.environ['POSTGRES_PASSWORD']}"
f"@{os.environ['POSTGRES_HOST']}:"
f"{os.environ['POSTGRES_PORT']}/{os.environ['POSTGRES_DB']}"
f"{config.POSTGRES_USER}"
f":{config.POSTGRES_PASSWORD}"
f"@{config.POSTGRES_HOST}:"
f"{config.POSTGRES_PORT}/{config.POSTGRES_DB}"
)
return database_url


def get_db_url_test():
    """Return the PostgreSQL connection URL for the unit-test database.

    The URL has the form ``postgresql://USER:PASSWORD@HOST:PORT/DB`` where
    every part is read from ``config`` and the database name comes from
    ``config.POSTGRES_DB_UNIT_TEST``.
    """
    credentials = f"{config.POSTGRES_USER}:{config.POSTGRES_PASSWORD}"
    location = f"{config.POSTGRES_HOST}:{config.POSTGRES_PORT}"
    return f"postgresql://{credentials}@{location}/{config.POSTGRES_DB_UNIT_TEST}"

def get_db_engine():
return create_engine(get_db_url())

def get_db_engine(is_test=False):
    """Create a SQLAlchemy engine for the main or the unit-test database.

    Args:
        is_test: When truthy, the URL comes from ``get_db_url_test()``;
            otherwise it comes from ``get_db_url()``.

    Returns:
        The engine produced by ``create_engine`` for the selected URL.
    """
    url = get_db_url_test() if is_test else get_db_url()
    return create_engine(url)


engine = get_db_engine()
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Expand Down
8 changes: 4 additions & 4 deletions dsst_etl/_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import os
from .config import config


def get_compute_context_id():
return hash(f"{os.environ.get('HOSTNAME')}_{os.environ.get('USERNAME')}")
return hash(f"{config.HOSTNAME}_{config.USERNAME}")


def get_bucket_name():
bucket_name = os.getenv('S3_BUCKET_NAME')
bucket_name = config.S3_BUCKET_NAME
if not bucket_name:
raise ValueError("S3_BUCKET_NAME environment variable is not set")
return bucket_name
return bucket_name
11 changes: 11 additions & 0 deletions dsst_etl/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from dotenv import dotenv_values


class Config:
    """Expose the entries of a mapping as instance attributes.

    Every key of ``config_dict`` becomes an attribute holding the matching
    value; a name that was not in the mapping is simply not set on the
    instance.
    """

    def __init__(self, config_dict):
        # Copy each (key, value) pair onto the instance.
        for entry in config_dict.items():
            setattr(self, *entry)


# Load the environment variables from the .env file
config = Config(dotenv_values(".env"))
20 changes: 13 additions & 7 deletions dsst_etl/db.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
import os
import logging

from dsst_etl import get_db_engine
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy_utils import create_database, database_exists

from dsst_etl import get_db_engine

from .models import Base

logger = logging.getLogger(__name__)


def get_db_session():
engine = get_db_engine()
def get_db_session(is_test=False):
engine = get_db_engine(is_test)
Session = sessionmaker(bind=engine)
return Session()


def init_db():
engine = get_db_engine()
def init_db(is_test=False):
engine = get_db_engine(is_test)

if not database_exists(engine.url):
logger.info("Creating database.....")
create_database(engine.url)
Base.metadata.create_all(engine)
13 changes: 0 additions & 13 deletions dsst_etl/extract.py

This file was deleted.

11 changes: 0 additions & 11 deletions dsst_etl/load.py

This file was deleted.

19 changes: 5 additions & 14 deletions dsst_etl/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from datetime import datetime

from sqlalchemy import Column, DateTime, ForeignKey, Integer, LargeBinary, String, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func

Base = declarative_base()
Expand All @@ -14,12 +11,8 @@ class Works(Base):
id = Column(Integer, primary_key=True)
created_at = Column(DateTime, default=func.now())
modified_at = Column(DateTime, default=func.now(), onupdate=func.now())
initial_document_id = Column(
Integer, ForeignKey("documents.id"), nullable=False
)
primary_document_id = Column(
Integer, ForeignKey("documents.id"), nullable=False
)
initial_document_id = Column(Integer, ForeignKey("documents.id"), nullable=True)
primary_document_id = Column(Integer, ForeignKey("documents.id"), nullable=True)
provenance_id = Column(Integer, ForeignKey("provenance.id"))

# Relationships
Expand All @@ -30,7 +23,7 @@ class Works(Base):

class Documents(Base):
__tablename__ = "documents"

id = Column(Integer, primary_key=True)
hash_data = Column(String, nullable=False, unique=True)
created_at = Column(DateTime, default=func.now())
Expand All @@ -42,7 +35,6 @@ class Documents(Base):
# provenance = relationship("Provenance")



class Provenance(Base):
__tablename__ = "provenance"

Expand All @@ -52,4 +44,3 @@ class Provenance(Base):
compute = Column(Text)
personnel = Column(Text)
comment = Column(Text)

13 changes: 0 additions & 13 deletions dsst_etl/transform.py

This file was deleted.

Loading

0 comments on commit 0ab28e0

Please sign in to comment.