Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset internal jobs, external files, & server-side view creation #878

Merged
merged 26 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
5968623
Fixup for some durations-as-string issues
bennybp Jan 9, 2025
b1efeae
Don't attempt serialization for responses that are already flask Respo…
bennybp Jan 3, 2025
2db3a87
Make records table in caches have rowid
bennybp Jan 9, 2025
bbcd6eb
Expand features of internal jobs
bennybp Jan 6, 2025
be24f32
Better handling of incorrect after_functions after migration
bennybp Jan 9, 2025
ba21407
Add external file capability via S3
bennybp Jan 3, 2025
5203495
Don't allow redirects in base client
bennybp Jan 7, 2025
f283de9
Functions for downloading external file in client
bennybp Jan 6, 2025
272dc66
Enable passthrough for external files
bennybp Jan 9, 2025
c4f1d2b
Make boto3 package optional
bennybp Jan 9, 2025
3298c99
Add view creation and attachment to datasets
bennybp Jan 3, 2025
c8dd523
Some more helper functions in the cache classes
bennybp Jan 6, 2025
9543608
Add functionality for downloading & using dataset views
bennybp Jan 6, 2025
163cdce
Add dataset to internal job tracking
bennybp Jan 9, 2025
a7ee70c
Enable external file deletion
bennybp Jan 10, 2025
5b0b97f
Add serial groups to internal jobs
bennybp Jan 10, 2025
8709442
Better handling of cancelling internal jobs
bennybp Jan 13, 2025
8472174
Better output from internal job watch() when it finishes
bennybp Jan 13, 2025
801bc12
Make server side view creation cancellable
bennybp Jan 13, 2025
cd3c64d
Quieter internal job tests
bennybp Jan 13, 2025
0ebbb0c
Create temporary_dir if it doesn't exist
bennybp Jan 13, 2025
38f2da0
Improve dataset testing and remove duplicate submit tests
bennybp Jan 13, 2025
7c1d925
Test temporary_dir creation
bennybp Jan 13, 2025
03369fe
Move update_nested_dict to utils & remove duplicate
bennybp Jan 14, 2025
e36fdba
Remove unused start_api option to QCATesting snowflake
bennybp Jan 14, 2025
6f56eed
Have separate function for generating test config
bennybp Jan 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions qcarchivetesting/qcarchivetesting/testing_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from copy import deepcopy

from qcarchivetesting import geoip_path, geoip_filename, ip_tests_enabled
from qcfractal.config import DatabaseConfig, update_nested_dict
from qcfractal.config import DatabaseConfig
from qcfractal.db_socket import SQLAlchemySocket
from qcfractal.postgres_harness import PostgresHarness, create_snowflake_postgres
from qcfractal.snowflake import FractalSnowflake
from qcportal import PortalClient, ManagerClient
from qcportal.auth import UserInfo, GroupInfo
from qcportal.managers import ManagerName
from qcportal.utils import update_nested_dict
from .helpers import test_users, test_groups

_activated_manager_programs = {
Expand Down Expand Up @@ -100,7 +101,6 @@ def __init__(
self,
pg_harness: QCATestingPostgresHarness,
encoding: str,
start_api=True,
create_users=False,
enable_security=False,
allow_unauthenticated_read=False,
Expand Down Expand Up @@ -171,9 +171,7 @@ def __init__(
if create_users:
self.create_users()

# Start the flask api process if requested
if start_api:
self.start_api()
self.start_api()

def create_users(self):
# Get a storage socket and add the roles/users/passwords
Expand Down
47 changes: 27 additions & 20 deletions qcarchivetesting/qcarchivetesting/testing_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,36 @@
from qcfractal.db_socket.socket import SQLAlchemySocket
from qcportal import PortalClient
from qcportal.managers import ManagerName
from qcportal.utils import update_nested_dict
from .helpers import geoip_path, geoip_filename, ip_tests_enabled, test_users
from .testing_classes import QCATestingPostgresServer, QCATestingSnowflake, _activated_manager_programs


def _generate_default_config(pg_harness, extra_config=None) -> FractalConfig:
    """Build a FractalConfig for testing against the given postgres harness.

    Since this is mostly just for a storage socket, defaults are used for
    almost everything (a flask server, etc, won't be instantiated).
    Connection pooling in the storage socket is disabled, since pooling can
    leave db connections open, causing problems when we go to delete the
    database.

    Any entries in ``extra_config`` are merged (nested) on top of the
    generated defaults.
    """

    cfg_dict = {
        "base_folder": pg_harness.config.base_folder,
        "loglevel": "DEBUG",
        "database": pg_harness.config.dict(),
        "log_access": True,
        # Fresh random keys per config so tests never share secrets
        "api": {"secret_key": secrets.token_urlsafe(32), "jwt_secret_key": secrets.token_urlsafe(32)},
    }

    # Disable connection pooling (see docstring)
    cfg_dict["database"]["pool_size"] = 0

    if ip_tests_enabled:
        cfg_dict["geoip2_dir"] = geoip_path
        cfg_dict["geoip2_filename"] = geoip_filename

    if extra_config:
        cfg_dict = update_nested_dict(cfg_dict, extra_config)

    return FractalConfig(**cfg_dict)


@pytest.fixture(scope="session")
def postgres_server(tmp_path_factory):
"""
Expand All @@ -40,26 +66,7 @@ def session_storage_socket(postgres_server):
"""

pg_harness = postgres_server.get_new_harness("session_storage")

# Create a configuration. Since this is mostly just for a storage socket,
# We can use defaults for almost all, since a flask server, etc, won't be instantiated
# Also disable connection pooling in the storage socket
# (which can leave db connections open, causing problems when we go to delete
# the database)
cfg_dict = {}
cfg_dict["base_folder"] = pg_harness.config.base_folder
cfg_dict["loglevel"] = "DEBUG"
cfg_dict["database"] = pg_harness.config.dict()
cfg_dict["database"]["pool_size"] = 0
cfg_dict["log_access"] = True

if ip_tests_enabled:
cfg_dict["geoip2_dir"] = geoip_path
cfg_dict["geoip2_filename"] = geoip_filename

cfg_dict["api"] = {"secret_key": secrets.token_urlsafe(32), "jwt_secret_key": secrets.token_urlsafe(32)}
qcf_config = FractalConfig(**cfg_dict)

qcf_config = _generate_default_config(pg_harness)
socket = SQLAlchemySocket(qcf_config)

# Create the template database for use in re-creating the database
Expand Down
3 changes: 3 additions & 0 deletions qcfractal/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ geoip = [
snowflake = [
"qcfractalcompute"
]
s3 = [
"boto3"
]


[project.urls]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@
def upgrade():
    """Add internal_jobs.repeat_delay and purge pending periodic jobs."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Jobs with a non-null repeat_delay are presumably re-enqueued after
    # completion rather than scheduled externally — TODO confirm against
    # the internal-job runner code.
    op.add_column("internal_jobs", sa.Column("repeat_delay", sa.Integer(), nullable=True))

    # Remove old periodic tasks
    # Only rows still 'waiting' or 'running' are deleted; completed/errored
    # rows are left untouched as history.
    op.execute(
        """DELETE FROM internal_jobs WHERE status IN ('waiting', 'running') AND name IN (
            'delete_old_internal_jobs',
            'delete_old_access_log',
            'iterate_services',
            'geolocate_accesses',
            'check_manager_heartbeats',
            'update_geoip2_file'
        )
        """
    )
    # ### end Alembic commands ###


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Add external files table

Revision ID: 02afa97249c7
Revises: c13116948b54
Create Date: 2025-01-03 10:01:55.717905

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "02afa97249c7"
down_revision = "c13116948b54"
branch_labels = None
depends_on = None


def upgrade():
    """Create the external_file table.

    Tracks files held outside the database (bucket/object_key suggest an
    object store such as S3), along with integrity (sha256sum, file_size)
    and status/type metadata.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    file_columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("created_on", sa.TIMESTAMP(), nullable=False),
        sa.Column("status", sa.Enum("available", "processing", name="externalfilestatusenum"), nullable=False),
        sa.Column("file_type", sa.Enum("dataset_attachment", name="externalfiletypeenum"), nullable=False),
        sa.Column("bucket", sa.String(), nullable=False),
        sa.Column("file_name", sa.String(), nullable=False),
        sa.Column("object_key", sa.String(), nullable=False),
        sa.Column("sha256sum", sa.String(), nullable=False),
        sa.Column("file_size", sa.BigInteger(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("provenance", postgresql.JSONB(astext_type=sa.Text()), nullable=False),
    ]
    op.create_table("external_file", *file_columns, sa.PrimaryKeyConstraint("id"))
    # ### end Alembic commands ###


def downgrade():
    """Drop the external_file table and its associated enum types."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("external_file")
    # Dropping the table does not drop the PostgreSQL ENUM types that the
    # sa.Enum columns implicitly created during upgrade(). Drop them
    # explicitly so a subsequent upgrade() does not fail with
    # "type ... already exists".
    sa.Enum(name="externalfilestatusenum").drop(op.get_bind(), checkfirst=True)
    sa.Enum(name="externalfiletypeenum").drop(op.get_bind(), checkfirst=True)
    # ### end Alembic commands ###
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Add dataset attachment table

Revision ID: 84285e3620fd
Revises: 02afa97249c7
Create Date: 2025-01-03 10:04:16.201770

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "84285e3620fd"
down_revision = "02afa97249c7"
branch_labels = None
depends_on = None


def upgrade():
    """Create the dataset_attachment table.

    The id column is both the primary key and a foreign key into
    external_file — presumably joined-table inheritance from that table.
    Rows are removed automatically when either the parent dataset or the
    underlying external file is deleted (ondelete cascade).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    attachment_columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("dataset_id", sa.Integer(), nullable=False),
        sa.Column("attachment_type", sa.Enum("other", "view", name="datasetattachmenttype"), nullable=False),
    ]
    constraints = [
        sa.ForeignKeyConstraint(["dataset_id"], ["base_dataset.id"], ondelete="cascade"),
        sa.ForeignKeyConstraint(["id"], ["external_file.id"], ondelete="cascade"),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table("dataset_attachment", *attachment_columns, *constraints)

    # Index for looking up all attachments belonging to a dataset
    op.create_index("ix_dataset_attachment_dataset_id", "dataset_attachment", ["dataset_id"], unique=False)
    # ### end Alembic commands ###


def downgrade():
    """Drop the dataset_attachment table, its index, and its enum type."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index("ix_dataset_attachment_dataset_id", table_name="dataset_attachment")
    op.drop_table("dataset_attachment")
    # drop_table does not remove the PostgreSQL ENUM type implicitly created
    # by the attachment_type sa.Enum column in upgrade(); drop it explicitly
    # so a subsequent upgrade() does not fail with "type ... already exists".
    sa.Enum(name="datasetattachmenttype").drop(op.get_bind(), checkfirst=True)
    # ### end Alembic commands ###
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Add progress description

Revision ID: c13116948b54
Revises: e798462e0c03
Create Date: 2025-01-07 10:35:23.654928

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "c13116948b54"
down_revision = "e798462e0c03"
branch_labels = None
depends_on = None


def upgrade():
    """Add a nullable free-text progress_description column to internal_jobs."""
    # ### commands auto generated by Alembic - please adjust! ###
    progress_col = sa.Column("progress_description", sa.String(), nullable=True)
    op.add_column("internal_jobs", progress_col)
    # ### end Alembic commands ###


def downgrade():
    """Remove the progress_description column from internal_jobs."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("internal_jobs", "progress_description")
    # ### end Alembic commands ###
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Add dataset internal jobs

Revision ID: 5f6f804e11d3
Revises: 84285e3620fd
Create Date: 2025-01-09 16:25:50.187495

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "5f6f804e11d3"
down_revision = "84285e3620fd"
branch_labels = None
depends_on = None


def upgrade():
    """Create the dataset_internal_job association table.

    Many-to-many link between datasets and internal jobs; the composite
    primary key prevents duplicate links, and both foreign keys cascade on
    delete so links disappear with either endpoint.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    link_columns = [
        sa.Column("internal_job_id", sa.Integer(), nullable=False),
        sa.Column("dataset_id", sa.Integer(), nullable=False),
    ]
    link_constraints = [
        sa.ForeignKeyConstraint(["dataset_id"], ["base_dataset.id"], ondelete="cascade"),
        sa.ForeignKeyConstraint(["internal_job_id"], ["internal_jobs.id"], ondelete="cascade"),
        sa.PrimaryKeyConstraint("internal_job_id", "dataset_id"),
    ]
    op.create_table("dataset_internal_job", *link_columns, *link_constraints)
    # ### end Alembic commands ###


def downgrade():
    """Remove the dataset_internal_job association table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table("dataset_internal_job")
    # ### end Alembic commands ###
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Add internal job serial group

Revision ID: 3690c677f8d1
Revises: 5f6f804e11d3
Create Date: 2025-01-10 16:08:36.541807

"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "3690c677f8d1"
down_revision = "5f6f804e11d3"
branch_labels = None
depends_on = None


def upgrade():
    """Add internal_jobs.serial_group with a partial unique index.

    The partial unique index on (status, serial_group) applies only to rows
    where status = 'running', so at most one job per serial group can be
    running at a time. Jobs with a NULL serial_group are unconstrained,
    since NULLs never compare equal in a unique index.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column("internal_jobs", sa.Column("serial_group", sa.String(), nullable=True))

    running_only = sa.text("status = 'running'")
    op.create_index(
        "ux_internal_jobs_status_serial_group",
        "internal_jobs",
        ["status", "serial_group"],
        unique=True,
        postgresql_where=running_only,
    )
    # ### end Alembic commands ###


def downgrade():
    """Remove the serial_group column and its partial unique index."""
    # ### commands auto generated by Alembic - please adjust! ###
    # The predicate must match the one used at creation so the index is
    # identified correctly.
    running_only = sa.text("status = 'running'")
    op.drop_index(
        "ux_internal_jobs_status_serial_group",
        table_name="internal_jobs",
        postgresql_where=running_only,
    )
    op.drop_column("internal_jobs", "serial_group")
    # ### end Alembic commands ###
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def module_authtest_snowflake(postgres_server, pytestconfig):
with QCATestingSnowflake(
pg_harness,
encoding,
start_api=True,
create_users=False,
enable_security=True,
allow_unauthenticated_read=False,
Expand Down
Loading
Loading