Skip to content

Commit

Permalink
Merge pull request #92 from LSSTDESC/u/stuart/reformat_registrar
Browse files Browse the repository at this point in the history
Refactor registrar into multiple files
  • Loading branch information
stuartmcalpine authored Mar 7, 2024
2 parents ba2927a + 0fea219 commit 4d02abd
Show file tree
Hide file tree
Showing 21 changed files with 850 additions and 378 deletions.
4 changes: 2 additions & 2 deletions scripts/create_registry_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,9 @@ def _Dependency(schema, has_production):
# The following should be adjusted whenever there is a change to the structure
# of the database tables.
_DB_VERSION_MAJOR = 2
_DB_VERSION_MINOR = 0
_DB_VERSION_MINOR = 1
_DB_VERSION_PATCH = 0
_DB_VERSION_COMMENT = "Added production dependencies"
_DB_VERSION_COMMENT = "Add dataset status"

# Parse command line arguments
parser = argparse.ArgumentParser(
Expand Down
2 changes: 1 addition & 1 deletion src/cli/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def register_dataset(args):
)

# Register new dataset.
new_id = datareg.Registrar.register_dataset(
new_id = datareg.Registrar.dataset.register(
args.relative_path,
args.version,
name=args.name,
Expand Down
20 changes: 8 additions & 12 deletions src/dataregistry/DataRegistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ def __init__(
"""
Primary data registry wrapper class.
Class links to both the Registrar class, to registry new dataset, and
the Query class, to query existing datasets.
The DataRegistry class links to both the Registrar class, to
register/modify/delete datasets, and the Query class, to query existing
datasets.
Linking to the database is done automatically using:
- the user's config file (if None, defaults are used)
- the passed schema (if None default is used)
- the passed schema (if None the default schema is used)
The `root_dir` is the location the data is copied to. This can be
manually passed, or alternately a predefined `site` can be chosen. If
nether are chosen, the NERSC site will be selected.
neither is chosen, the NERSC site will be selected as the default.
Parameters
----------
Expand Down Expand Up @@ -59,18 +60,13 @@ def __init__(
self.db_connection = DbConnection(config_file, schema=schema, verbose=verbose)

# Work out the location of the root directory
root_dir = self._get_root_dir(root_dir, site)
self.root_dir = self._get_root_dir(root_dir, site)

# Create registrar object
self.Registrar = Registrar(
self.db_connection,
root_dir,
owner=owner,
owner_type=owner_type,
)
self.Registrar = Registrar(self.db_connection, self.root_dir, owner, owner_type)

# Create query object
self.Query = Query(self.db_connection, root_dir)
self.Query = Query(self.db_connection, self.root_dir)

def _get_root_dir(self, root_dir, site):
"""
Expand Down
1 change: 0 additions & 1 deletion src/dataregistry/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from ._version import __version__
from .db_basic import *
from .registrar import *
from .registrar_util import *
from .query import *
from .git_util import *
from .DataRegistry import DataRegistry
2 changes: 1 addition & 1 deletion src/dataregistry/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from sqlalchemy import text, select
import sqlalchemy.sql.sqltypes as sqltypes
import pandas as pd
from dataregistry.registrar_util import _form_dataset_path
from dataregistry.registrar.registrar_util import _form_dataset_path
from dataregistry.exceptions import DataRegistryNYI, DataRegistryException
import os

Expand Down
1 change: 1 addition & 0 deletions src/dataregistry/registrar/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .registrar import Registrar
137 changes: 137 additions & 0 deletions src/dataregistry/registrar/base_table_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import os

from dataregistry.db_basic import TableMetadata
from sqlalchemy import select, update
from datetime import datetime

from .registrar_util import (
_bump_version,
_copy_data,
_form_dataset_path,
_name_from_relpath,
_parse_version_string,
_read_configuration_file,
get_directory_info,
)
from .dataset_util import set_dataset_status, get_dataset_status

# Owner types accepted when registering entries
_OWNER_TYPES = {"user", "project", "group", "production"}

# Default maximum length of a configuration file that may be ingested
# NOTE(review): the unit (characters vs. bytes) is not visible here -- confirm
_DEFAULT_MAX_CONFIG = 10000


class BaseTable:
    def __init__(self, db_connection, root_dir, owner, owner_type):
        """
        Base class to register/modify/delete entries in the database tables.

        Each table subclass (e.g., DatasetTable) will inherit this class.

        Functions universal to all tables, such as delete and modify, are
        written here; the register function, and other functions unique to a
        table, live in the respective subclass.

        Parameters
        ----------
        db_connection : DbConnection object
            Encompasses sqlalchemy engine, dialect (database backend)
            and schema version
        root_dir : str
            Root directory of the dataregistry on disk
        owner : str
            To set the default owner for all registered datasets in this
            instance.
        owner_type : str
            To set the default owner_type for all registered datasets in this
            instance.
        """

        # Root directory on disk for data registry files
        self._root_dir = root_dir

        # Database engine and schema.
        self._engine = db_connection.engine
        self._schema = db_connection.schema

        # Link to Table Metadata.
        self._metadata_getter = TableMetadata(db_connection)

        # Store user id (None when the USER environment variable is unset)
        self._uid = os.getenv("USER")

        # Default owner and owner_type's
        self._owner = owner
        self._owner_type = owner_type

        # Allowed owner types
        self._OWNER_TYPES = _OWNER_TYPES

        # Max configuration file length allowed
        self._DEFAULT_MAX_CONFIG = _DEFAULT_MAX_CONFIG

    def _get_table_metadata(self, tbl):
        """
        Return the metadata object for table `tbl`.

        Thin wrapper that delegates to the `TableMetadata` getter created in
        `__init__`.

        Parameters
        ----------
        tbl : str
            Name of the table

        Returns
        -------
        Whatever `TableMetadata.get` returns for `tbl`
        """

        return self._metadata_getter.get(tbl)

    def delete(self, entry_id):
        """
        Delete an entry from the DESC data registry.

        Not implemented in the base class.

        Parameters
        ----------
        entry_id : int
            Entry we want to delete from the registry

        Raises
        ------
        NotImplementedError
            Always, in the base class
        """

        raise NotImplementedError

    def modify(self, entry_id, modify_fields):
        """
        Modify an entry in the DESC data registry.

        Not implemented in the base class.

        Parameters
        ----------
        entry_id : int
            The dataset/execution/etc ID we wish to modify in the database
        modify_fields : dict
            Dict where key is the column to modify (must be allowed to modify)
            and value is the desired new value for the entry

        Raises
        ------
        NotImplementedError
            Always, in the base class
        """

        raise NotImplementedError

    def find_entry(self, entry_id):
        """
        Find an entry in the database.

        Relies on `self.which_table` naming the table this instance works
        on; it is not set in this base class.

        Parameters
        ----------
        entry_id : int
            Unique identifier for table entry
            e.g., dataset_id for the dataset table

        Returns
        -------
        r : sqlalchemy Row object
            Found entry (None if no entry found)

        Raises
        ------
        ValueError
            If `self.which_table` is anything other than "dataset"
        """

        # Reject unsupported tables up front, before any metadata lookup.
        if self.which_table != "dataset":
            raise ValueError("Can only perform `find_entry` on dataset table for now")

        # Search for dataset in the registry.
        my_table = self._get_table_metadata(self.which_table)
        stmt = select(my_table).where(my_table.c.dataset_id == entry_id)

        # Pull the row out while the connection is still open, so the result
        # never depends on driver-side buffering after the connection has been
        # released. A read-only SELECT needs no commit. `first()` returns the
        # first row, or None when there are no results.
        with self._engine.connect() as conn:
            return conn.execute(stmt).first()
Loading

0 comments on commit 4d02abd

Please sign in to comment.