diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 88a0f860..c9e893e5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -74,6 +74,7 @@ jobs:
     env:
       DATAREG_CONFIG: "${{ github.workspace }}/config.txt"
+      DATAREG_BACKEND: "postgres"
 
     # Service containers to run with `runner-job`
     services:
@@ -150,6 +151,7 @@ jobs:
     env:
       DATAREG_CONFIG: "${{ github.workspace }}/config.txt"
+      DATAREG_BACKEND: "sqlite"
 
     # Our strategy lists the OS and Python versions we want to test on.
     strategy:
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 2c7e4af8..ed2295b7 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -10,7 +10,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install .
-          pip install sphinx sphinx_rtd_theme sphinx_toolbox sphinxcontrib-autoprogram
+          pip install sphinx sphinx_rtd_theme sphinx_toolbox sphinxcontrib-autoprogram sphinxcontrib.datatemplates
       - name: Sphinx build
         run: |
           sphinx-build docs/source _build
diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css
new file mode 100644
index 00000000..7d2e89e6
--- /dev/null
+++ b/docs/source/_static/css/custom.css
@@ -0,0 +1,3 @@
+.tight-table td {
+    white-space: normal !important;
+}
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8b2fa02e..6bbbb460 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -5,7 +5,8 @@
     "sphinx_rtd_theme",
     "sphinx.ext.autodoc",
     'sphinx.ext.napoleon',
-    'sphinxcontrib.autoprogram'
+    'sphinxcontrib.autoprogram',
+    'sphinxcontrib.datatemplates'
 ]
 
 project = 'DESC data management'
@@ -36,3 +37,9 @@
 html_logo = '_static/DREGS_logo_v2.png'
 
 autoclass_content = 'both'
+
+templates_path = ['templates']
+
+html_css_files = [
+    'css/custom.css',
+]
diff --git a/docs/source/reference_schema.rst b/docs/source/reference_schema.rst
index 9494ac52..66da4bc2 100644
--- a/docs/source/reference_schema.rst
+++ b/docs/source/reference_schema.rst
@@ -7,248 +7,5 @@ database (e.g., the default and production schemas) follows the same structure.
 
 .. image:: _static/schema_plot.png
    :alt: Image missing
 
-The dataset table
------------------
-
-.. list-table::
-   :header-rows: 1
-
-   * - row
-     - description
-     - type
-   * - ``dataset_id``
-     - Unique identifier for dataset
-     - int
-   * - ``name``
-     - User given name for dataset
-     - str
-   * - ``relative_path``
-     - Relative path storing the data, relative to ``<root_dir>``
-     - str
-   * - ``version_major``
-     - Major version in semantic string (i.e., X.x.x)
-     - int
-   * - ``version_minor``
-     - Minor version in semantic string (i.e., x.X.x)
-     - int
-   * - ``version_patch``
-     - Patch version in semantic string (i.e., x.x.X)
-     - int
-   * - ``version_suffix``
-     - Optional version suffix
-     - str
-   * - ``dataset_creation_date``
-     - Dataset creation date
-     - datetime
-   * - ``is_archived``
-     - True if the data is archived, i.e., the data is no longer within ``<root_dir>``
-     - bool
-   * - ``is_external_link``
-     - ???
-     - bool
-   * - ``is_overwritten``
-     - True if the original data for this dataset has been overwritten at some point. This would have required that ``is_overwritable`` was set to ``true`` on the original dataset
-     - bool
-   * - ``is_valid``
-     - ???
-     - bool
-   * - ``register_date``
-     - Date the dataset was registered
-     - datetime
-   * - ``creator_uid``
-     - `uid` (user id) of the person that registered the dataset
-     - str
-   * - ``access_API``
-     - Describes the software that can read the dataset (e.g., "gcr-catalogs", "skyCatalogs")
-     - str
-   * - ``execution_id``
-     - ID of execution this dataset belongs to
-     - int
-   * - ``description``
-     - User provided description of the dataset
-     - str
-   * - ``owner_type``
-     - Dataset owner type, can be "user", "group", "project" or "production".
-     - str
-   * - ``owner``
-     - Owner of the dataset
-     - str
-   * - ``data_org``
-     - Dataset organisation ("file" or "directory")
-     - str
-   * - ``nfiles``
-     - How many files are in the dataset
-     - int
-   * - ``total_disk_space``
-     - Total disk space used by the dataset
-     - float
-
-The dataset_alias table
------------------------
-
-.. list-table::
-   :header-rows: 1
-
-   * - row
-     - description
-     - type
-   * - ``dataset_alias_id``
-     - Unique identifier for alias
-     - int
-   * - ``name``
-     - User given alias name
-     - str
-   * - ``dataset_id``
-     - ID of dataset this is an alias for
-     - int
-   * - ``supersede_date``
-     - If a new entry has been added to the table with the same alias name (but
-       different dataset_id), the old entry will be superseded. ``supersede_date``
-       in the old entry tracks when this happened. If the entry has not been
-       superseded, ``supersede_date`` will be None
-     - datetime
-   * - ``register_date``
-     - Date the dataset was registered
-     - datetime
-   * - ``creator_uid``
-     - `uid` (user id) of the person that registered the dataset
-     - str
-
-The dependency table
---------------------
-
-.. list-table::
-   :header-rows: 1
-
-   * - row
-     - description
-     - type
-   * - ``dependency_id``
-     - Unique identifier for dependency
-     - int
-   * - ``execution_id``
-     - Execution this dependency is linked to
-     - int
-   * - ``input_id``
-     - Dataset ID of the dependent dataset
-     - int
-   * - ``register_date``
-     - Date the dependency was registered
-     - datetime
-
-The execution table
--------------------
-
-.. list-table::
-   :header-rows: 1
-
-   * - row
-     - description
-     - type
-   * - ``execution_id``
-     - Unique identifier for execution
-     - int
-   * - ``description``
-     - User given description of execution
-     - str
-   * - ``name``
-     - User given execution name
-     - str
-   * - ``register_date``
-     - Date the execution was registered
-     - datetime
-   * - ``execution_start``
-     - Date the execution started
-     - datetime
-   * - ``locale``
-     - Locale of execution (e.g., NERSC)
-     - str
-   * - ``configuration``
-     - Path to configuration file of execution
-     - str
-   * - ``creator_uid``
-     - `uid` (user id) of the person that registered the dataset
-     - str
-
-The execution_alias table
--------------------------
-
-.. list-table::
-   :header-rows: 1
-
-   * - row
-     - description
-     - type
-   * - ``execution_alias_id``
-     - Unique identifier for execution alias
-     - int
-   * - ``execution_id``
-     - Execution this alias is linked to
-     - int
-   * - ``alias``
-     - User given execution alias name
-     - str
-   * - ``register_date``
-     - Date the execution was registered
-     - datetime
-   * - ``supersede_date``
-     - If a new entry has been added to the table with the same alias name (but
-       different dataset_id), the old entry will be superseded. ``supersede_date``
-       in the old entry tracks when this happened. If the entry has not been
-       superseded, ``supersede_date`` will be None
-     - datetime
-   * - ``creator_uid``
-     - `uid` (user id) of the person that registered the dataset
-     - str
-
-The provenance table
---------------------
-
-.. list-table::
-   :header-rows: 1
-
-   * - row
-     - description
-     - type
-   * - ``provenance_id``
-     - Unique identifier for provenance
-     - int
-   * - ``code_version_major``
-     - Major version of code when this schema was created
-     - int
-   * - ``code_version_minor``
-     - Minor version of code when this schema was created
-     - int
-   * - ``code_version_patch``
-     - Patch version of code when this schema was created
-     - int
-   * - ``code_version_suffix``
-     - Version suffix of code when this schema was created
-     - str
-   * - ``db_version_major``
-     - Major version of database
-     - int
-   * - ``db_version_minor``
-     - Minor version of database
-     - int
-   * - ``db_version_patch``
-     - Patch version of database
-     - int
-   * - ``git_hash``
-     - Git commit hash when this schema was created
-     - str
-   * - ``repo_is_clean``
-     - Was repository clean when this schema was created
-     - bool
-   * - ``update_method``
-     - "CREATE", "MODIFY" or "MIGRATE"
-     - str
-   * - ``schema_enabled_date``
-     - When was the schema enabled
-     - datetime
-   * - ``creator_uid``
-     - `uid` (user id) of the person that registered the schema
-     - str
-   * - ``comment``
-     - Any comment
-     - str
+.. datatemplate:yaml:: ../../src/dataregistry/schema/schema.yaml
+   :template: schema_table.tmpl
diff --git a/docs/source/templates/schema_table.tmpl b/docs/source/templates/schema_table.tmpl
new file mode 100644
index 00000000..7abbbce6
--- /dev/null
+++ b/docs/source/templates/schema_table.tmpl
@@ -0,0 +1,22 @@
+.. -*- mode: rst -*-
+
+{% for table in ['execution','provenance','execution_alias','dataset','dependency','dataset_alias'] %}
+
+The {{table}} table
+----------------------------------------
+
+.. list-table::
+   :header-rows: 1
+   :class: tight-table
+
+   * - column
+     - description
+     - type
+
+{% for column in data[table] %}
+   * - {{column}}
+{% for field in ['description', 'type'] %}
+     - {{data[table][column][field]}}
+{% endfor %}
+{% endfor %}
+{% endfor %}
diff --git a/pyproject.toml b/pyproject.toml
index c15b3459..8cc09883 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,4 +38,4 @@ where = ["src"]
 dregs = "cli.cli:main"
 
 [tool.setuptools.package-data]
-"dataregistry" = ["site_config/site_rootdir.yaml"]
+"dataregistry" = ["site_config/site_rootdir.yaml", "schema/schema.yaml"]
diff --git a/scripts/create_registry_db.py b/scripts/create_registry_db.py
index c63c1cd1..4c21d302 100644
--- a/scripts/create_registry_db.py
+++ b/scripts/create_registry_db.py
@@ -2,11 +2,21 @@
 import sys
 import argparse
 from datetime import datetime
-from sqlalchemy import Column, Integer, String, DateTime, Boolean, Index, Float
+from sqlalchemy import (
+    Column,
+    ColumnDefault,
+    Integer,
+    String,
+    DateTime,
+    Boolean,
+    Index,
+    Float,
+)
 from sqlalchemy import ForeignKey, UniqueConstraint, text
 from sqlalchemy.orm import relationship, DeclarativeBase
 from dataregistry.db_basic import DbConnection, SCHEMA_VERSION
-from dataregistry.db_basic import add_table_row, _insert_provenance
+from dataregistry.db_basic import _insert_provenance
+from dataregistry.schema import load_schema
 
 """
 A script to create the default dataregistry schema and the production schema.
@@ -20,14 +30,96 @@
 - "provenance" : Contains information about the database/schema
 """
 
+# Conversion from string types in `schema.yaml` to SQLAlchemy
+_TYPE_TRANSLATE = {
+    "String": String,
+    "Integer": Integer,
+    "DateTime": DateTime,
+    "StringShort": String(20),
+    "StringLong": String(250),
+    "Boolean": Boolean,
+    "Float": Float,
+}
+
+# Load the schema from the `schema.yaml` file
+schema_yaml = load_schema()
+
+
+def _get_column_definitions(schema, table):
+    """
+    Build the SQLAlchemy `Column` list for this table from the information in
+    the `schema.yaml` file.
+
+    Parameters
+    ----------
+    schema : str
+        Schema the table belongs to
+    table : str
+        Table name in `schema.yaml`
+
+    Returns
+    -------
+    return_dict : dict
+        SQLAlchemy Column entries for this table
+    """
+
+    return_dict = {}
+    for column in schema_yaml[table].keys():
+        # Special case where column has a foreign key
+        if schema_yaml[table][column]["foreign_key"]:
+            # Resolve "self" via a local variable rather than mutating
+            # `schema_yaml`, so a later call for a different schema still
+            # sees the "self" placeholder
+            fk_schema = schema_yaml[table][column]["foreign_key_schema"]
+            if fk_schema == "self":
+                fk_schema = schema
+
+            return_dict[column] = Column(
+                column,
+                _TYPE_TRANSLATE[schema_yaml[table][column]["type"]],
+                ForeignKey(
+                    _get_ForeignKey_str(
+                        fk_schema,
+                        schema_yaml[table][column]["foreign_key_table"],
+                        schema_yaml[table][column]["foreign_key_column"],
+                    )
+                ),
+                primary_key=schema_yaml[table][column]["primary_key"],
+                nullable=schema_yaml[table][column]["nullable"],
+            )
+
+        # Normal case
+        else:
+            return_dict[column] = Column(
+                column,
+                _TYPE_TRANSLATE[schema_yaml[table][column]["type"]],
+                primary_key=schema_yaml[table][column]["primary_key"],
+                nullable=schema_yaml[table][column]["nullable"],
+            )
+
+    return return_dict
+
+
 class Base(DeclarativeBase):
     pass
 
 
-def _get_ForeignKey_str(schema, table, row):
+
+def _get_ForeignKey_str(schema, table, column):
+    """
+    Get the string reference ("<schema>.<table>.<column>") that a foreign key
+    will point to.
+
+    The schema address will only be included for postgres backends.
+
+    Parameters
+    ----------
+    schema : str
+    table : str
+    column : str
+
+    Returns
+    -------
+    str
+    """
+
     if schema is None:
-        return f"{table}.{row}"
+        return f"{table}.{column}"
     else:
-        return f"{schema}.{table}.{row}"
+        return f"{schema}.{table}.{column}"
 
 
 def _Provenance(schema):
@@ -35,30 +127,13 @@
 
     class_name = f"{schema}_provenance"
 
-    # Rows
-    rows = {
-        "provenance_id": Column("provenance_id", Integer, primary_key=True),
-        "code_version_major": Column("code_version_major", Integer, nullable=False),
-        "code_version_minor": Column("code_version_minor", Integer, nullable=False),
-        "code_version_patch": Column("code_version_patch", Integer, nullable=False),
-        "code_version_suffix": Column("code_version_suffix", String),
-        "db_version_major": Column("db_version_major", Integer, nullable=False),
-        "db_version_minor": Column("db_version_minor", Integer, nullable=False),
-        "db_version_patch": Column("db_version_patch", Integer, nullable=False),
-        "git_hash": Column("git_hash", String, nullable=True),
-        "repo_is_clean": Column("repo_is_clean", Boolean, nullable=True),
-        # update method is always "CREATE" for this script.
-        # Alternative could be "MODIFY" or "MIGRATE"
-        "update_method": Column("update_method", String(10), nullable=False),
-        "schema_enabled_date": Column("schema_enabled_date", DateTime, nullable=False),
-        "creator_uid": Column("creator_uid", String(20), nullable=False),
-        "comment": Column("comment", String(250)),
-    }
+    # Load columns from `schema.yaml` file
+    columns = _get_column_definitions(schema, "provenance")
 
     # Table metadata
     meta = {"__tablename__": "provenance", "__table_args__": {"schema": schema}}
 
-    Model = type(class_name, (Base,), {**rows, **meta})
+    Model = type(class_name, (Base,), {**columns, **meta})
 
     return Model
 
@@ -67,24 +142,13 @@ def _Execution(schema):
 
     class_name = f"{schema}_execution"
 
-    # Rows
-    rows = {
-        "execution_id": Column("execution_id", Integer, primary_key=True),
-        "description": Column("description", String),
-        "register_date": Column("register_date", DateTime, nullable=False),
-        "execution_start": Column("execution_start", DateTime),
-        # name is meant to identify the code executed. E.g., could be pipeline name
-        "name": Column("name", String),
-        # locale is, e.g. site where code was run
-        "locale": Column("locale", String),
-        "configuration": Column("configuration", String),
-        "creator_uid": Column("creator_uid", String(20), nullable=False),
-    }
+    # Load columns from `schema.yaml` file
+    columns = _get_column_definitions(schema, "execution")
 
     # Table metadata
     meta = {"__tablename__": "execution", "__table_args__": {"schema": schema}}
 
-    Model = type(class_name, (Base,), {**rows, **meta})
+    Model = type(class_name, (Base,), {**columns, **meta})
 
     return Model
 
@@ -93,18 +157,8 @@ def _ExecutionAlias(schema):
 
     class_name = f"{schema}_execution_alias"
 
-    # Rows
-    rows = {
-        "execution_alias_id": Column("execution_alias_id", Integer, primary_key=True),
-        "alias": Column(String, nullable=False),
-        "execution_id": Column(
-            Integer,
-            ForeignKey(_get_ForeignKey_str(schema, "execution", "execution_id")),
-        ),
-        "supersede_date": Column(DateTime, default=None),
-        "register_date": Column(DateTime, nullable=False),
-        "creator_uid": Column(String(20), nullable=False),
-    }
+    # Load columns from `schema.yaml` file
+    columns = _get_column_definitions(schema, "execution_alias")
 
     # Table metadata
     meta = {
@@ -115,7 +169,7 @@
         ),
     }
 
-    Model = type(class_name, (Base,), {**rows, **meta})
+    Model = type(class_name, (Base,), {**columns, **meta})
 
     return Model
 
@@ -124,17 +178,8 @@ def _DatasetAlias(schema):
 
     class_name = f"{schema}_dataset_alias"
 
-    # Rows
-    rows = {
-        "dataset_alias_id": Column(Integer, primary_key=True),
-        "alias": Column(String, nullable=False),
-        "dataset_id": Column(
-            Integer, ForeignKey(_get_ForeignKey_str(schema, "dataset", "dataset_id"))
-        ),
-        "supersede_date": Column(DateTime, default=None),
-        "register_date": Column(DateTime, nullable=False),
-        "creator_uid": Column(String(20), nullable=False),
-    }
+    # Load columns from `schema.yaml` file
+    columns = _get_column_definitions(schema, "dataset_alias")
 
     # Table metadata
     meta = {
@@ -145,7 +190,7 @@
         ),
     }
 
-    Model = type(class_name, (Base,), {**rows, **meta})
+    Model = type(class_name, (Base,), {**columns, **meta})
 
     return Model
 
@@ -154,49 +199,8 @@ def _Dataset(schema):
 
     class_name = f"{schema}_dataset"
 
-    # Rows
-    rows = {
-        "dataset_id": Column(Integer, primary_key=True),
-        "name": Column(String, nullable=False),
-        "relative_path": Column(String, nullable=False),
-        "version_major": Column(Integer, nullable=False),
-        "version_minor": Column(Integer, nullable=False),
"version_patch": Column(Integer, nullable=False), - "version_string": Column(String, nullable=False), - "version_suffix": Column(String), - "dataset_creation_date": Column(DateTime), - "is_archived": Column(Boolean, default=False), - "is_external_link": Column(Boolean, default=False), - "is_overwritable": Column(Boolean, default=False), - "is_overwritten": Column(Boolean, default=False), - "is_valid": Column(Boolean, default=True), # False if, e.g., copy failed - # The following are boilerplate, included in all or most tables - "register_date": Column(DateTime, nullable=False), - "creator_uid": Column(String(20), nullable=False), - # Make access_API a string for now, but it could be an enumeration or - # a foreign key into another table. Possible values for the column - # might include "gcr-catalogs", "skyCatalogs" - "access_API": Column("access_API", String(20)), - # Optional configuration file associated with access API - "access_API_configuration": Column("configuration", String), - # A way to associate a dataset with a program execution or "run" - "execution_id": Column( - Integer, - ForeignKey(_get_ForeignKey_str(schema, "execution", "execution_id")), - ), - "description": Column(String), - "owner_type": Column(String, nullable=False), - # If ownership_type is 'production', then owner is always 'production' - # If ownership_type is 'group', owner will be a group name - # If ownership_type is 'user', owner will be a user name - "owner": Column(String, nullable=False), - # To store metadata about the dataset. - "data_org": Column("data_org", String, nullable=False), - "nfiles": Column("nfiles", Integer, nullable=False), - "total_disk_space": Column("total_disk_space", Float, nullable=False), - # What `root_dir` was the data originially ingested into - "register_root_dir": Column(String, nullable=False), - } + # Load columns from `schema.yaml` file + columns = _get_column_definitions(schema, "dataset") # Table metadata meta = { @@ -210,7 +214,7 @@ def _Dataset(schema): ), } - Model = type(class_name, (Base,), {**rows, **meta}) + Model = type(class_name, (Base,), {**columns, **meta}) return Model @@ -219,30 +223,17 @@ def _Dependency(schema, has_production): class_name = f"{schema}_dependency" - # Rows - rows = { - "dependency_id": Column(Integer, primary_key=True), - "register_date": Column(DateTime, nullable=False), - "execution_id": Column( - Integer, - ForeignKey(_get_ForeignKey_str(schema, "execution", "execution_id")), - ), - "input_id": Column( - Integer, ForeignKey(_get_ForeignKey_str(schema, "dataset", "dataset_id")) - ), - } + # Load columns from `schema.yaml` file + columns = _get_column_definitions(schema, "dependency") + + # Remove link to production schema. + if not has_production: + del columns["input_production_id"] - # Add link to production schema. 
-    if has_production:
-        rows["input_production_id"] = Column(
-            Integer,
-            ForeignKey(_get_ForeignKey_str("production", "dataset", "dataset_id")),
-        )
 
     # Table metadata
     meta = {"__tablename__": "dependency", "__table_args__": {"schema": schema}}
 
-    Model = type(class_name, (Base,), {**rows, **meta})
+    Model = type(class_name, (Base,), {**columns, **meta})
 
     return Model
 
@@ -290,7 +281,7 @@
         stmt = f"CREATE SCHEMA IF NOT EXISTS {SCHEMA}"
         conn.execute(text(stmt))
         conn.commit()
-    
+
 # Grant reg_reader access
 acct = "reg_reader"
 for SCHEMA in SCHEMA_LIST:
diff --git a/src/cli/cli.py b/src/cli/cli.py
index ed07b67d..a60ecc9d 100644
--- a/src/cli/cli.py
+++ b/src/cli/cli.py
@@ -4,6 +4,7 @@
 from dataregistry.db_basic import SCHEMA_VERSION
 from .register import register_dataset
 from .query import dregs_ls
+from dataregistry.schema import load_schema
 
 # ---------------------
 # The data registry CLI
@@ -45,6 +46,20 @@
 # Register a dataset
 # ------------------
 
+# Load the schema information
+schema_data = load_schema()
+
+# Conversion from string types in `schema.yaml` to Python types for the CLI
+_TYPE_TRANSLATE = {
+    "String": str,
+    "Integer": int,
+    "DateTime": str,
+    "StringShort": str,
+    "StringLong": str,
+    "Boolean": bool,
+    "Float": float,
+}
+
 # Register a new database entry.
 arg_register = subparsers.add_parser(
     "register", help="Register a new entry to the database"
 )
@@ -57,6 +72,36 @@
 # Register a new dataset.
 arg_register_dataset = arg_register_sub.add_parser("dataset", help="Register a dataset")
 
+# Get some information from the `schema.yaml` file
+for column in schema_data["dataset"]:
+    extra_args = {}
+
+    # Any default?
+    if schema_data["dataset"][column]["cli_default"] is not None:
+        extra_args["default"] = schema_data["dataset"][column]["cli_default"]
+        default_str = f" (default={extra_args['default']})"
+    else:
+        default_str = ""
+
+    # Restricted to choices?
+    if schema_data["dataset"][column]["choices"] is not None:
+        extra_args["choices"] = schema_data["dataset"][column]["choices"]
+
+    # Is this a boolean flag?
+    if schema_data["dataset"][column]["type"] == "Boolean":
+        extra_args["action"] = "store_true"
+    else:
+        extra_args["type"] = _TYPE_TRANSLATE[schema_data["dataset"][column]["type"]]
+
+    # Add flag
+    if schema_data["dataset"][column]["cli_optional"]:
+        arg_register_dataset.add_argument(
+            "--" + column,
+            help=schema_data["dataset"][column]["description"] + default_str,
+            **extra_args,
+        )
+
+# Entries unique to registering the dataset using the CLI
 arg_register_dataset.add_argument(
     "relative_path",
     help=(
@@ -75,45 +120,6 @@
     ),
     type=str,
 )
-arg_register_dataset.add_argument(
-    "--version_suffix",
-    help=(
-        "Optional suffix string to place at the end of the version string."
-        "Cannot be used for production datasets."
-    ),
-    type=str,
-)
-arg_register_dataset.add_argument(
-    "--name",
-    help=(
-        "Any convenient, evocative name for the human. Note the combination of"
-        "name, version and version_suffix must be unique. If None name is generated"
-        "from the relative path."
-    ),
-    type=str,
-)
-arg_register_dataset.add_argument(
-    "--creation_date", help="Manually set creation date of dataset"
-)
-arg_register_dataset.add_argument(
-    "--description", help="Human-readable description of dataset", type=str
-)
-arg_register_dataset.add_argument(
-    "--execution_id",
-    help="Used to associate dataset with a particular execution",
-    type=int,
-)
-arg_register_dataset.add_argument(
-    "--access_API", help="Hint as to how to read the data", type=str
-)
-arg_register_dataset.add_argument(
-    "--is_overwritable",
-    help=(
-        "True if dataset may be overwritten (defaults to False). Production"
-        "datasets cannot be overwritten."
-    ),
-    action="store_true",
-)
 arg_register_dataset.add_argument(
     "--old_location",
     help=(
@@ -137,18 +143,6 @@
     default=f"{SCHEMA_VERSION}",
     help="Which schema to connect to",
 )
-arg_register_dataset.add_argument(
-    "--locale",
-    help="Location where dataset was produced",
-    type=str,
-    default="NERSC",
-)
-arg_register_dataset.add_argument(
-    "--owner", help="Owner of dataset. Defaults to $USER."
-)
-arg_register_dataset.add_argument(
-    "--owner-type", choices=["production", "group", "user"], default="user"
-)
 arg_register_dataset.add_argument(
     "--config_file", help="Location of data registry config file", type=str
 )
diff --git a/src/cli/register.py b/src/cli/register.py
index 98340e91..ca74870b 100644
--- a/src/cli/register.py
+++ b/src/cli/register.py
@@ -9,7 +9,7 @@ def register_dataset(args):
     Parameters
     ----------
     args : argparse object
-    
+
         args.config_file : str
             Path to data registry config file
         args.schema : str
@@ -38,11 +38,15 @@ def register_dataset(args):
         name=args.name,
         version_suffix=args.version_suffix,
         creation_date=args.creation_date,
+        access_API=args.access_API,
+        execution_id=args.execution_id,
+        is_overwritable=args.is_overwritable,
         description=args.description,
         old_location=args.old_location,
         copy=(not args.make_symlink),
         is_dummy=args.is_dummy,
         owner=args.owner,
+        owner_type=args.owner_type,
         execution_name=args.execution_name,
         execution_description=args.execution_description,
         execution_start=args.execution_start,
diff --git a/src/dataregistry/registrar.py b/src/dataregistry/registrar.py
index 7411a69e..e0d51ee4 100644
--- a/src/dataregistry/registrar.py
+++ b/src/dataregistry/registrar.py
@@ -229,22 +229,19 @@ def register_execution(
         """
         Register a new execution in the DESC data registry.
 
+        Any args marked with '**' share their name with the associated column
+        in the registry schema. Descriptions of what these columns are can be
+        found in `schema.yaml` or the documentation.
+
         Parameters
         ----------
-        name : str
-            Typically pipeline name or program name
-        description : str, optional
-            Human readable description of execution
-        execution_start : datetime, optional
-            Date the execution started
-        locale : str, optional
-            Where was the execution performed?
-        configuration : str, optional
-            Path to text file used to configure the execution
-        input_datasets : list, optional
-            List of dataset ids that were the input to this execution
-        input_production_datasets : list, optional
-            List of production dataset ids that were the input to this execution
+        name** : str
+        description** : str, optional
+        execution_start** : datetime, optional
+        locale** : str, optional
+        configuration** : str, optional
+        input_datasets** : list, optional
+        input_production_datasets** : list, optional
         max_config_length : int, optional
             Maximum number of lines to read from a configuration file
 
@@ -323,39 +320,21 @@
         """
         Register a new dataset in the DESC data registry.
 
+        Any args marked with '**' share their name with the associated column
+        in the registry schema. Descriptions of what these columns are can be
+        found in `schema.yaml` or the documentation.
+
         Parameters
         ----------
-        relative_path : str
-            Destination for the dataset within the data registry. Path is
-            relative to ``<root_dir>/<owner_type>/<owner>``.
-        version : str
-            Semantic version string of the format MAJOR.MINOR.PATCH *or*
-            a special flag: "patch", "minor" or "major".
-
-            When a special flag is used it automatically bumps the relative
-            version for you (see examples for more details).
-        version_suffix : str, optional
-            Suffix string to place at the end of the version string. Cannot be
-            used for production datasets.
-        name : str, optional
-            Any convenient, evocative name for the human.
-
-            Note the combination of name, version and version_suffix must be
-            unique.
-        creation_date : datetime, optional
-            Manually set creation date of dataset
-        description : str, optional
-            Human-readable description of dataset
-        execution_id : int, optional
-            Used to associate dataset with a particular execution
-        access_API : str, optional
-            Hint as to how to read the data
-        access_API_configuration : str, optional
-            Path to configuration file for `access_API`
-        is_overwritable : bool, optional
-            True if dataset may be overwritten (defaults to False).
-
-            Note production datasets cannot be overwritten.
+        relative_path** : str
+        version** : str
+        version_suffix** : str, optional
+        name** : str, optional
+        creation_date** : datetime, optional
+        description** : str, optional
+        execution_id** : int, optional
+        access_API** : str, optional
+        is_overwritable** : bool, optional
         old_location : str, optional
             Absolute location of dataset to copy into the data registry.
 
@@ -370,23 +349,13 @@
             only)
         verbose : bool, optional
            Provide some additional output information
-        owner : str, optional
-            Owner of the dataset. If None, defaults to what was set in
-            Registrar __init__, if that is also None, defaults to $USER.
-        owner_type : str, optional
-            Owner type: "user", "group", or "production". If None, defaults to
-            what was set in Registrar __init__, if that is also None, defaults
-            to "user".
-        execution_name : str, optional
-            Typically pipeline name or program name
-        execution_description : str, optional
-            Human readable description of execution
-        execution_start : datetime, optional
-            Date the execution started
-        execution_locale : str, optional
-            Where was the execution performed?
-        execution_configuration : str, optional
-            Path to text file used to configure the execution
+        owner** : str, optional
+        owner_type** : str, optional
+        execution_name** : str, optional
+        execution_description** : str, optional
+        execution_start** : datetime, optional
+        execution_locale** : str, optional
+        execution_configuration** : str, optional
         input_datasets : list, optional
             List of dataset ids that were the input to this execution
         input_production_datasets : list, optional
@@ -506,10 +475,10 @@
         if version_suffix:
             values["version_suffix"] = version_suffix
         if creation_date:
-            values["dataset_creation_date"] = creation_date
+            values["creation_date"] = creation_date
         else:
             if ds_creation_date:
-                values["dataset_creation_date"] = ds_creation_date
+                values["creation_date"] = ds_creation_date
         if description:
             values["description"] = description
         if execution_id:
@@ -520,6 +489,9 @@
         values["access_API_configuration"] = _read_configuration_file(access_API_configuration, max_config_length)
         values["is_overwritable"] = is_overwritable
         values["is_overwritten"] = False
+        values["is_external_link"] = False
+        values["is_archived"] = False
+        values["is_valid"] = True
         values["register_date"] = datetime.now()
         values["owner_type"] = owner_type
         values["owner"] = owner
@@ -549,12 +521,14 @@ def register_dataset_alias(self, aliasname, dataset_id):
         """
         Register a new dataset alias in the DESC data registry.
 
+        Any args marked with '**' share their name with the associated column
+        in the registry schema. Descriptions of what these columns are can be
+        found in `schema.yaml` or the documentation.
+
         Parameters
         ----------
-        aliasname : str
-            Human readable alias for the dataset
-        dataset_id : int
-            Existing dataset ID to attach dataset alias to
+        aliasname** : str
+        dataset_id** : int
 
         Returns
         -------
diff --git a/src/dataregistry/schema/__init__.py b/src/dataregistry/schema/__init__.py
new file mode 100644
index 00000000..329fa0e8
--- /dev/null
+++ b/src/dataregistry/schema/__init__.py
@@ -0,0 +1 @@
+from .load_schema import load_schema
diff --git a/src/dataregistry/schema/load_schema.py b/src/dataregistry/schema/load_schema.py
new file mode 100644
index 00000000..fbd0f238
--- /dev/null
+++ b/src/dataregistry/schema/load_schema.py
@@ -0,0 +1,40 @@
+import os
+import yaml
+
+
+def _populate_defaults(mydict):
+    """
+    Populate the default values for columns that haven't been specified in the
+    YAML file.
+
+    Parameters
+    ----------
+    mydict : dict
+    """
+
+    # Attributes we check for, and the default values used if missing
+    atts = {"nullable": True, "primary_key": False, "foreign_key": False,
+            "cli_optional": False, "cli_default": None, "choices": None}
+
+    # Loop over each table and column and ingest
+    for table in mydict.keys():
+        for column in mydict[table].keys():
+            for att in atts.keys():
+                if att not in mydict[table][column].keys():
+                    mydict[table][column][att] = atts[att]
+
+
+def load_schema():
+    """Load the schema layout from the YAML file"""
+
+    # Load
+    yaml_file_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "schema.yaml"
+    )
+    with open(yaml_file_path, "r") as file:
+        yaml_data = yaml.safe_load(file)
+
+    # Populate defaults
+    _populate_defaults(yaml_data)
+
+    return yaml_data
diff --git a/src/dataregistry/schema/schema.yaml b/src/dataregistry/schema/schema.yaml
new file mode 100644
index 00000000..972fa4e5
--- /dev/null
+++ b/src/dataregistry/schema/schema.yaml
@@ -0,0 +1,282 @@
+---
+execution:
+  execution_id:
+    type: "Integer"
+    primary_key: True
+    description: "Unique identifier for execution"
+  description:
+    type: "String"
+    description: "Short description of execution"
+  register_date:
+    type: "DateTime"
+    description: "When was the execution registered in the database"
+    nullable: False
+  execution_start:
+    type: "DateTime"
+    description: "When was the execution performed at `locale`"
+  name:
+    type: "String"
+    description: "Identifies the code executed (e.g., could be pipeline name)"
+  locale:
+    type: "String"
+    description: "Site where the code was run (e.g., NERSC)"
+  configuration:
+    type: "String"
+    description: "Path to execution configuration file (txt, YAML, TOML, etc). Ingested as raw text"
+  creator_uid:
+    type: "StringShort"
+    description: "UID of person who registered the entry"
+    nullable: False
+
+provenance:
+  provenance_id:
+    type: "Integer"
+    primary_key: True
+    description: "Unique identifier for this provenance entry"
+  code_version_major:
+    type: "Integer"
+    description: "Major version of code when this schema was created"
+    nullable: False
+  code_version_minor:
+    type: "Integer"
+    description: "Minor version of code when this schema was created"
+    nullable: False
+  code_version_patch:
+    type: "Integer"
+    description: "Patch version of code when this schema was created"
+    nullable: False
+  code_version_suffix:
+    type: "String"
+    description: "Version suffix of code when this schema was created"
+  creator_uid:
+    type: "StringShort"
+    description: "UID of person who registered the entry"
+    nullable: False
+  db_version_major:
+    type: "Integer"
+    description: "Major version of schema"
+    nullable: False
+  db_version_minor:
+    type: "Integer"
+    description: "Minor version of schema"
+    nullable: False
+  db_version_patch:
+    type: "Integer"
+    description: "Patch version of schema"
+    nullable: False
+  git_hash:
+    type: "String"
+    description: "Git hash at time of schema creation"
+  repo_is_clean:
+    type: "Boolean"
+    description: "Was git repo clean at schema creation?"
+  update_method:
+    type: "String"
+    description: "What type of schema update does this entry relate to ('CREATE', 'MODIFY', 'MIGRATE')"
+    nullable: False
+  schema_enabled_date:
+    type: "DateTime"
+    description: "Date schema was created/updated"
+    nullable: False
+  comment:
+    type: "StringLong"
+    description: "Comment relating to new provenance entry"
+
+execution_alias:
+  execution_alias_id:
+    description: "Unique identifier for execution alias"
+    type: "Integer"
+    primary_key: True
+  supersede_date:
+    type: "DateTime"
+    description: "If a new entry has been added to the table with the same alias name (but a different `execution_id`), the old entry will be superseded. `supersede_date` in the old entry tracks when this happened. If the entry has not been superseded, `supersede_date` will be None"
+  creator_uid:
+    type: "StringShort"
+    description: "UID of person who registered the entry"
+    nullable: False
+  register_date:
+    type: "DateTime"
+    description: "Date the execution alias was registered"
+    nullable: False
+  alias:
+    type: "String"
+    description: "User given execution alias name"
+    nullable: False
+  execution_id:
+    type: "Integer"
+    foreign_key: True
+    foreign_key_schema: "self"
+    foreign_key_table: "execution"
+    foreign_key_column: "execution_id"
+    description: "Execution this alias is linked to"
+
+dataset_alias:
+  dataset_alias_id:
+    description: "Unique identifier for dataset alias"
+    type: "Integer"
+    primary_key: True
+  supersede_date:
+    type: "DateTime"
+    description: "If a new entry has been added to the table with the same alias name (but a different `dataset_id`), the old entry will be superseded. `supersede_date` in the old entry tracks when this happened. If the entry has not been superseded, `supersede_date` will be None"
+  creator_uid:
+    type: "StringShort"
+    description: "UID of person who registered the entry"
+    nullable: False
+  register_date:
+    type: "DateTime"
+    description: "Date the dataset alias was registered"
+    nullable: False
+  alias:
+    type: "String"
+    description: "User given dataset alias name"
+    nullable: False
+  dataset_id:
+    type: "Integer"
+    foreign_key: True
+    foreign_key_schema: "self"
+    foreign_key_table: "dataset"
+    foreign_key_column: "dataset_id"
+    description: "Dataset this alias is linked to"
+
+dependency:
+  dependency_id:
+    description: "Unique identifier for dependency"
+    type: "Integer"
+    primary_key: True
+  register_date:
+    type: "DateTime"
+    description: "Date the dependency was registered"
+    nullable: False
+  input_id:
+    type: "Integer"
+    foreign_key: True
+    foreign_key_schema: "self"
+    foreign_key_table: "dataset"
+    foreign_key_column: "dataset_id"
+    description: "Dataset this dependency is linked to (for every dependency, this, or `input_production_id`, must be non-null)"
+  input_production_id:
+    type: "Integer"
+    foreign_key: True
+    foreign_key_schema: "production"
+    foreign_key_table: "dataset"
+    foreign_key_column: "dataset_id"
+    description: "Production dataset this dependency is linked to (for every dependency, this, or `input_id`, must be non-null)"
+  execution_id:
+    type: "Integer"
+    foreign_key: True
+    foreign_key_schema: "self"
+    foreign_key_table: "execution"
+    foreign_key_column: "execution_id"
+    description: "Execution this dependency is linked to"
+
+dataset:
+  dataset_id:
+    type: "Integer"
+    primary_key: True
+    description: "Unique identifier for this dataset"
+  name:
+    type: "String"
+    description: "Any convenient, evocative name for the human. Note the combination of name, version and version_suffix must be unique. If None, the name is generated from the relative path."
+    nullable: False
+    cli_optional: True
+  relative_path:
+    type: "String"
+    description: "Relative path storing the data, relative to `<root_dir>`"
+    nullable: False
+  version_major:
+    type: "Integer"
+    description: "Major version in semantic string (i.e., X.x.x)"
+    nullable: False
+  version_minor:
+    type: "Integer"
+    description: "Minor version in semantic string (i.e., x.X.x)"
+    nullable: False
+  version_patch:
+    type: "Integer"
+    description: "Patch version in semantic string (i.e., x.x.X)"
+    nullable: False
+  version_suffix:
+    type: "String"
+    description: "Optional version suffix to place at the end of the version string. Cannot be used for production datasets."
+    cli_optional: True
+  version_string:
+    type: "String"
+    description: "Version string"
+    nullable: False
+  creation_date:
+    type: "DateTime"
+    description: "Dataset creation date"
+    cli_optional: True
+  register_date:
+    type: "DateTime"
+    description: "Date the dataset was registered"
+    nullable: False
+  creator_uid:
+    type: "StringShort"
+    description: "UID of person who registered the entry"
+    nullable: False
+  access_API:
+    type: "StringShort"
+    description: "Describes the software that can read the dataset (e.g., 'gcr-catalogs', 'skyCatalogs')"
+    cli_optional: True
+  owner:
+    type: "String"
+    description: "Owner of the dataset (defaults to $USER)"
+    nullable: False
+    cli_optional: True
+  owner_type:
+    type: "String"
+    description: "Dataset owner type, can be 'user', 'group', 'project' or 'production'."
+    nullable: False
+    choices: ["user", "group", "project", "production"]
+    cli_default: "user"
+    cli_optional: True
+  data_org:
+    type: "String"
+    description: "Dataset organisation ('file' or 'directory')"
+    nullable: False
+  nfiles:
+    type: "Integer"
+    description: "How many files are in the dataset"
+    nullable: False
+  total_disk_space:
+    type: "Float"
+    description: "Total disk space used by the dataset"
+    nullable: False
+  register_root_dir:
+    type: "String"
+    description: "The `root_dir` the dataset was originally ingested into"
+    nullable: False
+  description:
+    type: "String"
+    description: "User provided human-readable description of the dataset"
+    cli_optional: True
+  is_valid:
+    type: "Boolean"
+    nullable: False
+    description: "False if, e.g., copy failed"
+  execution_id:
+    type: "Integer"
+    foreign_key: True
+    foreign_key_schema: "self"
+    foreign_key_table: "execution"
+    foreign_key_column: "execution_id"
+    description: "Execution this dataset is linked to"
+    cli_optional: True
+  is_overwritten:
+    type: "Boolean"
+    nullable: False
+    description: "True if the original data for this dataset has been overwritten at some point. This would have required that `is_overwritable` was set to true on the original dataset"
+  is_overwritable:
+    type: "Boolean"
+    nullable: False
+    description: "True means this dataset can be overwritten in the future"
+    cli_optional: True
+  is_external_link:
+    type: "Boolean"
+    nullable: False
+    description: "True if an external link"
+  is_archived:
+    type: "Boolean"
+    nullable: False
+    description: "True if dataset is archived"
diff --git a/tests/end_to_end_tests/create_test_entries_cli.sh b/tests/end_to_end_tests/create_test_entries_cli.sh
index 9599b48b..b008e3ac 100644
--- a/tests/end_to_end_tests/create_test_entries_cli.sh
+++ b/tests/end_to_end_tests/create_test_entries_cli.sh
@@ -13,14 +13,20 @@ dregs register dataset my_cli_dataset2 "patch" \
 dregs register dataset my_cli_dataset3 "1.2.3" --is_dummy \
     --description "This is my dataset description" \
     --access_API "Awesome API" \
-    --locale "Secret location" \
     --owner "DESC" \
-    --owner-type "group" \
+    --owner_type "group" \
     --version_suffix "test" \
-    --root_dir "DataRegistry_data"
+    --root_dir "DataRegistry_data" \
+    --creation_date "2020-01-01" \
+    --input_datasets 1 2 \
+    --execution_name "I have given the execution a name" \
+    --is_overwritable
 
 # A production dataset
-dregs register dataset my_production_cli_dataset "0.1.2" \
-    --owner-type "production" \
-    --is_dummy \
-    --root_dir "DataRegistry_data"
+if [ "$DATAREG_BACKEND" = "postgres" ]; then
+    dregs register dataset my_production_cli_dataset "0.1.2" \
+        --owner_type "production" \
+        --is_dummy \
+        --root_dir "DataRegistry_data" \
+        --schema "production"
+fi
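
Note: as a quick illustration of the YAML-driven workflow this diff introduces, below is a minimal sketch of consuming the new `load_schema()` API the same way `create_registry_db.py` and the CLI now do. It assumes the `dataregistry` package is installed so that `schema/schema.yaml` ships with it; the printed layout is illustrative, not part of the patch.

    # Minimal sketch: inspect the loaded schema.
    # `load_schema()` returns a dict keyed by table name; `_populate_defaults`
    # has already filled in missing attributes ("nullable", "primary_key",
    # "foreign_key", "cli_optional", "cli_default", "choices").
    from dataregistry.schema import load_schema

    schema_data = load_schema()

    for table, columns in schema_data.items():
        print(f"{table}: {len(columns)} columns")
        for column, info in columns.items():
            print(f"  {column} ({info['type']}): {info['description']}")

    # The CLI builds its optional dataset flags from the same metadata, e.g.:
    assert schema_data["dataset"]["owner_type"]["choices"] == [
        "user", "group", "project", "production"
    ]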