From 5b3847b36ac5f97682cdc313cbd1fef1863f1836 Mon Sep 17 00:00:00 2001 From: Quang Date: Wed, 4 Dec 2024 17:47:58 +0700 Subject: [PATCH] Refactor Alembic migration scripts and update alembic_check.sh --- ...5a62392_fix_hash_data_in_document_table.py | 36 ----------- ...9_update_non_null_of_document_in_works_.py | 40 ------------ .../4d66a5dc782b_create_funder_column.py | 30 --------- ...5128cc2fe488_remove_fk_in_document_able.py | 32 ---------- ...s.py => 52101c205c9d_initial_migration.py} | 61 ++++++++++++++---- .../versions/73817166f499_add_miss_columns.py | 30 --------- ...reate_rtransparentpublication_workd_id_.py | 46 -------------- ...86c58f09_link_work_id_at_document_table.py | 32 ---------- .../b386f06edbd8_initial_migration.py | 62 ------------------- alembic_check.sh | 8 +-- dsst_etl/models.py | 12 +++- 11 files changed, 60 insertions(+), 329 deletions(-) delete mode 100644 alembic/versions/360c65a62392_fix_hash_data_in_document_table.py delete mode 100644 alembic/versions/4a908d10b459_update_non_null_of_document_in_works_.py delete mode 100644 alembic/versions/4d66a5dc782b_create_funder_column.py delete mode 100644 alembic/versions/5128cc2fe488_remove_fk_in_document_able.py rename alembic/versions/{74f7f1590fb6_create_rtransparentpublication_tables.py => 52101c205c9d_initial_migration.py} (81%) delete mode 100644 alembic/versions/73817166f499_add_miss_columns.py delete mode 100644 alembic/versions/845c59592898_create_rtransparentpublication_workd_id_.py delete mode 100644 alembic/versions/8d6986c58f09_link_work_id_at_document_table.py delete mode 100644 alembic/versions/b386f06edbd8_initial_migration.py diff --git a/alembic/versions/360c65a62392_fix_hash_data_in_document_table.py b/alembic/versions/360c65a62392_fix_hash_data_in_document_table.py deleted file mode 100644 index 3bf1153..0000000 --- a/alembic/versions/360c65a62392_fix_hash_data_in_document_table.py +++ /dev/null @@ -1,36 +0,0 @@ -"""fix hash data in document table - -Revision ID: 360c65a62392 -Revises: 8d6986c58f09 -Create Date: 2024-11-15 17:16:48.117093 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision: str = '360c65a62392' -down_revision: Union[str, None] = '8d6986c58f09' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('documents', sa.Column('hash_data', sa.String(), nullable=True)) - op.drop_constraint('documents_hash_key', 'documents', type_='unique') - op.create_unique_constraint(None, 'documents', ['hash_data']) - op.drop_column('documents', 'hash') - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('documents', sa.Column('hash', postgresql.BYTEA(), autoincrement=False, nullable=False)) - op.drop_constraint(None, 'documents', type_='unique') - op.create_unique_constraint('documents_hash_key', 'documents', ['hash']) - op.drop_column('documents', 'hash_data') - # ### end Alembic commands ### diff --git a/alembic/versions/4a908d10b459_update_non_null_of_document_in_works_.py b/alembic/versions/4a908d10b459_update_non_null_of_document_in_works_.py deleted file mode 100644 index 6f2d6a8..0000000 --- a/alembic/versions/4a908d10b459_update_non_null_of_document_in_works_.py +++ /dev/null @@ -1,40 +0,0 @@ -"""update non-null of document in works table - -Revision ID: 4a908d10b459 -Revises: 360c65a62392 -Create Date: 2024-11-21 16:40:56.966690 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '4a908d10b459' -down_revision: Union[str, None] = '360c65a62392' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column('works', 'initial_document_id', - existing_type=sa.INTEGER(), - nullable=True) - op.alter_column('works', 'primary_document_id', - existing_type=sa.INTEGER(), - nullable=True) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column('works', 'primary_document_id', - existing_type=sa.INTEGER(), - nullable=False) - op.alter_column('works', 'initial_document_id', - existing_type=sa.INTEGER(), - nullable=False) - # ### end Alembic commands ### diff --git a/alembic/versions/4d66a5dc782b_create_funder_column.py b/alembic/versions/4d66a5dc782b_create_funder_column.py deleted file mode 100644 index e67e3c5..0000000 --- a/alembic/versions/4d66a5dc782b_create_funder_column.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Create funder column - -Revision ID: 4d66a5dc782b -Revises: 845c59592898 -Create Date: 2024-12-01 20:47:57.932627 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '4d66a5dc782b' -down_revision: Union[str, None] = '845c59592898' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('rtransparent_publication', sa.Column('funder', sa.String(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('rtransparent_publication', 'funder') - # ### end Alembic commands ### diff --git a/alembic/versions/5128cc2fe488_remove_fk_in_document_able.py b/alembic/versions/5128cc2fe488_remove_fk_in_document_able.py deleted file mode 100644 index e57cc8a..0000000 --- a/alembic/versions/5128cc2fe488_remove_fk_in_document_able.py +++ /dev/null @@ -1,32 +0,0 @@ -"""remove fk in document able - -Revision ID: 5128cc2fe488 -Revises: 73817166f499 -Create Date: 2024-12-04 17:14:10.177083 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '5128cc2fe488' -down_revision: Union[str, None] = '73817166f499' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint('documents_work_id_fkey', 'documents', type_='foreignkey') - op.drop_column('documents', 'work_id') - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('documents', sa.Column('work_id', sa.INTEGER(), autoincrement=False, nullable=True)) - op.create_foreign_key('documents_work_id_fkey', 'documents', 'works', ['work_id'], ['id']) - # ### end Alembic commands ### diff --git a/alembic/versions/74f7f1590fb6_create_rtransparentpublication_tables.py b/alembic/versions/52101c205c9d_initial_migration.py similarity index 81% rename from alembic/versions/74f7f1590fb6_create_rtransparentpublication_tables.py rename to alembic/versions/52101c205c9d_initial_migration.py index 0b71209..11e11ee 100644 --- a/alembic/versions/74f7f1590fb6_create_rtransparentpublication_tables.py +++ b/alembic/versions/52101c205c9d_initial_migration.py @@ -1,8 +1,8 @@ -"""Create RTransparentPublication tables +"""initial migration -Revision ID: 74f7f1590fb6 -Revises: 4a908d10b459 -Create Date: 2024-11-29 16:32:06.283873 +Revision ID: 52101c205c9d +Revises: +Create Date: 2024-12-04 17:44:55.160050 """ from typing import Sequence, Union @@ -12,16 +12,49 @@ # revision identifiers, used by Alembic. -revision: str = '74f7f1590fb6' -down_revision: Union[str, None] = '4a908d10b459' +revision: str = '52101c205c9d' +down_revision: Union[str, None] = None branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### + op.create_table('provenance', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('pipeline_name', sa.String(length=255), nullable=True), + sa.Column('version', sa.String(length=50), nullable=True), + sa.Column('compute', sa.Text(), nullable=True), + sa.Column('personnel', sa.Text(), nullable=True), + sa.Column('comment', sa.Text(), nullable=True), + sa.PrimaryKeyConstraint('id', name=op.f('pk_provenance')) + ) + op.create_table('documents', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('hash_data', sa.String(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.Column('s3uri', sa.Text(), nullable=False), + sa.Column('provenance_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['provenance_id'], ['provenance.id'], name=op.f('fk_documents_provenance_id_provenance')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_documents')), + sa.UniqueConstraint('hash_data', name=op.f('uq_documents_hash_data')) + ) + op.create_table('works', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.Column('modified_at', sa.DateTime(), nullable=True), + sa.Column('initial_document_id', sa.Integer(), nullable=True), + sa.Column('primary_document_id', sa.Integer(), nullable=True), + sa.Column('provenance_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['initial_document_id'], ['documents.id'], name=op.f('fk_works_initial_document_id_documents')), + sa.ForeignKeyConstraint(['primary_document_id'], ['documents.id'], name=op.f('fk_works_primary_document_id_documents')), + sa.ForeignKeyConstraint(['provenance_id'], ['provenance.id'], name=op.f('fk_works_provenance_id_provenance')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_works')) + ) op.create_table('rtransparent_publication', sa.Column('id', sa.Integer(), nullable=False), + sa.Column('title', sa.String(), nullable=True), + sa.Column('author', sa.String(), nullable=True), sa.Column('is_open_code', sa.Boolean(), nullable=True), sa.Column('is_open_data', sa.Boolean(), nullable=True), sa.Column('year', sa.Integer(), nullable=True), @@ -161,7 +194,6 @@ def upgrade() -> None: sa.Column('affiliation_aff_id', sa.String(), nullable=True), sa.Column('affiliation_all', sa.String(), nullable=True), sa.Column('article', sa.String(), nullable=True), - sa.Column('author', sa.String(), nullable=True), sa.Column('author_aff_id', sa.String(), nullable=True), sa.Column('correspondence', sa.String(), nullable=True), sa.Column('date_epub', sa.String(), nullable=True), @@ -191,14 +223,14 @@ def upgrade() -> None: sa.Column('pmcid_uid', sa.String(), nullable=True), sa.Column('publisher_id', sa.String(), nullable=True), sa.Column('subject', sa.String(), nullable=True), - sa.Column('title', sa.String(), nullable=True), sa.Column('is_data_pred', sa.Boolean(), nullable=True), sa.Column('is_code_pred', sa.Boolean(), nullable=True), - sa.Column('work_id', sa.Integer(), nullable=False), - sa.Column('provenance_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint(['provenance_id'], ['provenance.id'], ), - sa.ForeignKeyConstraint(['work_id'], ['works.id'], ), - sa.PrimaryKeyConstraint('id') + sa.Column('funder', sa.String(), nullable=True), + sa.Column('work_id', sa.Integer(), nullable=True), + sa.Column('provenance_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['provenance_id'], ['provenance.id'], name=op.f('fk_rtransparent_publication_provenance_id_provenance')), + sa.ForeignKeyConstraint(['work_id'], ['works.id'], name=op.f('fk_rtransparent_publication_work_id_works')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_rtransparent_publication')) ) # ### end Alembic commands ### @@ -206,4 +238,7 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_table('rtransparent_publication') + op.drop_table('works') + op.drop_table('documents') + op.drop_table('provenance') # ### end Alembic commands ### diff --git a/alembic/versions/73817166f499_add_miss_columns.py b/alembic/versions/73817166f499_add_miss_columns.py deleted file mode 100644 index 6f128f2..0000000 --- a/alembic/versions/73817166f499_add_miss_columns.py +++ /dev/null @@ -1,30 +0,0 @@ -"""add miss columns - -Revision ID: 73817166f499 -Revises: 4d66a5dc782b -Create Date: 2024-12-03 16:10:21.404009 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '73817166f499' -down_revision: Union[str, None] = '4d66a5dc782b' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### diff --git a/alembic/versions/845c59592898_create_rtransparentpublication_workd_id_.py b/alembic/versions/845c59592898_create_rtransparentpublication_workd_id_.py deleted file mode 100644 index 65b9e57..0000000 --- a/alembic/versions/845c59592898_create_rtransparentpublication_workd_id_.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Create RTransparentPublication workd_id and doc id is nullable - -Revision ID: 845c59592898 -Revises: 74f7f1590fb6 -Create Date: 2024-11-29 17:13:01.858245 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '845c59592898' -down_revision: Union[str, None] = '74f7f1590fb6' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column('documents', 'hash_data', - existing_type=sa.VARCHAR(), - nullable=False) - op.alter_column('rtransparent_publication', 'work_id', - existing_type=sa.INTEGER(), - nullable=True) - op.alter_column('rtransparent_publication', 'provenance_id', - existing_type=sa.INTEGER(), - nullable=True) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column('rtransparent_publication', 'provenance_id', - existing_type=sa.INTEGER(), - nullable=False) - op.alter_column('rtransparent_publication', 'work_id', - existing_type=sa.INTEGER(), - nullable=False) - op.alter_column('documents', 'hash_data', - existing_type=sa.VARCHAR(), - nullable=True) - # ### end Alembic commands ### diff --git a/alembic/versions/8d6986c58f09_link_work_id_at_document_table.py b/alembic/versions/8d6986c58f09_link_work_id_at_document_table.py deleted file mode 100644 index eee88f9..0000000 --- a/alembic/versions/8d6986c58f09_link_work_id_at_document_table.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Link work_id at document table - -Revision ID: 8d6986c58f09 -Revises: b386f06edbd8 -Create Date: 2024-11-13 18:00:01.069089 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '8d6986c58f09' -down_revision: Union[str, None] = 'b386f06edbd8' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('documents', sa.Column('work_id', sa.Integer(), nullable=True)) - op.create_foreign_key(None, 'documents', 'works', ['work_id'], ['id']) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint(None, 'documents', type_='foreignkey') - op.drop_column('documents', 'work_id') - # ### end Alembic commands ### diff --git a/alembic/versions/b386f06edbd8_initial_migration.py b/alembic/versions/b386f06edbd8_initial_migration.py deleted file mode 100644 index ff985ee..0000000 --- a/alembic/versions/b386f06edbd8_initial_migration.py +++ /dev/null @@ -1,62 +0,0 @@ -"""Initial migration - -Revision ID: b386f06edbd8 -Revises: -Create Date: 2024-11-13 17:57:38.920195 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = 'b386f06edbd8' -down_revision: Union[str, None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('provenance', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('pipeline_name', sa.String(length=255), nullable=True), - sa.Column('version', sa.String(length=50), nullable=True), - sa.Column('compute', sa.Text(), nullable=True), - sa.Column('personnel', sa.Text(), nullable=True), - sa.Column('comment', sa.Text(), nullable=True), - sa.PrimaryKeyConstraint('id') - ) - op.create_table('documents', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('hash', sa.LargeBinary(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('s3uri', sa.Text(), nullable=False), - sa.Column('provenance_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['provenance_id'], ['provenance.id'], ), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('hash') - ) - op.create_table('works', - sa.Column('id', sa.Integer(), nullable=False), - sa.Column('created_at', sa.DateTime(), nullable=True), - sa.Column('modified_at', sa.DateTime(), nullable=True), - sa.Column('initial_document_id', sa.Integer(), nullable=False), - sa.Column('primary_document_id', sa.Integer(), nullable=False), - sa.Column('provenance_id', sa.Integer(), nullable=True), - sa.ForeignKeyConstraint(['initial_document_id'], ['documents.id'], ), - sa.ForeignKeyConstraint(['primary_document_id'], ['documents.id'], ), - sa.ForeignKeyConstraint(['provenance_id'], ['provenance.id'], ), - sa.PrimaryKeyConstraint('id') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('works') - op.drop_table('documents') - op.drop_table('provenance') - # ### end Alembic commands ### diff --git a/alembic_check.sh b/alembic_check.sh index 607e398..3d20010 100755 --- a/alembic_check.sh +++ b/alembic_check.sh @@ -13,13 +13,7 @@ else fi # Show relevant database environment variables -echo "=== Database Environment Variables ===" -echo "POSTGRES_USER: ${POSTGRES_USER:-not set}" -echo "POSTGRES_DB: ${POSTGRES_DB:-not set}" -echo "POSTGRES_HOST: ${POSTGRES_HOST:-not set}" -echo "POSTGRES_PORT: ${POSTGRES_PORT:-not set}" -echo "POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-(hidden)}" -echo "==================================" + # Run alembic check echo "Running alembic check..." diff --git a/dsst_etl/models.py b/dsst_etl/models.py index d569bf4..bd4e9a7 100644 --- a/dsst_etl/models.py +++ b/dsst_etl/models.py @@ -5,13 +5,23 @@ Float, ForeignKey, Integer, + MetaData, String, Text, ) from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func -Base = declarative_base() +naming_convention = { + "ix": "ix_%(column_0_label)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_%(constraint_name)s", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", +} +# Apply the naming convention to the metadata +metadata = MetaData(naming_convention=naming_convention) +Base = declarative_base(metadata=metadata) class Works(Base):