From f0dc54dc6d6fd66a49496b6896d1158f3acfdbbb Mon Sep 17 00:00:00 2001 From: Kiyoon Kim Date: Wed, 17 Jul 2024 11:27:54 +0900 Subject: [PATCH] refactor: remove tools --- tools/create_uniprot_accession_to_pk_id.py | 82 ---------------------- tools/create_uniprot_empty_table.py | 55 --------------- tools/uniprot_xml_to_postgresql.py | 8 --- 3 files changed, 145 deletions(-) delete mode 100644 tools/create_uniprot_accession_to_pk_id.py delete mode 100644 tools/create_uniprot_empty_table.py delete mode 100644 tools/uniprot_xml_to_postgresql.py diff --git a/tools/create_uniprot_accession_to_pk_id.py b/tools/create_uniprot_accession_to_pk_id.py deleted file mode 100644 index f5c2202..0000000 --- a/tools/create_uniprot_accession_to_pk_id.py +++ /dev/null @@ -1,82 +0,0 @@ -import logging - -import psycopg - -from bio_data_to_db.utils.postgresql import ( - create_db_if_not_exists, - create_schema_if_not_exists, - make_columns_unique, - polars_write_database, - set_column_as_primary_key, -) - -logger = logging.getLogger(__name__) - - -def main(): - uri_wo_db = "postgresql://kiyoon@localhost:5432" - db_name = "uniprot" - uri = f"{uri_wo_db}/{db_name}" - - create_db_if_not_exists(uri_wo_db, db_name) - create_schema_if_not_exists(uri, "public") - - with psycopg.connect( - conninfo=uri, - ) as conn: - try: - cursor = conn.cursor() - conn.autocommit = True - cursor.execute( - query=""" - CREATE TABLE public.accession_to_pk_id ( - accession TEXT, - uniprot_pk_id BIGINT - ) - """ - ) - logger.info( - "Table structure 'uniprot.public.accession_to_pk_id' created successfully" - ) - - cursor.execute( - query=""" - INSERT INTO public.accession_to_pk_id (accession, uniprot_pk_id) - SELECT UNNEST(accessions), uniprot_pk_id - FROM public.uniprot_info - """ - ) - logger.info( - "Table 'uniprot.public.accession_to_pk_id' insert content successfully" - ) - - cursor.execute( - query=""" - CREATE TABLE public.accession_to_pk_id_list ( - accession TEXT PRIMARY KEY, - uniprot_pk_ids BIGINT[] - ) - """ - ) - logger.info( - "Table structure 'uniprot.public.accession_to_pk_id_list' created successfully" - ) - - cursor.execute( - query=""" - INSERT INTO public.accession_to_pk_id_list (accession, uniprot_pk_ids) - SELECT accession, ARRAY_AGG(uniprot_pk_id) AS uniprot_pk_ids - FROM public.accession_to_pk_id - GROUP BY accession; - """ - ) - logger.info( - "Table 'uniprot.public.accession_to_pk_id_list' content added successfully" - ) - - except psycopg.Error: - logger.exception("Error creating table 'uniprot.public.accession_to_pk_id'") - - -if __name__ == "__main__": - main() diff --git a/tools/create_uniprot_empty_table.py b/tools/create_uniprot_empty_table.py deleted file mode 100644 index 8e84a69..0000000 --- a/tools/create_uniprot_empty_table.py +++ /dev/null @@ -1,55 +0,0 @@ -import logging - -import psycopg - -from bio_data_to_db.utils.postgresql import ( - create_db_if_not_exists, - create_schema_if_not_exists, - make_columns_unique, - polars_write_database, - set_column_as_primary_key, -) - -logger = logging.getLogger(__name__) - - -def main(): - create_db_if_not_exists("postgresql://kiyoon@localhost:5432", "uniprot") - create_schema_if_not_exists("postgresql://kiyoon@localhost:5432/uniprot", "public") - - with psycopg.connect( - conninfo="postgresql://kiyoon@localhost:5432/uniprot", - ) as conn: - try: - cursor = conn.cursor() - conn.autocommit = True - cursor.execute( - query=""" - CREATE TABLE public.uniprot_info ( - uniprot_pk_id BIGINT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, - accessions TEXT[], - names TEXT[], - protein_names TEXT[], - gene_names TEXT[], - organism_scientific TEXT, - organism_commons TEXT[], - organism_synonyms TEXT[], - ncbi_taxonomy_id INT, - deargen_ncbi_taxonomy_id INT, - lineage TEXT[], - keywords TEXT[], - geneontology_ids TEXT[], - geneontology_names TEXT[], - sequence TEXT, - deargen_molecular_functions TEXT[] - ) - """ - ) - logger.info("Database 'uniprot' created successfully") - - except psycopg.Error: - logger.exception("Error creating database 'uniprot'") - - -if __name__ == "__main__": - main() diff --git a/tools/uniprot_xml_to_postgresql.py b/tools/uniprot_xml_to_postgresql.py deleted file mode 100644 index e57f353..0000000 --- a/tools/uniprot_xml_to_postgresql.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import annotations - -from bio_data_to_db import uniprot_xml_to_postgresql - -uniprot_xml_to_postgresql( - uniprot_xml_path="~/Downloads/uniprot_sprot.xml", - uri="postgresql://kiyoon@localhost:5432/uniprot", -)