diff --git a/src/gentropy/dataset/l2g_features/other.py b/src/gentropy/dataset/l2g_features/other.py index f68b05cb4..e71b58544 100644 --- a/src/gentropy/dataset/l2g_features/other.py +++ b/src/gentropy/dataset/l2g_features/other.py @@ -100,6 +100,9 @@ def is_protein_coding_feature_logic( Returns: DataFrame: Feature dataset, with 1 if the gene is protein-coding, 0 if not. + + Raises: + AssertionError: When the provided `genomic_window` is more than 500kb. """ assert genomic_window <= 500_000, "Genomic window must be less than 500kb." genes_in_window = ( diff --git a/src/gentropy/dataset/pairwise_ld.py b/src/gentropy/dataset/pairwise_ld.py index ab68a74ab..6db570ba9 100644 --- a/src/gentropy/dataset/pairwise_ld.py +++ b/src/gentropy/dataset/pairwise_ld.py @@ -30,6 +30,9 @@ def __post_init__(self: PairwiseLD) -> None: """Validating the dataset upon creation. - Besides the schema, a pairwise LD table is expected have rows being a square number. + + Raises: + AssertionError: When the number of rows in the provided dataframe to construct the LD matrix is not a square number. """ row_count = self.df.count() diff --git a/src/gentropy/dataset/variant_index.py b/src/gentropy/dataset/variant_index.py index 0b0bb59d2..a44356ca6 100644 --- a/src/gentropy/dataset/variant_index.py +++ b/src/gentropy/dataset/variant_index.py @@ -227,10 +227,13 @@ def filter_by_variant(self: VariantIndex, df: DataFrame) -> VariantIndex: """Filter variant annotation dataset by a variant dataframe. Args: - df (DataFrame): A dataframe of variants + df (DataFrame): A dataframe of variants. Returns: - VariantIndex: A filtered variant annotation dataset + VariantIndex: A filtered variant annotation dataset. + + Raises: + AssertionError: When the variant dataframe does not contain either `variantId` or `chromosome` column. 
""" join_columns = ["variantId", "chromosome"] diff --git a/src/gentropy/datasource/ensembl/vep_parser.py b/src/gentropy/datasource/ensembl/vep_parser.py index 98b015cda..03b884305 100644 --- a/src/gentropy/datasource/ensembl/vep_parser.py +++ b/src/gentropy/datasource/ensembl/vep_parser.py @@ -325,6 +325,9 @@ def _get_most_severe_transcript( |{0.6, transcript3} | +----------------------+ + + Raises: + AssertionError: When `transcript_column_name` is not a string. """ assert isinstance( transcript_column_name, str diff --git a/src/gentropy/method/l2g/trainer.py b/src/gentropy/method/l2g/trainer.py index a123cfda9..62eb78d92 100644 --- a/src/gentropy/method/l2g/trainer.py +++ b/src/gentropy/method/l2g/trainer.py @@ -88,6 +88,7 @@ def fit( Raises: ValueError: Train data not set, nothing to fit. + AssertionError: When x_train_size or y_train_size are not zero. """ if ( self.x_train is not None @@ -180,6 +181,7 @@ def log_to_wandb( Raises: RuntimeError: If dependencies are not available. + AssertionError: When x_train_size or y_train_size are not zero. """ if ( self.x_train is None diff --git a/src/gentropy/method/susie_inf.py b/src/gentropy/method/susie_inf.py index e8a4a57b1..c53d6a939 100644 --- a/src/gentropy/method/susie_inf.py +++ b/src/gentropy/method/susie_inf.py @@ -493,6 +493,9 @@ def credible_set_qc( Returns: StudyLocus: Credible sets which pass filters and LD clumping. + + Raises: + AssertionError: When running in clump mode, but no study_index or ld_index or ld_min_r2 were provided. """ cred_sets.df = ( cred_sets.df.withColumn( diff --git a/src/gentropy/variant_index.py b/src/gentropy/variant_index.py index 773075c70..2064d7b67 100644 --- a/src/gentropy/variant_index.py +++ b/src/gentropy/variant_index.py @@ -106,6 +106,9 @@ def __init__( source_formats (list[str]): Format of the input dataset. output_path (str): Output VCF file path. partition_size (int): Approximate number of variants in each output partition. 
+ + Raises: + AssertionError: When the length of `source_paths` does not match the length of `source_formats`. """ assert len(source_formats) == len( source_paths