diff --git a/filters/significant_window_extraction/filter_by_merge.py b/filters/significant_window_extraction/filter_by_merge.py index f7c5e3b..86c1441 100644 --- a/filters/significant_window_extraction/filter_by_merge.py +++ b/filters/significant_window_extraction/filter_by_merge.py @@ -54,7 +54,7 @@ def filter_significant_windows(in_pq, out_pq, data_type, window, pval): Args: in_pq (path): input parquet out_pq (path): output parquet - data type (str): gwas or moltrait + data type (str): gwas or mol_trait window (int): window to extract around significant variants pval (float): pvalue to be considered significant ''' @@ -68,7 +68,7 @@ def filter_significant_windows(in_pq, out_pq, data_type, window, pval): # Select rows that have "significant" p-values if data_type == 'gwas': sig = df.filter(F.col('pval') <= pval) - elif data_type == 'moltrait': + elif data_type == 'mol_trait': sig = df.filter(F.col('pval') <= (0.05 / F.col('num_tests'))) sig = ( sig @@ -105,24 +105,7 @@ def filter_significant_windows(in_pq, out_pq, data_type, window, pval): ) # Write output - if data_type == 'gwas': - ( - merged - .write.parquet( - out_pq, - mode='overwrite' - ) - ) - elif data_type == 'moltrait': - ( - merged - .write - .partitionBy('bio_feature', 'chrom') - .parquet( - out_pq, - mode='overwrite' - ) - ) + merged.write.parquet(out_pq, mode='overwrite') return 0 @@ -205,7 +188,7 @@ def parse_args(): metavar="", type=float, required=True) p.add_argument('--data_type', help=("Whether dataset is of GWAS or molecular trait type"), - metavar="", type=str, choices=['gwas', 'moltrait'], required=True) + metavar="", type=str, choices=['gwas', 'mol_trait'], required=True) args = p.parse_args() diff --git a/run_extract_significant_window.py b/run_extract_significant_window.py index 435eeb6..a12d7b5 100644 --- a/run_extract_significant_window.py +++ b/run_extract_significant_window.py @@ -17,7 +17,7 @@ def main(): '--out_sumstats', os.path.join('/sumstat-data/output/', f"{file.strip('study=')}.parquet"), '--window', args.window, '--pval', args.pval, - '--data_type', 'gwas' + '--data_type', args.data_type ] subprocess.call(cmd) @@ -27,6 +27,8 @@ def parse_args(): help="Window size", type=str, required=True) parser.add_argument('--pval', metavar="", help="p-value", type=str, required=True) + parser.add_argument('--data_type', metavar="", + help="Data type (gwas or mol_trait)", type=str, required=True) args = parser.parse_args() return args