Skip to content

Commit

Permalink
Remove biofeature/chrom splitting for qtl data
Browse files Browse the repository at this point in the history
  • Loading branch information
ricardo-lourenco committed Mar 7, 2024
1 parent e778896 commit 514b07e
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 22 deletions.
25 changes: 4 additions & 21 deletions filters/significant_window_extraction/filter_by_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def filter_significant_windows(in_pq, out_pq, data_type, window, pval):
Args:
in_pq (path): input parquet
out_pq (path): output parquet
data type (str): gwas or moltrait
data_type (str): gwas or mol_trait
window (int): window to extract around significant variants
pval (float): pvalue to be considered significant
'''
Expand All @@ -68,7 +68,7 @@ def filter_significant_windows(in_pq, out_pq, data_type, window, pval):
# Select rows that have "significant" p-values
if data_type == 'gwas':
sig = df.filter(F.col('pval') <= pval)
elif data_type == 'moltrait':
elif data_type == 'mol_trait':
sig = df.filter(F.col('pval') <= (0.05 / F.col('num_tests')))
sig = (
sig
Expand Down Expand Up @@ -105,24 +105,7 @@ def filter_significant_windows(in_pq, out_pq, data_type, window, pval):
)

# Write output
if data_type == 'gwas':
(
merged
.write.parquet(
out_pq,
mode='overwrite'
)
)
elif data_type == 'moltrait':
(
merged
.write
.partitionBy('bio_feature', 'chrom')
.parquet(
out_pq,
mode='overwrite'
)
)
merged.write.parquet(out_pq, mode='overwrite')

return 0

Expand Down Expand Up @@ -205,7 +188,7 @@ def parse_args():
metavar="<float>", type=float, required=True)
p.add_argument('--data_type',
help=("Whether dataset is of GWAS or molecular trait type"),
metavar="<str>", type=str, choices=['gwas', 'moltrait'], required=True)
metavar="<str>", type=str, choices=['gwas', 'mol_trait'], required=True)

args = p.parse_args()

Expand Down
4 changes: 3 additions & 1 deletion run_extract_significant_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def main():
'--out_sumstats', os.path.join('/sumstat-data/output/', f"{file.strip('study=')}.parquet"),
'--window', args.window,
'--pval', args.pval,
'--data_type', 'gwas'
'--data_type', args.data_type
]
subprocess.call(cmd)

Expand All @@ -27,6 +27,8 @@ def parse_args():
help="Window size", type=str, required=True)
parser.add_argument('--pval', metavar="<float>",
help="p-value", type=str, required=True)
parser.add_argument('--data_type', metavar="<str>",
help="Data type (gwas or mol_trait)", type=str, required=True)
args = parser.parse_args()
return args

Expand Down

0 comments on commit 514b07e

Please sign in to comment.