#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

"""
ABOUT: This is the main entry for the LOGAN (whole genome sequencing pipeline).
REQUIRES:
  - python>=3.5
  - nextflow
  - singularity
DISCLAIMER:
                    PUBLIC DOMAIN NOTICE
        CCR Collaborative Bioinformatics Resource (CCBR)
                National Cancer Institute (NCI)
This software/database is a "United States Government Work" under
the terms of the United States Copyright Act.  It was written as
part of the author's official duties as a United States Government
employee and thus cannot be copyrighted. This software is freely
available to the public for use.
Although all reasonable efforts have been taken to ensure the
accuracy and reliability of the software and data, CCBR do not and
cannot warrant the performance or results that may be obtained by
using this software or data. CCBR and NCI disclaim all warranties,
express or implied, including warranties of performance,
merchantability or fitness for any particular purpose.
Please cite the author and the "NIH Biowulf Cluster" in any work or
product based on this material.

 PIPELINE TYPE
 Align --PIPE_ALIGN-TRIM ALIGN
 Variant Calls--PIPE_VC-Variant calling step after align
 Germline Calls DV--PIPE_GERMLINE-Germline after align
 QC requires Alignment, Germline--PIPE_QC--After everything
 --PIPE_BAMVC-BAM variant calling only
 --PIPE_TONLY_TRIM-Trim and Align

 HOW THE COMMANDS ARE BUILT
 One `nextflow run` command is generated per requested step, in the
 order: align, vc, sv, cnv, germline, qc. With --sample_sheet the
 paired flags (--PIPE_*) are used; without it the tumor-only flags
 (--PIPE_TONLY_*) are used. Alignment is skipped for BAM input.
"""

# Python standard library
import argparse
import json
import os
import re
import shlex
import subprocess
import sys
import time

# Lines written at the top of the generated SLURM script.
_SCRIPT_HEADER = ["#!/usr/bin/bash", "module load nextflow", "module load singularity"]

# Options appended to every command when --stub is given.
_STUB_OPTIONS = " -stub -without-podman T -without-conda -without-docker"

# step name -> (flag for FASTQ-based input, flag for BAM-based input)
_PAIRED_STEPS = {
    "vc": ("--PIPE_VC", "--PIPE_BAMVC"),
    "sv": ("--PIPE_SV", "--PIPE_BAMSV"),
    "cnv": ("--PIPE_CNV", "--PIPE_BAMCNV"),
    "germline": ("--PIPE_GERMLINE", "--PIPE_BAMGERMLINE"),
}
_TONLY_STEPS = {
    "vc": ("--PIPE_TONLY_VC", "--PIPE_TONLY_BAMVC"),
    "sv": ("--PIPE_TONLY_SV", "--PIPE_TONLY_BAMSV"),
    "cnv": ("--PIPE_TONLY_CNV", "--PIPE_TONLY_BAMCNV"),
}

# Order in which step commands are emitted between align and qc.
_STEP_ORDER = ("vc", "sv", "cnv", "germline")


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding every option.
    """
    parser = argparse.ArgumentParser(description="Input files")
    parser.add_argument("--fastq", help="FQ Inputs")
    parser.add_argument(
        "--file_input",
        help="TSV file of all fastq files used for input with 3 Columns Sample Name, Pair1, Pair2",
    )
    parser.add_argument("--bam", help="Glob of all the BAM files []")
    parser.add_argument("--sample_sheet", help="Sample sheet and required for Paired")
    parser.add_argument(
        "--splitregions", default=24, type=int, help="How splits per regions"
    )
    parser.add_argument("--vc", help="Add Somatic VC calling", action="store_true")
    parser.add_argument("--cnv", help="Add CNV calling", action="store_true")
    parser.add_argument(
        "--sv", help="Add Structural Variant calling", action="store_true"
    )
    parser.add_argument("--germline", help="Add Germline VC", action="store_true")
    parser.add_argument(
        "--qc",
        help="Add QC Steps (Requires Germline Calling as well)",
        action="store_true",
    )
    parser.add_argument("--output", help="Output Directory")
    parser.add_argument("--genome", help="hg38, mm10")
    parser.add_argument("--profile", help="Biowulf or Local Run")
    # Resuming stays the default (the original default was the always-truthy
    # string "True"); --no_resume is the way to switch it off.
    parser.add_argument(
        "--resume", action="store_true", default=True, help="Resume previous run?"
    )
    parser.add_argument(
        "--no_resume",
        dest="resume",
        action="store_false",
        help="Do not pass -resume to nextflow",
    )
    parser.add_argument("--submit", action="store_true", help="Submit to SLURM?")
    parser.add_argument("--stub", action="store_true", help="Stub run")
    return parser.parse_args(argv)


def _single_quote(value):
    """Wrap *value* in single quotes for bash, escaping embedded quotes."""
    return "'" + value.replace("'", "'\"'\"'") + "'"


def _lists_bam_files(path):
    """Return True when the text file at *path* contains the literal '.bam'."""
    with open(path, "r") as handle:
        return ".bam" in handle.read()


def _select_input(args, paired):
    """Pick the input option and report whether the input is BAM-based.

    Returns:
        ``(option_string, from_bam)``.

    Raises:
        ValueError: none of --fastq, --file_input or --bam was given.
    """
    if args.fastq:
        return "--fastq_input " + _single_quote(args.fastq), False
    # Precedence when both are given: paired mode prefers --file_input,
    # tumor-only mode prefers --bam.
    if args.bam and not (paired and args.file_input):
        return "--bam_input " + _single_quote(args.bam), True
    if args.file_input:
        option = "--file_input " + shlex.quote(args.file_input)
        # NOTE(review): only tumor-only mode inspects the TSV for BAM paths;
        # paired mode always treats --file_input as FASTQ input - confirm
        # that this is intended.
        return option, (not paired) and _lists_bam_files(args.file_input)
    raise ValueError("no input given: use one of --fastq, --file_input or --bam")


def _select_profile(args):
    """Return ``(profile_option, split_regions)`` for the requested profile.

    Raises:
        ValueError: --profile is neither 'biowulf' nor 'local'.
    """
    name = args.profile.lower() if args.profile else None
    if name is None:
        if args.stub:
            return "-profile localstub", "4"
        name = "biowulf"
    if name not in ("local", "biowulf"):
        raise ValueError(
            "unsupported --profile %r: expected 'biowulf' or 'local'" % args.profile
        )
    return "-profile " + name, str(args.splitregions)


def build_commands(args, script_dir=None):
    """Build the nextflow command line for every pipeline step.

    Args:
        args: Namespace as returned by :func:`parse_args`.
        script_dir: Directory holding ``main.nf`` and ``nextflow.config``;
            defaults to the directory of this script.

    Returns:
        A list of six strings in the order align, vc, sv, cnv, germline, qc.
        A step that is not run is represented by an empty string.

    Raises:
        ValueError: a required option is missing or the combination of
            options is not supported.
    """
    if not args.genome:
        raise ValueError("--genome is required (hg38, mm10)")
    if not args.output:
        raise ValueError("--output is required")
    if script_dir is None:
        script_dir = os.path.dirname(os.path.realpath(__file__))

    paired = bool(args.sample_sheet)
    if args.germline and not paired:
        raise ValueError("--germline is only available with --sample_sheet")

    input_option, from_bam = _select_input(args, paired)
    profile_option, split_regions = _select_profile(args)

    if paired:
        sample_option = "--sample_sheet " + _single_quote(args.sample_sheet)
        align_flag = "--PIPE_ALIGN"
        step_flags = _PAIRED_STEPS
        qc_flag = "--PIPE_QC_GL" if args.germline else "--PIPE_QC_NOGL"
    else:
        sample_option = ""
        align_flag = "--PIPE_TONLY_ALIGN"
        step_flags = _TONLY_STEPS
        qc_flag = "--PIPE_TONLY_QC"

    parts = [
        "nextflow run",
        shlex.quote(script_dir + "/main.nf"),
        "-c " + shlex.quote(script_dir + "/nextflow.config"),
        input_option,
        profile_option,
        "-resume" if args.resume else "",
        sample_option,
        "--genome",
        shlex.quote(args.genome),
        "--output " + _single_quote(args.output) + " --split_regions " + split_regions,
    ]
    # Drop unused options so the joined command has no doubled spaces.
    base = " ".join(part for part in parts if part)

    # BAM input is already aligned, so the align step is skipped.
    commands = ["" if from_bam else base + " " + align_flag]
    for step in _STEP_ORDER:
        if getattr(args, step):
            fastq_flag, bam_flag = step_flags[step]
            commands.append(base + " " + (bam_flag if from_bam else fastq_flag))
        else:
            commands.append("")
    commands.append(base + " " + qc_flag if args.qc else "")
    return commands


def main(argv=None):
    """Entry point: run the stub commands, or write (and submit) a SLURM job.

    With --stub the commands are printed and run directly through the shell.
    Otherwise they are written to ``<output>_<timestamp>.slurm``, the matching
    ``sbatch`` line is written to ``kickoff_<timestamp>.sh`` and printed, and
    the job is submitted only when --submit is given.

    Args:
        argv: Optional argument list; ``None`` reads ``sys.argv[1:]``.
    """
    args = parse_args(argv)
    try:
        commands = build_commands(args)
    except ValueError as err:
        sys.exit("logan: error: " + str(err))

    timestamp = time.strftime("%Y_%m_%d_%H%M")

    if args.stub:
        stub_script = "\n".join(
            command + _STUB_OPTIONS if command else "" for command in commands
        )
        print(stub_script)
        os.system(stub_script)
        return

    slurm_script = args.output + "_" + timestamp + ".slurm"
    with open(slurm_script, "a") as outfile:
        outfile.write("\n".join(_SCRIPT_HEADER + commands) + "\n")

    sbatch_command = (
        "sbatch --cpus-per-task=2 --mem=8g --time 10-00:00:00 --partition norm --output submit_"
        + timestamp
        + ".log --error error_"
        + timestamp
        + ".log --mail-type=BEGIN,END "
        + shlex.quote(slurm_script)
    )
    with open("kickoff_" + timestamp + ".sh", "a") as outfile:
        outfile.write(sbatch_command + "\n")
    print(sbatch_command)
    if args.submit:
        os.system(sbatch_command)


if __name__ == "__main__":
    main()