From f0f6bec2380c08dec48b0c67a5369d57261c0b9c Mon Sep 17 00:00:00 2001
From: Stephen Binaansim <stephen.binaansimpgs@stu.cu.edu.ng>
Date: Wed, 30 Oct 2024 15:24:39 +0000
Subject: [PATCH 1/3] Adding option for star aligner

---
 .test/config/config_paired_end.yaml           |   5 +-
 workflow/Snakefile                            |   9 +-
 workflow/envs/variants.yaml                   |   1 +
 workflow/rules/common.smk                     |   2 +-
 workflow/rules/hisat2-freebayes.smk           |  35 +++--
 workflow/rules/qc.smk                         |  11 +-
 workflow/rules/star-freebayes.smk             | 126 ++++++++++++++++++
 workflow/rules/utilities.smk                  |  15 ++-
 .../scripts/starGenerateFreebayesParams.R     |  74 ++++++++++
 9 files changed, 245 insertions(+), 33 deletions(-)
 create mode 100644 workflow/rules/star-freebayes.smk
 create mode 100755 workflow/scripts/starGenerateFreebayesParams.R

diff --git a/.test/config/config_paired_end.yaml b/.test/config/config_paired_end.yaml
index 3b54b05..0124add 100644
--- a/.test/config/config_paired_end.yaml
+++ b/.test/config/config_paired_end.yaml
@@ -9,9 +9,12 @@ results-jupyterbook:
       activate: True
 
 fastq: 
-      auto: True # set auto if your files follow 
+      auto: True              # set auto if your files follow 
       paired: True
 
+#specify the seq aligner you want to use
+aligner: "STAR"  #or "HISAT2"
+
 # Paths for reference files 
 reference:
   genome:
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 7092b91..cf8c75b 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -24,11 +24,18 @@ welcome(version="v2.0.2")
 include: "rules/qc.smk"
 include: "rules/diffexp.smk"
 include: "rules/utilities.smk"
-include: "rules/hisat2-freebayes.smk"
 include: "rules/snpEff.smk"
 include: "rules/variantAnalysis.smk"
 include: "rules/jupyter-book.smk"
 
+if config['aligner'] == 'STAR':
+    include: "rules/star-freebayes.smk"
+
+if config['aligner'] == 'HISAT2':
+    include: "rules/hisat2-freebayes.smk"
+
+
+
 if config['pipeline'] == 'parabricks':
     include: "rules/star-haplotypecaller.smk"
 
diff --git a/workflow/envs/variants.yaml b/workflow/envs/variants.yaml
index e788024..a740055 100644
--- a/workflow/envs/variants.yaml
+++ b/workflow/envs/variants.yaml
@@ -3,6 +3,7 @@ channels:
   - conda-forge
 dependencies:
   - hisat2
+  - star
   - samtools
   - freebayes=1.3.8
   - bcftools
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index d152188..93609ef 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -283,4 +283,4 @@ def welcome(version):
     print(f"Author:   Sanjay Curtis Nagi")
     print(f"Workflow Version: {version}")
     print("Execution time: ", datetime.datetime.now())
-    print(f"Dataset: {config['dataset']}", "\n")
+    print(f"Dataset: {config['dataset']}", "\n")
\ No newline at end of file
diff --git a/workflow/rules/hisat2-freebayes.smk b/workflow/rules/hisat2-freebayes.smk
index 6e5383a..5c43278 100644
--- a/workflow/rules/hisat2-freebayes.smk
+++ b/workflow/rules/hisat2-freebayes.smk
@@ -1,22 +1,21 @@
-
 rule HISAT2index:
-    """
-    Make a HISAT2 index of the reference genome
-    """
-    input:
-        fasta=config["reference"]["genome"].rstrip(".gz"),
-    output:
-        "resources/reference/ht2index/idx.1.ht2",
-        touch("resources/reference/ht2index/.complete"),
-    log:
-        "logs/HISAT2/HISAT2index.log",
-    conda:
-        "../envs/variants.yaml"
-    params:
-        prefix=lambda w, output: output[0].split(os.extsep)[0],
-    threads: 8
-    shell:
-        "hisat2-build -p {threads} {input.fasta} {params.prefix}  2> {log}"
+     """
+     Make a HISAT2 index of the reference genome
+     """
+     input:
+         fasta=config["reference"]["genome"].rstrip(".gz"),
+     output:
+         "resources/reference/ht2index/idx.1.ht2",
+         touch("resources/reference/ht2index/.complete"),
+     log:
+         "logs/HISAT2/HISAT2index.log",
+     conda:
+         "../envs/variants.yaml"
+     params:
+         prefix=lambda w, output: output[0].split(os.extsep)[0],
+     threads: 8
+     shell:
+         "hisat2-build -p {threads} {input.fasta} {params.prefix}  2> {log}"
 
 
 rule HISAT2align:
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index 149748c..001af1e 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -37,15 +37,13 @@ rule fastp:
     wrapper:
         "v2.2.1/bio/fastp"
 
-
-
 rule BamStats:
     """
     QC alignment statistics
     """
     input:
-        bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam",
-        idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai",
+        bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
+        idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai",
     output:
         stats="results/qc/alignments/{sample}.flagstat",
     log:
@@ -53,14 +51,13 @@ rule BamStats:
     wrapper:
         "v3.12.1/bio/samtools/flagstat"
 
-
 rule Coverage:
     """
     Calculate coverage with mosdepth
     """
     input:
-        bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam",
-        idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai",
+        bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
+        idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai",
     output:
         "results/qc/coverage/{sample}.mosdepth.summary.txt",
     log:
diff --git a/workflow/rules/star-freebayes.smk b/workflow/rules/star-freebayes.smk
new file mode 100644
index 0000000..c1035ff
--- /dev/null
+++ b/workflow/rules/star-freebayes.smk
@@ -0,0 +1,126 @@
+rule STARindex:
+    """
+    Make a STAR index of the reference genome.
+    """
+    input:
+        gtfFile=config["reference"]["gff"].rstrip(".gz"),
+        fasta=config["reference"]["genome"].rstrip(".gz"),
+    output:
+        directory("resources/reference/starindex/"),  # Flagging the output as a directory
+        touch("resources/reference/starindex/.complete"),  # Completion tracking
+    log:
+        "logs/STAR/STARindex.log",
+    conda:
+        "../envs/variants.yaml"
+    params:
+        prefix="resources/reference/starindex/",
+    threads: 8
+    shell:
+        """
+        STAR --runMode genomeGenerate --genomeDir {params.prefix} --genomeFastaFiles {input.fasta} --sjdbGTFfile {input.gtfFile} --sjdbGTFtagExonParentTranscript Parent --sjdbOverhang 100 --runThreadN {threads} --outFileNamePrefix {params.prefix} 2> {log}
+        """
+
+rule STARalign:
+    """
+    Align reads to the genome with STAR, generate an unsorted BAM, and then sort with samtools.
+    """
+    input:
+        reads=lambda wildcards: getFASTQs(wildcards=wildcards),
+        idx="resources/reference/starindex/"
+    output:
+        "results/alignments/{sample}.star.bam"
+    log:
+        align="logs/STAR/{sample}_align.log",
+        sort="logs/samtoolsSort/{sample}.log"
+    conda:
+        "../envs/variants.yaml"
+    params:
+        readflags=lambda wildcards: getFASTQs(wildcards=wildcards),
+        extra="--outSAMtype BAM Unsorted",
+        rg="--outSAMattrRGline ID:{wildcards.sample} SM:{wildcards.sample} PL:ILLUMINA"
+    threads: 12
+    shell:
+        """
+        # Generate unsorted BAM with STAR
+        STAR --genomeDir {input.idx} --readFilesIn {params.readflags} --readFilesCommand zcat {params.extra} {params.rg} --runThreadN {threads} --outFileNamePrefix results/alignments/{wildcards.sample}. 2> {log.align}
+
+        # Sort the BAM file with samtools, following output file convention
+        samtools sort -@{threads} -o {output} results/alignments/{wildcards.sample}.Aligned.out.bam 2>> {log.sort}
+
+        # Remove intermediate unsorted BAM to save space
+        rm results/alignments/{wildcards.sample}.Aligned.out.bam
+        """
+
+
+chunks = np.arange(1, config["VariantAnalysis"]["chunks"])
+
+rule GenerateFreebayesParams:
+    input:
+        ref_idx=config["reference"]["genome"].rstrip(".gz"),
+        index=config["reference"]["genome"].rstrip(".gz") + ".fai",
+        bams=expand("results/alignments/{sample}.star.bam", sample=samples),
+    output:
+        bamlist="results/alignments/bam.list",
+        pops="results/alignments/populations.tsv",
+        regions=expand(
+            "results/variantAnalysis/regions/genome.{contig}.region.{i}.bed",
+            contig=config["contigs"],
+            i=chunks,
+        ),
+    log:
+        "logs/GenerateFreebayesParams.log",
+    params:
+        aligner='STAR',
+        metadata=config["metadata"],
+        contigs=config["contigs"],
+        chunks=config["VariantAnalysis"]["chunks"],
+    conda:
+        "../envs/diffexp.yaml"
+    script:
+        "../scripts/starGenerateFreebayesParams.R"
+
+
+rule VariantCallingFreebayes:
+    """
+    Run freebayes on chunks of the genome, splitting the samples by population (strain)
+    """
+    input:
+        bams=expand("results/alignments/{sample}.star.bam", sample=samples),
+        index=expand("results/alignments/{sample}.star.bam.bai", sample=samples),
+        ref=config["reference"]["genome"].rstrip(".gz"),
+        samples="results/alignments/bam.list",
+        pops="results/alignments/populations.tsv",
+        regions="results/variantAnalysis/regions/genome.{contig}.region.{i}.bed",
+    output:
+        temp("results/variantAnalysis/vcfs/freebayes/{contig}/variants.{i}.vcf"),
+    log:
+        "logs/VariantCallingFreebayes/{contig}.{i}.log",
+    params:
+        ploidy=config["VariantAnalysis"]["ploidy"],
+    conda:
+        "../envs/variants.yaml"
+    threads: 8
+    shell:
+        """
+        freebayes -f {input.ref} -t {input.regions} --ploidy {params.ploidy} --populations {input.pops} --pooled-discrete --use-best-n-alleles 5 --min-alternate-fraction 0.05 -L {input.samples} > {output} 2> {log}
+        """
+
+
+rule ConcatVCFs:
+    """
+    Concatenate VCFs together
+    """
+    input:
+        calls=expand(
+            "results/variantAnalysis/vcfs/freebayes/{{contig}}/variants.{i}.vcf",
+            i=chunks,
+        ),
+    output:
+        temp("results/variantAnalysis/vcfs/freebayes/variants.{contig}.vcf"),
+    log:
+        "logs/ConcatVCFs/{contig}.log",
+    conda:
+        "../envs/variants.yaml"
+    threads: 4
+    shell:
+        "bcftools concat {input.calls} | vcfuniq > {output} 2> {log}"
diff --git a/workflow/rules/utilities.smk b/workflow/rules/utilities.smk
index 740c81c..3fa27e0 100644
--- a/workflow/rules/utilities.smk
+++ b/workflow/rules/utilities.smk
@@ -44,13 +44,18 @@ rule IndexBams:
     Index bams with samtools
     """
     input:
-        bam="results/alignments/{sample}.hisat2.bam",
+        bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
     output:
-        idx="results/alignments/{sample}.hisat2.bam.bai",
+        idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
     log:
         "logs/IndexBams/{sample}.log",
-    wrapper:
-        "v1.15.0/bio/samtools/index"
+    conda: 
+        "../envs/variants.yaml"
+    shell:
+        """
+        samtools index {input.bam} {output.idx} 2> {log}
+        """
+
 
 
 rule RestrictToSNPs:
@@ -66,4 +71,4 @@ rule RestrictToSNPs:
     params:
         extra="-v snps",
     wrapper:
-        "v1.15.0/bio/bcftools/view"
+        "v1.15.0/bio/bcftools/view"
\ No newline at end of file
diff --git a/workflow/scripts/starGenerateFreebayesParams.R b/workflow/scripts/starGenerateFreebayesParams.R
new file mode 100755
index 0000000..6c926fc
--- /dev/null
+++ b/workflow/scripts/starGenerateFreebayesParams.R
@@ -0,0 +1,74 @@
+#!/usr/bin/env Rscript
+log <- file(snakemake@log[[1]], open="wt")
+sink(log)
+sink(log, type="message")
+
+## Generate freebayes params ##
+# 1) make bed for parallelising freebayes
+library(dplyr)
+library(data.table)
+library(glue)
+
+
+load_metadata <- function(metadata_path) {
+  # Check the file extension and load metadata accordingly
+  if (tools::file_ext(metadata_path) == "xlsx") {
+    metadata <- readxl::read_excel(metadata_path)
+  } else if (tools::file_ext(metadata_path) == "tsv") {
+    metadata <- data.table::fread(metadata_path, sep = "\t")
+  } else if (tools::file_ext(metadata_path) == "csv") {
+    metadata <- data.table::fread(metadata_path, sep = ",")
+  } else {
+    stop("Metadata file must be .xlsx, .tsv, or .csv")
+  }
+  return(metadata)
+}
+
+# read inputs
+contigs = snakemake@params[['contigs']]
+chunks = snakemake@params[['chunks']]
+fai = fread(snakemake@input[['index']])
+
+# select contigs we want, and start, end columns
+fai = fai[fai$V1 %in% contigs, c(1,2)]
+
+# for each chromsome
+for (contig in contigs){
+   #subset index to desired contig
+   f = fai[fai$V1 == contig]
+   #get sequence of n chunks from 0 to length of contig
+   bedseq = round(seq(0, f$V2, length.out = chunks))
+   
+   #for each chunk
+   for (i in 1:(chunks-1)){
+      #write bed file, one for each chunk/interval, which will be passed as input to freebayes
+      row = c(contig, bedseq[i], bedseq[i+1])
+      data.frame(row) %>% t() %>% fwrite(., glue("results/variantAnalysis/regions/genome.{contig}.region.{i}.bed"), sep="\t", col.names = FALSE)
+   }
+}
+
+
+
+# 2) Make bamlist and populations.tsv file
+metadata = load_metadata(snakemake@params[['metadata']])
+
+# # Set the BAM file paths based on the pipeline condition
+# metadata$bams = if (snakemake@params[['aligner']] == 'STAR') {
+#     paste0("results/alignments/", metadata$sampleID, ".star.bam")
+# } else {
+#     paste0("results/alignments/", metadata$sampleID, ".hisat2.bam")
+# }
+metadata$bams = paste0("results/alignments/", metadata$sampleID, ".star.bam")
+
+# Write the populations file with BAM paths and strain information
+metadata %>% 
+   select(bams, strain) %>% 
+   fwrite(., snakemake@output[['pops']], sep="\t", row.names = FALSE, col.names = FALSE)
+
+# Write the bamlist file with only BAM paths
+metadata %>% 
+   select(bams) %>% 
+   fwrite(., snakemake@output[['bamlist']], sep="\t", row.names = FALSE, col.names = FALSE)
+
+
+sessionInfo()
\ No newline at end of file

From 91847245d8726d0f95b7920a1a92bbca5ca9fa91 Mon Sep 17 00:00:00 2001
From: Stephen Binaansim <stephen.binaansimpgs@stu.cu.edu.ng>
Date: Sat, 2 Nov 2024 08:36:48 +0000
Subject: [PATCH 2/3] Updated Related Scripts

---
 workflow/Snakefile                            |  7 +++-
 workflow/rules/common.smk                     | 38 ++++++++++---------
 workflow/rules/qc.smk                         |  3 +-
 workflow/rules/star-freebayes.smk             | 17 +++++----
 workflow/rules/utilities.smk                  |  1 -
 .../scripts/starGenerateFreebayesParams.R     | 13 +++----
 6 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index cf8c75b..a33f41e 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -23,13 +23,16 @@ welcome(version="v2.0.2")
 
 include: "rules/qc.smk"
 include: "rules/diffexp.smk"
+
+if config['aligner'] == 'STAR':
+    include: "rules/star-freebayes.smk"
+    
 include: "rules/utilities.smk"
 include: "rules/snpEff.smk"
 include: "rules/variantAnalysis.smk"
 include: "rules/jupyter-book.smk"
 
-if config['aligner'] == 'STAR':
-    include: "rules/star-freebayes.smk"
+
 
 if config['aligner'] == 'HISAT2':
     include: "rules/hisat2-freebayes.smk"
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 93609ef..d778842 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -17,52 +17,56 @@ def load_metadata(metadata_path):
     return metadata
 
 
-
 def getFASTQs(wildcards, rules=None):
     """
     Get FASTQ files from unit sheet.
     If there are more than one wildcard (aka, sample), only return one fastq file
     If the rule is HISAT2align, then return the fastqs with -1 and -2 flags
+    If the rule is STARalign, return the fastqs without flags.
     """
     metadata = load_metadata(config["metadata"])
     
-    if config['fastq']['paired'] == True:
-        fastq_cols = ['fq1', 'fq2']
-    else:
-        fastq_cols = ['fq1']
+    # Determine if the FASTQs are paired or single-end
+    fastq_cols = ['fq1', 'fq2'] if config['fastq']['paired'] else ['fq1']
 
-    if config["QualityControl"]["fastp-trim"]["activate"] == True:
-        if rules in ["KallistoQuant", "HISAT2align", "HISAT2align_input"]:
+    # Check for fastp trimming activation
+    if config["QualityControl"]["fastp-trim"]["activate"]:
+        if rules in ["KallistoQuant", "HISAT2align", "HISAT2align_input", "STARalign", "STARalign_input"]:
             for i, col in enumerate(fastq_cols):
                 metadata = metadata.assign(**{col: f"resources/reads/trimmed/" + metadata["sampleID"] + f"_{i+1}.fastq.gz"})     
             metadata = metadata.set_index("sampleID")
             
             u = metadata.loc[wildcards.sample, fastq_cols].dropna()
             if rules == "HISAT2align":
-                return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] == True else f"-U {u.fq1}"
+                return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] else f"-U {u.fq1}"
+            elif rules in ["STARalign", "STARalign_input"]:
+                return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1]
             else:
-                return [u.fq1, u.fq2] if config['fastq']['paired'] == True else [u.fq1]
+                return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1]
 
+    # Auto-generate FASTQ paths if enabled
     if config["fastq"]["auto"]:
         for i, col in enumerate(fastq_cols):
             metadata = metadata.assign(**{col: f"resources/reads/" + metadata["sampleID"] + f"_{i+1}.fastq.gz"})     
         metadata = metadata.set_index("sampleID")
     else:
-        assert (
-            "fq1" in metadata.columns
-        ), f"The fq1 column in the metadata does not seem to exist. Please create one, or use the 'auto' option and name the fastq files as specified in the config/README.md"
+        assert "fq1" in metadata.columns, "The fq1 column in the metadata does not seem to exist. Please create one, or use the 'auto' option."
         if config['fastq']['paired']:
-            assert (
-                "fq2" in metadata.columns
-            ), f"The fq2 column in the metadata does not seem to exist. Please create one, or use the 'auto' option and name the fastq files as specified in the config/README.md"
+            assert "fq2" in metadata.columns, "The fq2 column in the metadata does not seem to exist. Please create one, or use the 'auto' option."
     
         metadata = metadata.set_index("sampleID")
 
+    # Retrieve FASTQ paths based on the current rules
     u = metadata.loc[wildcards.sample, fastq_cols].dropna()
     if rules == "HISAT2align":
-        return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] == True else f"-U {u.fq1}"
+        return [f"-1 {u.fq1} -2 {u.fq2}"] if config['fastq']['paired'] else f"-U {u.fq1}"
+    elif rules in ["STARalign", "STARalign_input"]:
+        return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1]
     else:
-        return [u.fq1, u.fq2] if config['fastq']['paired'] == True else [u.fq1]
+        return [u.fq1, u.fq2] if config['fastq']['paired'] else [u.fq1]
+
+
+
 
 
 def getBAM(wildcards):
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index 001af1e..bd860d6 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -23,10 +23,9 @@ rule CheckInputs:
         "../scripts/checkInputs.py"
 
 
-
 rule fastp:
     input:
-        sample = getFASTQs,
+        sample=getFASTQs,
     output:
         trimmed=["resources/reads/trimmed/{sample}_1.fastq.gz", "resources/reads/trimmed/{sample}_2.fastq.gz"] if config['fastq']['paired'] else ["resources/reads/trimmed/{sample}_1.fastq.gz"],
         html="results/qc/{sample}.html",
diff --git a/workflow/rules/star-freebayes.smk b/workflow/rules/star-freebayes.smk
index c1035ff..92ba10a 100644
--- a/workflow/rules/star-freebayes.smk
+++ b/workflow/rules/star-freebayes.smk
@@ -25,19 +25,19 @@ rule STARalign:
     Align reads to the genome with STAR, generate an unsorted BAM, and then sort with samtools.
     """
     input:
-        reads=lambda wildcards: getFASTQs(wildcards=wildcards),
+        reads=lambda wildcards: getFASTQs(wildcards=wildcards, rules="STARalign"),
         idx="resources/reference/starindex/"
     output:
         "results/alignments/{sample}.star.bam"
     log:
-        align="logs/STAR/{sample}_align.log",
-        sort="logs/samtoolsSort/{sample}.log"
+        align="logs/STAR/{sample}_align.log",              # Log for alignment
+        sort="logs/samtoolsSort/{sample}.log"              # Log for sorting
     conda:
-        "../envs/variants.yaml"
+        "../envs/variants.yaml"                            # Conda environment
     params:
-        readflags=lambda wildcards: getFASTQs(wildcards=wildcards),
-        extra="--outSAMtype BAM Unsorted",
-        rg="--outSAMattrRGline ID:{wildcards.sample} SM:{wildcards.sample} PL:ILLUMINA"
+        readflags=lambda wildcards: " ".join(getFASTQs(wildcards=wildcards, rules="STARalign")),
+        extra="--outSAMtype BAM Unsorted",                # STAR output type
+        rg=lambda wildcards: f"--outSAMattrRGline ID:{wildcards.sample} SM:{wildcards.sample} PL:ILLUMINA"
     threads: 12
     shell:
         """
@@ -52,6 +52,9 @@ rule STARalign:
         """
 
 
+
+
+
 chunks = np.arange(1, config["VariantAnalysis"]["chunks"])
 
 rule GenerateFreebayesParams:
diff --git a/workflow/rules/utilities.smk b/workflow/rules/utilities.smk
index 3fa27e0..3cf3676 100644
--- a/workflow/rules/utilities.smk
+++ b/workflow/rules/utilities.smk
@@ -57,7 +57,6 @@ rule IndexBams:
         """
 
 
-
 rule RestrictToSNPs:
     """"
     Filter out indels
diff --git a/workflow/scripts/starGenerateFreebayesParams.R b/workflow/scripts/starGenerateFreebayesParams.R
index 6c926fc..6e4e3a2 100755
--- a/workflow/scripts/starGenerateFreebayesParams.R
+++ b/workflow/scripts/starGenerateFreebayesParams.R
@@ -52,13 +52,12 @@ for (contig in contigs){
 # 2) Make bamlist and populations.tsv file
 metadata = load_metadata(snakemake@params[['metadata']])
 
-# # Set the BAM file paths based on the pipeline condition
-# metadata$bams = if (snakemake@params[['aligner']] == 'STAR') {
-#     paste0("results/alignments/", metadata$sampleID, ".star.bam")
-# } else {
-#     paste0("results/alignments/", metadata$sampleID, ".hisat2.bam")
-# }
-metadata$bams = paste0("results/alignments/", metadata$sampleID, ".star.bam")
+# Set the BAM file paths based on the pipeline condition
+metadata$bams = if (snakemake@params[['aligner']] == 'STAR') {
+    paste0("results/alignments/", metadata$sampleID, ".star.bam")
+} else {
+    paste0("results/alignments/", metadata$sampleID, ".hisat2.bam")
+}
 
 # Write the populations file with BAM paths and strain information
 metadata %>% 

From 06c85e7314db60847ab6ce3bd4566c75c2682bd6 Mon Sep 17 00:00:00 2001
From: Stephen Binaansim <stephen.binaansimpgs@stu.cu.edu.ng>
Date: Mon, 4 Nov 2024 12:24:22 +0000
Subject: [PATCH 3/3] Making STAR the default Aligner & Updating Related
 Scripts

---
 .test/config/config_paired_end.yaml   |  2 --
 workflow/Snakefile                    | 13 +++----------
 workflow/rules/qc.smk                 |  8 ++++----
 workflow/rules/star-freebayes.smk     |  4 ----
 workflow/rules/utilities.smk          |  4 ++--
 workflow/rules/variantsOfInterest.smk |  4 ++--
 6 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/.test/config/config_paired_end.yaml b/.test/config/config_paired_end.yaml
index 0124add..e7afe0f 100644
--- a/.test/config/config_paired_end.yaml
+++ b/.test/config/config_paired_end.yaml
@@ -12,8 +12,6 @@ fastq:
       auto: True              # set auto if your files follow 
       paired: True
 
-#specify the seq aligner you want to use
-aligner: "STAR"  #or "HISAT2"
 
 # Paths for reference files 
 reference:
diff --git a/workflow/Snakefile b/workflow/Snakefile
index a33f41e..b7d9a61 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -23,21 +23,14 @@ welcome(version="v2.0.2")
 
 include: "rules/qc.smk"
 include: "rules/diffexp.smk"
-
-if config['aligner'] == 'STAR':
-    include: "rules/star-freebayes.smk"
-    
+include: "rules/star-freebayes.smk"  
 include: "rules/utilities.smk"
 include: "rules/snpEff.smk"
 include: "rules/variantAnalysis.smk"
 include: "rules/jupyter-book.smk"
 
-
-
-if config['aligner'] == 'HISAT2':
-    include: "rules/hisat2-freebayes.smk"
-
-
+#if config['aligner'] == 'HISAT2':
+ #   include: "rules/hisat2-freebayes.smk"
 
 if config['pipeline'] == 'parabricks':
     include: "rules/star-haplotypecaller.smk"
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index bd860d6..0a72fb9 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -41,8 +41,8 @@ rule BamStats:
     QC alignment statistics
     """
     input:
-        bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
-        idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai",
+        bam="results/alignments/{sample}.star.bam", # if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
+        idx="results/alignments/{sample}.star.bam.bai", # if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai",
     output:
         stats="results/qc/alignments/{sample}.flagstat",
     log:
@@ -55,8 +55,8 @@ rule Coverage:
     Calculate coverage with mosdepth
     """
     input:
-        bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
-        idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai",
+        bam="results/alignments/{sample}.star.bam",# if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
+        idx="results/alignments/{sample}.star.bam.bai",# if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam.bai",
     output:
         "results/qc/coverage/{sample}.mosdepth.summary.txt",
     log:
diff --git a/workflow/rules/star-freebayes.smk b/workflow/rules/star-freebayes.smk
index 92ba10a..8500e32 100644
--- a/workflow/rules/star-freebayes.smk
+++ b/workflow/rules/star-freebayes.smk
@@ -51,10 +51,6 @@ rule STARalign:
         rm results/alignments/{wildcards.sample}.Aligned.out.bam
         """
 
-
-
-
-
 chunks = np.arange(1, config["VariantAnalysis"]["chunks"])
 
 rule GenerateFreebayesParams:
diff --git a/workflow/rules/utilities.smk b/workflow/rules/utilities.smk
index 3cf3676..5a166ac 100644
--- a/workflow/rules/utilities.smk
+++ b/workflow/rules/utilities.smk
@@ -44,9 +44,9 @@ rule IndexBams:
     Index bams with samtools
     """
     input:
-        bam="results/alignments/{sample}.star.bam" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
+        bam="results/alignments/{sample}.star.bam", #if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
     output:
-        idx="results/alignments/{sample}.star.bam.bai" if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
+        idx="results/alignments/{sample}.star.bam.bai", #if config['aligner'] == 'STAR' else "results/alignments/{sample}.hisat2.bam",
     log:
         "logs/IndexBams/{sample}.log",
     conda: 
diff --git a/workflow/rules/variantsOfInterest.smk b/workflow/rules/variantsOfInterest.smk
index 36c3ea6..f40b5b5 100644
--- a/workflow/rules/variantsOfInterest.smk
+++ b/workflow/rules/variantsOfInterest.smk
@@ -3,8 +3,8 @@ rule mpileupVariantsOfInterest:
     Get allele count tables of variants of choice (specified in config file ("IRmutations.tsv"))
     """
     input:
-        bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam",
-        idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai",
+        bam="results/alignments/{sample}.star.bam", #if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam",
+        idx="results/alignments/{sample}.star.bam.bai", #if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai",
     output:
         "results/variantAnalysis/variantsOfInterest/counts/{sample}_{mut}_allele_counts.tsv",
     conda: