rnabioco · jayhesselberth · Jan 14, 2025
diff --git a/.test/make_test_data.sh b/.test/make_test_data.sh
@@ -45,22 +45,6 @@ od=sample2/pod5_fail
 mkdir -p $od
 pod5 filter $ex/rbc/JMW_510_37C/JMW_510_37C.pod5 --ids ex2_read_ids_3.txt --force-overwrite -o $od/1.pod5
 
-od=sample1/fast5_pass
-mkdir -p $od
-pod5 convert to_fast5 -f -o $od sample1/pod5_pass/*.pod5
-
-od=sample1/fast5_fail
-mkdir -p $od
-pod5 convert to_fast5 -f -o $od sample1/pod5_fail/*.pod5
-
-od=sample2/fast5_pass
-mkdir -p $od
-pod5 convert to_fast5 -f -o $od sample2/pod5_pass/*.pod5
-
-od=sample2/fast5_fail
-mkdir -p $od
-pod5 convert to_fast5 -f -o $od sample2/pod5_fail/*.pod5
-
 # make another "sample2" dataset to test merging multiple runs
 # don't duplicate sample2 reads to avoid throwing an error when merging
 # pod5s

diff --git a/config/README.md b/config/README.md
@@ -7,7 +7,7 @@ Provide a path to a TSV file which indicates the samples to process (e.g. `confi
 TSV file should have two columns containing the following information.
 
   - a unique id for the sample
-  - a path to the sequencing run folder which has `pod5_pass`, `pod5`, `pod5_fail`, `fast5_pass`, or `fast5_fail` subdirectories containing raw data.
+  - a path to the sequencing run folder with `pod5_pass`, `pod5_fail`, or `pod5` subdirectories containing raw data.
 
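-The pipeline will recursively search for pod5 or fast5 files to process within the specified directory and subdirectories 
+The pipeline will recursively search for pod5 files to process within the specified directory and subdirectories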
 

diff --git a/config/config-base.yml b/config/config-base.yml
@@ -4,9 +4,6 @@
 # either a path to a basecalling model to use with dorado or a model selection name to specify model to download and use
 base_calling_model: "resources/models/[email protected]"
 
-# either FAST5 or POD5, if FAST5 then these files will be converted to pod5 before rebasecalling
-input_format: "POD5"
-
 # path to fasta file to use for bwa alignment.
 # a BWA index will be built if it does not exist for this fasta file
 fasta: "resources/ref/sacCer3-mature-tRNAs-dual-adapt-v2.fa"

diff --git a/workflow/rules/aatrnaseq.smk b/workflow/rules/aatrnaseq.smk
@@ -1,23 +1,17 @@
 
 rule merge_pods:
     """
-  merge all fast5/pod5s into a single pod5
+  merge pod5s into a single pod5
   """
     input:
         get_raw_inputs,
     output:
         os.path.join(rbc_outdir, "{sample}", "{sample}.pod5"),
     log:
         os.path.join(outdir, "logs", "merge_pods", "{sample}"),
-    params:
-        is_fast5=config["input_format"],
     shell:
         """
-    if [ "{params.is_fast5}" == "FAST5" ]; then
-      pod5 convert fast5 -f --output {output} {input}
-    else
       pod5 merge -f -o {output} {input}
-    fi
     """
 
 

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
@@ -58,27 +58,15 @@ def report_metadata():
 
 def find_raw_inputs(sample_dict):
     """
-    parse through directories listed in samples.tsv and identify fast5 or pod5 files to process
+    parse through directories listed in samples.tsv and identify pod5 files to process
     store input files and uuid base file names in dictionary for each sample
     """
     POD5_DIRS = ["pod5_pass", "pod5_fail", "pod5"]
-    FAST5_DIRS = ["fast5_pass", "fast5_fail"]
-    fmt = config["input_format"]
-
-    data_subdirs = []
-    if fmt == "POD5":
-        data_subdirs = POD5_DIRS
-        ext = ".pod5"
-    elif fmt == "FAST5":
-        data_subdirs = FAST5_DIRS
-        ext = ".fast5"
-    else:
-        sys.exit("input_format config option must be either FAST5, or POD5")
 
     for sample, info in sample_dict.items():
         raw_fls = []
         for path in info["path"]:
-            for subdir in data_subdirs:
+            for subdir in POD5_DIRS:
-                data_path = os.path.join(path, subdir, "*" + ext)
+                data_path = os.path.join(path, subdir, "*.pod5")
                 fls = glob.glob(data_path)
                 raw_fls += fls