bump v1.3.8

benoukraflab · May 24, 2020 · 2e2030f · 2e2030f
1 parent 5065d65
commit 2e2030f
Show file tree

Hide file tree

Showing 5 changed files with 23 additions and 24 deletions.
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -3,6 +3,10 @@ NanoVar Changelog
 
 Release Summary:
 
+Version 1.3.8 - May 24, 2020
+    * Fixed file type detection (Thanks to jiadong324, https://github.com/cytham/nanovar/issues/9#issuecomment-626579853)
+    * Fixed negative coordinates in VCF
+
 
 Version 1.3.7 - May 23, 2020
     * Changed version import approach in setup.py

diff --git a/README.md b/README.md
@@ -141,8 +141,9 @@ Although NanoVar is provided with a universal model and threshold score, instruc
 
 ## Limitations
 * The inaccurate basecalling of large homopolymer or low complexity DNA regions may result in the false determination of deletion SVs. We advise the use of up-to-date ONT basecallers such as Guppy to minimize this possibility.
-* For BND SVs, NanoVar cannot calculate the actual number of SV-opposing reads (normal reads) at the novel adjacency as there
- are two breakends from distant locations. Since it is not clear whether the novel adjacency is derived from both or either
-  breakends, it is not possible to know which breakend location(s) to consider for counting normal reads. Currently, NanoVar
-   approximates the normal read count by the minimum count from either breakend location. This would help to capture more true
-    BNDs but might also lower its precision.
+
+* For BND SVs, NanoVar is unable to calculate the actual number of SV-opposing reads (normal reads) at the novel adjacency as
+ there are two breakends from distant locations. It is not clear whether the novel adjacency is derived from both or either
+  breakends, in cases of balanced and unbalanced variants, and therefore it is not possible to know which breakend location(s) to
+   consider for counting normal reads. Currently, NanoVar approximates the normal read count by the minimum count from either 
+   breakend location. Although this helps in capturing unbalanced BNDs, it might lead to some false positives.
diff --git a/nanovar/nanovar b/nanovar/nanovar
@@ -137,7 +137,7 @@ def main():
     filename = os.path.basename(file_path)
     read_suffix = ['.fa', '.fq', '.fasta', '.fastq', '.fa.gzip', '.fq.gzip', '.fa.gz', '.fq.gz', '.fasta.gz', '.fastq.gz']
     bam_suffix = '.bam'
-    if any(s in filename.lower() for s in read_suffix):
+    if any(filename.lower().endswith(s) for s in read_suffix):
         input_name = os.path.basename(file_path).rsplit('.f', 1)[0]
         input_type = 'raw'
         # Test gzip compression and validates read file
@@ -152,7 +152,7 @@ def main():
             raise Exception("Error: Input FASTQ/FASTA file is corrupted around line %s +/- 4" % str(fastx_check[1]))
         else:
             logging.debug("Input FASTQ/FASTA file passed")
-    elif bam_suffix in filename.lower():
+    elif filename.lower().endswith(bam_suffix):
         sam = pysam.AlignmentFile(file_path, "rb")
         try:
             assert sam.is_bam, "Error: Input BAM file is not a BAM file."

diff --git a/nanovar/nv_vcf.py b/nanovar/nv_vcf.py
@@ -80,12 +80,10 @@ def create_vcf(wk_dir, thres, nn_out, ref_path, read_path, read_name, blast_cmd,
             coord1 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[0])
             coord2 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[1])
             if coord2 - coord1 < minlen:
-                mid = (coord2 + coord1)/2
-                coord1 = int(mid - round(sv_len/2, 0))
-                coord2 = int(mid + round(sv_len/2, 0) - 1)
-                sv_len = '-' + str(sv_len)
-            else:
-                sv_len = '-' + str(coord2 - coord1)
+                mid = (coord2 + coord1) / 2
+                coord1 = max(1, int(mid - round(sv_len/2, 0)))
+                coord2 = int(mid + round(sv_len/2, 0) + 1)
+            sv_len = '-' + str(coord2 - coord1)
             out.append(str(chrm1) + '\t' + str(coord1) + '\t' + str(sv_id) + '\tN\t' + str(sv) + '\t' + str(phred) + '\t' +
                        filt + '\t' + 'SVTYPE=DEL;END=' + str(coord2) + ';SVLEN=' + str(sv_len) + ';SR=' + str(covl) + ';NN=' +
                        str(dnn) + '\tGT:DP:AD\t' + geno + ':' + dp + ':' + str(normcov) + ',' + str(covl))
@@ -95,11 +93,9 @@ def create_vcf(wk_dir, thres, nn_out, ref_path, read_path, read_name, blast_cmd,
             coord2 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[1])
             if coord2 - coord1 < minlen:
                 mid = (coord2 + coord1) / 2
-                coord1 = int(mid - round(minlen / 2, 0))
-                coord2 = int(mid + round(minlen / 2, 0) - 1)
-                sv_len = str(minlen)
-            else:
-                sv_len = str(coord2 - coord1)
+                coord1 = max(1, int(mid - round(minlen / 2, 0)))
+                coord2 = int(mid + round(minlen / 2, 0) + 1)
+            sv_len = str(coord2 - coord1)
             out.append(str(chrm1) + '\t' + str(coord1) + '\t' + str(sv_id) + '\tN\t' + str(sv) + '\t' + str(phred) + '\t' +
                        filt + '\t' + 'SVTYPE=INV;END=' + str(coord2) + ';SVLEN=' + str(sv_len) + ';SR=' + str(covl) + ';NN=' +
                        str(dnn) + '\tGT:DP:AD\t' + geno + ':' + dp + ':' + str(normcov) + ',' + str(covl))
@@ -117,11 +113,9 @@ def create_vcf(wk_dir, thres, nn_out, ref_path, read_path, read_name, blast_cmd,
             coord2 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[1])
             if coord2 - coord1 < minlen:
                 mid = (coord2 + coord1) / 2
-                coord1 = int(mid - round(minlen / 2, 0))
-                coord2 = int(mid + round(minlen / 2, 0) - 1)
-                sv_len = str(minlen)
-            else:
-                sv_len = str(coord2 - coord1)
+                coord1 = max(1, int(mid - round(minlen / 2, 0)))
+                coord2 = int(mid + round(minlen / 2, 0) + 1)
+            sv_len = str(coord2 - coord1)
             out.append(str(chrm1) + '\t' + str(coord1) + '\t' + str(sv_id) + '\tN\t' + str(sv) + '\t' + str(phred) + '\t' +
                        filt + '\t' + 'SVTYPE=DUP;END=' + str(coord2) + ';SVLEN=' + str(sv_len) + ';SR=' + str(covl) + ';NN=' +
                        str(dnn) + '\tGT:DP:AD\t' + geno + ':' + dp + ':' + str(normcov) + ',' + str(covl))

diff --git a/nanovar/version.py b/nanovar/version.py
@@ -1 +1 @@
-__version__ = "1.3.7"
+__version__ = "1.3.8"