Skip to content

Commit

Permalink
bump v1.3.8
Browse files Browse the repository at this point in the history
  • Loading branch information
cytham committed May 24, 2020
1 parent 5065d65 commit 2e2030f
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 24 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ NanoVar Changelog

Release Summary:

Version 1.3.8 - May 24, 2020
* Fixed file type detection (Thanks to jiadong324, https://github.com/cytham/nanovar/issues/9#issuecomment-626579853)
* Fixed negative coordinates in VCF


Version 1.3.7 - May 23, 2020
* Changed version import approach in setup.py
Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ Although NanoVar is provided with a universal model and threshold score, instruc

## Limitations
* The inaccurate basecalling of large homopolymer or low complexity DNA regions may result in the false determination of deletion SVs. We advise the use of up-to-date ONT basecallers such as Guppy to minimize this possibility.
* For BND SVs, NanoVar cannot calculate the actual number of SV-opposing reads (normal reads) at the novel adjacency as there
are two breakends from distant locations. Since it is not clear whether the novel adjacency is derived from both or either
breakends, it is not possible to know which breakend location(s) to consider for counting normal reads. Currently, NanoVar
approximates the normal read count by the minimum count from either breakend location. This would help to capture more true
BNDs but might also lower its precision.

* For BND SVs, NanoVar is unable to calculate the actual number of SV-opposing reads (normal reads) at the novel adjacency as
there are two breakends from distant locations. It is not clear whether the novel adjacency is derived from both or either
breakends, in cases of balanced and unbalanced variants, and therefore it is not possible to know which breakend location(s) to
consider for counting normal reads. Currently, NanoVar approximates the normal read count by the minimum count from either
breakend location. Although this helps in capturing unbalanced BNDs, it might lead to some false positives.
4 changes: 2 additions & 2 deletions nanovar/nanovar
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def main():
filename = os.path.basename(file_path)
read_suffix = ['.fa', '.fq', '.fasta', '.fastq', '.fa.gzip', '.fq.gzip', '.fa.gz', '.fq.gz', '.fasta.gz', '.fastq.gz']
bam_suffix = '.bam'
if any(s in filename.lower() for s in read_suffix):
if any(filename.lower().endswith(s) for s in read_suffix):
input_name = os.path.basename(file_path).rsplit('.f', 1)[0]
input_type = 'raw'
# Test gzip compression and validates read file
Expand All @@ -152,7 +152,7 @@ def main():
raise Exception("Error: Input FASTQ/FASTA file is corrupted around line %s +/- 4" % str(fastx_check[1]))
else:
logging.debug("Input FASTQ/FASTA file passed")
elif bam_suffix in filename.lower():
elif filename.lower().endswith(bam_suffix):
sam = pysam.AlignmentFile(file_path, "rb")
try:
assert sam.is_bam, "Error: Input BAM file is not a BAM file."
Expand Down
26 changes: 10 additions & 16 deletions nanovar/nv_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,10 @@ def create_vcf(wk_dir, thres, nn_out, ref_path, read_path, read_name, blast_cmd,
coord1 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[0])
coord2 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[1])
if coord2 - coord1 < minlen:
mid = (coord2 + coord1)/2
coord1 = int(mid - round(sv_len/2, 0))
coord2 = int(mid + round(sv_len/2, 0) - 1)
sv_len = '-' + str(sv_len)
else:
sv_len = '-' + str(coord2 - coord1)
mid = (coord2 + coord1) / 2
coord1 = max(1, int(mid - round(sv_len/2, 0)))
coord2 = int(mid + round(sv_len/2, 0) + 1)
sv_len = '-' + str(coord2 - coord1)
out.append(str(chrm1) + '\t' + str(coord1) + '\t' + str(sv_id) + '\tN\t' + str(sv) + '\t' + str(phred) + '\t' +
filt + '\t' + 'SVTYPE=DEL;END=' + str(coord2) + ';SVLEN=' + str(sv_len) + ';SR=' + str(covl) + ';NN=' +
str(dnn) + '\tGT:DP:AD\t' + geno + ':' + dp + ':' + str(normcov) + ',' + str(covl))
Expand All @@ -95,11 +93,9 @@ def create_vcf(wk_dir, thres, nn_out, ref_path, read_path, read_name, blast_cmd,
coord2 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[1])
if coord2 - coord1 < minlen:
mid = (coord2 + coord1) / 2
coord1 = int(mid - round(minlen / 2, 0))
coord2 = int(mid + round(minlen / 2, 0) - 1)
sv_len = str(minlen)
else:
sv_len = str(coord2 - coord1)
coord1 = max(1, int(mid - round(minlen / 2, 0)))
coord2 = int(mid + round(minlen / 2, 0) + 1)
sv_len = str(coord2 - coord1)
out.append(str(chrm1) + '\t' + str(coord1) + '\t' + str(sv_id) + '\tN\t' + str(sv) + '\t' + str(phred) + '\t' +
filt + '\t' + 'SVTYPE=INV;END=' + str(coord2) + ';SVLEN=' + str(sv_len) + ';SR=' + str(covl) + ';NN=' +
str(dnn) + '\tGT:DP:AD\t' + geno + ':' + dp + ':' + str(normcov) + ',' + str(covl))
Expand All @@ -117,11 +113,9 @@ def create_vcf(wk_dir, thres, nn_out, ref_path, read_path, read_name, blast_cmd,
coord2 = int(tmpread[0].split('\t')[6].split('~')[1].split(':')[1].split('-')[1])
if coord2 - coord1 < minlen:
mid = (coord2 + coord1) / 2
coord1 = int(mid - round(minlen / 2, 0))
coord2 = int(mid + round(minlen / 2, 0) - 1)
sv_len = str(minlen)
else:
sv_len = str(coord2 - coord1)
coord1 = max(1, int(mid - round(minlen / 2, 0)))
coord2 = int(mid + round(minlen / 2, 0) + 1)
sv_len = str(coord2 - coord1)
out.append(str(chrm1) + '\t' + str(coord1) + '\t' + str(sv_id) + '\tN\t' + str(sv) + '\t' + str(phred) + '\t' +
filt + '\t' + 'SVTYPE=DUP;END=' + str(coord2) + ';SVLEN=' + str(sv_len) + ';SR=' + str(covl) + ';NN=' +
str(dnn) + '\tGT:DP:AD\t' + geno + ':' + dp + ':' + str(normcov) + ',' + str(covl))
Expand Down
2 changes: 1 addition & 1 deletion nanovar/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.3.7"
__version__ = "1.3.8"

0 comments on commit 2e2030f

Please sign in to comment.