Skip to content

Commit

Permalink
Modifications to return the number of clipped supporting reads either…
Browse files Browse the repository at this point in the history
… side of the breakpoint. Potentially use this as a filter.
  • Loading branch information
Thomas Keane committed May 9, 2016
1 parent 25b0d56 commit a1fb472
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 18 deletions.
34 changes: 20 additions & 14 deletions RetroSeq/Utilities.pm
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ sub testBreakPoint
#test to see if lots of rp's either side
my $lhsFwdBlue = 0; my $lhsRevBlue = 0; my $rhsFwdBlue = 0; my $rhsRevBlue = 0;
my $lhsFwdGreen = 0; my $lhsRevGreen = 0; my $rhsFwdGreen = 0; my $rhsRevGreen = 0;
my $softClipSupporting = 0;
my $lhsSoftClipping=0; my $rhsSoftClipping = 0;

#store the last blue read before the b/point, and first blue read after the b/point
my $lastSupportingPos = 0;my $firstSupportingPos = 100000000000;
Expand All @@ -175,7 +175,7 @@ sub testBreakPoint
my $cmd = $cmdpre.($refPos-$lhsWindow).qq[-].($refPos+$rhsWindow).qq[ | ].(defined($ignoreRGs) ? qq[ grep -v -f $ignoreRGs |] : qq[]);
open( my $tfh, $cmd ) or die $!;
my $totalLFwd = 0; my $totalRRev = 0;my $totalSup = 0;
my @lhsFwd;my @lhsRev;my @rhsFwd;my @rhsRev;my @soft;
my @lhsFwd;my @lhsRev;my @rhsFwd;my @rhsRev;my $lhsSoft=0; my $rhsSoft=0;
my $totalSpanningRPs = 0;my %spanningFrags;
while( my $sam = <$tfh> )
{
Expand All @@ -187,12 +187,12 @@ sub testBreakPoint
{
if( ( $s[ 1 ] & $$BAMFLAGS{'reverse_strand'} ) ) #rev strand
{
if( $s[ 5 ] =~ /^[0-9]{2}+S/ ){$rhsRevBlue++;push(@rhsRev, $s[0]);}
if( $s[ 5 ] =~ /^[0-9]{2}+S/ ){$rhsSoft++;}
elsif( $s[ 3 ] < $refPos ){$lhsRevBlue++;push(@lhsRev, $s[0]);}else{$rhsRevBlue++;$firstSupportingPos = $s[ 3 ] if( $s[ 3 ] < $firstSupportingPos );push(@rhsRev, $s[0]);}
}
else
{
if( $s[ 5 ] =~ /[0-9]{2}+S$/ ){$lhsFwdBlue++;push(@lhsFwd, $s[0]);}
if( $s[ 5 ] =~ /[0-9]{2}+S$/ ){$lhsSoft++;}
elsif( $s[ 3 ] < $refPos ){$lhsFwdBlue++;$lastSupportingPos = $s[ 3 ] + length( $s[ 9 ] ) if( ( $s[ 3 ] + length( $s[ 9 ] ) ) > $lastSupportingPos );push(@lhsFwd, $s[0]);}else{$rhsFwdBlue++;push(@rhsFwd, $s[0]);}
}
}
Expand All @@ -201,12 +201,12 @@ sub testBreakPoint
{
if( $s[ 1 ] & $$BAMFLAGS{'reverse_strand'} ) #rev strand
{
if( $s[ 5 ] =~ /^[0-9]{2}+S/ ){$rhsRevGreen++;push(@rhsRev, $s[0]);}
if( $s[ 5 ] =~ /^[0-9]{2}+S/ ){$rhsSoft++;}
elsif( $s[ 3 ] < $refPos ){$lhsRevGreen++;push(@lhsRev, $s[0]);}else{$rhsRevGreen++;$firstSupportingPos = $s[ 3 ] if( $s[ 3 ] < $firstSupportingPos );push(@rhsRev, $s[0]);}
}
else
{
if( $s[ 5 ] =~ /[0-9]{2}+S$/ ){$lhsFwdGreen++;push(@lhsFwd, $s[0]);}
if( $s[ 5 ] =~ /[0-9]{2}+S$/ ){$lhsSoft++;}
elsif( $s[ 3 ] < $refPos ){$lhsFwdGreen++;$lastSupportingPos = $s[ 3 ] + length( $s[ 9 ] ) if( ( $s[ 3 ] + length( $s[ 9 ] ) ) > $lastSupportingPos );push(@lhsFwd, $s[0]);}else{$rhsFwdGreen++;push(@rhsFwd, $s[0]);}
}
}
Expand Down Expand Up @@ -236,7 +236,7 @@ sub testBreakPoint

#if it appears to be an inversion breakpoint
my $lhsRev = $lhsRevGreen + $lhsRevBlue;my $rhsRev = $rhsRevGreen + $rhsRevBlue;my $lhsFwd = $lhsFwdGreen + $lhsFwdBlue;my $rhsFwd = $rhsFwdGreen + $rhsFwdBlue;
my $totalSupporting = ( $lhsFwd + $rhsRev + $softClipSupporting );
my $totalSupporting = ( $lhsFwd + $rhsRev + $rhsSoft + $lhsSoft );
my $callString = qq[$chr\t$refPos\t].($refPos+1).qq[\t$originalCallA[ 3 ]\t$totalSupporting];
print qq[No same orientation: $numSameOrientation\n];
if( $numSameOrientation > $totalSupporting )
Expand All @@ -258,33 +258,33 @@ sub testBreakPoint

if( $totalSupporting < $minReads )
{
return [$NOT_ENOUGH_READS_CLUSTER, $callString, 0, $totalSupporting, $totalSpanningRPs];
return [$NOT_ENOUGH_READS_CLUSTER, $callString, 0, $totalSupporting, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}
elsif( $lhsFwd < ($minReads/2) || $rhsRev < ($minReads/2) )
{
print qq[Code: $NOT_ENOUGH_READS_FLANKS\t$lhsFwd\t$rhsRev\n];
return [$NOT_ENOUGH_READS_FLANKS, $callString, 0, $totalSupporting, $totalSpanningRPs];
return [$NOT_ENOUGH_READS_FLANKS, $callString, 0, $totalSupporting, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}
elsif( ! $lhsRatioPass && ! $rhsRatioPass )
{
return [$NEITHER_SIDE_RATIO_PASSES, $callString, 0, $totalSupporting, $totalSpanningRPs];
return [$NEITHER_SIDE_RATIO_PASSES, $callString, 0, $totalSupporting, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}
elsif( ! ( $lhsRatioPass && $rhsRatioPass ) )
{
return [$ONE_SIDED_RATIO_PASSES, $callString, 0, $totalSupporting, $totalSpanningRPs];
return [$ONE_SIDED_RATIO_PASSES, $callString, 0, $totalSupporting, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}
elsif( $dist > 220 )
{
print qq[Distance between 5' and 3' clusters: $dist\n];
return [$DISTANCE_THRESHOLD, $callString, 0, $totalSupporting, $totalSpanningRPs ];
return [$DISTANCE_THRESHOLD, $callString, 0, $totalSupporting, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}
else
{
my $ratio = ( $lhsRev + $rhsFwd ) / ( $lhsFwd + $rhsRev );
return [$PASS, $callString, $ratio, $totalSupporting, $totalSpanningRPs];
return [$PASS, $callString, $ratio, $totalSupporting, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}

return [$UNKNOWN_FAIL, $callString, 10000, $totalSpanningRPs];
return [$UNKNOWN_FAIL, $callString, 10000, $totalSpanningRPs, $lhsSoft, $rhsSoft];
}
}

Expand Down Expand Up @@ -821,6 +821,12 @@ sub getVcfHeader
##FORMAT=<ID=SP,Number=1,Type=Float,Description="Number of read pairs spanning breakpoint, useful for estimation of size of insertion">
$vcf_out->add_header_line( {key=>'FORMAT', ID=>'SP', Number=>'1', Type=>'Float', Description=>'Number of correctly mapped read pairs spanning breakpoint, useful for estimation of size of insertion'} );

##FORMAT=<ID=CLIP3,Number=1,Type=Float,Description="Number of soft clipped reads downstream of the breakpoint">
$vcf_out->add_header_line( {key=>'FORMAT', ID=>'CLIP3', Number=>'1', Type=>'Float', Description=>'Number of soft clipped reads downstream of the breakpoint'} );

##FORMAT=<ID=CLIP5,Number=1,Type=Float,Description="Number of soft clipped reads uptream of the breakpoint">
$vcf_out->add_header_line( {key=>'FORMAT', ID=>'CLIP5', Number=>'1', Type=>'Float', Description=>'Number of soft clipped reads upstream of the breakpoint'} );

##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype quality">
$vcf_out->add_header_line( {key=>'FORMAT', ID=>'FL', Number=>'1', Type=>'Integer', Description=>'Call Status - for reference calls a flag to say if the call failed a particular filter. Filters are ordered by priority in calling (higher number indicates closer to being called). 1 - depth too high in region, 2 - not enough reads in cluster, 3 - not enough total flanking reads, 4 - not enough inconsistently mapped reads, 5 - neither side passes ratio test, 6 - one side passes ratio test, 7 - distance too large at breakpoint, 8 - PASSED all filters'} );

Expand Down
15 changes: 11 additions & 4 deletions bin/retroseq.pl
Original file line number Diff line number Diff line change
Expand Up @@ -572,10 +572,11 @@ sub _findInsertions
my ($chr, $start, $end) = (undef, undef, undef);
if( defined( $region ) )
{
if( $region =~ /^([A-Za-z0-9]+):([0-9]+)-([0-9]+)$/ )
if( $region =~ /^([A-Za-z0-9]+):([0-9,]+)-([0-9,]+)$/ )
{
$chr = $1;$start=$2;$end=$3;
print qq[Restricting calling to region: $region\n];
print qq[Restricting calling to region: $chr:$start-$end\n];
die qq[Start co-ordinate is larger than end co-ordinate] if($start>$end);
}
else{$chr = $region;print qq[Restricting calling to Chr: $chr\n];}
}
Expand Down Expand Up @@ -926,6 +927,8 @@ sub _filterCallsBedMinima
my $call = $result->[1];
my $ratio = $result->[2];
my $spanningRPs = $result->[4];
my $lhsSoft = $result->[5];
my $rhsSoft = $result->[6];

if( $flag == $RetroSeq::Utilities::INV_BREAKPOINT )
{
Expand All @@ -935,7 +938,7 @@ sub _filterCallsBedMinima
{
#note to self - disabled the hom/het decision code (needs to be rewritten)
print qq[Breakpoint score: $ratio Flag: $flag\n];
print $homsfh $call.qq[\t$flag\t$spanningRPs\n];
print $homsfh $call.qq[\t$flag\t$spanningRPs\t$lhsSoft\t$rhsSoft\n];
}
else{print qq[Discarding: $call : $flag\n];}
}
Expand Down Expand Up @@ -1098,6 +1101,8 @@ sub _outputCalls
my $ci2 = $s[ 2 ] - $pos;
my $flag = $s[ 5 ];
my $spanning = $s[ 6 ];
my $lhsSoft = $s[ 7 ];
my $rhsSoft = $s[ 8 ];

print $bfh qq[$s[0]\t$pos\t].($pos+1).qq[\t$type==$sample\t$s[4]\tNA\n];

Expand All @@ -1109,12 +1114,14 @@ sub _outputCalls
$out{REF} = $refbase;
$out{QUAL} = $s[ 4 ];
$out{INFO} = { SVTYPE=>'INS', NOT_VALIDATED=>undef, MEINFO=>qq[$s[3],$s[1],$s[2],NA] };
$out{FORMAT} = ['GT', 'GQ', 'FL', 'SP'];
$out{FORMAT} = ['GT', 'GQ', 'FL', 'SP', 'CLIP5' , 'CLIP3'];

$out{gtypes}{$sample}{GT} = qq[<INS:ME>/<INS:ME>];
$out{gtypes}{$sample}{GQ} = qq[$s[4]];
$out{gtypes}{$sample}{FL} = $flag;
$out{gtypes}{$sample}{SP} = $spanning;
$out{gtypes}{$sample}{CLIP5} = $lhsSoft;
$out{gtypes}{$sample}{CLIP3} = $rhsSoft;

$vcf_out->format_genotype_strings(\%out);
print $vfh $vcf_out->format_line(\%out);
Expand Down

0 comments on commit a1fb472

Please sign in to comment.