Merge pull request #56 from eastgenomics/v1.5.2

V1.5.2
eastgenomics · Oct 26, 2021 · b37e226 · b37e226
2 parents 0086783 + ab647bd
commit b37e226
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 16 deletions.
diff --git a/Readme.md b/Readme.md
@@ -26,7 +26,7 @@ Inputs:
 
 Example:
 ```
-dx run vcf2xls_nirvana_v1.3.0 -iannotated_vcf=X210333_markdup_recalibrated_Haplotyper.refseq_nirvana_2010.annotated.vcf -iraw_vcf="X210333_markdup_recalibrated_Haplotyper.vcf.gz" -isample_coverage_file=X210333_markdup.nirvana_2010_5bp.gz -isample_coverage_index=X210333_markdup.nirvana_2010_5bp.gz.tbi -iflagstat_file=X210333_markdup.flagstat -igenepanels=001_Reference:/dynamic_files/gene_panels/gemini_panels_200522 -ibioinformatic_manifest=001_Reference:/dynamic_files/BioinformaticManifest/BioinformaticManifest_200819 -iexons_nirvana=001_Reference:/annotation/b37/exons_nirvana2010_no_PAR_Y.tsv -inirvana_genes2transcripts=001_Reference:/dynamic_files/nirvana_genes2transcripts/nirvana_genes2transcripts_2010_200728
+dx run app-vcf2xls_nirvana/1.5.2 -iannotated_vcf=X210333_markdup_recalibrated_Haplotyper.refseq_nirvana_2010.annotated.vcf -iraw_vcf="X210333_markdup_recalibrated_Haplotyper.vcf.gz" -isample_coverage_file=X210333_markdup.nirvana_2010_5bp.gz -isample_coverage_index=X210333_markdup.nirvana_2010_5bp.gz.tbi -iflagstat_file=X210333_markdup.flagstat -igenepanels=001_Reference:/dynamic_files/gene_panels/gemini_panels_200522 -ibioinformatic_manifest=001_Reference:/dynamic_files/BioinformaticManifest/BioinformaticManifest_200819 -iexons_nirvana=001_Reference:/annotation/b37/exons_nirvana2010_no_PAR_Y.tsv -inirvana_genes2transcripts=001_Reference:/dynamic_files/nirvana_genes2transcripts/nirvana_genes2transcripts_2010_200728
 ```
 
 ## What does this app output?

diff --git a/dxapp.json b/dxapp.json
@@ -1,11 +1,12 @@
 {
-  "name": "vcf2xls_nirvana_v1.5.1",
-  "title": "vcf2xls_nirvana_v1.5.1",
-  "summary": "vcf2xls_nirvana adapated for DNAnexus: no queries for Genetics Ark, uses Python STDOUT to replace essential queries for creating QC sheets",
+  "name": "vcf2xls_nirvana",
+  "title": "Vcf2xls nirvana",
+  "summary": "Create xls from Nirvana annotated vcf",
   "dxapi": "1.0.0",
-  "version": "1.5.1",
+  "version": "1.5.2",
+  "authorizedUsers": ["org-emee_1"],
   "properties": {
-    "githubRelease": "v1.5.1"
+    "githubRelease": "v1.5.2"
   },
   "inputSpec": [
     {

diff --git a/resources/home/dnanexus/vcf2xls_nirvana.pl b/resources/home/dnanexus/vcf2xls_nirvana.pl
@@ -27,7 +27,7 @@ BEGIN
 use Vcf;
 use Getopt::Std;
 
-my $opts = 'p:a:v:u:T:w:i:c:h';
+my $opts = 'p:a:s:v:u:T:w:i:c:h';
 my %opts;
 getopts($opts, \%opts);
 
@@ -95,6 +95,7 @@ BEGIN
 # }
 
 my $vcf_file = $opts{a} || shift || usage();
+my $sliced_vcf = $opts{s};
 my $raw_vcf_file = $opts{v};
 usage() if ( $opts{ 'h' });
 
@@ -103,14 +104,17 @@ BEGIN
 my $sample = find_sample_name( $vcf_file );
 
 $sample =~ s/_.*//;
+# match the leading identifier of the extracted sample id: one of X, G or C followed by one or more digits
+$sample =~ m/^[XGC][0-9]+/;
 
 my %gene_list;
 my %hotspots;
 
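+# $& holds the text matched by the last successful regex match (intended: the sample id prefix matched above); if that match failed, $& is left over from an earlier match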
 if ( $opts{ 'p' } ) {
-  %gene_list = parameter_panels2genes($opts{ 'p' }, $sample);
+  %gene_list = parameter_panels2genes($opts{ 'p' }, $&);
 } else {
-  %gene_list = readin_manifest( $manifest, $sample);
+  %gene_list = readin_manifest( $manifest, $&);
 }
 
 die "No genes for $sample\n" if ( keys %gene_list == 0 );
@@ -155,7 +159,7 @@ BEGIN
 $meta_stats{ 'PANEL'} = $gene_list{ 'PANEL'};
 $meta_stats{ 'PANEL_IDS'} = $gene_list{ 'PANEL_IDS'};
 
-analyse_vcf_file( $vcf_file );
+analyse_vcf_file( $sliced_vcf );
 
 print "Filling summary sheet\n";
 fill_summary_sheet();

diff --git a/src/vcf2xls_nirvana.sh b/src/vcf2xls_nirvana.sh
@@ -78,14 +78,11 @@ main() {
         echo $panel_bed_name
 
         # If panel bed is provided, filter the vcf
-        bedtools intersect -header -a inputs/$annotated_vcf_name -b inputs/$panel_bed_name > inputs/filtered_annotated_vcf
-        bedtools intersect -header -a inputs/$raw_vcf_name -b inputs/$panel_bed_name > inputs/filtered_raw_vcf
-
-        mv inputs/filtered_annotated_vcf inputs/$annotated_vcf_name
-        mv inputs/filtered_raw_vcf inputs/$raw_vcf_name
-
+        bedtools intersect -header -a inputs/$annotated_vcf_name -b inputs/$panel_bed_name > inputs/sliced_annotated_vcf
     else
+        # No panel bed provided: the sliced annotated vcf is simply a copy of the annotated vcf
         echo "VCF not filtered as panel bed not provided"
+        cp inputs/$annotated_vcf_name inputs/sliced_annotated_vcf
     fi
 
     # Boolean to detect if workflow id has been found
@@ -165,6 +162,7 @@ main() {
     if [ -z ${list_panel_names_genes+x} ]; then
         perl vcf2xls_nirvana.pl \
             -a inputs/$annotated_vcf_name \
+            -s inputs/sliced_annotated_vcf \
             -v inputs/$raw_vcf_name \
             -c inputs/$sample_coverage_file_name \
             -u $nb_usable_reads \
@@ -175,6 +173,7 @@ main() {
         perl vcf2xls_nirvana.pl \
             -p "$list_panel_names_genes" \
             -a inputs/$annotated_vcf_name \
+            -s inputs/sliced_annotated_vcf \
             -v inputs/$raw_vcf_name \
             -c inputs/$sample_coverage_file_name \
             -u $nb_usable_reads \