From cd99ea84f04ee98fd13e857a87dcc17f726c648f Mon Sep 17 00:00:00 2001 From: naumenko-sa Date: Mon, 20 Aug 2018 13:03:38 -0400 Subject: [PATCH] new file: data/test.vcf --- data/test.vcf | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 data/test.vcf diff --git a/data/test.vcf b/data/test.vcf new file mode 100644 index 0000000..6a4e91f --- /dev/null +++ b/data/test.vcf @@ -0,0 +1,434 @@ +##fileformat=VCFv4.2 +##FILTER= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##FILTER= 0.5 && (DP < 4 && %QUAL < 50))"> +##FILTER= 200.0 || SOR > 10.0 || (QD < 10.0 && AD[0:1] / (AD[0:1] + AD[0:0]) < 0.25 && ReadPosRankSum < 0.0))"> +##FILTER= 60.0 || (QD < 10.0 && AD[0:1] / (AD[0:1] + AD[0:0]) < 0.25 && ReadPosRankSum < 0.0) || MQ < 30.0)"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= 0.5 && TC < 4 && %QUAL < 50)"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= 0.5 && DP < 4 && %QUAL < 50)"> +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##GATKCommandLine.HaplotypeCaller= +##LoF=Loss-of-function annotation (HC = High Confidence; LC = Low Confidence) +##LoF_filter=Reason for LoF not being HC +##LoF_flags=Possible warning flags for LoF +##LoF_info=Info used for LoF annotation +##MaxEntScan_alt=MaxEntScan alternate sequence score +##MaxEntScan_diff=MaxEntScan score difference +##MaxEntScan_ref=MaxEntScan reference sequence score +##SpliceRegion=SpliceRegion predictions +##bcftools_annotateCommand=annotate -x INFO/CSQ; Date=Fri Aug 3 23:31:36 2018 +##bcftools_annotateVersion=1.7+htslib-1.7 +##bcftools_callCommand=call -v -m -; Date=Thu Aug 2 00:01:03 2018 +##bcftools_callVersion=1.7+htslib-1.7 +##bcftools_filterCommand=filter -O v --soft-filter GATKCutoffIndel -e 'TYPE="indel" && (ReadPosRankSum < -20.0 || QD < 2.0 || FS > 200.0 || SOR > 10.0 || (QD < 10.0 && AD[0:1] / (AD[0:1] + AD[0:0]) < 0.25 && ReadPosRankSum < 0.0))' -m + /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/gatk-haplotype/1171R-vepeffects-annotated-filterSNP.vcf.gz; Date=Fri Aug 3 22:57:22 2018 +##bcftools_filterVersion=1.7+htslib-1.7 +##bcftools_viewCommand=view -f PASS,. -O z -o /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/bcbiotx/tmpkxC5NW/1171R-ensemble-work/txtmp4004832469508254059/1171R-vepeffects-annotated-filterSNP-filterINDEL-noeff-nomultiallelic-nofilter.vcf.gz /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/gatk-haplotype/1171R-vepeffects-annotated-filterSNP-filterINDEL-noeff-nomultiallelic.vcf.gz; Date=Fri Aug 3 23:32:49 2018 +##bcftools_viewVersion=1.7+htslib-1.7 +##commandline="/hpf/largeprojects/ccmbio/naumenko/tools/bcbio/anaconda/bin/freebayes -f /hpf/largeprojects/ccmbio/naumenko/tools/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa --genotype-qualities --strict-vcf --ploidy 2 --targets /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/freebayes/1/1171R-1_0_15509579-regions.bed --min-repeat-entropy 1 --no-partial-observations -b /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/prealign/1171R_595742/1171R_595742-prealign.bam" +##fileDate=2018-08-02 +##phasing=none +##platypusOptions={'assemblyRegionSize': 1500, 'trimReadFlank': 0, 'assembleBadReads': 1, 'bamFiles': ['/hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/prealign/1171R_595742/1171R_595742-prealign.bam'], 'minVarDist': 9, 'trimSoftClipped': 1, 'minReads': 2, 'qualBinSize': 1, 'refFile': '/hpf/largeprojects/ccmbio/naumenko/tools/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa', 'maxHaplotypes': 50, 'filterVarsByCoverage': 1, 'maxSize': 1500, 'originalMaxHaplotypes': 50, 'skipDifficultWindows': 0, 'parseNCBI': 0, 'skipRegionsFile': None, 'noCycles': 0, 'trimAdapter': 1, 'minPosterior': 5, 'assembleAll': 1, 'trimOverlapping': 1, 'filterDuplicates': 0, 'abThreshold': 0.001, 'minFlank': 10, 'bufferSize': 100000, 'fileCaching': 0, 'useEMLikelihoods': 0, 'coverageSamplingLevel': 30, 'calculateFlankScore': 0, 'logFileName': '/dev/null', 'nCPU': 1, 'filterReadsWithUnmappedMates': 1, 'qdThreshold': 10, 'maxVariants': 8, 'scThreshold': 0.95, 'filterReadsWithDistantMates': 1, 'maxReads': 5000000, 'badReadsWindow': 11, 'genIndels': 1, 'largeWindows': 0, 'minMapQual': 20, 'maxVarDist': 15, 'maxGOF': 30, 'rlen': 150, 'minGoodQualBases': 20, 'refCallBlockSize': 1000, 'countOnlyExactIndelMatches': 0, 'longHaps': 0, 'HLATyping': 0, 'filterReadPairsWithSmallInserts': 1, 'minBaseQual': 20, 'getVariantsFromBAMs': 1, 'genSNPs': 1, 'assemble': 0, 'assemblerKmerSize': 15, 'minVarFreq': 0.05, 'alignScoreFile': '', 'verbosity': 1, 'sourceFile': None, 'compressReads': 0, 'rmsmqThreshold': 40, 'filteredReadsFrac': 0.7, 'outputRefCalls': 0, 'badReadsThreshold': 15, 'hapScoreThreshold': 4, 'regions': ['/hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/platypus/1/1171R-1_0_15509579-regions.bed'], 'sbThreshold': 0.001, 'output': '-', 'assembleBrokenPairs': 0, 'mergeClusteredVariants': 1, 'maxGenotypes': 1275, 'nInd': 1} +##reference=file:///hpf/largeprojects/ccmbio/naumenko/tools/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa +##samtoolsCommand=samtools mpileup -f /hpf/largeprojects/ccmbio/naumenko/tools/bcbio/genomes/Hsapiens/GRCh37/seq/GRCh37.fa -t DP -u -g -l /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/samtools/1/1171R-1_0_15509579-regions.bed /hpf/largeprojects/ccmbio/naumenko/project_cheo/1171R/work/prealign/1171R_595742/1171R_595742-prealign.bam +##samtoolsVersion=1.7+htslib-1.7 +##source=Platypus_Version_0.8.1.1 +##bcftools_viewCommand=view -f PASS,.; Date=Sun Aug 5 20:18:16 2018 +##bcftools_viewCommand=view -f PASS,.; Date=Thu Aug 16 23:29:37 2018 +##VEP="v92" time="2018-08-16 23:31:46" cache="/hpf/largeprojects/ccmbio/naumenko/tools/bcbio/genomes/Hsapiens/GRCh37/vep/homo_sapiens_merged/92_GRCh37" ensembl-variation=92.77a06cf ensembl=92.98e8548 ensembl-funcgen=92.cd2ca86 ensembl-io=92.39280bd 1000genomes="phase3" COSMIC="81" ClinVar="201706" ESP="20141103" HGMD-PUBLIC="20164" assembly="GRCh37.p13" dbSNP="150" gencode="GENCODE 19" genebuild="2011-04" gnomAD="170228" polyphen="2.2.2" refseq="01_2015" regbuild="1.0" sift="sift5.2.2" +##bcftools_viewCommand=view 1171R-ensemble-annotated-decomposed.vcf.gz 1:14464; Date=Mon Aug 20 13:02:28 2018 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1171R_595742 +1 14464 rs546169444 A T 72.8 PASS AC=1;AF=0.5;AN=2;BaseQRankSum=0.674;CALLERS=gatk-haplotype,samtools,freebayes;ClippingRankSum=0;DP=4;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=34.53;MQ0=0;MQRankSum=0.319;QD=18.2;ReadPosRankSum=-0.319;SOR=1.609;CSQ=T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000423562|unprocessed_pseudogene|10/10||ENST00000423562.1:n.1568T>A||1568/1669|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000438504|unprocessed_pseudogene|12/12||ENST00000438504.2:n.1682T>A||1682/1783|||||rs546169444|1||-1||SNV|HGNC|38034|YES|||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000450305|transcribed_unprocessed_pseudogene||||||||||rs546169444|1|794|1||SNV|HGNC|37102||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript||||||||||rs546169444|1|55|1||SNV|HGNC|37102|YES|||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|11/11||ENST00000488147.1:n.1291T>A||1291/1351|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000515242|transcribed_unprocessed_pseudogene||||||||||rs546169444|1|52|1||SNV|HGNC|37102||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000518655|transcribed_unprocessed_pseudogene||||||||||rs546169444|1|55|1||SNV|HGNC|37102||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000538476|unprocessed_pseudogene|13/13||ENST00000538476.1:n.1530T>A||1530/1583|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000541675|unprocessed_pseudogene|9/9||ENST00000541675.1:n.1315T>A||1315/1416|||||rs546169444|1||-1||SNV|HGNC|38034||||||||||Ensembl|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|non_coding_transcript_exon_variant|MODIFIER|WASH7P|653635|Transcript|NR_024540.1|pseudogene|11/11||NR_024540.1:n.1667T>A||1667/1769|||||rs546169444|1||-1||SNV|EntrezGene|38034|YES|||||||||RefSeq|A|A|OK||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|downstream_gene_variant|MODIFIER|DDX11L1|100287102|Transcript|NR_046018.2|pseudogene||||||||||rs546169444|1|55|1||SNV|EntrezGene|37102|YES|||||||||RefSeq|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|downstream_gene_variant|MODIFIER|MIR6859-1|102466751|Transcript|NR_106918.1|miRNA||||||||||rs546169444|1|2905|-1||SNV|EntrezGene||YES|||||||||RefSeq|A|A|||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||,T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000000002|open_chromatin_region||||||||||rs546169444|1||||SNV|||||||||||||||||||||0.0958|0.0144|0.1138|0.005|0.1859|0.1943||||||||||||0.1943|SAS||||||||||||||||;gnomad_ac_gs=5015;gnomad_af_gs=0.3152;af_1kg_amr=0.1138;af_1kg_eas=0.005;af_1kg_sas=0.1943;af_1kg_afr=0.0144;af_1kg_eur=0.1859;af_1kg_all=0.0958;rs_ids=rs546169444;af_1kg=0.1943;max_af=0.3152 GT:AD:DP:GQ:PL 0/1:1,3:4:22:101,0,22