From ea07f11a9e27d234a3832f133e7fcb4a57d90083 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 1 Aug 2022 14:21:46 -0400 Subject: [PATCH 1/8] Next development iteration `1.0.3-SNAPSHOT`. Signed-off-by: Daniel Danis --- pom.xml | 2 +- svanna-benchmark/pom.xml | 2 +- svanna-cli/pom.xml | 2 +- svanna-configuration/pom.xml | 2 +- svanna-core/pom.xml | 2 +- svanna-db/pom.xml | 2 +- svanna-ingest/pom.xml | 2 +- svanna-io/pom.xml | 2 +- svanna-model/pom.xml | 2 +- svanna-test/pom.xml | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 2fe0b855..43811b71 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,7 @@ svanna-benchmark org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT SvAnna diff --git a/svanna-benchmark/pom.xml b/svanna-benchmark/pom.xml index 837e72e9..a3a082d9 100644 --- a/svanna-benchmark/pom.xml +++ b/svanna-benchmark/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 diff --git a/svanna-cli/pom.xml b/svanna-cli/pom.xml index 565c07bd..a6bd16ea 100644 --- a/svanna-cli/pom.xml +++ b/svanna-cli/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 svanna-cli diff --git a/svanna-configuration/pom.xml b/svanna-configuration/pom.xml index b7336222..7a499b1b 100644 --- a/svanna-configuration/pom.xml +++ b/svanna-configuration/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 diff --git a/svanna-core/pom.xml b/svanna-core/pom.xml index 4cc93f89..cf841316 100644 --- a/svanna-core/pom.xml +++ b/svanna-core/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 diff --git a/svanna-db/pom.xml b/svanna-db/pom.xml index ec116e2f..73a72f52 100644 --- a/svanna-db/pom.xml +++ b/svanna-db/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 diff --git a/svanna-ingest/pom.xml b/svanna-ingest/pom.xml index 0549f147..a6194641 100644 --- 
a/svanna-ingest/pom.xml +++ b/svanna-ingest/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 svanna-ingest diff --git a/svanna-io/pom.xml b/svanna-io/pom.xml index 98268a6e..ff14b7bc 100644 --- a/svanna-io/pom.xml +++ b/svanna-io/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 diff --git a/svanna-model/pom.xml b/svanna-model/pom.xml index 8f563ee6..98cfa424 100644 --- a/svanna-model/pom.xml +++ b/svanna-model/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 diff --git a/svanna-test/pom.xml b/svanna-test/pom.xml index a36c0067..0145d070 100644 --- a/svanna-test/pom.xml +++ b/svanna-test/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3-SNAPSHOT 4.0.0 From 5f560c477fa09a5951b9b741f7adf3ea305fa056 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 09:37:28 -0400 Subject: [PATCH 2/8] Update dependencies. Signed-off-by: Daniel Danis --- pom.xml | 8 +++--- ...ncode.10genes.v38.basic.annotation.json.gz | Bin 3795 -> 3727 bytes ...ermSimilarityGeneWeightCalculatorTest.java | 8 +++--- .../getad/RouteDataEvaluatorGETadTest.java | 8 +++--- .../GeneSequenceImpactCalculatorTest.java | 8 +++--- ...ncode.10genes.v38.basic.annotation.json.gz | Bin 3654 -> 3727 bytes .../dispatch/TadAwareDispatcherTest.java | 8 +++--- .../svanna/ingest/cmd/BuildDb.java | 23 +++++++++--------- .../ingest/similarity/IcMicaCalculator.java | 5 +--- .../ingest/MakeSmallGencodeFileTest.java | 12 ++++++--- svanna-io/pom.xml | 4 +++ 11 files changed, 40 insertions(+), 44 deletions(-) diff --git a/pom.xml b/pom.xml index 43811b71..ca483278 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.springframework.boot spring-boot-starter-parent - 2.6.10 + 3.0.5 @@ -32,8 +32,8 @@ ${java.version} ${java.version} - 2.0.0-RC2 - 0.2.0 + 2.0.0 + 0.2.5 1.4.200 @@ -125,7 +125,7 @@ com.github.samtools htsjdk - 2.23.0 + 3.0.5 org.tukaani diff --git 
a/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz b/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz index 0b51523dcd221d9abb57b1c5ce8d2be984dbd2b5..7d50da6d42737fc5b647694c3a1e882e8cb4252b 100644 GIT binary patch literal 3727 zcmV;A4sh`wiwFP!00000|Lt90Z)G(Of1giL^_Ar+|Li!tbqWJ$p(DZYz#_CNor)a^ zW>(YDYF8`1``S78_TJn0IQQI^)3TfaA;z(L630K=Nu2bDlm9+DIXQjv`t;xM}#u&u_23tUv4Yi_b64 zKM9U}I_0+CgyHm;cUL>#`T3`_tE=mqn>X*?R*Cbk`CEu_6>h)t;r->?;m3WvrCq-N zAfylg2>IW2!}Y)^g?5*FE93oaQ*LhZ};?gaAznx$m^dqCQA{rDoY7m zN45k=s%(kmZP^leB0CB^`EjiQp5^m*@7}+D^Y-$?_066l?+hk+buc9*NW%|(w$rJ; zLFR;xR?#-d@DvP^TpTgTlxS{)9P7;E8P%;4emA2!`SIDhV;$!nTp}ndA2sLx{MGYB zW6nL5=pMwkf7JMPgj`CA`<@+THpt!Iujh<8jrsLDWD`b|9vDR*DJa_0fTn}F5H#Kq z=Od0IKo}r7Mb3v(Ou69NARy$P928HA9J*+rTo|x47#DITBU~{zmB>)UtprUD+i{s-h)q>!_>aRV>0`pjQ;*EFCwNe;_`URHIF^UrIiVZfsC>g z3e8m3aeBN(_Z)7`>I^p&XSj^oUl&s26o&>zcTW~uhk8c|lOVDlOlnX*e2pS)VRCr3 znIL9bWy5C1VNwXfyq{ceK4aFHzq0V3c-^9zI0dwrgM%K708?)=l zCMvotI8Pl}D6MO*4d@(nWYPO3ehv-68xf|mkZ*H~p&^EzEGw&tPeTKEdybwS>WD6X(aJtjK_UGGL#}Ot1%ps9DdWz<#VV9Mfgu%8NhwyU5i;&|Az!PJHA zIP762T$gtHa6Piy^~!v>);sgGan1iMxPC-jZw>YFagEtiFmdhOqsQws=Z4l2ub&34 z`?7!|vKV{1dU8HtYvfPCC&XaKdLJIyC#-k97N79v@(F*$SnrHOi2?dK>Kk)gBY!}j zz+Wp{pK$o5c%o4Jij#qMQ~cZSzIhSGw#C^H^2B}d><>H!^f5+;Iv(x%?b}Jn2+1;*inJ51VatFB4%@yR%ZY)TN~$HosNET(BSQNaRf3 zfNfBrZio!UlF~65wZjZ@@T+o)|OsYBZkqL5EHX{ta z&A)^kvT-ag43Ha#B4o6x>x3akRqoc1Cb^+%5Up2~A4r?@@!6zRi3f17N+uv-6p)!c zBF%=##`#u7`ndP?J|fS)d-d|`vsa&edLK|Ked&X80oP36+StTIaIwNj9rzx&4v+7V zmuw7=ti<={-<+Qr>HSa{G9kRff!Tcw(Ssw{kMFz&-=^U(9i9w62LXs*ipk=A%46oOy|UY-2R0F8FHE=Lg+{q*SeHSkHOf6V2;3@Z9rG) zo(%6G9GsOm?r)%56~UDwByF5bC&RN;j@0heR62TNn@2cGN{y2r=*9kKbV^C2(7jm` zDIDx6wQ=$vt141BP)Q4zp1fJ7C>#me!p}-aur-vBa}iAvsq;-p)CNMKLm(r93I|)f z)?0>(wIw7RIjc4VxmYVv=D+|!vjkRou{DIzXU!5Yg0-2>9GPR0*4xbMTH6w`EA7%( z=p0mR>&uAPgzrkvR&A|t6m~h9BkFo+CtbapFEhKJ*wN#soZD zcg_(aP@sD7d*5r~h#_u|PTDb>`wZ-@V>mKf$y4?%s}`u;^xNuTwzGuMt}q=ftP2M# zyWQCnI#`(lb7XSLL4?pf89t3UBav@60ENy;mnE5` 
zV~QlxIZD}*Sg766XS%q3Q<4*;ws=P9;Lw)LF6MRzTj&r?4W*o@jp9NF*U?bsK(Bri z$f~#zjtp(CT6EQ&Z@Vn91Js6 zlCo*>!wHrq$x9g1`oMuw;#KLKblH+QBvnH>nukryQIc{>C3OCeDarWNq9D`Jj!4Ww zUem1?tF$Ev+RB|u$BqpJm!c-g9sz9KhJaf9WP0vcLWqu(=m4_EU~4FF0M)P(AX#gl zfkVYg0PJ9eSL~=^CD@M%R1T%CN$4DKYz>KmG)stVmZraDkE5)gD;=!WzHkr&N|PiE zc5hP%s%}$A9o?oBQR{6=$<7#o6Cy40GTkgU6AlC|Ixrp0su$+qL`uU72RZYoJhJM8 zI-fi(9SyPioi0gMNuF@9UsWY#x@<{IN+p5N!P@r`4pQFLsdTe`m^n}ah$4xcjN83M z2B?Nlw%QI>Fs61_qtbaHV@vXcYSxErf3{CRN#s>Yva=c~&eZZ-riW--0&=pY*)3-e z*0UzED3Nj%MUqzlcCHCo%uJIM9odpRZQ874_CvOYI7e-3Q1=(n(@H&L>6)~q0WU^^ zJL>p^?Hpj_e#d%4;;74=a?&Zj~?Q2Ms%f&?Yh-Lo7-+uEPdn) zB|9q6m9Vdd3+5>z1@YV6S&>oo(pb%tTjdDl1^m<3|VseLD=Hjw!o7qdWdZGGy{;4Kd)EEuV+79AQ&==2$-YD1MJ z``NR~l3_&FJfs$u>{DBHux`LkpR-raZ`TdcdIn5bBLAz(M)SbAvZ&9k4GmVASY^q| zD+ucbYeiUD$h9vo3=Q5YdKg$j(RD?UI`hj$+rCV(Zm`N62Nu+Ak|eD5WDDuv_0GD^ zR#^hI3-6(!^lXDcJ|dgIx(ruY-1-H7-a)C#E3Evqq{cf$MxXyU^CGt{bf1V^&#G zPU`&1Vy@HYoKt%sJ2cqO7&8mU!S?3o(2%VXz=37N=Wu|uimoe*&+Ts-8yd*ET3cDf zF{!d>zq2STIjM1<8?a|rC1mCIAJvA^!+uGmV?l0dcl2B!pe_r^DvPfysMIvyS-ebp z8?vr7d|Ng;d@bfiwFP!00000|Lq-Jj~ut{-M`{+uW1(gASo$t_O(umwk{B-4{eZ##_b z*YdZ^KfHSVsgplGU;*5Dy!_pS@$NtW`sVGA73m+ZUP7e(z(2o#bN$CU*X3uQzPkD( zxa{GQ$9@rn%OBrfA3f*$Z=PRYfA{X)4{zU8imSiyvxwp@+&t&~&tJcpru+B+`}*hi z0tx{!6aPC8d^ceVxKK%W^HiDS-Sy8u{Pg~vAbh!h{_&zum;FUucJ$!9-gU*P<8+`Zgw{K3^uz0SV9Hx1$ z9&L`8RN4&a2DBw0sI(FbhBeadjp>hzJK zPA44Dc(72F`tvv+sjmuxfZ!B5A4)Olf@|G^fJby#d{*elMMK>Mfl8z70#4a*V2qGh z^cA`kieVxI0g|@9L@}6j%+R9HWnb)l$U%Kx1{gdqA!>9?Xwwz1(H-P&vH24rBBw3d zk>9`kBGXr&ug+A$ex_As&x<~?U(AZx`}FEhU7P0%uC#e&VT9{FdPwZ`POTn;*%&4A zcei_y2tvp1@tSLRdyZWzE5Za>Whpr{TUpoX>BdGZux-{{JW!lrH|pbDfW|UT1FYgB zT57H8tu-vxlX)S~sQEmAtHOlo^*A`!mzHBb=*#jT6fmZ|ONQ|DJtjhSXZp;wvL)W2 ziI2{dt=?QbP@Lf->aGR!jrW`eD8&J7WQM7fF!Vo_ z2hx8ejG7#tqyN3RJSImYec1GW)BiR4&oUKMCdj~k(;rd)S@de++t+`5AO$aN#d2O5 zoaPh@6kMO{X)8GYvnlu^D)>RpJ|qRFAaH25esxC7;MlPvD9W-b0JZ(%Iy`U_*SQ6fbFeiyDCaj>}?&$ru`9= zqGe37e&>Uw%M~S1Yxn8^#KCjPJH>XL@_o4-n3!LJ&U`GVjXxthFug_FV6h)K+YY$C 
z9q^VNSlKt$u>*y;M<5yz#$o*_5}e`V_5`DR?nUwq&6=&k%Ptds0u@C8Ygf%5vqYLlILgF*tH3a z2RpPpS@DPtcVKEIcC|9Gk6MPp(xb2j`73#&z%G#nC3@5^I zPwKVY%|}iKSwWKG=3jh%_1p;er~Z-^ULF@l`!Ph1F5{`2=j@VmK)Q!mKReEkI)1*c z+4S=z!kI0DM+_Lp`jrXj8Vx&Q!$OnNpNZi_h7t${vEFHvgkeBgTx(Y}?ZJqqbYOoXxQX&L~gEw0q1p~0{A+2yin?pbxe`u7H2hT*vtv3`nhHSV| z!C;Bk-j-Fxsuc-FN~#QDDpqF77!)vQlE5r4Hiszsq)7rrw94&_k=XlaZ<{#JwIw0B z(x$=!XP;n8Um~K$hgW#A+H?h@u*uOVIhe4mN#dkVeP$^G{mf#t6-xFWU_Z04fNHzK ziapqs!ji2?U*YYtZC}Zrl>I#R7K;Ox$?&TRB=A*L%KB=&!P=s7+LxK0t;q=x#H8%V z^x*U)Jorr*b7v2(&J?B1*p|i3gKr*u^Wd8Y-#qx{!GCQJzP#+frWKNYEdAw+SKW^b zkz8C^bx2FUEyM`OyM9`Az=%2I`hoebC(?;RJbX`S*KAfD*y_h{X3&z>)G;d;a$86| z@L_2svpW;R$x?a2U|zMwK7peZKrlwiPWd_^aGwp+#u#!$x%r}5;ruoxWzo#3z`aAm zC<+E--_`{M&asz83G!kJCBxZS*_2drTk2=HSbvj}^&%|ss<_Jc@He{}F0{d;VSDHI2+=CJB2u6T5z^`y}R^2~JnA8TL z74EIZC&5S@sB4r71{^BED@A;P2dnu=FxYK3jVmP{DOJ6~11EmU2 z!QRh;r9F@+aB^1Rv|<$H^Ff7YTM~*KlGZ5YFtbI8&U?+8Fg(TqC5Nf)M<9XcVA?*$ zC~g0nz@xL;vML6vnM#?HDe==sEsc^h8B_Yuq0$Lf;T(I}lqrD9p%l&UPmEDO`EVt0 z{*Ot?`m1?ChLi1)7$bX)w_c>urp(y(^;9?wWGFa@8YNoY5tSpX&?z9x~zBy zlv0Dup}-DQ)0Kc=r9cygsw)9$pevlXlcp=dzNMgIC|yqiXNO~RNEoC^LS(Tt?JZjz zC5^6dw32?o$gEHrWu{=iErg)*76J@-3lQ53rNY6E7?A@a&GRzcY(^6dLd`ocoXl1j z#^69o(-n4d=GXJctPkpb^0o{(#OznT99Ov2uqPO7S5->cE}N1-X)X{rTIChc}GX{wRQ7DOnar4QLdh9`9cy9|G1Dl8M>ZvIo3<;7?U2nOj#1kV z)zch!dH08;t)Qg^6~3wzBd_4Rmf$ixxU)W{u)ZL6PJ_&KiL(xao>6zuyrYDeZpC2` zW2W=#4oC0sNc&z(A6k(?;76x`R@|B6T_XL?oX=jpgy+2)9P?sje{T*8JAc@Tvuh}~ z+Wgst^VU^=apxMGE2VR1PF=HEgOhgZGjBdQ;(*SBQ!aq>Y>97g_7NWz>Vh$oGjgm8=0cA<8 zdRKT(Ms&j%f%&iTmcv}jrwK-mZ4Iu%NBbFy@`>tHrWV__DnLp}Z-%crzEEFHDb>yJ zbDD;w*q&mi#c!|Z{Tq&$nd!`4{*{V;Z0>HaJgsng{h1_Uw$imt(f@B0{Ya)(D7pwx zd$r;WUv)O%S5S12TR%?w^jk&mUuDE1ZK0zI!l8h^RMGXBHbsA$ihi@dkEV1G+iL3! 
zKRN@vTK3nmwXExfd52Lf{6VC~w)tR<1`iUyt!+X~cFpMTwQo(_u z=dPkt_7CJ;`-*-rahsxlkcxh@zmKML%e?FI0({OM@N##d&k8*V0A}MD5T3|7VkQuEm?oAch+IJiV||0 zjZXt*Bpa;c9a=!^C|yO7qY^4DanxYbN&~vOzr_cu?|MSfZt|=HWOc(-6hx=$Z|%e# z&{App8|uK|VzG*%xaesBhqO7h4M)>JHZMbCcOchjXmUkTs+m**d%^C?1MxC*_c)c?l$=1Q%4>&I2&;DHOcY!Zr_#-Z=zn z&!kQRd7y*DX{K@oYaNK@#c_gysSQEr0qbU-ibBfIBuxX+>M#@(#MoZ%H4T`Vub?na zn={M<1D`_D`>67YiAU?@#9H+WGBEHdL~=orN}JIQo|{}~TV3Y?>vz>E3M5eXS8}?+ zJEsKgH`{5zzAc$i*blbnMyG*fbq-D_9h<`r((2W&C_c5nd^Qba>tJq05yPa?l6`rj zpd?WJJ`d1Hu1ZMC&oR}3GSdA5S=R+Q<2~SW!9Y4Kg4Io5QAlZpzLk3{zi_hV8NTf_ z-4C4JMliZ?vc6u_&8ZBW^_x`WSvoquNp+JK_M0|5Fi#&WFDyazyJzKv`dnLHsL^hD z;g%O}dEu59Zh4_eyXA#jUby9jch3vA{A|n5w)|{r2Fh~7Sm&JBO@4OPNwKy07l&4$ z{FblSzZ7Ui8-O*}7QZR9Tl}`gZ(IEKE5>gJ`?bb*h(Tyiyv_r+8Xy<=4vr#~-^rN= z(!qDEXmx>HaHkzLef|00`&Z0Izw(axOiOQj19tFfe_zRdjdELr!>VC-y<9h-MCitqh==usuAKMJSCCwLK{ST{J Jf}U27000Jugrfie diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java index bdb4c868..25fc510a 100644 --- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java +++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.core.priority.additive; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.TestDataConfig; import org.monarchinitiative.svanna.core.hpo.SimilarityScoreCalculator; import org.monarchinitiative.svanna.core.service.PhenotypeDataService; @@ -8,10 +9,6 @@ import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.monarchinitiative.phenol.ontology.data.TermId; -import org.monarchinitiative.sgenes.model.Gene; -import org.monarchinitiative.sgenes.model.GeneIdentifier; -import 
org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import org.monarchinitiative.svart.*; import org.monarchinitiative.svart.assembly.GenomicAssemblies; import org.monarchinitiative.svart.assembly.GenomicAssembly; @@ -73,7 +70,8 @@ public void calculateRelevance() { TranscriptIdentifier txId = TranscriptIdentifier.of("TX_ACCESSION", "FBN1", null); List exons = List.of(Coordinates.of(CoordinateSystem.oneBased(), 48_408_313, 48_645_721)); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.oneBased(), 48_408_313, 48_645_721); - List transcripts = List.of(Transcript.of(txId, location, exons, cdsCoordinates)); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + List transcripts = List.of(Transcript.of(txId, location, exons, cdsCoordinates, metadata)); Gene gene = Gene.of(id, location, transcripts); diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java index 5a87ef68..e6988bde 100644 --- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java +++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.core.priority.additive.evaluator.getad; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.TestContig; import org.monarchinitiative.svanna.core.TestEnhancer; import org.monarchinitiative.svanna.core.TestTad; @@ -12,10 +13,6 @@ import org.junit.jupiter.params.provider.CsvSource; import org.monarchinitiative.svanna.core.priority.additive.*; import org.monarchinitiative.svart.*; -import org.monarchinitiative.sgenes.model.Gene; 
-import org.monarchinitiative.sgenes.model.GeneIdentifier; -import org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import java.util.List; @@ -40,7 +37,8 @@ private static Gene makeGene(String id, String symbol, Contig contig, int start, TranscriptIdentifier txId = TranscriptIdentifier.of(id + "_tx", symbol + "_tx", null); List exons = List.of(Coordinates.of(CoordinateSystem.zeroBased(), start, end)); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start, end); - Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata); GeneIdentifier geneId = GeneIdentifier.of(id, symbol, null, null); return Gene.of(geneId, location, List.of(tx)); diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java index 2b9e10f2..8db06c98 100644 --- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java +++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.core.priority.additive.impact; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.TestContig; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.params.ParameterizedTest; @@ -9,10 +10,6 @@ import org.monarchinitiative.svart.Coordinates; import org.monarchinitiative.svart.GenomicRegion; import org.monarchinitiative.svart.Strand; -import org.monarchinitiative.sgenes.model.Gene; -import 
org.monarchinitiative.sgenes.model.GeneIdentifier; -import org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import java.util.List; @@ -34,7 +31,8 @@ private static Gene makeGene(TestContig contig, int start, int end, TranscriptIdentifier txId = TranscriptIdentifier.of("TX1", "TX1_SYMBOL", null); List exons = makeExons(oneStart, oneEnd, twoStart, twoEnd, threeStart, threeEnd); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start + 10, end - 10); - Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata); // make gene GeneIdentifier gId = GeneIdentifier.of("NCBIGene:123", "A", null, null); diff --git a/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz b/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz index 82558f0506a0e4b971f6e775cdd7cc24a501ecd9..7d50da6d42737fc5b647694c3a1e882e8cb4252b 100644 GIT binary patch literal 3727 zcmV;A4sh`wiwFP!00000|Lt90Z)G(Of1giL^_Ar+|Li!tbqWJ$p(DZYz#_CNor)a^ zW>(YDYF8`1``S78_TJn0IQQI^)3TfaA;z(L630K=Nu2bDlm9+DIXQjv`t;xM}#u&u_23tUv4Yi_b64 zKM9U}I_0+CgyHm;cUL>#`T3`_tE=mqn>X*?R*Cbk`CEu_6>h)t;r->?;m3WvrCq-N zAfylg2>IW2!}Y)^g?5*FE93oaQ*LhZ};?gaAznx$m^dqCQA{rDoY7m zN45k=s%(kmZP^leB0CB^`EjiQp5^m*@7}+D^Y-$?_066l?+hk+buc9*NW%|(w$rJ; zLFR;xR?#-d@DvP^TpTgTlxS{)9P7;E8P%;4emA2!`SIDhV;$!nTp}ndA2sLx{MGYB zW6nL5=pMwkf7JMPgj`CA`<@+THpt!Iujh<8jrsLDWD`b|9vDR*DJa_0fTn}F5H#Kq z=Od0IKo}r7Mb3v(Ou69NARy$P928HA9J*+rTo|x47#DITBU~{zmB>)UtprUD+i{s-h)q>!_>aRV>0`pjQ;*EFCwNe;_`URHIF^UrIiVZfsC>g z3e8m3aeBN(_Z)7`>I^p&XSj^oUl&s26o&>zcTW~uhk8c|lOVDlOlnX*e2pS)VRCr3 znIL9bWy5C1VNwXfyq{ceK4aFHzq0V3c-^9zI0dwrgM%K708?)=l zCMvotI8Pl}D6MO*4d@(nWYPO3ehv-68xf|mkZ*H~p&^EzEGw&tPeTKEdybwS>WD6X(aJtjK_UGGL#}Ot1%ps9DdWz<#VV9Mfgu%8NhwyU5i;&|Az!PJHA 
zIP762T$gtHa6Piy^~!v>);sgGan1iMxPC-jZw>YFagEtiFmdhOqsQws=Z4l2ub&34 z`?7!|vKV{1dU8HtYvfPCC&XaKdLJIyC#-k97N79v@(F*$SnrHOi2?dK>Kk)gBY!}j zz+Wp{pK$o5c%o4Jij#qMQ~cZSzIhSGw#C^H^2B}d><>H!^f5+;Iv(x%?b}Jn2+1;*inJ51VatFB4%@yR%ZY)TN~$HosNET(BSQNaRf3 zfNfBrZio!UlF~65wZjZ@@T+o)|OsYBZkqL5EHX{ta z&A)^kvT-ag43Ha#B4o6x>x3akRqoc1Cb^+%5Up2~A4r?@@!6zRi3f17N+uv-6p)!c zBF%=##`#u7`ndP?J|fS)d-d|`vsa&edLK|Ked&X80oP36+StTIaIwNj9rzx&4v+7V zmuw7=ti<={-<+Qr>HSa{G9kRff!Tcw(Ssw{kMFz&-=^U(9i9w62LXs*ipk=A%46oOy|UY-2R0F8FHE=Lg+{q*SeHSkHOf6V2;3@Z9rG) zo(%6G9GsOm?r)%56~UDwByF5bC&RN;j@0heR62TNn@2cGN{y2r=*9kKbV^C2(7jm` zDIDx6wQ=$vt141BP)Q4zp1fJ7C>#me!p}-aur-vBa}iAvsq;-p)CNMKLm(r93I|)f z)?0>(wIw7RIjc4VxmYVv=D+|!vjkRou{DIzXU!5Yg0-2>9GPR0*4xbMTH6w`EA7%( z=p0mR>&uAPgzrkvR&A|t6m~h9BkFo+CtbapFEhKJ*wN#soZD zcg_(aP@sD7d*5r~h#_u|PTDb>`wZ-@V>mKf$y4?%s}`u;^xNuTwzGuMt}q=ftP2M# zyWQCnI#`(lb7XSLL4?pf89t3UBav@60ENy;mnE5` zV~QlxIZD}*Sg766XS%q3Q<4*;ws=P9;Lw)LF6MRzTj&r?4W*o@jp9NF*U?bsK(Bri z$f~#zjtp(CT6EQ&Z@Vn91Js6 zlCo*>!wHrq$x9g1`oMuw;#KLKblH+QBvnH>nukryQIc{>C3OCeDarWNq9D`Jj!4Ww zUem1?tF$Ev+RB|u$BqpJm!c-g9sz9KhJaf9WP0vcLWqu(=m4_EU~4FF0M)P(AX#gl zfkVYg0PJ9eSL~=^CD@M%R1T%CN$4DKYz>KmG)stVmZraDkE5)gD;=!WzHkr&N|PiE zc5hP%s%}$A9o?oBQR{6=$<7#o6Cy40GTkgU6AlC|Ixrp0su$+qL`uU72RZYoJhJM8 zI-fi(9SyPioi0gMNuF@9UsWY#x@<{IN+p5N!P@r`4pQFLsdTe`m^n}ah$4xcjN83M z2B?Nlw%QI>Fs61_qtbaHV@vXcYSxErf3{CRN#s>Yva=c~&eZZ-riW--0&=pY*)3-e z*0UzED3Nj%MUqzlcCHCo%uJIM9odpRZQ874_CvOYI7e-3Q1=(n(@H&L>6)~q0WU^^ zJL>p^?Hpj_e#d%4;;74=a?&Zj~?Q2Ms%f&?Yh-Lo7-+uEPdn) zB|9q6m9Vdd3+5>z1@YV6S&>oo(pb%tTjdDl1^m<3|VseLD=Hjw!o7qdWdZGGy{;4Kd)EEuV+79AQ&==2$-YD1MJ z``NR~l3_&FJfs$u>{DBHux`LkpR-raZ`TdcdIn5bBLAz(M)SbAvZ&9k4GmVASY^q| zD+ucbYeiUD$h9vo3=Q5YdKg$j(RD?UI`hj$+rCV(Zm`N62Nu+Ak|eD5WDDuv_0GD^ zR#^hI3-6(!^lXDcJ|dgIx(ruY-1-H7-a)C#E3Evqq{cf$MxXyU^CGt{bf1V^&#G zPU`&1Vy@HYoKt%sJ2cqO7&8mU!S?3o(2%VXz=37N=Wu|uimoe*&+Ts-8yd*ET3cDf zF{!d>zq2STIjM1<8?a|rC1mCIAJvA^!+uGmV?l0dcl2B!pe_r^DvPfysMIvyS-ebp z8?vr7d|Ng;d@bf^6K}?aF5yc-#X(;Wt7yo&3adG+Phs%pE>i_az 
zUR__y-!6am;^mi4{`imzq@7QfznM60{{4^d-~CjX{`TTIL_9S7{l~Z0zps5=e)Z*x zt1p5h50~8bvoKu#^zQoPIX`~?^!obs&CQ#4Z>z-BpZHmbaTji%^Wpugw^MhYA8D`N ze-Kg#DG~C&`-ayOr;rPloNk{go!nf%fAh}|H^T7k;rXY7J|Ffsb=c8^^L|(MPu9l$ zd2s&Ne0}}H&3^P(SHFE3V%+)5+W7PS9nTanM+LeP3^nMAIKIK_f4zHqriR&lQFbZs z`|8P3Oj(sBNNyloLIPE`MDn(5i98R3@n3wm*8oq_)w_4^e|You)rZ$NXVY28aH8`MD zc0OvZ_1QPifO@VKN_6+)SwCw$E2dmZiRXS5Wj4rtyjRT`GxWS_9kPi6f_nzRX9@_; zG@$8VCII^VI3KZZz`~HgDRMrPV#)>Ax?mxXDaQ&eiX8c3&jSVZc?2+cUQS7qV?kT4cunqTcZ-~s7!o>d;3fF5 z!eMzq62~D@9M+dtf9QHEU(!NvRc7Xa-X{;4qs+O#VlumtjQ-<2Rz!r*@mM_Ongp)# zNM%7-mQj{Mp_$4$PLH?fp5c|OzPO<{1M&57E7hoTq_oDy?Iz4d`4N$fEa6^qLxiH~f2L;U%%ID~e$x%gSn^!_<(xJx7lZ zwS<&9FSHJ|z{zDPJ2FrU25NJ8Ak-#IQL~;ksO|OTQ+7DA;tjPMYL}vx?J8)BSdUsZ zn7XLzqc%MdEK9rdwIA7SvOgP^dS8zlmi*5K%TEZ)qn&+vSYq}ROjvsN$)hDK*o)bs z<>P?mSTB5VNdKxJ{cDW0&e#s0kej2vr*yQ@ zdxSJUtsEgee>)Z^6u;w8+-}Fd{L9zRLvKTt4IwYwkh}qh*tt`d2NRS5hl9tD* zELriAwSi2pAuL5(Q^@$84;BI|OTgA!)dq-z=c0Ft$2r&gGW1RpP(r8E_2_kd^m+@u z3+rk+=xx8JBp1nnKgVuz5IDfx3vx4bw&?1RN65u{My{5lhzde%Z2qW(nW*jd6FChw zP*)jB6McjXV4#K>N^X;9AyY9BBMdC4jF(di^LgNM`*~Ez1#LObW15c*qF-m_62joy z>_^BU8=qr^A?3y&2pO%KCSk}?mAkcrMP8&DMC;U~LpmgHPZp*0J|&lSVB#rt@s!!2 z(Srf1@tswXKAt^&2B4IF#(>&@@j$>h%;!R2u);_k_!%(fht|l89X9Q(gw|(YUp+Mv zMn5lG62DpWT^V!CGO?0b< zvT_8_#>sRt97E-Rc9Wvg(Hr|4!chPkA3f2F{mkf;Dm0;cvkXl**wJa@9lHL^yI*Z7Ahp zt>KshLkgNDu*!?AA&fq2mVhx?o50MGIh1H`n|VcOTS9iFT{H@vV}fmc84;W4Tj|*< zXB3XYE=QB(G{E*InHN&(GfNrZlf-E&k{mz4er90<)nVYA8qZOAT`rP(JYpo&RG>GJdrv$aJ(L5_6E(bnC?`ZAnUP#Yv^( zz=nc@s7bO%NVaZ6KrMbUJr67)L`O<=NU}v>YbbC4)vyv0SWB0QL&Zu+IKT=oxKYDO zuuo4{4y7JP=p1lt4T*v@ONeZiroUy6qbz?bJz0x&;UET-CP^6Vw}lW?-9mtYZUG`{ zZwml+#+W!E(jqU@%`z$Bz@$Y7rlVQz!5o}OX;|SPXMSgntooqNCr`^jL#%$MOOjPe zCLHWnRY{pHTM|L}3@`L#ZQBS3DffU>x>>f$94G-qkwi|$?YBh+sD@9r+5uKDLc1wZ z>AaA!C3!+M>qE9b+b5tT@~R}+S&bAGYWXeGL$oacIoZLg_83fj_u7bC%~`gp;1o>J#jzaOgV6JA2+d~E2X#RRR?WudP$A$$}1U8RG=&2Tn*qnyS}@# zeWHk&Mr`yXRl~-DNH|cr1E|u23rLS9+ey%3?GL&P%pifc9@k;i-1%p(1E(D=x&_ae}|#?1jjWNmcIN_A?`Z?*;{@pK{N5W_Am)>Ulk>rn 
zvXn9C2Y?S?=@(o|G+SA5xjfw@H3qFoH`SiDbn}sd<>8~*BT6^Xm0Z#QpwtB@l7FCR z)(7a(HEjU>p9JXb{N5W_&ZF7<9bF=y;?9GzqkHw|Kf=@0+tLle<%L_ia}O(eM-U4g z?m#&VlgaX%3iq+8%MQ@Fo!37+Oyjmo8B<()8vrdiH*rT3x-MqA39BzgEhjB~V`i2V zONRDdi`qcu?LW+tifii zZ++~ZSVGZtMUe*c%SPKC=ifJ2NZId6-Tm#^yhkKokFTCf!bB;)KEsY!65I* zLbA>jnDdj*GOIv|m8wPh4 zRhHP+i%bn^C`-Am7?~Q#yl08Sf!sTw*%iu`hCRP1d4M**+)&8IAUaOeHM^3Z??7A6 zQi0kYGMXC5IyELN9*XKG<#e2RD;%=~7gXQQ4KRo)7`#%0v~L)_a|qZT2AmpbfI;MC zU0aPiHAM4HDq%^vO&j+O)+HpBg_WYk1#XB|iI}jY6x%z9rUrARDJ;y>=KA}FK}0&!lUu>LRO6n(lCfAh;kQHV-x8+= z`_fZp;W*eH*qa)%Rr)otbbJm6NUPYkviRKoq|Vep)``fx2*ADbtQb1icfmIG&Sx~8Ey|eaMc9&gUYxuTYaC%zIYc7|K z=y^^%Kbdm7n(LPUcwkLD*lMl>HCnD(&FOt@t2s?}Tg`2&xotJKt>(7XoGH7l=C;+` zwwn9+)!eqK+E!J+>Qxoz7co-jaL(u=khbtKlW0V8&F5Sn^Yg2UDbxtPmknNLwkn_sxtWL(SYoM exons = List.of(Coordinates.of(CoordinateSystem.zeroBased(), start, end)); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start, end); - Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata); GeneIdentifier geneId = GeneIdentifier.of(id, symbol, null, null); return Gene.of(geneId, location, List.of(tx)); diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java index c8764c12..7df25179 100644 --- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java +++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java @@ -15,6 +15,9 @@ import org.apache.commons.io.IOUtils; import org.flywaydb.core.Flyway; import org.flywaydb.core.api.output.MigrateResult; +import org.monarchinitiative.phenol.annotations.assoc.GeneInfoGeneType; +import org.monarchinitiative.phenol.annotations.formats.hpo.*; +import 
org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaders; import org.monarchinitiative.svanna.core.LogUtils; import org.monarchinitiative.svanna.core.SvAnnaRuntimeException; import org.monarchinitiative.svanna.core.hpo.TermPair; @@ -45,12 +48,6 @@ import org.monarchinitiative.svanna.model.landscape.enhancer.Enhancer; import org.monarchinitiative.svanna.model.landscape.tad.TadBoundary; import org.monarchinitiative.svanna.model.landscape.variant.PopulationVariant; -import org.monarchinitiative.phenol.annotations.assoc.HpoAssociationLoader; -import org.monarchinitiative.phenol.annotations.base.Ratio; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoader; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions; @@ -239,9 +236,13 @@ private static PhenotypeData downloadPhenotypeFiles(PhenotypeProperties properti LOGGER.debug("Parsing gene info file at {}", geneInfoPath.toAbsolutePath()); LOGGER.debug("Parsing MIM to gene medgen file at {}", mim2geneMedgenPath.toAbsolutePath()); HpoDiseaseLoaderOptions loaderOptions = HpoDiseaseLoaderOptions.of(DISEASE_DATABASES, true, HpoDiseaseLoaderOptions.DEFAULT_COHORT_SIZE); - HpoDiseaseLoader loader = HpoDiseaseLoader.of(hpo, loaderOptions); + HpoDiseaseLoader loader = HpoDiseaseLoaders.defaultLoader(hpo, loaderOptions); HpoDiseases diseases = loader.load(hpoAnnotationsPath); - HpoAssociationData hpoAssociationData = HpoAssociationLoader.loadHpoAssociationData(hpo, geneInfoPath, mim2geneMedgenPath, null, diseases); + HpoAssociationData hpoAssociationData = HpoAssociationData.builder(hpo) + 
.hpoDiseases(diseases) + .mim2GeneMedgen(mim2geneMedgenPath) + .homoSapiensGeneInfo(geneInfoPath, GeneInfoGeneType.DEFAULT) + .build(); // Ingest geneToDisease int updatedGeneToDisease = ingestGeneToDiseaseMap(hpoAssociationData, ncbiGeneToHgnc, diseases, geneDiseaseDao); @@ -313,7 +314,7 @@ private static int ingestGeneToDiseaseMap(HpoAssociationData hpoAssociationData, Map> geneToDisease = new HashMap<>(); // extract relevant bits and pieces for diseases, and map NCBIGene to HGNC - Map> geneToDiseaseIdMap = hpoAssociationData.geneToDiseases(); + Map> geneToDiseaseIdMap = hpoAssociationData.associations().geneIdToDiseaseIds(); Map diseaseMap = diseases.diseaseById(); for (TermId ncbiGeneTermId : geneToDiseaseIdMap.keySet()) { @@ -341,9 +342,7 @@ private static int ingestDiseaseToPhenotypes(GeneDiseaseDao geneDiseaseDao, HpoD int updated = 0; for (HpoDisease disease : diseases) { - List presentPhenotypeTermIds = disease.phenotypicAbnormalitiesStream() - // We assume that the terms with missing ratio are observed/present. 
- .filter(pa -> pa.ratio().map(Ratio::isPositive).orElse(true)) + List presentPhenotypeTermIds = disease.presentAnnotationsStream() .map(HpoDiseaseAnnotation::id) .collect(Collectors.toList()); updated += geneDiseaseDao.insertDiseaseToPhenotypes(disease.id().getValue(), presentPhenotypeTermIds); diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java index de50f09e..fed0d009 100644 --- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java +++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java @@ -2,7 +2,6 @@ import org.monarchinitiative.svanna.core.LogUtils; import org.monarchinitiative.svanna.core.hpo.TermPair; -import org.monarchinitiative.phenol.annotations.base.Ratio; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; @@ -37,9 +36,7 @@ public static Map precomputeIcMicaValues(Ontology ontology, diseaseIdToTermIds.putIfAbsent(diseaseId, new HashSet<>()); // add term ancestors - Set hpoTerms = disease.phenotypicAbnormalitiesStream() - // We assume that the terms with missing ratio are observed/present. 
- .filter(a -> a.ratio().map(Ratio::isPositive).orElse(true)) + Set hpoTerms = disease.presentAnnotationsStream() .map(HpoDiseaseAnnotation::id) .collect(Collectors.toSet()); Set inclAncestorTermIds = TermIds.augmentWithAncestors(ontology, hpoTerms, true); diff --git a/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java b/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java index 5e62b4bd..72d9a5b3 100644 --- a/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java +++ b/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java @@ -29,11 +29,12 @@ public class MakeSmallGencodeFileTest { @Test public void makeSmallGencodeFile() throws Exception { - Path gencodeGtf = Path.of("/home/ielis/data/gencode/gencode.v39.basic.annotation.gtf.gz"); - Path output = Path.of("../svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz"); + Path gencodeGtf = Path.of("/home/ielis/data/genes/gtf/hg38/gencode.v39.basic.annotation.gtf.gz"); + Path outputCore = Path.of("../svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz"); + Path outputCli = Path.of("../svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz"); // read Gencode genes & keep the target genes - GtfGeneParser parser = GtfGeneParserFactory.gtfGeneParser(gencodeGtf, ASSEMBLY); + GtfGeneParser parser = GtfGeneParserFactory.gencodeGeneParser(gencodeGtf, ASSEMBLY); Set targetGeneSymbols = Set.of("SURF1", "SURF2", "FBN1", "ZNF436", "ZBTB48", "HNF4A", "GCK", "BRCA2", "COL4A5", "SRY"); @@ -44,7 +45,10 @@ public void makeSmallGencodeFile() throws Exception { // write the target genes into the output GeneParserFactory parserFactory = GeneParserFactory.of(ASSEMBLY); GeneParser printer = parserFactory.forFormat(SerializationFormat.JSON); - try (OutputStream os = new BufferedOutputStream(new 
GzipCompressorOutputStream(Files.newOutputStream(output)))) { + try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(outputCore)))) { + printer.write(targetGenes, os); + } + try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(outputCli)))) { printer.write(targetGenes, os); } } diff --git a/svanna-io/pom.xml b/svanna-io/pom.xml index ff14b7bc..8efa20cf 100644 --- a/svanna-io/pom.xml +++ b/svanna-io/pom.xml @@ -23,6 +23,10 @@ com.github.samtools htsjdk + + org.apache.commons + commons-compress + org.monarchinitiative.phenol phenol-annotations From f5b3806e491e96cbbc52c4ebc1d8a717bc10a5ed Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 12:05:45 -0400 Subject: [PATCH 3/8] Use phenopacket-tools to read phenopackets. Signed-off-by: Daniel Danis --- pom.xml | 6 + svanna-cli/pom.xml | 42 +---- svanna-cli/src/assemble/distribution.xml | 6 +- ...acket.json => example-phenopacket.v1.json} | 9 +- .../src/examples/example-phenopacket.v2.json | 30 +++ .../svanna/cli/cmd/AnalysisData.java | 50 +++++ .../cli/cmd/AnalysisInputException.java | 28 +++ .../cli/cmd/PhenopacketAnalysisDataUtil.java | 175 ++++++++++++++++++ .../svanna/cli/cmd/PhenopacketImporter.java | 166 ----------------- .../svanna/cli/cmd/PrioritizeCommand.java | 124 ++++--------- 10 files changed, 336 insertions(+), 300 deletions(-) rename svanna-cli/src/examples/{example-phenopacket.json => example-phenopacket.v1.json} (79%) create mode 100644 svanna-cli/src/examples/example-phenopacket.v2.json create mode 100644 svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java create mode 100644 svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java create mode 100644 svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java delete mode 100644 
svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java diff --git a/pom.xml b/pom.xml index ca483278..77ae0c8a 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ ${java.version} 2.0.0 + 1.0.0-RC2 0.2.5 1.4.200 @@ -101,6 +102,11 @@ silent-genes-io ${silent.genes.version} + + org.phenopackets.phenopackettools + phenopacket-tools-io + ${phenopacket-tools.version} + org.phenopackets diff --git a/svanna-cli/pom.xml b/svanna-cli/pom.xml index a6bd16ea..7f373785 100644 --- a/svanna-cli/pom.xml +++ b/svanna-cli/pom.xml @@ -19,6 +19,10 @@ svanna-configuration ${project.parent.version} + + org.phenopackets.phenopackettools + phenopacket-tools-io + info.picocli picocli @@ -27,21 +31,11 @@ org.freemarker freemarker - org.apache.commons commons-csv - - org.phenopackets - phenopacket-schema - - - com.google.protobuf - protobuf-java-util - - org.monarchinitiative.svanna @@ -63,33 +57,7 @@ org.springframework.boot spring-boot-maven-plugin - - org.apache.maven.plugins - maven-resources-plugin - - - copy-resources - validate - - copy-resources - - - ${project.build.directory}/assembly-resources - - - src/examples - true - - example.csv - example.vcf - run_examples.sh - - - - - - - + org.apache.maven.plugins maven-assembly-plugin diff --git a/svanna-cli/src/assemble/distribution.xml b/svanna-cli/src/assemble/distribution.xml index e196be0c..6c964992 100644 --- a/svanna-cli/src/assemble/distribution.xml +++ b/svanna-cli/src/assemble/distribution.xml @@ -24,12 +24,12 @@ svanna-cli-${version}.jar + - ${project.build.directory}/assembly-resources + ${project.basedir}/src/examples ./examples - example.vcf - run_examples.sh + **/** diff --git a/svanna-cli/src/examples/example-phenopacket.json b/svanna-cli/src/examples/example-phenopacket.v1.json similarity index 79% rename from svanna-cli/src/examples/example-phenopacket.json rename to svanna-cli/src/examples/example-phenopacket.v1.json index b49994e8..692c6868 100644 --- 
a/svanna-cli/src/examples/example-phenopacket.json +++ b/svanna-cli/src/examples/example-phenopacket.v1.json @@ -1,7 +1,7 @@ { - "id": "example", + "id": "example-phenopacket", "subject": { - "id": "example", + "id": "Sample", "ageAtCollection": { "age": "P2Y" } @@ -12,11 +12,6 @@ "label": "Reduced von Willebrand factor activity" } }], - "htsFiles": [{ - "uri": "file:///***/***/example.vcf", - "htsFormat": "VCF", - "genomeAssembly": "hg38" - }], "metaData": { "created": "2021-07-13T15:08:53.846Z", "createdBy": "ExampleOrg:ExampleCurator", diff --git a/svanna-cli/src/examples/example-phenopacket.v2.json b/svanna-cli/src/examples/example-phenopacket.v2.json new file mode 100644 index 00000000..2bc8291b --- /dev/null +++ b/svanna-cli/src/examples/example-phenopacket.v2.json @@ -0,0 +1,30 @@ +{ + "id": "example-phenopacket", + "subject": { + "id": "Sample", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P2Y" + } + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0008330", + "label": "Reduced von Willebrand factor activity" + } + }], + "metaData": { + "created": "2021-07-13T15:08:53.846Z", + "createdBy": "ExampleOrg:ExampleCurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "unknown HPO version", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java new file mode 100644 index 00000000..24e42aa6 --- /dev/null +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java @@ -0,0 +1,50 @@ +package org.monarchinitiative.svanna.cli.cmd; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.nio.file.Path; +import java.util.List; +import java.util.Objects; + +/** + * SvAnna 
requires these inputs for the analysis. + */ +class AnalysisData { + + private final List phenotypeTerms; + private final Path vcf; + + AnalysisData(List phenotypeTerms, Path vcf) { + this.phenotypeTerms = phenotypeTerms; + this.vcf = vcf; + } + + List phenotypeTerms() { + return phenotypeTerms; + } + + Path vcf() { + return vcf; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AnalysisData that = (AnalysisData) o; + return Objects.equals(phenotypeTerms, that.phenotypeTerms) && Objects.equals(vcf, that.vcf); + } + + @Override + public int hashCode() { + return Objects.hash(phenotypeTerms, vcf); + } + + @Override + public String toString() { + return "AnalysisData{" + + "phenotypeTerms=" + phenotypeTerms + + ", vcf=" + vcf + + '}'; + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java new file mode 100644 index 00000000..872fe6d7 --- /dev/null +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java @@ -0,0 +1,28 @@ +package org.monarchinitiative.svanna.cli.cmd; + +import org.monarchinitiative.svanna.core.SvAnnaRuntimeException; + +/** + * An exception thrown if inputs for the analysis are incomplete or otherwise invalid. 
+ */ +class AnalysisInputException extends SvAnnaRuntimeException { + AnalysisInputException() { + super(); + } + + AnalysisInputException(String message) { + super(message); + } + + AnalysisInputException(String message, Throwable cause) { + super(message, cause); + } + + AnalysisInputException(Throwable cause) { + super(cause); + } + + AnalysisInputException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java new file mode 100644 index 00000000..7a0b9b45 --- /dev/null +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java @@ -0,0 +1,175 @@ +package org.monarchinitiative.svanna.cli.cmd; + +import com.google.protobuf.Message; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; +import org.phenopackets.phenopackettools.util.format.SniffException; +import org.phenopackets.schema.v1.Phenopacket; +import org.phenopackets.schema.v1.core.HtsFile; +import org.phenopackets.schema.v2.core.File; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.*; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import 
java.util.stream.IntStream; + +/** + * Utility methods for reading {@link AnalysisData} from v1 or v2 phenopacket. + */ +class PhenopacketAnalysisDataUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(PhenopacketAnalysisDataUtil.class); + + private PhenopacketAnalysisDataUtil() { + } + + static AnalysisData parseV2Phenopacket(Path phenopacketPath, + Path cliVcfPath, + PhenopacketParserFactory parserFactory) throws AnalysisInputException { + Message message = parseMessage(phenopacketPath, parserFactory, PhenopacketSchemaVersion.V2); + + if (message instanceof org.phenopackets.schema.v2.Phenopacket) { + org.phenopackets.schema.v2.Phenopacket pp = (org.phenopackets.schema.v2.Phenopacket) message; + + // (1) Phenotype features + List phenotypeTermIds = new ArrayList<>(); + boolean reportExcludedFeature = true; + List phenotypicFeaturesList = pp.getPhenotypicFeaturesList(); + for (int i = 0; i < phenotypicFeaturesList.size(); i++) { + PhenotypicFeature pf = phenotypicFeaturesList.get(i); + // SvAnna does not support excluded features. + // As a matter of courtesy, let's warn the user about skipping the excluded features. 
+ if (pf.getExcluded()) { + if (reportExcludedFeature) { + reportExcludedFeature = false; + String excludedFeatureIndices = extractIndicesOfNegatedFeatures(phenotypicFeaturesList, PhenotypicFeature::getExcluded); + LOGGER.warn("Skipping unsupported excluded phenotype features {}", excludedFeatureIndices); + } + } else { + try { + TermId termId = TermId.of(pf.getType().getId()); + phenotypeTermIds.add(termId); + } catch (PhenolRuntimeException pre) { + LOGGER.warn("Skipping phenotype feature #{} due to invalid identifier {}", i, pf.getType().getId()); + } + } + } + + // (2) VCF path + // We take the 1st VCF file + List vcfFiles = pp.getFilesList().stream() + .filter(f -> "vcf".equalsIgnoreCase(f.getFileAttributesOrDefault("fileFormat", null))) + .collect(Collectors.toList()); + Path vcf = getVcfPath(cliVcfPath, vcfFiles, File::getUri); + return new AnalysisData(phenotypeTermIds, vcf); + } else { + // Shouldn't really happen but let's make sure we can report a meaningful error. + throw new AnalysisInputException(String.format("Unexpected instance %s!=%s", message.getClass().getName(), org.phenopackets.schema.v2.Phenopacket.class.getName())); + } + + } + + static AnalysisData parseV1Phenopacket(Path phenopacketPath, + Path cliVcfPath, + PhenopacketParserFactory parserFactory) throws AnalysisInputException { + Message message = parseMessage(phenopacketPath, parserFactory, PhenopacketSchemaVersion.V1); + if (message instanceof Phenopacket) { + Phenopacket pp = (Phenopacket) message; + + // (1) Phenotype features + List phenotypeTermIds = new ArrayList<>(); + boolean reportExcludedFeature = true; + List phenotypicFeaturesList = pp.getPhenotypicFeaturesList(); + for (int i = 0; i < phenotypicFeaturesList.size(); i++) { + org.phenopackets.schema.v1.core.PhenotypicFeature pf = phenotypicFeaturesList.get(i); + // SvAnna does not support excluded features. + // As a matter of courtesy, let's warn the user about skipping the excluded features. 
+ if (pf.getNegated()) { + if (reportExcludedFeature) { + reportExcludedFeature = false; + String excludedFeatureIndices = extractIndicesOfNegatedFeatures(phenotypicFeaturesList, org.phenopackets.schema.v1.core.PhenotypicFeature::getNegated); + LOGGER.warn("Skipping unsupported excluded phenotype features {}", excludedFeatureIndices); + } + } else { + try { + TermId termId = TermId.of(pf.getType().getId()); + phenotypeTermIds.add(termId); + } catch (PhenolRuntimeException pre) { + LOGGER.warn("Skipping phenotype feature #{} due to invalid identifier {}", i, pf.getType().getId()); + } + } + } + + // (2) VCF path + // We take the 1st VCF file + List vcfFiles = pp.getHtsFilesList().stream() + .filter(f -> f.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) + .collect(Collectors.toList()); + Path vcf = getVcfPath(cliVcfPath, vcfFiles, HtsFile::getUri); + return new AnalysisData(phenotypeTermIds, vcf); + } else { + // Again, shouldn't really happen but let's make sure we can report a meaningful error. + throw new AnalysisInputException(String.format("Unexpected instance %s!=%s", message.getClass().getName(), org.phenopackets.schema.v2.Phenopacket.class.getName())); + } + } + + private static Message parseMessage(Path phenopacketPath, + PhenopacketParserFactory parserFactory, + PhenopacketSchemaVersion schemaVersion) throws AnalysisInputException { + PhenopacketParser parser = parserFactory.forFormat(schemaVersion); + + Message message; + try { + message = parser.parse(PhenopacketElement.PHENOPACKET, phenopacketPath); + } catch (IOException | SniffException e) { + throw new AnalysisInputException(e); + } + return message; + } + + private static Path getVcfPath(Path cliVcfPath, + List files, + Function uriExtractor) throws AnalysisInputException { + if (files.isEmpty()) { + if (cliVcfPath == null) + throw new AnalysisInputException("VCF file was found neither in CLI arguments nor in the Phenopacket. 
Aborting."); + else + return cliVcfPath; + } else if (files.size() > 1) { + String fileUris = files.stream() + .map(uriExtractor) + .collect(Collectors.joining(", ", "[", "]")); + throw new AnalysisInputException(String.format("There must be exactly 1 VCF file in the phenopacket but got %s: %s", files.size(), fileUris)); + } else { + String uriStr = uriExtractor.apply(files.get(0)); + try { + URI uri = new URI(uriStr); + return Path.of(uri); + } catch (URISyntaxException e) { + LOGGER.warn("Invalid URI `{}`: {}", uriStr, e.getMessage()); + LOGGER.debug("Invalid URI `{}`: {}", uriStr, e.getMessage(), e); + throw new AnalysisInputException(e); + } + } + } + + private static String extractIndicesOfNegatedFeatures(List phenotypicFeaturesList, Predicate isNegated) { + return IntStream.range(0, phenotypicFeaturesList.size()) + .filter(idx -> isNegated.test(phenotypicFeaturesList.get(idx))) + .boxed() + .map(Objects::toString) + .collect(Collectors.joining(", ", "[", "]")); + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java deleted file mode 100644 index 3d7a1cb2..00000000 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java +++ /dev/null @@ -1,166 +0,0 @@ -package org.monarchinitiative.svanna.cli.cmd; - -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.util.JsonFormat; -import org.monarchinitiative.phenol.base.PhenolRuntimeException; -import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.phenopackets.schema.v1.Phenopacket; -import org.phenopackets.schema.v1.core.HtsFile; -import org.phenopackets.schema.v1.core.PhenotypicFeature; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; 
-import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - - -/** - * This class ingests a phenopacket, which is required to additionally contain the - * path of a VCF file that will be used for the analysis. - * @author Peter Robinson - */ -public class PhenopacketImporter { - - private static final Logger logger = LoggerFactory.getLogger(PhenopacketImporter.class); - private static final JsonFormat.Parser JSON_PARSER = JsonFormat.parser(); - /** The Phenopacket that represents the individual being sequenced in the current run. */ - private final Phenopacket phenoPacket; - /** Object representing the VCF file with variants identified in the subject of this Phenopacket. */ - private HtsFile vcfFile; - /** Reference to HPO ontology */ - private final Ontology hpo; - - /** - * Factory method to obtain a PhenopacketImporter object starting from a phenopacket in Json format - * - * @param phenopacketPath -- path to the phenopacket - * @return {@link PhenopacketImporter} object corresponding to the PhenoPacket - */ - public static PhenopacketImporter fromJson(Path phenopacketPath, Ontology ontology) throws IOException { - logger.trace("Importing Phenopacket: " + phenopacketPath); - if (!phenopacketPath.toFile().isFile()) { - logger.error("Could not find phenopacket file at " + phenopacketPath); - throw new IOException("Could not find phenopacket file at " + phenopacketPath); - } - try { - Phenopacket phenopacket = readPhenopacket(phenopacketPath); - return new PhenopacketImporter(phenopacket, ontology); - } catch (InvalidProtocolBufferException e) { - logger.error("Malformed phenopacket: " + e.getMessage()); - throw new IOException("Could not load phenopacket (" + phenopacketPath + "): " + e.getMessage()); - } catch (IOException e) { - throw new IOException("I/O Error: Could not load phenopacket (" + phenopacketPath + "): " + e.getMessage(), e); - } - } - - public static 
Phenopacket readPhenopacket(Path phenopacketPath) throws IOException { - logger.info("Reading phenopacket from `{}`", phenopacketPath.toAbsolutePath()); - try (BufferedReader reader = Files.newBufferedReader(phenopacketPath)) { - String phenopacketJsonString = reader.lines().collect(Collectors.joining(System.lineSeparator())); - Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder(); - JSON_PARSER.merge(phenopacketJsonString, phenoPacketBuilder); - return phenoPacketBuilder.build(); - } - } - - private PhenopacketImporter(Phenopacket ppack, Ontology ontology){ - this.phenoPacket=ppack; - this.hpo=ontology; - extractVcfData(); - } - - public boolean hasVcf() { return this.vcfFile !=null; } - - public List getHpoTerms() { - List builder = new ArrayList<>(); - for (PhenotypicFeature feature : phenoPacket.getPhenotypicFeaturesList()) { - if (feature.getNegated()) continue; - String id = feature.getType().getId(); - TermId tid = TermId.of(id); - if (! hpo.getTermMap().containsKey(tid)) { - logger.error("Could not identify HPO term id {}.",tid.getValue()); - logger.error("Please check the input file and update to the latest hp.obo file. "); - throw new PhenolRuntimeException("Could not identify HPO term id: "+tid.getValue()); - } else if (hpo.getObsoleteTermIds().contains(tid)) { - TermId current = hpo.getPrimaryTermId(tid); - builder.add(current); - logger.error("Replacing obsolete HPO term id {} with current id {}.",tid.getValue(),current.getValue()); - } else { - builder.add(tid); - } - } - return builder; - } - - - public List getNegatedHpoTerms() { - List builder = new ArrayList<>(); - for (PhenotypicFeature feature : phenoPacket.getPhenotypicFeaturesList()) { - if (! feature.getNegated()) continue; - String id = feature.getType().getId(); - TermId tid = TermId.of(id); - if (! 
hpo.getTermMap().containsKey(tid)) { - logger.error("Could not identify HPO term id {}.",tid.getValue()); - logger.error("Please check the input file and update to the latest hp.obo file. "); - throw new PhenolRuntimeException("Could not identify HPO term id: "+tid.getValue()); - } else if (hpo.getObsoleteTermIds().contains(tid)) { - TermId current = hpo.getPrimaryTermId(tid); - builder.add(current); - logger.error("Replacing obsolete HPO term id {} with current id {}.", tid.getValue(), current.getValue()); - } else { - builder.add(tid); - } - } - return builder; - } - - /** - * The path to the VCF file may be a string such as file:/path/to/examples/BBS1.vcf - * In this case, remove the prefix 'path:', otherwise return the original URI - * @return URI of VCF file mentioned in the Phenopacket - */ - - - public HtsFile getVcfFile() { - return this.vcfFile; - } - - public Path getVcfPath() { - if (this.vcfFile == null) { - return null; - } - String uri = this.vcfFile.getUri().startsWith("file:") ? - this.vcfFile.getUri().substring(5) : - this.vcfFile.getUri(); - return Paths.get(uri); - } - - public String getSampleName() { - return phenoPacket.getSubject().getId(); - } - - - /** This method extracts the VCF file and the corresponding GenomeBuild. We assume that - * the phenopacket contains a single VCF file and that this file is for a single person. 
*/ - private void extractVcfData() { - List htsFileList = phenoPacket.getHtsFilesList(); - if (htsFileList.size() > 1 ) { - logger.error("Warning: multiple HTsFiles associated with this phenopacket"); - logger.error("Warning: we will return the path to the first VCF file we find"); - } else if (htsFileList.isEmpty()) { - return; - } - for (HtsFile htsFile : htsFileList) { - if (htsFile.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) { - this.vcfFile = htsFile; - } - } - } -} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java index fad82c13..78e3a06b 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java @@ -24,15 +24,12 @@ import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; import org.monarchinitiative.svart.assembly.GenomicAssembly; -import org.phenopackets.schema.v1.Phenopacket; -import org.phenopackets.schema.v1.core.HtsFile; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; import java.text.NumberFormat; @@ -63,12 +60,12 @@ public class PrioritizeCommand extends SvAnnaCommand { public InputOptions inputOptions = new InputOptions(); public static class InputOptions { @CommandLine.Option(names = {"-p", "--phenopacket"}, - description = "Path to phenopacket.") + description = "Path to v1 or v2 phenopacket in JSON, YAML or Protobuf format.") public Path phenopacket = null; @CommandLine.Option(names = {"-t", "--phenotype-term"}, description = "HPO term ID(s). 
Can be provided multiple times.") - public List hpoTermIdList = List.of(); + public List hpoTermIdList = null; @CommandLine.Option(names = {"--vcf"}, description = "Path to the input VCF file.") @@ -139,14 +136,11 @@ public Integer call() { DataProperties dataProperties = dataProperties(); SvAnnaProperties svAnnaProperties = SvAnnaProperties.of(svannaDataDirectory, prioritizationProperties, dataProperties); - Optional analysisData = parseAnalysisData(); - if (analysisData.isEmpty()) - return 1; - try { - runAnalysis(analysisData.get(), svAnnaProperties); + AnalysisData analysisData = parseAnalysisData(); + runAnalysis(analysisData, svAnnaProperties); } catch (InterruptedException | ExecutionException | IOException | InvalidResourceException | - MissingResourceException | UndefinedResourceException e) { + MissingResourceException | UndefinedResourceException | AnalysisInputException e) { LOGGER.error("Error: {}", e.getMessage()); LOGGER.debug("Error: {}", e.getMessage(), e); return 1; @@ -156,76 +150,50 @@ public Integer call() { return 0; } - private Optional parseAnalysisData() { - Path vcf; - List phenotypeTermIds; - if (inputOptions.vcf != null) { // VCF & CLI - vcf = inputOptions.vcf; - phenotypeTermIds = inputOptions.hpoTermIdList.stream() + private AnalysisData parseAnalysisData() throws AnalysisInputException { + if (inputOptions.hpoTermIdList != null) { // CLI + LOGGER.info("Using {} phenotype features supplied via CLI", inputOptions.hpoTermIdList.size()); + Path vcf = inputOptions.vcf; + List phenotypeTermIds = inputOptions.hpoTermIdList.stream() .map(TermId::of) .collect(Collectors.toList()); - } else { // phenopacket - try { - Phenopacket phenopacket = PhenopacketImporter.readPhenopacket(inputOptions.phenopacket); - phenotypeTermIds = phenopacket.getPhenotypicFeaturesList().stream() - .map(pf -> TermId.of(pf.getType().getId())) - .collect(Collectors.toList()); - - Optional vcfFilePathOptional = getVcfFilePath(phenopacket); - if 
(vcfFilePathOptional.isEmpty()) { - if (inputOptions.vcf == null) { - LOGGER.error("VCF file was found neither in CLI arguments nor in the Phenopacket. Aborting."); - return Optional.empty(); - } else { - vcf = inputOptions.vcf; - } - } else { - LOGGER.info("VCF file was found in both CLI arguments and in the Phenopacket. Using the file from CLI: `{}`", inputOptions.vcf); - vcf = inputOptions.vcf; - } + return new AnalysisData(phenotypeTermIds, vcf); + } else { // Phenopacket + LOGGER.info("Using phenotype features from a phenopacket at {}", inputOptions.phenopacket.toAbsolutePath()); + PhenopacketParserFactory parserFactory = PhenopacketParserFactory.getInstance(); - } catch (IOException e) { - LOGGER.error("Error reading phenopacket at `{}`: {}", inputOptions.phenopacket, e.getMessage()); - return Optional.empty(); + // try v2 first + try { + LOGGER.debug("Trying v2 format first.."); + AnalysisData analysisData = PhenopacketAnalysisDataUtil.parseV2Phenopacket(inputOptions.phenopacket, inputOptions.vcf, parserFactory); + LOGGER.debug("Success!"); + return analysisData; + } catch (AnalysisInputException e) { + // swallow and try v1 + LOGGER.debug("Unable to decode {} as v2 phenopacket, falling back to v1", inputOptions.phenopacket.toAbsolutePath()); } - } - return Optional.of(new AnalysisData(phenotypeTermIds, vcf)); - } - - private static Optional getVcfFilePath(Phenopacket phenopacket) { - // There should be exactly one VCF file - LinkedList vcfFiles = phenopacket.getHtsFilesList().stream() - .filter(htsFile -> htsFile.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) - .distinct() - .collect(Collectors.toCollection(LinkedList::new)); - if (vcfFiles.isEmpty()) { - LOGGER.info("VCF file was not found in Phenopacket. 
Expecting to find the file among the CLI arguments"); - return Optional.empty(); + // try v1 or fail + AnalysisData analysisData = PhenopacketAnalysisDataUtil.parseV1Phenopacket(inputOptions.phenopacket, inputOptions.vcf, parserFactory); + LOGGER.debug("Success!"); + return analysisData; } - if (vcfFiles.size() > 1) - LOGGER.warn("Found >1 VCF files. Using the first one."); - - // The VCF file should have a proper URI - HtsFile vcf = vcfFiles.getFirst(); - try { - URI uri = new URI(vcf.getUri()); - return Optional.of(Path.of(uri)); - } catch (URISyntaxException e) { - LOGGER.warn("Invalid URI `{}`: {}", vcf.getUri(), e.getMessage()); - return Optional.empty(); - } } protected int checkArguments() { - if ((inputOptions.vcf == null) == (inputOptions.phenopacket == null)) { - LOGGER.error("Path to a VCF file or to a phenopacket must be supplied"); + if (inputOptions.hpoTermIdList == null && inputOptions.phenopacket == null) { + LOGGER.error("No phenotype features provided. Use the CLI or a phenopacket"); return 1; } - if (inputOptions.phenopacket != null && !inputOptions.hpoTermIdList.isEmpty()) { - LOGGER.error("Passing HPO terms both through CLI and Phenopacket is not supported"); + if (inputOptions.hpoTermIdList != null && inputOptions.phenopacket != null) { + LOGGER.error("Passing HPO terms both through CLI and Phenopacket is not supported. 
Choose one"); + return 1; + } + + if (inputOptions.vcf == null || inputOptions.phenopacket == null) { + LOGGER.error("Path to a VCF file or to a phenopacket must be supplied"); return 1; } @@ -356,22 +324,4 @@ private AnalysisParameters getAnalysisParameters(AnalysisData analysisData, SvAn return analysisParameters; } - - private static class AnalysisData { - private final List phenotypeTerms; - private final Path vcf; - - private AnalysisData(List phenotypeTerms, Path vcf) { - this.phenotypeTerms = phenotypeTerms; - this.vcf = vcf; - } - - public List phenotypeTerms() { - return phenotypeTerms; - } - - public Path vcf() { - return vcf; - } - } } From e5140f59ac79ba5a4d0f97d9a0c3d67aa5688682 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 12:23:20 -0400 Subject: [PATCH 4/8] Enhance logging. Signed-off-by: Daniel Danis --- .../svanna/cli/cmd/PrioritizeCommand.java | 4 +- .../svanna/cli/cmd/SvAnnaCommand.java | 60 +++++++++++++++++++ svanna-cli/src/main/resources/banner.txt | 10 ++++ svanna-cli/src/main/resources/logback.xml | 23 +------ 4 files changed, 75 insertions(+), 22 deletions(-) create mode 100644 svanna-cli/src/main/resources/banner.txt diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java index 78e3a06b..858bf87a 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java @@ -127,7 +127,7 @@ public static class OutputConfig { } @Override - public Integer call() { + public Integer execute() { int status = checkArguments(); if (status != 0) return status; @@ -192,7 +192,7 @@ protected int checkArguments() { return 1; } - if (inputOptions.vcf == null || inputOptions.phenopacket == null) { + if (inputOptions.vcf == null && inputOptions.phenopacket == null) { LOGGER.error("Path to a VCF 
file or to a phenopacket must be supplied"); return 1; } diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java index df5bee08..79ca0485 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java @@ -1,5 +1,7 @@ package org.monarchinitiative.svanna.cli.cmd; +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.LoggerContext; import org.monarchinitiative.svanna.cli.writer.ResultWriterFactory; import org.monarchinitiative.svanna.configuration.SvAnnaBuilder; import org.monarchinitiative.svanna.configuration.exception.InvalidResourceException; @@ -16,9 +18,11 @@ import org.slf4j.LoggerFactory; import picocli.CommandLine; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Path; +import java.util.Objects; import java.util.Properties; import java.util.concurrent.Callable; @@ -37,6 +41,11 @@ public abstract class SvAnnaCommand implements Callable { private static final double TAD_STABILITY_THRESHOLD = 80.; // ----------------------------------------------------------------------------------------------------------------- + @CommandLine.Option(names = {"-v"}, + description = {"Specify multiple -v options to increase verbosity.", + "For example, `-v -v -v` or `-vvv`"}) + protected boolean[] verbosity = {}; + @CommandLine.Option(names = {"-d", "--data-directory"}, paramLabel = "path/to/datadir", required = true, @@ -76,6 +85,17 @@ private static Properties readProperties() { return properties; } + @Override + public Integer call() { + // (0) Setup verbosity and print banner. + setupLoggingAndPrintBanner(); + + // (1) Run the command functionality. 
+ return execute(); + } + + protected abstract Integer execute(); + protected SvAnna bootstrapSvAnna(SvAnnaProperties svAnnaProperties) throws MissingResourceException, InvalidResourceException, UndefinedResourceException { LOGGER.info("Spooling up SvAnna v{} using resources in {}", SVANNA_VERSION, svAnnaProperties.dataDirectory().toAbsolutePath()); return SvAnnaBuilder.builder(svAnnaProperties) @@ -103,4 +123,44 @@ protected static ResultWriterFactory resultWriterFactory(SvAnna svAnna) { return new ResultWriterFactory(overlapper, svAnna.annotationDataService(), svAnna.phenotypeDataService()); } + private void setupLoggingAndPrintBanner() { + Level level = parseVerbosityLevel(); + + LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); + context.getLogger(Logger.ROOT_LOGGER_NAME).setLevel(level); + + printBanner(); + } + + private static String readBanner() { + try (InputStream is = new BufferedInputStream(Objects.requireNonNull(SvAnnaCommand.class.getResourceAsStream("/banner.txt")))) { + return new String(is.readAllBytes()); + } catch (IOException e) { + // swallow + return ""; + } + } + + private Level parseVerbosityLevel() { + int verbosity = 0; + for (boolean a : this.verbosity) { + if (a) verbosity++; + } + + switch (verbosity) { + case 0: + return Level.INFO; + case 1: + return Level.DEBUG; + case 2: + return Level.TRACE; + default: + return Level.ALL; + } + } + + private static void printBanner() { + System.err.println(readBanner()); + } + } diff --git a/svanna-cli/src/main/resources/banner.txt b/svanna-cli/src/main/resources/banner.txt new file mode 100644 index 00000000..7777b36c --- /dev/null +++ b/svanna-cli/src/main/resources/banner.txt @@ -0,0 +1,10 @@ + + _____ ___ + / ___/_ __/ | ____ ____ ____ _ + \__ \| | / / /| | / __ \/ __ \/ __ `/ + ___/ /| |/ / ___ |/ / / / / / / /_/ / +/____/ |___/_/ |_/_/ /_/_/ /_/\__,_/ + +Structural Variant Annotation and Analysis + :: v@project.version@ :: + diff --git 
a/svanna-cli/src/main/resources/logback.xml b/svanna-cli/src/main/resources/logback.xml index 9fc1379b..cc9ecca1 100644 --- a/svanna-cli/src/main/resources/logback.xml +++ b/svanna-cli/src/main/resources/logback.xml @@ -5,31 +5,14 @@ - - - - - INFO - - - ${pattern} - - - - - - - DEBUG - - svanna.log - false + + System.err ${pattern} - - + \ No newline at end of file From eaf22d0b529b559514602e6da8e0341cc123757b Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 12:30:29 -0400 Subject: [PATCH 5/8] Update the CI script. Signed-off-by: Daniel Danis --- .github/workflows/maven.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index b2573776..aee2d8f5 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -12,14 +12,11 @@ on: jobs: build: runs-on: ${{ matrix.os }} - env: - JDK_VERSION: ${{ matrix.jdk }} strategy: fail-fast: false matrix: os: [ windows-latest, macOS-latest, ubuntu-latest ] - jdk: [ 11 ] steps: - uses: actions/checkout@v2 @@ -27,8 +24,8 @@ jobs: - name: Set up JDK uses: actions/setup-java@v2 with: - java-version: '11' + java-version: '17' distribution: 'adopt' - name: Build with Maven - run: ./mvnw -B verify --file pom.xml + run: ./mvnw --batch-mode verify From 0db4322b05e8c3ebc1cd74e4a80821f8cd9faba9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 12:38:48 -0400 Subject: [PATCH 6/8] Update the documentation. Signed-off-by: Daniel Danis --- docs/running.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/running.rst b/docs/running.rst index f5e8bd94..d1876f46 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -34,9 +34,9 @@ Analysis input ############## The input data can be specified in two ways: either as a path to a VCF file along with one or more HPO terms, -or as a *Phenopacket*: +or as a *phenopacket*: -* ``-p | --phenopacket`` - path to Phenopacket in JSON format. 
+* ``-p | --phenopacket`` - path to a phenopacket file. We support *v1* and *v2* schemas and the file can be in JSON, YAML, or protobuf binary format. * ``-t | --phenotype-term`` - HPO term describing clinical condition of the proband, may be specified multiple times (e.g. ``--term HP:1234567 --term HP:9876543``). * ``--vcf`` - path to the input VCF file. @@ -84,6 +84,7 @@ SvAnna configuration * ``--promoter-length`` - number of bases pre-pended to a transcript and evaluated as a promoter region (default: ``2000``). * ``--promoter-fitness-gain`` - set to ``0.`` to score the promoter variants as strictly as coding variants or to ``1.`` to completely disregard the promoter variants (default: ``0.6``). +* ``-v`` - set logging output granularity. The option can be set multiple times (e.g. ``-vv``) to increase logging output. See the next section to learn more about the SvAnna :ref:`rstoutputformats`, and the :ref:`rstexamples` section to see how SvAnna prioritizes various SV classes. From 4b609077799071933415e748d6040cf40a2226eb Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 12:42:00 -0400 Subject: [PATCH 7/8] Update CHANGELOG.rst Signed-off-by: Daniel Danis --- CHANGELOG.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c4100cd3..55374904 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,13 @@ Changelog ========= ------ -latest +v1.0.3 ------ +- ``SvAnna`` + - Update dependency versions. +- ``svanna-cli`` + - Add support for *v1* and *v2* phenopackets in JSON, YAML, or protobuf binary formats. + - Rework the logging functionality to log all output to standard error and to allow setting the logging granularity via the ``-v`` CLI option. ------ v1.0.2 From a9560afad79f7c17f59625ee3d123162911c4491 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 12 Apr 2023 12:43:21 -0400 Subject: [PATCH 8/8] Update versions to `1.0.3`.
Signed-off-by: Daniel Danis --- docs/conf.py | 2 +- pom.xml | 2 +- svanna-benchmark/pom.xml | 2 +- svanna-cli/pom.xml | 2 +- .../src/main/java/org/monarchinitiative/svanna/cli/Main.java | 2 +- svanna-configuration/pom.xml | 2 +- svanna-core/pom.xml | 2 +- svanna-db/pom.xml | 2 +- svanna-ingest/pom.xml | 2 +- svanna-io/pom.xml | 2 +- svanna-model/pom.xml | 2 +- svanna-test/pom.xml | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b2e54e5d..2d13836b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ # The short X.Y version. version = u'1.0' # The full version, including alpha/beta/rc tags. -release = u'1.0.2' +release = u'1.0.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pom.xml b/pom.xml index 77ae0c8a..8f156007 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,7 @@ svanna-benchmark org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 SvAnna diff --git a/svanna-benchmark/pom.xml b/svanna-benchmark/pom.xml index a3a082d9..b8b58f4b 100644 --- a/svanna-benchmark/pom.xml +++ b/svanna-benchmark/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 diff --git a/svanna-cli/pom.xml b/svanna-cli/pom.xml index 7f373785..a48d43ae 100644 --- a/svanna-cli/pom.xml +++ b/svanna-cli/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 svanna-cli diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java index b0ec5a70..e16da4d3 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java @@ -17,7 +17,7 @@ footer = Main.FOOTER) public class Main implements Callable { - public static final String VERSION = "svanna-cli v1.0.2"; + public static final String VERSION = "svanna-cli 
v1.0.3"; public static final int WIDTH = 120; diff --git a/svanna-configuration/pom.xml b/svanna-configuration/pom.xml index 7a499b1b..1dc947de 100644 --- a/svanna-configuration/pom.xml +++ b/svanna-configuration/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 diff --git a/svanna-core/pom.xml b/svanna-core/pom.xml index cf841316..eb949207 100644 --- a/svanna-core/pom.xml +++ b/svanna-core/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 diff --git a/svanna-db/pom.xml b/svanna-db/pom.xml index 73a72f52..52878476 100644 --- a/svanna-db/pom.xml +++ b/svanna-db/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 diff --git a/svanna-ingest/pom.xml b/svanna-ingest/pom.xml index a6194641..8933ac32 100644 --- a/svanna-ingest/pom.xml +++ b/svanna-ingest/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 svanna-ingest diff --git a/svanna-io/pom.xml b/svanna-io/pom.xml index 8efa20cf..d103020c 100644 --- a/svanna-io/pom.xml +++ b/svanna-io/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 diff --git a/svanna-model/pom.xml b/svanna-model/pom.xml index 98cfa424..87eca9af 100644 --- a/svanna-model/pom.xml +++ b/svanna-model/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0 diff --git a/svanna-test/pom.xml b/svanna-test/pom.xml index 0145d070..c5d40d9c 100644 --- a/svanna-test/pom.xml +++ b/svanna-test/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.3-SNAPSHOT + 1.0.3 4.0.0