diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index b2573776..aee2d8f5 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -12,14 +12,11 @@ on:
jobs:
build:
runs-on: ${{ matrix.os }}
- env:
- JDK_VERSION: ${{ matrix.jdk }}
strategy:
fail-fast: false
matrix:
os: [ windows-latest, macOS-latest, ubuntu-latest ]
- jdk: [ 11 ]
steps:
- uses: actions/checkout@v2
@@ -27,8 +24,8 @@ jobs:
- name: Set up JDK
uses: actions/setup-java@v2
with:
- java-version: '11'
+ java-version: '17'
distribution: 'adopt'
- name: Build with Maven
- run: ./mvnw -B verify --file pom.xml
+ run: ./mvnw --batch-mode verify
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c4100cd3..55374904 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -3,8 +3,13 @@ Changelog
=========
------
-latest
+v1.0.3
------
+- ``SvAnna``
+ - Update dependency versions.
+- ``svanna-cli``
+ - Add support for *v1* and *v2* phenopackets in JSON, YAML, or protobuf binary formats.
+ - Rework logging to write all log output to standard error and to allow setting the logging granularity via the ``-v`` CLI option.
------
v1.0.2
diff --git a/docs/conf.py b/docs/conf.py
index b2e54e5d..2d13836b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -56,7 +56,7 @@
# The short X.Y version.
version = u'1.0'
# The full version, including alpha/beta/rc tags.
-release = u'1.0.2'
+release = u'1.0.3'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/running.rst b/docs/running.rst
index f5e8bd94..d1876f46 100644
--- a/docs/running.rst
+++ b/docs/running.rst
@@ -34,9 +34,9 @@ Analysis input
##############
The input data can be specified in two ways: either as a path to a VCF file along with one or more HPO terms,
-or as a *Phenopacket*:
+or as a *phenopacket*:
-* ``-p | --phenopacket`` - path to Phenopacket in JSON format.
+* ``-p | --phenopacket`` - path to a phenopacket file. We support *v1* and *v2* schemas and the file can be in JSON, YAML, or protobuf binary format.
* ``-t | --phenotype-term`` - HPO term describing clinical condition of the proband, may be specified multiple times (e.g. ``--term HP:1234567 --term HP:9876543``).
* ``--vcf`` - path to the input VCF file.
@@ -84,6 +84,7 @@ SvAnna configuration
* ``--promoter-length`` - number of bases pre-pended to a transcript and evaluated as a promoter region (default: ``2000``).
* ``--promoter-fitness-gain`` - set to ``0.`` to score the promoter variants as strictly as coding variants
or to ``1.`` to completely disregard the promoter variants (default: ``0.6``).
+* ``-v`` - set logging output granularity. The option can be set multiple times (e.g. ``-vv``) to increase logging output.
See the next section to learn more about the SvAnna :ref:`rstoutputformats`,
and the :ref:`rstexamples` section to see how SvAnna prioritizes various SV classes.
diff --git a/pom.xml b/pom.xml
index 2fe0b855..8f156007 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,13 +16,13 @@
svanna-benchmark
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
SvAnna
org.springframework.boot
spring-boot-starter-parent
- 2.6.10
+ 3.0.5
@@ -32,8 +32,9 @@
${java.version}
${java.version}
- 2.0.0-RC2
- 0.2.0
+ 2.0.0
+ 1.0.0-RC2
+ 0.2.5
1.4.200
@@ -101,6 +102,11 @@
silent-genes-io
${silent.genes.version}
+
+ org.phenopackets.phenopackettools
+ phenopacket-tools-io
+ ${phenopacket-tools.version}
+
org.phenopackets
@@ -125,7 +131,7 @@
com.github.samtools
htsjdk
- 2.23.0
+ 3.0.5
org.tukaani
diff --git a/svanna-benchmark/pom.xml b/svanna-benchmark/pom.xml
index 837e72e9..b8b58f4b 100644
--- a/svanna-benchmark/pom.xml
+++ b/svanna-benchmark/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
diff --git a/svanna-cli/pom.xml b/svanna-cli/pom.xml
index 565c07bd..a48d43ae 100644
--- a/svanna-cli/pom.xml
+++ b/svanna-cli/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
svanna-cli
@@ -19,6 +19,10 @@
svanna-configuration
${project.parent.version}
+
+ org.phenopackets.phenopackettools
+ phenopacket-tools-io
+
info.picocli
picocli
@@ -27,21 +31,11 @@
org.freemarker
freemarker
-
org.apache.commons
commons-csv
-
- org.phenopackets
- phenopacket-schema
-
-
- com.google.protobuf
- protobuf-java-util
-
-
org.monarchinitiative.svanna
@@ -63,33 +57,7 @@
org.springframework.boot
spring-boot-maven-plugin
-
- org.apache.maven.plugins
- maven-resources-plugin
-
-
- copy-resources
- validate
-
- copy-resources
-
-
- ${project.build.directory}/assembly-resources
-
-
- src/examples
- true
-
- example.csv
- example.vcf
- run_examples.sh
-
-
-
-
-
-
-
+
org.apache.maven.plugins
maven-assembly-plugin
diff --git a/svanna-cli/src/assemble/distribution.xml b/svanna-cli/src/assemble/distribution.xml
index e196be0c..6c964992 100644
--- a/svanna-cli/src/assemble/distribution.xml
+++ b/svanna-cli/src/assemble/distribution.xml
@@ -24,12 +24,12 @@
svanna-cli-${version}.jar
+
- ${project.build.directory}/assembly-resources
+ ${project.basedir}/src/examples
./examples
- example.vcf
- run_examples.sh
+ **/**
diff --git a/svanna-cli/src/examples/example-phenopacket.json b/svanna-cli/src/examples/example-phenopacket.v1.json
similarity index 79%
rename from svanna-cli/src/examples/example-phenopacket.json
rename to svanna-cli/src/examples/example-phenopacket.v1.json
index b49994e8..692c6868 100644
--- a/svanna-cli/src/examples/example-phenopacket.json
+++ b/svanna-cli/src/examples/example-phenopacket.v1.json
@@ -1,7 +1,7 @@
{
- "id": "example",
+ "id": "example-phenopacket",
"subject": {
- "id": "example",
+ "id": "Sample",
"ageAtCollection": {
"age": "P2Y"
}
@@ -12,11 +12,6 @@
"label": "Reduced von Willebrand factor activity"
}
}],
- "htsFiles": [{
- "uri": "file:///***/***/example.vcf",
- "htsFormat": "VCF",
- "genomeAssembly": "hg38"
- }],
"metaData": {
"created": "2021-07-13T15:08:53.846Z",
"createdBy": "ExampleOrg:ExampleCurator",
diff --git a/svanna-cli/src/examples/example-phenopacket.v2.json b/svanna-cli/src/examples/example-phenopacket.v2.json
new file mode 100644
index 00000000..2bc8291b
--- /dev/null
+++ b/svanna-cli/src/examples/example-phenopacket.v2.json
@@ -0,0 +1,30 @@
+{
+ "id": "example-phenopacket",
+ "subject": {
+ "id": "Sample",
+ "timeAtLastEncounter": {
+ "age": {
+ "iso8601duration": "P2Y"
+ }
+ }
+ },
+ "phenotypicFeatures": [{
+ "type": {
+ "id": "HP:0008330",
+ "label": "Reduced von Willebrand factor activity"
+ }
+ }],
+ "metaData": {
+ "created": "2021-07-13T15:08:53.846Z",
+ "createdBy": "ExampleOrg:ExampleCurator",
+ "resources": [{
+ "id": "hp",
+ "name": "human phenotype ontology",
+ "url": "http://purl.obolibrary.org/obo/hp.owl",
+ "version": "unknown HPO version",
+ "namespacePrefix": "HP",
+ "iriPrefix": "http://purl.obolibrary.org/obo/HP_"
+ }],
+ "phenopacketSchemaVersion": "2.0.0"
+ }
+}
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java
index b0ec5a70..e16da4d3 100644
--- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java
+++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java
@@ -17,7 +17,7 @@
footer = Main.FOOTER)
public class Main implements Callable {
- public static final String VERSION = "svanna-cli v1.0.2";
+ public static final String VERSION = "svanna-cli v1.0.3";
public static final int WIDTH = 120;
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java
new file mode 100644
index 00000000..24e42aa6
--- /dev/null
+++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java
@@ -0,0 +1,50 @@
+package org.monarchinitiative.svanna.cli.cmd;
+
+import org.monarchinitiative.phenol.ontology.data.TermId;
+
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * SvAnna requires these inputs for the analysis:
+ * term IDs of the phenotype features and path to the VCF file.
+ */
+class AnalysisData {
+
+    private final List<TermId> phenotypeTerms;
+    private final Path vcf;
+
+    AnalysisData(List<TermId> phenotypeTerms, Path vcf) {
+        this.phenotypeTerms = phenotypeTerms;
+        this.vcf = vcf;
+    }
+
+    /** @return term IDs of the phenotype features, exactly as passed to the constructor (no defensive copy). */
+    List<TermId> phenotypeTerms() {
+        return phenotypeTerms;
+    }
+
+    /** @return path to the VCF file, exactly as passed to the constructor. */
+    Path vcf() {
+        return vcf;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        AnalysisData that = (AnalysisData) o;
+        return Objects.equals(phenotypeTerms, that.phenotypeTerms) && Objects.equals(vcf, that.vcf);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(phenotypeTerms, vcf);
+    }
+
+    @Override
+    public String toString() {
+        return "AnalysisData{phenotypeTerms=" + phenotypeTerms + ", vcf=" + vcf + '}';
+    }
+}
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java
new file mode 100644
index 00000000..872fe6d7
--- /dev/null
+++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java
@@ -0,0 +1,28 @@
+package org.monarchinitiative.svanna.cli.cmd;
+
+import org.monarchinitiative.svanna.core.SvAnnaRuntimeException;
+
+/**
+ * An exception thrown if inputs for the analysis are incomplete or otherwise invalid, e.g. if the phenopacket cannot be read or if no VCF file is provided.
+ */
+class AnalysisInputException extends SvAnnaRuntimeException {
+ AnalysisInputException() {
+ super();
+ }
+
+ AnalysisInputException(String message) {
+ super(message);
+ }
+
+ AnalysisInputException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ AnalysisInputException(Throwable cause) {
+ super(cause);
+ }
+
+ AnalysisInputException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+}
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java
new file mode 100644
index 00000000..7a0b9b45
--- /dev/null
+++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java
@@ -0,0 +1,175 @@
+package org.monarchinitiative.svanna.cli.cmd;
+
+import com.google.protobuf.Message;
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenol.ontology.data.TermId;
+import org.phenopackets.phenopackettools.core.PhenopacketElement;
+import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion;
+import org.phenopackets.phenopackettools.io.PhenopacketParser;
+import org.phenopackets.phenopackettools.io.PhenopacketParserFactory;
+import org.phenopackets.phenopackettools.util.format.SniffException;
+import org.phenopackets.schema.v1.Phenopacket;
+import org.phenopackets.schema.v1.core.HtsFile;
+import org.phenopackets.schema.v2.core.File;
+import org.phenopackets.schema.v2.core.PhenotypicFeature;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.function.Function;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/**
+ * Utility methods for reading {@link AnalysisData} from v1 or v2 phenopacket.
+ */
+class PhenopacketAnalysisDataUtil {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(PhenopacketAnalysisDataUtil.class);
+
+    private PhenopacketAnalysisDataUtil() {
+    }
+
+    /**
+     * Read {@link AnalysisData} from a v2 phenopacket.
+     *
+     * @param phenopacketPath path to the phenopacket file.
+     * @param cliVcfPath VCF path from the CLI, used if the phenopacket lists no VCF file. Can be {@code null}.
+     * @param parserFactory factory that provides the phenopacket parser.
+     * @return phenotype term IDs and the VCF path for the analysis.
+     * @throws AnalysisInputException if the file cannot be parsed or if the VCF path cannot be determined.
+     */
+    static AnalysisData parseV2Phenopacket(Path phenopacketPath,
+                                           Path cliVcfPath,
+                                           PhenopacketParserFactory parserFactory) throws AnalysisInputException {
+        Message message = parseMessage(phenopacketPath, parserFactory, PhenopacketSchemaVersion.V2);
+        if (!(message instanceof org.phenopackets.schema.v2.Phenopacket)) {
+            // Shouldn't really happen but let's make sure we can report a meaningful error.
+            throw unexpectedInstance(message, org.phenopackets.schema.v2.Phenopacket.class);
+        }
+        org.phenopackets.schema.v2.Phenopacket pp = (org.phenopackets.schema.v2.Phenopacket) message;
+
+        // (1) Phenotype features
+        List<TermId> phenotypeTermIds = parsePhenotypeTermIds(pp.getPhenotypicFeaturesList(),
+                PhenotypicFeature::getExcluded, pf -> pf.getType().getId());
+
+        // (2) VCF path - v2 phenopacket stores the format in the `fileFormat` file attribute.
+        List<File> vcfFiles = pp.getFilesList().stream()
+                .filter(f -> "vcf".equalsIgnoreCase(f.getFileAttributesOrDefault("fileFormat", null)))
+                .collect(Collectors.toList());
+        return new AnalysisData(phenotypeTermIds, getVcfPath(cliVcfPath, vcfFiles, File::getUri));
+    }
+
+    /**
+     * Read {@link AnalysisData} from a v1 phenopacket.
+     *
+     * @param phenopacketPath path to the phenopacket file.
+     * @param cliVcfPath VCF path from the CLI, used if the phenopacket lists no VCF file. Can be {@code null}.
+     * @param parserFactory factory that provides the phenopacket parser.
+     * @return phenotype term IDs and the VCF path for the analysis.
+     * @throws AnalysisInputException if the file cannot be parsed or if the VCF path cannot be determined.
+     */
+    static AnalysisData parseV1Phenopacket(Path phenopacketPath,
+                                           Path cliVcfPath,
+                                           PhenopacketParserFactory parserFactory) throws AnalysisInputException {
+        Message message = parseMessage(phenopacketPath, parserFactory, PhenopacketSchemaVersion.V1);
+        if (!(message instanceof Phenopacket)) {
+            // Again, shouldn't really happen. The expected class here is the v1 phenopacket, not the v2 one.
+            throw unexpectedInstance(message, Phenopacket.class);
+        }
+        Phenopacket pp = (Phenopacket) message;
+
+        // (1) Phenotype features
+        List<TermId> phenotypeTermIds = parsePhenotypeTermIds(pp.getPhenotypicFeaturesList(),
+                org.phenopackets.schema.v1.core.PhenotypicFeature::getNegated, pf -> pf.getType().getId());
+
+        // (2) VCF path
+        List<HtsFile> vcfFiles = pp.getHtsFilesList().stream()
+                .filter(f -> f.getHtsFormat().equals(HtsFile.HtsFormat.VCF))
+                .collect(Collectors.toList());
+        return new AnalysisData(phenotypeTermIds, getVcfPath(cliVcfPath, vcfFiles, HtsFile::getUri));
+    }
+
+    /** Parse the file with a parser for the given schema version. I/O and sniffing errors are wrapped. */
+    private static Message parseMessage(Path phenopacketPath,
+                                        PhenopacketParserFactory parserFactory,
+                                        PhenopacketSchemaVersion schemaVersion) throws AnalysisInputException {
+        PhenopacketParser parser = parserFactory.forFormat(schemaVersion);
+        try {
+            return parser.parse(PhenopacketElement.PHENOPACKET, phenopacketPath);
+        } catch (IOException | SniffException e) {
+            throw new AnalysisInputException(e);
+        }
+    }
+
+    /** Prepare an exception reporting that the parsed message is not an instance of the expected class. */
+    private static AnalysisInputException unexpectedInstance(Message message, Class<?> expected) {
+        String actual = message == null ? "null" : message.getClass().getName();
+        return new AnalysisInputException(String.format("Unexpected instance %s!=%s", actual, expected.getName()));
+    }
+
+    /**
+     * Collect term IDs of the phenotypic features that are not excluded.
+     * SvAnna does not support excluded features. As a matter of courtesy, we warn the user once
+     * and report indices of all excluded features. A feature with an invalid identifier is skipped with a warning.
+     */
+    private static <T> List<TermId> parsePhenotypeTermIds(List<T> features, Predicate<T> isExcluded, Function<T, String> idExtractor) {
+        List<TermId> termIds = new ArrayList<>();
+        boolean reportExcludedFeature = true;
+        for (int i = 0; i < features.size(); i++) {
+            T feature = features.get(i);
+            if (isExcluded.test(feature)) {
+                if (reportExcludedFeature) {
+                    reportExcludedFeature = false;
+                    LOGGER.warn("Skipping unsupported excluded phenotype features {}", extractIndicesOfNegatedFeatures(features, isExcluded));
+                }
+                continue;
+            }
+            String id = idExtractor.apply(feature);
+            try {
+                termIds.add(TermId.of(id));
+            } catch (PhenolRuntimeException pre) {
+                LOGGER.warn("Skipping phenotype feature #{} due to invalid identifier {}", i, id);
+            }
+        }
+        return termIds;
+    }
+
+    /**
+     * Choose the VCF path. The VCF file of the phenopacket is used if there is exactly one,
+     * {@code cliVcfPath} is used if the phenopacket lists no VCF file, and more than one VCF file is an error.
+     */
+    private static <T> Path getVcfPath(Path cliVcfPath, List<T> files, Function<T, String> uriExtractor) throws AnalysisInputException {
+        if (files.isEmpty()) {
+            if (cliVcfPath == null)
+                throw new AnalysisInputException("VCF file was found neither in CLI arguments nor in the Phenopacket. Aborting.");
+            return cliVcfPath;
+        }
+        if (files.size() > 1) {
+            String fileUris = files.stream().map(uriExtractor).collect(Collectors.joining(", ", "[", "]"));
+            throw new AnalysisInputException(String.format("There must be exactly 1 VCF file in the phenopacket but got %s: %s", files.size(), fileUris));
+        }
+        String uriStr = uriExtractor.apply(files.get(0));
+        try {
+            return Path.of(new URI(uriStr));
+        } catch (URISyntaxException | IllegalArgumentException | java.nio.file.FileSystemNotFoundException e) {
+            // `Path.of(URI)` throws unchecked exceptions, e.g. for a relative URI or for an unsupported scheme.
+            LOGGER.warn("Invalid URI `{}`: {}", uriStr, e.getMessage());
+            LOGGER.debug("Invalid URI `{}`: {}", uriStr, e.getMessage(), e);
+            throw new AnalysisInputException(e);
+        }
+    }
+
+    /** Format indices of the features that match the {@code isNegated} predicate, e.g. {@code [0, 3]}. */
+    private static <T> String extractIndicesOfNegatedFeatures(List<T> phenotypicFeaturesList, Predicate<T> isNegated) {
+        return IntStream.range(0, phenotypicFeaturesList.size())
+                .filter(idx -> isNegated.test(phenotypicFeaturesList.get(idx)))
+                .mapToObj(String::valueOf)
+                .collect(Collectors.joining(", ", "[", "]"));
+    }
+}
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java
deleted file mode 100644
index 3d7a1cb2..00000000
--- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java
+++ /dev/null
@@ -1,166 +0,0 @@
-package org.monarchinitiative.svanna.cli.cmd;
-
-import com.google.protobuf.InvalidProtocolBufferException;
-import com.google.protobuf.util.JsonFormat;
-import org.monarchinitiative.phenol.base.PhenolRuntimeException;
-import org.monarchinitiative.phenol.ontology.data.Ontology;
-import org.monarchinitiative.phenol.ontology.data.TermId;
-import org.phenopackets.schema.v1.Phenopacket;
-import org.phenopackets.schema.v1.core.HtsFile;
-import org.phenopackets.schema.v1.core.PhenotypicFeature;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
-
-
-/**
- * This class ingests a phenopacket, which is required to additionally contain the
- * path of a VCF file that will be used for the analysis.
- * @author Peter Robinson
- */
-public class PhenopacketImporter {
-
- private static final Logger logger = LoggerFactory.getLogger(PhenopacketImporter.class);
- private static final JsonFormat.Parser JSON_PARSER = JsonFormat.parser();
- /** The Phenopacket that represents the individual being sequenced in the current run. */
- private final Phenopacket phenoPacket;
- /** Object representing the VCF file with variants identified in the subject of this Phenopacket. */
- private HtsFile vcfFile;
- /** Reference to HPO ontology */
- private final Ontology hpo;
-
- /**
- * Factory method to obtain a PhenopacketImporter object starting from a phenopacket in Json format
- *
- * @param phenopacketPath -- path to the phenopacket
- * @return {@link PhenopacketImporter} object corresponding to the PhenoPacket
- */
- public static PhenopacketImporter fromJson(Path phenopacketPath, Ontology ontology) throws IOException {
- logger.trace("Importing Phenopacket: " + phenopacketPath);
- if (!phenopacketPath.toFile().isFile()) {
- logger.error("Could not find phenopacket file at " + phenopacketPath);
- throw new IOException("Could not find phenopacket file at " + phenopacketPath);
- }
- try {
- Phenopacket phenopacket = readPhenopacket(phenopacketPath);
- return new PhenopacketImporter(phenopacket, ontology);
- } catch (InvalidProtocolBufferException e) {
- logger.error("Malformed phenopacket: " + e.getMessage());
- throw new IOException("Could not load phenopacket (" + phenopacketPath + "): " + e.getMessage());
- } catch (IOException e) {
- throw new IOException("I/O Error: Could not load phenopacket (" + phenopacketPath + "): " + e.getMessage(), e);
- }
- }
-
- public static Phenopacket readPhenopacket(Path phenopacketPath) throws IOException {
- logger.info("Reading phenopacket from `{}`", phenopacketPath.toAbsolutePath());
- try (BufferedReader reader = Files.newBufferedReader(phenopacketPath)) {
- String phenopacketJsonString = reader.lines().collect(Collectors.joining(System.lineSeparator()));
- Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder();
- JSON_PARSER.merge(phenopacketJsonString, phenoPacketBuilder);
- return phenoPacketBuilder.build();
- }
- }
-
- private PhenopacketImporter(Phenopacket ppack, Ontology ontology){
- this.phenoPacket=ppack;
- this.hpo=ontology;
- extractVcfData();
- }
-
- public boolean hasVcf() { return this.vcfFile !=null; }
-
- public List getHpoTerms() {
- List builder = new ArrayList<>();
- for (PhenotypicFeature feature : phenoPacket.getPhenotypicFeaturesList()) {
- if (feature.getNegated()) continue;
- String id = feature.getType().getId();
- TermId tid = TermId.of(id);
- if (! hpo.getTermMap().containsKey(tid)) {
- logger.error("Could not identify HPO term id {}.",tid.getValue());
- logger.error("Please check the input file and update to the latest hp.obo file. ");
- throw new PhenolRuntimeException("Could not identify HPO term id: "+tid.getValue());
- } else if (hpo.getObsoleteTermIds().contains(tid)) {
- TermId current = hpo.getPrimaryTermId(tid);
- builder.add(current);
- logger.error("Replacing obsolete HPO term id {} with current id {}.",tid.getValue(),current.getValue());
- } else {
- builder.add(tid);
- }
- }
- return builder;
- }
-
-
- public List getNegatedHpoTerms() {
- List builder = new ArrayList<>();
- for (PhenotypicFeature feature : phenoPacket.getPhenotypicFeaturesList()) {
- if (! feature.getNegated()) continue;
- String id = feature.getType().getId();
- TermId tid = TermId.of(id);
- if (! hpo.getTermMap().containsKey(tid)) {
- logger.error("Could not identify HPO term id {}.",tid.getValue());
- logger.error("Please check the input file and update to the latest hp.obo file. ");
- throw new PhenolRuntimeException("Could not identify HPO term id: "+tid.getValue());
- } else if (hpo.getObsoleteTermIds().contains(tid)) {
- TermId current = hpo.getPrimaryTermId(tid);
- builder.add(current);
- logger.error("Replacing obsolete HPO term id {} with current id {}.", tid.getValue(), current.getValue());
- } else {
- builder.add(tid);
- }
- }
- return builder;
- }
-
- /**
- * The path to the VCF file may be a string such as file:/path/to/examples/BBS1.vcf
- * In this case, remove the prefix 'path:', otherwise return the original URI
- * @return URI of VCF file mentioned in the Phenopacket
- */
-
-
- public HtsFile getVcfFile() {
- return this.vcfFile;
- }
-
- public Path getVcfPath() {
- if (this.vcfFile == null) {
- return null;
- }
- String uri = this.vcfFile.getUri().startsWith("file:") ?
- this.vcfFile.getUri().substring(5) :
- this.vcfFile.getUri();
- return Paths.get(uri);
- }
-
- public String getSampleName() {
- return phenoPacket.getSubject().getId();
- }
-
-
- /** This method extracts the VCF file and the corresponding GenomeBuild. We assume that
- * the phenopacket contains a single VCF file and that this file is for a single person. */
- private void extractVcfData() {
- List htsFileList = phenoPacket.getHtsFilesList();
- if (htsFileList.size() > 1 ) {
- logger.error("Warning: multiple HTsFiles associated with this phenopacket");
- logger.error("Warning: we will return the path to the first VCF file we find");
- } else if (htsFileList.isEmpty()) {
- return;
- }
- for (HtsFile htsFile : htsFileList) {
- if (htsFile.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) {
- this.vcfFile = htsFile;
- }
- }
- }
-}
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java
index fad82c13..858bf87a 100644
--- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java
+++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java
@@ -24,15 +24,12 @@
import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.monarchinitiative.svart.assembly.GenomicAssembly;
-import org.phenopackets.schema.v1.Phenopacket;
-import org.phenopackets.schema.v1.core.HtsFile;
+import org.phenopackets.phenopackettools.io.PhenopacketParserFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import picocli.CommandLine;
import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.NumberFormat;
@@ -63,12 +60,12 @@ public class PrioritizeCommand extends SvAnnaCommand {
public InputOptions inputOptions = new InputOptions();
public static class InputOptions {
@CommandLine.Option(names = {"-p", "--phenopacket"},
- description = "Path to phenopacket.")
+ description = "Path to v1 or v2 phenopacket in JSON, YAML or Protobuf format.")
public Path phenopacket = null;
@CommandLine.Option(names = {"-t", "--phenotype-term"},
description = "HPO term ID(s). Can be provided multiple times.")
- public List hpoTermIdList = List.of();
+ public List hpoTermIdList = null;
@CommandLine.Option(names = {"--vcf"},
description = "Path to the input VCF file.")
@@ -130,7 +127,7 @@ public static class OutputConfig {
}
@Override
- public Integer call() {
+ public Integer execute() {
int status = checkArguments();
if (status != 0)
return status;
@@ -139,14 +136,11 @@ public Integer call() {
DataProperties dataProperties = dataProperties();
SvAnnaProperties svAnnaProperties = SvAnnaProperties.of(svannaDataDirectory, prioritizationProperties, dataProperties);
- Optional analysisData = parseAnalysisData();
- if (analysisData.isEmpty())
- return 1;
-
try {
- runAnalysis(analysisData.get(), svAnnaProperties);
+ AnalysisData analysisData = parseAnalysisData();
+ runAnalysis(analysisData, svAnnaProperties);
} catch (InterruptedException | ExecutionException | IOException | InvalidResourceException |
- MissingResourceException | UndefinedResourceException e) {
+ MissingResourceException | UndefinedResourceException | AnalysisInputException e) {
LOGGER.error("Error: {}", e.getMessage());
LOGGER.debug("Error: {}", e.getMessage(), e);
return 1;
@@ -156,76 +150,50 @@ public Integer call() {
return 0;
}
- private Optional parseAnalysisData() {
- Path vcf;
- List phenotypeTermIds;
- if (inputOptions.vcf != null) { // VCF & CLI
- vcf = inputOptions.vcf;
- phenotypeTermIds = inputOptions.hpoTermIdList.stream()
+ private AnalysisData parseAnalysisData() throws AnalysisInputException {
+ if (inputOptions.hpoTermIdList != null) { // CLI
+ LOGGER.info("Using {} phenotype features supplied via CLI", inputOptions.hpoTermIdList.size());
+ Path vcf = inputOptions.vcf;
+ List phenotypeTermIds = inputOptions.hpoTermIdList.stream()
.map(TermId::of)
.collect(Collectors.toList());
- } else { // phenopacket
- try {
- Phenopacket phenopacket = PhenopacketImporter.readPhenopacket(inputOptions.phenopacket);
- phenotypeTermIds = phenopacket.getPhenotypicFeaturesList().stream()
- .map(pf -> TermId.of(pf.getType().getId()))
- .collect(Collectors.toList());
-
- Optional vcfFilePathOptional = getVcfFilePath(phenopacket);
- if (vcfFilePathOptional.isEmpty()) {
- if (inputOptions.vcf == null) {
- LOGGER.error("VCF file was found neither in CLI arguments nor in the Phenopacket. Aborting.");
- return Optional.empty();
- } else {
- vcf = inputOptions.vcf;
- }
- } else {
- LOGGER.info("VCF file was found in both CLI arguments and in the Phenopacket. Using the file from CLI: `{}`", inputOptions.vcf);
- vcf = inputOptions.vcf;
- }
+ return new AnalysisData(phenotypeTermIds, vcf);
+ } else { // Phenopacket
+ LOGGER.info("Using phenotype features from a phenopacket at {}", inputOptions.phenopacket.toAbsolutePath());
+ PhenopacketParserFactory parserFactory = PhenopacketParserFactory.getInstance();
- } catch (IOException e) {
- LOGGER.error("Error reading phenopacket at `{}`: {}", inputOptions.phenopacket, e.getMessage());
- return Optional.empty();
+ // try v2 first
+ try {
+ LOGGER.debug("Trying v2 format first..");
+ AnalysisData analysisData = PhenopacketAnalysisDataUtil.parseV2Phenopacket(inputOptions.phenopacket, inputOptions.vcf, parserFactory);
+ LOGGER.debug("Success!");
+ return analysisData;
+ } catch (AnalysisInputException e) {
+ // swallow and try v1
+ LOGGER.debug("Unable to decode {} as v2 phenopacket, falling back to v1", inputOptions.phenopacket.toAbsolutePath());
}
- }
- return Optional.of(new AnalysisData(phenotypeTermIds, vcf));
- }
-
- private static Optional getVcfFilePath(Phenopacket phenopacket) {
- // There should be exactly one VCF file
- LinkedList vcfFiles = phenopacket.getHtsFilesList().stream()
- .filter(htsFile -> htsFile.getHtsFormat().equals(HtsFile.HtsFormat.VCF))
- .distinct()
- .collect(Collectors.toCollection(LinkedList::new));
- if (vcfFiles.isEmpty()) {
- LOGGER.info("VCF file was not found in Phenopacket. Expecting to find the file among the CLI arguments");
- return Optional.empty();
+ // try v1 or fail
+ AnalysisData analysisData = PhenopacketAnalysisDataUtil.parseV1Phenopacket(inputOptions.phenopacket, inputOptions.vcf, parserFactory);
+ LOGGER.debug("Success!");
+ return analysisData;
}
- if (vcfFiles.size() > 1)
- LOGGER.warn("Found >1 VCF files. Using the first one.");
-
- // The VCF file should have a proper URI
- HtsFile vcf = vcfFiles.getFirst();
- try {
- URI uri = new URI(vcf.getUri());
- return Optional.of(Path.of(uri));
- } catch (URISyntaxException e) {
- LOGGER.warn("Invalid URI `{}`: {}", vcf.getUri(), e.getMessage());
- return Optional.empty();
- }
}
protected int checkArguments() {
- if ((inputOptions.vcf == null) == (inputOptions.phenopacket == null)) {
- LOGGER.error("Path to a VCF file or to a phenopacket must be supplied");
+ if (inputOptions.hpoTermIdList == null && inputOptions.phenopacket == null) {
+ LOGGER.error("No phenotype features provided. Use the CLI or a phenopacket");
return 1;
}
- if (inputOptions.phenopacket != null && !inputOptions.hpoTermIdList.isEmpty()) {
- LOGGER.error("Passing HPO terms both through CLI and Phenopacket is not supported");
+ if (inputOptions.hpoTermIdList != null && inputOptions.phenopacket != null) {
+ LOGGER.error("Passing HPO terms both through CLI and Phenopacket is not supported. Choose one");
+ return 1;
+ }
+
+ if (inputOptions.vcf == null && inputOptions.phenopacket == null) {
+ LOGGER.error("Path to a VCF file or to a phenopacket must be supplied");
return 1;
}
@@ -356,22 +324,4 @@ private AnalysisParameters getAnalysisParameters(AnalysisData analysisData, SvAn
return analysisParameters;
}
-
- private static class AnalysisData {
- private final List phenotypeTerms;
- private final Path vcf;
-
- private AnalysisData(List phenotypeTerms, Path vcf) {
- this.phenotypeTerms = phenotypeTerms;
- this.vcf = vcf;
- }
-
- public List phenotypeTerms() {
- return phenotypeTerms;
- }
-
- public Path vcf() {
- return vcf;
- }
- }
}
diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java
index df5bee08..79ca0485 100644
--- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java
+++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java
@@ -1,5 +1,7 @@
package org.monarchinitiative.svanna.cli.cmd;
+import ch.qos.logback.classic.Level;
+import ch.qos.logback.classic.LoggerContext;
import org.monarchinitiative.svanna.cli.writer.ResultWriterFactory;
import org.monarchinitiative.svanna.configuration.SvAnnaBuilder;
import org.monarchinitiative.svanna.configuration.exception.InvalidResourceException;
@@ -16,9 +18,11 @@
import org.slf4j.LoggerFactory;
import picocli.CommandLine;
+import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
+import java.util.Objects;
import java.util.Properties;
import java.util.concurrent.Callable;
@@ -37,6 +41,11 @@ public abstract class SvAnnaCommand implements Callable {
private static final double TAD_STABILITY_THRESHOLD = 80.;
// -----------------------------------------------------------------------------------------------------------------
+ // Repeatable `-v` flag; parseVerbosityLevel() counts the `true` elements of this array to choose the log level.
+ @CommandLine.Option(names = {"-v"},
+ description = {"Specify multiple -v options to increase verbosity.",
+ "For example, `-v -v -v` or `-vvv`"})
+ protected boolean[] verbosity = {};
+
@CommandLine.Option(names = {"-d", "--data-directory"},
paramLabel = "path/to/datadir",
required = true,
@@ -76,6 +85,17 @@ private static Properties readProperties() {
return properties;
}
+ /**
+ * Set the log level and print the banner, then run {@link #execute()}.
+ *
+ * @return the value returned by {@link #execute()}
+ */
+ @Override
+ public Integer call() {
+ // Step 0: apply the requested verbosity and show the banner.
+ setupLoggingAndPrintBanner();
+
+ // Step 1: delegate to the concrete command.
+ final Integer result = execute();
+ return result;
+ }
+
+ /**
+ * Run the command functionality. Invoked by {@link #call()} after logging has been set up and the banner printed.
+ *
+ * @return the value that {@link #call()} passes back to its caller
+ */
+ protected abstract Integer execute();
+
protected SvAnna bootstrapSvAnna(SvAnnaProperties svAnnaProperties) throws MissingResourceException, InvalidResourceException, UndefinedResourceException {
LOGGER.info("Spooling up SvAnna v{} using resources in {}", SVANNA_VERSION, svAnnaProperties.dataDirectory().toAbsolutePath());
return SvAnnaBuilder.builder(svAnnaProperties)
@@ -103,4 +123,44 @@ protected static ResultWriterFactory resultWriterFactory(SvAnna svAnna) {
return new ResultWriterFactory(overlapper, svAnna.annotationDataService(), svAnna.phenotypeDataService());
}
+ /**
+ * Apply the level derived from the {@code -v} flags to the root logger and then print the banner.
+ */
+ private void setupLoggingAndPrintBanner() {
+ Level requested = parseVerbosityLevel();
+
+ // The cast assumes the SLF4J logger factory is a Logback LoggerContext.
+ LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
+ ch.qos.logback.classic.Logger rootLogger = loggerContext.getLogger(Logger.ROOT_LOGGER_NAME);
+ rootLogger.setLevel(requested);
+
+ printBanner();
+ }
+
+ /**
+ * Read the banner text from the {@code /banner.txt} classpath resource.
+ *
+ * @return the banner decoded as UTF-8, or an empty string if the resource is missing or cannot be read
+ */
+ private static String readBanner() {
+ // try-with-resources does not attempt to close a null resource, so a missing banner needs no special cleanup.
+ try (InputStream is = SvAnnaCommand.class.getResourceAsStream("/banner.txt")) {
+ if (is == null) {
+ // Missing resource: treat it like an unreadable one instead of failing with a NullPointerException.
+ return "";
+ }
+ // Decode explicitly so the result does not depend on the platform default charset.
+ return new String(is.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
+ } catch (IOException e) {
+ // The banner is cosmetic; a read failure must not abort the command.
+ return "";
+ }
+ }
+
+ /**
+ * Map the number of {@code -v} flags to a Logback level.
+ *
+ * @return INFO for no flag, DEBUG for one, TRACE for two, and ALL for three or more
+ */
+ private Level parseVerbosityLevel() {
+ // Count the flags that are set; the local name is distinct from the `verbosity` field on purpose.
+ int flagCount = 0;
+ for (boolean flag : this.verbosity) {
+ flagCount += flag ? 1 : 0;
+ }
+
+ if (flagCount == 0) {
+ return Level.INFO;
+ }
+ if (flagCount == 1) {
+ return Level.DEBUG;
+ }
+ if (flagCount == 2) {
+ return Level.TRACE;
+ }
+ return Level.ALL;
+ }
+
+ /**
+ * Write the banner returned by {@link #readBanner()} to standard error.
+ */
+ private static void printBanner() {
+ final String banner = readBanner();
+ System.err.println(banner);
+ }
+
}
diff --git a/svanna-cli/src/main/resources/banner.txt b/svanna-cli/src/main/resources/banner.txt
new file mode 100644
index 00000000..7777b36c
--- /dev/null
+++ b/svanna-cli/src/main/resources/banner.txt
@@ -0,0 +1,10 @@
+
+ _____ ___
+ / ___/_ __/ | ____ ____ ____ _
+ \__ \| | / / /| | / __ \/ __ \/ __ `/
+ ___/ /| |/ / ___ |/ / / / / / / /_/ /
+/____/ |___/_/ |_/_/ /_/_/ /_/\__,_/
+
+Structural Variant Annotation and Analysis
+ :: v@project.version@ ::
+
diff --git a/svanna-cli/src/main/resources/logback.xml b/svanna-cli/src/main/resources/logback.xml
index 9fc1379b..cc9ecca1 100644
--- a/svanna-cli/src/main/resources/logback.xml
+++ b/svanna-cli/src/main/resources/logback.xml
@@ -5,31 +5,14 @@
-
-
-
-
- INFO
-
-
- ${pattern}
-
-
-
-
-
-
- DEBUG
-
- svanna.log
- false
+
+ System.err
${pattern}
-
-
+
\ No newline at end of file
diff --git a/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz b/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz
index 0b51523d..7d50da6d 100644
Binary files a/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz and b/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz differ
diff --git a/svanna-configuration/pom.xml b/svanna-configuration/pom.xml
index b7336222..1dc947de 100644
--- a/svanna-configuration/pom.xml
+++ b/svanna-configuration/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
diff --git a/svanna-core/pom.xml b/svanna-core/pom.xml
index 4cc93f89..eb949207 100644
--- a/svanna-core/pom.xml
+++ b/svanna-core/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java
index bdb4c868..25fc510a 100644
--- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java
+++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java
@@ -1,5 +1,6 @@
package org.monarchinitiative.svanna.core.priority.additive;
+import org.monarchinitiative.sgenes.model.*;
import org.monarchinitiative.svanna.core.TestDataConfig;
import org.monarchinitiative.svanna.core.hpo.SimilarityScoreCalculator;
import org.monarchinitiative.svanna.core.service.PhenotypeDataService;
@@ -8,10 +9,6 @@
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.monarchinitiative.phenol.ontology.data.TermId;
-import org.monarchinitiative.sgenes.model.Gene;
-import org.monarchinitiative.sgenes.model.GeneIdentifier;
-import org.monarchinitiative.sgenes.model.Transcript;
-import org.monarchinitiative.sgenes.model.TranscriptIdentifier;
import org.monarchinitiative.svart.*;
import org.monarchinitiative.svart.assembly.GenomicAssemblies;
import org.monarchinitiative.svart.assembly.GenomicAssembly;
@@ -73,7 +70,8 @@ public void calculateRelevance() {
TranscriptIdentifier txId = TranscriptIdentifier.of("TX_ACCESSION", "FBN1", null);
List exons = List.of(Coordinates.of(CoordinateSystem.oneBased(), 48_408_313, 48_645_721));
Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.oneBased(), 48_408_313, 48_645_721);
- List transcripts = List.of(Transcript.of(txId, location, exons, cdsCoordinates));
+ TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL);
+ List transcripts = List.of(Transcript.of(txId, location, exons, cdsCoordinates, metadata));
Gene gene = Gene.of(id, location, transcripts);
diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java
index 5a87ef68..e6988bde 100644
--- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java
+++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java
@@ -1,5 +1,6 @@
package org.monarchinitiative.svanna.core.priority.additive.evaluator.getad;
+import org.monarchinitiative.sgenes.model.*;
import org.monarchinitiative.svanna.core.TestContig;
import org.monarchinitiative.svanna.core.TestEnhancer;
import org.monarchinitiative.svanna.core.TestTad;
@@ -12,10 +13,6 @@
import org.junit.jupiter.params.provider.CsvSource;
import org.monarchinitiative.svanna.core.priority.additive.*;
import org.monarchinitiative.svart.*;
-import org.monarchinitiative.sgenes.model.Gene;
-import org.monarchinitiative.sgenes.model.GeneIdentifier;
-import org.monarchinitiative.sgenes.model.Transcript;
-import org.monarchinitiative.sgenes.model.TranscriptIdentifier;
import java.util.List;
@@ -40,7 +37,8 @@ private static Gene makeGene(String id, String symbol, Contig contig, int start,
TranscriptIdentifier txId = TranscriptIdentifier.of(id + "_tx", symbol + "_tx", null);
List exons = List.of(Coordinates.of(CoordinateSystem.zeroBased(), start, end));
Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start, end);
- Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates);
+ TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL);
+ Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata);
GeneIdentifier geneId = GeneIdentifier.of(id, symbol, null, null);
return Gene.of(geneId, location, List.of(tx));
diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java
index 2b9e10f2..8db06c98 100644
--- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java
+++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java
@@ -1,5 +1,6 @@
package org.monarchinitiative.svanna.core.priority.additive.impact;
+import org.monarchinitiative.sgenes.model.*;
import org.monarchinitiative.svanna.core.TestContig;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
@@ -9,10 +10,6 @@
import org.monarchinitiative.svart.Coordinates;
import org.monarchinitiative.svart.GenomicRegion;
import org.monarchinitiative.svart.Strand;
-import org.monarchinitiative.sgenes.model.Gene;
-import org.monarchinitiative.sgenes.model.GeneIdentifier;
-import org.monarchinitiative.sgenes.model.Transcript;
-import org.monarchinitiative.sgenes.model.TranscriptIdentifier;
import java.util.List;
@@ -34,7 +31,8 @@ private static Gene makeGene(TestContig contig, int start, int end,
TranscriptIdentifier txId = TranscriptIdentifier.of("TX1", "TX1_SYMBOL", null);
List exons = makeExons(oneStart, oneEnd, twoStart, twoEnd, threeStart, threeEnd);
Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start + 10, end - 10);
- Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates);
+ TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL);
+ Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata);
// make gene
GeneIdentifier gId = GeneIdentifier.of("NCBIGene:123", "A", null, null);
diff --git a/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz b/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz
index 82558f05..7d50da6d 100644
Binary files a/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz and b/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz differ
diff --git a/svanna-db/pom.xml b/svanna-db/pom.xml
index ec116e2f..52878476 100644
--- a/svanna-db/pom.xml
+++ b/svanna-db/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
diff --git a/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java b/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java
index 0d29c8cf..5bb5faf8 100644
--- a/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java
+++ b/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java
@@ -1,5 +1,6 @@
package org.monarchinitiative.svanna.db.additive.dispatch;
+import org.monarchinitiative.sgenes.model.*;
import org.monarchinitiative.svanna.core.priority.additive.Routes;
import org.monarchinitiative.svanna.core.service.GeneService;
import org.monarchinitiative.svanna.core.service.QueryResult;
@@ -11,10 +12,6 @@
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.monarchinitiative.svart.*;
-import org.monarchinitiative.sgenes.model.Gene;
-import org.monarchinitiative.sgenes.model.GeneIdentifier;
-import org.monarchinitiative.sgenes.model.Transcript;
-import org.monarchinitiative.sgenes.model.TranscriptIdentifier;
import java.util.List;
import java.util.Optional;
@@ -48,7 +45,8 @@ private static Gene makeGene(String id, String symbol, Contig contig, int start,
TranscriptIdentifier txId = TranscriptIdentifier.of(id + "_tx", symbol + "_tx", null);
List exons = List.of(Coordinates.of(CoordinateSystem.zeroBased(), start, end));
Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start, end);
- Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates);
+ TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL);
+ Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata);
GeneIdentifier geneId = GeneIdentifier.of(id, symbol, null, null);
return Gene.of(geneId, location, List.of(tx));
diff --git a/svanna-ingest/pom.xml b/svanna-ingest/pom.xml
index 0549f147..8933ac32 100644
--- a/svanna-ingest/pom.xml
+++ b/svanna-ingest/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
svanna-ingest
diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java
index c8764c12..7df25179 100644
--- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java
+++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java
@@ -15,6 +15,9 @@
import org.apache.commons.io.IOUtils;
import org.flywaydb.core.Flyway;
import org.flywaydb.core.api.output.MigrateResult;
+import org.monarchinitiative.phenol.annotations.assoc.GeneInfoGeneType;
+import org.monarchinitiative.phenol.annotations.formats.hpo.*;
+import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaders;
import org.monarchinitiative.svanna.core.LogUtils;
import org.monarchinitiative.svanna.core.SvAnnaRuntimeException;
import org.monarchinitiative.svanna.core.hpo.TermPair;
@@ -45,12 +48,6 @@
import org.monarchinitiative.svanna.model.landscape.enhancer.Enhancer;
import org.monarchinitiative.svanna.model.landscape.tad.TadBoundary;
import org.monarchinitiative.svanna.model.landscape.variant.PopulationVariant;
-import org.monarchinitiative.phenol.annotations.assoc.HpoAssociationLoader;
-import org.monarchinitiative.phenol.annotations.base.Ratio;
-import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData;
-import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease;
-import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation;
-import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases;
import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase;
import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoader;
import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions;
@@ -239,9 +236,13 @@ private static PhenotypeData downloadPhenotypeFiles(PhenotypeProperties properti
LOGGER.debug("Parsing gene info file at {}", geneInfoPath.toAbsolutePath());
LOGGER.debug("Parsing MIM to gene medgen file at {}", mim2geneMedgenPath.toAbsolutePath());
HpoDiseaseLoaderOptions loaderOptions = HpoDiseaseLoaderOptions.of(DISEASE_DATABASES, true, HpoDiseaseLoaderOptions.DEFAULT_COHORT_SIZE);
- HpoDiseaseLoader loader = HpoDiseaseLoader.of(hpo, loaderOptions);
+ HpoDiseaseLoader loader = HpoDiseaseLoaders.defaultLoader(hpo, loaderOptions);
HpoDiseases diseases = loader.load(hpoAnnotationsPath);
- HpoAssociationData hpoAssociationData = HpoAssociationLoader.loadHpoAssociationData(hpo, geneInfoPath, mim2geneMedgenPath, null, diseases);
+ HpoAssociationData hpoAssociationData = HpoAssociationData.builder(hpo)
+ .hpoDiseases(diseases)
+ .mim2GeneMedgen(mim2geneMedgenPath)
+ .homoSapiensGeneInfo(geneInfoPath, GeneInfoGeneType.DEFAULT)
+ .build();
// Ingest geneToDisease
int updatedGeneToDisease = ingestGeneToDiseaseMap(hpoAssociationData, ncbiGeneToHgnc, diseases, geneDiseaseDao);
@@ -313,7 +314,7 @@ private static int ingestGeneToDiseaseMap(HpoAssociationData hpoAssociationData,
Map> geneToDisease = new HashMap<>();
// extract relevant bits and pieces for diseases, and map NCBIGene to HGNC
- Map> geneToDiseaseIdMap = hpoAssociationData.geneToDiseases();
+ Map> geneToDiseaseIdMap = hpoAssociationData.associations().geneIdToDiseaseIds();
Map diseaseMap = diseases.diseaseById();
for (TermId ncbiGeneTermId : geneToDiseaseIdMap.keySet()) {
@@ -341,9 +342,7 @@ private static int ingestDiseaseToPhenotypes(GeneDiseaseDao geneDiseaseDao, HpoD
int updated = 0;
for (HpoDisease disease : diseases) {
- List presentPhenotypeTermIds = disease.phenotypicAbnormalitiesStream()
- // We assume that the terms with missing ratio are observed/present.
- .filter(pa -> pa.ratio().map(Ratio::isPositive).orElse(true))
+ List presentPhenotypeTermIds = disease.presentAnnotationsStream()
.map(HpoDiseaseAnnotation::id)
.collect(Collectors.toList());
updated += geneDiseaseDao.insertDiseaseToPhenotypes(disease.id().getValue(), presentPhenotypeTermIds);
diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java
index de50f09e..fed0d009 100644
--- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java
+++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java
@@ -2,7 +2,6 @@
import org.monarchinitiative.svanna.core.LogUtils;
import org.monarchinitiative.svanna.core.hpo.TermPair;
-import org.monarchinitiative.phenol.annotations.base.Ratio;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases;
@@ -37,9 +36,7 @@ public static Map precomputeIcMicaValues(Ontology ontology,
diseaseIdToTermIds.putIfAbsent(diseaseId, new HashSet<>());
// add term ancestors
- Set hpoTerms = disease.phenotypicAbnormalitiesStream()
- // We assume that the terms with missing ratio are observed/present.
- .filter(a -> a.ratio().map(Ratio::isPositive).orElse(true))
+ Set hpoTerms = disease.presentAnnotationsStream()
.map(HpoDiseaseAnnotation::id)
.collect(Collectors.toSet());
Set inclAncestorTermIds = TermIds.augmentWithAncestors(ontology, hpoTerms, true);
diff --git a/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java b/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java
index 5e62b4bd..72d9a5b3 100644
--- a/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java
+++ b/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java
@@ -29,11 +29,12 @@ public class MakeSmallGencodeFileTest {
@Test
public void makeSmallGencodeFile() throws Exception {
- Path gencodeGtf = Path.of("/home/ielis/data/gencode/gencode.v39.basic.annotation.gtf.gz");
- Path output = Path.of("../svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz");
+ Path gencodeGtf = Path.of("/home/ielis/data/genes/gtf/hg38/gencode.v39.basic.annotation.gtf.gz");
+ Path outputCore = Path.of("../svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz");
+ Path outputCli = Path.of("../svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz");
// read Gencode genes & keep the target genes
- GtfGeneParser parser = GtfGeneParserFactory.gtfGeneParser(gencodeGtf, ASSEMBLY);
+ GtfGeneParser parser = GtfGeneParserFactory.gencodeGeneParser(gencodeGtf, ASSEMBLY);
Set targetGeneSymbols = Set.of("SURF1", "SURF2", "FBN1", "ZNF436", "ZBTB48", "HNF4A", "GCK", "BRCA2", "COL4A5", "SRY");
@@ -44,7 +45,10 @@ public void makeSmallGencodeFile() throws Exception {
// write the target genes into the output
GeneParserFactory parserFactory = GeneParserFactory.of(ASSEMBLY);
GeneParser printer = parserFactory.forFormat(SerializationFormat.JSON);
- try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(output)))) {
+ try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(outputCore)))) {
+ printer.write(targetGenes, os);
+ }
+ try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(outputCli)))) {
printer.write(targetGenes, os);
}
}
diff --git a/svanna-io/pom.xml b/svanna-io/pom.xml
index 98268a6e..d103020c 100644
--- a/svanna-io/pom.xml
+++ b/svanna-io/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
@@ -23,6 +23,10 @@
com.github.samtools
htsjdk
+
+ org.apache.commons
+ commons-compress
+
org.monarchinitiative.phenol
phenol-annotations
diff --git a/svanna-model/pom.xml b/svanna-model/pom.xml
index 8f563ee6..87eca9af 100644
--- a/svanna-model/pom.xml
+++ b/svanna-model/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0
diff --git a/svanna-test/pom.xml b/svanna-test/pom.xml
index a36c0067..c5d40d9c 100644
--- a/svanna-test/pom.xml
+++ b/svanna-test/pom.xml
@@ -3,7 +3,7 @@
SvAnna
org.monarchinitiative.svanna
- 1.0.2
+ 1.0.3
4.0.0