diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index b2573776..aee2d8f5 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -12,14 +12,11 @@ on: jobs: build: runs-on: ${{ matrix.os }} - env: - JDK_VERSION: ${{ matrix.jdk }} strategy: fail-fast: false matrix: os: [ windows-latest, macOS-latest, ubuntu-latest ] - jdk: [ 11 ] steps: - uses: actions/checkout@v2 @@ -27,8 +24,8 @@ jobs: - name: Set up JDK uses: actions/setup-java@v2 with: - java-version: '11' + java-version: '17' distribution: 'adopt' - name: Build with Maven - run: ./mvnw -B verify --file pom.xml + run: ./mvnw --batch-mode verify diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c4100cd3..55374904 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,13 @@ Changelog ========= ------ -latest +v1.0.3 ------ +- ``SvAnna`` + - update dependency versions. +- ``svanna-cli`` + - Add support for *v1* and *v2* phenopacket in JSON, YAML, or protobuf binary formats. + - Rework the logging functionality to log all output to standard error output and to allow setting the logging granularity via ``-v`` CLI option. ------ v1.0.2 diff --git a/docs/conf.py b/docs/conf.py index b2e54e5d..2d13836b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ # The short X.Y version. version = u'1.0' # The full version, including alpha/beta/rc tags. -release = u'1.0.2' +release = u'1.0.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/running.rst b/docs/running.rst index f5e8bd94..d1876f46 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -34,9 +34,9 @@ Analysis input ############## The input data can be specified in two ways: either as a path to a VCF file along with one or more HPO terms, -or as a *Phenopacket*: +or as a *phenopacket*: -* ``-p | --phenopacket`` - path to Phenopacket in JSON format. +* ``-p | --phenopacket`` - path to a phenopacket file. 
We support *v1* and *v2* schemas and the file can be in JSON, YAML, or protobuf binary format. * ``-t | --phenotype-term`` - HPO term describing clinical condition of the proband, may be specified multiple times (e.g. ``--term HP:1234567 --term HP:9876543``). * ``--vcf`` - path to the input VCF file. @@ -84,6 +84,7 @@ SvAnna configuration * ``--promoter-length`` - number of bases pre-pended to a transcript and evaluated as a promoter region (default: ``2000``). * ``--promoter-fitness-gain`` - set to ``0.`` to score the promoter variants as strictly as coding variants or to ``1.`` to completely disregard the promoter variants (default: ``0.6``). +* ``-v`` - set logging output granularity. The option can be set multiple times (e.g. ``-vv``) to increase logging output. See the next section to learn more about the SvAnna :ref:`rstoutputformats`, and the :ref:`rstexamples` section to see how SvAnna prioritizes various SV classes. diff --git a/pom.xml b/pom.xml index 2fe0b855..8f156007 100644 --- a/pom.xml +++ b/pom.xml @@ -16,13 +16,13 @@ svanna-benchmark org.monarchinitiative.svanna - 1.0.2 + 1.0.3 SvAnna org.springframework.boot spring-boot-starter-parent - 2.6.10 + 3.0.5 @@ -32,8 +32,9 @@ ${java.version} ${java.version} - 2.0.0-RC2 - 0.2.0 + 2.0.0 + 1.0.0-RC2 + 0.2.5 1.4.200 @@ -101,6 +102,11 @@ silent-genes-io ${silent.genes.version} + + org.phenopackets.phenopackettools + phenopacket-tools-io + ${phenopacket-tools.version} + org.phenopackets @@ -125,7 +131,7 @@ com.github.samtools htsjdk - 2.23.0 + 3.0.5 org.tukaani diff --git a/svanna-benchmark/pom.xml b/svanna-benchmark/pom.xml index 837e72e9..b8b58f4b 100644 --- a/svanna-benchmark/pom.xml +++ b/svanna-benchmark/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 diff --git a/svanna-cli/pom.xml b/svanna-cli/pom.xml index 565c07bd..a48d43ae 100644 --- a/svanna-cli/pom.xml +++ b/svanna-cli/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 svanna-cli @@ 
-19,6 +19,10 @@ svanna-configuration ${project.parent.version} + + org.phenopackets.phenopackettools + phenopacket-tools-io + info.picocli picocli @@ -27,21 +31,11 @@ org.freemarker freemarker - org.apache.commons commons-csv - - org.phenopackets - phenopacket-schema - - - com.google.protobuf - protobuf-java-util - - org.monarchinitiative.svanna @@ -63,33 +57,7 @@ org.springframework.boot spring-boot-maven-plugin - - org.apache.maven.plugins - maven-resources-plugin - - - copy-resources - validate - - copy-resources - - - ${project.build.directory}/assembly-resources - - - src/examples - true - - example.csv - example.vcf - run_examples.sh - - - - - - - + org.apache.maven.plugins maven-assembly-plugin diff --git a/svanna-cli/src/assemble/distribution.xml b/svanna-cli/src/assemble/distribution.xml index e196be0c..6c964992 100644 --- a/svanna-cli/src/assemble/distribution.xml +++ b/svanna-cli/src/assemble/distribution.xml @@ -24,12 +24,12 @@ svanna-cli-${version}.jar + - ${project.build.directory}/assembly-resources + ${project.basedir}/src/examples ./examples - example.vcf - run_examples.sh + **/** diff --git a/svanna-cli/src/examples/example-phenopacket.json b/svanna-cli/src/examples/example-phenopacket.v1.json similarity index 79% rename from svanna-cli/src/examples/example-phenopacket.json rename to svanna-cli/src/examples/example-phenopacket.v1.json index b49994e8..692c6868 100644 --- a/svanna-cli/src/examples/example-phenopacket.json +++ b/svanna-cli/src/examples/example-phenopacket.v1.json @@ -1,7 +1,7 @@ { - "id": "example", + "id": "example-phenopacket", "subject": { - "id": "example", + "id": "Sample", "ageAtCollection": { "age": "P2Y" } @@ -12,11 +12,6 @@ "label": "Reduced von Willebrand factor activity" } }], - "htsFiles": [{ - "uri": "file:///***/***/example.vcf", - "htsFormat": "VCF", - "genomeAssembly": "hg38" - }], "metaData": { "created": "2021-07-13T15:08:53.846Z", "createdBy": "ExampleOrg:ExampleCurator", diff --git 
a/svanna-cli/src/examples/example-phenopacket.v2.json b/svanna-cli/src/examples/example-phenopacket.v2.json new file mode 100644 index 00000000..2bc8291b --- /dev/null +++ b/svanna-cli/src/examples/example-phenopacket.v2.json @@ -0,0 +1,30 @@ +{ + "id": "example-phenopacket", + "subject": { + "id": "Sample", + "timeAtLastEncounter": { + "age": { + "iso8601duration": "P2Y" + } + } + }, + "phenotypicFeatures": [{ + "type": { + "id": "HP:0008330", + "label": "Reduced von Willebrand factor activity" + } + }], + "metaData": { + "created": "2021-07-13T15:08:53.846Z", + "createdBy": "ExampleOrg:ExampleCurator", + "resources": [{ + "id": "hp", + "name": "human phenotype ontology", + "url": "http://purl.obolibrary.org/obo/hp.owl", + "version": "unknown HPO version", + "namespacePrefix": "HP", + "iriPrefix": "http://purl.obolibrary.org/obo/HP_" + }], + "phenopacketSchemaVersion": "2.0.0" + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java index b0ec5a70..e16da4d3 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/Main.java @@ -17,7 +17,7 @@ footer = Main.FOOTER) public class Main implements Callable { - public static final String VERSION = "svanna-cli v1.0.2"; + public static final String VERSION = "svanna-cli v1.0.3"; public static final int WIDTH = 120; diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java new file mode 100644 index 00000000..24e42aa6 --- /dev/null +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisData.java @@ -0,0 +1,50 @@ +package org.monarchinitiative.svanna.cli.cmd; + +import org.monarchinitiative.phenol.ontology.data.TermId; + +import java.nio.file.Path; +import java.util.List; +import java.util.Objects; + +/** 
+ * SvAnna requires these inputs for the analysis. + */ +class AnalysisData { + + private final List phenotypeTerms; + private final Path vcf; + + AnalysisData(List phenotypeTerms, Path vcf) { + this.phenotypeTerms = phenotypeTerms; + this.vcf = vcf; + } + + List phenotypeTerms() { + return phenotypeTerms; + } + + Path vcf() { + return vcf; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AnalysisData that = (AnalysisData) o; + return Objects.equals(phenotypeTerms, that.phenotypeTerms) && Objects.equals(vcf, that.vcf); + } + + @Override + public int hashCode() { + return Objects.hash(phenotypeTerms, vcf); + } + + @Override + public String toString() { + return "AnalysisData{" + + "phenotypeTerms=" + phenotypeTerms + + ", vcf=" + vcf + + '}'; + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java new file mode 100644 index 00000000..872fe6d7 --- /dev/null +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/AnalysisInputException.java @@ -0,0 +1,28 @@ +package org.monarchinitiative.svanna.cli.cmd; + +import org.monarchinitiative.svanna.core.SvAnnaRuntimeException; + +/** + * An exception thrown if inputs for the analysis are incomplete or otherwise invalid. 
+ */ +class AnalysisInputException extends SvAnnaRuntimeException { + AnalysisInputException() { + super(); + } + + AnalysisInputException(String message) { + super(message); + } + + AnalysisInputException(String message, Throwable cause) { + super(message, cause); + } + + AnalysisInputException(Throwable cause) { + super(cause); + } + + AnalysisInputException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java new file mode 100644 index 00000000..7a0b9b45 --- /dev/null +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketAnalysisDataUtil.java @@ -0,0 +1,175 @@ +package org.monarchinitiative.svanna.cli.cmd; + +import com.google.protobuf.Message; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.ontology.data.TermId; +import org.phenopackets.phenopackettools.core.PhenopacketElement; +import org.phenopackets.phenopackettools.core.PhenopacketSchemaVersion; +import org.phenopackets.phenopackettools.io.PhenopacketParser; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; +import org.phenopackets.phenopackettools.util.format.SniffException; +import org.phenopackets.schema.v1.Phenopacket; +import org.phenopackets.schema.v1.core.HtsFile; +import org.phenopackets.schema.v2.core.File; +import org.phenopackets.schema.v2.core.PhenotypicFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.*; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import 
java.util.stream.IntStream; + +/** + * Utility methods for reading {@link AnalysisData} from v1 or v2 phenopacket. + */ +class PhenopacketAnalysisDataUtil { + + private static final Logger LOGGER = LoggerFactory.getLogger(PhenopacketAnalysisDataUtil.class); + + private PhenopacketAnalysisDataUtil() { + } + + static AnalysisData parseV2Phenopacket(Path phenopacketPath, + Path cliVcfPath, + PhenopacketParserFactory parserFactory) throws AnalysisInputException { + Message message = parseMessage(phenopacketPath, parserFactory, PhenopacketSchemaVersion.V2); + + if (message instanceof org.phenopackets.schema.v2.Phenopacket) { + org.phenopackets.schema.v2.Phenopacket pp = (org.phenopackets.schema.v2.Phenopacket) message; + + // (1) Phenotype features + List phenotypeTermIds = new ArrayList<>(); + boolean reportExcludedFeature = true; + List phenotypicFeaturesList = pp.getPhenotypicFeaturesList(); + for (int i = 0; i < phenotypicFeaturesList.size(); i++) { + PhenotypicFeature pf = phenotypicFeaturesList.get(i); + // SvAnna does not support excluded features. + // As a matter of courtesy, let's warn the user about skipping the excluded features. 
+ if (pf.getExcluded()) { + if (reportExcludedFeature) { + reportExcludedFeature = false; + String excludedFeatureIndices = extractIndicesOfNegatedFeatures(phenotypicFeaturesList, PhenotypicFeature::getExcluded); + LOGGER.warn("Skipping unsupported excluded phenotype features {}", excludedFeatureIndices); + } + } else { + try { + TermId termId = TermId.of(pf.getType().getId()); + phenotypeTermIds.add(termId); + } catch (PhenolRuntimeException pre) { + LOGGER.warn("Skipping phenotype feature #{} due to invalid identifier {}", i, pf.getType().getId()); + } + } + } + + // (2) VCF path + // We take the 1st VCF file + List vcfFiles = pp.getFilesList().stream() + .filter(f -> "vcf".equalsIgnoreCase(f.getFileAttributesOrDefault("fileFormat", null))) + .collect(Collectors.toList()); + Path vcf = getVcfPath(cliVcfPath, vcfFiles, File::getUri); + return new AnalysisData(phenotypeTermIds, vcf); + } else { + // Shouldn't really happen but let's make sure we can report a meaningful error. + throw new AnalysisInputException(String.format("Unexpected instance %s!=%s", message.getClass().getName(), org.phenopackets.schema.v2.Phenopacket.class.getName())); + } + + } + + static AnalysisData parseV1Phenopacket(Path phenopacketPath, + Path cliVcfPath, + PhenopacketParserFactory parserFactory) throws AnalysisInputException { + Message message = parseMessage(phenopacketPath, parserFactory, PhenopacketSchemaVersion.V1); + if (message instanceof Phenopacket) { + Phenopacket pp = (Phenopacket) message; + + // (1) Phenotype features + List phenotypeTermIds = new ArrayList<>(); + boolean reportExcludedFeature = true; + List phenotypicFeaturesList = pp.getPhenotypicFeaturesList(); + for (int i = 0; i < phenotypicFeaturesList.size(); i++) { + org.phenopackets.schema.v1.core.PhenotypicFeature pf = phenotypicFeaturesList.get(i); + // SvAnna does not support excluded features. + // As a matter of courtesy, let's warn the user about skipping the excluded features. 
+ if (pf.getNegated()) { + if (reportExcludedFeature) { + reportExcludedFeature = false; + String excludedFeatureIndices = extractIndicesOfNegatedFeatures(phenotypicFeaturesList, org.phenopackets.schema.v1.core.PhenotypicFeature::getNegated); + LOGGER.warn("Skipping unsupported excluded phenotype features {}", excludedFeatureIndices); + } + } else { + try { + TermId termId = TermId.of(pf.getType().getId()); + phenotypeTermIds.add(termId); + } catch (PhenolRuntimeException pre) { + LOGGER.warn("Skipping phenotype feature #{} due to invalid identifier {}", i, pf.getType().getId()); + } + } + } + + // (2) VCF path + // We take the 1st VCF file + List vcfFiles = pp.getHtsFilesList().stream() + .filter(f -> f.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) + .collect(Collectors.toList()); + Path vcf = getVcfPath(cliVcfPath, vcfFiles, HtsFile::getUri); + return new AnalysisData(phenotypeTermIds, vcf); + } else { + // Again, shouldn't really happen but let's make sure we can report a meaningful error. + throw new AnalysisInputException(String.format("Unexpected instance %s!=%s", message.getClass().getName(), org.phenopackets.schema.v2.Phenopacket.class.getName())); + } + } + + private static Message parseMessage(Path phenopacketPath, + PhenopacketParserFactory parserFactory, + PhenopacketSchemaVersion schemaVersion) throws AnalysisInputException { + PhenopacketParser parser = parserFactory.forFormat(schemaVersion); + + Message message; + try { + message = parser.parse(PhenopacketElement.PHENOPACKET, phenopacketPath); + } catch (IOException | SniffException e) { + throw new AnalysisInputException(e); + } + return message; + } + + private static Path getVcfPath(Path cliVcfPath, + List files, + Function uriExtractor) throws AnalysisInputException { + if (files.isEmpty()) { + if (cliVcfPath == null) + throw new AnalysisInputException("VCF file was found neither in CLI arguments nor in the Phenopacket. 
Aborting."); + else + return cliVcfPath; + } else if (files.size() > 1) { + String fileUris = files.stream() + .map(uriExtractor) + .collect(Collectors.joining(", ", "[", "]")); + throw new AnalysisInputException(String.format("There must be exactly 1 VCF file in the phenopacket but got %s: %s", files.size(), fileUris)); + } else { + String uriStr = uriExtractor.apply(files.get(0)); + try { + URI uri = new URI(uriStr); + return Path.of(uri); + } catch (URISyntaxException | IllegalArgumentException | java.nio.file.FileSystemNotFoundException e) { // Path.of(URI) throws unchecked exceptions for URIs it cannot map to a file system path (e.g. missing scheme) + LOGGER.warn("Invalid URI `{}`: {}", uriStr, e.getMessage()); + LOGGER.debug("Invalid URI `{}`: {}", uriStr, e.getMessage(), e); + throw new AnalysisInputException(e); + } + } + } + + private static String extractIndicesOfNegatedFeatures(List phenotypicFeaturesList, Predicate isNegated) { + return IntStream.range(0, phenotypicFeaturesList.size()) + .filter(idx -> isNegated.test(phenotypicFeaturesList.get(idx))) + .boxed() + .map(Objects::toString) + .collect(Collectors.joining(", ", "[", "]")); + } +} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java deleted file mode 100644 index 3d7a1cb2..00000000 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PhenopacketImporter.java +++ /dev/null @@ -1,166 +0,0 @@ -package org.monarchinitiative.svanna.cli.cmd; - -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.util.JsonFormat; -import org.monarchinitiative.phenol.base.PhenolRuntimeException; -import org.monarchinitiative.phenol.ontology.data.Ontology; -import org.monarchinitiative.phenol.ontology.data.TermId; -import org.phenopackets.schema.v1.Phenopacket; -import org.phenopackets.schema.v1.core.HtsFile; -import org.phenopackets.schema.v1.core.PhenotypicFeature; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; 
-import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - - -/** - * This class ingests a phenopacket, which is required to additionally contain the - * path of a VCF file that will be used for the analysis. - * @author Peter Robinson - */ -public class PhenopacketImporter { - - private static final Logger logger = LoggerFactory.getLogger(PhenopacketImporter.class); - private static final JsonFormat.Parser JSON_PARSER = JsonFormat.parser(); - /** The Phenopacket that represents the individual being sequenced in the current run. */ - private final Phenopacket phenoPacket; - /** Object representing the VCF file with variants identified in the subject of this Phenopacket. */ - private HtsFile vcfFile; - /** Reference to HPO ontology */ - private final Ontology hpo; - - /** - * Factory method to obtain a PhenopacketImporter object starting from a phenopacket in Json format - * - * @param phenopacketPath -- path to the phenopacket - * @return {@link PhenopacketImporter} object corresponding to the PhenoPacket - */ - public static PhenopacketImporter fromJson(Path phenopacketPath, Ontology ontology) throws IOException { - logger.trace("Importing Phenopacket: " + phenopacketPath); - if (!phenopacketPath.toFile().isFile()) { - logger.error("Could not find phenopacket file at " + phenopacketPath); - throw new IOException("Could not find phenopacket file at " + phenopacketPath); - } - try { - Phenopacket phenopacket = readPhenopacket(phenopacketPath); - return new PhenopacketImporter(phenopacket, ontology); - } catch (InvalidProtocolBufferException e) { - logger.error("Malformed phenopacket: " + e.getMessage()); - throw new IOException("Could not load phenopacket (" + phenopacketPath + "): " + e.getMessage()); - } catch (IOException e) { - throw new IOException("I/O Error: Could not load phenopacket (" + phenopacketPath + "): " + e.getMessage(), e); - } - } - - public static 
Phenopacket readPhenopacket(Path phenopacketPath) throws IOException { - logger.info("Reading phenopacket from `{}`", phenopacketPath.toAbsolutePath()); - try (BufferedReader reader = Files.newBufferedReader(phenopacketPath)) { - String phenopacketJsonString = reader.lines().collect(Collectors.joining(System.lineSeparator())); - Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder(); - JSON_PARSER.merge(phenopacketJsonString, phenoPacketBuilder); - return phenoPacketBuilder.build(); - } - } - - private PhenopacketImporter(Phenopacket ppack, Ontology ontology){ - this.phenoPacket=ppack; - this.hpo=ontology; - extractVcfData(); - } - - public boolean hasVcf() { return this.vcfFile !=null; } - - public List getHpoTerms() { - List builder = new ArrayList<>(); - for (PhenotypicFeature feature : phenoPacket.getPhenotypicFeaturesList()) { - if (feature.getNegated()) continue; - String id = feature.getType().getId(); - TermId tid = TermId.of(id); - if (! hpo.getTermMap().containsKey(tid)) { - logger.error("Could not identify HPO term id {}.",tid.getValue()); - logger.error("Please check the input file and update to the latest hp.obo file. "); - throw new PhenolRuntimeException("Could not identify HPO term id: "+tid.getValue()); - } else if (hpo.getObsoleteTermIds().contains(tid)) { - TermId current = hpo.getPrimaryTermId(tid); - builder.add(current); - logger.error("Replacing obsolete HPO term id {} with current id {}.",tid.getValue(),current.getValue()); - } else { - builder.add(tid); - } - } - return builder; - } - - - public List getNegatedHpoTerms() { - List builder = new ArrayList<>(); - for (PhenotypicFeature feature : phenoPacket.getPhenotypicFeaturesList()) { - if (! feature.getNegated()) continue; - String id = feature.getType().getId(); - TermId tid = TermId.of(id); - if (! 
hpo.getTermMap().containsKey(tid)) { - logger.error("Could not identify HPO term id {}.",tid.getValue()); - logger.error("Please check the input file and update to the latest hp.obo file. "); - throw new PhenolRuntimeException("Could not identify HPO term id: "+tid.getValue()); - } else if (hpo.getObsoleteTermIds().contains(tid)) { - TermId current = hpo.getPrimaryTermId(tid); - builder.add(current); - logger.error("Replacing obsolete HPO term id {} with current id {}.", tid.getValue(), current.getValue()); - } else { - builder.add(tid); - } - } - return builder; - } - - /** - * The path to the VCF file may be a string such as file:/path/to/examples/BBS1.vcf - * In this case, remove the prefix 'path:', otherwise return the original URI - * @return URI of VCF file mentioned in the Phenopacket - */ - - - public HtsFile getVcfFile() { - return this.vcfFile; - } - - public Path getVcfPath() { - if (this.vcfFile == null) { - return null; - } - String uri = this.vcfFile.getUri().startsWith("file:") ? - this.vcfFile.getUri().substring(5) : - this.vcfFile.getUri(); - return Paths.get(uri); - } - - public String getSampleName() { - return phenoPacket.getSubject().getId(); - } - - - /** This method extracts the VCF file and the corresponding GenomeBuild. We assume that - * the phenopacket contains a single VCF file and that this file is for a single person. 
*/ - private void extractVcfData() { - List htsFileList = phenoPacket.getHtsFilesList(); - if (htsFileList.size() > 1 ) { - logger.error("Warning: multiple HTsFiles associated with this phenopacket"); - logger.error("Warning: we will return the path to the first VCF file we find"); - } else if (htsFileList.isEmpty()) { - return; - } - for (HtsFile htsFile : htsFileList) { - if (htsFile.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) { - this.vcfFile = htsFile; - } - } - } -} diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java index fad82c13..858bf87a 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/PrioritizeCommand.java @@ -24,15 +24,12 @@ import org.monarchinitiative.phenol.ontology.data.Term; import org.monarchinitiative.phenol.ontology.data.TermId; import org.monarchinitiative.svart.assembly.GenomicAssembly; -import org.phenopackets.schema.v1.Phenopacket; -import org.phenopackets.schema.v1.core.HtsFile; +import org.phenopackets.phenopackettools.io.PhenopacketParserFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import picocli.CommandLine; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; import java.text.NumberFormat; @@ -63,12 +60,12 @@ public class PrioritizeCommand extends SvAnnaCommand { public InputOptions inputOptions = new InputOptions(); public static class InputOptions { @CommandLine.Option(names = {"-p", "--phenopacket"}, - description = "Path to phenopacket.") + description = "Path to v1 or v2 phenopacket in JSON, YAML or Protobuf format.") public Path phenopacket = null; @CommandLine.Option(names = {"-t", "--phenotype-term"}, description = "HPO term ID(s). 
Can be provided multiple times.") - public List hpoTermIdList = List.of(); + public List hpoTermIdList = null; @CommandLine.Option(names = {"--vcf"}, description = "Path to the input VCF file.") @@ -130,7 +127,7 @@ public static class OutputConfig { } @Override - public Integer call() { + public Integer execute() { int status = checkArguments(); if (status != 0) return status; @@ -139,14 +136,11 @@ public Integer call() { DataProperties dataProperties = dataProperties(); SvAnnaProperties svAnnaProperties = SvAnnaProperties.of(svannaDataDirectory, prioritizationProperties, dataProperties); - Optional analysisData = parseAnalysisData(); - if (analysisData.isEmpty()) - return 1; - try { - runAnalysis(analysisData.get(), svAnnaProperties); + AnalysisData analysisData = parseAnalysisData(); + runAnalysis(analysisData, svAnnaProperties); } catch (InterruptedException | ExecutionException | IOException | InvalidResourceException | - MissingResourceException | UndefinedResourceException e) { + MissingResourceException | UndefinedResourceException | AnalysisInputException e) { LOGGER.error("Error: {}", e.getMessage()); LOGGER.debug("Error: {}", e.getMessage(), e); return 1; @@ -156,76 +150,50 @@ public Integer call() { return 0; } - private Optional parseAnalysisData() { - Path vcf; - List phenotypeTermIds; - if (inputOptions.vcf != null) { // VCF & CLI - vcf = inputOptions.vcf; - phenotypeTermIds = inputOptions.hpoTermIdList.stream() + private AnalysisData parseAnalysisData() throws AnalysisInputException { + if (inputOptions.hpoTermIdList != null) { // CLI + LOGGER.info("Using {} phenotype features supplied via CLI", inputOptions.hpoTermIdList.size()); + Path vcf = inputOptions.vcf; + List phenotypeTermIds = inputOptions.hpoTermIdList.stream() .map(TermId::of) .collect(Collectors.toList()); - } else { // phenopacket - try { - Phenopacket phenopacket = PhenopacketImporter.readPhenopacket(inputOptions.phenopacket); - phenotypeTermIds = 
phenopacket.getPhenotypicFeaturesList().stream() - .map(pf -> TermId.of(pf.getType().getId())) - .collect(Collectors.toList()); - - Optional vcfFilePathOptional = getVcfFilePath(phenopacket); - if (vcfFilePathOptional.isEmpty()) { - if (inputOptions.vcf == null) { - LOGGER.error("VCF file was found neither in CLI arguments nor in the Phenopacket. Aborting."); - return Optional.empty(); - } else { - vcf = inputOptions.vcf; - } - } else { - LOGGER.info("VCF file was found in both CLI arguments and in the Phenopacket. Using the file from CLI: `{}`", inputOptions.vcf); - vcf = inputOptions.vcf; - } + return new AnalysisData(phenotypeTermIds, vcf); + } else { // Phenopacket + LOGGER.info("Using phenotype features from a phenopacket at {}", inputOptions.phenopacket.toAbsolutePath()); + PhenopacketParserFactory parserFactory = PhenopacketParserFactory.getInstance(); - } catch (IOException e) { - LOGGER.error("Error reading phenopacket at `{}`: {}", inputOptions.phenopacket, e.getMessage()); - return Optional.empty(); + // try v2 first + try { + LOGGER.debug("Trying v2 format first.."); + AnalysisData analysisData = PhenopacketAnalysisDataUtil.parseV2Phenopacket(inputOptions.phenopacket, inputOptions.vcf, parserFactory); + LOGGER.debug("Success!"); + return analysisData; + } catch (AnalysisInputException e) { + // swallow and try v1 + LOGGER.debug("Unable to decode {} as v2 phenopacket, falling back to v1", inputOptions.phenopacket.toAbsolutePath()); } - } - return Optional.of(new AnalysisData(phenotypeTermIds, vcf)); - } - - private static Optional getVcfFilePath(Phenopacket phenopacket) { - // There should be exactly one VCF file - LinkedList vcfFiles = phenopacket.getHtsFilesList().stream() - .filter(htsFile -> htsFile.getHtsFormat().equals(HtsFile.HtsFormat.VCF)) - .distinct() - .collect(Collectors.toCollection(LinkedList::new)); - if (vcfFiles.isEmpty()) { - LOGGER.info("VCF file was not found in Phenopacket. 
Expecting to find the file among the CLI arguments"); - return Optional.empty(); + // try v1 or fail + AnalysisData analysisData = PhenopacketAnalysisDataUtil.parseV1Phenopacket(inputOptions.phenopacket, inputOptions.vcf, parserFactory); + LOGGER.debug("Success!"); + return analysisData; } - if (vcfFiles.size() > 1) - LOGGER.warn("Found >1 VCF files. Using the first one."); - - // The VCF file should have a proper URI - HtsFile vcf = vcfFiles.getFirst(); - try { - URI uri = new URI(vcf.getUri()); - return Optional.of(Path.of(uri)); - } catch (URISyntaxException e) { - LOGGER.warn("Invalid URI `{}`: {}", vcf.getUri(), e.getMessage()); - return Optional.empty(); - } } protected int checkArguments() { - if ((inputOptions.vcf == null) == (inputOptions.phenopacket == null)) { - LOGGER.error("Path to a VCF file or to a phenopacket must be supplied"); + if (inputOptions.hpoTermIdList == null && inputOptions.phenopacket == null) { + LOGGER.error("No phenotype features provided. Use the CLI or a phenopacket"); return 1; } - if (inputOptions.phenopacket != null && !inputOptions.hpoTermIdList.isEmpty()) { - LOGGER.error("Passing HPO terms both through CLI and Phenopacket is not supported"); + if (inputOptions.hpoTermIdList != null && inputOptions.phenopacket != null) { + LOGGER.error("Passing HPO terms both through CLI and Phenopacket is not supported. 
Choose one"); + return 1; + } + + if (inputOptions.vcf == null && inputOptions.phenopacket == null) { + LOGGER.error("Path to a VCF file or to a phenopacket must be supplied"); return 1; } @@ -356,22 +324,4 @@ private AnalysisParameters getAnalysisParameters(AnalysisData analysisData, SvAn return analysisParameters; } - - private static class AnalysisData { - private final List phenotypeTerms; - private final Path vcf; - - private AnalysisData(List phenotypeTerms, Path vcf) { - this.phenotypeTerms = phenotypeTerms; - this.vcf = vcf; - } - - public List phenotypeTerms() { - return phenotypeTerms; - } - - public Path vcf() { - return vcf; - } - } } diff --git a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java index df5bee08..79ca0485 100644 --- a/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java +++ b/svanna-cli/src/main/java/org/monarchinitiative/svanna/cli/cmd/SvAnnaCommand.java @@ -1,5 +1,7 @@ package org.monarchinitiative.svanna.cli.cmd; +import ch.qos.logback.classic.Level; +import ch.qos.logback.classic.LoggerContext; import org.monarchinitiative.svanna.cli.writer.ResultWriterFactory; import org.monarchinitiative.svanna.configuration.SvAnnaBuilder; import org.monarchinitiative.svanna.configuration.exception.InvalidResourceException; @@ -16,9 +18,11 @@ import org.slf4j.LoggerFactory; import picocli.CommandLine; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Path; +import java.util.Objects; import java.util.Properties; import java.util.concurrent.Callable; @@ -37,6 +41,11 @@ public abstract class SvAnnaCommand implements Callable { private static final double TAD_STABILITY_THRESHOLD = 80.; // ----------------------------------------------------------------------------------------------------------------- + @CommandLine.Option(names = {"-v"}, + 
description = {"Specify multiple -v options to increase verbosity.", + "For example, `-v -v -v` or `-vvv`"}) + protected boolean[] verbosity = {}; + @CommandLine.Option(names = {"-d", "--data-directory"}, paramLabel = "path/to/datadir", required = true, @@ -76,6 +85,17 @@ private static Properties readProperties() { return properties; } + @Override + public Integer call() { + // (0) Setup verbosity and print banner. + setupLoggingAndPrintBanner(); + + // (1) Run the command functionality. + return execute(); + } + + protected abstract Integer execute(); + protected SvAnna bootstrapSvAnna(SvAnnaProperties svAnnaProperties) throws MissingResourceException, InvalidResourceException, UndefinedResourceException { LOGGER.info("Spooling up SvAnna v{} using resources in {}", SVANNA_VERSION, svAnnaProperties.dataDirectory().toAbsolutePath()); return SvAnnaBuilder.builder(svAnnaProperties) @@ -103,4 +123,44 @@ protected static ResultWriterFactory resultWriterFactory(SvAnna svAnna) { return new ResultWriterFactory(overlapper, svAnna.annotationDataService(), svAnna.phenotypeDataService()); } + private void setupLoggingAndPrintBanner() { + Level level = parseVerbosityLevel(); + + LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory(); + context.getLogger(Logger.ROOT_LOGGER_NAME).setLevel(level); + + printBanner(); + } + + private static String readBanner() { + try (InputStream is = new BufferedInputStream(Objects.requireNonNull(SvAnnaCommand.class.getResourceAsStream("/banner.txt")))) { + return new String(is.readAllBytes()); + } catch (IOException e) { + // swallow + return ""; + } + } + + private Level parseVerbosityLevel() { + int verbosity = 0; + for (boolean a : this.verbosity) { + if (a) verbosity++; + } + + switch (verbosity) { + case 0: + return Level.INFO; + case 1: + return Level.DEBUG; + case 2: + return Level.TRACE; + default: + return Level.ALL; + } + } + + private static void printBanner() { + System.err.println(readBanner()); + } + } diff --git 
a/svanna-cli/src/main/resources/banner.txt b/svanna-cli/src/main/resources/banner.txt new file mode 100644 index 00000000..7777b36c --- /dev/null +++ b/svanna-cli/src/main/resources/banner.txt @@ -0,0 +1,10 @@ + + _____ ___ + / ___/_ __/ | ____ ____ ____ _ + \__ \| | / / /| | / __ \/ __ \/ __ `/ + ___/ /| |/ / ___ |/ / / / / / / /_/ / +/____/ |___/_/ |_/_/ /_/_/ /_/\__,_/ + +Structural Variant Annotation and Analysis + :: v@project.version@ :: + diff --git a/svanna-cli/src/main/resources/logback.xml b/svanna-cli/src/main/resources/logback.xml index 9fc1379b..cc9ecca1 100644 --- a/svanna-cli/src/main/resources/logback.xml +++ b/svanna-cli/src/main/resources/logback.xml @@ -5,31 +5,14 @@ - - - - - INFO - - - ${pattern} - - - - - - - DEBUG - - svanna.log - false + + System.err ${pattern} - - + \ No newline at end of file diff --git a/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz b/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz index 0b51523d..7d50da6d 100644 Binary files a/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz and b/svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz differ diff --git a/svanna-configuration/pom.xml b/svanna-configuration/pom.xml index b7336222..1dc947de 100644 --- a/svanna-configuration/pom.xml +++ b/svanna-configuration/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 diff --git a/svanna-core/pom.xml b/svanna-core/pom.xml index 4cc93f89..eb949207 100644 --- a/svanna-core/pom.xml +++ b/svanna-core/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java index bdb4c868..25fc510a 100644 --- 
a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java +++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/TermSimilarityGeneWeightCalculatorTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.core.priority.additive; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.TestDataConfig; import org.monarchinitiative.svanna.core.hpo.SimilarityScoreCalculator; import org.monarchinitiative.svanna.core.service.PhenotypeDataService; @@ -8,10 +9,6 @@ import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.monarchinitiative.phenol.ontology.data.TermId; -import org.monarchinitiative.sgenes.model.Gene; -import org.monarchinitiative.sgenes.model.GeneIdentifier; -import org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import org.monarchinitiative.svart.*; import org.monarchinitiative.svart.assembly.GenomicAssemblies; import org.monarchinitiative.svart.assembly.GenomicAssembly; @@ -73,7 +70,8 @@ public void calculateRelevance() { TranscriptIdentifier txId = TranscriptIdentifier.of("TX_ACCESSION", "FBN1", null); List exons = List.of(Coordinates.of(CoordinateSystem.oneBased(), 48_408_313, 48_645_721)); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.oneBased(), 48_408_313, 48_645_721); - List transcripts = List.of(Transcript.of(txId, location, exons, cdsCoordinates)); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + List transcripts = List.of(Transcript.of(txId, location, exons, cdsCoordinates, metadata)); Gene gene = Gene.of(id, location, transcripts); diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java 
b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java index 5a87ef68..e6988bde 100644 --- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java +++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/evaluator/getad/RouteDataEvaluatorGETadTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.core.priority.additive.evaluator.getad; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.TestContig; import org.monarchinitiative.svanna.core.TestEnhancer; import org.monarchinitiative.svanna.core.TestTad; @@ -12,10 +13,6 @@ import org.junit.jupiter.params.provider.CsvSource; import org.monarchinitiative.svanna.core.priority.additive.*; import org.monarchinitiative.svart.*; -import org.monarchinitiative.sgenes.model.Gene; -import org.monarchinitiative.sgenes.model.GeneIdentifier; -import org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import java.util.List; @@ -40,7 +37,8 @@ private static Gene makeGene(String id, String symbol, Contig contig, int start, TranscriptIdentifier txId = TranscriptIdentifier.of(id + "_tx", symbol + "_tx", null); List exons = List.of(Coordinates.of(CoordinateSystem.zeroBased(), start, end)); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start, end); - Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata); GeneIdentifier geneId = GeneIdentifier.of(id, symbol, null, null); return Gene.of(geneId, location, List.of(tx)); diff --git a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java 
b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java index 2b9e10f2..8db06c98 100644 --- a/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java +++ b/svanna-core/src/test/java/org/monarchinitiative/svanna/core/priority/additive/impact/GeneSequenceImpactCalculatorTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.core.priority.additive.impact; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.TestContig; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.params.ParameterizedTest; @@ -9,10 +10,6 @@ import org.monarchinitiative.svart.Coordinates; import org.monarchinitiative.svart.GenomicRegion; import org.monarchinitiative.svart.Strand; -import org.monarchinitiative.sgenes.model.Gene; -import org.monarchinitiative.sgenes.model.GeneIdentifier; -import org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import java.util.List; @@ -34,7 +31,8 @@ private static Gene makeGene(TestContig contig, int start, int end, TranscriptIdentifier txId = TranscriptIdentifier.of("TX1", "TX1_SYMBOL", null); List exons = makeExons(oneStart, oneEnd, twoStart, twoEnd, threeStart, threeEnd); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start + 10, end - 10); - Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates); + TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata); // make gene GeneIdentifier gId = GeneIdentifier.of("NCBIGene:123", "A", null, null); diff --git a/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz b/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz index 82558f05..7d50da6d 100644 Binary files 
a/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz and b/svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz differ diff --git a/svanna-db/pom.xml b/svanna-db/pom.xml index ec116e2f..52878476 100644 --- a/svanna-db/pom.xml +++ b/svanna-db/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 diff --git a/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java b/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java index 0d29c8cf..5bb5faf8 100644 --- a/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java +++ b/svanna-db/src/test/java/org/monarchinitiative/svanna/db/additive/dispatch/TadAwareDispatcherTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.svanna.db.additive.dispatch; +import org.monarchinitiative.sgenes.model.*; import org.monarchinitiative.svanna.core.priority.additive.Routes; import org.monarchinitiative.svanna.core.service.GeneService; import org.monarchinitiative.svanna.core.service.QueryResult; @@ -11,10 +12,6 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.monarchinitiative.svart.*; -import org.monarchinitiative.sgenes.model.Gene; -import org.monarchinitiative.sgenes.model.GeneIdentifier; -import org.monarchinitiative.sgenes.model.Transcript; -import org.monarchinitiative.sgenes.model.TranscriptIdentifier; import java.util.List; import java.util.Optional; @@ -48,7 +45,8 @@ private static Gene makeGene(String id, String symbol, Contig contig, int start, TranscriptIdentifier txId = TranscriptIdentifier.of(id + "_tx", symbol + "_tx", null); List exons = List.of(Coordinates.of(CoordinateSystem.zeroBased(), start, end)); Coordinates cdsCoordinates = Coordinates.of(CoordinateSystem.zeroBased(), start, end); - Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates); + 
TranscriptMetadata metadata = TranscriptMetadata.of(TranscriptEvidence.CANONICAL); + Transcript tx = Transcript.of(txId, location, exons, cdsCoordinates, metadata); GeneIdentifier geneId = GeneIdentifier.of(id, symbol, null, null); return Gene.of(geneId, location, List.of(tx)); diff --git a/svanna-ingest/pom.xml b/svanna-ingest/pom.xml index 0549f147..8933ac32 100644 --- a/svanna-ingest/pom.xml +++ b/svanna-ingest/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 svanna-ingest diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java index c8764c12..7df25179 100644 --- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java +++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java @@ -15,6 +15,9 @@ import org.apache.commons.io.IOUtils; import org.flywaydb.core.Flyway; import org.flywaydb.core.api.output.MigrateResult; +import org.monarchinitiative.phenol.annotations.assoc.GeneInfoGeneType; +import org.monarchinitiative.phenol.annotations.formats.hpo.*; +import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaders; import org.monarchinitiative.svanna.core.LogUtils; import org.monarchinitiative.svanna.core.SvAnnaRuntimeException; import org.monarchinitiative.svanna.core.hpo.TermPair; @@ -45,12 +48,6 @@ import org.monarchinitiative.svanna.model.landscape.enhancer.Enhancer; import org.monarchinitiative.svanna.model.landscape.tad.TadBoundary; import org.monarchinitiative.svanna.model.landscape.variant.PopulationVariant; -import org.monarchinitiative.phenol.annotations.assoc.HpoAssociationLoader; -import org.monarchinitiative.phenol.annotations.base.Ratio; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; -import 
org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation; -import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoader; import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions; @@ -239,9 +236,13 @@ private static PhenotypeData downloadPhenotypeFiles(PhenotypeProperties properti LOGGER.debug("Parsing gene info file at {}", geneInfoPath.toAbsolutePath()); LOGGER.debug("Parsing MIM to gene medgen file at {}", mim2geneMedgenPath.toAbsolutePath()); HpoDiseaseLoaderOptions loaderOptions = HpoDiseaseLoaderOptions.of(DISEASE_DATABASES, true, HpoDiseaseLoaderOptions.DEFAULT_COHORT_SIZE); - HpoDiseaseLoader loader = HpoDiseaseLoader.of(hpo, loaderOptions); + HpoDiseaseLoader loader = HpoDiseaseLoaders.defaultLoader(hpo, loaderOptions); HpoDiseases diseases = loader.load(hpoAnnotationsPath); - HpoAssociationData hpoAssociationData = HpoAssociationLoader.loadHpoAssociationData(hpo, geneInfoPath, mim2geneMedgenPath, null, diseases); + HpoAssociationData hpoAssociationData = HpoAssociationData.builder(hpo) + .hpoDiseases(diseases) + .mim2GeneMedgen(mim2geneMedgenPath) + .homoSapiensGeneInfo(geneInfoPath, GeneInfoGeneType.DEFAULT) + .build(); // Ingest geneToDisease int updatedGeneToDisease = ingestGeneToDiseaseMap(hpoAssociationData, ncbiGeneToHgnc, diseases, geneDiseaseDao); @@ -313,7 +314,7 @@ private static int ingestGeneToDiseaseMap(HpoAssociationData hpoAssociationData, Map> geneToDisease = new HashMap<>(); // extract relevant bits and pieces for diseases, and map NCBIGene to HGNC - Map> geneToDiseaseIdMap = hpoAssociationData.geneToDiseases(); + Map> geneToDiseaseIdMap = hpoAssociationData.associations().geneIdToDiseaseIds(); Map diseaseMap = diseases.diseaseById(); for (TermId ncbiGeneTermId : geneToDiseaseIdMap.keySet()) { @@ -341,9 +342,7 @@ private static int 
ingestDiseaseToPhenotypes(GeneDiseaseDao geneDiseaseDao, HpoD int updated = 0; for (HpoDisease disease : diseases) { - List presentPhenotypeTermIds = disease.phenotypicAbnormalitiesStream() - // We assume that the terms with missing ratio are observed/present. - .filter(pa -> pa.ratio().map(Ratio::isPositive).orElse(true)) + List presentPhenotypeTermIds = disease.presentAnnotationsStream() .map(HpoDiseaseAnnotation::id) .collect(Collectors.toList()); updated += geneDiseaseDao.insertDiseaseToPhenotypes(disease.id().getValue(), presentPhenotypeTermIds); diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java index de50f09e..fed0d009 100644 --- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java +++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/similarity/IcMicaCalculator.java @@ -2,7 +2,6 @@ import org.monarchinitiative.svanna.core.LogUtils; import org.monarchinitiative.svanna.core.hpo.TermPair; -import org.monarchinitiative.phenol.annotations.base.Ratio; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDisease; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseaseAnnotation; import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; @@ -37,9 +36,7 @@ public static Map precomputeIcMicaValues(Ontology ontology, diseaseIdToTermIds.putIfAbsent(diseaseId, new HashSet<>()); // add term ancestors - Set hpoTerms = disease.phenotypicAbnormalitiesStream() - // We assume that the terms with missing ratio are observed/present. 
- .filter(a -> a.ratio().map(Ratio::isPositive).orElse(true)) + Set hpoTerms = disease.presentAnnotationsStream() .map(HpoDiseaseAnnotation::id) .collect(Collectors.toSet()); Set inclAncestorTermIds = TermIds.augmentWithAncestors(ontology, hpoTerms, true); diff --git a/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java b/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java index 5e62b4bd..72d9a5b3 100644 --- a/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java +++ b/svanna-ingest/src/test/java/org/monarchinitiative/svanna/ingest/MakeSmallGencodeFileTest.java @@ -29,11 +29,12 @@ public class MakeSmallGencodeFileTest { @Test public void makeSmallGencodeFile() throws Exception { - Path gencodeGtf = Path.of("/home/ielis/data/gencode/gencode.v39.basic.annotation.gtf.gz"); - Path output = Path.of("../svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz"); + Path gencodeGtf = Path.of("/home/ielis/data/genes/gtf/hg38/gencode.v39.basic.annotation.gtf.gz"); + Path outputCore = Path.of("../svanna-core/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz"); + Path outputCli = Path.of("../svanna-cli/src/test/resources/gencode.10genes.v38.basic.annotation.json.gz"); // read Gencode genes & keep the target genes - GtfGeneParser parser = GtfGeneParserFactory.gtfGeneParser(gencodeGtf, ASSEMBLY); + GtfGeneParser parser = GtfGeneParserFactory.gencodeGeneParser(gencodeGtf, ASSEMBLY); Set targetGeneSymbols = Set.of("SURF1", "SURF2", "FBN1", "ZNF436", "ZBTB48", "HNF4A", "GCK", "BRCA2", "COL4A5", "SRY"); @@ -44,7 +45,10 @@ public void makeSmallGencodeFile() throws Exception { // write the target genes into the output GeneParserFactory parserFactory = GeneParserFactory.of(ASSEMBLY); GeneParser printer = parserFactory.forFormat(SerializationFormat.JSON); - try (OutputStream os = new BufferedOutputStream(new 
GzipCompressorOutputStream(Files.newOutputStream(output)))) { + try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(outputCore)))) { + printer.write(targetGenes, os); + } + try (OutputStream os = new BufferedOutputStream(new GzipCompressorOutputStream(Files.newOutputStream(outputCli)))) { printer.write(targetGenes, os); } } diff --git a/svanna-io/pom.xml b/svanna-io/pom.xml index 98268a6e..d103020c 100644 --- a/svanna-io/pom.xml +++ b/svanna-io/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 @@ -23,6 +23,10 @@ com.github.samtools htsjdk + + org.apache.commons + commons-compress + org.monarchinitiative.phenol phenol-annotations diff --git a/svanna-model/pom.xml b/svanna-model/pom.xml index 8f563ee6..87eca9af 100644 --- a/svanna-model/pom.xml +++ b/svanna-model/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0 diff --git a/svanna-test/pom.xml b/svanna-test/pom.xml index a36c0067..c5d40d9c 100644 --- a/svanna-test/pom.xml +++ b/svanna-test/pom.xml @@ -3,7 +3,7 @@ SvAnna org.monarchinitiative.svanna - 1.0.2 + 1.0.3 4.0.0