Skip to content

Commit

Permalink
adding data resolver with tests, renaming package from jacksonlaborat…
Browse files Browse the repository at this point in the history
…ory to jax
  • Loading branch information
iimpulse committed Oct 24, 2023
1 parent 64b4e54 commit 254ce91
Show file tree
Hide file tree
Showing 34 changed files with 333 additions and 80 deletions.
3 changes: 2 additions & 1 deletion oan-etl/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ plugins {
id('jacoco')
}

group = 'org.jacksonlaboratory'
group = 'org.jax'
version = '0.1'

repositories {
mavenCentral()
}

dependencies {
implementation project(':oan-model')
annotationProcessor("info.picocli:picocli-codegen")
annotationProcessor("io.micronaut.serde:micronaut-serde-processor")
testAnnotationProcessor("io.micronaut:micronaut-inject-java")
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package org.jacksonlaboratory;
package org.jax.oan;

import io.micronaut.configuration.picocli.PicocliRunner;
import jakarta.inject.Inject;
import org.jacksonlaboratory.graph.Operations;
import org.jacksonlaboratory.ontology.HpoGraphLoader;
import org.jax.oan.exception.OntologyAnnotationNetworkRuntimeException;
import org.jax.oan.graph.Operations;
import org.jax.oan.ontology.HpoGraphLoader;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;

@Command(name = "graph",
Expand All @@ -22,7 +23,7 @@ public class GraphCommand implements Runnable {
Operations operations;

@Option(names = {"-d", "--data"}, description = "The directory with the data.")
String path;
Path path;

@Option(names = {"-m", "--modules"}, description = "The list of modules to load into the graph.")
List<String> modules;
Expand All @@ -35,8 +36,8 @@ public void run() {
try {
operations.truncate();
hpoGraphLoader.load(path);
} catch (IOException e) {
throw new RuntimeException(e);
} catch (Exception e) {
throw new OntologyAnnotationNetworkRuntimeException(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jacksonlaboratory.graph;
package org.jax.oan.graph;

import jakarta.inject.Singleton;
import org.jacksonlaboratory.model.OntologyModule;
import org.jax.oan.core.OntologyModule;
import org.neo4j.driver.Driver;
import org.neo4j.driver.Transaction;
import org.slf4j.Logger;
Expand Down
10 changes: 10 additions & 0 deletions oan-etl/src/main/java/org/jax/oan/ontology/GraphLoader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.jax.oan.ontology;

import org.jax.oan.exception.OntologyAnnotationNetworkException;

import java.io.IOException;
import java.nio.file.Path;

/**
 * Contract for components that load data from a data directory.
 *
 * <p>NOTE(review): judging by the name and package this targets the ontology graph;
 * the interface itself only fixes the signature below — confirm against implementations.
 */
public interface GraphLoader {
/**
 * Loads data found under the given directory.
 *
 * @param dataDirectory directory containing the input data files
 * @throws IOException declared for implementations that fail while reading input
 * @throws OntologyAnnotationNetworkException declared for implementations that hit an
 *         application-level failure (presumably missing or invalid data — confirm with implementers)
 */
void load(Path dataDirectory) throws IOException, OntologyAnnotationNetworkException;
}
70 changes: 70 additions & 0 deletions oan-etl/src/main/java/org/jax/oan/ontology/HpoDataResolver.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package org.jax.oan.ontology;

import org.jax.oan.exception.OntologyAnnotationNetworkDataException;
import org.jax.oan.exception.OntologyAnnotationNetworkException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Objects;

/**
 * Resolves the locations of the HPO input files inside a single data directory.
 *
 * <p>Instances are created through {@link #of(Path)}. Construction verifies that every
 * required file exists as a regular file, so a successfully created resolver only hands
 * out paths that were present at creation time.
 */
public class HpoDataResolver {
    private static final Logger LOGGER = LoggerFactory.getLogger(HpoDataResolver.class);

    private final Path dataDirectory;

    /**
     * Creates a resolver for the given data directory after validating its contents.
     *
     * @param dataDirectory directory expected to contain all required HPO files
     * @return a resolver bound to {@code dataDirectory}
     * @throws NullPointerException if {@code dataDirectory} is {@code null}
     * @throws OntologyAnnotationNetworkException if one or more required files are missing
     *         (raised as an {@code OntologyAnnotationNetworkDataException})
     */
    public static HpoDataResolver of(Path dataDirectory) throws OntologyAnnotationNetworkException {
        return new HpoDataResolver(dataDirectory);
    }

    private HpoDataResolver(Path dataDirectory) throws OntologyAnnotationNetworkException {
        Objects.requireNonNull(dataDirectory, "Data directory must not be null!");
        this.dataDirectory = dataDirectory;
        validateHpoFiles();
    }

    /**
     * Checks that each required file is a regular file, logging every missing one before
     * failing, so a single run reports all problems instead of only the first.
     *
     * @throws OntologyAnnotationNetworkException if at least one required file is missing
     */
    private void validateHpoFiles() throws OntologyAnnotationNetworkException {
        boolean anyMissing = false;
        List<Path> requiredFiles = List.of(hpJson(), hgncCompleteSet(), mim2geneMedgen(), phenotypeAnnotations(),
                orpha2Gene(), loinc());
        for (Path file : requiredFiles) {
            if (!Files.isRegularFile(file)) {
                // Path.getFileName() works for every filesystem provider; Path.toFile() throws
                // UnsupportedOperationException for paths outside the default filesystem.
                LOGGER.error("Missing required file `{}` in `{}`.", file.getFileName(), dataDirectory.toAbsolutePath());
                anyMissing = true;
            }
        }
        if (anyMissing) {
            throw new OntologyAnnotationNetworkDataException("Missing one or more required files in OntologyAnnotationNetwork data directory!");
        }
    }

    /**
     * @return the data directory this resolver was created with
     */
    public Path dataDirectory() {
        return dataDirectory;
    }

    /**
     * @return path of {@code hp-simple-non-classified.json} inside the data directory
     */
    public Path hpJson() {
        return dataDirectory.resolve("hp-simple-non-classified.json");
    }

    /**
     * @return path of {@code hgnc_complete_set.txt} inside the data directory
     */
    public Path hgncCompleteSet() {
        return dataDirectory.resolve("hgnc_complete_set.txt");
    }

    /**
     * @return path of {@code mim2gene_medgen} inside the data directory
     */
    public Path mim2geneMedgen() {
        return dataDirectory.resolve("mim2gene_medgen");
    }

    /**
     * @return path of {@code phenotype.hpoa} inside the data directory
     */
    public Path phenotypeAnnotations() {
        return dataDirectory.resolve("phenotype.hpoa");
    }

    /**
     * @return path of {@code en_product6.xml} inside the data directory
     */
    public Path orpha2Gene() {
        return dataDirectory.resolve("en_product6.xml");
    }

    /**
     * @return path of {@code loinc2hpo-annotations-merged.tsv} inside the data directory
     */
    public Path loinc() {
        return dataDirectory.resolve("loinc2hpo-annotations-merged.tsv");
    }
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package org.jacksonlaboratory.ontology;
package org.jax.oan.ontology;

import io.micronaut.context.annotation.Context;
import org.jacksonlaboratory.graph.Operations;
import org.jacksonlaboratory.model.OntologyModule;
import org.monarchinitiative.phenol.annotations.assoc.MissingPhenolResourceException;
import org.jax.oan.exception.OntologyAnnotationNetworkException;
import org.jax.oan.exception.OntologyAnnotationNetworkRuntimeException;
import org.jax.oan.graph.Operations;
import org.jax.oan.core.OntologyModule;
import org.monarchinitiative.phenol.annotations.formats.AnnotationReference;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoAssociationData;
import org.monarchinitiative.phenol.annotations.formats.hpo.HpoOnset;
Expand All @@ -21,7 +22,6 @@
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
Expand All @@ -44,17 +44,12 @@ public HpoGraphLoader(Driver driver, Operations operations) {
this.operations = operations;
}
@Override
public void load(String folder) throws IOException, MissingPhenolResourceException {
final File ontologyFile = new File(String.format("data/%s-simple-non-classified.json", "hp"));
final Ontology hpoOntology = OntologyLoader.loadOntology(ontologyFile);
final Path hgncPath = new File("data/hgnc_complete_set.txt").toPath();
final Path omimToGenePath = new File("data/mim2gene_medgen").toPath();
final Path hpoaFilePath = new File("data/phenotype.hpoa").toPath();
final Path orphaToGenePath = new File("data/en_product6.xml").toPath();
final File loincPath = new File("data/loinc2hpo-annotations-merged.tsv");
final HpoaDiseaseDataContainer diseases = HpoaDiseaseDataLoader.of(Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.ORPHANET)).loadDiseaseData(hpoaFilePath);
final HpoAssociationData associations = HpoAssociationData.builder(hpoOntology).orphaToGenePath(orphaToGenePath).mim2GeneMedgen(omimToGenePath)
.hpoDiseases(diseases).hgncCompleteSetArchive(hgncPath).build();
public void load(Path hpoDataDirectory) throws IOException, OntologyAnnotationNetworkException {
final HpoDataResolver dataResolver = HpoDataResolver.of(hpoDataDirectory);
final Ontology hpoOntology = OntologyLoader.loadOntology(dataResolver.hpJson().toFile());
final HpoaDiseaseDataContainer diseases = HpoaDiseaseDataLoader.of(Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.ORPHANET)).loadDiseaseData(dataResolver.phenotypeAnnotations());
final HpoAssociationData associations = HpoAssociationData.builder(hpoOntology).orphaToGenePath(dataResolver.orpha2Gene()).mim2GeneMedgen(dataResolver.mim2geneMedgen())
.hpoDiseases(diseases).hgncCompleteSetArchive(dataResolver.hgncCompleteSet()).build();
List<TermId> phenotypes = diseases.stream().flatMap(d -> d.annotationLines().stream().map(HpoAnnotationLine::phenotypeTermId)).distinct().toList();
operations.dropIndexes(OntologyModule.HPO);
try (Session session = driver.session()) {
Expand All @@ -64,14 +59,14 @@ public void load(String folder) throws IOException, MissingPhenolResourceExcepti
operations.createIndexes(OntologyModule.HPO);
diseaseToPhenotype(session, diseases, hpoOntology);
diseaseToGene(session, associations);
assayToPhenotype(session, loincPath);
assayToPhenotype(session, dataResolver.loinc());
}
}

protected void assayToPhenotype(Session session, File loinc){
protected void assayToPhenotype(Session session, Path loinc){
logger.info("Loading Assay Relationships...");
Transaction tx = session.beginTransaction();
try (BufferedReader reader = new BufferedReader(new FileReader(loinc))) {
try (BufferedReader reader = new BufferedReader(new FileReader(loinc.toFile()))) {
String line;
reader.readLine();
while ((line = reader.readLine()) != null) {
Expand All @@ -86,8 +81,7 @@ protected void assayToPhenotype(Session session, File loinc){
tx.commit();
tx.close();
} catch (IOException e) {
// TODO: throw oan-error
e.printStackTrace();
throw new OntologyAnnotationNetworkRuntimeException("There was a problem with the required assay file format.");
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jacksonlaboratory.graph;
package org.jax.oan.graph;

import io.micronaut.test.extensions.junit5.annotation.MicronautTest;
import org.jacksonlaboratory.model.OntologyModule;
import org.jax.oan.core.OntologyModule;
import org.junit.jupiter.api.Test;
import org.neo4j.driver.Driver;
import org.neo4j.driver.Record;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.jax.oan.ontology;

import org.jax.oan.exception.OntologyAnnotationNetworkDataException;
import org.jax.oan.exception.OntologyAnnotationNetworkException;
import org.junit.jupiter.api.Test;

import java.nio.file.Path;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for {@link HpoDataResolver}: path resolution against a populated directory and
 * failure against a directory that lacks the required files.
 */
class HpoDataResolverTest {

    /**
     * A directory containing all required files yields a resolver whose accessors point at
     * the expected file names inside that directory.
     */
    @Test
    void of() throws OntologyAnnotationNetworkException {
        Path dataDirectory = Path.of("src/test/resources");
        HpoDataResolver dataResolver = HpoDataResolver.of(dataDirectory);
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes
        // failure messages report the right side as "expected".
        assertEquals(dataDirectory.resolve("hp-simple-non-classified.json"), dataResolver.hpJson());
        assertEquals(dataDirectory.resolve("mim2gene_medgen"), dataResolver.mim2geneMedgen());
        assertEquals(dataDirectory.resolve("hgnc_complete_set.txt"), dataResolver.hgncCompleteSet());
        assertEquals(dataDirectory.resolve("phenotype.hpoa"), dataResolver.phenotypeAnnotations());
        assertEquals(dataDirectory.resolve("en_product6.xml"), dataResolver.orpha2Gene());
        assertEquals(dataDirectory.resolve("loinc2hpo-annotations-merged.tsv"), dataResolver.loinc());
        assertEquals(dataDirectory, dataResolver.dataDirectory());
    }

    /**
     * A directory without the required files must be rejected at creation time.
     */
    @Test
    void error() {
        assertThrows(OntologyAnnotationNetworkDataException.class, () -> HpoDataResolver.of(Path.of("src/main/resources")));
    }
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package org.jacksonlaboratory.ontology;
package org.jax.oan.ontology;

import io.micronaut.core.io.ResourceLoader;
import io.micronaut.test.extensions.junit5.annotation.MicronautTest;
import org.jacksonlaboratory.graph.Operations;
import org.jax.oan.exception.OntologyAnnotationNetworkException;
import org.jax.oan.graph.Operations;
import org.junit.jupiter.api.Test;
import org.monarchinitiative.phenol.annotations.formats.AnnotationReference;
import org.monarchinitiative.phenol.annotations.formats.EvidenceCode;
Expand All @@ -19,7 +19,6 @@
import org.neo4j.driver.types.Node;

import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.List;
import java.util.Set;
Expand All @@ -37,18 +36,16 @@ class HpoGraphLoaderTest {

final Session session;

public HpoGraphLoaderTest(Driver driver, ResourceLoader resourceLoader, HpoGraphLoader graphLoader, Operations operations) throws IOException, URISyntaxException {
public HpoGraphLoaderTest(Driver driver, HpoGraphLoader graphLoader, Operations operations) throws IOException, OntologyAnnotationNetworkException {
this.graphLoader = graphLoader;
this.operations = operations;
Ontology ontology = OntologyLoader.loadOntology(resourceLoader.getResourceAsStream("classpath:hp.json").get());
final HpoaDiseaseDataContainer container = HpoaDiseaseDataLoader.of(Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.ORPHANET)).loadDiseaseData(resourceLoader.getResourceAsStream("classpath:phenotype.hpoa").get());
HpoDataResolver hpoDataResolver = HpoDataResolver.of(Path.of("src/test/resources"));
Ontology ontology = OntologyLoader.loadOntology(hpoDataResolver.hpJson().toFile());
final HpoaDiseaseDataContainer container = HpoaDiseaseDataLoader.of(Set.of(DiseaseDatabase.OMIM, DiseaseDatabase.ORPHANET)).loadDiseaseData(hpoDataResolver.phenotypeAnnotations());
session = driver.session();
final Path mim2gene = Path.of(resourceLoader.getResource("classpath:mim2gene_medgen_short").get().toURI());
final Path hgncPath = Path.of(resourceLoader.getResource("classpath:hgnc_2gene.txt").get().toURI());
final HpoAssociationData associations = HpoAssociationData.builder(ontology).mim2GeneMedgen(mim2gene)
.hpoDiseases(container).hgncCompleteSetArchive(hgncPath).build();
final Path loincPath = Path.of(resourceLoader.getResource("classpath:loinc-annotations.tsv").get().toURI());
configureGraph(associations, container, ontology, loincPath);
final HpoAssociationData associations = HpoAssociationData.builder(ontology).mim2GeneMedgen(hpoDataResolver.mim2geneMedgen())
.hpoDiseases(container).hgncCompleteSetArchive(hpoDataResolver.hgncCompleteSet()).build();
configureGraph(associations, container, ontology, hpoDataResolver.loinc());
}

void configureGraph(HpoAssociationData associations, HpoaDiseaseDataContainer container, Ontology ontology,
Expand All @@ -60,7 +57,7 @@ void configureGraph(HpoAssociationData associations, HpoaDiseaseDataContainer co
graphLoader.diseaseToPhenotype(session, container, ontology);
graphLoader.genes(session, associations);
graphLoader.diseaseToGene(session, associations);
graphLoader.assayToPhenotype(session, loincPath.toFile());
graphLoader.assayToPhenotype(session, loincPath);
}

@Test
Expand Down
Loading

0 comments on commit 254ce91

Please sign in to comment.