From f50aa34277997616cb2f661ad5f9ae380d2c2a21 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Thu, 30 Jun 2016 23:37:28 -0400 Subject: [PATCH 1/2] In-progress work on HPO annotation conversion to PhenoPacket. --- build.sbt | 3 +- .../org/phenopackets/pxftools/Main.scala | 3 +- .../pxftools/command/Import.scala | 18 ++++ .../pxftools/util/HPOAnnotations.scala | 84 +++++++++++++++++++ .../pxftools/util/PhenoPacketVocabulary.scala | 20 +++++ 5 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 src/main/scala/org/phenopackets/pxftools/command/Import.scala create mode 100644 src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala create mode 100644 src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala diff --git a/build.sbt b/build.sbt index 2ae1eea..754d529 100644 --- a/build.sbt +++ b/build.sbt @@ -20,7 +20,7 @@ javaOptions += "-Xmx4G" libraryDependencies ++= { Seq( - "org.phenopackets" % "phenopackets-api" % "0.0.4", + "org.phenopackets" % "phenopackets-api" % "0.0.5-SNAPSHOT", "org.backuity.clist" %% "clist-core" % "2.0.2", "org.backuity.clist" %% "clist-macros" % "2.0.2" % "provided", "net.sourceforge.owlapi" % "owlapi-distribution" % "4.2.5", @@ -28,6 +28,7 @@ libraryDependencies ++= { "com.github.jsonld-java" % "jsonld-java" % "0.8.3", "org.apache.directory.studio" % "org.apache.commons.io" % "2.4", "org.scalaz" %% "scalaz-core" % "7.2.1", + "com.github.tototoshi" %% "scala-csv" % "1.3.3", "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", "ch.qos.logback" % "logback-classic" % "1.1.7", "org.codehaus.groovy" % "groovy-all" % "2.4.6", diff --git a/src/main/scala/org/phenopackets/pxftools/Main.scala b/src/main/scala/org/phenopackets/pxftools/Main.scala index bf47a3b..d073efb 100644 --- a/src/main/scala/org/phenopackets/pxftools/Main.scala +++ b/src/main/scala/org/phenopackets/pxftools/Main.scala @@ -3,9 +3,10 @@ package org.phenopackets.pxftools import org.backuity.clist._ import org.phenopackets.pxftools.command.Convert import org.phenopackets.pxftools.command.Merge +import org.phenopackets.pxftools.command.Import object Main extends App { - Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge).foreach(_.run()) + Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge, Import).foreach(_.run()) } \ No newline at end of file diff --git a/src/main/scala/org/phenopackets/pxftools/command/Import.scala b/src/main/scala/org/phenopackets/pxftools/command/Import.scala new file mode 100644 index 0000000..54346eb --- /dev/null +++ b/src/main/scala/org/phenopackets/pxftools/command/Import.scala @@ -0,0 +1,18 @@ +package org.phenopackets.pxftools.command + +import org.backuity.clist._ +import com.github.tototoshi.csv.CSVReader +import scala.io.Source +import com.github.tototoshi.csv.TSVFormat +import org.phenopackets.pxftools.util.HPOAnnotations + +object Import extends Command(description = "Create a PhenoPacket from the input.") with Common with SingleInput { + + var inFormat = opt[String](description = "Input format. One of:\nhpoa\nturtle", default = "turtle") + + override def run(): Unit = { + val table = CSVReader.open(Source.fromInputStream(determineInput, "utf-8"))(new TSVFormat {}) + writePhenoPacket(HPOAnnotations.importFromTable(table), determineOutput, outputWriter) + } + +} diff --git a/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala new file mode 100644 index 0000000..8340feb --- /dev/null +++ b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala @@ -0,0 +1,84 @@ +package org.phenopackets.pxftools.util + +import java.util.UUID + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.jena.rdf.model.ModelFactory +import org.apache.jena.rdf.model.Resource +import org.apache.jena.rdf.model.ResourceFactory +import org.apache.jena.rdf.model.Statement +import org.phenopackets.api.PhenoPacket +import org.phenopackets.api.io.RDFReader +import org.phenopackets.api.util.ContextUtil + +import com.github.jsonldjava.core.Context +import com.github.tototoshi.csv.CSVReader + +import org.phenopackets.pxftools.util.PhenoPacketVocabulary._ +import org.apache.jena.vocabulary.RDFS +import org.apache.jena.vocabulary.RDF +import org.apache.jena.vocabulary.DC + +object HPOAnnotations { + + def importFromTable(table: CSVReader): PhenoPacket = { + val packetURI = s"urn:uuid:${UUID.randomUUID.toString}" + val packet = ResourceFactory.createResource(packetURI) + val triples = table.iteratorWithHeaders.flatMap(rowToTriples(_, packet)).toSeq + val model = ModelFactory.createDefaultModel() + model.add(triples.asJava) + model.listStatements().asScala.foreach(println) + RDFReader.readModel(model, packetURI) + } + + private def rowToTriples(row: Map[String, String], packet: Resource): Set[Statement] = { + val statements = mutable.Set.empty[Statement] + row.get("Disease ID").filter(_.nonEmpty).foreach { diseaseID => + val disease = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(diseaseID.trim, HPOContext)) + statements += ResourceFactory.createStatement(packet, Diseases, disease) + row.get("Disease Name").filter(_.nonEmpty).foreach { diseaseLabel => + statements += ResourceFactory.createStatement(disease, RDFS.label, ResourceFactory.createTypedLiteral(diseaseLabel.trim)) + } + row.get("Phenotype ID").filter(_.nonEmpty).foreach { phenotypeID => + // will we ever want to add values from other fields even if there is no phenotype class ID? + val phenotypeType = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(phenotypeID.trim, HPOContext)) + val association = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(packet, PhenotypeProfile, association) + statements += ResourceFactory.createStatement(association, Entity, disease) + val phenotype = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(association, Phenotype, phenotype) + statements += ResourceFactory.createStatement(phenotype, RDF.`type`, phenotypeType) + row.get("Phenotype Name").filter(_.nonEmpty).foreach { phenotypeLabel => + statements += ResourceFactory.createStatement(phenotypeType, RDFS.label, ResourceFactory.createTypedLiteral(phenotypeLabel.trim)) + } + row.get("Description").filter(_.nonEmpty).foreach { description => + statements += ResourceFactory.createStatement(phenotype, Description, ResourceFactory.createTypedLiteral(description.trim)) + } + if (row.get("Evidence ID").nonEmpty || row.get("Pub").nonEmpty) { + val evidence = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(association, Evidence, evidence) + row.get("Evidence ID").filter(_.nonEmpty).foreach { evidenceID => + val evidenceType = ResourceFactory.createResource(evidenceID.trim) //FIXME + statements += ResourceFactory.createStatement(evidence, RDF.`type`, evidenceType) + row.get("Evidence Name").filter(_.nonEmpty).foreach { evidenceName => + statements += ResourceFactory.createStatement(evidenceType, RDFS.label, ResourceFactory.createTypedLiteral(evidenceName.trim)) + } + } + row.get("Pub").filter(_.nonEmpty).foreach { pubID => + val pub = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(pubID.trim, HPOContext)) + statements += ResourceFactory.createStatement(evidence, Source, pub) + } + } + } + } + statements.toSet + } + + private val HPOContext: Context = new Context().parse(Map[String, Object]( + "obo" -> "http://purl.obolibrary.org/obo/", + "HP" -> "obo:HP_", + "OMIM" -> "obo:OMIM_").asJava) + +} \ No newline at end of file diff --git a/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala new file mode 100644 index 0000000..366faa6 --- /dev/null +++ b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala @@ -0,0 +1,20 @@ +package org.phenopackets.pxftools.util + +import org.apache.jena.rdf.model.ResourceFactory + +object PhenoPacketVocabulary { + + private val Pheno = "http://phenopackets.org" + private val DC = "http://purl.org/dc/terms" + + private def p = ResourceFactory.createProperty(_: String) + + val Diseases = p(s"$Pheno/diseases") + val PhenotypeProfile = p(s"$Pheno/phenotype_profile") + val Entity = p(s"$Pheno/entity") + val Phenotype = p(s"$Pheno/phenotype") + val Evidence = p(s"$Pheno/evidence") + val Description = p(s"$DC/description") + val Source = p(s"$DC/source") + +} \ No newline at end of file From dc50e9a3b6856eef5e960b84bc1b44c533050858 Mon Sep 17 00:00:00 2001 From: Jim Balhoff Date: Wed, 6 Jul 2016 17:09:12 -0400 Subject: [PATCH 2/2] Initial implementation of HPO Phenote annotation conversion. --- build.sbt | 7 +- .../org/phenopackets/pxftools/Main.scala | 3 +- .../pxftools/command/Common.scala | 52 +++++--- .../pxftools/command/Import.scala | 18 --- .../pxftools/util/HPOAnnotations.scala | 119 ++++++++++++------ .../pxftools/util/MergeUtil.scala | 3 +- .../pxftools/util/PhenoPacketVocabulary.scala | 5 +- 7 files changed, 127 insertions(+), 80 deletions(-) delete mode 100644 src/main/scala/org/phenopackets/pxftools/command/Import.scala diff --git a/build.sbt b/build.sbt index 754d529..230edcb 100644 --- a/build.sbt +++ b/build.sbt @@ -20,15 +20,18 @@ javaOptions += "-Xmx4G" libraryDependencies ++= { Seq( - "org.phenopackets" % "phenopackets-api" % "0.0.5-SNAPSHOT", + "org.phenopackets" % "phenopackets-api" % "0.0.5-SNAPSHOT" exclude("org.slf4j", "slf4j-log4j12"), "org.backuity.clist" %% "clist-core" % "2.0.2", "org.backuity.clist" %% "clist-macros" % "2.0.2" % "provided", "net.sourceforge.owlapi" % "owlapi-distribution" % "4.2.5", - "org.apache.jena" % "apache-jena-libs" % "3.1.0", + "org.phenoscape" %% "scowl" % "1.1", + "org.apache.jena" % "apache-jena-libs" % "2.12.1" exclude("org.slf4j", "slf4j-log4j12"), "com.github.jsonld-java" % "jsonld-java" % "0.8.3", "org.apache.directory.studio" % "org.apache.commons.io" % "2.4", "org.scalaz" %% "scalaz-core" % "7.2.1", "com.github.tototoshi" %% "scala-csv" % "1.3.3", + "com.nrinaudo" %% "kantan.csv" % "0.1.12", + "com.nrinaudo" %% "kantan.csv-generic" % "0.1.12", "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", "ch.qos.logback" % "logback-classic" % "1.1.7", "org.codehaus.groovy" % "groovy-all" % "2.4.6", diff --git a/src/main/scala/org/phenopackets/pxftools/Main.scala b/src/main/scala/org/phenopackets/pxftools/Main.scala index d073efb..bf47a3b 100644 --- a/src/main/scala/org/phenopackets/pxftools/Main.scala +++ b/src/main/scala/org/phenopackets/pxftools/Main.scala @@ -3,10 +3,9 @@ package org.phenopackets.pxftools import org.backuity.clist._ import org.phenopackets.pxftools.command.Convert import org.phenopackets.pxftools.command.Merge -import org.phenopackets.pxftools.command.Import object Main extends App { - Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge, Import).foreach(_.run()) + Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge).foreach(_.run()) } \ No newline at end of file diff --git a/src/main/scala/org/phenopackets/pxftools/command/Common.scala b/src/main/scala/org/phenopackets/pxftools/command/Common.scala index f025723..1b7db31 100644 --- a/src/main/scala/org/phenopackets/pxftools/command/Common.scala +++ b/src/main/scala/org/phenopackets/pxftools/command/Common.scala @@ -11,28 +11,38 @@ import java.io.OutputStream import java.io.OutputStreamWriter import org.apache.commons.io.IOUtils +import org.apache.jena.riot.Lang import org.backuity.clist._ import org.phenopackets.api.PhenoPacket import org.phenopackets.api.io.JsonGenerator import org.phenopackets.api.io.JsonReader +import org.phenopackets.api.io.RDFGenerator import org.phenopackets.api.io.YamlGenerator import org.phenopackets.api.io.YamlReader -import org.phenopackets.api.io.RDFGenerator -import org.apache.jena.riot.Lang +import org.phenopackets.pxftools.util.HPOAnnotations trait Common extends Command { type PhenoPacketWriter = PhenoPacket => String + type PhenoPacketReader = InputStream => PhenoPacket def run(): Unit var out = opt[String](description = "Output file. Omit to write to standard out.", default = "") - var format = opt[String](description = "Output format. Set the output format to one of:\nyaml\njson\nturtle", default = "yaml") + var informat = opt[Option[String]](description = "Input format. By default both yaml and json will be attempted. Set the input format to one of:\nyaml\njson\nhpo-phenote") + var outformat = opt[String](description = "Output format. Set the output format to one of:\nyaml\njson\nturtle", default = "yaml") - def outputWriter: PhenoPacketWriter = format match { - case "yaml" => YamlGenerator.render _ - case "json" => JsonGenerator.render _ + def inputReader: Option[PhenoPacketReader] = informat.map(_ match { + case "yaml" => YamlReader.readInputStream + case "json" => JsonReader.readInputStream + case "hpo-phenote" => HPOAnnotations.read + case _ => throw new ParsingException("Invalid input format.") + }) + + def outputWriter: PhenoPacketWriter = outformat match { + case "yaml" => YamlGenerator.render + case "json" => JsonGenerator.render case "turtle" => RDFGenerator.render(_, null, Lang.TURTLE) //TODO should we ask for a base? case _ => throw new ParsingException("Invalid output format.") } @@ -43,23 +53,25 @@ trait Common extends Command { } def readPhenoPacket(inputStream: InputStream): PhenoPacket = { - // This is more complicated than it ought to be so that we can reuse - // the inputStream to try multiple parsers - val baos = new ByteArrayOutputStream() - IOUtils.copy(inputStream, baos) - val bytes = baos.toByteArray() - inputStream.close() - try { - val bais = new ByteArrayInputStream(bytes); - val packet = JsonReader.readInputStream(bais) - bais.close() - packet - } catch { - case ioe: IOException => { + inputReader.map(_(inputStream)).getOrElse { + // This is more complicated than it ought to be so that we can reuse + // the inputStream to try multiple parsers + val baos = new ByteArrayOutputStream() + IOUtils.copy(inputStream, baos) + val bytes = baos.toByteArray() + inputStream.close() + try { val bais = new ByteArrayInputStream(bytes); - val packet = YamlReader.readInputStream(bais) + val packet = JsonReader.readInputStream(bais) bais.close() packet + } catch { + case ioe: IOException => { + val bais = new ByteArrayInputStream(bytes); + val packet = YamlReader.readInputStream(bais) + bais.close() + packet + } } } } diff --git a/src/main/scala/org/phenopackets/pxftools/command/Import.scala b/src/main/scala/org/phenopackets/pxftools/command/Import.scala deleted file mode 100644 index 54346eb..0000000 --- a/src/main/scala/org/phenopackets/pxftools/command/Import.scala +++ /dev/null @@ -1,18 +0,0 @@ -package org.phenopackets.pxftools.command - -import org.backuity.clist._ -import com.github.tototoshi.csv.CSVReader -import scala.io.Source -import com.github.tototoshi.csv.TSVFormat -import org.phenopackets.pxftools.util.HPOAnnotations - -object Import extends Command(description = "Create a PhenoPacket from the input.") with Common with SingleInput { - - var inFormat = opt[String](description = "Input format. One of:\nhpoa\nturtle", default = "turtle") - - override def run(): Unit = { - val table = CSVReader.open(Source.fromInputStream(determineInput, "utf-8"))(new TSVFormat {}) - writePhenoPacket(HPOAnnotations.importFromTable(table), determineOutput, outputWriter) - } - -} diff --git a/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala index 8340feb..9ec61a6 100644 --- a/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala +++ b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala @@ -5,23 +5,30 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.jena.rdf.model.ModelFactory -import org.apache.jena.rdf.model.Resource -import org.apache.jena.rdf.model.ResourceFactory -import org.apache.jena.rdf.model.Statement import org.phenopackets.api.PhenoPacket import org.phenopackets.api.io.RDFReader import org.phenopackets.api.util.ContextUtil +import org.phenopackets.pxftools.util.PhenoPacketVocabulary._ +import org.phenoscape.scowl._ +import org.semanticweb.owlapi.apibinding.OWLManager +import org.semanticweb.owlapi.model.AxiomType +import org.semanticweb.owlapi.model.IRI import com.github.jsonldjava.core.Context import com.github.tototoshi.csv.CSVReader +import com.hp.hpl.jena.rdf.model.ModelFactory +import com.hp.hpl.jena.rdf.model.Resource +import com.hp.hpl.jena.rdf.model.ResourceFactory +import com.hp.hpl.jena.rdf.model.Statement +import com.hp.hpl.jena.vocabulary.RDF +import com.hp.hpl.jena.vocabulary.RDFS +import com.typesafe.scalalogging.LazyLogging +import com.github.tototoshi.csv.TSVFormat +import java.io.InputStream -import org.phenopackets.pxftools.util.PhenoPacketVocabulary._ -import org.apache.jena.vocabulary.RDFS -import org.apache.jena.vocabulary.RDF -import org.apache.jena.vocabulary.DC +object HPOAnnotations extends LazyLogging { -object HPOAnnotations { + def read(stream: InputStream): PhenoPacket = importFromTable(CSVReader.open(scala.io.Source.fromInputStream(stream, "utf-8"))(new TSVFormat {})) def importFromTable(table: CSVReader): PhenoPacket = { val packetURI = s"urn:uuid:${UUID.randomUUID.toString}" @@ -29,48 +36,67 @@ object HPOAnnotations { val triples = table.iteratorWithHeaders.flatMap(rowToTriples(_, packet)).toSeq val model = ModelFactory.createDefaultModel() model.add(triples.asJava) - model.listStatements().asScala.foreach(println) RDFReader.readModel(model, packetURI) } private def rowToTriples(row: Map[String, String], packet: Resource): Set[Statement] = { val statements = mutable.Set.empty[Statement] - row.get("Disease ID").filter(_.nonEmpty).foreach { diseaseID => + row.getOpt("Disease ID").foreach { diseaseID => val disease = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(diseaseID.trim, HPOContext)) statements += ResourceFactory.createStatement(packet, Diseases, disease) - row.get("Disease Name").filter(_.nonEmpty).foreach { diseaseLabel => + row.getOpt("Disease Name").foreach { diseaseLabel => statements += ResourceFactory.createStatement(disease, RDFS.label, ResourceFactory.createTypedLiteral(diseaseLabel.trim)) } - row.get("Phenotype ID").filter(_.nonEmpty).foreach { phenotypeID => - // will we ever want to add values from other fields even if there is no phenotype class ID? + val association = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(packet, PhenotypeProfile, association) + statements += ResourceFactory.createStatement(association, Entity, disease) + val phenotype = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(association, Phenotype, phenotype) + row.getOpt("Phenotype ID").foreach { phenotypeID => val phenotypeType = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(phenotypeID.trim, HPOContext)) - val association = ResourceFactory.createResource() - statements += ResourceFactory.createStatement(packet, PhenotypeProfile, association) - statements += ResourceFactory.createStatement(association, Entity, disease) - val phenotype = ResourceFactory.createResource() - statements += ResourceFactory.createStatement(association, Phenotype, phenotype) - statements += ResourceFactory.createStatement(phenotype, RDF.`type`, phenotypeType) - row.get("Phenotype Name").filter(_.nonEmpty).foreach { phenotypeLabel => + val phenoRelation = if (row.getOpt("Negation ID").exists(_.trim.toUpperCase == "NOT")) { + OWLComplementOf + } else RDF.`type` + statements += ResourceFactory.createStatement(phenotype, phenoRelation, phenotypeType) + row.getOpt("Phenotype Name").foreach { phenotypeLabel => statements += ResourceFactory.createStatement(phenotypeType, RDFS.label, ResourceFactory.createTypedLiteral(phenotypeLabel.trim)) } - row.get("Description").filter(_.nonEmpty).foreach { description => - statements += ResourceFactory.createStatement(phenotype, Description, ResourceFactory.createTypedLiteral(description.trim)) + } + row.getOpt("Age of Onset ID").foreach { onsetID => + val onsetType = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(onsetID.trim, HPOContext)) + val onset = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(phenotype, Onset, onset) + statements += ResourceFactory.createStatement(onset, RDF.`type`, onsetType) + row.getOpt("Age of Onset Name").foreach { onsetLabel => + statements += ResourceFactory.createStatement(onsetType, RDFS.label, ResourceFactory.createTypedLiteral(onsetLabel.trim)) } - if (row.get("Evidence ID").nonEmpty || row.get("Pub").nonEmpty) { - val evidence = ResourceFactory.createResource() - statements += ResourceFactory.createStatement(association, Evidence, evidence) - row.get("Evidence ID").filter(_.nonEmpty).foreach { evidenceID => - val evidenceType = ResourceFactory.createResource(evidenceID.trim) //FIXME - statements += ResourceFactory.createStatement(evidence, RDF.`type`, evidenceType) - row.get("Evidence Name").filter(_.nonEmpty).foreach { evidenceName => - statements += ResourceFactory.createStatement(evidenceType, RDFS.label, ResourceFactory.createTypedLiteral(evidenceName.trim)) - } + } + row.getOpt("Frequency").foreach { frequencyDesc => + val frequency = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(phenotype, Frequency, frequency) + statements += ResourceFactory.createStatement(frequency, Description, ResourceFactory.createTypedLiteral(frequencyDesc.trim)) + } + row.getOpt("Description").foreach { description => + statements += ResourceFactory.createStatement(phenotype, Description, ResourceFactory.createTypedLiteral(description.trim)) + } + if (row.getOpt("Evidence ID").nonEmpty || row.getOpt("Pub").nonEmpty) { + val evidence = ResourceFactory.createResource() + statements += ResourceFactory.createStatement(association, Evidence, evidence) + row.getOpt("Evidence ID").foreach { evidenceID => + val evidenceTypeOpt = evidenceCodesToURI.get(evidenceID.trim) + val evidenceType = evidenceTypeOpt.getOrElse { + logger.warn(s"No IRI found for evidence code $evidenceID") + ResourceFactory.createResource(evidenceID.trim) } - row.get("Pub").filter(_.nonEmpty).foreach { pubID => - val pub = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(pubID.trim, HPOContext)) - statements += ResourceFactory.createStatement(evidence, Source, pub) + statements += ResourceFactory.createStatement(evidence, RDF.`type`, evidenceType) + row.getOpt("Evidence Name").foreach { evidenceName => + statements += ResourceFactory.createStatement(evidenceType, RDFS.label, ResourceFactory.createTypedLiteral(evidenceName.trim)) } } + row.getOpt("Pub").foreach { pubID => + val pub = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(pubID.trim, HPOContext)) + statements += ResourceFactory.createStatement(evidence, Source, pub) + } } } statements.toSet @@ -81,4 +107,25 @@ object HPOAnnotations { "HP" -> "obo:HP_", "OMIM" -> "obo:OMIM_").asJava) -} \ No newline at end of file + /** + * HPO annotations use shorthand labels as evidence IDs + */ + private lazy val evidenceCodesToURI: Map[String, Resource] = { + val manager = OWLManager.createOWLOntologyManager() + val eco = manager.loadOntology(IRI.create("http://purl.obolibrary.org/obo/eco.owl")) + val HasExactSynonym = AnnotationProperty("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym") + (for { + AnnotationAssertion(_, HasExactSynonym, term: IRI, synonym ^^ dt) <- eco.getAxioms(AxiomType.ANNOTATION_ASSERTION).asScala + } yield { + synonym -> ResourceFactory.createResource(term.toString) + }).toMap + } + + private implicit class NullEmptyStringMap(val self: Map[String, String]) extends AnyVal { + + //scala-csv puts empty strings in the result map; convert to None instead + def getOpt(key: String): Option[String] = self.get(key).filter(_.nonEmpty) + + } + +} diff --git a/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala b/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala index 04e1a88..6d2a04b 100644 --- a/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala +++ b/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala @@ -3,7 +3,8 @@ package org.phenopackets.pxftools.util import org.phenopackets.api.PhenoPacket import org.phenopackets.api.io.RDFGenerator import org.phenopackets.api.io.RDFReader -import org.apache.jena.rdf.model.ModelFactory + +import com.hp.hpl.jena.rdf.model.ModelFactory object MergeUtil { diff --git a/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala index 366faa6..02114de 100644 --- a/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala +++ b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala @@ -1,6 +1,6 @@ package org.phenopackets.pxftools.util -import org.apache.jena.rdf.model.ResourceFactory +import com.hp.hpl.jena.rdf.model.ResourceFactory object PhenoPacketVocabulary { @@ -13,8 +13,11 @@ object PhenoPacketVocabulary { val PhenotypeProfile = p(s"$Pheno/phenotype_profile") val Entity = p(s"$Pheno/entity") val Phenotype = p(s"$Pheno/phenotype") + val Onset = p(s"$Pheno/onset") + val Frequency = p(s"$Pheno/frequency") val Evidence = p(s"$Pheno/evidence") val Description = p(s"$DC/description") val Source = p(s"$DC/source") + val OWLComplementOf = p("http://www.w3.org/2002/07/owl#complementOf") } \ No newline at end of file