From f50aa34277997616cb2f661ad5f9ae380d2c2a21 Mon Sep 17 00:00:00 2001
From: Jim Balhoff <jim@balhoff.org>
Date: Thu, 30 Jun 2016 23:37:28 -0400
Subject: [PATCH 1/2] In-progress work on HPO annotation conversion to
 PhenoPacket.

---
 build.sbt                                     |  3 +-
 .../org/phenopackets/pxftools/Main.scala      |  3 +-
 .../pxftools/command/Import.scala             | 18 ++++
 .../pxftools/util/HPOAnnotations.scala        | 84 +++++++++++++++++++
 .../pxftools/util/PhenoPacketVocabulary.scala | 20 +++++
 5 files changed, 126 insertions(+), 2 deletions(-)
 create mode 100644 src/main/scala/org/phenopackets/pxftools/command/Import.scala
 create mode 100644 src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala
 create mode 100644 src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala

diff --git a/build.sbt b/build.sbt
index 2ae1eea..754d529 100644
--- a/build.sbt
+++ b/build.sbt
@@ -20,7 +20,7 @@ javaOptions += "-Xmx4G"
 
 libraryDependencies ++= {
   Seq(
-    "org.phenopackets"            %  "phenopackets-api"      % "0.0.4",
+    "org.phenopackets"            %  "phenopackets-api"      % "0.0.5-SNAPSHOT",
     "org.backuity.clist"          %% "clist-core"            % "2.0.2",
     "org.backuity.clist"          %% "clist-macros"          % "2.0.2" % "provided",
     "net.sourceforge.owlapi"      %  "owlapi-distribution"   % "4.2.5",
@@ -28,6 +28,7 @@ libraryDependencies ++= {
     "com.github.jsonld-java"      %  "jsonld-java"           % "0.8.3",
     "org.apache.directory.studio" %  "org.apache.commons.io" % "2.4",
     "org.scalaz"                  %% "scalaz-core"           % "7.2.1",
+    "com.github.tototoshi"        %% "scala-csv"             % "1.3.3",
     "com.typesafe.scala-logging"  %% "scala-logging"         % "3.4.0",
     "ch.qos.logback"              %  "logback-classic"       % "1.1.7",
     "org.codehaus.groovy"         %  "groovy-all"            % "2.4.6",
diff --git a/src/main/scala/org/phenopackets/pxftools/Main.scala b/src/main/scala/org/phenopackets/pxftools/Main.scala
index bf47a3b..d073efb 100644
--- a/src/main/scala/org/phenopackets/pxftools/Main.scala
+++ b/src/main/scala/org/phenopackets/pxftools/Main.scala
@@ -3,9 +3,10 @@ package org.phenopackets.pxftools
 import org.backuity.clist._
 import org.phenopackets.pxftools.command.Convert
 import org.phenopackets.pxftools.command.Merge
+import org.phenopackets.pxftools.command.Import
 
 object Main extends App {
 
-  Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge).foreach(_.run())
+  Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge, Import).foreach(_.run())
 
 }
\ No newline at end of file
diff --git a/src/main/scala/org/phenopackets/pxftools/command/Import.scala b/src/main/scala/org/phenopackets/pxftools/command/Import.scala
new file mode 100644
index 0000000..54346eb
--- /dev/null
+++ b/src/main/scala/org/phenopackets/pxftools/command/Import.scala
@@ -0,0 +1,18 @@
+package org.phenopackets.pxftools.command
+
+import org.backuity.clist._
+import com.github.tototoshi.csv.CSVReader
+import scala.io.Source
+import com.github.tototoshi.csv.TSVFormat
+import org.phenopackets.pxftools.util.HPOAnnotations
+
+object Import extends Command(description = "Create a PhenoPacket from the input.") with Common with SingleInput {
+
+  var inFormat = opt[String](description = "Input format. One of:\nhpoa\nturtle", default = "turtle")
+
+  override def run(): Unit = {
+    val table = CSVReader.open(Source.fromInputStream(determineInput, "utf-8"))(new TSVFormat {})
+    writePhenoPacket(HPOAnnotations.importFromTable(table), determineOutput, outputWriter)
+  }
+
+}
diff --git a/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala
new file mode 100644
index 0000000..8340feb
--- /dev/null
+++ b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala
@@ -0,0 +1,84 @@
+package org.phenopackets.pxftools.util
+
+import java.util.UUID
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.jena.rdf.model.ModelFactory
+import org.apache.jena.rdf.model.Resource
+import org.apache.jena.rdf.model.ResourceFactory
+import org.apache.jena.rdf.model.Statement
+import org.phenopackets.api.PhenoPacket
+import org.phenopackets.api.io.RDFReader
+import org.phenopackets.api.util.ContextUtil
+
+import com.github.jsonldjava.core.Context
+import com.github.tototoshi.csv.CSVReader
+
+import org.phenopackets.pxftools.util.PhenoPacketVocabulary._
+import org.apache.jena.vocabulary.RDFS
+import org.apache.jena.vocabulary.RDF
+import org.apache.jena.vocabulary.DC
+
+object HPOAnnotations {
+
+  def importFromTable(table: CSVReader): PhenoPacket = {
+    val packetURI = s"urn:uuid:${UUID.randomUUID.toString}"
+    val packet = ResourceFactory.createResource(packetURI)
+    val triples = table.iteratorWithHeaders.flatMap(rowToTriples(_, packet)).toSeq
+    val model = ModelFactory.createDefaultModel()
+    model.add(triples.asJava)
+    model.listStatements().asScala.foreach(println)
+    RDFReader.readModel(model, packetURI)
+  }
+
+  private def rowToTriples(row: Map[String, String], packet: Resource): Set[Statement] = {
+    val statements = mutable.Set.empty[Statement]
+    row.get("Disease ID").filter(_.nonEmpty).foreach { diseaseID =>
+      val disease = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(diseaseID.trim, HPOContext))
+      statements += ResourceFactory.createStatement(packet, Diseases, disease)
+      row.get("Disease Name").filter(_.nonEmpty).foreach { diseaseLabel =>
+        statements += ResourceFactory.createStatement(disease, RDFS.label, ResourceFactory.createTypedLiteral(diseaseLabel.trim))
+      }
+      row.get("Phenotype ID").filter(_.nonEmpty).foreach { phenotypeID =>
+        // will we ever want to add values from other fields even if there is no phenotype class ID?
+        val phenotypeType = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(phenotypeID.trim, HPOContext))
+        val association = ResourceFactory.createResource()
+        statements += ResourceFactory.createStatement(packet, PhenotypeProfile, association)
+        statements += ResourceFactory.createStatement(association, Entity, disease)
+        val phenotype = ResourceFactory.createResource()
+        statements += ResourceFactory.createStatement(association, Phenotype, phenotype)
+        statements += ResourceFactory.createStatement(phenotype, RDF.`type`, phenotypeType)
+        row.get("Phenotype Name").filter(_.nonEmpty).foreach { phenotypeLabel =>
+          statements += ResourceFactory.createStatement(phenotypeType, RDFS.label, ResourceFactory.createTypedLiteral(phenotypeLabel.trim))
+        }
+        row.get("Description").filter(_.nonEmpty).foreach { description =>
+          statements += ResourceFactory.createStatement(phenotype, Description, ResourceFactory.createTypedLiteral(description.trim))
+        }
+        if (row.get("Evidence ID").nonEmpty || row.get("Pub").nonEmpty) {
+          val evidence = ResourceFactory.createResource()
+          statements += ResourceFactory.createStatement(association, Evidence, evidence)
+          row.get("Evidence ID").filter(_.nonEmpty).foreach { evidenceID =>
+            val evidenceType = ResourceFactory.createResource(evidenceID.trim) //FIXME
+            statements += ResourceFactory.createStatement(evidence, RDF.`type`, evidenceType)
+            row.get("Evidence Name").filter(_.nonEmpty).foreach { evidenceName =>
+              statements += ResourceFactory.createStatement(evidenceType, RDFS.label, ResourceFactory.createTypedLiteral(evidenceName.trim))
+            }
+          }
+          row.get("Pub").filter(_.nonEmpty).foreach { pubID =>
+            val pub = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(pubID.trim, HPOContext))
+            statements += ResourceFactory.createStatement(evidence, Source, pub)
+          }
+        }
+      }
+    }
+    statements.toSet
+  }
+
+  private val HPOContext: Context = new Context().parse(Map[String, Object](
+    "obo" -> "http://purl.obolibrary.org/obo/",
+    "HP" -> "obo:HP_",
+    "OMIM" -> "obo:OMIM_").asJava)
+
+}
\ No newline at end of file
diff --git a/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala
new file mode 100644
index 0000000..366faa6
--- /dev/null
+++ b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala
@@ -0,0 +1,20 @@
+package org.phenopackets.pxftools.util
+
+import org.apache.jena.rdf.model.ResourceFactory
+
+object PhenoPacketVocabulary {
+
+  private val Pheno = "http://phenopackets.org"
+  private val DC = "http://purl.org/dc/terms"
+
+  private def p = ResourceFactory.createProperty(_: String)
+
+  val Diseases = p(s"$Pheno/diseases")
+  val PhenotypeProfile = p(s"$Pheno/phenotype_profile")
+  val Entity = p(s"$Pheno/entity")
+  val Phenotype = p(s"$Pheno/phenotype")
+  val Evidence = p(s"$Pheno/evidence")
+  val Description = p(s"$DC/description")
+  val Source = p(s"$DC/source")
+
+}
\ No newline at end of file

From dc50e9a3b6856eef5e960b84bc1b44c533050858 Mon Sep 17 00:00:00 2001
From: Jim Balhoff <jim@balhoff.org>
Date: Wed, 6 Jul 2016 17:09:12 -0400
Subject: [PATCH 2/2] Initial implementation of HPO Phenote annotation
 conversion.

---
 build.sbt                                     |   7 +-
 .../org/phenopackets/pxftools/Main.scala      |   3 +-
 .../pxftools/command/Common.scala             |  52 +++++---
 .../pxftools/command/Import.scala             |  18 ---
 .../pxftools/util/HPOAnnotations.scala        | 119 ++++++++++++------
 .../pxftools/util/MergeUtil.scala             |   3 +-
 .../pxftools/util/PhenoPacketVocabulary.scala |   5 +-
 7 files changed, 127 insertions(+), 80 deletions(-)
 delete mode 100644 src/main/scala/org/phenopackets/pxftools/command/Import.scala

diff --git a/build.sbt b/build.sbt
index 754d529..230edcb 100644
--- a/build.sbt
+++ b/build.sbt
@@ -20,15 +20,18 @@ javaOptions += "-Xmx4G"
 
 libraryDependencies ++= {
   Seq(
-    "org.phenopackets"            %  "phenopackets-api"      % "0.0.5-SNAPSHOT",
+    "org.phenopackets"            %  "phenopackets-api"      % "0.0.5-SNAPSHOT" exclude("org.slf4j", "slf4j-log4j12"),
     "org.backuity.clist"          %% "clist-core"            % "2.0.2",
     "org.backuity.clist"          %% "clist-macros"          % "2.0.2" % "provided",
     "net.sourceforge.owlapi"      %  "owlapi-distribution"   % "4.2.5",
-    "org.apache.jena"             %  "apache-jena-libs"      % "3.1.0",
+    "org.phenoscape"              %% "scowl"                 % "1.1",
+    "org.apache.jena"             %  "apache-jena-libs"      % "2.12.1" exclude("org.slf4j", "slf4j-log4j12"),
     "com.github.jsonld-java"      %  "jsonld-java"           % "0.8.3",
     "org.apache.directory.studio" %  "org.apache.commons.io" % "2.4",
     "org.scalaz"                  %% "scalaz-core"           % "7.2.1",
     "com.github.tototoshi"        %% "scala-csv"             % "1.3.3",
+    "com.nrinaudo"                %% "kantan.csv"            % "0.1.12",
+    "com.nrinaudo"                %% "kantan.csv-generic"    % "0.1.12",
     "com.typesafe.scala-logging"  %% "scala-logging"         % "3.4.0",
     "ch.qos.logback"              %  "logback-classic"       % "1.1.7",
     "org.codehaus.groovy"         %  "groovy-all"            % "2.4.6",
diff --git a/src/main/scala/org/phenopackets/pxftools/Main.scala b/src/main/scala/org/phenopackets/pxftools/Main.scala
index d073efb..bf47a3b 100644
--- a/src/main/scala/org/phenopackets/pxftools/Main.scala
+++ b/src/main/scala/org/phenopackets/pxftools/Main.scala
@@ -3,10 +3,9 @@ package org.phenopackets.pxftools
 import org.backuity.clist._
 import org.phenopackets.pxftools.command.Convert
 import org.phenopackets.pxftools.command.Merge
-import org.phenopackets.pxftools.command.Import
 
 object Main extends App {
 
-  Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge, Import).foreach(_.run())
+  Cli.parse(args).withProgramName("pxftools").withCommands(Convert, Merge).foreach(_.run())
 
 }
\ No newline at end of file
diff --git a/src/main/scala/org/phenopackets/pxftools/command/Common.scala b/src/main/scala/org/phenopackets/pxftools/command/Common.scala
index f025723..1b7db31 100644
--- a/src/main/scala/org/phenopackets/pxftools/command/Common.scala
+++ b/src/main/scala/org/phenopackets/pxftools/command/Common.scala
@@ -11,28 +11,38 @@ import java.io.OutputStream
 import java.io.OutputStreamWriter
 
 import org.apache.commons.io.IOUtils
+import org.apache.jena.riot.Lang
 import org.backuity.clist._
 import org.phenopackets.api.PhenoPacket
 import org.phenopackets.api.io.JsonGenerator
 import org.phenopackets.api.io.JsonReader
+import org.phenopackets.api.io.RDFGenerator
 import org.phenopackets.api.io.YamlGenerator
 import org.phenopackets.api.io.YamlReader
-import org.phenopackets.api.io.RDFGenerator
-import org.apache.jena.riot.Lang
+import org.phenopackets.pxftools.util.HPOAnnotations
 
 trait Common extends Command {
 
   type PhenoPacketWriter = PhenoPacket => String
+  type PhenoPacketReader = InputStream => PhenoPacket
 
   def run(): Unit
 
   var out = opt[String](description = "Output file. Omit to write to standard out.", default = "")
 
-  var format = opt[String](description = "Output format. Set the output format to one of:\nyaml\njson\nturtle", default = "yaml")
+  var informat = opt[Option[String]](description = "Input format. By default both yaml and json will be attempted. Set the input format to one of:\nyaml\njson\nhpo-phenote")
+  var outformat = opt[String](description = "Output format. Set the output format to one of:\nyaml\njson\nturtle", default = "yaml")
 
-  def outputWriter: PhenoPacketWriter = format match {
-    case "yaml"   => YamlGenerator.render _
-    case "json"   => JsonGenerator.render _
+  def inputReader: Option[PhenoPacketReader] = informat.map { // stays None when informat is None
+    case "yaml"        => YamlReader.readInputStream
+    case "json"        => JsonReader.readInputStream
+    case "hpo-phenote" => HPOAnnotations.read
+    case _             => throw new ParsingException("Invalid input format.") // any other name is rejected
+  }
+
+  def outputWriter: PhenoPacketWriter = outformat match {
+    case "yaml"   => YamlGenerator.render
+    case "json"   => JsonGenerator.render
     case "turtle" => RDFGenerator.render(_, null, Lang.TURTLE) //TODO should we ask for a base?
     case _        => throw new ParsingException("Invalid output format.")
   }
@@ -43,23 +53,25 @@ trait Common extends Command {
   }
 
   def readPhenoPacket(inputStream: InputStream): PhenoPacket = {
-    // This is more complicated than it ought to be so that we can reuse 
-    // the inputStream to try multiple parsers
-    val baos = new ByteArrayOutputStream()
-    IOUtils.copy(inputStream, baos)
-    val bytes = baos.toByteArray()
-    inputStream.close()
-    try {
-      val bais = new ByteArrayInputStream(bytes);
-      val packet = JsonReader.readInputStream(bais)
-      bais.close()
-      packet
-    } catch {
-      case ioe: IOException => {
+    inputReader.map(_(inputStream)).getOrElse {
+      // This is more complicated than it ought to be so that we can reuse 
+      // the inputStream to try multiple parsers
+      val baos = new ByteArrayOutputStream()
+      IOUtils.copy(inputStream, baos)
+      val bytes = baos.toByteArray()
+      inputStream.close()
+      try {
         val bais = new ByteArrayInputStream(bytes);
-        val packet = YamlReader.readInputStream(bais)
+        val packet = JsonReader.readInputStream(bais)
         bais.close()
         packet
+      } catch {
+        case ioe: IOException => {
+          val bais = new ByteArrayInputStream(bytes);
+          val packet = YamlReader.readInputStream(bais)
+          bais.close()
+          packet
+        }
       }
     }
   }
diff --git a/src/main/scala/org/phenopackets/pxftools/command/Import.scala b/src/main/scala/org/phenopackets/pxftools/command/Import.scala
deleted file mode 100644
index 54346eb..0000000
--- a/src/main/scala/org/phenopackets/pxftools/command/Import.scala
+++ /dev/null
@@ -1,18 +0,0 @@
-package org.phenopackets.pxftools.command
-
-import org.backuity.clist._
-import com.github.tototoshi.csv.CSVReader
-import scala.io.Source
-import com.github.tototoshi.csv.TSVFormat
-import org.phenopackets.pxftools.util.HPOAnnotations
-
-object Import extends Command(description = "Create a PhenoPacket from the input.") with Common with SingleInput {
-
-  var inFormat = opt[String](description = "Input format. One of:\nhpoa\nturtle", default = "turtle")
-
-  override def run(): Unit = {
-    val table = CSVReader.open(Source.fromInputStream(determineInput, "utf-8"))(new TSVFormat {})
-    writePhenoPacket(HPOAnnotations.importFromTable(table), determineOutput, outputWriter)
-  }
-
-}
diff --git a/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala
index 8340feb..9ec61a6 100644
--- a/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala
+++ b/src/main/scala/org/phenopackets/pxftools/util/HPOAnnotations.scala
@@ -5,23 +5,30 @@ import java.util.UUID
 import scala.collection.JavaConverters._
 import scala.collection.mutable
 
-import org.apache.jena.rdf.model.ModelFactory
-import org.apache.jena.rdf.model.Resource
-import org.apache.jena.rdf.model.ResourceFactory
-import org.apache.jena.rdf.model.Statement
 import org.phenopackets.api.PhenoPacket
 import org.phenopackets.api.io.RDFReader
 import org.phenopackets.api.util.ContextUtil
+import org.phenopackets.pxftools.util.PhenoPacketVocabulary._
+import org.phenoscape.scowl._
+import org.semanticweb.owlapi.apibinding.OWLManager
+import org.semanticweb.owlapi.model.AxiomType
+import org.semanticweb.owlapi.model.IRI
 
 import com.github.jsonldjava.core.Context
 import com.github.tototoshi.csv.CSVReader
+import com.hp.hpl.jena.rdf.model.ModelFactory
+import com.hp.hpl.jena.rdf.model.Resource
+import com.hp.hpl.jena.rdf.model.ResourceFactory
+import com.hp.hpl.jena.rdf.model.Statement
+import com.hp.hpl.jena.vocabulary.RDF
+import com.hp.hpl.jena.vocabulary.RDFS
+import com.typesafe.scalalogging.LazyLogging
+import com.github.tototoshi.csv.TSVFormat
+import java.io.InputStream
 
-import org.phenopackets.pxftools.util.PhenoPacketVocabulary._
-import org.apache.jena.vocabulary.RDFS
-import org.apache.jena.vocabulary.RDF
-import org.apache.jena.vocabulary.DC
+object HPOAnnotations extends LazyLogging {
 
-object HPOAnnotations {
+  def read(stream: InputStream): PhenoPacket = { val table = CSVReader.open(scala.io.Source.fromInputStream(stream, "utf-8"))(new TSVFormat {}); try importFromTable(table) finally table.close() } // reads the stream as UTF-8 TSV and converts it; the CSV reader is closed even if the import throws
 
   def importFromTable(table: CSVReader): PhenoPacket = {
     val packetURI = s"urn:uuid:${UUID.randomUUID.toString}"
@@ -29,48 +36,67 @@ object HPOAnnotations {
     val triples = table.iteratorWithHeaders.flatMap(rowToTriples(_, packet)).toSeq
     val model = ModelFactory.createDefaultModel()
     model.add(triples.asJava)
-    model.listStatements().asScala.foreach(println)
     RDFReader.readModel(model, packetURI)
   }
 
   private def rowToTriples(row: Map[String, String], packet: Resource): Set[Statement] = {
     val statements = mutable.Set.empty[Statement]
-    row.get("Disease ID").filter(_.nonEmpty).foreach { diseaseID =>
+    row.getOpt("Disease ID").foreach { diseaseID =>
       val disease = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(diseaseID.trim, HPOContext))
       statements += ResourceFactory.createStatement(packet, Diseases, disease)
-      row.get("Disease Name").filter(_.nonEmpty).foreach { diseaseLabel =>
+      row.getOpt("Disease Name").foreach { diseaseLabel =>
         statements += ResourceFactory.createStatement(disease, RDFS.label, ResourceFactory.createTypedLiteral(diseaseLabel.trim))
       }
-      row.get("Phenotype ID").filter(_.nonEmpty).foreach { phenotypeID =>
-        // will we ever want to add values from other fields even if there is no phenotype class ID?
+      val association = ResourceFactory.createResource()
+      statements += ResourceFactory.createStatement(packet, PhenotypeProfile, association)
+      statements += ResourceFactory.createStatement(association, Entity, disease)
+      val phenotype = ResourceFactory.createResource()
+      statements += ResourceFactory.createStatement(association, Phenotype, phenotype)
+      row.getOpt("Phenotype ID").foreach { phenotypeID =>
         val phenotypeType = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(phenotypeID.trim, HPOContext))
-        val association = ResourceFactory.createResource()
-        statements += ResourceFactory.createStatement(packet, PhenotypeProfile, association)
-        statements += ResourceFactory.createStatement(association, Entity, disease)
-        val phenotype = ResourceFactory.createResource()
-        statements += ResourceFactory.createStatement(association, Phenotype, phenotype)
-        statements += ResourceFactory.createStatement(phenotype, RDF.`type`, phenotypeType)
-        row.get("Phenotype Name").filter(_.nonEmpty).foreach { phenotypeLabel =>
+        val phenoRelation = if (row.getOpt("Negation ID").exists(_.trim.toUpperCase == "NOT")) {
+          OWLComplementOf
+        } else RDF.`type`
+        statements += ResourceFactory.createStatement(phenotype, phenoRelation, phenotypeType)
+        row.getOpt("Phenotype Name").foreach { phenotypeLabel =>
           statements += ResourceFactory.createStatement(phenotypeType, RDFS.label, ResourceFactory.createTypedLiteral(phenotypeLabel.trim))
         }
-        row.get("Description").filter(_.nonEmpty).foreach { description =>
-          statements += ResourceFactory.createStatement(phenotype, Description, ResourceFactory.createTypedLiteral(description.trim))
+      }
+      row.getOpt("Age of Onset ID").foreach { onsetID =>
+        val onsetType = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(onsetID.trim, HPOContext))
+        val onset = ResourceFactory.createResource()
+        statements += ResourceFactory.createStatement(phenotype, Onset, onset)
+        statements += ResourceFactory.createStatement(onset, RDF.`type`, onsetType)
+        row.getOpt("Age of Onset Name").foreach { onsetLabel =>
+          statements += ResourceFactory.createStatement(onsetType, RDFS.label, ResourceFactory.createTypedLiteral(onsetLabel.trim))
         }
-        if (row.get("Evidence ID").nonEmpty || row.get("Pub").nonEmpty) {
-          val evidence = ResourceFactory.createResource()
-          statements += ResourceFactory.createStatement(association, Evidence, evidence)
-          row.get("Evidence ID").filter(_.nonEmpty).foreach { evidenceID =>
-            val evidenceType = ResourceFactory.createResource(evidenceID.trim) //FIXME
-            statements += ResourceFactory.createStatement(evidence, RDF.`type`, evidenceType)
-            row.get("Evidence Name").filter(_.nonEmpty).foreach { evidenceName =>
-              statements += ResourceFactory.createStatement(evidenceType, RDFS.label, ResourceFactory.createTypedLiteral(evidenceName.trim))
-            }
+      }
+      row.getOpt("Frequency").foreach { frequencyDesc =>
+        val frequency = ResourceFactory.createResource()
+        statements += ResourceFactory.createStatement(phenotype, Frequency, frequency)
+        statements += ResourceFactory.createStatement(frequency, Description, ResourceFactory.createTypedLiteral(frequencyDesc.trim))
+      }
+      row.getOpt("Description").foreach { description =>
+        statements += ResourceFactory.createStatement(phenotype, Description, ResourceFactory.createTypedLiteral(description.trim))
+      }
+      if (row.getOpt("Evidence ID").nonEmpty || row.getOpt("Pub").nonEmpty) {
+        val evidence = ResourceFactory.createResource()
+        statements += ResourceFactory.createStatement(association, Evidence, evidence)
+        row.getOpt("Evidence ID").foreach { evidenceID =>
+          val evidenceTypeOpt = evidenceCodesToURI.get(evidenceID.trim)
+          val evidenceType = evidenceTypeOpt.getOrElse {
+            logger.warn(s"No IRI found for evidence code $evidenceID")
+            ResourceFactory.createResource(evidenceID.trim)
           }
-          row.get("Pub").filter(_.nonEmpty).foreach { pubID =>
-            val pub = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(pubID.trim, HPOContext))
-            statements += ResourceFactory.createStatement(evidence, Source, pub)
+          statements += ResourceFactory.createStatement(evidence, RDF.`type`, evidenceType)
+          row.getOpt("Evidence Name").foreach { evidenceName =>
+            statements += ResourceFactory.createStatement(evidenceType, RDFS.label, ResourceFactory.createTypedLiteral(evidenceName.trim))
           }
         }
+        row.getOpt("Pub").foreach { pubID =>
+          val pub = ResourceFactory.createResource(ContextUtil.expandIdentifierAsValue(pubID.trim, HPOContext))
+          statements += ResourceFactory.createStatement(evidence, Source, pub)
+        }
       }
     }
     statements.toSet
@@ -81,4 +107,25 @@ object HPOAnnotations {
     "HP" -> "obo:HP_",
     "OMIM" -> "obo:OMIM_").asJava)
 
-}
\ No newline at end of file
+  /**
+   * HPO annotations use shorthand labels as evidence IDs; this maps each hasExactSynonym value in eco.owl to a resource for the annotated term.
+   */
+  private lazy val evidenceCodesToURI: Map[String, Resource] = {
+    val manager = OWLManager.createOWLOntologyManager()
+    val eco = manager.loadOntology(IRI.create("http://purl.obolibrary.org/obo/eco.owl")) // runs on first access of this lazy val; presumably a network fetch — confirm behaviour when offline
+    val HasExactSynonym = AnnotationProperty("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym")
+    (for {
+      AnnotationAssertion(_, HasExactSynonym, term: IRI, synonym ^^ dt) <- eco.getAxioms(AxiomType.ANNOTATION_ASSERTION).asScala // axioms not matching this pattern are skipped; dt is bound but unused
+    } yield {
+      synonym -> ResourceFactory.createResource(term.toString)
+    }).toMap // pairs sharing the same synonym collapse to a single entry
+  }
+
+  private implicit class NullEmptyStringMap(val self: Map[String, String]) extends AnyVal {
+    // scala-csv puts empty strings in the result map; convert to None instead.
+    // Values are trimmed first, so a whitespace-only cell is also treated as absent.
+    def getOpt(key: String): Option[String] = self.get(key).map(_.trim).filter(_.nonEmpty)
+
+  }
+
+}
diff --git a/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala b/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala
index 04e1a88..6d2a04b 100644
--- a/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala
+++ b/src/main/scala/org/phenopackets/pxftools/util/MergeUtil.scala
@@ -3,7 +3,8 @@ package org.phenopackets.pxftools.util
 import org.phenopackets.api.PhenoPacket
 import org.phenopackets.api.io.RDFGenerator
 import org.phenopackets.api.io.RDFReader
-import org.apache.jena.rdf.model.ModelFactory
+
+import com.hp.hpl.jena.rdf.model.ModelFactory
 
 object MergeUtil {
 
diff --git a/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala
index 366faa6..02114de 100644
--- a/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala
+++ b/src/main/scala/org/phenopackets/pxftools/util/PhenoPacketVocabulary.scala
@@ -1,6 +1,6 @@
 package org.phenopackets.pxftools.util
 
-import org.apache.jena.rdf.model.ResourceFactory
+import com.hp.hpl.jena.rdf.model.ResourceFactory
 
 object PhenoPacketVocabulary {
 
@@ -13,8 +13,11 @@ object PhenoPacketVocabulary {
   val PhenotypeProfile = p(s"$Pheno/phenotype_profile")
   val Entity = p(s"$Pheno/entity")
   val Phenotype = p(s"$Pheno/phenotype")
+  val Onset = p(s"$Pheno/onset")
+  val Frequency = p(s"$Pheno/frequency")
   val Evidence = p(s"$Pheno/evidence")
   val Description = p(s"$DC/description")
   val Source = p(s"$DC/source")
+  val OWLComplementOf = p("http://www.w3.org/2002/07/owl#complementOf")
 
 }
\ No newline at end of file