From f4d6ff0f26289517d933187e37c7946495c8714c Mon Sep 17 00:00:00 2001 From: David Johnson Date: Wed, 27 Jul 2016 17:12:05 +0100 Subject: [PATCH] Fixes to address problems reported by COPO, linked to issues #67 #63, as well as a few others. * Add missing STUDY_SET element to study.xml output * Modifications to SraExportPipelineComponent for new and updated assay types supported by SRA 1.5 Note, some of these changes may affect ISAcreator and ISAconfigurator builds. Until tested with those, keep these changes in this hotfix/copo-sra-export branch. --- .../commandline/ConverterShellCommand.java | 8 +- .../sra/SraExportPipelineComponent.java | 468 ++++++++++++++---- .../isatab/export/sra/SraExporter.java | 62 ++- 3 files changed, 417 insertions(+), 121 deletions(-) diff --git a/import_layer/src/main/java/org/isatools/isatab/commandline/ConverterShellCommand.java b/import_layer/src/main/java/org/isatools/isatab/commandline/ConverterShellCommand.java index bd00b23..dba659e 100644 --- a/import_layer/src/main/java/org/isatools/isatab/commandline/ConverterShellCommand.java +++ b/import_layer/src/main/java/org/isatools/isatab/commandline/ConverterShellCommand.java @@ -93,9 +93,13 @@ public static void main(String[] args) { ISATABReducedMapper mapper = new ISATABReducedMapper(new BIIObjectStore(), loader.load()); ISAConfiguratorValidator validator = new ISAConfiguratorValidator(mapper.map()); log.info("Running validator"); - if (validator.validate() != GUIInvokerResult.SUCCESS) { - log.warn("Validation failed"); + GUIInvokerResult validationResult = validator.validate(); + if (validationResult == GUIInvokerResult.ERROR) { + throw new Exception("Validation failed"); } else { + if (validationResult == GUIInvokerResult.WARNING) { + log.warn("Validation succceded with warnings"); + } log.info("Using SraExporter"); SraExporter exporter = new SraExporter(store, sourceDirPath, exportPath); exporter.export(); diff --git a/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExportPipelineComponent.java b/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExportPipelineComponent.java index f67bf35..d2c5bea 100644 --- a/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExportPipelineComponent.java +++ b/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExportPipelineComponent.java @@ -1,6 +1,7 @@ /** - The ISAconverter, ISAvalidator & BII Management Tool are components of the ISA software suite (http://www.isa-tools.org) + The ISAconverter, ISAvalidator & BII Management Tool are components of the ISA software suite (http://www.isa-tools. + org) Exhibit A The ISAconverter, ISAvalidator & BII Management Tool are licensed under the Mozilla Public License (MPL) version @@ -178,12 +179,12 @@ protected boolean buildExportedAssay(Assay assay, /*SubmissionType.FILES xsubFil EXPERIMENTREF xexpRef = EXPERIMENTREF.Factory.newInstance(); xexpRef.setRefname(materialAcc); - STUDYREF.Factory.newInstance(); + STUDYREF.Factory.newInstance(); - // DESIGN xdesign = DESIGN.Factory.newInstance(); + // DESIGN xdesign = DESIGN.Factory.newInstance(); LibraryType xdesign = LibraryType.Factory.newInstance(); xdesign.setDESIGNDESCRIPTION("See study and sample descriptions for details"); @@ -352,8 +353,8 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { // if (targetTaxon != null) { xlib.setLIBRARYNAME(assay.getAcc() + "_" + targetTaxon); } // else { // //IF no 'parameter [library name] is found', then set the library name to be that on the assay - xlib.setLIBRARYNAME(assay.getAcc() + ""); - // } + xlib.setLIBRARYNAME(assay.getAcc() + ""); + // } StringBuffer protocol = new StringBuffer(); String pDescription = StringUtils.trimToNull(papp.getProtocol().getDescription()); @@ -361,69 +362,374 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { protocol.append("\n protocol_description: " + pDescription); } - - //HERE we handle SRA way of coding transcription profiling using sequencing. We can automatically set LIBRARY SOURCE to TRANSCRIPTOMIC + //HERE we handle SRA way of coding whole genome sequencing using nucleic sequencing. We can automatically set LIBRARY SOURCE to GENOMIC //same for Strategy and selection however we check against the user input via the ISA file - if (measurement.equalsIgnoreCase("transcription profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { + if ((measurement.equalsIgnoreCase("genome sequencing") || + (measurement.equalsIgnoreCase("whole genome sequencing")) + && technology.equalsIgnoreCase("nucleotide sequencing"))){ + String source = getParameterValue(assay, papp, "library source", true); + String selection = getParameterValue(assay, papp, "library selection", true); + String strategy = getParameterValue(assay, papp, "library strategy", true); + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); + if (("GENOMIC".equalsIgnoreCase(source)) || + ("GENOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METAGENOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); + } - xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.TRANSCRIPTOMIC); - xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.RNA_SEQ); - xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.RT_PCR); + if (("WGS".equalsIgnoreCase(strategy)) || ("OTHER".equalsIgnoreCase(strategy))) { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); + } else { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); + } + + if (("RANDOM".equalsIgnoreCase(selection)) || ("UNSPECIFIED".equalsIgnoreCase(selection))) { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); + } else { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.UNSPECIFIED); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } ProtocolApplication pappIp = getProtocol(assay, "library construction"); if (pappIp == null) { return null; } - - } - if (measurement.equalsIgnoreCase("DNA methylation profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { - xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); + //HERE we handle SRA way of coding targeted genome sequencing using nucleic sequencing. We can automatically set LIBRARY SOURCE to GENOMIC + //same for Strategy and selection however we check against the user input via the ISA file + if ((measurement.equalsIgnoreCase("exome sequencing") && technology.equalsIgnoreCase("nucleotide sequencing"))){ + String source = getParameterValue(assay, papp, "library source", true); String selection = getParameterValue(assay, papp, "library selection", true); String strategy = getParameterValue(assay, papp, "library strategy", true); - //String strategy = getParameterValue(assay, papp, "library strategy", true); + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); + if (("GENOMIC".equalsIgnoreCase(source)) || + ("GENOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METAGENOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); + } + + if (("WXS".equalsIgnoreCase(strategy)) || ("OTHER".equalsIgnoreCase(strategy))) { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); + } else { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); + } + + if (("RANDOM".equalsIgnoreCase(selection)) || + ("Hybrid Selection".equalsIgnoreCase(selection)) || + ("Reduced Representation".equalsIgnoreCase(selection)) || + ("MDA".equalsIgnoreCase(selection)) || + ("RANDOM PCR".equalsIgnoreCase(selection)) || + ("RANDOM".equalsIgnoreCase(selection)) || + ("PCR".equalsIgnoreCase(selection)) || + ("other".equalsIgnoreCase(selection)) || + ("unspecified".equalsIgnoreCase(selection)) + ) { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); + } else { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.UNSPECIFIED); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } + + ProtocolApplication pappIp = getProtocol(assay, "library construction"); + if (pappIp == null) { + return null; + } + } + + + + //HERE we handle SRA way of coding transcription profiling using sequencing. We can automatically set LIBRARY SOURCE to TRANSCRIPTOMIC + //same for Strategy and selection however we check against the user input via the ISA file + if (measurement.equalsIgnoreCase("transcription profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { + + String source = getParameterValue(assay, papp, "library source", true); + String strategy = getParameterValue(assay, papp, "library strategy", true); + String selection = getParameterValue(assay, papp, "library selection", true); //now checking that the input obtained from parsing ISA is compatible with SRA CV + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.TRANSCRIPTOMIC); + if (("TRANSCRIPTOMIC".equalsIgnoreCase(source)) || + ("TRANSCRIPTOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METATRANSCRIPTOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); + } + if (("RNA-Seq".equalsIgnoreCase(strategy)) || + ("ssRNA-Seq".equalsIgnoreCase(strategy)) || + ("miRNA-Seq".equalsIgnoreCase(strategy)) || + ("ncRNA-Seq".equalsIgnoreCase(strategy)) || + ("FL-cDNA".equalsIgnoreCase(strategy)) || + ("EST".equalsIgnoreCase(strategy)) || + ("OTHER".equalsIgnoreCase(strategy)) + ) { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); + } else { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); + } + //xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.RNA_SEQ); + //xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.RT_PCR); + if (("RT-PCR".equalsIgnoreCase(selection)) || + ("cDNA".equalsIgnoreCase(selection)) || + ("cDNA_randomPriming".equalsIgnoreCase(selection)) || + ("cDNA_oligo_dT".equalsIgnoreCase(selection)) || + ("PolyA".equalsIgnoreCase(selection)) || + ("Oligo-dT".equals(selection)) || + ("Inverse rRNA".equals(selection)) || + ("Inverse rRNA selection".equals(selection)) || + ("CAGE".equals(selection)) || + ("RACE".equals(selection)) || + ("other".equalsIgnoreCase(selection)) + ) { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); + } else { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } + + + ProtocolApplication pappIp = getProtocol(assay, "library construction"); + if (pappIp == null) { + return null; + } + } + //HERE we handle SRA way of coding DNA Methylation profiling using nucleotide sequencing. We can automatically + //set LIBRARY SOURCE to GENOMIC + //but Library Strategy and Selection are supplied by users and need to be checked. + //allowed values: {Bisulfite-Seq,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER} and {PCR,RANDOM-PCR,HMPR,MF,MSLL,5-methyl + //cytidine antibody,MBD2 protein methyl-CpG binding domain,other,unspecified} respectively + if (measurement.equalsIgnoreCase("DNA methylation profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); + String source = getParameterValue(assay, papp, "library source", true); + String strategy = getParameterValue(assay, papp, "library strategy", true); + String selection = getParameterValue(assay, papp, "library selection", true); + //now checking that the input obtained from parsing ISA is compatible with SRA CV + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); //formerly inferred + if (("GENOMIC".equalsIgnoreCase(source)) || + ("GENOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METAGENOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); + } if (("MRE-Seq".equalsIgnoreCase(strategy)) || ("MeDIP-Seq".equalsIgnoreCase(strategy)) || ("MBD-Seq".equalsIgnoreCase(strategy)) || ("Bisulfite-Seq".equalsIgnoreCase(strategy)) || + ("MNase-Seq".equalsIgnoreCase(strategy)) || + ("MRE-Seq".equalsIgnoreCase(strategy)) || ("OTHER".equalsIgnoreCase(strategy)) ) { - xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); } else { xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); - System.out.println("ERROR:value supplied is not compatible with SRA1.2 schema" + strategy); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); } - - //String selection = getParameterValue(assay, papp, "library selection", true); - if (("MF".equalsIgnoreCase(selection)) || ("PCR".equalsIgnoreCase(selection)) || ("HMPR".equalsIgnoreCase(selection)) || + ("MNase".equalsIgnoreCase(selection)) || + ("Restriction Digest".equalsIgnoreCase(selection)) || + ("MF".equalsIgnoreCase(selection)) || ("MSLL".equalsIgnoreCase(selection)) || ("5-methylcytidine antibody".equalsIgnoreCase(selection)) || ("MBD2 protein methyl-CpG binding domain".equals(selection)) || - ("other".equalsIgnoreCase(selection)) + ("other".equalsIgnoreCase(selection)) || + ("unspecified".equalsIgnoreCase(selection)) ) { xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); } else { xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.OTHER); - System.out.println("ERROR:value supplied is not compatible with SRA1.2 schema" + selection); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } + } + //HERE we handle SRA way of coding Chromatin modifications profiling using nucleotide sequencing. + //but Library Strategy and Selection are supplied by users and need to be checked. + //allowed values: {Bisulfite-Seq,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER} and {PCR,RANDOM-PCR,HMPR,MF,MSLL,5-methyl + //cytidine antibody,MBD2 protein methyl-CpG binding domain,other,unspecified} respectively + if (measurement.equalsIgnoreCase("chromatin modification profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { + + String source = getParameterValue(assay, papp, "library source", true); + String strategy = getParameterValue(assay, papp, "library strategy", true); + String selection = getParameterValue(assay, papp, "library selection", true); + + //now checking that the input obtained from parsing ISA is compatible with SRA CV + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); //formerly inferred + if (("GENOMIC".equalsIgnoreCase(source)) || + ("GENOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METAGENOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); } + if (("Hi-C".equalsIgnoreCase(strategy)) || + ("Targeted-Capture".equalsIgnoreCase(strategy)) || + ("Tethered Chromatin Conformation Capture".equalsIgnoreCase(strategy)) || + ("ATAC-Seq".equalsIgnoreCase(strategy)) || + ("ChIP-Seq".equalsIgnoreCase(strategy)) || + ("OTHER".equalsIgnoreCase(strategy)) + ) { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); + } else { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); + } + + if (("MDA".equalsIgnoreCase(selection)) || + ("Hybrid Selection".equalsIgnoreCase(selection)) || + ("Reduced Representation".equalsIgnoreCase(selection)) || + ("padlock probes capture method".equalsIgnoreCase(selection)) || + ("RANDOM PCR".equalsIgnoreCase(selection)) || + ("RANDOM".equalsIgnoreCase(selection)) || + ("PCR".equalsIgnoreCase(selection)) || + ("other".equalsIgnoreCase(selection)) || + ("unspecified".equalsIgnoreCase(selection)) + ) { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); + } else { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } + } + + + + //HERE we handle SRA way of coding Chromatin modifications profiling using nucleotide sequencing. + //but Library Strategy and Selection are supplied by users and need to be checked. + //allowed values: {Bisulfite-Seq,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER} and {PCR,RANDOM-PCR,HMPR,MF,MSLL,5-methyl + //cytidine antibody,MBD2 protein methyl-CpG binding domain,other,unspecified} respectively + if (measurement.equalsIgnoreCase("protein-DNA binding site identification profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { + + String source = getParameterValue(assay, papp, "library source", true); + String strategy = getParameterValue(assay, papp, "library strategy", true); + String selection = getParameterValue(assay, papp, "library selection", true); + + //now checking that the input obtained from parsing ISA is compatible with SRA CV + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); //formerly inferred + if (("GENOMIC".equalsIgnoreCase(source)) || + ("GENOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METAGENOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); + } + + if (("Hi-C".equalsIgnoreCase(strategy)) || + ("ChIP-Seq".equalsIgnoreCase(strategy)) || + ("ChIA-PET".equalsIgnoreCase(strategy)) || + ("FAIRE-Seq".equalsIgnoreCase(strategy)) || + ("Targeted-Capture".equalsIgnoreCase(strategy)) || + ("Tethered Chromatin Conformation Capture".equalsIgnoreCase(strategy)) || + ("OTHER".equalsIgnoreCase(strategy)) + ) { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); + } else { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); + } + + if (("MDA".equalsIgnoreCase(selection)) || + ("Hybrid Selection".equalsIgnoreCase(selection)) || + ("Reduced Representation".equalsIgnoreCase(selection)) || + ("padlock probes capture method".equalsIgnoreCase(selection)) || + ("RANDOM PCR".equalsIgnoreCase(selection)) || + ("RANDOM".equalsIgnoreCase(selection)) || + ("PCR".equalsIgnoreCase(selection)) || + ("other".equalsIgnoreCase(selection)) || + ("unspecified".equalsIgnoreCase(selection)) + ) { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); + } else { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } + } + + //HERE we handle SRA way of coding Chromatin modifications profiling using nucleotide sequencing. + //but Library Strategy and Selection are supplied by users and need to be checked. + //allowed values: {Bisulfite-Seq,MRE-Seq,MeDIP-Seq,MBD-Seq,OTHER} and {PCR,RANDOM-PCR,HMPR,MF,MSLL,5-methyl + //cytidine antibody,MBD2 protein methyl-CpG binding domain,other,unspecified} respectively + if (measurement.equalsIgnoreCase("protein-RNA binding site identification profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { + + String source = getParameterValue(assay, papp, "library source", true); + String strategy = getParameterValue(assay, papp, "library strategy", true); + String selection = getParameterValue(assay, papp, "library selection", true); + + //now checking that the input obtained from parsing ISA is compatible with SRA CV + //xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); //formerly inferred + if (("TRANSCRIPTOMIC".equalsIgnoreCase(source)) || + ("TRANSCRIPTOMIC SINGLE CELL".equalsIgnoreCase(source)) || + ("METATRANSCRIPTOMIC".equalsIgnoreCase(source)) || + ("OTHER".equalsIgnoreCase(source)) + ) { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.Enum.forString(source)); + } else { + xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + source); + } + + if (("Hi-C".equalsIgnoreCase(strategy)) || + ("RIP-Seq".equalsIgnoreCase(strategy)) || + ("OTHER".equalsIgnoreCase(strategy)) + ) { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.Enum.forString(strategy)); + } else { + xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + strategy); + } + + if (("MDA".equalsIgnoreCase(selection)) || + ("Hybrid Selection".equalsIgnoreCase(selection)) || + ("Reduced Representation".equalsIgnoreCase(selection)) || + ("RANDOM PCR".equalsIgnoreCase(selection)) || + ("RANDOM".equalsIgnoreCase(selection)) || + ("PCR".equalsIgnoreCase(selection)) || + ("other".equalsIgnoreCase(selection)) || + ("unspecified".equalsIgnoreCase(selection)) + ) { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.Enum.forString(selection)); + } else { + xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.OTHER); + System.out.println("ERROR:value supplied is not compatible with SRA1.5 schema" + selection); + } } + //Here, we deal with chromatin remodeling use case, user input via ISA is about library strategy, library selection, library layout if (measurement.equalsIgnoreCase("histone modification profiling") && technology.equalsIgnoreCase("nucleotide sequencing")) { @@ -435,23 +741,19 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { if (pappIp == null) { return null; } - //dealing with Chromatin immunoprecipitation requirements in ISA_TAB and dumping those in SRA Library Construction Protocol section String crosslink = getParameterValue(assay, pappIp, "cross linking", true); if (crosslink != null) { protocol.append("\n cross-linking: ").append(crosslink); } - String fragmentation = getParameterValue(assay, pappIp, "DNA fragmentation", true); if (fragmentation != null) { protocol.append("\n DNA fragmentation: ").append(fragmentation); } - String fragsize = getParameterValue(assay, pappIp, "DNA fragment size", true); if (fragsize != null) { protocol.append("\n DNA fragment size: ").append(fragsize); } - String ipAntibody = getParameterValue(assay, pappIp, "immunoprecipitation antibody", true); if (ipAntibody != null) { @@ -468,14 +770,13 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { if (interestingbits[3] != null) { protocol.append("\n immunoprecipitation antibody: ").append(interestingbits[3]); } - } } + //Here, we deal with metagenome sequencing use case, SRA library_source is automatically set to METAGENOMICS + //relying on user input via ISA to obtain values for library strategy, library selection and library layout if (measurement.equalsIgnoreCase("metagenome sequencing") && - technology.equalsIgnoreCase("nucleotide sequencing")) { - - // xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.GENOMIC); + technology.equalsIgnoreCase("nucleotide sequencing")) { String selection = getParameterValue(assay, papp, "library selection", true); String strategy = getParameterValue(assay, papp, "library strategy", true); @@ -497,20 +798,15 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { ProtocolApplication pappIp = getProtocol(assay, "library construction"); if (pappIp == null) { - return null; - } - + return null; } + } - - //HERE, we handle the MIMARKS annotation for library construction in environmental gene survey and map those to SRA objects + //HERE, we handle the MIMARKS annotation for library construction in environmental gene survey (aka targeted approach) and map those to SRA objects //reliance on ISA parameters tagged to INSDC codes 'target taxon,target_gene,target_subfragment, mid, - + //deducing the values for source.strategy.selection from ISA assay if (measurement.equalsIgnoreCase("environmental gene survey") && technology.equalsIgnoreCase("nucleotide sequencing")) { - - //deducing the values for source.strategy.selection from ISA assay - xlib.setLIBRARYSOURCE(LibraryDescriptorType.LIBRARYSOURCE.METAGENOMIC); xlib.setLIBRARYSTRATEGY(LibraryDescriptorType.LIBRARYSTRATEGY.AMPLICON); xlib.setLIBRARYSELECTION(LibraryDescriptorType.LIBRARYSELECTION.PCR); @@ -520,38 +816,28 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { if (pBibRef != null) { protocol.append("\n "+INSDC.nuclAcidAmp[0]+": ").append(pBibRef); } - String pUrl = getParameterValue(assay, papp, "url", false); if (pUrl != null) { protocol.append("\n url: ").append(pUrl); - } - String targetTaxon = ""; targetTaxon = getParameterValue(assay, papp, INSDC.targetTaxon, false); if (targetTaxon != null) { protocol.append("\n "+INSDC.targetTaxon[0]+": ").append(targetTaxon); xlib.setLIBRARYNAME(assay.getAcc() + "_" + targetTaxon); } - String targetGene = getParameterValue(assay, papp, INSDC.targetGene, true); if (targetGene != null) { protocol.append("\n "+INSDC.targetGene[0]+": ").append(targetGene); } - String targetSubfrag = getParameterValue(assay, papp, INSDC.targetSubfragment, true); if (targetSubfrag != null) { protocol.append("\n target_subfragment: ").append(targetSubfrag); } - String pcrPrimers = getParameterValue(assay, papp, INSDC.pcrPrimers, true); if (pcrPrimers != null) { protocol.append("\n pcr_primers: ").append(pcrPrimers.replaceAll("=", ":")); } - - - - String pcrConditions = getParameterValue(assay, papp, INSDC.pcrCond, true); if (pcrConditions != null) { protocol.append("\n pcr_cond: ").append(pcrConditions.replaceAll("=", ":")); @@ -577,36 +863,36 @@ protected LibraryDescriptorType buildExportedLibraryDescriptor(Assay assay) { locusXref.setID("pcrPrimersXref"); locusXref.setDB("PubMed"); } - + if (locus != null) { if (locus.toLowerCase().contains("16s")) { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.X_16_S_R_RNA); - // xlocus.setPROBESET(locusXref); + // xlocus.setPROBESET(locusXref); } else if (locus.toLowerCase().contains("18s")) { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.X_18_S_R_RNA); - // xlocus.setPROBESET(locusXref); + // xlocus.setPROBESET(locusXref); } else if (locus.toLowerCase().contains("cox")) { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.COX_1); - // xlocus.setPROBESET(locusXref); + // xlocus.setPROBESET(locusXref); } else if (locus.toLowerCase().contains("its")) { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.ITS_1_5_8_S_ITS_2); - // xlocus.setPROBESET(locusXref); + // xlocus.setPROBESET(locusXref); } else if (locus.toLowerCase().contains("matk")) { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.MAT_K); - // xlocus.setPROBESET(locusXref); + // xlocus.setPROBESET(locusXref); } else if (locus.toLowerCase().contains("rbcl")) { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.RBCL); - // xlocus.setPROBESET(locusXref); + // xlocus.setPROBESET(locusXref); } else { xlocus.setLocusName(LibraryDescriptorType.TARGETEDLOCI.LOCUS.LocusName.OTHER); - // xlocus.setDescription(locus); + // xlocus.setDescription(locus); } } @@ -614,7 +900,6 @@ else if (locus.toLowerCase().contains("rbcl")) { xtargetedloci.setLOCUSArray(xlocusArray); xlib.setTARGETEDLOCI(xtargetedloci); - } @@ -665,7 +950,7 @@ else if (locus.toLowerCase().contains("rbcl")) { xlib.setPOOLINGSTRATEGY(xpoolingstrategy.getStringValue()); } - return xlib; + return xlib; } @@ -813,7 +1098,7 @@ protected SpotDescriptorType buildExportedSpotDescriptor(Assay assay, Map 0) { - return spotDecodeSpec.getREADSPECArray(); + return spotDecodeSpec.getREADSPECArray(); } return null; } @@ -964,6 +1249,7 @@ protected PlatformType buildExportedPlatform(final Assay assay) { //if we can detect which instrument it was that is consistent with SRA schema if (sequencinginst.equalsIgnoreCase("454 GS") || sequencinginst.equalsIgnoreCase("454 GS FLX") || + sequencinginst.equalsIgnoreCase("454 GS FLX+") || sequencinginst.equalsIgnoreCase("454 GS 20") || sequencinginst.equalsIgnoreCase("454 GS FLX Titanium") || sequencinginst.equalsIgnoreCase("454 GS Junior")) { @@ -978,7 +1264,7 @@ protected PlatformType buildExportedPlatform(final Assay assay) { //ls454.setFLOWSEQUENCE("TACG"); //ls454.setFLOWCOUNT(BigInteger.valueOf(800)); xplatform.setLS454(ls454); - } else if (sequencinginst.toLowerCase().contains("illumina")) { + } else if (sequencinginst.toLowerCase().contains("illumina") || sequencinginst.toLowerCase().contains("HiSeq") || sequencinginst.toLowerCase().contains("NextSeq")) { PlatformType.ILLUMINA illumina = PlatformType.ILLUMINA.Factory.newInstance(); @@ -987,13 +1273,20 @@ protected PlatformType buildExportedPlatform(final Assay assay) { sequencinginst.equalsIgnoreCase("Illumina Genome Analyzer II") || sequencinginst.equalsIgnoreCase("Illumina Genome Analyzer IIx") || sequencinginst.equalsIgnoreCase("Illumina HiScanSQ") || + sequencinginst.equalsIgnoreCase("Illumina HiSeq 4000") || + sequencinginst.equalsIgnoreCase("Illumina HiSeq 3000") || sequencinginst.equalsIgnoreCase("Illumina HiSeq 2500") || sequencinginst.equalsIgnoreCase("Illumina HiSeq 2000") || + sequencinginst.equalsIgnoreCase("Illumina HiSeq 1500") || sequencinginst.equalsIgnoreCase("Illumina HiSeq 1000") || - sequencinginst.equalsIgnoreCase("Illumina MiSeq")) { - + sequencinginst.equalsIgnoreCase("Illumina HiScanSQ") || + sequencinginst.equalsIgnoreCase("Illumina MiSeq") || + sequencinginst.equalsIgnoreCase("HiSeq X Five") || + sequencinginst.equalsIgnoreCase("HiSeq X Ten") || + sequencinginst.equalsIgnoreCase("NextSeq 500") || + sequencinginst.equalsIgnoreCase("NextSeq 550")) + { illumina.setINSTRUMENTMODEL(PlatformType.ILLUMINA.INSTRUMENTMODEL.Enum.forString(sequencinginst)); - } //otherwise, we fall back on the 'unspecified' value to avoid falling over else { @@ -1009,7 +1302,6 @@ protected PlatformType buildExportedPlatform(final Assay assay) { PlatformType.HELICOS helicos = PlatformType.HELICOS.Factory.newInstance(); if (sequencinginst.equalsIgnoreCase("Helicos HeliScope")) { - helicos.setINSTRUMENTMODEL(PlatformType.HELICOS.INSTRUMENTMODEL.Enum.forString(sequencinginst)); } else { helicos.setINSTRUMENTMODEL(PlatformType.HELICOS.INSTRUMENTMODEL.Enum.forString("unspecified")); @@ -1019,7 +1311,6 @@ protected PlatformType buildExportedPlatform(final Assay assay) { //helicos.setFLOWCOUNT(new BigInteger(checkNumericParameter(getParameterValue(assay, pApp, "Flow Count", true)))); xplatform.setHELICOS(helicos); - } else if (sequencinginst.toLowerCase().contains("ion torrent")) { PlatformType.IONTORRENT iontorrent = PlatformType.IONTORRENT.Factory.newInstance(); @@ -1033,9 +1324,13 @@ protected PlatformType buildExportedPlatform(final Assay assay) { xplatform.setIONTORRENT(iontorrent); + } else if (sequencinginst.equalsIgnoreCase("MinION") || sequencinginst.equalsIgnoreCase("GridION") ) { + PlatformType.OXFORDNANOPORE oxfordnanopore = PlatformType.OXFORDNANOPORE.Factory.newInstance(); + oxfordnanopore.setINSTRUMENTMODEL(PlatformType.OXFORDNANOPORE.INSTRUMENTMODEL.Enum.forString(sequencinginst)); + xplatform.setOXFORDNANOPORE(oxfordnanopore); } - else if (sequencinginst.toLowerCase().contains("solid")) { + else if (sequencinginst.toLowerCase().contains("AB ")) { PlatformType.ABISOLID abisolid = PlatformType.ABISOLID.Factory.newInstance(); @@ -1049,7 +1344,8 @@ else if (sequencinginst.toLowerCase().contains("solid")) { sequencinginst.equalsIgnoreCase("AB SOLiD 5500") || sequencinginst.equalsIgnoreCase("AB SOLiD 5500xl") || sequencinginst.equalsIgnoreCase("AB 5500 Genetic Analyzer") || - sequencinginst.equalsIgnoreCase("AB 5500xl Genetic Analyzer") + sequencinginst.equalsIgnoreCase("AB 5500xl Genetic Analyzer") || + sequencinginst.equalsIgnoreCase("AB 5500xl-W Genetic Analysis System") ) { abisolid.setINSTRUMENTMODEL(PlatformType.ABISOLID.INSTRUMENTMODEL.Enum.forString(sequencinginst)); @@ -1058,23 +1354,23 @@ else if (sequencinginst.toLowerCase().contains("solid")) { } //{ - //String colorMatrix = getParameterValue(assay, pApp, "Color Matrix", false); - // single dibase colours are semicolon-separated - //if (colorMatrix != null) { - //PlatformType.ABISOLID.COLORMATRIX xcolorMatrix = PlatformType.ABISOLID.COLORMATRIX.Factory.newInstance(); - //String dibases[] = colorMatrix.split("\\;"); - //if (dibases != null && dibases.length > 0) { - //PlatformType.ABISOLID.COLORMATRIX.COLOR xcolors[] = new PlatformType.ABISOLID.COLORMATRIX.COLOR[dibases.length]; - //int i = 0; - //for (String dibase : dibases) { - //PlatformType.ABISOLID.COLORMATRIX.COLOR xcolor = PlatformType.ABISOLID.COLORMATRIX.COLOR.Factory.newInstance(); - //xcolor.setDibase(dibase); - //xcolors[i++] = xcolor; - //} - //xcolorMatrix.setCOLORArray(xcolors); - //abisolid.setCOLORMATRIX(xcolorMatrix); - //} - //} + //String colorMatrix = getParameterValue(assay, pApp, "Color Matrix", false); + // single dibase colours are semicolon-separated + //if (colorMatrix != null) { + //PlatformType.ABISOLID.COLORMATRIX xcolorMatrix = PlatformType.ABISOLID.COLORMATRIX.Factory.newInstance(); + //String dibases[] = colorMatrix.split("\\;"); + //if (dibases != null && dibases.length > 0) { + //PlatformType.ABISOLID.COLORMATRIX.COLOR xcolors[] = new PlatformType.ABISOLID.COLORMATRIX.COLOR[dibases.length]; + //int i = 0; + //for (String dibase : dibases) { + //PlatformType.ABISOLID.COLORMATRIX.COLOR xcolor = PlatformType.ABISOLID.COLORMATRIX.COLOR.Factory.newInstance(); + //xcolor.setDibase(dibase); + //xcolors[i++] = xcolor; + //} + //xcolorMatrix.setCOLORArray(xcolors); + //abisolid.setCOLORMATRIX(xcolorMatrix); + //} + //} //} //{ diff --git a/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExporter.java b/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExporter.java index a752ade..15d5288 100644 --- a/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExporter.java +++ b/import_layer/src/main/java/org/isatools/isatab/export/sra/SraExporter.java @@ -66,32 +66,18 @@ The ISAconverter, ISAvalidator & BII Management Tool are licensed under the Mozi import uk.ac.ebi.bioinvindex.model.term.Design; import uk.ac.ebi.bioinvindex.model.term.PublicationStatus; import uk.ac.ebi.bioinvindex.utils.datasourceload.DataLocationManager; - -import uk.ac.ebi.embl.era.sra.xml.AttributeType; -import uk.ac.ebi.embl.era.sra.xml.EXPERIMENTSETDocument; -import uk.ac.ebi.embl.era.sra.xml.ExperimentSetType; -import uk.ac.ebi.embl.era.sra.xml.LinkType; +import uk.ac.ebi.embl.era.sra.xml.*; import uk.ac.ebi.embl.era.sra.xml.LinkType.ENTREZLINK; import uk.ac.ebi.embl.era.sra.xml.LinkType.URLLINK; -import uk.ac.ebi.embl.era.sra.xml.RUNSETDocument; -import uk.ac.ebi.embl.era.sra.xml.RunSetType; -import uk.ac.ebi.embl.era.sra.xml.SAMPLESETDocument; -import uk.ac.ebi.embl.era.sra.xml.STUDYDocument; -import uk.ac.ebi.embl.era.sra.xml.SUBMISSIONDocument; -import uk.ac.ebi.embl.era.sra.xml.SampleSetType; -import uk.ac.ebi.embl.era.sra.xml.StudyType; import uk.ac.ebi.embl.era.sra.xml.StudyType.DESCRIPTOR.STUDYTYPE; import uk.ac.ebi.embl.era.sra.xml.StudyType.DESCRIPTOR.STUDYTYPE.ExistingStudyType; import uk.ac.ebi.embl.era.sra.xml.StudyType.STUDYATTRIBUTES; import uk.ac.ebi.embl.era.sra.xml.StudyType.STUDYLINKS; -import uk.ac.ebi.embl.era.sra.xml.SubmissionType; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS.ACTION; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS.ACTION.ADD; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS.ACTION.MODIFY; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS.ACTION.VALIDATE; -import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS.ACTION.HOLD; -import uk.ac.ebi.embl.era.sra.xml.SubmissionType.ACTIONS.ACTION.CANCEL; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.CONTACTS; import uk.ac.ebi.embl.era.sra.xml.SubmissionType.CONTACTS.CONTACT; @@ -178,26 +164,28 @@ public void export() { xsubmission.setAlias(studyAcc); - //brokerName = StringUtils.trimToNull(study.getSingleAnnotationValue("comment:SRA Broker Name")); + brokerName = StringUtils.trimToNull(study.getSingleAnnotationValue("comment:SRA Broker Name")); if (brokerName == null) { brokerName = "ISAcreator"; -// log.warn(MessageFormat.format( -// "The study ''{0}'' has no 'SRA Broker Name'", -// study.getAcc() -// )); + xsubmission.setBrokerName(brokerName); + log.warn(MessageFormat.format( + "The study ''{0}'' has no 'SRA Broker Name'", + study.getAcc() + )); } else { xsubmission.setBrokerName(brokerName); } - String labName = null; //StringUtils.trimToNull(study.getSingleAnnotationValue("comment:SRA Lab Name")); + //String labName = null; + String labName = StringUtils.trimToNull(study.getSingleAnnotationValue("comment:SRA Lab Name")); if (labName == null) { -// labName=centerName; -// xsubmission.setLabName(labName); + labName=centerName; + xsubmission.setLabName(labName); -// log.warn(MessageFormat.format( -// "The study ''{0}'' has no 'SRA Lab Name'", -// study.getAcc() -// )); + log.warn(MessageFormat.format( + "The study ''{0}'' has no 'SRA Lab Name'", + study.getAcc() + )); } else { xsubmission.setLabName(centerName); } @@ -221,7 +209,7 @@ public void export() { //final int minFiles = xsubFiles.sizeOfFILEArray(); - STUDYDocument xstudyDoc = null; + STUDYSETDocument xstudyDoc = null; boolean isAssayOk = true; @@ -327,16 +315,17 @@ public void export() { * * @return the SRA STUDY element that is to be used to build the corresponding XML study file. */ - private STUDYDocument buildExportedStudy(Study study) { + private STUDYSETDocument buildExportedStudy(Study study) { final String studyAcc = study.getAcc(); final Investigation investigation = study.getUniqueInvestigation(); XmlOptions xmlOptions = new XmlOptions(); xmlOptions.setSaveNamespacesFirst(); - - STUDYDocument xstudyDoc = STUDYDocument.Factory.newInstance(xmlOptions); - StudyType xstudy = StudyType.Factory.newInstance(); + STUDYSETDocument xstudyDoc = STUDYSETDocument.Factory.newInstance(xmlOptions); + StudySetType xstudySet = StudySetType.Factory.newInstance(); +// STUDYDocument xstudyDoc = STUDYDocument.Factory.newInstance(xmlOptions); + StudyType xstudy = xstudySet.addNewSTUDY(); xstudy.setAlias(studyAcc); StudyType.DESCRIPTOR xdescriptor = StudyType.DESCRIPTOR.Factory.newInstance(); @@ -364,6 +353,12 @@ private STUDYDocument buildExportedStudy(Study study) { if (title != null) { xdescriptor.setSTUDYTITLE(title); } + else { + throw new TabMissingValueException(MessageFormat.format( + "The study ''{0}'' has no 'Study Title', cannot export to SRA format", + study.getAcc() + )); + } String studyAbstract = StringUtils.trimToNull(study.getDescription()); if (studyAbstract != null) { @@ -489,7 +484,8 @@ private STUDYDocument buildExportedStudy(Study study) { xstudy.setDESCRIPTOR(xdescriptor); } - xstudyDoc.setSTUDY(xstudy); +// xstudyDoc.setSTUDY(xstudy); + xstudyDoc.setSTUDYSET(xstudySet); return xstudyDoc; }