AMIXED | 0 | ...
*
*
- * @param line the line from the profile data file to be parsed
- * @param nrColumns the number of columns, defined by the header line
- * @param sampleStartIndex the index of the first column with a sample name in the header field
- * @param hugoSymbolIndex the index of the column Hugo_Symbol
- * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id
- * @param rppaGeneRefIndex the index of the column Composite.Element.Ref
* @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref)
* @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true)
- * @param daoGene an instance of DaoGeneOptimized ... for use in resolving gene symbols
- * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line
* @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines
* @return true if any record was stored in genetic_alteration, else false
* @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration
*/
- private boolean parseLine(String line, int nrColumns, int sampleStartIndex,
- int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex,
- boolean isRppaProfile, boolean isDiscretizedCnaProfile,
- DaoGeneOptimized daoGene,
- List filteredSampleIndices, List orderedSampleList,
- Set existingCnaEvents
+ private boolean saveLine(String[] values,
+ String entrez,
+ String geneSymbol,
+ boolean isRppaProfile,
+ boolean isDiscretizedCnaProfile,
+ Set existingCnaEvents
) throws DaoException {
- //TODO: refactor this entire function - split functionality into smaller units / subroutines
-
boolean recordStored = false;
- // Ignore lines starting with #
- if (!line.startsWith("#") && line.trim().length() > 0) {
- String[] parts = line.split("\t", -1);
+ if (isRppaProfile && geneSymbol == null) {
+ ProgressMonitor.logWarning("Ignoring line with no Composite.Element.REF value");
+ return false;
+ }
- if (parts.length > nrColumns) {
- if (line.split("\t").length > nrColumns) {
- ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]);
- return false;
- }
- }
- String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length);
- values = filterOutNormalValues(filteredSampleIndices, values);
+ //If all are empty, skip line:
+ boolean noGeneSpecified = geneSymbol == null && entrez == null;
+ if (noGeneSpecified) {
+ ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol and no Entrez_Id");
+ return false;
+ }
- String geneSymbol = null;
- if (hugoSymbolIndex != -1) {
- geneSymbol = parts[hugoSymbolIndex];
- }
- //RPPA: //TODO - we should split up the RPPA scenario from this code...too many if/else because of this
- if (rppaGeneRefIndex != -1) {
- geneSymbol = parts[rppaGeneRefIndex];
- }
- if (geneSymbol != null && geneSymbol.isEmpty()) {
- geneSymbol = null;
+ if (geneSymbol != null) {
+ boolean multipleGenesLine = geneSymbol.contains("///");
+ if (multipleGenesLine) {
+ ProgressMonitor.logWarning("Ignoring gene symbol: " + geneSymbol
+ + " It is separated by ///. This indicates that the line contains information regarding multiple genes, and we cannot currently handle this");
+ return false;
}
- if (isRppaProfile && geneSymbol == null) {
- ProgressMonitor.logWarning("Ignoring line with no Composite.Element.REF value");
+ boolean unknownGene = geneSymbol.contains("---");
+ if (unknownGene) {
+ ProgressMonitor.logWarning("Ignoring gene symbol: " + geneSymbol
+ + " It is specified as ---. This indicates that the line contains information regarding an unknown gene, and we cannot currently handle this");
return false;
}
- //get entrez
- String entrez = null;
- if (entrezGeneIdIndex != -1) {
- entrez = parts[entrezGeneIdIndex];
+ }
+
+ List genes;
+ //If rppa, parse genes from "Composite.Element.REF" column:
+ if (isRppaProfile) {
+ genes = parseRPPAGenes(geneSymbol);
+ } else {
+ genes = parseGenes(entrez, geneSymbol);
+ }
+
+ //if genes still null, skip current record
+ if (genes == null || genes.isEmpty()) {
+ ProgressMonitor.logWarning("Gene with Entrez_Id " + entrez + " and gene symbol" + geneSymbol +" not found. Record will be skipped for this gene.");
+ return false;
+ }
+
+ List genesMatchingAnAlias = Collections.emptyList();
+ if (geneSymbol != null) {
+ genesMatchingAnAlias = daoGene.getGenesForAlias(geneSymbol);
+ }
+
+ Set microRNAGenes = new HashSet<>();
+ Set nonMicroRNAGenes = new HashSet<>();
+ Iterator geneIterator = Stream.concat(genes.stream(), genesMatchingAnAlias.stream()).iterator();
+ while (geneIterator.hasNext()) {
+ CanonicalGene g = geneIterator.next();
+ if ("miRNA".equals(g.getType())) {
+ microRNAGenes.add(g);
+ } else {
+ nonMicroRNAGenes.add(g);
}
- if (entrez != null) {
- if (entrez.isEmpty()) {
- entrez = null;
- }
- else if (!entrez.matches("[0-9]+")) {
- //TODO - would be better to give an exception in some cases, like negative Entrez values
- ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez);
- return false;
+ }
+ if (!microRNAGenes.isEmpty()) {
+ // for micro rna, duplicate the data
+ for (CanonicalGene gene : microRNAGenes) {
+ if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) {
+ recordStored = true;
}
}
-
- //If all are empty, skip line:
- if (geneSymbol == null && entrez == null) {
- ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol and no Entrez_Id");
+ if (!recordStored) {
+ if (nonMicroRNAGenes.isEmpty()) {
+ // this means that none of the microRNA records could be stored
+ ProgressMonitor.logWarning("Could not store microRNA data");
+ } else {
+ // this case :
+ // - at least one of the entrez-gene-ids was not a microRNA
+ // - all of the matched microRNA ids (if any) failed to be imported (presumably already imported on a prior line)
+ ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.");
+ }
return false;
+ }
+ } else {
+ // none of the matched genes are type "miRNA"
+ if (genes.size() == 1) {
+ // Store all values per gene:
+ recordStored = this.geneticAlterationImporter.store(values, genes.get(0), geneSymbol);
+ //only add extra CNA related records if the step above worked, otherwise skip:
+ if (recordStored && isDiscretizedCnaProfile) {
+ if (isIncrementalUpdateMode) {
+ DaoCnaEvent.removeSampleCnaEvents(geneticProfileId, orderedSampleList);
+ }
+ long entrezGeneId = genes.get(0).getEntrezGeneId();
+ CnaUtil.storeCnaEvents(existingCnaEvents, composeCnaEventsToAdd(values, entrezGeneId));
+ }
} else {
- if (geneSymbol != null && (geneSymbol.contains("///") || geneSymbol.contains("---"))) {
- // Ignore gene IDs separated by ///. This indicates that
- // the line contains information regarding multiple genes, and
- // we cannot currently handle this.
- // Also, ignore gene IDs that are specified as ---. This indicates
- // the line contains information regarding an unknown gene, and
- // we cannot currently handle this.
- ProgressMonitor.logWarning("Ignoring gene ID: " + geneSymbol);
- return false;
+ if (isRppaProfile) { // for protein data, duplicate the data
+ recordStored = saveRppaValues(values, recordStored, genes, geneSymbol);
} else {
- List genes = null;
- //If rppa, parse genes from "Composite.Element.REF" column:
- if (isRppaProfile) {
- genes = parseRPPAGenes(geneSymbol);
- if (genes == null) {
- //will be null when there is a parse error in this case, so we
- //can return here and avoid duplicated messages:
- return false;
- }
- if (genes.isEmpty()) {
- String gene = (geneSymbol != null) ? geneSymbol : entrez;
- ProgressMonitor.logWarning("Gene not found for: [" + gene
- + "]. Ignoring it "
- + "and all tab-delimited data associated with it!");
- return false;
- }
- } else {
- //try entrez:
- if (entrez != null) {
- CanonicalGene gene = daoGene.getGene(Long.parseLong(entrez));
- if (gene != null) {
- genes = Arrays.asList(gene);
- }
- }
- //no entrez or could not resolve by entrez, try hugo:
- if ((genes == null || genes.isEmpty()) && geneSymbol != null) {
- // deal with multiple symbols separate by |, use the first one
- int ix = geneSymbol.indexOf("|");
- if (ix > 0) {
- geneSymbol = geneSymbol.substring(0, ix);
- }
- genes = daoGene.getGene(geneSymbol, true);
- }
- //if genes still null, skip current record
- if (genes == null || genes.isEmpty()) {
- ProgressMonitor.logWarning("Entrez_Id " + entrez + " not found. Record will be skipped for this gene.");
- return false;
- }
+ if (!recordStored) {
+ // this case :
+ // - the hugo gene symbol was ambiguous (matched multiple entrez-gene-ids)
+ ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous. Record will be skipped for this gene.");
}
+ }
+ }
+ }
+ return recordStored;
+ }
- // If targetLine is specified and does not match the current line, skip the current line.
- if (targetLine != null && !(parts[0].equals(targetLine))) {
- return false;
- }
+ private boolean saveRppaValues(String[] values, boolean recordStored, List genes, String geneSymbol) throws DaoException {
+ for (CanonicalGene gene : genes) {
+ if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) {
+ recordStored = true;
+ nrExtraRecords++;
+ }
+ }
+ if (recordStored) {
+ //skip one, to avoid double counting:
+ nrExtraRecords--;
+ } else {
+ // this means that RPPA could not be stored
+ ProgressMonitor.logWarning("Could not store RPPA data");
+ }
+ return recordStored;
+ }
- List genesMatchingAnAlias = Collections.emptyList();
- if (geneSymbol != null) {
- genesMatchingAnAlias = daoGene.getGenesForAlias(geneSymbol);
- }
+ private List parseGenes(String entrez, String geneSymbol) {
+ //try entrez:
+ if (entrez != null) {
+ CanonicalGene gene = daoGene.getGene(Long.parseLong(entrez));
+ if (gene != null) {
+ return Arrays.asList(gene);
+ }
+ }
+ //no entrez or could not resolve by entrez, try hugo:
+ if (geneSymbol != null) {
+ // deal with multiple symbols separated by |, use the first one
+ int ix = geneSymbol.indexOf("|");
+ if (ix > 0) {
+ geneSymbol = geneSymbol.substring(0, ix);
+ }
+ return daoGene.getGene(geneSymbol, true);
+ }
+ return List.of();
+ }
- Set microRNAGenes = new HashSet<>();
- Set nonMicroRNAGenes = new HashSet<>();
- Iterator geneIterator = Stream.concat(genes.stream(), genesMatchingAnAlias.stream()).iterator();
- while (geneIterator.hasNext()) {
- CanonicalGene g = geneIterator.next();
- if ("miRNA".equals(g.getType())) {
- microRNAGenes.add(g);
- } else {
- nonMicroRNAGenes.add(g);
- }
- }
- if (!microRNAGenes.isEmpty()) {
- // for micro rna, duplicate the data
- for (CanonicalGene gene : microRNAGenes) {
- if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) {
- recordStored = true;
- }
- }
- if (!recordStored) {
- if (nonMicroRNAGenes.isEmpty()) {
- // this means that no microRNA records could not be stored
- ProgressMonitor.logWarning("Could not store microRNA data");
- } else {
- // this case :
- // - at least one of the entrez-gene-ids was not a microRNA
- // - all of the matched microRNA ids (if any) failed to be imported (presumably already imported on a prior line)
- ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.");
- }
- return false;
- }
- } else {
- // none of the matched genes are type "miRNA"
- if (genes.size() == 1) {
- List cnaEventsToAdd = new ArrayList();
-
- if (isDiscretizedCnaProfile) {
- long entrezGeneId = genes.get(0).getEntrezGeneId();
- for (int i = 0; i < values.length; i++) {
-
- // temporary solution -- change partial deletion back to full deletion.
- if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) {
- values[i] = CNA_VALUE_HOMOZYGOUS_DELETION;
- }
- if (values[i].equals(CNA_VALUE_AMPLIFICATION)
- // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB
- // || values[i].equals(CNA_VALUE_ZERO)
- // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION)
- || values[i].equals(CNA_VALUE_HOMOZYGOUS_DELETION)
- ) {
- Integer sampleId = orderedSampleList.get(i);
- CnaEvent cnaEvent = new CnaEvent(sampleId, geneticProfileId, entrezGeneId, Short.parseShort(values[i]));
- //delayed add:
- AbstractMap.SimpleEntry sampleGenePair = new AbstractMap.SimpleEntry<>(sampleId, entrezGeneId);
- Map pdAnnotationDetails = this.pdAnnotations.get(sampleGenePair);
- if (pdAnnotationDetails != null) {
- cnaEvent.setDriverFilter(pdAnnotationDetails.get("DRIVER_FILTER"));
- cnaEvent.setDriverFilterAnnotation(pdAnnotationDetails.get("DRIVER_FILTER_ANNOTATION"));
- cnaEvent.setDriverTiersFilter(pdAnnotationDetails.get("DRIVER_TIERS_FILTER"));
- cnaEvent.setDriverTiersFilterAnnotation(pdAnnotationDetails.get("DRIVER_TIERS_FILTER_ANNOTATION"));
- }
- cnaEventsToAdd.add(cnaEvent);
- }
- }
- }
- // Store all values per gene:
- recordStored = this.geneticAlterationImporter.store(values, genes.get(0), geneSymbol);
- //only add extra CNA related records if the step above worked, otherwise skip:
- if (recordStored) {
- CnaUtil.storeCnaEvents(existingCnaEvents, cnaEventsToAdd);
- }
- } else {
- if (isRppaProfile) { // for protein data, duplicate the data
- for (CanonicalGene gene : genes) {
- if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) {
- recordStored = true;
- nrExtraRecords++;
- }
- }
- if (recordStored) {
- //skip one, to avoid double counting:
- nrExtraRecords--;
- } else {
- // this means that RPPA could not be stored
- ProgressMonitor.logWarning("Could not store RPPA data");
- }
- } else {
- if (!recordStored) {
- // this case :
- // - the hugo gene symbol was ambiguous (matched multiple entrez-gene-ids)
- ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous. Record will be skipped for this gene.");
- }
- }
- }
- }
+ private List composeCnaEventsToAdd(String[] values, long entrezGeneId) {
+ List cnaEventsToAdd = new ArrayList();
+ for (int i = 0; i < values.length; i++) {
+
+ // temporary solution -- change partial deletion back to full deletion.
+ if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) {
+ values[i] = CNA_VALUE_HOMOZYGOUS_DELETION;
+ }
+ if (values[i].equals(CNA_VALUE_AMPLIFICATION)
+ // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB
+ // || values[i].equals(CNA_VALUE_ZERO)
+ // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION)
+ || values[i].equals(CNA_VALUE_HOMOZYGOUS_DELETION)
+ ) {
+ Integer sampleId = orderedSampleList.get(i);
+ CnaEvent cnaEvent = new CnaEvent(sampleId, geneticProfileId, entrezGeneId, Short.parseShort(values[i]));
+ //delayed add:
+ AbstractMap.SimpleEntry sampleGenePair = new AbstractMap.SimpleEntry<>(sampleId, entrezGeneId);
+ Map pdAnnotationDetails = this.pdAnnotations.get(sampleGenePair);
+ if (pdAnnotationDetails != null) {
+ cnaEvent.setDriverFilter(pdAnnotationDetails.get("DRIVER_FILTER"));
+ cnaEvent.setDriverFilterAnnotation(pdAnnotationDetails.get("DRIVER_FILTER_ANNOTATION"));
+ cnaEvent.setDriverTiersFilter(pdAnnotationDetails.get("DRIVER_TIERS_FILTER"));
+ cnaEvent.setDriverTiersFilterAnnotation(pdAnnotationDetails.get("DRIVER_TIERS_FILTER_ANNOTATION"));
}
+ cnaEventsToAdd.add(cnaEvent);
}
}
- return recordStored;
+ return cnaEventsToAdd;
}
/**
* Parses line for gene set record and stores record in 'genetic_alteration' table.
- * @param line
- * @param nrColumns
- * @param sampleStartIndex
- * @param genesetIdIndex
- * @param filteredSampleIndices
- * @param daoGeneticAlteration
+ * @param genesetId
* @return
* @throws DaoException
*/
- private boolean parseGenesetLine(String line, int nrColumns, int sampleStartIndex, int genesetIdIndex,
- List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException {
+ private boolean saveGenesetLine(String[] values, String genesetId) throws DaoException {
boolean storedRecord = false;
- if (!line.startsWith("#") && line.trim().length() > 0) {
- String[] parts = line.split("\t", -1);
-
- if (parts.length > nrColumns) {
- if (line.split("\t").length > nrColumns) {
- ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]);
- return false;
- }
- }
-
- String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length);
-
- // trim whitespace from values
- values = Stream.of(values).map(String::trim).toArray(String[]::new);
- values = filterOutNormalValues(filteredSampleIndices, values);
-
- Geneset geneset = DaoGeneset.getGenesetByExternalId(parts[genesetIdIndex]);
- if (geneset != null) {
- storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(),
- EntityType.GENESET, geneset.getExternalId());
- }
- else {
- ProgressMonitor.logWarning("Geneset " + parts[genesetIdIndex] + " not found in DB. Record will be skipped.");
- }
+ Geneset geneset = DaoGeneset.getGenesetByExternalId(genesetId);
+ if (geneset != null) {
+ storedRecord = this.geneticAlterationImporter.store(geneset.getGeneticEntityId(), values);
+ }
+ else {
+ ProgressMonitor.logWarning("Geneset " + genesetId + " not found in DB. Record will be skipped.");
}
return storedRecord;
}
/**
* Parses line for generic assay profile record and stores record in 'genetic_alteration' table.
- * @param line row from the separated-text that contains one or more values on a single sample
- * @param nrColumns
- * @param sampleStartIndex index of the first sample column
- * @param genericAssayIdIndex index of the column that uniquely identifies a sample
- * @param filteredSampleIndices
- * @param daoGeneticAlteration
- * @return
- * @throws DaoException
*/
-
- private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStartIndex, int genericAssayIdIndex,
- List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException {
+ private boolean saveGenericAssayLine(String[] values, String genericAssayId, Map genericAssayStableIdToEntityIdMap) throws DaoException {
boolean recordIsStored = false;
- if (!line.startsWith("#") && line.trim().length() > 0) {
- String[] parts = line.split("\t", -1);
-
- if (parts.length > nrColumns) {
- if (line.split("\t").length > nrColumns) {
- ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]);
- return false;
- }
- }
-
- String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length);
-
- // trim whitespace from values
- values = Stream.of(values).map(String::trim).toArray(String[]::new);
- values = filterOutNormalValues(filteredSampleIndices, values);
-
- String stableId = parts[genericAssayIdIndex];
- Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(stableId, null);
+ Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(genericAssayId, null);
- if (entityId == null) {
- ProgressMonitor.logWarning("Generic Assay entity " + parts[genericAssayIdIndex] + " not found in DB. Record will be skipped.");
- } else {
- recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId,
- EntityType.GENERIC_ASSAY, stableId);
- }
-
- return recordIsStored;
+ if (entityId == null) {
+ ProgressMonitor.logWarning("Generic Assay entity " + genericAssayId + " not found in DB. Record will be skipped.");
+ } else {
+ recordIsStored = this.geneticAlterationImporter.store(entityId, values);
}
return recordIsStored;
}
- /**
- * Stores genetic alteration data for a genetic entity.
- * @param values
- * @param daoGeneticAlteration
- * @param geneticEntityId - internal id for genetic entity
- * @param geneticEntityType - "GENE", "GENESET", "PHOSPHOPROTEIN"
- * @param geneticEntityName - hugo symbol for "GENE", external id for "GENESET", phospho gene name for "PHOSPHOPROTEIN"
- * @return boolean indicating if record was stored successfully or not
- */
- private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGeneticAlteration daoGeneticAlteration,
- Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) {
- try {
- if (importedGeneticEntitySet.add(geneticEntityId)) {
- daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values);
- return true;
- }
- else {
- ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName
- + " [" + geneticEntityType + "] already imported from file. Record will be skipped.");
- return false;
- }
- }
- catch (Exception ex) {
- throw new RuntimeException("Aborted: Error found for row starting with " + geneticEntityName + ": " + ex.getMessage());
- }
- }
-
/**
* Tries to parse the genes and look them up in DaoGeneOptimized
*
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java
index 0b5b182b..24903d17 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java
@@ -32,14 +32,26 @@
package org.mskcc.cbio.portal.scripts;
-import java.io.*;
-import java.util.*;
-import joptsimple.*;
-import org.mskcc.cbio.portal.dao.*;
-import org.mskcc.cbio.portal.model.*;
+import joptsimple.OptionSet;
+import org.mskcc.cbio.portal.dao.DaoClinicalEvent;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoPatient;
+import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
+import org.mskcc.cbio.portal.model.ClinicalEvent;
+import org.mskcc.cbio.portal.model.Patient;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
-import org.mskcc.cbio.portal.util.SpringUtil;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
/**
* Imports timeline data for display in patient view
@@ -48,9 +60,8 @@
*/
public class ImportTimelineData extends ConsoleRunnable {
- private static void importData(String dataFile, int cancerStudyId) throws IOException, DaoException {
+ private static void importData(String dataFile, int cancerStudyId, boolean overwriteExisting) throws IOException, DaoException {
MySQLbulkLoader.bulkLoadOn();
- SpringUtil.initDataSource();
ProgressMonitor.setCurrentMessage("Reading file " + dataFile);
FileReader reader = new FileReader(dataFile);
@@ -72,9 +83,10 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce
throw new RuntimeException("The first line must start with\n'PATIENT_ID\tSTART_DATE\tEVENT_TYPE'\nor\n"
+ "PATIENT_ID\tSTART_DATE\tSTOP_DATE\tEVENT_TYPE");
}
-
+
long clinicalEventId = DaoClinicalEvent.getLargestClinicalEventId();
-
+ Set processedPatientIds = new HashSet<>();
+
while ((line = buff.readLine()) != null) {
line = line.trim();
@@ -90,6 +102,9 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce
ProgressMonitor.logWarning("Patient " + patientId + " not found in study " + cancerStudyId + ". Skipping entry.");
continue;
}
+ if (overwriteExisting && processedPatientIds.add(patient.getInternalId())) {
+ DaoClinicalEvent.deleteByPatientId(patient.getInternalId());
+ }
ClinicalEvent event = new ClinicalEvent();
event.setClinicalEventId(++clinicalEventId);
event.setPatientId(patient.getInternalId());
@@ -119,17 +134,23 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce
public void run() {
try {
String description = "Import 'timeline' data";
-
+
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
- String dataFile = (String) options.valueOf("data");
+ if (options.has("loadMode") && !"bulkLoad".equalsIgnoreCase((String) options.valueOf("loadMode"))) {
+ throw new UnsupportedOperationException("This loader supports bulkLoad load mode only, but "
+ + options.valueOf("loadMode")
+ + " has been supplied.");
+ }
+ String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));
+ boolean overwriteExisting = options.has("overwrite-existing");
Properties properties = new TrimmedProperties();
properties.load(new FileInputStream(descriptorFile));
int cancerStudyInternalId = ValidationUtils.getInternalStudyId(properties.getProperty("cancer_study_identifier"));
- importData(dataFile, cancerStudyInternalId);
+ importData(dataFile, cancerStudyInternalId, overwriteExisting);
} catch (RuntimeException e) {
throw e;
} catch (IOException|DaoException e) {
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTypesOfCancers.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTypesOfCancers.java
index 9e119f03..10bf9159 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTypesOfCancers.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTypesOfCancers.java
@@ -37,10 +37,9 @@
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoTypeOfCancer;
import org.mskcc.cbio.portal.model.TypeOfCancer;
-import org.mskcc.cbio.portal.scripts.ConsoleRunnable;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
-import org.mskcc.cbio.portal.util.SpringUtil;
+import org.mskcc.cbio.portal.util.TsvUtil;
/**
* Load all the types of cancer and their names from a file.
@@ -76,7 +75,6 @@ public void run() {
public static void load(File file, boolean clobber) throws IOException, DaoException {
ProgressMonitor.setCurrentMessage("Loading cancer types...");
List typeOfCancerList = parseCancerTypesFromFile(file);
- SpringUtil.initDataSource();
if (clobber) {
ProgressMonitor.setCurrentMessage("Deleting all previous cancer types...");
DaoTypeOfCancer.deleteAllRecords(); //TODO - remove this option - foreign key constraints may mean large cascade effects (possibly the deletion of all studies) - instead, change the option to 'deleteTypeOfCancerIfNotPresent' and add a loop through existing typeOfCancer records, removing those which are not in the parsed typeOfCancerList
@@ -92,7 +90,7 @@ private static List parseCancerTypesFromFile(File file) throws IOE
Scanner scanner = new Scanner(file);
while (scanner.hasNextLine()) {
String nextLine = scanner.nextLine();
- String[] fields = nextLine.split("\t", -1);
+ String[] fields = TsvUtil.splitTsvLine(nextLine);
throwExceptionIfColumnCountIsWrong(file, nextLine, fields, EXPECTED_DATAFILE_COLUMN_COUNT);
TypeOfCancer typeOfCancer = new TypeOfCancer();
String typeOfCancerId = fields[0].trim();
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportUsers.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportUsers.java
index c886c8b8..fbaa030f 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportUsers.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportUsers.java
@@ -33,12 +33,20 @@
package org.mskcc.cbio.portal.scripts;
// imports
-import org.mskcc.cbio.portal.model.*;
-import org.mskcc.cbio.portal.dao.*;
-import org.mskcc.cbio.portal.util.*;
-import java.io.*;
-import java.util.*;
+import org.mskcc.cbio.portal.dao.DaoUser;
+import org.mskcc.cbio.portal.dao.DaoUserAuthorities;
+import org.mskcc.cbio.portal.model.User;
+import org.mskcc.cbio.portal.model.UserAuthorities;
+import org.mskcc.cbio.portal.util.ConsoleUtil;
+import org.mskcc.cbio.portal.util.ProgressMonitor;
+import org.mskcc.cbio.portal.util.TsvUtil;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.Arrays;
+import java.util.List;
/**
* Import a file of users and their authorities.
@@ -62,8 +70,6 @@ public static void main(String[] args) throws Exception {
ProgressMonitor.setConsoleMode(true);
- SpringUtil.initDataSource();
-
File file = new File(args[0]);
FileReader reader = new FileReader(file);
BufferedReader buf = new BufferedReader(reader);
@@ -72,7 +78,7 @@ public static void main(String[] args) throws Exception {
while (line != null) {
ProgressMonitor.incrementCurValue();
ConsoleUtil.showProgress();
- if (!line.startsWith("#") && line.trim().length() > 0) {
+ if (TsvUtil.isDataLine(line)) {
try {
addUser(line);
count++;
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/NormalizeExpressionLevels.java b/src/main/java/org/mskcc/cbio/portal/scripts/NormalizeExpressionLevels.java
index a5c8d642..d2214c93 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/NormalizeExpressionLevels.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/NormalizeExpressionLevels.java
@@ -32,14 +32,25 @@
package org.mskcc.cbio.portal.scripts;
-import java.io.*;
-import java.util.*;
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
import org.mskcc.cbio.portal.model.CanonicalGene;
-import org.mskcc.cbio.portal.util.SpringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+
/**
*
* Given expression and CNV data for a set of samples generate normalized expression values.
@@ -110,7 +121,6 @@ public class NormalizeExpressionLevels{
public static void main (String[]args) {
try {
- SpringUtil.initDataSource();
// init dao gene
daoGeneOptimized = DaoGeneOptimized.getInstance();
driver(args);
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/RemoveCancerStudy.java b/src/main/java/org/mskcc/cbio/portal/scripts/RemoveCancerStudy.java
index ad515683..8ededd3a 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/RemoveCancerStudy.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/RemoveCancerStudy.java
@@ -32,9 +32,9 @@
package org.mskcc.cbio.portal.scripts;
-import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.util.ProgressMonitor;
/**
* Command Line Tool to Remove a Single Cancer Study.
@@ -51,8 +51,7 @@ public void run() {
"");
}
String cancerStudyIdentifier = args[0];
-
- SpringUtil.initDataSource();
+
ProgressMonitor.setCurrentMessage(
"Checking if Cancer study with identifier " +
cancerStudyIdentifier +
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ResetDatabase.java b/src/main/java/org/mskcc/cbio/portal/scripts/ResetDatabase.java
index 82cad899..43c0b95d 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/ResetDatabase.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/ResetDatabase.java
@@ -32,8 +32,26 @@
package org.mskcc.cbio.portal.scripts;
-import org.mskcc.cbio.portal.dao.*;
-import org.mskcc.cbio.portal.util.*;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoClinicalData;
+import org.mskcc.cbio.portal.dao.DaoClinicalEvent;
+import org.mskcc.cbio.portal.dao.DaoCopyNumberSegmentFile;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
+import org.mskcc.cbio.portal.dao.DaoGeneset;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfileSamples;
+import org.mskcc.cbio.portal.dao.DaoInfo;
+import org.mskcc.cbio.portal.dao.DaoMutSig;
+import org.mskcc.cbio.portal.dao.DaoMutation;
+import org.mskcc.cbio.portal.dao.DaoPatient;
+import org.mskcc.cbio.portal.dao.DaoSample;
+import org.mskcc.cbio.portal.dao.DaoSampleList;
+import org.mskcc.cbio.portal.dao.DaoSampleProfile;
+import org.mskcc.cbio.portal.dao.DaoTypeOfCancer;
+import org.mskcc.cbio.portal.dao.DaoUser;
+import org.mskcc.cbio.portal.dao.DaoUserAuthorities;
/**
* Empty the database.
@@ -91,7 +109,6 @@ public static void resetDatabase() throws DaoException {
}
public static void main(String[] args) throws DaoException {
- SpringUtil.initDataSource();
StatDatabase.statDb();
ResetDatabase.resetDatabase();
System.err.println("Database Cleared and Reset.");
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/TransactionalScriptRunner.java b/src/main/java/org/mskcc/cbio/portal/scripts/TransactionalScriptRunner.java
index 7ee8d94a..b7124de0 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/TransactionalScriptRunner.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/TransactionalScriptRunner.java
@@ -1,14 +1,13 @@
package org.mskcc.cbio.portal.scripts;
-import java.io.File;
-
-import org.mskcc.cbio.portal.util.SpringUtil;
import org.mskcc.cbio.portal.util.TransactionalScripts;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.TransactionCallback;
import org.springframework.transaction.support.TransactionTemplate;
+import java.io.File;
+
/**
* A high-level script runner than can be used to run a batch of scripts within a
* transactional context. It's handy loading a batch of data of different types.
@@ -69,8 +68,7 @@ public void run () {
// Inject the context into SpringUtil, so we don't need to initialize again.
// This ensures that the XML files from the command line provide a complete
// context and we don't get data sources later from anywhere else.
- SpringUtil.initDataSource(context);
-
+
// Set up the transaction template
transactionTemplate = (TransactionTemplate) context.getBean("scriptTransactionTemplate");
if (transactionTemplate == null) {
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCancerStudy.java b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCancerStudy.java
index af8a5ba9..00bcbf69 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCancerStudy.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCancerStudy.java
@@ -23,10 +23,10 @@
package org.mskcc.cbio.portal.scripts;
-import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoException;
-import org.mskcc.cbio.portal.model.*;
+import org.mskcc.cbio.portal.model.CancerStudy;
+import org.mskcc.cbio.portal.util.ProgressMonitor;
/**
@@ -58,7 +58,6 @@ public void run() {
"Invalid study status parameter: " + cancerStudyStatus);
}
- SpringUtil.initDataSource();
CancerStudy theCancerStudy = DaoCancerStudy.getCancerStudyByStableId(cancerStudyIdentifier);
if (theCancerStudy == null) {
throw new IllegalArgumentException("cancer study identified by cancer_study_identifier '"
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java
new file mode 100644
index 00000000..865cc660
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java
@@ -0,0 +1,226 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package org.mskcc.cbio.portal.scripts;
+
+import joptsimple.OptionException;
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoSampleList;
+import org.mskcc.cbio.portal.model.CancerStudy;
+import org.mskcc.cbio.portal.model.SampleList;
+import org.mskcc.cbio.portal.util.CaseList;
+import org.mskcc.cbio.portal.util.CaseListReader;
+import org.mskcc.cbio.portal.util.ProgressMonitor;
+import org.mskcc.cbio.portal.validate.CaseListValidator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class UpdateCaseListsSampleIds extends ConsoleRunnable {
+
+ private File metaFile;
+ private File dataFile;
+ private List caseListFiles = List.of();
+ private String cancerStudyStableId;
+ private final Map> caseListSampleIdToSampleIds = new LinkedHashMap<>();
+ private final DaoSampleList daoSampleList = new DaoSampleList();
+ private LinkedHashSet allSampleIds;
+
+ public UpdateCaseListsSampleIds(String[] args) {
+ super(args);
+ }
+
+ /**
+ * Updates case list sample ids from clinical sample and case list files
+ */
+ public void run() {
+ parseArguments();
+ readStudyIdAndDataFileFromMetaFile();
+ this.allSampleIds = readSampleIdsFromDataFile(this.dataFile);
+ this.caseListSampleIdToSampleIds.put(cancerStudyStableId + "_all", this.allSampleIds);
+ Map> readCaseListSampleIds = readCaseListFiles();
+ this.caseListSampleIdToSampleIds.putAll(readCaseListSampleIds);
+ updateCaseListsForTheStudy(this.caseListSampleIdToSampleIds);
+ }
+
+ private Map> readCaseListFiles() {
+ LinkedHashMap> result = new LinkedHashMap<>();
+ for (File caseListFile : this.caseListFiles) {
+ CaseList caseList = CaseListReader.readFile(caseListFile);
+ CaseListValidator.validateIdFields(caseList);
+ String cancerStudyIdentifier = caseList.getCancerStudyIdentifier();
+ if (!cancerStudyIdentifier.equals(this.cancerStudyStableId)) {
+ ProgressMonitor.logWarning(
+ String.format(
+ "Skipping %s case list file as it belongs to %s study and we uploading %s study.",
+ caseListFile, cancerStudyIdentifier, this.cancerStudyStableId));
+ continue;
+ }
+ LinkedHashSet extraSampleIds = new LinkedHashSet<>(caseList.getSampleIds());
+ extraSampleIds.removeAll(this.allSampleIds);
+ if (!extraSampleIds.isEmpty()) {
+ throw new RuntimeException(caseListFile.getAbsolutePath() + ": The following sample ids present in the case list file, but not specified in the clinical sample file: " + String.join(", ", extraSampleIds));
+ }
+ result.put(caseList.getStableId(), new LinkedHashSet<>(caseList.getSampleIds()));
+ }
+ return result;
+ }
+
+ /**
+ * Updates the sample lists according to the steps below:
+ *
+ * 1. New sample IDs provided in the `caseListSampleIdToSampleIds` map are added to their corresponding case lists.
+ * 2. These sample IDs are removed from any other case lists within the same study.
+ *
+ * @param caseListSampleIdToSampleIds A map where the key is the case list stable ID and the value is a set of sample IDs
+ * to be added to the corresponding case list.
+ * Note: This map only includes the case lists that need to be updated with new sample IDs.
+ * Existing case lists in the study that are not in the map will not be dropped,
+ * but the provided sample IDs will be removed from these lists if present.
+ * @throws RuntimeException if any DAO operations fail or if a case list with a specified stable ID is not found.
+ */
+ private void updateCaseListsForTheStudy(Map> caseListSampleIdToSampleIds) {
+ DaoCancerStudy.reCacheAll();
+ try {
+ for (Map.Entry> caseListStableIdToSampleIds : caseListSampleIdToSampleIds.entrySet()) {
+ String caseListStableId = caseListStableIdToSampleIds.getKey();
+ Set uploadedSampleIds = caseListStableIdToSampleIds.getValue();
+ SampleList sampleList = daoSampleList.getSampleListByStableId(caseListStableId);
+ if (sampleList == null) {
+ throw new RuntimeException("No case list with " + caseListStableId + " stable id is found");
+ }
+ LinkedHashSet newCaseListSampleIds = new LinkedHashSet<>(sampleList.getSampleList());
+ if (newCaseListSampleIds.addAll(uploadedSampleIds)) {
+ sampleList.setSampleList(new ArrayList<>(newCaseListSampleIds));
+ daoSampleList.updateSampleListList(sampleList);
+ }
+ }
+ CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId(this.cancerStudyStableId);
+ List sampleLists = daoSampleList.getAllSampleLists(cancerStudy.getInternalId());
+ List remainingLists = sampleLists.stream().filter(sl ->
+ !caseListSampleIdToSampleIds.containsKey(sl.getStableId()) && sl.getSampleList().stream().anyMatch(this.allSampleIds::contains)
+ ).toList();
+ for (SampleList remainingList : remainingLists) {
+ ArrayList newSampleList = new ArrayList<>(remainingList.getSampleList());
+ if (newSampleList.removeAll(this.allSampleIds)) {
+ remainingList.setSampleList(newSampleList);
+ daoSampleList.updateSampleListList(remainingList);
+ }
+ }
+ } catch (DaoException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private LinkedHashSet readSampleIdsFromDataFile(File dataFile) {
+ LinkedHashSet allSampleIds = new LinkedHashSet<>();
+ try (FileReader reader = new FileReader(dataFile);
+ BufferedReader buff = new BufferedReader(reader)) {
+ String line;
+ int sampleIdPosition = -1;
+ while ((line = buff.readLine()) != null) {
+ String trimmedLine = line.trim();
+ if (trimmedLine.isEmpty() || trimmedLine.startsWith("#")) {
+ continue;
+ }
+
+ String[] fieldValues = line.split("\t");
+ if (sampleIdPosition == -1) {
+ sampleIdPosition = List.of(fieldValues).indexOf("SAMPLE_ID");
+ if (sampleIdPosition == -1) {
+ throw new RuntimeException("No SAMPLE_ID header is found");
+ }
+ } else {
+ allSampleIds.add(fieldValues[sampleIdPosition].trim());
+ }
+ }
+ return allSampleIds;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void readStudyIdAndDataFileFromMetaFile() {
+ TrimmedProperties properties = new TrimmedProperties();
+ try {
+ FileInputStream inStream = new FileInputStream(this.metaFile);
+ properties.load(inStream);
+ this.cancerStudyStableId = properties.getProperty("cancer_study_identifier");
+ String dataFilename = properties.getProperty("data_filename");
+ this.dataFile = new File(metaFile.getParent(), dataFilename);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void parseArguments() {
+ String progName = getClass().getName();
+ String description = "Updates (adds/removes) sample ids in specified case lists.";
+
+ OptionParser parser = new OptionParser();
+ OptionSpec metaOpt = parser.accepts("meta",
+ "clinical sample (genetic_alteration_type=CLINICAL and datatype=SAMPLE_ATTRIBUTES or datatype=MIXED_ATTRIBUTES) meta data file. All sample ids found in the file will be added to the _all case list.").withRequiredArg().required().describedAs("meta_clinical_sample.txt").ofType(String.class);
+ OptionSpec caseListDirOrFileOpt = parser.accepts("case-lists",
+ "case list file or a directory with case list files").withRequiredArg().describedAs("case_lists/").ofType(String.class);
+
+ try {
+ OptionSet options = parser.parse(args);
+ this.metaFile = new File(options.valueOf(metaOpt));
+ if (options.has(caseListDirOrFileOpt)) {
+ File caseListDirOrFile = new File(options.valueOf(caseListDirOrFileOpt));
+ if (caseListDirOrFile.isDirectory()) {
+ this.caseListFiles = Arrays.stream(Objects.requireNonNull(caseListDirOrFile.listFiles()))
+ .filter(file -> !file.getName().startsWith(".") && !file.getName().endsWith("~")).collect(Collectors.toList());
+ } else if (caseListDirOrFile.isFile()) {
+ this.caseListFiles = List.of(caseListDirOrFile);
+ } else {
+ throw new RuntimeException("No file " + caseListDirOrFile.getAbsolutePath() + " exists");
+ }
+ }
+ } catch (OptionException e) {
+ throw new UsageException(
+ progName, description, parser,
+ e.getMessage());
+ }
+ }
+
+ /**
+ * Runs the command as a script and exits with an appropriate exit code.
+ *
+ * @param args the arguments given on the command line
+ */
+ public static void main(String[] args) {
+ ConsoleRunnable runner = new UpdateCaseListsSampleIds(args);
+ runner.runInConsole();
+ }
+}
diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateMetaData.java b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateMetaData.java
index d9669ed5..8235ed3a 100644
--- a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateMetaData.java
+++ b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateMetaData.java
@@ -32,9 +32,11 @@
package org.mskcc.cbio.portal.scripts;
-import org.mskcc.cbio.portal.dao.*;
-import org.mskcc.cbio.portal.util.*;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.util.ConsoleUtil;
+import org.mskcc.cbio.portal.util.GeneticProfileReader;
+import org.mskcc.cbio.portal.util.ProgressMonitor;
import java.io.File;
@@ -51,7 +53,6 @@ public static void main(String[] args) throws Exception {
}
ProgressMonitor.setConsoleMode(true);
- SpringUtil.initDataSource();
File descriptorFile = new File(args[0]);
GeneticProfile geneticProfile = GeneticProfileReader.loadGeneticProfileFromMeta(descriptorFile);
diff --git a/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java b/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java
new file mode 100644
index 00000000..3235d33e
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java
@@ -0,0 +1,21 @@
+package org.mskcc.cbio.portal.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class ArrayUtil {
+ public static Map zip(K[] keys, V[] values) {
+ Map map = new HashMap<>();
+
+ // Check if both arrays have the same length
+ if (keys.length == values.length) {
+ for (int i = 0; i < keys.length; i++) {
+ map.put(keys[i], values[i]);
+ }
+ } else {
+ throw new IllegalArgumentException("Arrays must be of the same length");
+ }
+ return map;
+
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/org/mskcc/cbio/portal/util/CaseList.java b/src/main/java/org/mskcc/cbio/portal/util/CaseList.java
new file mode 100644
index 00000000..5e01c984
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/CaseList.java
@@ -0,0 +1,48 @@
+package org.mskcc.cbio.portal.util;
+
+import java.util.List;
+
+public class CaseList {
+
+ private final String stableId;
+ private final String cancerStudyIdentifier;
+ private final String name;
+ private final String description;
+
+ private final String category;
+ private final List sampleIds;
+
+ CaseList(String stableId, String cancerStudyIdentifier, String name, String description, String category, List sampleIds) {
+ this.stableId = stableId;
+ this.cancerStudyIdentifier = cancerStudyIdentifier;
+ this.name = name;
+ this.description = description;
+ this.category = category;
+ this.sampleIds = sampleIds;
+ }
+
+ public String getStableId() {
+ return stableId;
+ }
+
+ public String getCancerStudyIdentifier() {
+ return cancerStudyIdentifier;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public String getCategory() {
+ return category;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public List getSampleIds() {
+ return sampleIds;
+ }
+
+}
diff --git a/src/main/java/org/mskcc/cbio/portal/util/CaseListReader.java b/src/main/java/org/mskcc/cbio/portal/util/CaseListReader.java
new file mode 100644
index 00000000..02c15763
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/CaseListReader.java
@@ -0,0 +1,43 @@
+package org.mskcc.cbio.portal.util;
+
+import org.mskcc.cbio.portal.scripts.TrimmedProperties;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.stream.Collectors;
+
+public class CaseListReader {
+
+ public static CaseList readFile(File caseListFile) {
+ Properties properties = new TrimmedProperties();
+ try {
+ properties.load(new FileReader(caseListFile));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ String stableId = properties.getProperty("stable_id");
+ String cancerStudyIdentifier = properties.getProperty("cancer_study_identifier");
+ String caseListName = properties.getProperty("case_list_name");
+ String caseListDescription = properties.getProperty("case_list_description");
+ String caseListCategory = properties.getProperty("case_list_category");
+ String caseListIds = properties.getProperty("case_list_ids");
+ List sampleIds = caseListIds == null ? List.of()
+ : Arrays.stream(caseListIds.split("\t")).toList();
+
+ return new CaseList(
+ stableId,
+ cancerStudyIdentifier,
+ caseListName,
+ caseListDescription,
+ caseListCategory,
+ sampleIds
+ );
+ }
+
+
+}
diff --git a/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java b/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
index 3cc6fd71..b5aa293e 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
@@ -53,7 +53,6 @@ public static void storeCnaEvents(
if (!CNA.AMP.equals(cnaEvent.getAlteration()) && !CNA.HOMDEL.equals(cnaEvent.getAlteration())) {
continue;
}
-
// Revert PR https://github.com/cBioPortal/cbioportal-core/pull/1 breaks importer
Optional existingCnaEvent = existingCnaEvents
.stream()
diff --git a/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java b/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
index b5b36227..0d2b6a23 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
@@ -138,8 +138,10 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de
parser.accepts( "loadMode", "direct (per record) or bulk load of data" )
.withRequiredArg().describedAs( "[directLoad|bulkLoad (default)]" ).ofType( String.class );
}
+ parser.accepts("overwrite-existing",
+ "Enables overwriting data if it turns out it already exists in DB.").withOptionalArg().describedAs("overwrite-existing").ofType(String.class);
String progName = "importScript";
-
+
OptionSet options = null;
try {
options = parser.parse( args );
@@ -176,10 +178,6 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de
"Error: unknown loadMode action: " + actionArg);
}
}
- else {
- throw new UsageException(progName, description, parser,
- "Error: 'loadMode' argument required.");
- }
}
return options;
}
@@ -251,6 +249,9 @@ public static OptionSet parseStandardDataAndMetaUpdateOptions(String[] args, Str
parser.accepts( "loadMode", "direct (per record) or bulk load of data" )
.withRequiredArg().describedAs( "[directLoad|bulkLoad (default)]" ).ofType( String.class );
}
+ parser.accepts("overwrite-existing",
+ "Enables overwriting data if it turns out it already exists in DB.").withOptionalArg().describedAs("overwrite-existing").ofType(String.class);
+
String progName = "importScript";
OptionSet options = null;
diff --git a/src/main/java/org/mskcc/cbio/portal/util/DataValidator.java b/src/main/java/org/mskcc/cbio/portal/util/DataValidator.java
new file mode 100644
index 00000000..1878f063
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/DataValidator.java
@@ -0,0 +1,7 @@
+package org.mskcc.cbio.portal.util;
+
+public class DataValidator {
+ public static boolean isValidNumericSequence(String str) {
+ return str.matches("[0-9]+");
+ }
+}
diff --git a/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java b/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java
index 744ca565..4f0958ee 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java
@@ -43,30 +43,6 @@
* @author Ethan Cerami.
*/
public class FileUtil {
- /**
- * BioPAX File Type.
- */
- public static final int BIOPAX = 0;
-
- /**
- * PSI_MI File Type.
- */
- public static final int PSI_MI = 1;
-
- /**
- * External DBs File Type.
- */
- public static final int EXTERNAL_DBS = 2;
-
- /**
- * Identifiers File Type.
- */
- public static final int IDENTIFIERS = 3;
-
- /**
- * Unknown File Type.
- */
- public static final int UNKNOWN = 4;
/**
* Gets Number of Lines in Specified File.
@@ -77,32 +53,16 @@ public class FileUtil {
*/
public static int getNumLines(File file) throws IOException {
int numLines = 0;
- FileReader reader = new FileReader(file);
- BufferedReader buffered = new BufferedReader(reader);
- String line = buffered.readLine();
- while (line != null) {
- if (!line.startsWith("#") && line.trim().length() > 0) {
- numLines++;
+ try (FileReader reader = new FileReader(file); BufferedReader buffered = new BufferedReader(reader)) {
+ String line = buffered.readLine();
+ while (line != null) {
+ if (TsvUtil.isDataLine(line)) {
+ numLines++;
+ }
+ line = buffered.readLine();
}
- line = buffered.readLine();
+ return numLines;
}
- reader.close();
- return numLines;
}
- /**
- * Gets Next Line of Input. Filters out Empty Lines and Comments.
- *
- * @param buf BufferedReader Object.
- * @return next line of input.
- * @throws IOException Error reading input stream.
- */
- public static String getNextLine(BufferedReader buf) throws IOException {
- String line = buf.readLine();
- while (line != null && (line.trim().length() == 0
- || line.trim().startsWith("#"))) {
- line = buf.readLine();
- }
- return line;
- }
}
\ No newline at end of file
diff --git a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java
index af686a72..7d3bb6cc 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java
@@ -48,6 +48,9 @@
import org.mskcc.cbio.portal.model.GeneticProfileLink;
import org.mskcc.cbio.portal.scripts.TrimmedProperties;
+import static org.cbioportal.model.MolecularProfile.DataType.DISCRETE;
+import static org.cbioportal.model.MolecularProfile.ImportType.DISCRETE_LONG;
+
/**
* Prepare a GeneticProfile for having its data loaded.
*
@@ -76,22 +79,33 @@ public static GeneticProfile loadGeneticProfile(File file) throws IOException, D
GeneticProfile geneticProfile = loadGeneticProfileFromMeta(file);
GeneticProfile existingGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId(geneticProfile.getStableId());
if (existingGeneticProfile != null) {
- if (!existingGeneticProfile.getDatatype().equals("MAF")) {
- // the dbms already contains a GeneticProfile with the file's stable_id. This scenario is not supported
- // anymore, so throw error telling user to remove existing profile first:
- throw new RuntimeException("Error: genetic_profile record found with same Stable ID as the one used in your data: "
- + existingGeneticProfile.getStableId() + ". Remove the existing genetic_profile record first.");
+ ProgressMonitor.setCurrentMessage("genetic_profile record found with same Stable ID (" + geneticProfile.getStableId()
+ + "). Using it instead.");
+ if (geneticProfile.getGeneticAlterationType() != existingGeneticProfile.getGeneticAlterationType()) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but different genetic alteration type: "
+ + existingGeneticProfile.getGeneticProfileId());
+ }
+ if (DISCRETE_LONG.name().equals(geneticProfile.getDatatype())) {
+ if (!Set.of(DISCRETE_LONG.name(), DISCRETE.name()).contains(existingGeneticProfile.getDatatype())) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but unsupported data type: "
+ + existingGeneticProfile.getDatatype());
+ }
} else {
- // For mutation data only we can have multiple files with the same genetic_profile.
- // There is a constraint in the mutation database table to prevent duplicated data
- // If this constraint is hit (mistakenly importing the same maf twice) MySqlBulkLoader will throw an exception
- //
- // make an object combining the pre-existing profile with the file-specific properties of the current file
- GeneticProfile gp = new GeneticProfile(existingGeneticProfile);
- gp.setTargetLine(gp.getTargetLine());
- gp.setOtherMetadataFields(gp.getAllOtherMetadataFields());
- return gp;
+ if (!existingGeneticProfile.getDatatype().equals(geneticProfile.getDatatype())) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but different data type: "
+ + existingGeneticProfile.getDatatype());
+ }
+ }
+ if (geneticProfile.getCancerStudyId() != existingGeneticProfile.getCancerStudyId()) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but different cancer study (id="
+ + existingGeneticProfile.getCancerStudyId() + ")");
}
+ existingGeneticProfile.setOtherMetadataFields(geneticProfile.getAllOtherMetadataFields());
+ return existingGeneticProfile;
}
// For GSVA profiles, we want to create a geneticProfileLink from source_stable_id for:
diff --git a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileUtil.java b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileUtil.java
index 16ab5098..748ffd54 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileUtil.java
@@ -87,6 +87,9 @@ public static boolean outlierExpressionSelected(HashSet geneticProfileId
public static int getGenePanelId(String panelId) {
GenePanel genePanel = DaoGenePanel.getGenePanelByStableId(panelId);
+ if (genePanel == null) {
+ throw new NoSuchElementException("Gene panel with id " + panelId + " not found.");
+ }
return genePanel.getInternalId();
}
diff --git a/src/main/java/org/mskcc/cbio/portal/util/MyCancerGenomeLinkUtil.java b/src/main/java/org/mskcc/cbio/portal/util/MyCancerGenomeLinkUtil.java
index e1c035e1..a5244050 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/MyCancerGenomeLinkUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/MyCancerGenomeLinkUtil.java
@@ -33,23 +33,12 @@
import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
import java.io.InputStreamReader;
-import java.net.URL;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import javax.net.ssl.HttpsURLConnection;
-import org.apache.commons.text.StringEscapeUtils;
+
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
import org.mskcc.cbio.portal.model.CanonicalGene;
@@ -107,7 +96,7 @@ private static void setMyCancerGenomeLinkFromLocal() {
while ((line=in.readLine())!=null && line.startsWith("#")) {}
for (; line!=null; line=in.readLine()) {
- String[] parts = line.trim().split("\t",-1);
+ String[] parts = TsvUtil.splitTsvLine(line);
if (parts.length<4) {
continue;
}
diff --git a/src/main/java/org/mskcc/cbio/portal/util/TsvUtil.java b/src/main/java/org/mskcc/cbio/portal/util/TsvUtil.java
new file mode 100644
index 00000000..0c2e61a2
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/TsvUtil.java
@@ -0,0 +1,43 @@
+package org.mskcc.cbio.portal.util;
+
+/**
+ * Utils to parse and validate TSV lines
+ * @author Ruslan Forostianov
+ */
+public class TsvUtil {
+ /**
+ * Detects whether the line contains data;
+ * e.g. blank lines and comments are not considered data rows.
+ * @param line the line to evaluate
+ * @return true if the line contains data, false otherwise
+ */
+ public static boolean isDataLine(String line) {
+ return !line.startsWith("#") && line.trim().length() > 0;
+ }
+
+ /**
+ * Splits a TSV line into fields, without trimming empty values at the end.
+ * @param line the tab-separated line to split
+ * @return the field values, including any trailing empty fields
+ */
+ public static String[] splitTsvLine(String line) {
+ return line.split("\t", -1);
+ }
+
+ /**
+ * Makes sure the header and the row have the same number of fields.
+ * @param headerParts the field values of the header line
+ * @param rowParts the field values of the data row
+ */
+ public static void ensureHeaderAndRowMatch(String[] headerParts, String[] rowParts) {
+ int headerColumns = headerParts.length;
+ if (rowParts.length > headerColumns) {
+ throw new IllegalArgumentException("Found line with more fields (" + rowParts.length
+ + ") than specified in the headers(" + headerColumns + "): \n" + rowParts[0]);
+ }
+ if (rowParts.length < headerColumns) {
+ throw new IllegalArgumentException("Found line with less fields (" + rowParts.length
+ + ") than specified in the headers(" + headerColumns + "): \n" + rowParts[0]);
+ }
+ }
+}
diff --git a/src/main/java/org/mskcc/cbio/portal/validate/CaseListValidator.java b/src/main/java/org/mskcc/cbio/portal/validate/CaseListValidator.java
new file mode 100644
index 00000000..d6a2494e
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/validate/CaseListValidator.java
@@ -0,0 +1,48 @@
+package org.mskcc.cbio.portal.validate;
+
+import org.mskcc.cbio.portal.util.CaseList;
+
+public class CaseListValidator {
+
+ /**
+ * Validates the fields that are used during a case list update.
+ * @param caseList the case list to validate
+ */
+ public static void validateIdFields(CaseList caseList) {
+ if (caseList.getStableId() == null) {
+ throw new IllegalArgumentException("stable id is not specified.");
+ }
+ if (caseList.getStableId().matches(".*\\s.*")) {
+ throw new IllegalArgumentException(String.format("stable id cannot contain white space(s): '%s'", caseList.getStableId()));
+ }
+ if (caseList.getCancerStudyIdentifier() == null) {
+ throw new IllegalArgumentException("cancer study identifier is not specified.");
+ }
+ if (caseList.getCancerStudyIdentifier().matches(".*\\s.*")) {
+ throw new IllegalArgumentException(String.format("cancer study identifier cannot contain white space(s): '%s'", caseList.getStableId()));
+ }
+ if (caseList.getSampleIds() == null || caseList.getSampleIds().isEmpty()) {
+ throw new IllegalArgumentException("sample ids are not specified.");
+ }
+ }
+
+ /**
+ * Validates the fields that are used during case list creation.
+ * @param caseList the case list to validate
+ */
+ public static void validateDescriptionFields(CaseList caseList) {
+ if (caseList.getName() == null) {
+ throw new IllegalArgumentException("case list name is not specified.");
+ }
+ if (caseList.getDescription() == null) {
+ throw new IllegalArgumentException("case list description is not specified.");
+ }
+ }
+
+ public static void validateAll(CaseList caseList) {
+ validateIdFields(caseList);
+ validateDescriptionFields(caseList);
+ }
+
+
+}
diff --git a/src/test/java/org/cbioportal/model/util/TsvUtilTest.java b/src/test/java/org/cbioportal/model/util/TsvUtilTest.java
new file mode 100644
index 00000000..c49de40b
--- /dev/null
+++ b/src/test/java/org/cbioportal/model/util/TsvUtilTest.java
@@ -0,0 +1,29 @@
+package org.cbioportal.model.util;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+import static org.mskcc.cbio.portal.util.TsvUtil.ensureHeaderAndRowMatch;
+import static org.junit.Assert.assertThrows;
+
+public class TsvUtilTest {
+
+ @Test
+ public void testEnsureHeaderAndRowMatch_headerHasGreaterLength() {
+ IllegalArgumentException illegalArgumentException = assertThrows(IllegalArgumentException.class,
+ () -> ensureHeaderAndRowMatch(new String[] {"header1", "header2"}, new String[] {"row1"}));
+ assertTrue(illegalArgumentException.getMessage().contains("Found line with less fields"));
+ }
+
+ @Test
+ public void testEnsureHeaderAndRowMatch_headerHasSmallerLength() {
+ IllegalArgumentException illegalArgumentException = assertThrows(IllegalArgumentException.class,
+ () -> ensureHeaderAndRowMatch(new String[] {"header1"}, new String[] {"row1", "row2"}));
+ assertTrue(illegalArgumentException.getMessage().contains("Found line with more fields"));
+ }
+
+ @Test
+ public void testEnsureHeaderAndRowMatch_headerHasSameLength() {
+ ensureHeaderAndRowMatch(new String[] {"header1", "header2"}, new String[] {"row1", "row2"});
+ }
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java
index 8c1afdcc..83e04144 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java
@@ -72,7 +72,7 @@ public void setUp() throws DaoException
public void testDaoGetAllGeneticProfiles() throws DaoException {
ArrayList<GeneticProfile> list = DaoGeneticProfile.getAllGeneticProfiles(studyId);
- assertEquals(7, list.size());
+ assertEquals(9, list.size());
}
@Test
@@ -134,12 +134,12 @@ public void testDaoDeleteGeneticProfile() throws DaoException {
GeneticProfile geneticProfile = DaoGeneticProfile.getGeneticProfileById(2);
- assertEquals(7, DaoGeneticProfile.getCount());
+ assertEquals(9, DaoGeneticProfile.getCount());
DaoGeneticProfile.deleteGeneticProfile(geneticProfile);
- assertEquals(6, DaoGeneticProfile.getCount());
+ assertEquals(8, DaoGeneticProfile.getCount());
ArrayList<GeneticProfile> list = DaoGeneticProfile.getAllGeneticProfiles(studyId);
- assertEquals(6, list.size());
+ assertEquals(8, list.size());
geneticProfile = list.get(0);
assertEquals(studyId, geneticProfile.getCancerStudyId());
assertEquals("mRNA expression (microarray)", geneticProfile.getProfileName());
@@ -155,7 +155,7 @@ public void testDaoUpdateGeneticProfile() throws DaoException {
geneticProfile.getGeneticProfileId(), "Updated Name",
"Updated Description"));
ArrayList<GeneticProfile> list = DaoGeneticProfile.getAllGeneticProfiles(studyId);
- assertEquals(7, list.size());
+ assertEquals(9, list.size());
geneticProfile = list.get(0);
assertEquals(studyId, geneticProfile.getCancerStudyId());
assertEquals("Updated Name", geneticProfile.getProfileName());
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoSampleProfile.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoSampleProfile.java
index 705f46b6..d4c80a8e 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoSampleProfile.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoSampleProfile.java
@@ -55,6 +55,7 @@
import java.util.ArrayList;
import java.util.HashSet;
+import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -105,8 +106,8 @@ public void testDaoSampleProfile() throws DaoException {
Patient patient = DaoPatient.getPatientByCancerStudyAndPatientId(study.getInternalId(), "TCGA-12345");
Sample sample = DaoSample.getSampleByPatientAndSampleId(patient.getInternalId(), "TCGA-12345-01");
- int num = DaoSampleProfile.addSampleProfile(sample.getInternalId(), geneticProfileId, null);
- assertEquals(1, num);
+ DaoSampleProfile.upsertSampleToProfileMapping(List.of(
+ new DaoSampleProfile.SampleProfileTuple(geneticProfileId, sample.getInternalId(), null)));
boolean exists = DaoSampleProfile.sampleExistsInGeneticProfile(sample.getInternalId(), geneticProfileId);
assertTrue(exists);
@@ -114,8 +115,8 @@ public void testDaoSampleProfile() throws DaoException {
assertEquals(geneticProfileId, DaoSampleProfile.getProfileIdForSample(sample.getInternalId()));
sample = DaoSample.getSampleByPatientAndSampleId(patient.getInternalId(), "TCGA-123456-01");
- num = DaoSampleProfile.addSampleProfile(sample.getInternalId(), geneticProfileId, genePanel.getInternalId());
- assertEquals(1, num);
+ DaoSampleProfile.upsertSampleToProfileMapping(List.of(
+ new DaoSampleProfile.SampleProfileTuple(geneticProfileId, sample.getInternalId(), genePanel.getInternalId())));
boolean existsByPanelId = DaoSampleProfile.sampleProfileMappingExistsByPanel(genePanel.getInternalId());
assertTrue(existsByPanelId);
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java
new file mode 100644
index 00000000..48ca7b4e
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java
@@ -0,0 +1,53 @@
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticEntity;
+
+import java.util.HashMap;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class GeneticAlterationsTestHelper {
+ public static Set<Integer> geneStableIdsToEntityIds(Set<String> beforeStableIds) {
+ return beforeStableIds.stream().map(stableId -> {
+ try {
+ return geneStableIdToEntityId(stableId);
+ } catch (DaoException e) {
+ throw new RuntimeException(e);
+ }
+ }).collect(Collectors.toSet());
+ }
+
+ public static int geneStableIdToEntityId(String stableId) throws DaoException {
+ return DaoGeneticEntity.getGeneticEntityByStableId(stableId).getId();
+ }
+
+ public static void assertPriorDataState(HashMap<Integer, HashMap<Integer, String>> beforeResult, Set<Integer> expectedEntityIds, Set<Integer> expectedSampleIds) {
+ assertEquals(expectedEntityIds, beforeResult.keySet());
+ beforeResult.forEach((entityId, sampleIdToValue) -> {
+ assertEquals("Samples for gene with entityId = " + entityId + " have to match expected ones",
+ expectedSampleIds, beforeResult.get(entityId).keySet());
+ });
+ }
+
+ public static void assertNoChange(HashMap<Integer, HashMap<Integer, String>> beforeResult,
+ HashMap<Integer, HashMap<Integer, String>> afterResult,
+ Set<Integer> entityIds,
+ Set<Integer> sampleIds) {
+ entityIds.forEach(entityId -> {
+ assertTrue("After result is expected to contain entityId=" + entityId,
+ afterResult.containsKey(entityId));
+ sampleIds.forEach(sampleId -> {
+ assertTrue("Sample_id=" + sampleId + " expected to be found for gene with entityId=" + entityId,
+ afterResult.get(entityId).containsKey(sampleId));
+ assertEquals("The values for sample_id=" + sampleId +
+ " and entityId=" + entityId + " before and after upload have to match.",
+ beforeResult.get(entityId).get(sampleId), afterResult.get(entityId).get(sampleId));
+ });
+ });
+ }
+
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java
new file mode 100644
index 00000000..0e1d8a68
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java
@@ -0,0 +1,244 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.cbioportal.model.CNA;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoCnaEvent;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGenePanel;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.dao.DaoSampleProfile;
+import org.mskcc.cbio.portal.model.CnaEvent;
+import org.mskcc.cbio.portal.model.GenePanel;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportProfileData;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.TestContextManager;
+import org.springframework.transaction.PlatformTransactionManager;
+import org.springframework.transaction.TransactionStatus;
+import org.springframework.transaction.support.DefaultTransactionDefinition;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+
+/**
+ * Tests Incremental Import of COPY_NUMBER_ALTERATION Data.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(Parameterized.class)
+@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"})
+public class TestIncrementalCopyNumberAlterationImport {
+
+ // Hugo_Symbol: CDK1
+ final long newGeneEntrezId = 983l;
+ // Gene that is part of the platform, but absent during the incremental upload
+ // Hugo_Symbol: ATM
+ final long absentGeneEntrezId = 472l;
+ final Set<Long> noChangeEntrezIds = Set.of(10000l, 207l, 208l, 3265l, 3845l, 4893l, 672l, 673l, 675l);
+ final Set<Long> beforeEntrezIds = new HashSet<>(noChangeEntrezIds);
+ private final String metaFile;
+ private final String dataFile;
+
+ { beforeEntrezIds.add(absentGeneEntrezId); }
+
+ // stable_id: TCGA-XX-0800
+ final int newSampleId = 15;
+ // stable_id: TCGA-A1-A0SO
+ final int updateSampleId = 12;
+ final Set |