diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoMutation.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoMutation.java index 3a7472c0..b8d1e4f9 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoMutation.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoMutation.java @@ -67,6 +67,8 @@ public final class DaoMutation { public static final String NAN = "NaN"; private static final String MUTATION_COUNT_ATTR_ID = "MUTATION_COUNT"; + public static final String DELETE_ALTERATION_DRIVER_ANNOTATION = "DELETE from alteration_driver_annotation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?"; + public static final String DELETE_MUTATION = "DELETE from mutation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?"; public static int addMutation(ExtendedMutation mutation, boolean newMutationEvent) throws DaoException { if (!MySQLbulkLoader.isBulkLoad()) { @@ -1499,13 +1501,12 @@ public static void deleteAllRecordsInGeneticProfileForSample(long geneticProfile ResultSet rs = null; try { con = JdbcUtil.getDbConnection(DaoMutation.class); - // TODO Move it to another class? - pstmt = con.prepareStatement("DELETE from alteration_driver_annotation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?"); + pstmt = con.prepareStatement(DELETE_ALTERATION_DRIVER_ANNOTATION); pstmt.setLong(1, geneticProfileId); pstmt.setLong(2, internalSampleId); pstmt.executeUpdate(); - pstmt = con.prepareStatement("DELETE from mutation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?"); + pstmt = con.prepareStatement(DELETE_MUTATION); pstmt.setLong(1, geneticProfileId); pstmt.setLong(2, internalSampleId); pstmt.executeUpdate(); diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoSampleList.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoSampleList.java index 72d1c1b2..cd5fef98 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoSampleList.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoSampleList.java @@ -42,7 +42,9 @@ */ public class DaoSampleList { - /** + public static final String DELETE_SAMPLE_LIST_LIST = "DELETE FROM sample_list_list WHERE `LIST_ID` = ?"; + + /** * Adds record to sample_list table. */ public int addSampleList(SampleList sampleList) throws DaoException { @@ -67,7 +69,7 @@ public int addSampleList(SampleList sampleList) throws DaoException { int listListRow = addSampleListList(sampleList.getCancerStudyId(), listId, sampleList.getSampleList(), con); rows = (listListRow != -1) ? (rows + listListRow) : rows; } else { - throw new SQLException("Creating sample list failed, no ID obtained."); + throw new DaoException("Creating sample list failed, no ID obtained."); } } } catch (SQLException e) { @@ -253,7 +255,7 @@ public void updateSampleListList(SampleList sampleList) throws DaoException { PreparedStatement pstmt = null; try { con = JdbcUtil.getDbConnection(DaoSampleList.class); - pstmt = con.prepareStatement("DELETE FROM sample_list_list WHERE `LIST_ID` = ?"); + pstmt = con.prepareStatement(DELETE_SAMPLE_LIST_LIST); pstmt.setInt(1, sampleList.getSampleListId()); pstmt.executeUpdate(); diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportClinicalData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportClinicalData.java index 11519601..0e37c9e0 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportClinicalData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportClinicalData.java @@ -43,7 +43,6 @@ import java.util.regex.*; import org.apache.commons.collections4.map.MultiKeyMap; -//TODO Remove MIXED_ATTRIBUTES data type https://github.com/cBioPortal/cbioportal-core/issues/31 public class ImportClinicalData extends ConsoleRunnable { public static final String DELIMITER = "\t"; @@ -104,6 +103,10 @@ public static enum AttributeTypes { PATIENT_ATTRIBUTES("PATIENT"), SAMPLE_ATTRIBUTES("SAMPLE"), + /** + * We want to encourage use patient or sample files instead, not mixed ones. + * See https://github.com/cBioPortal/cbioportal-core/issues/31 + */ @Deprecated MIXED_ATTRIBUTES("MIXED"); @@ -664,17 +667,13 @@ public void run() { attributesDatatype = properties.getProperty("datatype"); cancerStudyStableId = properties.getProperty("cancer_study_identifier"); } - if( options.has ( attributeFlag ) ) - { + if (options.has(attributeFlag)) { attributesDatatype = "MIXED_ATTRIBUTES"; } - if( options.has ( relaxedFlag ) ) - { + if (options.has(relaxedFlag)) { relaxed = true; - } - if( options.has ( overWriteExistingFlag ) ) - { + if (options.has(overWriteExistingFlag)) { overwriteExisting = true; } diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportSampleList.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportSampleList.java index 7e94efb2..356471e7 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportSampleList.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportSampleList.java @@ -66,8 +66,7 @@ public static void importSampleList(File dataFile) throws IOException, DaoExcept boolean itemsAddedViaPatientLink = false; // construct sample id list ArrayList sampleIDsList = new ArrayList(); - List sampleIds = caseList.getSampleIds(); - for (String sampleId : sampleIds) { + for (String sampleId : caseList.getSampleIds()) { sampleId = StableIdUtil.getSampleId(sampleId); Sample s = DaoSample.getSampleByCancerStudyAndSampleId(theCancerStudy.getInternalId(), sampleId); if (s==null) { @@ -87,7 +86,7 @@ public static void importSampleList(File dataFile) throws IOException, DaoExcept } else if (!sampleIDsList.contains(s.getStableId())) { sampleIDsList.add(s.getStableId()); } else { - ProgressMonitor.logWarning("Warning: duplicated sample ID "+s.getStableId()+" in case list "+caseList.getStableId()); + ProgressMonitor.logWarning("Warning: duplicated sample ID " + s.getStableId() + " in case list " + caseList.getStableId()); } } @@ -111,7 +110,7 @@ public static void importSampleList(File dataFile) throws IOException, DaoExcept ProgressMonitor.setCurrentMessage(" --> stable ID: " + sampleList.getStableId()); ProgressMonitor.setCurrentMessage(" --> sample list name: " + sampleList.getName()); - ProgressMonitor.setCurrentMessage(" --> number of samples in file: " + sampleIds.size()); + ProgressMonitor.setCurrentMessage(" --> number of samples in file: " + caseList.getSampleIds().size()); String warningSamplesViaPatientLink = (itemsAddedViaPatientLink? "(nb: can be higher if samples were added via patient link)" : ""); ProgressMonitor.setCurrentMessage(" --> number of samples stored in final sample list " + warningSamplesViaPatientLink + ": " + sampleIDsList.size()); } diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java index 2c581ffc..e80762f0 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/UpdateCaseListsSampleIds.java @@ -31,8 +31,19 @@ import org.mskcc.cbio.portal.util.ProgressMonitor; import org.mskcc.cbio.portal.validate.CaseListValidator; -import java.io.*; -import java.util.*; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; public class UpdateCaseListsSampleIds extends ConsoleRunnable { @@ -41,8 +52,8 @@ public class UpdateCaseListsSampleIds extends ConsoleRunnable { private File dataFile; private List caseListFiles = List.of(); private String cancerStudyStableId; - private Map> caseListSampleIdToSampleIds = new LinkedHashMap<>(); - private DaoSampleList daoSampleList = new DaoSampleList(); + private final Map> caseListSampleIdToSampleIds = new LinkedHashMap<>(); + private final DaoSampleList daoSampleList = new DaoSampleList(); private LinkedHashSet allSampleIds; public UpdateCaseListsSampleIds(String[] args) { @@ -56,7 +67,6 @@ public void run() { parseArguments(); readStudyIdAndDataFileFromMetaFile(); this.allSampleIds = readSampleIdsFromDataFile(this.dataFile); - // TODO has the all case list always to exist? this.caseListSampleIdToSampleIds.put(cancerStudyStableId + "_all", this.allSampleIds); Map> readCaseListSampleIds = readCaseListFiles(); this.caseListSampleIdToSampleIds.putAll(readCaseListSampleIds); @@ -65,7 +75,7 @@ public void run() { private Map> readCaseListFiles() { LinkedHashMap> result = new LinkedHashMap<>(); - for (File caseListFile: this.caseListFiles) { + for (File caseListFile : this.caseListFiles) { CaseList caseList = CaseListReader.readFile(caseListFile); CaseListValidator.validateIdFields(caseList); String cancerStudyIdentifier = caseList.getCancerStudyIdentifier(); @@ -87,34 +97,32 @@ private Map> readCaseListFiles() { } private void updateCaseLists(Map> caseListSampleIdToSampleIds) { - // TODO Do we really have to do this? Is there a better way? DaoCancerStudy.reCacheAll(); try { - for (Map.Entry> caseListStableIdToSampleIds: caseListSampleIdToSampleIds.entrySet()) { + for (Map.Entry> caseListStableIdToSampleIds : caseListSampleIdToSampleIds.entrySet()) { String caseListStableId = caseListStableIdToSampleIds.getKey(); - Set sampleIds = caseListStableIdToSampleIds.getValue(); + Set uploadedSampleIds = caseListStableIdToSampleIds.getValue(); SampleList sampleList = daoSampleList.getSampleListByStableId(caseListStableId); if (sampleList == null) { throw new RuntimeException("No case list with " + caseListStableId + " stable id is found"); } - LinkedHashSet newCaseListSampleIds = new LinkedHashSet<>(sampleIds); - newCaseListSampleIds.addAll(sampleList.getSampleList()); - ArrayList newSampleArrayList = new ArrayList<>(newCaseListSampleIds); - sampleList.setSampleList(newSampleArrayList); - //TODO no need to run expensive db update if sampleList hasn't effectively changed - daoSampleList.updateSampleListList(sampleList); + LinkedHashSet newCaseListSampleIds = new LinkedHashSet<>(uploadedSampleIds); + if (newCaseListSampleIds.addAll(sampleList.getSampleList())) { + sampleList.setSampleList(new ArrayList<>(newCaseListSampleIds)); + daoSampleList.updateSampleListList(sampleList); + } } CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId(this.cancerStudyStableId); List sampleLists = daoSampleList.getAllSampleLists(cancerStudy.getInternalId()); List remainingLists = sampleLists.stream().filter(sl -> !caseListSampleIdToSampleIds.containsKey(sl.getStableId()) && sl.getSampleList().stream().anyMatch(this.allSampleIds::contains) - ).collect(Collectors.toList()); - for (SampleList remainingList: remainingLists) { + ).toList(); + for (SampleList remainingList : remainingLists) { ArrayList newSampleList = new ArrayList<>(remainingList.getSampleList()); - newSampleList.removeAll(this.allSampleIds); - remainingList.setSampleList(newSampleList); - //TODO for optimization purpose we could supply to the update method 2 set of samples: samples that have to be added and samples that have to be removed - daoSampleList.updateSampleListList(remainingList); + if (newSampleList.removeAll(this.allSampleIds)) { + remainingList.setSampleList(newSampleList); + daoSampleList.updateSampleListList(remainingList); + } } } catch (DaoException e) { throw new RuntimeException(e); @@ -123,40 +131,29 @@ private void updateCaseLists(Map> caseListSampleIdToSampleId private LinkedHashSet readSampleIdsFromDataFile(File dataFile) { LinkedHashSet allSampleIds = new LinkedHashSet<>(); - FileReader reader = null; - try { - reader = new FileReader(dataFile); - try (BufferedReader buff = new BufferedReader(reader)) { - String line; - int sampleIdPosition = -1; - while ((line = buff.readLine()) != null) { - String trimmedLine = line.trim(); - if (trimmedLine.isEmpty() || trimmedLine.startsWith("#")) { - continue; - } + try (FileReader reader = new FileReader(dataFile); + BufferedReader buff = new BufferedReader(reader)) { + String line; + int sampleIdPosition = -1; + while ((line = buff.readLine()) != null) { + String trimmedLine = line.trim(); + if (trimmedLine.isEmpty() || trimmedLine.startsWith("#")) { + continue; + } - String[] fieldValues = line.split("\t"); + String[] fieldValues = line.split("\t"); + if (sampleIdPosition == -1) { + sampleIdPosition = List.of(fieldValues).indexOf("SAMPLE_ID"); if (sampleIdPosition == -1) { - sampleIdPosition = List.of(fieldValues).indexOf("SAMPLE_ID"); - if (sampleIdPosition == -1) { - throw new RuntimeException("No SAMPLE_ID header is found"); - } - } else { - allSampleIds.add(fieldValues[sampleIdPosition].trim()); + throw new RuntimeException("No SAMPLE_ID header is found"); } + } else { + allSampleIds.add(fieldValues[sampleIdPosition].trim()); } - return allSampleIds; } - } catch (Exception e) { + return allSampleIds; + } catch (IOException e) { throw new RuntimeException(e); - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } } } @@ -178,15 +175,15 @@ private void parseArguments() { String description = "Updates (adds/removes) sample ids in specified case lists."; OptionParser parser = new OptionParser(); - OptionSpec metaOpt = parser.accepts( "meta", - "clinical sample (genetic_alteration_type=CLINICAL and datatype=SAMPLE_ATTRIBUTES or datatype=MIXED_ATTRIBUTES) meta data file. All sample ids found in the file will be added to the _all case list." ).withRequiredArg().required().describedAs( "meta_clinical_sample.txt" ).ofType( String.class ); - OptionSpec caseListDirOrFileOpt = parser.accepts( "case-lists", - "case list file or a directory with case list files" ).withRequiredArg().describedAs( "case_lists/" ).ofType( String.class ); + OptionSpec metaOpt = parser.accepts("meta", + "clinical sample (genetic_alteration_type=CLINICAL and datatype=SAMPLE_ATTRIBUTES or datatype=MIXED_ATTRIBUTES) meta data file. All sample ids found in the file will be added to the _all case list.").withRequiredArg().required().describedAs("meta_clinical_sample.txt").ofType(String.class); + OptionSpec caseListDirOrFileOpt = parser.accepts("case-lists", + "case list file or a directory with case list files").withRequiredArg().describedAs("case_lists/").ofType(String.class); try { - OptionSet options = parser.parse( args ); + OptionSet options = parser.parse(args); this.metaFile = new File(options.valueOf(metaOpt)); - if(options.has(caseListDirOrFileOpt)){ + if (options.has(caseListDirOrFileOpt)) { File caseListDirOrFile = new File(options.valueOf(caseListDirOrFileOpt)); if (caseListDirOrFile.isDirectory()) { this.caseListFiles = Arrays.stream(Objects.requireNonNull(caseListDirOrFile.listFiles())) @@ -207,7 +204,7 @@ private void parseArguments() { /** * Runs the command as a script and exits with an appropriate exit code. * - * @param args the arguments given on the command line + * @param args the arguments given on the command line */ public static void main(String[] args) { ConsoleRunnable runner = new UpdateCaseListsSampleIds(args);