From 2929643df181718ef7c8f77a17dc082ec0ae0979 Mon Sep 17 00:00:00 2001 From: Ruslan Forostianov Date: Wed, 17 Jul 2024 09:45:03 +0200 Subject: [PATCH] RFC84: Data Entries Removal (#46) Add command to remove sample to python wrapper Implement sample removal command Remove sample from the tab delimited tables as well Do samples removal in a transaction Move removing sample everywhere in study to respective DAOs To be able to reuse this functionality Refactor sample removal test by using stream of Test and fix edge cases of sample removal command Add command to remove patient to python wrapper Add java command to remove patient in a study Increase test independence Adding or removing profile/sample should break only one test Fix patient id to sample ids mapping bug Throw exception when sample with GSVA is removed Throw exception when generic profile samples list is empty Refactor patient and sample removal code Remove unused logger and imports Remove unused code from cna discrete long data tests --- scripts/importer/cbioportalImporter.py | 67 ++++- scripts/importer/cbioportal_common.py | 2 + .../cbio/portal/dao/DaoGeneticAlteration.java | 15 +- .../portal/dao/DaoGeneticProfileSamples.java | 8 +- .../org/mskcc/cbio/portal/dao/DaoPatient.java | 59 +++++ .../org/mskcc/cbio/portal/dao/DaoSample.java | 138 ++++++++++- .../cbio/portal/scripts/RemovePatients.java | 144 +++++++++++ .../cbio/portal/scripts/RemoveSamples.java | 144 +++++++++++ .../dao/TestDaoGeneticProfile.java | 20 +- .../TestImportCnaDiscreteLongData.java | 26 +- .../scripts/TestRemovePatients.java | 228 ++++++++++++++++++ .../scripts/TestRemoveSamples.java | 218 +++++++++++++++++ src/test/resources/seed_mini.sql | 14 ++ tests/system_tests_import_data.py | 33 +++ 14 files changed, 1060 insertions(+), 56 deletions(-) create mode 100644 src/main/java/org/mskcc/cbio/portal/scripts/RemovePatients.java create mode 100644 src/main/java/org/mskcc/cbio/portal/scripts/RemoveSamples.java create mode 100644 
src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemovePatients.java create mode 100644 src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemoveSamples.java diff --git a/scripts/importer/cbioportalImporter.py b/scripts/importer/cbioportalImporter.py index c2f65cc0..c29035a7 100755 --- a/scripts/importer/cbioportalImporter.py +++ b/scripts/importer/cbioportalImporter.py @@ -36,6 +36,8 @@ from .cbioportal_common import IMPORT_STUDY_CLASS from .cbioportal_common import UPDATE_STUDY_STATUS_CLASS from .cbioportal_common import REMOVE_STUDY_CLASS +from .cbioportal_common import REMOVE_SAMPLES_CLASS +from .cbioportal_common import REMOVE_PATIENTS_CLASS from .cbioportal_common import IMPORT_CASE_LIST_CLASS from .cbioportal_common import ADD_CASE_LIST_CLASS from .cbioportal_common import VERSION_UTIL_CLASS @@ -53,10 +55,12 @@ IMPORT_CANCER_TYPE = "import-cancer-type" IMPORT_STUDY = "import-study" REMOVE_STUDY = "remove-study" +REMOVE_SAMPLES = "remove-samples" +REMOVE_PATIENTS = "remove-patients" IMPORT_STUDY_DATA = "import-study-data" IMPORT_CASE_LIST = "import-case-list" -COMMANDS = [IMPORT_CANCER_TYPE, IMPORT_STUDY, REMOVE_STUDY, IMPORT_STUDY_DATA, IMPORT_CASE_LIST] +COMMANDS = [IMPORT_CANCER_TYPE, IMPORT_STUDY, IMPORT_STUDY_DATA, IMPORT_CASE_LIST, REMOVE_STUDY, REMOVE_SAMPLES, REMOVE_PATIENTS] # ------------------------------------------------------------------------------ # sub-routines @@ -104,6 +108,24 @@ def remove_study_id(jvm_args, study_id): args.append("--noprogress") # don't report memory usage and % progress run_java(*args) +def remove_samples(jvm_args, study_ids, sample_ids): + args = jvm_args.split(' ') + args.append(REMOVE_SAMPLES_CLASS) + args.append("--study_ids") + args.append(study_ids) + args.append("--sample_ids") + args.append(sample_ids) + run_java(*args) + +def remove_patients(jvm_args, study_ids, patient_ids): + args = jvm_args.split(' ') + args.append(REMOVE_PATIENTS_CLASS) + args.append("--study_ids") + 
args.append(study_ids) + args.append("--patient_ids") + args.append(patient_ids) + run_java(*args) + def update_case_lists(jvm_args, meta_filename, case_lists_file_or_dir = None): args = jvm_args.split(' ') args.append(UPDATE_CASE_LIST_CLASS) @@ -213,7 +235,7 @@ def process_case_lists(jvm_args, case_list_dir): if not (case_list.startswith('.') or case_list.endswith('~')): import_case_list(jvm_args, os.path.join(case_list_dir, case_list)) -def process_command(jvm_args, command, meta_filename, data_filename, study_ids, update_generic_assay_entity = None): +def process_command(jvm_args, command, meta_filename, data_filename, study_ids, patient_ids, sample_ids, update_generic_assay_entity = None): if command == IMPORT_CANCER_TYPE: import_cancer_type(jvm_args, data_filename) elif command == IMPORT_STUDY: @@ -227,6 +249,10 @@ def process_command(jvm_args, command, meta_filename, data_filename, study_ids, remove_study_id(jvm_args, study_id) else: raise RuntimeError('Your command uses both -id and -meta. Please, use only one of the two parameters.') + elif command == REMOVE_SAMPLES: + remove_samples(jvm_args, study_ids, sample_ids) + elif command == REMOVE_PATIENTS: + remove_patients(jvm_args, study_ids, patient_ids) elif command == IMPORT_STUDY_DATA: import_data(jvm_args, meta_filename, data_filename, update_generic_assay_entity) elif command == IMPORT_CASE_LIST: @@ -505,7 +531,7 @@ def usage(): '--command [%s] --study_directory ' '--meta_filename ' '--data_filename ' - '--study_ids ' % (COMMANDS)), file=OUTPUT_FILE) + '--study_ids ' % (COMMANDS)), file=OUTPUT_FILE) def check_args(command): if command not in COMMANDS: @@ -545,23 +571,32 @@ def interface(args=None): parent_parser = argparse.ArgumentParser(description='cBioPortal meta Importer') add_parser_args(parent_parser) parser = argparse.ArgumentParser() + allowed_commands_csv = ', '.join(COMMANDS) subparsers = parser.add_subparsers(title='subcommands', dest='subcommand', - help='Command for import. 
Allowed commands: import-cancer-type, ' - 'import-study, import-study-data, import-case-list or ' - 'remove-study') + help='Command for import. Allowed commands: ' + allowed_commands_csv) import_cancer_type = subparsers.add_parser('import-cancer-type', parents=[parent_parser], add_help=False) import_study = subparsers.add_parser('import-study', parents=[parent_parser], add_help=False) import_study_data = subparsers.add_parser('import-study-data', parents=[parent_parser], add_help=False) import_case_list = subparsers.add_parser('import-case-list', parents=[parent_parser], add_help=False) remove_study = subparsers.add_parser('remove-study', parents=[parent_parser], add_help=False) - remove_study.add_argument('-id', '--study_ids', type=str, required=False, help='Cancer Study IDs for `remove-study` command, comma separated') - parser.add_argument('-c', '--command', type=str, required=False, + + remove_samples = subparsers.add_parser('remove-samples', parents=[], add_help=True) + remove_samples.add_argument('--study_ids', type=str, required=True, + help='Cancer Study ID(s) that contains sample(s). Comma separated, if multiple.') + remove_samples.add_argument('--sample_ids', type=str, required=True, + help='Sample ID(s). Comma separated, if multiple.') + + remove_patients = subparsers.add_parser('remove-patients', parents=[], add_help=True) + remove_patients.add_argument('--study_ids', type=str, required=True, + help='Cancer Study ID(s) that contains sample(s). Comma separated, if multiple.') + remove_patients.add_argument('--patient_ids', type=str, required=True, + help='Patient ID(s). Comma separated, if multiple.') + + parser.add_argument('-c', '--command', type=str, required=False, help='This argument is outdated. Please use the listed subcommands, without the -c flag. ' - 'Command for import. Allowed commands: import-cancer-type, ' - 'import-study, import-study-data, import-case-list or ' - 'remove-study') + 'Command for import. 
Allowed commands: ' + allowed_commands_csv) add_parser_args(parser) parser.add_argument('-id', '--study_ids', type=str, required=False, help='Cancer Study IDs for `remove-study` command, comma separated') @@ -647,7 +682,15 @@ def main(args): else: check_args(args.command) check_files(args.meta_filename, args.data_filename) - process_command(jvm_args, args.command, args.meta_filename, args.data_filename, args.study_ids, args.update_generic_assay_entity) + process_command( + jvm_args, + args.command, + args.meta_filename, + args.data_filename, + args.study_ids, + args.patient_ids if hasattr(args, 'patient_ids') else None, + args.sample_ids if hasattr(args, 'sample_ids') else None, + args.update_generic_assay_entity) # ------------------------------------------------------------------------------ # ready to roll diff --git a/scripts/importer/cbioportal_common.py b/scripts/importer/cbioportal_common.py index e4bbe041..8e6c97ae 100644 --- a/scripts/importer/cbioportal_common.py +++ b/scripts/importer/cbioportal_common.py @@ -34,6 +34,8 @@ IMPORT_STUDY_CLASS = "org.mskcc.cbio.portal.scripts.ImportCancerStudy" UPDATE_STUDY_STATUS_CLASS = "org.mskcc.cbio.portal.scripts.UpdateCancerStudy" REMOVE_STUDY_CLASS = "org.mskcc.cbio.portal.scripts.RemoveCancerStudy" +REMOVE_SAMPLES_CLASS = "org.mskcc.cbio.portal.scripts.RemoveSamples" +REMOVE_PATIENTS_CLASS = "org.mskcc.cbio.portal.scripts.RemovePatients" IMPORT_CANCER_TYPE_CLASS = "org.mskcc.cbio.portal.scripts.ImportTypesOfCancers" IMPORT_CASE_LIST_CLASS = "org.mskcc.cbio.portal.scripts.ImportSampleList" ADD_CASE_LIST_CLASS = "org.mskcc.cbio.portal.scripts.AddCaseList" diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java index 25cd987a..c67c0e3e 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java @@ -32,18 +32,22 @@ package 
org.mskcc.cbio.portal.dao; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.commons.lang3.StringUtils; import org.mskcc.cbio.portal.model.CanonicalGene; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.Map.Entry; - -import com.fasterxml.jackson.databind.node.ObjectNode; - -import org.apache.commons.lang3.StringUtils; +import java.util.Set; /** * Data Access Object for the Genetic Alteration Table. @@ -51,6 +55,7 @@ * @author Ethan Cerami. */ public class DaoGeneticAlteration { + private static final String DELIM = ","; public static final String NAN = "NaN"; private static DaoGeneticAlteration daoGeneticAlteration = null; diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticProfileSamples.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticProfileSamples.java index b8346957..e25e2ac8 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticProfileSamples.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticProfileSamples.java @@ -64,7 +64,7 @@ public static int addGeneticProfileSamples(int geneticProfileId, List o StringBuffer orderedSampleListBuf = new StringBuffer(); // Created Joined String, based on DELIM token for (Integer sampleId : orderedSampleList) { - orderedSampleListBuf.append(Integer.toString(sampleId)).append(DELIM); + orderedSampleListBuf.append(sampleId).append(DELIM); } try { con = JdbcUtil.getDbConnection(DaoGeneticProfileSamples.class); @@ -126,7 +126,11 @@ public static ArrayList getOrderedSampleList(int geneticProfileId) thr String orderedSampleList = rs.getString("ORDERED_SAMPLE_LIST"); // Split, based on DELIM token - String parts[] = orderedSampleList.split(DELIM); + String[] parts = 
orderedSampleList.split(DELIM); + if (parts.length == 1 && parts[0].isBlank()) { + throw new IllegalStateException("genetic_profile_samples row for geneticProfileId=" + + geneticProfileId + " has blank ORDERED_SAMPLE_LIST. Consider removing it."); + } ArrayList sampleList = new ArrayList (); for (String internalSampleId : parts) { sampleList.add(Integer.parseInt(internalSampleId)); diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoPatient.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoPatient.java index fecc868b..05675b4c 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoPatient.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoPatient.java @@ -35,9 +35,12 @@ import org.mskcc.cbio.portal.model.*; import org.apache.commons.collections4.map.MultiKeyMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.sql.*; import java.util.*; +import java.util.stream.Collectors; /** * DAO to `patient`. @@ -46,6 +49,8 @@ */ public class DaoPatient { + private static final Logger log = LoggerFactory.getLogger(DaoPatient.class); + private static final String SAMPLE_COUNT_ATTR_ID = "SAMPLE_COUNT"; private static final Map byInternalId = new HashMap(); @@ -215,4 +220,58 @@ private static Patient extractPatient(ResultSet rs) throws SQLException throw new SQLException(e); } } + + /** + * Removes patients information from the study + * @param internalStudyId - id of the study that contains the patients + * @param patientStableIds - patient stable ids to remove + * @throws DaoException + */ + public static void deletePatients(int internalStudyId, Set patientStableIds) throws DaoException + { + if (patientStableIds == null || patientStableIds.isEmpty()) { + log.info("No patients specified to remove for study with internal id={}. 
Skipping.", internalStudyId); + return; + } + log.info("Removing {} patients from study with internal id={} ...", patientStableIds, internalStudyId); + + Set internalPatientIds = findInternalPatientIdsInStudy(internalStudyId, patientStableIds); + Set patientsSampleStableIds = internalPatientIds.stream().flatMap(internalPatientId -> + DaoSample.getSamplesByPatientId(internalPatientId).stream().map(Sample::getStableId)) + .collect(Collectors.toSet()); + DaoSample.deleteSamples(internalStudyId, patientsSampleStableIds); + + Connection con = null; + PreparedStatement pstmt = null; + try { + con = JdbcUtil.getDbConnection(DaoPatient.class); + pstmt = con.prepareStatement("DELETE FROM `patient` WHERE `INTERNAL_ID` IN (" + + String.join(",", Collections.nCopies(internalPatientIds.size(), "?")) + + ")"); + int parameterIndex = 1; + for (Integer internalPatientId : internalPatientIds) { + pstmt.setInt(parameterIndex++, internalPatientId); + }; + pstmt.executeUpdate(); + } + catch (SQLException e) { + throw new DaoException(e); + } + finally { + JdbcUtil.closeAll(DaoPatient.class, con, pstmt, null); + } + log.info("Removing {} patients from study with internal id={} done.", patientStableIds, internalStudyId); + } + + public static Set findInternalPatientIdsInStudy(Integer internalStudyId, Set patientStableIds) { + HashSet internalPatientIds = new HashSet<>(); + for (String patientId : patientStableIds) { + Patient patientByCancerStudyAndPatientId = DaoPatient.getPatientByCancerStudyAndPatientId(internalStudyId, patientId); + if (patientByCancerStudyAndPatientId == null) { + throw new NoSuchElementException("Patient with stable id=" + patientId + " not found in study with internal id=" + internalStudyId + "."); + } + internalPatientIds.add(patientByCancerStudyAndPatientId.getInternalId()); + } + return internalPatientIds; + } } diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoSample.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoSample.java index df445036..d67cbeeb 
100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoSample.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoSample.java @@ -32,12 +32,29 @@ package org.mskcc.cbio.portal.dao; -import org.mskcc.cbio.portal.model.*; - +import org.mskcc.cbio.portal.model.GeneticAlterationType; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.model.Patient; +import org.mskcc.cbio.portal.model.Sample; import org.mskcc.cbio.portal.util.ProgressMonitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.sql.*; -import java.util.*; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.stream.Collectors; /** * DAO to `sample`. 
@@ -46,6 +63,9 @@ */ public class DaoSample { + + private static final Logger log = LoggerFactory.getLogger(DaoSample.class); + private static final int MISSING_CANCER_STUDY_ID = -1; private static final Map byStableId = new HashMap(); @@ -246,7 +266,6 @@ public static void deleteAllRecords() throws DaoException { Connection con = null; PreparedStatement pstmt = null; - ResultSet rs = null; try { con = JdbcUtil.getDbConnection(DaoSample.class); JdbcUtil.disableForeignKeyCheck(con); @@ -258,16 +277,123 @@ public static void deleteAllRecords() throws DaoException throw new DaoException(e); } finally { - JdbcUtil.closeAll(DaoSample.class, con, pstmt, rs); + JdbcUtil.closeAll(DaoSample.class, con, pstmt, null); } clearCache(); } + /** + * Remove set of samples from the study + * @param internalStudyId - id of the study that contains the samples + * @param sampleStableIds - sample stable ids of samples to remove + * @throws DaoException + */ + public static void deleteSamples(int internalStudyId, Set sampleStableIds) throws DaoException + { + if (sampleStableIds == null || sampleStableIds.isEmpty()) { + log.info("No samples specified to remove for study with internal id={}. 
Skipping.", internalStudyId); + return; + } + + log.info("Removing {} samples from study with internal id={} ...", sampleStableIds, internalStudyId); + + Set internalSampleIds = findInternalSampleIdsInStudy(internalStudyId, sampleStableIds); + removeSamplesInGeneticAlterationsForStudy(internalStudyId, internalSampleIds); + + Connection con = null; + PreparedStatement pstmt = null; + try { + con = JdbcUtil.getDbConnection(DaoSample.class); + pstmt = con.prepareStatement("DELETE FROM `sample` WHERE `INTERNAL_ID` IN (" + + String.join(",", Collections.nCopies(internalSampleIds.size(), "?")) + + ")"); + int parameterIndex = 1; + for (Integer internalSampleId : internalSampleIds) { + pstmt.setInt(parameterIndex++, internalSampleId); + }; + pstmt.executeUpdate(); + } + catch (SQLException e) { + throw new DaoException(e); + } + finally { + JdbcUtil.closeAll(DaoSample.class, con, pstmt, null); + } + log.info("Removing {} samples from study with internal id={} done.", sampleStableIds, internalStudyId); + } + private static Sample extractSample(ResultSet rs) throws SQLException { return new Sample(rs.getInt("INTERNAL_ID"), rs.getString("STABLE_ID"), rs.getInt("PATIENT_ID")); } + + /** + * Removes sample in genetic alterations' data for a study + * @param internalStudyId - internal id of study to remove samples in genetic alterations data + * @param internalSampleIdsToRemove - internal ids of samples to remove + * @throws DaoException + */ + private static void removeSamplesInGeneticAlterationsForStudy(int internalStudyId, Set internalSampleIdsToRemove) throws DaoException { + List geneticProfiles = DaoGeneticProfile.getAllGeneticProfiles(internalStudyId); + for (GeneticProfile geneticProfile : geneticProfiles) { + Set removedInternalSampleIds = removeSamplesInGeneticAlterationsForGeneticProfile(geneticProfile, internalSampleIdsToRemove); + log.debug("Genetic alterations data for {} sample ids ouf of {} requested have been removed for genetic profile with stable id={}", + 
removedInternalSampleIds, internalSampleIdsToRemove, geneticProfile.getStableId()); + } + } + + /** + * Removes sample in genetic alterations' data for a genetic profile + * @param geneticProfile - genetic profile to remove samples in genetic alteration data + * @param internalSampleIdsToRemove - internal ids of samples to remove + * @return set of sample internal ids that were actually removed + * @throws DaoException + */ + private static Set removeSamplesInGeneticAlterationsForGeneticProfile(GeneticProfile geneticProfile, Set internalSampleIdsToRemove) throws DaoException { + int geneticProfileId = geneticProfile.getGeneticProfileId(); + List orderedSampleList = DaoGeneticProfileSamples.getOrderedSampleList(geneticProfileId); + Set actualInternalSampleIdsToRemove = orderedSampleList.stream() + .filter(internalSampleIdsToRemove::contains).collect(Collectors.toUnmodifiableSet()); + if (!actualInternalSampleIdsToRemove.isEmpty()) { + if (GeneticAlterationType.GENESET_SCORE.equals(geneticProfile.getGeneticAlterationType())) { + List sampleStableIds = actualInternalSampleIdsToRemove.stream() + .map(internalSampleID -> + DaoSample.getSampleById(internalSampleID).getStableId()) + .toList(); + throw new RuntimeException("Sample(s) with stable id " + + String.join(", ", sampleStableIds) + + " can't be removed as it contains GSVA data." 
+ + " Consider dropping and re-uploading the whole study."); + } + orderedSampleList.removeAll(actualInternalSampleIdsToRemove); + HashMap> geneticAlterationMapForEntityIds = DaoGeneticAlteration.getInstance().getGeneticAlterationMapForEntityIds(geneticProfileId, null); + DaoGeneticAlteration.getInstance().deleteAllRecordsInGeneticProfile(geneticProfileId); + if (!orderedSampleList.isEmpty()) { + for (Map.Entry> entry : geneticAlterationMapForEntityIds.entrySet()) { + String[] values = orderedSampleList.stream().map(isid -> entry.getValue().get(isid)).toArray(String[]::new); + DaoGeneticAlteration.getInstance().addGeneticAlterationsForGeneticEntity(geneticProfileId, entry.getKey(), values); + } + } + DaoGeneticProfileSamples.deleteAllSamplesInGeneticProfile(geneticProfileId); + if (!orderedSampleList.isEmpty()) { + DaoGeneticProfileSamples.addGeneticProfileSamples(geneticProfileId, orderedSampleList); + } + } + return actualInternalSampleIdsToRemove; + } + + public static Set findInternalSampleIdsInStudy(Integer internalStudyId, Set sampleStableIds) { + HashSet internalSampleIds = new HashSet<>(); + for (String sampleId : sampleStableIds) { + Sample sampleByCancerStudyAndSampleId = DaoSample.getSampleByCancerStudyAndSampleId(internalStudyId, sampleId); + if (sampleByCancerStudyAndSampleId == null) { + throw new NoSuchElementException("Sample with stable id=" + sampleId + " not found in study with internal id=" + internalStudyId + "."); + } + internalSampleIds.add(sampleByCancerStudyAndSampleId.getInternalId()); + } + return internalSampleIds; + } } diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/RemovePatients.java b/src/main/java/org/mskcc/cbio/portal/scripts/RemovePatients.java new file mode 100644 index 00000000..bec9fabc --- /dev/null +++ b/src/main/java/org/mskcc/cbio/portal/scripts/RemovePatients.java @@ -0,0 +1,144 @@ +/* + * This file is part of cBioPortal. 
+ * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package org.mskcc.cbio.portal.scripts; + +import joptsimple.OptionException; +import joptsimple.OptionParser; +import joptsimple.OptionSet; +import joptsimple.OptionSpec; +import org.jetbrains.annotations.NotNull; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoPatient; +import org.mskcc.cbio.portal.dao.JdbcUtil; +import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.util.ProgressMonitor; + +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Command Line Tool to Remove Patients in Cancer Studies + */ +public class RemovePatients extends ConsoleRunnable { + + public static final String COMMA = ","; + private Set studyIds; + private Set patientIds; + + public void run() { + JdbcUtil.getTransactionTemplate().execute(status -> { + try { + doRun(); + } catch (Throwable e) { + status.setRollbackOnly(); + throw new RuntimeException(e); + } + return null; + }); + } + + private void doRun() { + ProgressMonitor.setCurrentMessage("Start removing patient(s) from study(ies)."); + parseArgs(); + ProgressMonitor.logDebug("Reading study id(s) from the database."); + final Set cancerStudies = studyIds.stream().map(studyId -> { + try { + CancerStudy cancerStudy = 
DaoCancerStudy.getCancerStudyByStableId(studyId); + if (cancerStudy == null) { + throw new NoSuchElementException("Cancer study with stable id=" + studyId + " not found."); + } + return cancerStudy; + } catch (DaoException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toUnmodifiableSet()); + try { + for (CancerStudy cancerStudy : cancerStudies) { + ProgressMonitor.setCurrentMessage("Removing patient with stable id(s) (" + + String.join(", ", patientIds) + + ") from study with stable id=" + cancerStudy.getCancerStudyStableId() + " ..."); + DaoPatient.deletePatients(cancerStudy.getInternalId(), patientIds); + } + } catch (DaoException e) { + throw new RuntimeException(e); + } + ProgressMonitor.setCurrentMessage("Done removing patient(s) from study(ies)."); + } + + private void parseArgs() { + OptionParser parser = new OptionParser(); + OptionSpec studyIdsOpt = parser.accepts("study_ids", "Cancer Study ID(s; comma separated) to remove patients for.") + .withRequiredArg() + .describedAs("comma separated study ids") + .ofType(String.class); + OptionSpec patientIdsOpt = parser.accepts("patient_ids", "Patients Stable ID(s; comma separated) to remove.") + .withRequiredArg() + .describedAs("comma separated patient ids") + .ofType(String.class); + OptionSpec help = parser.accepts("help", "print this help info"); + String progName = this.getClass().getSimpleName(); + String description = "Removes clinical patient(s) information by their stable id(s) and cancer study id(s)."; + + OptionSet options; + try { + options = parser.parse(args); + } catch (OptionException e) { + throw new UsageException(progName, description, parser, + e.getMessage()); + } + + if (options.has(help)) { + throw new UsageException(progName, description, parser); + } + if (!options.has(studyIdsOpt) || options.valueOf(studyIdsOpt) == null || "".equals(options.valueOf(studyIdsOpt).trim())) { + throw new UsageException(progName, description, parser, "'--study_ids' argument has to specify 
study id(s)."); + } + if (!options.has(patientIdsOpt) || options.valueOf(patientIdsOpt) == null || "".equals(options.valueOf(patientIdsOpt).trim())) { + throw new UsageException(progName, description, parser, "'--patient_ids' argument has to specify patient id(s)."); + } + this.studyIds = parseCsvAsSet(options.valueOf(studyIdsOpt)); + this.patientIds = parseCsvAsSet(options.valueOf(patientIdsOpt)); + } + + @NotNull + private Set parseCsvAsSet(String s) { + return Arrays.stream(s.trim().split(COMMA)).filter(val -> !"".equals(val)).collect(Collectors.toUnmodifiableSet()); + } + + /** + * Makes an instance to run with the given command line arguments. + * + * @param args the command line arguments to be used + */ + public RemovePatients(String[] args) { + super(args); + } + + /** + * Runs the command as a script and exits with an appropriate exit code. + * + * @param args the arguments given on the command line + */ + public static void main(String[] args) { + ConsoleRunnable runner = new RemovePatients(args); + runner.runInConsole(); + } +} diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/RemoveSamples.java b/src/main/java/org/mskcc/cbio/portal/scripts/RemoveSamples.java new file mode 100644 index 00000000..c0aa170d --- /dev/null +++ b/src/main/java/org/mskcc/cbio/portal/scripts/RemoveSamples.java @@ -0,0 +1,144 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package org.mskcc.cbio.portal.scripts; + +import joptsimple.OptionException; +import joptsimple.OptionParser; +import joptsimple.OptionSet; +import joptsimple.OptionSpec; +import org.jetbrains.annotations.NotNull; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoSample; +import org.mskcc.cbio.portal.dao.JdbcUtil; +import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.util.ProgressMonitor; + +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Command Line Tool to Remove Samples in Cancer Studies + */ +public class RemoveSamples extends ConsoleRunnable { + + public static final String COMMA = ","; + private Set studyIds; + private Set sampleIds; + + public void run() { + JdbcUtil.getTransactionTemplate().execute(status -> { + try { + doRun(); + } catch (Throwable e) { + status.setRollbackOnly(); + throw new RuntimeException(e); + } + return null; + }); + } + + private void doRun() { + ProgressMonitor.setCurrentMessage("Start removing sample(s) from study(ies)."); + parseArgs(); + ProgressMonitor.logDebug("Reading study id(s) from the database."); + final Set cancerStudies = studyIds.stream().map(studyId -> { + try { + CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId(studyId); + if (cancerStudy == null) { + throw new NoSuchElementException("Cancer study with stable id=" + studyId + " not found."); + } + return cancerStudy; + } catch (DaoException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toUnmodifiableSet()); + try { + for (CancerStudy cancerStudy : cancerStudies) { + ProgressMonitor.setCurrentMessage("Removing sample with stable id(s) (" + + String.join(", ", sampleIds) + + ") from study with stable id=" + cancerStudy.getCancerStudyStableId() + " ..."); + DaoSample.deleteSamples(cancerStudy.getInternalId(), sampleIds); + } + 
} catch (DaoException e) { + throw new RuntimeException(e); + } + ProgressMonitor.setCurrentMessage("Done removing sample(s) from study(ies)."); + } + + private void parseArgs() { + OptionParser parser = new OptionParser(); + OptionSpec studyIdsOpt = parser.accepts("study_ids", "Cancer Study ID(s; comma separated) to remove samples for.") + .withRequiredArg() + .describedAs("comma separated study ids") + .ofType(String.class); + OptionSpec sampleIdsOpt = parser.accepts("sample_ids", "Samples Stable ID(s; comma separated) to remove.") + .withRequiredArg() + .describedAs("comma separated sample ids") + .ofType(String.class); + OptionSpec help = parser.accepts("help", "print this help info"); + String progName = this.getClass().getSimpleName(); + String description = "Removes clinical sample(s) information by their stable id(s) and cancer study id(s)."; + + OptionSet options; + try { + options = parser.parse(args); + } catch (OptionException e) { + throw new UsageException(progName, description, parser, + e.getMessage()); + } + + if (options.has(help)) { + throw new UsageException(progName, description, parser); + } + if (!options.has(studyIdsOpt) || options.valueOf(studyIdsOpt) == null || "".equals(options.valueOf(studyIdsOpt).trim())) { + throw new UsageException(progName, description, parser, "'--study_ids' argument has to specify study id(s)."); + } + if (!options.has(sampleIdsOpt) || options.valueOf(sampleIdsOpt) == null || "".equals(options.valueOf(sampleIdsOpt).trim())) { + throw new UsageException(progName, description, parser, "'--sample_ids' argument has to specify sample id(s)."); + } + this.studyIds = parseCsvAsSet(options.valueOf(studyIdsOpt)); + this.sampleIds = parseCsvAsSet(options.valueOf(sampleIdsOpt)); + } + + @NotNull + private Set parseCsvAsSet(String s) { + return Arrays.stream(s.trim().split(COMMA)).filter(val -> !"".equals(val)).collect(Collectors.toUnmodifiableSet()); + } + + /** + * Makes an instance to run with the given command line 
arguments. + * + * @param args the command line arguments to be used + */ + public RemoveSamples(String[] args) { + super(args); + } + + /** + * Runs the command as a script and exits with an appropriate exit code. + * + * @param args the arguments given on the command line + */ + public static void main(String[] args) { + ConsoleRunnable runner = new RemoveSamples(args); + runner.runInConsole(); + } +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java index 83e04144..22849352 100644 --- a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java @@ -72,7 +72,7 @@ public void setUp() throws DaoException public void testDaoGetAllGeneticProfiles() throws DaoException { ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId); - assertEquals(9, list.size()); + assertEquals(11, list.size()); } @Test @@ -134,12 +134,12 @@ public void testDaoDeleteGeneticProfile() throws DaoException { GeneticProfile geneticProfile = DaoGeneticProfile.getGeneticProfileById(2); - assertEquals(9, DaoGeneticProfile.getCount()); + int numberOfProfiles = DaoGeneticProfile.getCount(); DaoGeneticProfile.deleteGeneticProfile(geneticProfile); - assertEquals(8, DaoGeneticProfile.getCount()); + assertEquals(numberOfProfiles - 1, DaoGeneticProfile.getCount()); ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId); - assertEquals(8, list.size()); + assertEquals(numberOfProfiles - 1, list.size()); geneticProfile = list.get(0); assertEquals(studyId, geneticProfile.getCancerStudyId()); assertEquals("mRNA expression (microarray)", geneticProfile.getProfileName()); @@ -154,12 +154,10 @@ public void testDaoUpdateGeneticProfile() throws DaoException { assertTrue(DaoGeneticProfile.updateNameAndDescription( geneticProfile.getGeneticProfileId(), 
"Updated Name", "Updated Description")); - ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId); - assertEquals(9, list.size()); - geneticProfile = list.get(0); - assertEquals(studyId, geneticProfile.getCancerStudyId()); - assertEquals("Updated Name", geneticProfile.getProfileName()); - assertEquals(GeneticAlterationType.COPY_NUMBER_ALTERATION, geneticProfile.getGeneticAlterationType()); - assertEquals("Updated Description", geneticProfile.getProfileDescription()); + GeneticProfile updatedGeneticProfile = DaoGeneticProfile.getGeneticProfileById(geneticProfile.getGeneticProfileId()); + assertEquals(studyId, updatedGeneticProfile.getCancerStudyId()); + assertEquals("Updated Name", updatedGeneticProfile.getProfileName()); + assertEquals(GeneticAlterationType.COPY_NUMBER_ALTERATION, updatedGeneticProfile.getGeneticAlterationType()); + assertEquals("Updated Description", updatedGeneticProfile.getProfileDescription()); } } diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java index 27eb111e..d2ab1447 100644 --- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java @@ -29,7 +29,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -178,7 +177,6 @@ public void testImportCnaDiscreteLongDataAddsCnaEvents() throws Exception { @Test public void testImportCnaDiscreteLongDataAddsGeneticAlterations() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( @@ 
-201,9 +199,6 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterations() throws Excepti */ @Test public void testImportCnaDiscreteLongDataAddsMissingGeneticAlterations() throws Exception { - List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); - File file = new File("src/test/resources/data_cna_discrete_import_test_with_cna_events_missing.txt"); new ImportCnaDiscreteLongData( file, @@ -228,9 +223,6 @@ public void testImportCnaDiscreteLongDataAddsMissingGeneticAlterations() throws */ @Test public void testImportCnaDiscreteLongDataAddsGeneticAlterationsAndProfileSamplesInCorrectOrder() throws Exception { - List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); - File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( file, @@ -254,9 +246,6 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterationsAndProfileSamples */ @Test public void testImportCnaDiscreteLongDataHandlesEntriesWithoutEntrezButWithHugo() throws Exception { - List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); - File file = new File("src/test/resources/data_cna_discrete_import_test_without_entrez_with_hugo.txt"); new ImportCnaDiscreteLongData( file, @@ -276,9 +265,6 @@ public void testImportCnaDiscreteLongDataHandlesEntriesWithoutEntrezButWithHugo( */ @Test public void testImportCnaDiscreteLongDataHandlesEntriesWithWrongEntrezAndCorrectHugo() throws Exception { - List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); - File file = new File("src/test/resources/data_cna_discrete_import_test_with_wrong_entrez_and_correct_hugo.txt"); new ImportCnaDiscreteLongData( file, @@ -298,9 +284,6 @@ public void testImportCnaDiscreteLongDataHandlesEntriesWithWrongEntrezAndCorrect */ @Test public void 
testImportCnaDiscreteLongDataAddsGeneticAlterationsFromNonCnaEvents() throws Exception { - List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); - File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( file, @@ -325,9 +308,6 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterationsFromNonCnaEvents( */ @Test public void testImportCnaDiscreteLongDataIgnoresLineWithDuplicateGene() throws Exception { - List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(57, beforeGeneticAlterations.size()); - File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( file, @@ -544,6 +524,12 @@ public void testImportCnaDiscreteLongDataImportsCustomNamespaceColumnsAsNullWhen String expectedAnnotationJson = null; assertEquals(expectedAnnotationJson, results.get(0).annotationJson); } + + @Test + public void testGetAllGeneticAlterations() throws DaoException { + List geneticAlterations = getAllGeneticAlterations(); + assertEquals(59, geneticAlterations.size()); + } private List createPrimaryKeys(String sample, List cnaEvents) { return cnaEvents.stream().map(e -> { diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemovePatients.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemovePatients.java new file mode 100644 index 00000000..634c8d12 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemovePatients.java @@ -0,0 +1,228 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package org.mskcc.cbio.portal.integrationTest.scripts; + +import io.swagger.models.auth.In; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.dao.DaoGeneticProfileSamples; +import org.mskcc.cbio.portal.dao.DaoPatient; +import org.mskcc.cbio.portal.dao.DaoSample; +import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.model.Patient; +import org.mskcc.cbio.portal.model.Sample; +import org.mskcc.cbio.portal.scripts.RemovePatients; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; + +/** + * JUnit tests for RemovePatients class. 
+ * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"}) +@Rollback +@Transactional +public class TestRemovePatients { + + @Test + public void testRemovePatients() throws DaoException { + String patient1StableId = "TCGA-A1-A0SB"; + String patient2StableId = "TCGA-A1-A0SD"; + CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId("study_tcga_pub"); + int patient1InternalId = DaoPatient.getPatientByCancerStudyAndPatientId(cancerStudy.getInternalId(), patient1StableId).getInternalId(); + List patient1InternalsSampleIds = DaoSample.getSamplesByPatientId(patient1InternalId).stream().map(Sample::getInternalPatientId).toList(); + int patient2InternalId = DaoPatient.getPatientByCancerStudyAndPatientId(cancerStudy.getInternalId(), patient2StableId).getInternalId(); + List patient2InternalsSampleIds = DaoSample.getSamplesByPatientId(patient2InternalId).stream().map(Sample::getInternalPatientId).toList(); + List beforePatientIds = DaoPatient.getPatientsByCancerStudyId(cancerStudy.getInternalId()).stream().map(Patient::getInternalId).toList(); + assertTrue(beforePatientIds.contains(patient1InternalId)); + assertTrue(beforePatientIds.contains(patient2InternalId)); + + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub", + "--patient_ids", "TCGA-A1-A0SB,TCGA-A1-A0SD" + }).run(); + + DaoPatient.reCache(); + + List afterPatientIds = DaoPatient.getPatientsByCancerStudyId(cancerStudy.getInternalId()).stream().map(Patient::getInternalId).toList(); + assertFalse(afterPatientIds.contains(patient1InternalId)); + assertFalse(afterPatientIds.contains(patient2InternalId)); + assertEquals(beforePatientIds.size() - 2, afterPatientIds.size()); + + List geneticProfiles = Stream.of("study_tcga_pub_gistic", "study_tcga_pub_mrna", "study_tcga_pub_log2CNA", + "study_tcga_pub_rppa", 
"study_tcga_pub_treatment_ic50").map(DaoGeneticProfile::getGeneticProfileByStableId).toList(); + for (GeneticProfile geneticProfile : geneticProfiles) { + HashMap> geneticAlterationMapForEntityIds = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(geneticProfile.getGeneticProfileId(), null); + for (Map.Entry> gaEntry : geneticAlterationMapForEntityIds.entrySet()) { + for (Integer patient1InternalsSampleId : patient1InternalsSampleIds) { + assertFalse(gaEntry.getValue().containsKey(patient1InternalsSampleId), + "Genetic entity with id " + + gaEntry.getKey() + + " of " + geneticProfile.getStableId() + "genetic profile" + + " must have all samples of " + patient1StableId + " patient deleted"); + } + for (Integer patient2InternalsSampleId : patient2InternalsSampleIds) { + assertFalse(gaEntry.getValue().containsKey(patient2InternalsSampleId), + "Genetic entity with id " + + gaEntry.getKey() + + " of " + geneticProfile.getStableId() + "genetic profile" + + " must have all samples of " + patient2StableId + " patient deleted"); + } + } + } + int studyTcgaPubMethylationHm27 = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_methylation_hm27").getGeneticProfileId(); + assertTrue("The methylation platform has to loose it's last sample", DaoGeneticProfileSamples.getOrderedSampleList( + studyTcgaPubMethylationHm27).isEmpty()); + } + + @Test + public void testStudyIdsOptionIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--patient_ids", "TCGA-A1-A0SB-01" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--study_ids' argument has to specify study id")); + } + + @Test + public void testStudyIdsOptionValueIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "", + "--patient_ids", "TCGA-A1-A0SB" + }).run() + ); + 
assertThat(runtimeException.getMessage(), + containsString("'--study_ids' argument has to specify study id")); + } + + @Test + public void testPatientIdsOptionIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub", + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--patient_ids' argument has to specify patient id")); + } + + @Test + public void testPatientIdsOptionValueIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub", + "--patient_ids", "" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--patient_ids' argument has to specify patient id")); + } + + @Test + public void testNoStudyExists() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub,non_existing_study", + "--patient_ids", "TCGA-A1-A0SB" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Cancer study with stable id=non_existing_study not found.")); + } + @Test + public void testNoPatientExists() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub", + "--patient_ids", "TCGA-A1-A0SB,NON_EXISTING_PATIENT" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Patient with stable id=NON_EXISTING_PATIENT not found in study with internal id=")); + } + + @Test + public void testRollbackPatientRemovalWithGsvaScore() throws DaoException { + GeneticProfile gsvaScoreGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_gsva_scores"); + HashMap> beforeData = DaoGeneticAlteration.getInstance() + 
.getGeneticAlterationMapForEntityIds(gsvaScoreGeneticProfile.getGeneticProfileId(), null); + + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub", + "--patient_ids", "TCGA-TEST-PATIENT-21" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Sample(s) with stable id TCGA-TEST-SAMPLE-22 can't be removed as it contains GSVA data." + + " Consider dropping and re-uploading the whole study.")); + + HashMap> afterData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaScoreGeneticProfile.getGeneticProfileId(), null); + + assertEquals(beforeData, afterData); + } + + @Test + public void testRollbackPatientRemovalWithGsvaPvalue() throws DaoException { + GeneticProfile gsvaPvalueGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_gsva_pvalues"); + HashMap> beforeData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaPvalueGeneticProfile.getGeneticProfileId(), null); + + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemovePatients(new String[]{ + "--study_ids", "study_tcga_pub", + "--patient_ids", "TCGA-TEST-PATIENT-22" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Sample(s) with stable id TCGA-TEST-SAMPLE-23 can't be removed as it contains GSVA data." 
+ + " Consider dropping and re-uploading the whole study.")); + + HashMap> afterData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaPvalueGeneticProfile.getGeneticProfileId(), null); + + assertEquals(beforeData, afterData); + } +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemoveSamples.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemoveSamples.java new file mode 100644 index 00000000..78b3f3f9 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestRemoveSamples.java @@ -0,0 +1,218 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package org.mskcc.cbio.portal.integrationTest.scripts; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.dao.DaoGeneticProfileSamples; +import org.mskcc.cbio.portal.dao.DaoSample; +import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.scripts.RemoveSamples; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; + +/** + * JUnit tests for RemoveSamples class. 
+ * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"}) +@Rollback +@Transactional +public class TestRemoveSamples { + + @Test + public void testRemoveSamples() throws DaoException { + String sample1StableId = "TCGA-A1-A0SB-01"; + String sample2StableId = "TCGA-A1-A0SD-01"; + CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId("study_tcga_pub"); + List beforeSampleIds = DaoSample.getSampleStableIdsByCancerStudy(cancerStudy.getInternalId()); + assertTrue(beforeSampleIds.contains(sample1StableId)); + assertTrue(beforeSampleIds.contains(sample2StableId)); + int sample1InternalId = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudy.getInternalId(), sample1StableId).getInternalId(); + int sample2InternalId = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudy.getInternalId(), sample2StableId).getInternalId(); + + new RemoveSamples(new String[]{ + "--study_ids", "study_tcga_pub", + "--sample_ids", "TCGA-A1-A0SB-01,TCGA-A1-A0SD-01" + }).run(); + + DaoSample.reCache(); + + List afterSampleIds = DaoSample.getSampleStableIdsByCancerStudy(cancerStudy.getInternalId()); + assertFalse(afterSampleIds.contains(sample1StableId)); + assertFalse(afterSampleIds.contains(sample2StableId)); + assertEquals(beforeSampleIds.size() - 2, afterSampleIds.size()); + + List geneticProfiles = Stream.of("study_tcga_pub_gistic", "study_tcga_pub_mrna", "study_tcga_pub_log2CNA", + "study_tcga_pub_rppa", "study_tcga_pub_treatment_ic50").map(DaoGeneticProfile::getGeneticProfileByStableId).toList(); + for (GeneticProfile geneticProfile : geneticProfiles) { + HashMap> geneticAlterationMapForEntityIds = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(geneticProfile.getGeneticProfileId(), null); + for (Map.Entry> gaEntry : geneticAlterationMapForEntityIds.entrySet()) { + 
assertFalse(gaEntry.getValue().containsKey(sample1InternalId), + "Genetic entity with id " + + gaEntry.getKey() + + " of " + geneticProfile.getStableId() + "genetic profile" + + " must have " + sample1StableId + " sample deleted"); + assertFalse(gaEntry.getValue().containsKey(sample2InternalId), + "Genetic entity with id " + + gaEntry.getKey() + + " of " + geneticProfile.getStableId() + "genetic profile" + + " must have " + sample2StableId + " sample deleted"); + } + } + int studyTcgaPubMethylationHm27 = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_methylation_hm27").getGeneticProfileId(); + assertTrue("The methylation platform has to loose it's last sample", DaoGeneticProfileSamples.getOrderedSampleList( + studyTcgaPubMethylationHm27).isEmpty()); + } + + @Test + public void testStudyIdsOptionIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--sample_ids", "TCGA-A1-A0SB-01" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--study_ids' argument has to specify study id")); + } + + @Test + public void testStudyIdsOptionValueIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--study_ids", "", + "--sample_ids", "TCGA-A1-A0SB-01" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--study_ids' argument has to specify study id")); + } + + @Test + public void testSampleIdsOptionIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--study_ids", "study_tcga_pub", + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--sample_ids' argument has to specify sample id")); + } + + @Test + public void testSampleIdsOptionValueIsRequired() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new 
String[]{ + "--study_ids", "study_tcga_pub", + "--sample_ids", "" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("'--sample_ids' argument has to specify sample id")); + } + + @Test + public void testNoStudyExists() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--study_ids", "study_tcga_pub,non_existing_study", + "--sample_ids", "TCGA-A1-A0SB-01" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Cancer study with stable id=non_existing_study not found.")); + } + @Test + public void testNoSampleExists() { + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--study_ids", "study_tcga_pub", + "--sample_ids", "TCGA-A1-A0SB-01,NON_EXISTING_SAMPLE" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Sample with stable id=NON_EXISTING_SAMPLE not found in study with internal id=")); + } + + @Test + public void testRollbackSampleRemovalWithGsvaScore() throws DaoException { + GeneticProfile gsvaScoreGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_gsva_scores"); + HashMap> beforeData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaScoreGeneticProfile.getGeneticProfileId(), null); + + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--study_ids", "study_tcga_pub", + "--sample_ids", "TCGA-TEST-SAMPLE-22" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Sample(s) with stable id TCGA-TEST-SAMPLE-22 can't be removed as it contains GSVA data." 
+ + " Consider dropping and re-uploading the whole study.")); + + HashMap> afterData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaScoreGeneticProfile.getGeneticProfileId(), null); + + assertEquals(beforeData, afterData); + } + + @Test + public void testRollbackSampleRemovalWithGsvaPvalue() throws DaoException { + GeneticProfile gsvaPvalueGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_gsva_pvalues"); + HashMap> beforeData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaPvalueGeneticProfile.getGeneticProfileId(), null); + + RuntimeException runtimeException = assertThrows(RuntimeException.class, () -> + new RemoveSamples(new String[]{ + "--study_ids", "study_tcga_pub", + "--sample_ids", "TCGA-TEST-SAMPLE-23" + }).run() + ); + assertThat(runtimeException.getMessage(), + containsString("Sample(s) with stable id TCGA-TEST-SAMPLE-23 can't be removed as it contains GSVA data." + + " Consider dropping and re-uploading the whole study.")); + + HashMap> afterData = DaoGeneticAlteration.getInstance() + .getGeneticAlterationMapForEntityIds(gsvaPvalueGeneticProfile.getGeneticProfileId(), null); + + assertEquals(beforeData, afterData); + } +} diff --git a/src/test/resources/seed_mini.sql b/src/test/resources/seed_mini.sql index 5ffe18da..b660316b 100644 --- a/src/test/resources/seed_mini.sql +++ b/src/test/resources/seed_mini.sql @@ -398,6 +398,8 @@ INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (8,'study_tcga_pub_cna_long',1,'COPY_NUMBER_ALTERATION','DISCRETE_LONG','CNA values','CNA values dummy data','1'); INSERT INTO "genetic_profile" 
("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (9,'study_tcga_pub_rppa',1,'PROTEIN_LEVEL','LOG2-VALUE','RPPA values','RPPA values dummy data','0'); INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (10,'study_tcga_pub_treatment_ic50',1,'GENERIC_ASSAY','LIMIT-VALUE','test treatment values','treatment values dummy data','0'); +INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (11,'study_tcga_pub_gsva_scores',1,'GENESET_SCORE','GSVA-SCORE','test gsva score values','test gsva score description','0'); +INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (12,'study_tcga_pub_gsva_pvalues',1,'GENESET_SCORE','P-VALUE','test gsva p-values','test gsva p-values description','0'); -- gene_panel INSERT INTO gene_panel (INTERNAL_ID,STABLE_ID,DESCRIPTION) VALUES (1,'TESTPANEL_CNA_DISCRETE_LONG_FORMAT','Some test panel'); @@ -466,6 +468,10 @@ INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALU INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Irinotecan'),'>8,7.1,'); INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'L-685458'),'>4.6,7.2,'); INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Lapatinib'),'6.9,>~8,'); +-- GSVA score +INSERT INTO "genetic_alteration" 
("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (11,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 675),'0.1,'); +-- GSVA p-value +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (12,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 675),'0.2,'); -- genetic_profile_samples INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (2,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,'); @@ -474,6 +480,8 @@ INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (5,'2,'); INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (9,'2,3,6,8,9,10,12,13,'); INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (10,'2,3,'); +INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (11,'22,'); +INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (12,'23,'); -- patient INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (1,'TCGA-A1-A0SB',1); @@ -496,6 +504,8 @@ INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (17,' INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (18,'TCGA-AA-3665',1); INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (19,'TCGA-A2-A04U',1); INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (20,'TCGA-A1-A0SB',1); +INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (21,'TCGA-TEST-PATIENT-21',1); +INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (22,'TCGA-TEST-PATIENT-22',1); -- sample INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALUES (1,'TCGA-A1-A0SB-01','Primary Solid Tumor',1); @@ -519,6 +529,8 @@ 
INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALU INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALUES (19,'TCGA-A1-A0SB-02','Primary Solid Tumor',1); INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALUES (20,'TCGA-A2-A04U-11','Solid Tissues Normal',19); INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALUES (21,'TCGA-A1-A0SB-11','Solid Tissues Normal',20); +INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALUES (22,'TCGA-TEST-SAMPLE-22','Primary Solid Tumor',21); +INSERT INTO "sample" ("INTERNAL_ID","STABLE_ID","SAMPLE_TYPE","PATIENT_ID") VALUES (23,'TCGA-TEST-SAMPLE-23','Primary Solid Tumor',22); -- mutation_event INSERT INTO "mutation_event" ("MUTATION_EVENT_ID","ENTREZ_GENE_ID","CHR","START_POSITION","END_POSITION","REFERENCE_ALLELE","TUMOR_SEQ_ALLELE","PROTEIN_CHANGE","MUTATION_TYPE","NCBI_BUILD","STRAND","VARIANT_TYPE","DB_SNP_RS","DB_SNP_VAL_STATUS","REFSEQ_MRNA_ID","CODON_CHANGE","UNIPROT_ACCESSION","PROTEIN_POS_START","PROTEIN_POS_END","CANONICAL_TRANSCRIPT","KEYWORD") VALUES (2038,672,'17',41244748,41244748,'G','A','Q934*','Nonsense_Mutation','37','+','SNP','rs80357223','unknown','NM_007294','c.(2800-2802)CAG>TAG','P38398',934,934,1,'BRCA1 truncating'); @@ -678,6 +690,8 @@ INSERT INTO "sample_profile" ("SAMPLE_ID","GENETIC_PROFILE_ID","PANEL_ID") VALUE INSERT INTO "sample_profile" ("SAMPLE_ID","GENETIC_PROFILE_ID","PANEL_ID") VALUES (13,4,NULL); INSERT INTO "sample_profile" ("SAMPLE_ID","GENETIC_PROFILE_ID","PANEL_ID") VALUES (14,2,NULL); INSERT INTO "sample_profile" ("SAMPLE_ID","GENETIC_PROFILE_ID","PANEL_ID") VALUES (14,4,NULL); +INSERT INTO "sample_profile" ("SAMPLE_ID","GENETIC_PROFILE_ID","PANEL_ID") VALUES (22,11,NULL); +INSERT INTO "sample_profile" ("SAMPLE_ID","GENETIC_PROFILE_ID","PANEL_ID") VALUES (23,12,NULL); -- users INSERT INTO users (EMAIL,NAME,ENABLED) values ('jami@gmail.com','Jami Bax',1); 
diff --git a/tests/system_tests_import_data.py b/tests/system_tests_import_data.py index 9c30cdef..78d894d1 100755 --- a/tests/system_tests_import_data.py +++ b/tests/system_tests_import_data.py @@ -148,6 +148,39 @@ def test_incremental_load(self, run_java, locate_jar): self.assertTrue(run_java.call_args_list.index(clinical_sample_call) < run_java.call_args_list.index(mutation_call)) self.assertTrue(run_java.call_args_list.index(clinical_sample_call) < run_java.call_args_list.index(case_list_call)) + @mock.patch('importer.cbioportalImporter.locate_jar') + @mock.patch('importer.cbioportalImporter.run_java') + def test_remove_samples(self, run_java, locate_jar): + ''' + Tests the java commands that the remove-samples command produces + ''' + locate_jar.return_value = "test.jar" + + args = ['remove-samples', '--study_ids', 'STUDY1,STUDY2', '--sample_ids', 'SAMPLE1,SAMPLE2'] + parsed_args = cbioportalImporter.interface(args) + cbioportalImporter.main(parsed_args) + + self.assertCountEqual(run_java.call_args_list, [ + call(*common_part, 'org.mskcc.cbio.portal.util.VersionUtil',), + call(*common_part, 'org.mskcc.cbio.portal.scripts.RemoveSamples', '--study_ids', 'STUDY1,STUDY2', '--sample_ids', 'SAMPLE1,SAMPLE2'), + ]) + + @mock.patch('importer.cbioportalImporter.locate_jar') + @mock.patch('importer.cbioportalImporter.run_java') + def test_remove_patients(self, run_java, locate_jar): + ''' + Tests the java commands that the remove-patients command produces + ''' + locate_jar.return_value = "test.jar" + + args = ['remove-patients', '--study_ids', 'STUDY1,STUDY2', '--patient_ids', 'PATIENT1,PATIENT2'] + parsed_args = cbioportalImporter.interface(args) + cbioportalImporter.main(parsed_args) + + self.assertCountEqual(run_java.call_args_list, [ + call(*common_part, 'org.mskcc.cbio.portal.util.VersionUtil',), + call(*common_part, 'org.mskcc.cbio.portal.scripts.RemovePatients', '--study_ids', 'STUDY1,STUDY2', '--patient_ids', 'PATIENT1,PATIENT2'), + ]) if __name__ == '__main__':
unittest.main(buffer=True)