From 8d3aaed61cca23f08de3849d67f0a3bd470af352 Mon Sep 17 00:00:00 2001
From: Ruslan Forostianov
Date: Wed, 1 May 2024 14:35:20 +0200
Subject: [PATCH] Add python tests for incremental/full data import

---
Review note: the previous revision of this patch also changed the
`if __name__ == '__main__':` entry point to call `interface(args)`. No
module-level `args` is defined anywhere in the context shown by this patch,
so that call would raise NameError whenever the script is run directly.
That hunk has been dropped: the entry point keeps calling `interface()`,
whose new `args=None` default makes argparse read sys.argv[1:] as before.
The tests keep passing an explicit argument list to `interface(args)`.
The `index` blob ids below were not regenerated after this edit.

 scripts/importer/cbioportalImporter.py | 12 ++--
 tests/system_tests_import_data.py      | 93 ++++++++++++++++++++++++++
 2 files changed, 99 insertions(+), 6 deletions(-)
 create mode 100755 tests/system_tests_import_data.py

diff --git a/scripts/importer/cbioportalImporter.py b/scripts/importer/cbioportalImporter.py
index f059282e..3fa2040b 100755
--- a/scripts/importer/cbioportalImporter.py
+++ b/scripts/importer/cbioportalImporter.py
@@ -207,7 +207,7 @@ def check_version(jvm_args):
         raise
 
 def process_case_lists(jvm_args, case_list_dir):
-    for case_list in os.listdir(case_list_dir):
+    for case_list in sorted(os.listdir(case_list_dir)):
         # skip "temp"/backup files made by some text editors:
         if not (case_list.startswith('.') or case_list.endswith('~')):
             import_case_list(jvm_args, os.path.join(case_list_dir, case_list))
@@ -232,13 +232,13 @@ def process_command(jvm_args, command, meta_filename, data_filename, study_ids,
         import_case_list(jvm_args, meta_filename)
 
 def get_meta_filenames(data_directory):
-    meta_filenames = (
+    meta_filenames = [
         os.path.join(data_directory, meta_filename) for
         meta_filename in os.listdir(data_directory) if
         re.search(r'(\b|_)meta(\b|[_0-9])', meta_filename,
                   flags=re.IGNORECASE) and
-        not (meta_filename.startswith('.') or meta_filename.endswith('~')))
-    return meta_filenames
+        not (meta_filename.startswith('.') or meta_filename.endswith('~'))]
+    return sorted(meta_filenames)
 
 def process_study_directory(jvm_args, study_directory, update_generic_assay_entity = None):
     """
@@ -525,7 +525,7 @@ def add_parser_args(parser):
     parser.add_argument('-data', '--data_filename', type=str, required=False,
                         help='Path to Data file')
 
-def interface():
+def interface(args=None):
     parent_parser = argparse.ArgumentParser(description='cBioPortal meta Importer')
     add_parser_args(parent_parser)
     parser = argparse.ArgumentParser()
@@ -555,7 +555,7 @@ def interface():
     # TODO - add same argument to metaimporter
     # TODO - harmonize on - and _
 
-    parser = parser.parse_args()
+    parser = parser.parse_args(args)
     if parser.command is not None and parser.subcommand is not None:
         print('Cannot call multiple commands')
         sys.exit(2)
diff --git a/tests/system_tests_import_data.py b/tests/system_tests_import_data.py
new file mode 100755
index 00000000..a646e0c4
--- /dev/null
+++ b/tests/system_tests_import_data.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+
+'''
+This code is licensed under the GNU Affero General Public License (AGPL),
+version 3, or (at your option) any later version.
+'''
+
+import unittest
+from unittest import mock
+from unittest.mock import call
+from importer import cbioportalImporter
+
+common_part = ('-Dspring.profiles.active=dbcp', '-cp', 'test.jar')
+
+class DataImporterTests(unittest.TestCase):
+    '''
+    Tests of commands produced by scripts
+    '''
+
+    def setUp(self):
+        self.maxDiff = None
+
+    @mock.patch('importer.cbioportalImporter.locate_jar')
+    @mock.patch('importer.cbioportalImporter.run_java')
+    def test_full_study_load(self, run_java, locate_jar):
+        '''
+        Tests java commands full study load produces
+        '''
+        locate_jar.return_value = "test.jar"
+
+        study_directory = 'test_data/study_es_0'
+        args = ['--study_directory', study_directory]
+        parsed_args = cbioportalImporter.interface(args)
+        cbioportalImporter.main(parsed_args)
+
+        self.assertListEqual(run_java.call_args_list, [
+            call(*common_part, 'org.mskcc.cbio.portal.util.VersionUtil',),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportTypesOfCancers', f'{study_directory}/data_cancer_type.txt', 'false', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.RemoveCancerStudy', 'study_es_0', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportCancerStudy', f'{study_directory}/meta_study.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportClinicalData', '--meta', f'{study_directory}/meta_clinical_samples.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_clinical_samples.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportResourceDefinition', '--meta', f'{study_directory}/meta_resource_definition.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_resource_definition.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportResourceData', '--meta', f'{study_directory}/meta_resource_sample.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_resource_sample.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportClinicalData', '--meta', f'{study_directory}/meta_clinical_patients.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_clinical_patients.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData', '--meta', f'{study_directory}/meta_cna_hg19_seg.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_cna_hg19.seg', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_cna_log2.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_cna_log2.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_expression_median.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_expression_median.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_generic_assay_patient_test.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_generic_assay_patient_test.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportGisticData', '--data', f'{study_directory}/data_gistic_genes_amp.txt', '--study', 'study_es_0', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_methylation_hm27.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_methylation_hm27.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_mutational_signature.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_mutational_signature.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_mutations_extended.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_mutations_extended.maf', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportResourceData', '--meta', f'{study_directory}/meta_resource_patient.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_resource_patient.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportResourceData', '--meta', f'{study_directory}/meta_resource_study.txt', '--loadMode', 'bulkload', '--data', f'{study_directory}/data_resource_study.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_treatment_ec50.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_treatment_ec50.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_treatment_ic50.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_treatment_ic50.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_structural_variants.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_structural_variants.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_cna_discrete.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_cna_discrete.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_expression_median_Zscores.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_expression_median_Zscores.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_gsva_scores.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_gsva_scores.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--meta', f'{study_directory}/meta_gsva_pvalues.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{study_directory}/data_gsva_pvalues.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportGenePanelProfileMap', '--meta', f'{study_directory}/meta_gene_panel_matrix.txt', '--data', f'{study_directory}/data_gene_panel_matrix.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportSampleList', f'{study_directory}/case_lists/cases_cna.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportSampleList', f'{study_directory}/case_lists/cases_cnaseq.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportSampleList', f'{study_directory}/case_lists/cases_custom.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportSampleList', f'{study_directory}/case_lists/cases_sequenced.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportSampleList', f'{study_directory}/case_lists/cases_test.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.AddCaseList', 'study_es_0', 'all', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.UpdateCancerStudy', 'study_es_0', 'AVAILABLE', '--noprogress')])
+
+    @mock.patch('importer.cbioportalImporter.locate_jar')
+    @mock.patch('importer.cbioportalImporter.run_java')
+    def test_incremental_load(self, run_java, locate_jar):
+        '''
+        Tests java commands incremental load produces
+        '''
+        locate_jar.return_value = "test.jar"
+
+        data_directory = 'test_data/study_es_0_inc'
+        args = ['--data_directory', data_directory]
+        parsed_args = cbioportalImporter.interface(args)
+        cbioportalImporter.main(parsed_args)
+
+        self.assertListEqual(run_java.call_args_list, [
+            call(*common_part, 'org.mskcc.cbio.portal.util.VersionUtil',),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportClinicalData', '--overwrite-existing', '--meta', f'{data_directory}/meta_clinical_patients.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_clinical_patients.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportClinicalData', '--overwrite-existing', '--meta', f'{data_directory}/meta_clinical_samples.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_clinical_samples.txt', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', '--meta', f'{data_directory}/meta_mutations_extended.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_mutations_extended.maf', '--noprogress'),
+            call(*common_part, 'org.mskcc.cbio.portal.scripts.UpdateCaseListsSampleIds', '--meta', f'{data_directory}/meta_clinical_samples.txt', '--case-lists', f'{data_directory}/case_lists')])
+
+
+if __name__ == '__main__':
+    unittest.main(buffer=True)