Skip to content

Commit

Permalink
Add python tests for incremental/full data import
Browse files Browse the repository at this point in the history
  • Loading branch information
forus committed May 1, 2024
1 parent f5e8217 commit 8d3aaed
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 7 deletions.
14 changes: 7 additions & 7 deletions scripts/importer/cbioportalImporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def check_version(jvm_args):
raise

def process_case_lists(jvm_args, case_list_dir):
    """Import every case list file found in ``case_list_dir``.

    Directory entries are visited in sorted name order so that the sequence
    of imports does not depend on the arbitrary order ``os.listdir`` returns.

    Args:
        jvm_args: passed through unchanged to ``import_case_list``.
        case_list_dir: directory whose entries are imported as case lists.
    """
    for case_list in sorted(os.listdir(case_list_dir)):
        # skip "temp"/backup files made by some text editors:
        if case_list.startswith('.') or case_list.endswith('~'):
            continue
        import_case_list(jvm_args, os.path.join(case_list_dir, case_list))
Expand All @@ -232,13 +232,13 @@ def process_command(jvm_args, command, meta_filename, data_filename, study_ids,
import_case_list(jvm_args, meta_filename)

def get_meta_filenames(data_directory):
    """Return the sorted paths of the meta files inside ``data_directory``.

    A directory entry is treated as a meta file when its name contains
    ``meta`` (case-insensitive) preceded by a word boundary or underscore and
    followed by a word boundary, underscore or digit. Names starting with
    ``.`` or ending with ``~`` (editor temp/backup files) are ignored.

    Args:
        data_directory: directory to scan (not recursive).

    Returns:
        A list of ``os.path.join(data_directory, name)`` strings in sorted
        order, so callers see the same order regardless of how
        ``os.listdir`` happens to enumerate the directory.
    """
    meta_filenames = [
        os.path.join(data_directory, meta_filename)
        for meta_filename in os.listdir(data_directory)
        if re.search(r'(\b|_)meta(\b|[_0-9])', meta_filename,
                     flags=re.IGNORECASE)
        and not (meta_filename.startswith('.') or meta_filename.endswith('~'))
    ]
    return sorted(meta_filenames)

def process_study_directory(jvm_args, study_directory, update_generic_assay_entity = None):
"""
Expand Down Expand Up @@ -525,7 +525,7 @@ def add_parser_args(parser):
parser.add_argument('-data', '--data_filename', type=str, required=False,
help='Path to Data file')

def interface():
def interface(args=None):
parent_parser = argparse.ArgumentParser(description='cBioPortal meta Importer')
add_parser_args(parent_parser)
parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -555,7 +555,7 @@ def interface():
# TODO - add same argument to metaimporter
# TODO - harmonize on - and _

parser = parser.parse_args()
parser = parser.parse_args(args)
if parser.command is not None and parser.subcommand is not None:
print('Cannot call multiple commands')
sys.exit(2)
Expand Down Expand Up @@ -637,5 +637,5 @@ def main(args):
# ready to roll

if __name__ == '__main__':
    # Call interface() without arguments: its ``args=None`` default is handed
    # to argparse's parse_args(), which then reads the process command line.
    # No module-level name ``args`` is visible in this excerpt, so passing
    # ``args`` here would fail with a NameError when run as a script.
    parsed_args = interface()
    main(parsed_args)
93 changes: 93 additions & 0 deletions tests/system_tests_import_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python3

'''
This code is licensed under the GNU Affero General Public License (AGPL),
version 3, or (at your option) any later version.
'''

import unittest
from unittest import mock
from unittest.mock import call
from importer import cbioportalImporter

# Leading arguments expected at the start of every mocked run_java call in
# the assertions below; 'test.jar' is the value the tests make locate_jar
# return.
common_part = ('-Dspring.profiles.active=dbcp', '-cp', 'test.jar')

class DataImporterTests(unittest.TestCase):
    '''
    Checks the exact sequence of Java commands the importer script issues.

    run_java and locate_jar are mocked in every test, and the assertions
    compare the calls recorded on the run_java mock with the expected list.
    '''

    def setUp(self):
        # Do not truncate the diff unittest prints when the long call lists
        # differ.
        self.maxDiff = None

    @mock.patch('importer.cbioportalImporter.locate_jar')
    @mock.patch('importer.cbioportalImporter.run_java')
    def test_full_study_load(self, run_java, locate_jar):
        '''
        Java commands produced when loading a complete study directory
        '''
        locate_jar.return_value = "test.jar"
        study_directory = 'test_data/study_es_0'

        def java(*arguments):
            # Every expected invocation starts with common_part.
            return call(*common_part, *arguments)

        def bulk_load(java_class, meta_file, data_file, *options):
            # Meta/data file pair loaded in bulkload mode; class-specific
            # options sit between the load mode and the data file.
            return java(java_class, '--meta', meta_file,
                        '--loadMode', 'bulkload', *options,
                        '--data', data_file, '--noprogress')

        def profile(meta_file, data_file):
            return bulk_load('org.mskcc.cbio.portal.scripts.ImportProfileData',
                             meta_file, data_file, '--update-info', 'False')

        def sample_list(case_list_file):
            return java('org.mskcc.cbio.portal.scripts.ImportSampleList',
                        case_list_file, '--noprogress')

        cbioportalImporter.main(
            cbioportalImporter.interface(['--study_directory', study_directory]))

        expected_calls = [
            java('org.mskcc.cbio.portal.util.VersionUtil'),
            java('org.mskcc.cbio.portal.scripts.ImportTypesOfCancers',
                 f'{study_directory}/data_cancer_type.txt', 'false', '--noprogress'),
            java('org.mskcc.cbio.portal.scripts.RemoveCancerStudy',
                 'study_es_0', '--noprogress'),
            java('org.mskcc.cbio.portal.scripts.ImportCancerStudy',
                 f'{study_directory}/meta_study.txt', '--noprogress'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportClinicalData',
                      f'{study_directory}/meta_clinical_samples.txt',
                      f'{study_directory}/data_clinical_samples.txt'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportResourceDefinition',
                      f'{study_directory}/meta_resource_definition.txt',
                      f'{study_directory}/data_resource_definition.txt'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportResourceData',
                      f'{study_directory}/meta_resource_sample.txt',
                      f'{study_directory}/data_resource_sample.txt'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportClinicalData',
                      f'{study_directory}/meta_clinical_patients.txt',
                      f'{study_directory}/data_clinical_patients.txt'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData',
                      f'{study_directory}/meta_cna_hg19_seg.txt',
                      f'{study_directory}/data_cna_hg19.seg'),
            profile(f'{study_directory}/meta_cna_log2.txt',
                    f'{study_directory}/data_cna_log2.txt'),
            profile(f'{study_directory}/meta_expression_median.txt',
                    f'{study_directory}/data_expression_median.txt'),
            profile(f'{study_directory}/meta_generic_assay_patient_test.txt',
                    f'{study_directory}/data_generic_assay_patient_test.txt'),
            java('org.mskcc.cbio.portal.scripts.ImportGisticData',
                 '--data', f'{study_directory}/data_gistic_genes_amp.txt',
                 '--study', 'study_es_0', '--noprogress'),
            profile(f'{study_directory}/meta_methylation_hm27.txt',
                    f'{study_directory}/data_methylation_hm27.txt'),
            profile(f'{study_directory}/meta_mutational_signature.txt',
                    f'{study_directory}/data_mutational_signature.txt'),
            profile(f'{study_directory}/meta_mutations_extended.txt',
                    f'{study_directory}/data_mutations_extended.maf'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportResourceData',
                      f'{study_directory}/meta_resource_patient.txt',
                      f'{study_directory}/data_resource_patient.txt'),
            bulk_load('org.mskcc.cbio.portal.scripts.ImportResourceData',
                      f'{study_directory}/meta_resource_study.txt',
                      f'{study_directory}/data_resource_study.txt'),
            profile(f'{study_directory}/meta_treatment_ec50.txt',
                    f'{study_directory}/data_treatment_ec50.txt'),
            profile(f'{study_directory}/meta_treatment_ic50.txt',
                    f'{study_directory}/data_treatment_ic50.txt'),
            profile(f'{study_directory}/meta_structural_variants.txt',
                    f'{study_directory}/data_structural_variants.txt'),
            profile(f'{study_directory}/meta_cna_discrete.txt',
                    f'{study_directory}/data_cna_discrete.txt'),
            profile(f'{study_directory}/meta_expression_median_Zscores.txt',
                    f'{study_directory}/data_expression_median_Zscores.txt'),
            profile(f'{study_directory}/meta_gsva_scores.txt',
                    f'{study_directory}/data_gsva_scores.txt'),
            profile(f'{study_directory}/meta_gsva_pvalues.txt',
                    f'{study_directory}/data_gsva_pvalues.txt'),
            java('org.mskcc.cbio.portal.scripts.ImportGenePanelProfileMap',
                 '--meta', f'{study_directory}/meta_gene_panel_matrix.txt',
                 '--data', f'{study_directory}/data_gene_panel_matrix.txt',
                 '--noprogress'),
            sample_list(f'{study_directory}/case_lists/cases_cna.txt'),
            sample_list(f'{study_directory}/case_lists/cases_cnaseq.txt'),
            sample_list(f'{study_directory}/case_lists/cases_custom.txt'),
            sample_list(f'{study_directory}/case_lists/cases_sequenced.txt'),
            sample_list(f'{study_directory}/case_lists/cases_test.txt'),
            java('org.mskcc.cbio.portal.scripts.AddCaseList',
                 'study_es_0', 'all', '--noprogress'),
            java('org.mskcc.cbio.portal.scripts.UpdateCancerStudy',
                 'study_es_0', 'AVAILABLE', '--noprogress'),
        ]
        self.assertListEqual(run_java.call_args_list, expected_calls)

    @mock.patch('importer.cbioportalImporter.locate_jar')
    @mock.patch('importer.cbioportalImporter.run_java')
    def test_incremental_load(self, run_java, locate_jar):
        '''
        Java commands produced when loading a data directory incrementally
        '''
        locate_jar.return_value = "test.jar"
        data_directory = 'test_data/study_es_0_inc'

        cbioportalImporter.main(
            cbioportalImporter.interface(['--data_directory', data_directory]))

        expected_calls = [
            call(*common_part, 'org.mskcc.cbio.portal.util.VersionUtil'),
            call(*common_part,
                 'org.mskcc.cbio.portal.scripts.ImportClinicalData',
                 '--overwrite-existing',
                 '--meta', f'{data_directory}/meta_clinical_patients.txt',
                 '--loadMode', 'bulkload',
                 '--data', f'{data_directory}/data_clinical_patients.txt',
                 '--noprogress'),
            call(*common_part,
                 'org.mskcc.cbio.portal.scripts.ImportClinicalData',
                 '--overwrite-existing',
                 '--meta', f'{data_directory}/meta_clinical_samples.txt',
                 '--loadMode', 'bulkload',
                 '--data', f'{data_directory}/data_clinical_samples.txt',
                 '--noprogress'),
            call(*common_part,
                 'org.mskcc.cbio.portal.scripts.ImportProfileData',
                 '--overwrite-existing',
                 '--meta', f'{data_directory}/meta_mutations_extended.txt',
                 '--loadMode', 'bulkload',
                 '--update-info', 'False',
                 '--data', f'{data_directory}/data_mutations_extended.maf',
                 '--noprogress'),
            # The last command takes no '--noprogress' flag.
            call(*common_part,
                 'org.mskcc.cbio.portal.scripts.UpdateCaseListsSampleIds',
                 '--meta', f'{data_directory}/meta_clinical_samples.txt',
                 '--case-lists', f'{data_directory}/case_lists'),
        ]
        self.assertListEqual(run_java.call_args_list, expected_calls)


if __name__ == '__main__':
    # buffer=True: unittest buffers stdout/stderr while each test runs.
    unittest.main(buffer=True)

0 comments on commit 8d3aaed

Please sign in to comment.