diff --git a/scripts/importer/cbioportal_common.py b/scripts/importer/cbioportal_common.py index 5f5895d1..9fda27d6 100644 --- a/scripts/importer/cbioportal_common.py +++ b/scripts/importer/cbioportal_common.py @@ -383,6 +383,7 @@ class MetaFileTypes(object): MetaFileTypes.TIMELINE, MetaFileTypes.GENE_PANEL_MATRIX, MetaFileTypes.STRUCTURAL_VARIANT, + MetaFileTypes.SEG, ] IMPORTER_CLASSNAME_BY_META_TYPE = { diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalData.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalData.java index 8a5aaf30..4ac8f1e6 100755 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalData.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalData.java @@ -370,7 +370,6 @@ public static List getSampleData(int cancerStudyId, Collection sampleInternalIds, String attrId) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + try { + con = JdbcUtil.getDbConnection(DaoClinicalData.class); + pstmt = con.prepareStatement("DELETE FROM " + SAMPLE_ATTRIBUTES_TABLE + + " WHERE `ATTR_ID` = ? AND `INTERNAL_ID` IN (" + + String.join(",", Collections.nCopies(sampleInternalIds.size(), "?")) + + ")"); + int parameterIndex = 1; + pstmt.setString(parameterIndex++, attrId); + for (Integer sampleInternalId : sampleInternalIds) { + pstmt.setInt(parameterIndex++, sampleInternalId); + } + pstmt.executeUpdate(); + } + catch (SQLException e) { + throw new DaoException(e); + } + finally { + JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, null); } } diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegment.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegment.java index a0113a44..a71166f7 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegment.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegment.java @@ -67,7 +67,7 @@ public static int addCopyNumberSegment(CopyNumberSegment seg) throws DaoExceptio } } - public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) throws DaoException { + public static void createFractionGenomeAlteredClinicalData(int cancerStudyId, Set sampleIds, boolean updateMode) throws DaoException { Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; @@ -80,8 +80,15 @@ public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) th "AS c2 WHERE c2.`CANCER_STUDY_ID` = c1.`CANCER_STUDY_ID` AND c2.`SAMPLE_ID` = c1.`SAMPLE_ID` AND " + "ABS(c2.`SEGMENT_MEAN`) >= 0.2) / SUM(`END`-`START`)) AS `VALUE` FROM `copy_number_seg` AS c1 , `cancer_study` " + "WHERE c1.`CANCER_STUDY_ID` = cancer_study.`CANCER_STUDY_ID` AND cancer_study.`CANCER_STUDY_ID`=? " + - "GROUP BY cancer_study.`CANCER_STUDY_ID` , `SAMPLE_ID` HAVING SUM(`END`-`START`) > 0;"); - pstmt.setInt(1, cancerStudyId); + (sampleIds == null ? "" : ("AND `SAMPLE_ID` IN ("+ String.join(",", Collections.nCopies(sampleIds.size(), "?")) + ") ")) + +"GROUP BY cancer_study.`CANCER_STUDY_ID` , `SAMPLE_ID` HAVING SUM(`END`-`START`) > 0;"); + int parameterIndex = 1; + pstmt.setInt(parameterIndex++, cancerStudyId); + if (sampleIds != null) { + for (Integer sampleId : sampleIds) { + pstmt.setInt(parameterIndex++, sampleId); + } + } Map fractionGenomeAltereds = new HashMap(); rs = pstmt.executeQuery(); while (rs.next()) { @@ -94,7 +101,10 @@ public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) th false, "20", cancerStudyId); DaoClinicalAttributeMeta.addDatum(attr); } - + + if (updateMode) { + DaoClinicalData.removeSampleAttributesData(fractionGenomeAltereds.keySet(), FRACTION_GENOME_ALTERED_ATTR_ID); + } for (Map.Entry fractionGenomeAltered : fractionGenomeAltereds.entrySet()) { DaoClinicalData.addSampleDatum(fractionGenomeAltered.getKey(), FRACTION_GENOME_ALTERED_ATTR_ID, fractionGenomeAltered.getValue()); } @@ -283,4 +293,27 @@ public static boolean segmentDataExistForSample(int cancerStudyId, int sampleId) JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs); } } + + public static void deleteSegmentDataForSamples(int cancerStudyId, Set sampleIds) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoCopyNumberSegment.class); + pstmt = con.prepareStatement("DELETE FROM `copy_number_seg`" + + " WHERE `CANCER_STUDY_ID`= ?" + + " AND `SAMPLE_ID` IN (" + String.join(",", Collections.nCopies(sampleIds.size(), "?")) + + ")"); + int parameterIndex = 1; + pstmt.setInt(parameterIndex++, cancerStudyId); + for (Integer sampleId : sampleIds) { + pstmt.setInt(parameterIndex++, sampleId); + } + pstmt.executeUpdate(); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs); + } + } } diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegmentFile.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegmentFile.java index ef0011a4..cf2332f6 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegmentFile.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegmentFile.java @@ -65,7 +65,7 @@ public static int addCopyNumberSegmentFile(CopyNumberSegmentFile copySegFile) th } catch (SQLException e) { throw new DaoException(e); } finally { - JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs); + JdbcUtil.closeAll(DaoCopyNumberSegmentFile.class, con, pstmt, rs); } } @@ -86,6 +86,9 @@ public static CopyNumberSegmentFile getCopyNumberSegmentFile(int cancerStudyId) cnsf.referenceGenomeId = CopyNumberSegmentFile.ReferenceGenomeId.valueOf(rs.getString("REFERENCE_GENOME_ID")); cnsf.description = rs.getString("DESCRIPTION"); cnsf.filename = rs.getString("FILENAME"); + if (rs.next()) { + throw new SQLException("More than one row was returned."); + } return cnsf; } return null; diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java index 92343aa3..1fb5c0d0 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportCopyNumberSegmentData.java @@ -55,7 +55,9 @@ import java.io.FileReader; import java.io.IOException; import java.math.BigDecimal; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; /** * Import Segment data into database. @@ -64,7 +66,9 @@ public class ImportCopyNumberSegmentData extends ConsoleRunnable { private int entriesSkipped; - + private boolean updateMode; + private Set processedSampleIds; + private void importData(File file, int cancerStudyId) throws IOException, DaoException { MySQLbulkLoader.bulkLoadOn(); FileReader reader = new FileReader(file); @@ -72,6 +76,7 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc try { String line = buf.readLine(); // skip header line long segId = DaoCopyNumberSegment.getLargestId(); + processedSampleIds = new HashSet<>(); while ((line=buf.readLine()) != null) { ProgressMonitor.incrementCurValue(); ConsoleUtil.showProgress(); @@ -81,8 +86,7 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc System.err.println("wrong format: "+line); } - CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId); - String chrom = strs[1].trim(); + String chrom = strs[1].trim(); //validate in same way as GistitReader: ValidationUtils.validateChromosome(chrom); @@ -112,6 +116,10 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc CopyNumberSegment cns = new CopyNumberSegment(cancerStudyId, s.getInternalId(), chrom, start, end, numProbes, segMean); cns.setSegId(++segId); DaoCopyNumberSegment.addCopyNumberSegment(cns); + processedSampleIds.add(s.getInternalId()); + } + if (updateMode) { + DaoCopyNumberSegment.deleteSegmentDataForSamples(cancerStudyId, processedSampleIds); } MySQLbulkLoader.flushAll(); } @@ -127,6 +135,7 @@ public void run() { OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true); String dataFile = (String) options.valueOf("data"); File descriptorFile = new File((String) options.valueOf("meta")); + updateMode = options.has("overwrite-existing"); Properties properties = new Properties(); properties.load(new FileInputStream(descriptorFile)); @@ -135,13 +144,13 @@ public void run() { CancerStudy cancerStudy = getCancerStudy(properties); - if (segmentDataExistsForCancerStudy(cancerStudy)) { + if (!updateMode && segmentDataExistsForCancerStudy(cancerStudy)) { throw new IllegalArgumentException("Seg data for cancer study " + cancerStudy.getCancerStudyStableId() + " has already been imported: " + dataFile); } importCopyNumberSegmentFileMetadata(cancerStudy, properties); importCopyNumberSegmentFileData(cancerStudy, dataFile); - DaoCopyNumberSegment.createFractionGenomeAlteredClinicalData(cancerStudy.getInternalId()); + DaoCopyNumberSegment.createFractionGenomeAlteredClinicalData(cancerStudy.getInternalId(), processedSampleIds, updateMode); if( MySQLbulkLoader.isBulkLoad()) { MySQLbulkLoader.flushAll(); } @@ -164,7 +173,7 @@ private static boolean segmentDataExistsForCancerStudy(CancerStudy cancerStudy) return (DaoCopyNumberSegment.segmentDataExistForCancerStudy(cancerStudy.getInternalId())); } - private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException { + private void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException { CopyNumberSegmentFile copyNumSegFile = new CopyNumberSegmentFile(); copyNumSegFile.cancerStudyId = cancerStudy.getInternalId(); String referenceGenomeId = properties.getProperty("reference_genome_id").trim(); @@ -179,7 +188,18 @@ private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, copyNumSegFile.referenceGenomeId = getRefGenId(referenceGenomeId); copyNumSegFile.description = properties.getProperty("description").trim(); copyNumSegFile.filename = properties.getProperty("data_filename").trim(); - DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile); + CopyNumberSegmentFile storedCopyNumSegFile = DaoCopyNumberSegmentFile.getCopyNumberSegmentFile(cancerStudy.getInternalId()); + if (updateMode && storedCopyNumSegFile != null) { + if (storedCopyNumSegFile.referenceGenomeId != copyNumSegFile.referenceGenomeId) { + throw new IllegalStateException("You are trying to upload " + + copyNumSegFile.referenceGenomeId + + " reference genome data into " + + storedCopyNumSegFile.referenceGenomeId + + " reference genome data."); + } + } else { + DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile); + } } private void importCopyNumberSegmentFileData(CancerStudy cancerStudy, String dataFilename) throws IOException, DaoException { diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java index c16eba21..6556c591 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java @@ -135,8 +135,13 @@ public void run() { try { String description = "Import 'timeline' data"; - OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, false); - String dataFile = (String) options.valueOf("data"); + OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true); + if (options.has("loadMode") && !"bulkLoad".equals(options.valueOf("loadMode"))) { + throw new UnsupportedOperationException("This loader supports bulkLoad load mode only, but " + + options.valueOf("loadMode") + + " has been supplied."); + } + String dataFile = (String) options.valueOf("data"); File descriptorFile = new File((String) options.valueOf("meta")); boolean overwriteExisting = options.has("overwrite-existing"); diff --git a/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java b/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java index 7eba9610..f53c7ba2 100644 --- a/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java +++ b/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java @@ -178,10 +178,6 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de "Error: unknown loadMode action: " + actionArg); } } - else { - throw new UsageException(progName, description, parser, - "Error: 'loadMode' argument required."); - } } return options; } diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberSegmentDataImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberSegmentDataImport.java new file mode 100644 index 00000000..db2ee519 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberSegmentDataImport.java @@ -0,0 +1,121 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . +*/ + +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoClinicalData; +import org.mskcc.cbio.portal.dao.DaoCopyNumberSegment; +import org.mskcc.cbio.portal.dao.DaoCopyNumberSegmentFile; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoSample; +import org.mskcc.cbio.portal.dao.MySQLbulkLoader; +import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.model.ClinicalData; +import org.mskcc.cbio.portal.model.CopyNumberSegment; +import org.mskcc.cbio.portal.model.CopyNumberSegmentFile; +import org.mskcc.cbio.portal.model.Sample; +import org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +/** + * Tests Incremental Import of CNA segmented data. + * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" }) +@Rollback +@Transactional +public class TestIncrementalCopyNumberSegmentDataImport { + + /** + * Test incremental upload of CNA SEG data + */ + @Test + public void testIncrementalUpload() throws DaoException { + String segSampleId = "TCGA-A1-A0SE-01"; + Sample segDataSample = DaoSample.getSampleByCancerStudyAndSampleId(cancerStudy.getInternalId(), segSampleId); + + CopyNumberSegmentFile copyNumberSegmentFile = new CopyNumberSegmentFile(); + copyNumberSegmentFile.cancerStudyId = cancerStudy.getInternalId(); + copyNumberSegmentFile.referenceGenomeId = CopyNumberSegmentFile.ReferenceGenomeId.hg19; + copyNumberSegmentFile.segFileId = 1; + copyNumberSegmentFile.filename = "test_file.seg"; + copyNumberSegmentFile.description = "test seg file description"; + DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumberSegmentFile); + DaoClinicalData.addSampleDatum(segDataSample.getInternalId(), "FRACTION_GENOME_ALTERED", "TEST"); + MySQLbulkLoader.bulkLoadOn(); + CopyNumberSegment copyNumberSegment = new CopyNumberSegment( + cancerStudy.getInternalId(), + segDataSample.getInternalId(), + "1", + 3218610, + 95674710, + 100, + 0.01); + copyNumberSegment.setSegId(1L); + DaoCopyNumberSegment.addCopyNumberSegment(copyNumberSegment); + MySQLbulkLoader.flushAll(); + + File dataFolder = new File("src/test/resources/incremental/copy_number_alteration/"); + File metaFile = new File(dataFolder, "meta_cna_seg.txt"); + File dataFile = new File(dataFolder, "data_cna.seg"); + + ImportCopyNumberSegmentData importCnaSegData = new ImportCopyNumberSegmentData(new String[] { + "--loadMode", "bulkLoad", + "--meta", metaFile.getAbsolutePath(), + "--data", dataFile.getAbsolutePath(), + "--overwrite-existing", + }); + importCnaSegData.run(); + + CopyNumberSegmentFile fetchedCopyNumberSegmentFile = DaoCopyNumberSegmentFile.getCopyNumberSegmentFile(cancerStudy.getInternalId()); + assertNotNull(fetchedCopyNumberSegmentFile); + assertEquals("test_file.seg", fetchedCopyNumberSegmentFile.filename); + List cnaSegments = DaoCopyNumberSegment + .getSegmentForASample(segDataSample.getInternalId(), cancerStudy.getInternalId()); + assertEquals(9, cnaSegments.size()); + List clinicalData = DaoClinicalData.getSampleData(cancerStudy.getInternalId(), Set.of(segSampleId)); + ClinicalData fractionGenomeAltered = clinicalData.stream() + .filter(cd -> "FRACTION_GENOME_ALTERED".equals(cd.getAttrId())).findFirst().get(); + assertEquals("0.0000", fractionGenomeAltered.getAttrVal()); + } + + public static final String STUDY_ID = "study_tcga_pub"; + private CancerStudy cancerStudy; + + @Before + public void setUp() throws DaoException { + cancerStudy = DaoCancerStudy.getCancerStudyByStableId(STUDY_ID); + } + +} diff --git a/src/test/resources/incremental/copy_number_alteration/data_cna.seg b/src/test/resources/incremental/copy_number_alteration/data_cna.seg new file mode 100644 index 00000000..fd1be197 --- /dev/null +++ b/src/test/resources/incremental/copy_number_alteration/data_cna.seg @@ -0,0 +1,10 @@ +ID chrom loc.start loc.end num.mark seg.mean +TCGA-A1-A0SE-01 1 3218610 95674710 53225 0.0055 +TCGA-A1-A0SE-01 1 95676511 95676518 2 -1.6636 +TCGA-A1-A0SE-01 1 95680124 167057183 24886 0.0053 +TCGA-A1-A0SE-01 1 167057495 167059336 3 -1.0999 +TCGA-A1-A0SE-01 1 167059760 181602002 9213 -8e-04 +TCGA-A1-A0SE-01 1 181603120 181609567 6 -1.2009 +TCGA-A1-A0SE-01 1 181610685 201473647 12002 0.0055 +TCGA-A1-A0SE-01 1 201474400 201474544 2 -1.4235 +TCGA-A1-A0SE-01 1 201475220 247813706 29781 -4e-04 diff --git a/src/test/resources/incremental/copy_number_alteration/meta_cna_seg.txt b/src/test/resources/incremental/copy_number_alteration/meta_cna_seg.txt new file mode 100644 index 00000000..61d86a9a --- /dev/null +++ b/src/test/resources/incremental/copy_number_alteration/meta_cna_seg.txt @@ -0,0 +1,6 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: COPY_NUMBER_ALTERATION +datatype: SEG +reference_genome_id: hg19 +description: Test somatic CNA data +data_filename: data_cna.seg diff --git a/tests/system_tests_import_data.py b/tests/system_tests_import_data.py index 05d0002f..9c30cdef 100755 --- a/tests/system_tests_import_data.py +++ b/tests/system_tests_import_data.py @@ -125,6 +125,8 @@ def test_incremental_load(self, run_java, locate_jar): '--meta', f'{data_directory}/meta_clinical_samples.txt', '--case-lists', f'{data_directory}/case_lists') gene_panel_matrix_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportGenePanelProfileMap', '--overwrite-existing', '--meta', f'{data_directory}/meta_gene_panel_matrix.txt', '--data', f'{data_directory}/data_gene_panel_matrix.txt', '--noprogress') + seg_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_cna_hg19_seg.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_cna_hg19.seg', '--noprogress') self.assertCountEqual(run_java.call_args_list, [ call(*common_part, 'org.mskcc.cbio.portal.util.VersionUtil',), @@ -139,6 +141,7 @@ def test_incremental_load(self, run_java, locate_jar): sv_call, timeline_call, gene_panel_matrix_call, + seg_call, case_list_call, ]) diff --git a/tests/test_data/study_es_0_inc/data_cna_hg19.seg b/tests/test_data/study_es_0_inc/data_cna_hg19.seg new file mode 100644 index 00000000..4c149a9c --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_cna_hg19.seg @@ -0,0 +1,10 @@ +ID chrom loc.start loc.end num.mark seg.mean +TCGA-A2-A04P-01 1 3218610 95674710 53225 0.0055 +TCGA-A2-A04P-01 1 95676511 95676518 2 -1.6636 +TCGA-A2-A04P-01 1 95680124 167057183 24886 0.0053 +TCGA-A1-A0SB-01 1 167057495 167059336 3 -1.0999 +TCGA-A1-A0SB-01 1 167059760 181602002 9213 -8e-04 +TCGA-A1-A0SB-03 1 181603120 181609567 6 -1.2009 +TCGA-A1-A0SB-03 1 181610685 201473647 12002 0.0055 +TCGA-BH-NEW-01 1 201474400 201474544 2 -1.4235 +TCGA-BH-NEW-01 1 201475220 247813706 29781 -4e-04 diff --git a/tests/test_data/study_es_0_inc/data_structural_variants.txt b/tests/test_data/study_es_0_inc/data_structural_variants.txt index bd395c93..db82553f 100644 --- a/tests/test_data/study_es_0_inc/data_structural_variants.txt +++ b/tests/test_data/study_es_0_inc/data_structural_variants.txt @@ -1,10 +1,10 @@ Sample_Id Site1_Entrez_Gene_Id Site1_Hugo_Symbol Site1_Ensembl_Transcript_Id Site1_Region_Number Site1_Chromosome Site1_Position Site1_Region Site1_Description Site2_Entrez_Gene_Id Site2_Hugo_Symbol Site2_Ensembl_Transcript_Id Site2_Region_Number Site2_Chromosome Site2_Position Site2_Contig Site2_Region Site2_Description Site2_Effect_On_Frame NCBI_Build DNA_Support RNA_Support Normal_Read_Count Tumor_Read_Count Normal_Variant_Count Tumor_Variant_Count Normal_Paired_End_Read_Count Tumor_Paired_End_Read_Count Normal_Split_Read_Count Tumor_Split_Read_Count Annotation Breakpoint_Type Center Connection_Type Event_Info Class SV_Length Comments External_Annotation cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation SV_Status StructVarNs.column1 StructVarNs2.lorem StructVarNs.column2 TCGA-BH-NEW NA PIEZO1 ENST00000242365 15 7 138536968 EXON PIEZO1-NCOA4.K16B10.COSF509_1 NA NCOA4 ENST00000288602 10 7 140482957 EXON PIEZO1-NCOA4.PIEZO1.COSF509_2 NA GRCh37 no yes NA 1000 NA 900 NA NA NA NA PIEZO1-NCOA4.K16B10.COSF509 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF509 Putative_Driver Test driver Foo Class 4 Class annotation SOMATIC value1 ipsum value2 TCGA-BH-NEW NA KIAA1549 ENST00000242365 15 7 138536968 EXON KIAA1549-BRAF.K16B10.COSF509_1 NA BRAF ENST00000288602 10 7 140482957 EXON KIAA1549-BRAF.K16B10.COSF509_2 NA GRCh37 no yes NA 1000 NA 900 NA NA NA NA KIAA1549-BRAF.K16B10.COSF509 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF509 Putative_Driver Test driver Class 4 Class annotation SOMATIC value1 ipsum value2 -TCGA-A1-A0SB-03 NA NCOA4 ENST00000344348 7 10 51582939 EXON NCOA4-RET.N7R12_1 NA RET ENST00000340058 12 10 43612031 EXON NCOA4-RET.N7R12_2 NA GRCh37 no yes NA 1001 NA 800 NA NA NA NA NCOA4-RET.N7R1 NA NA NA Fusion NA NA Gain-of-Function NA Putative_Passenger Test driver Class 3 Class annotation SOMATIC +TCGA-A1-A0SB-03 NA NCOA4 ENST00000344348 7 10 51582939 EXON NCOA4-RET.N7R12_1 NA RET ENST00000340058 12 10 43612031 EXON NCOA4-RET.N7R12_2 NA GRCh37 no yes NA 1001 NA 800 NA NA NA NA NCOA4-RET.N7R1 NA NA NA Fusion NA NA Gain-of-Function NA Putative_Passenger Test driver Class 3 Class annotation SOMATIC TCGA-BH-NEW NA EML4 ENST00000318522 6 2 42492091 EXON EML4-ALK.E6bA20.AB374362_1 NA ALK ENST00000389048 20 2 29446394 EXON EML4-ALK.E6bA20.AB374362_2 NA GRCh37 no yes NA 1002 NA 700 NA NA NA NA EML4-ALK.E6bA20.AB374362 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB374362 Putative_Driver Test driver Class 2 Class annotation SOMATIC TCGA-BH-NEW NA TMPRSS2 ENST00000332149 1 21 42880007 EXON TMPRSS2-ERG.T1E2.COSF23.1_1 NA ERG ENST00000442448 2 21 39956869 EXON TMPRSS2-ERG.T1E2.COSF23.1_2 NA GRCh37 no yes NA 1003 NA 600 NA NA NA NA TMPRSS2-ERG.T1E2.COSF23.1 NA NA NA Fusion NA NA Gain-of-Function COSMIC:COSF23 Unknown Test driver Class 1 Class annotation SOMATIC -TCGA-A1-A0SB-01 NA EGFR ENST00000275493 1 7 55087058 EXON EGFR-EGFR.E1E8.DelPositive.1_1 NA EGFR ENST00000275493 8 7 55223522 EXON EGFR-EGFR.E1E8.DelPositive.1_2 NA GRCh37 no yes NA 1004 NA 500 NA NA NA NA EGFR-EGFR.E1E8.DelPositive NA NA NA Fusion NA NA NA NA Putative_Driver Test driver Unknown Class annotation SOMATIC +TCGA-A1-A0SB-01 NA EGFR ENST00000275493 1 7 55087058 EXON EGFR-EGFR.E1E8.DelPositive.1_1 NA EGFR ENST00000275493 8 7 55223522 EXON EGFR-EGFR.E1E8.DelPositive.1_2 NA GRCh37 no yes NA 1004 NA 500 NA NA NA NA EGFR-EGFR.E1E8.DelPositive NA NA NA Fusion NA NA NA NA Putative_Driver Test driver Unknown Class annotation SOMATIC TCGA-BH-NEW NA ALK ENST00000389048 11 2 29497964 EXON ALK-PTPN3.A11P3_1 NA PTPN3 ENST00000374541 3 9 112219679 EXON ALK-PTPN3.A11P3_2 NA GRCh37 no yes NA 1005 NA 400 NA NA NA NA ALK-PTPN3.A11P3 NA NA NA Fusion NA NA NA NA NA NA NA NA SOMATIC TCGA-A1-A0SB-01 NA EML4 ENST00000318522 13 2 42522656 EXON EML4-ALK.E13A20.AB462411_1 NA ALK ENST00000389048 20 2 29446335 EXON EML4-ALK.E13A20.AB462411_2 NA GRCh37 no yes NA 1006 NA 300 NA NA NA NA EML4-ALK.E13A20 NA NA NA Fusion NA NA Gain-of-Function GENBANK:AB462411 NA NA NA NA SOMATIC TCGA-A1-A0SB-03 NA TMPRSS2 ENST00000455813 1 21 42870045 EXON TMPRSS2-ETV1.T1bE4_1 NA ETV1 ENST00000405358 4 7 14017105 EXON TMPRSS2-ETV1.T1bE4_2 NA GRCh37 no yes NA 1007 NA 200 NA NA NA NA TMPRSS2-ETV1.T1bE4 NA NA NA Fusion NA NA NA NA NA NA NA NA SOMATIC diff --git a/tests/test_data/study_es_0_inc/meta_cna_hg19_seg.txt b/tests/test_data/study_es_0_inc/meta_cna_hg19_seg.txt new file mode 100644 index 00000000..f17e1657 --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_cna_hg19_seg.txt @@ -0,0 +1,6 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: COPY_NUMBER_ALTERATION +datatype: SEG +reference_genome_id: hg19 +description: Somatic CNA data (copy number ratio from tumor samples minus ratio from matched normals) from TCGA. +data_filename: data_cna_hg19.seg