Skip to content

Commit

Permalink
Minor improvments. Apply PRs feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
forus committed Apr 30, 2024
1 parent e785a53 commit c87f63f
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 74 deletions.
7 changes: 4 additions & 3 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoMutation.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
public final class DaoMutation {
public static final String NAN = "NaN";
private static final String MUTATION_COUNT_ATTR_ID = "MUTATION_COUNT";
public static final String DELETE_ALTERATION_DRIVER_ANNOTATION = "DELETE from alteration_driver_annotation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?";
public static final String DELETE_MUTATION = "DELETE from mutation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?";

public static int addMutation(ExtendedMutation mutation, boolean newMutationEvent) throws DaoException {
if (!MySQLbulkLoader.isBulkLoad()) {
Expand Down Expand Up @@ -1499,13 +1501,12 @@ public static void deleteAllRecordsInGeneticProfileForSample(long geneticProfile
ResultSet rs = null;
try {
con = JdbcUtil.getDbConnection(DaoMutation.class);
// TODO Move it to another class?
pstmt = con.prepareStatement("DELETE from alteration_driver_annotation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?");
pstmt = con.prepareStatement(DELETE_ALTERATION_DRIVER_ANNOTATION);
pstmt.setLong(1, geneticProfileId);
pstmt.setLong(2, internalSampleId);
pstmt.executeUpdate();

pstmt = con.prepareStatement("DELETE from mutation WHERE GENETIC_PROFILE_ID=? and SAMPLE_ID=?");
pstmt = con.prepareStatement(DELETE_MUTATION);
pstmt.setLong(1, geneticProfileId);
pstmt.setLong(2, internalSampleId);
pstmt.executeUpdate();
Expand Down
8 changes: 5 additions & 3 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoSampleList.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@
*/
public class DaoSampleList {

/**
public static final String DELETE_SAMPLE_LIST_LIST = "DELETE FROM sample_list_list WHERE `LIST_ID` = ?";

/**
* Adds record to sample_list table.
*/
public int addSampleList(SampleList sampleList) throws DaoException {
Expand All @@ -67,7 +69,7 @@ public int addSampleList(SampleList sampleList) throws DaoException {
int listListRow = addSampleListList(sampleList.getCancerStudyId(), listId, sampleList.getSampleList(), con);
rows = (listListRow != -1) ? (rows + listListRow) : rows;
} else {
throw new SQLException("Creating sample list failed, no ID obtained.");
throw new DaoException("Creating sample list failed, no ID obtained.");
}
}
} catch (SQLException e) {
Expand Down Expand Up @@ -253,7 +255,7 @@ public void updateSampleListList(SampleList sampleList) throws DaoException {
PreparedStatement pstmt = null;
try {
con = JdbcUtil.getDbConnection(DaoSampleList.class);
pstmt = con.prepareStatement("DELETE FROM sample_list_list WHERE `LIST_ID` = ?");
pstmt = con.prepareStatement(DELETE_SAMPLE_LIST_LIST);
pstmt.setInt(1, sampleList.getSampleListId());
pstmt.executeUpdate();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import java.util.regex.*;
import org.apache.commons.collections4.map.MultiKeyMap;

//TODO Remove MIXED_ATTRIBUTES data type https://github.com/cBioPortal/cbioportal-core/issues/31
public class ImportClinicalData extends ConsoleRunnable {

public static final String DELIMITER = "\t";
Expand Down Expand Up @@ -104,6 +103,10 @@ public static enum AttributeTypes
{
PATIENT_ATTRIBUTES("PATIENT"),
SAMPLE_ATTRIBUTES("SAMPLE"),
/**
* We want to encourage use patient or sample files instead, not mixed ones.
* See https://github.com/cBioPortal/cbioportal-core/issues/31
*/
@Deprecated
MIXED_ATTRIBUTES("MIXED");

Expand Down Expand Up @@ -664,17 +667,13 @@ public void run() {
attributesDatatype = properties.getProperty("datatype");
cancerStudyStableId = properties.getProperty("cancer_study_identifier");
}
if( options.has ( attributeFlag ) )
{
if (options.has(attributeFlag)) {
attributesDatatype = "MIXED_ATTRIBUTES";
}
if( options.has ( relaxedFlag ) )
{
if (options.has(relaxedFlag)) {
relaxed = true;

}
if( options.has ( overWriteExistingFlag ) )
{
if (options.has(overWriteExistingFlag)) {
overwriteExisting = true;

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ public static void importSampleList(File dataFile) throws IOException, DaoExcept
boolean itemsAddedViaPatientLink = false;
// construct sample id list
ArrayList<String> sampleIDsList = new ArrayList<String>();
List<String> sampleIds = caseList.getSampleIds();
for (String sampleId : sampleIds) {
for (String sampleId : caseList.getSampleIds()) {
sampleId = StableIdUtil.getSampleId(sampleId);
Sample s = DaoSample.getSampleByCancerStudyAndSampleId(theCancerStudy.getInternalId(), sampleId);
if (s==null) {
Expand All @@ -87,7 +86,7 @@ public static void importSampleList(File dataFile) throws IOException, DaoExcept
} else if (!sampleIDsList.contains(s.getStableId())) {
sampleIDsList.add(s.getStableId());
} else {
ProgressMonitor.logWarning("Warning: duplicated sample ID "+s.getStableId()+" in case list "+caseList.getStableId());
ProgressMonitor.logWarning("Warning: duplicated sample ID " + s.getStableId() + " in case list " + caseList.getStableId());
}
}

Expand All @@ -111,7 +110,7 @@ public static void importSampleList(File dataFile) throws IOException, DaoExcept

ProgressMonitor.setCurrentMessage(" --> stable ID: " + sampleList.getStableId());
ProgressMonitor.setCurrentMessage(" --> sample list name: " + sampleList.getName());
ProgressMonitor.setCurrentMessage(" --> number of samples in file: " + sampleIds.size());
ProgressMonitor.setCurrentMessage(" --> number of samples in file: " + caseList.getSampleIds().size());
String warningSamplesViaPatientLink = (itemsAddedViaPatientLink? "(nb: can be higher if samples were added via patient link)" : "");
ProgressMonitor.setCurrentMessage(" --> number of samples stored in final sample list " + warningSamplesViaPatientLink + ": " + sampleIDsList.size());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,19 @@
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.validate.CaseListValidator;

import java.io.*;
import java.util.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

public class UpdateCaseListsSampleIds extends ConsoleRunnable {
Expand All @@ -41,8 +52,8 @@ public class UpdateCaseListsSampleIds extends ConsoleRunnable {
private File dataFile;
private List<File> caseListFiles = List.of();
private String cancerStudyStableId;
private Map<String, Set<String>> caseListSampleIdToSampleIds = new LinkedHashMap<>();
private DaoSampleList daoSampleList = new DaoSampleList();
private final Map<String, Set<String>> caseListSampleIdToSampleIds = new LinkedHashMap<>();
private final DaoSampleList daoSampleList = new DaoSampleList();
private LinkedHashSet<String> allSampleIds;

public UpdateCaseListsSampleIds(String[] args) {
Expand All @@ -56,7 +67,6 @@ public void run() {
parseArguments();
readStudyIdAndDataFileFromMetaFile();
this.allSampleIds = readSampleIdsFromDataFile(this.dataFile);
// TODO has the all case list always to exist?
this.caseListSampleIdToSampleIds.put(cancerStudyStableId + "_all", this.allSampleIds);
Map<String, Set<String>> readCaseListSampleIds = readCaseListFiles();
this.caseListSampleIdToSampleIds.putAll(readCaseListSampleIds);
Expand All @@ -65,7 +75,7 @@ public void run() {

private Map<String, Set<String>> readCaseListFiles() {
LinkedHashMap<String, Set<String>> result = new LinkedHashMap<>();
for (File caseListFile: this.caseListFiles) {
for (File caseListFile : this.caseListFiles) {
CaseList caseList = CaseListReader.readFile(caseListFile);
CaseListValidator.validateIdFields(caseList);
String cancerStudyIdentifier = caseList.getCancerStudyIdentifier();
Expand All @@ -87,34 +97,32 @@ private Map<String, Set<String>> readCaseListFiles() {
}

private void updateCaseLists(Map<String, Set<String>> caseListSampleIdToSampleIds) {
// TODO Do we really have to do this? Is there a better way?
DaoCancerStudy.reCacheAll();
try {
for (Map.Entry<String, Set<String>> caseListStableIdToSampleIds: caseListSampleIdToSampleIds.entrySet()) {
for (Map.Entry<String, Set<String>> caseListStableIdToSampleIds : caseListSampleIdToSampleIds.entrySet()) {
String caseListStableId = caseListStableIdToSampleIds.getKey();
Set<String> sampleIds = caseListStableIdToSampleIds.getValue();
Set<String> uploadedSampleIds = caseListStableIdToSampleIds.getValue();
SampleList sampleList = daoSampleList.getSampleListByStableId(caseListStableId);
if (sampleList == null) {
throw new RuntimeException("No case list with " + caseListStableId + " stable id is found");
}
LinkedHashSet<String> newCaseListSampleIds = new LinkedHashSet<>(sampleIds);
newCaseListSampleIds.addAll(sampleList.getSampleList());
ArrayList<String> newSampleArrayList = new ArrayList<>(newCaseListSampleIds);
sampleList.setSampleList(newSampleArrayList);
//TODO no need to run expensive db update if sampleList hasn't effectively changed
daoSampleList.updateSampleListList(sampleList);
LinkedHashSet<String> newCaseListSampleIds = new LinkedHashSet<>(uploadedSampleIds);
if (newCaseListSampleIds.addAll(sampleList.getSampleList())) {
sampleList.setSampleList(new ArrayList<>(newCaseListSampleIds));
daoSampleList.updateSampleListList(sampleList);
}
}
CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId(this.cancerStudyStableId);
List<SampleList> sampleLists = daoSampleList.getAllSampleLists(cancerStudy.getInternalId());
List<SampleList> remainingLists = sampleLists.stream().filter(sl ->
!caseListSampleIdToSampleIds.containsKey(sl.getStableId()) && sl.getSampleList().stream().anyMatch(this.allSampleIds::contains)
).collect(Collectors.toList());
for (SampleList remainingList: remainingLists) {
).toList();
for (SampleList remainingList : remainingLists) {
ArrayList<String> newSampleList = new ArrayList<>(remainingList.getSampleList());
newSampleList.removeAll(this.allSampleIds);
remainingList.setSampleList(newSampleList);
//TODO for optimization purpose we could supply to the update method 2 set of samples: samples that have to be added and samples that have to be removed
daoSampleList.updateSampleListList(remainingList);
if (newSampleList.removeAll(this.allSampleIds)) {
remainingList.setSampleList(newSampleList);
daoSampleList.updateSampleListList(remainingList);
}
}
} catch (DaoException e) {
throw new RuntimeException(e);
Expand All @@ -123,40 +131,29 @@ private void updateCaseLists(Map<String, Set<String>> caseListSampleIdToSampleId

private LinkedHashSet<String> readSampleIdsFromDataFile(File dataFile) {
LinkedHashSet<String> allSampleIds = new LinkedHashSet<>();
FileReader reader = null;
try {
reader = new FileReader(dataFile);
try (BufferedReader buff = new BufferedReader(reader)) {
String line;
int sampleIdPosition = -1;
while ((line = buff.readLine()) != null) {
String trimmedLine = line.trim();
if (trimmedLine.isEmpty() || trimmedLine.startsWith("#")) {
continue;
}
try (FileReader reader = new FileReader(dataFile);
BufferedReader buff = new BufferedReader(reader)) {
String line;
int sampleIdPosition = -1;
while ((line = buff.readLine()) != null) {
String trimmedLine = line.trim();
if (trimmedLine.isEmpty() || trimmedLine.startsWith("#")) {
continue;
}

String[] fieldValues = line.split("\t");
String[] fieldValues = line.split("\t");
if (sampleIdPosition == -1) {
sampleIdPosition = List.of(fieldValues).indexOf("SAMPLE_ID");
if (sampleIdPosition == -1) {
sampleIdPosition = List.of(fieldValues).indexOf("SAMPLE_ID");
if (sampleIdPosition == -1) {
throw new RuntimeException("No SAMPLE_ID header is found");
}
} else {
allSampleIds.add(fieldValues[sampleIdPosition].trim());
throw new RuntimeException("No SAMPLE_ID header is found");
}
} else {
allSampleIds.add(fieldValues[sampleIdPosition].trim());
}
return allSampleIds;
}
} catch (Exception e) {
return allSampleIds;
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
}

Expand All @@ -178,15 +175,15 @@ private void parseArguments() {
String description = "Updates (adds/removes) sample ids in specified case lists.";

OptionParser parser = new OptionParser();
OptionSpec<String> metaOpt = parser.accepts( "meta",
"clinical sample (genetic_alteration_type=CLINICAL and datatype=SAMPLE_ATTRIBUTES or datatype=MIXED_ATTRIBUTES) meta data file. All sample ids found in the file will be added to the _all case list." ).withRequiredArg().required().describedAs( "meta_clinical_sample.txt" ).ofType( String.class );
OptionSpec<String> caseListDirOrFileOpt = parser.accepts( "case-lists",
"case list file or a directory with case list files" ).withRequiredArg().describedAs( "case_lists/" ).ofType( String.class );
OptionSpec<String> metaOpt = parser.accepts("meta",
"clinical sample (genetic_alteration_type=CLINICAL and datatype=SAMPLE_ATTRIBUTES or datatype=MIXED_ATTRIBUTES) meta data file. All sample ids found in the file will be added to the _all case list.").withRequiredArg().required().describedAs("meta_clinical_sample.txt").ofType(String.class);
OptionSpec<String> caseListDirOrFileOpt = parser.accepts("case-lists",
"case list file or a directory with case list files").withRequiredArg().describedAs("case_lists/").ofType(String.class);

try {
OptionSet options = parser.parse( args );
OptionSet options = parser.parse(args);
this.metaFile = new File(options.valueOf(metaOpt));
if(options.has(caseListDirOrFileOpt)){
if (options.has(caseListDirOrFileOpt)) {
File caseListDirOrFile = new File(options.valueOf(caseListDirOrFileOpt));
if (caseListDirOrFile.isDirectory()) {
this.caseListFiles = Arrays.stream(Objects.requireNonNull(caseListDirOrFile.listFiles()))
Expand All @@ -207,7 +204,7 @@ private void parseArguments() {
/**
* Runs the command as a script and exits with an appropriate exit code.
*
* @param args the arguments given on the command line
* @param args the arguments given on the command line
*/
public static void main(String[] args) {
ConsoleRunnable runner = new UpdateCaseListsSampleIds(args);
Expand Down

0 comments on commit c87f63f

Please sign in to comment.