Skip to content

Commit

Permalink
changed distance function to push samples with no data to outside
Browse files Browse the repository at this point in the history
this solution should push samples that have no data to the outer edge of
the resulting hierarchy (and of the resulting oncoprint heatmap)

- also added missing LICENSE for clustering js library

- added support for clustering (sorting) on one track only
  • Loading branch information
pieterlukasse authored and jjgao committed Jun 23, 2017
1 parent 4bfe6b7 commit 5d74d3b
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 9 deletions.
28 changes: 28 additions & 0 deletions OPEN-SOURCE-DOCUMENTATION
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ [email protected].
* FileSaver.min
* Font Awesome (CSS)
* Font Awesome (Fonts)
* clusterfck.min.js


ant-1.7.0
Expand Down Expand Up @@ -19686,3 +19687,30 @@ INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.

clusterfck.min.js
-----------------
From: https://github.com/tayden/clusterfck

Available under license:

Copyright (c) 2011 Heather Arthur <[email protected]>

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
67 changes: 58 additions & 9 deletions portal/src/main/webapp/js/src/clustering/clustering-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ onmessage = function(m) {
postMessage(result);
}

/**
 * Tells whether a list of values holds no usable data at all.
 *
 * An entry counts as data only when it is not null/undefined, is not NaN
 * (after the coercion done by the global isNaN) and is not equal to 0.
 *
 * @param values list of values to inspect.
 * @returns false as soon as one entry counts as data; true otherwise
 *          (including for an empty list).
 */
var isAllNaNs = function(values) {
    // An entry is "data" when it is a valid, non-zero number.
    var hasData = function(entry) {
        if (isNaN(entry)) {
            return false;
        }
        if (entry == null) {
            // isNaN(null) is false, so null needs its own check.
            return false;
        }
        return entry != 0.0;
    };
    var idx = 0;
    while (idx < values.length) {
        if (hasData(values[idx])) {
            return false;
        }
        idx += 1;
    }
    return true;
};

/**
* Distance measure using 1 - Spearman's correlation. This function expects that item1 and item2
* are each an item that contains an item.preProcessedValueList attribute, which is the ranked version
Expand All @@ -54,11 +68,26 @@ var preRankedSpearmanDist = function(item1, item2) {
//take the arrays from the preProcessedValueList:
var ranks1 = item1.preProcessedValueList;
var ranks2 = item2.preProcessedValueList;
var item1AllNaNs = isAllNaNs(item1.orderedValueList);
var item2AllNaNs = isAllNaNs(item2.orderedValueList);
//rules for NaN values:
if (item1AllNaNs && item2AllNaNs) {
//return distance 0
return 0;
}
else if (item1AllNaNs || item2AllNaNs) {
//return large distance:
return 3;
}
//calculate spearman's rank correlation coefficient, using pearson's distance
//for correlation of the ranks:
var r = jStat.corrcoeff(ranks1, ranks2);
var r = jStat.corrcoeff(ranks1, ranks2);
if (isNaN(r)) {
r = 0; //will result in same distance as no correlation //TODO - calculate correlation only on items where there is data...?
//assuming the ranks1 and ranks2 lists do not contain NaN entries (and this code DOES assume all missing values have been imputed by a valid number),
//this specific scenario should not occur, unless all values are the same (and given the same rank). In this case, there is no variation, and
//correlation returns NaN. In theory this could happen on small number of entities being clustered. We give this a large distance:
console.log("NaN in correlation calculation");
r = -2;
}
return 1 - r;
}
Expand All @@ -68,7 +97,7 @@ var preRankedSpearmanDist = function(item1, item2) {
* It will pre-calculate ranks and deviation and store this in inputItems[x].preProcessedValueList.
* This pre-calculation significantly improves the performance of the clustering step itself.
*/
var _prepareForAndGetDistanceFunction = function(inputItems) {
var _prepareForDistanceFunction = function(inputItems) {
//pre-calculate ranks and configure to use last step of SPEARMAN as distance function:
for (var i = 0; i < inputItems.length; i++) {
var inputItem = inputItems[i];
Expand Down Expand Up @@ -110,14 +139,34 @@ var hclusterCases = function(casesAndEntitites) {
refEntityList = getRefList(caseObj);
}
for (var j = 0; j < refEntityList.length; j++) {
var entityId = refEntityList[j];
var value = caseObj[entityId];
inputItem.orderedValueList.push(value);
}
var entityId = refEntityList[j];
var value = caseObj[entityId];
inputItem.orderedValueList.push(value);
}
inputItems.push(inputItem);
}
}
_prepareForAndGetDistanceFunction(inputItems);
if (refEntityList.length == 1) {
//this is a special case, where the "clustering" becomes a simple sorting in 1 dimension:
//so, just sort and return inputItems:
inputItems.sort(function (i1, i2) {
var val1 = i1.orderedValueList[0];
var val2 = i2.orderedValueList[0];
//ensure NaNs are moved out (NaN or null which are seen here as equivalents to NA (not available)) to the end of the list:
val1 = (val1 == null || isNaN(val1) ? Number.MAX_VALUE : val1);
val2 = (val2 == null || isNaN(val2) ? Number.MAX_VALUE : val2);
if (val1 > val2) {
return 1;
}
else if (val1 < val2) {
return -1;
}
return 0;
});
return inputItems;
}
//else, normal clustering:
_prepareForDistanceFunction(inputItems);
var clusters = clusterfck.hcluster(inputItems, preRankedSpearmanDist);
return clusters.clusters(1)[0];
}
Expand Down Expand Up @@ -167,7 +216,7 @@ var hclusterGeneticEntities = function(casesAndEntitites) {
}
inputItems.push(inputItem);
}
_prepareForAndGetDistanceFunction(inputItems);
_prepareForDistanceFunction(inputItems);
var clusters = clusterfck.hcluster(inputItems, preRankedSpearmanDist);
return clusters.clusters(1)[0];
}

0 comments on commit 5d74d3b

Please sign in to comment.