Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
bsml320 authored Sep 20, 2022
1 parent fc4e703 commit a2da795
Show file tree
Hide file tree
Showing 4 changed files with 486 additions and 1 deletion.
189 changes: 189 additions & 0 deletions code/source/util/QuantileNormalization.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package util;

import java.util.*;

import org.apache.commons.math3.distribution.NormalDistribution;

/**
 * Static helpers for quantile normalization, ranking of double arrays and
 * generation of normally distributed score vectors.
 */
public class QuantileNormalization {

    /**
     * Quantile-normalizes every column of a matrix indexed as
     * {@code rawData[probe][sample]}.
     *
     * <p>Each column is sorted, the sorted columns are averaged position by
     * position, and every original value is then replaced by the averaged value
     * found at its rank within its own column. The input matrix is not modified.
     *
     * <p>The column count is taken from the last row; the matrix is assumed to be
     * rectangular. An empty matrix yields an empty result.
     *
     * @param rawData matrix of raw values, rows = probes, columns = samples
     * @return a new matrix of the same dimensions holding the normalized values
     */
    public static double[][] quantilenormalize(double[][] rawData) {
        int probeCount = rawData.length;
        // An empty matrix has no row from which the column count could be read.
        int sampleCount = (probeCount == 0) ? 0 : rawData[probeCount - 1].length;
        System.out.println("Performing quantile normalization for "+probeCount + " rows and " +sampleCount+" columns");
        double[][] qn_data = new double[probeCount][sampleCount];

        // Pass 1: mean of the k-th smallest value over all samples, for every k.
        double[] rankedMean = new double[probeCount];
        for (int sampleID = 0; sampleID < sampleCount; sampleID++) {
            double[] sorted = extractColumn(rawData, sampleID);
            java.util.Arrays.sort(sorted);
            for (int probeID = 0; probeID < probeCount; probeID++) {
                rankedMean[probeID] += sorted[probeID];
            }
        }
        for (int probeID = 0; probeID < probeCount; probeID++) {
            rankedMean[probeID] /= (double) sampleCount;
        }

        // Pass 2: every sample (including column 0) gets the ranked mean that
        // corresponds to the rank of each of its values.
        for (int s = 0; s < sampleCount; s++) {
            // rankify is defined elsewhere; the index arithmetic below assumes it
            // returns 1-based ranks -- TODO confirm against rankify.rankify.
            double[] probesRanked = rankify.rankify(extractColumn(rawData, s));
            for (int p = 0; p < probeCount; p++) {
                // NOTE(review): the float cast narrows the stored value to single
                // precision; kept as-is to preserve existing numeric output.
                qn_data[p][s] = (float) rankedMean[(int) probesRanked[p] - 1];
            }
        }

        return qn_data;
    }

    /**
     * Quantile-normalizes a single data vector against a reference vector
     * (a two-column normalization where only the data column is reported).
     *
     * <p>Both vectors are sorted on private copies, so the caller's arrays are
     * left untouched and the result keeps the original order of {@code rawData}:
     * entry {@code [p][1]} is the normalized value of {@code rawData[p]}.
     * Column 0 (the reference column) is not filled and stays at 0.0.
     *
     * @param refData reference values; must hold at least {@code rawData.length} entries
     *                (only the {@code rawData.length} smallest are used)
     * @param rawData values to normalize
     * @return a {@code rawData.length x 2} matrix with the normalized values in column 1
     * @throws IllegalArgumentException if {@code refData} is shorter than {@code rawData}
     */
    public static double[][] quantilenormalize_2cn(double[] refData, double[] rawData) {
        int probeCount = rawData.length;
        int sampleCount = 2;
        if (refData.length < probeCount) {
            throw new IllegalArgumentException(
                    "refData has " + refData.length + " entries but rawData has " + probeCount);
        }
        System.out.println(
                "Performing quantile normalization for " + probeCount + " rows and " + sampleCount + " columns");
        double[][] qn_data = new double[probeCount][sampleCount];

        // Sort copies: sorting the arguments themselves would scramble the caller's
        // data and make the rank lookup below operate on already-sorted values.
        double[] sortedRef = refData.clone();
        double[] sortedRaw = rawData.clone();
        java.util.Arrays.sort(sortedRef);
        java.util.Arrays.sort(sortedRaw);

        double[] rankedMean = new double[probeCount];
        for (int probeID = 0; probeID < probeCount; probeID++) {
            rankedMean[probeID] = (sortedRef[probeID] + sortedRaw[probeID]) / (double) sampleCount;
        }

        // Ranks are computed on the data in its original order.
        // rankify is defined elsewhere; assumed to return 1-based ranks -- TODO confirm.
        double[] probesRanked = rankify.rankify(rawData.clone());
        for (int p = 0; p < probeCount; p++) {
            // NOTE(review): float cast narrows precision; kept to preserve existing output.
            qn_data[p][1] = (float) rankedMean[(int) probesRanked[p] - 1];
        }

        return qn_data;
    }

    /**
     * Replaces every element by its 0-based dense rank: the number of distinct
     * values that sort before it. Tied elements receive the same rank.
     *
     * <p>Values are ordered with {@link Double#compareTo}, so {@code -0.0} sorts
     * before {@code 0.0} and {@code NaN} sorts last. The number of distinct values
     * is printed to standard output.
     *
     * @param original values to rank; not modified
     * @return array of the same length holding the rank of each element
     */
    public static int[] rank_double_array(double[] original) {
        int n = original.length;

        // Collect the distinct values in sorted order ...
        Map<Double, Integer> rankByValue = new TreeMap<>();
        for (int i = 0; i < n; i++) {
            rankByValue.put(original[i], 0);
        }

        // ... number them 0, 1, 2, ... in ascending order ...
        int rank = 0;
        for (Map.Entry<Double, Integer> entry : rankByValue.entrySet()) {
            entry.setValue(rank++);
        }

        // ... and look the rank up per element so that ties share one rank
        // instead of all but one tied position being left at 0.
        int[] ranked = new int[n];
        for (int i = 0; i < n; i++) {
            ranked[i] = rankByValue.get(original[i]);
        }

        System.out.println("total rank: "+rank);
        return ranked;
    }

    /**
     * Ranks every element by counting how many elements are strictly smaller.
     * Tied elements share a rank; comparisons involving {@code NaN} never count.
     * Runs in quadratic time.
     *
     * @param original values to rank; not modified
     * @return array of the same length holding the count of strictly smaller elements
     */
    public static int[] rank_double_array_2(double[] original) {
        int n = original.length;
        int[] ranked = new int[n];

        for (int i = 0; i < n; i++) {
            int smaller = 0;
            for (int j = 0; j < n; j++) {
                if (j != i && original[j] < original[i]) {
                    smaller++;
                }
            }
            ranked[i] = smaller;
        }

        return ranked;
    }

    /**
     * Draws {@code N} values from {@link Random#nextGaussian()} using a freshly
     * created, unseeded generator and prints their mean and sample standard
     * deviation to standard output.
     *
     * @param N number of values to draw
     * @return the drawn values in generation order
     */
    public static double[] generate_normal_distribution(int N) {
        double[] norm = new double[N];
        Random r = new Random();

        for (int i = 0; i < N; i++) {
            norm[i] = r.nextGaussian();
        }

        printSummary(norm);
        return norm;
    }

    /**
     * Builds {@code N} deterministic standard-normal quantiles in ascending order.
     *
     * <p>Element {@code i} is the inverse cumulative probability at {@code i/N};
     * element 0 uses {@code 0.5/N} instead, because probability 0 has no finite
     * quantile. The first element and the summary statistics are printed to
     * standard output.
     *
     * @param N number of quantiles to generate
     * @return the quantiles, smallest first
     */
    public static double[] generate_standard_normal_distribution(int N) {
        double[] norm = new double[N];

        NormalDistribution d = new NormalDistribution();
        for (int i = 0; i < N; i++) {
            double di = i;
            double p_rank = (i == 0) ? 0.5 / N : di / N;
            norm[i] = d.inverseCumulativeProbability(p_rank);
            if (i == 0) {
                System.out.println(N+"\t"+p_rank+"\t"+norm[i]);
            }
        }

        printSummary(norm);
        return norm;
    }

    /** Copies one column of a row-major matrix into a new array. */
    private static double[] extractColumn(double[][] matrix, int column) {
        double[] values = new double[matrix.length];
        for (int row = 0; row < matrix.length; row++) {
            values[row] = matrix[row][column];
        }
        return values;
    }

    /** Prints the count, mean and sample standard deviation (n-1 denominator) of the values. */
    private static void printSummary(double[] values) {
        int N = values.length;
        double sd = 0, mean = 0;
        for (int i = 0; i < N; i++) {
            mean = mean + values[i];
        }
        mean = mean / N;
        for (int i = 0; i < N; i++) {
            sd = sd + Math.pow(values[i] - mean, 2);
        }
        sd = Math.sqrt(sd / (N - 1));
        System.out.println("Random scores: n = "+N+", mean: "+String.format("%6.4e",mean)+", sd = "+String.format("%6.4e",sd));
    }
}
143 changes: 143 additions & 0 deletions code/source/util/RandomizeNode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package util;

import java.util.Arrays;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;

/**
 * Permutation helpers that randomly reassign node scores or edge weights.
 * Every method leaves its input untouched and returns a new container.
 */
public class RandomizeNode {

    /**
     * Randomly permutes the scores among all nodes.
     *
     * @param nodeHash node name to score
     * @return a new table with the same keys and the same multiset of scores,
     *         the scores being assigned to the keys in random order
     */
    public static Hashtable<String, Double> randomize_node (Hashtable<String, Double> nodeHash){
        Object[] nodes_in_G = nodeHash.keySet().toArray();
        double[] random_node_score = new double[nodes_in_G.length];
        for (int i = 0; i < nodes_in_G.length; i++) {
            random_node_score[i] = nodeHash.get(nodes_in_G[i]);
        }

        shuffle(random_node_score, new Random());

        Hashtable<String, Double> random_nodeHash = new Hashtable<String, Double>();
        for (int k = 0; k < nodes_in_G.length; k++) {
            random_nodeHash.put((String) nodes_in_G[k], random_node_score[k]);
        }
        return random_nodeHash;
    }

    /**
     * Randomly permutes the scores among nodes that carry the same group label,
     * so a score never leaves its group.
     *
     * <p>Only nodes present in both tables are considered: nodes listed in
     * {@code groupHash} without a score are skipped, and nodes with a score but
     * no group label do not appear in the result.
     *
     * @param nodeHash  node name to score
     * @param groupHash node name to group label
     * @return a new table holding the within-group permuted scores
     */
    public static Hashtable<String, Double> randomize_node_by_group (Hashtable<String, Double> nodeHash, Hashtable<String, Integer> groupHash){
        Hashtable<String, Double> random_nodeHash = new Hashtable<String, Double>();

        // Collect the scored nodes of every group.
        Hashtable<Integer, Vector<String>> group_label = new Hashtable<Integer, Vector<String>>();
        for (String node : groupHash.keySet()) {
            if (!nodeHash.containsKey(node)) {
                continue;
            }
            Integer group = groupHash.get(node);
            Vector<String> members = group_label.get(group);
            if (members == null) {
                members = new Vector<String>();
                group_label.put(group, members);
            }
            members.add(node);
        }

        // Groups are processed in ascending label order.
        Integer[] unique_group_label = group_label.keySet().toArray(new Integer[0]);
        Arrays.sort(unique_group_label);

        Random r = new Random();
        for (Integer cur_group_label : unique_group_label) {
            Vector<String> members = group_label.get(cur_group_label);
            int cur_group_size = members.size();

            double[] random_node_score = new double[cur_group_size];
            for (int j = 0; j < cur_group_size; j++) {
                random_node_score[j] = nodeHash.get(members.get(j));
            }

            shuffle(random_node_score, r);

            for (int j = 0; j < cur_group_size; j++) {
                random_nodeHash.put(members.get(j), random_node_score[j]);
            }
        }

        return random_nodeHash;
    }

    /**
     * Returns a randomly permuted copy of the edge weights.
     *
     * @param edge_weights weights to permute; not modified
     * @return a new array holding the same values in random order
     */
    public static double[] randomize_edge (double[] edge_weights){
        double[] random_edge_score = edge_weights.clone();
        shuffle(random_edge_score, new Random());
        return random_edge_score;
    }

    /**
     * Shuffles the array in place with the Fisher-Yates algorithm, which yields
     * a uniformly random permutation in linear time (the previous approach of
     * bubble-sorting random keys needed quadratic time for the same result).
     */
    private static void shuffle(double[] values, Random rng) {
        for (int i = values.length - 1; i > 0; i--) {
            int j = rng.nextInt(i + 1); // uniform in [0, i]
            double tmp = values[i];
            values[i] = values[j];
            values[j] = tmp;
        }
    }

}
Loading

0 comments on commit a2da795

Please sign in to comment.