forked from zimmerlab/MS-EmpiRe
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.R
92 lines (69 loc) · 2.79 KB
/
example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
library(msEmpiRe)
#for evaluation at the end
library(PerfMeas)
#to extract featureData from data, also used for evaluation
library(Biobase)
#########################
# DIFFERENTIAL ANALYSIS #
#########################
f <- system.file("extdata", "c1_c3.data", package = "msEmpiRe")
p <- system.file("extdata", "c1_c3.pdata", package = "msEmpiRe")
#loading data from installed data sets. The dataset is the yeast spike-in benchmarking data of O'Connell et al., as discussed in the paper
data <- msEmpiRe::read.standard(f, p,
prot.id.generator=function(pep) unlist(strsplit(pep, "\\."))[1],
signal_pattern="c.*rep.*")
#extract the first two conditions
conditions <- extract_conditions(data)
conditions <- conditions[, c(1,2)]
#removing peptides that are detected in less than 2 samples per condition
data <- msEmpiRe::filter_detection_rate(data, condition=conditions)
#normalize, system.time is just to measure the time of processing
system.time(data <- msEmpiRe::normalize(data))
#required if we want to reproduce the analysis
set.seed(1234)
#main analysis, system.time is just to measure the time of processing
system.time(result <- de.ana(data))
##############
# EVALUATION #
##############
#as the data we have analysed is a benchmarking set, with known fold changes applied to a subset of the proteins
#(see paper for more detail),
#we can now evaluate the differential analysis, i.e. extract measures like precision, recall etc.
#all proteins are labelled, wether they are actually changing ("true"), or not ("false")
#extracting the labels,aggregate per protein
tmp <- fData(data)
tmp$is_true <- sapply(tmp$is_true, function(val)
{if(val=="true"){return(1)}
return(0)})
print(head(tmp))
tmp <- aggregate(is_true ~ prot.id, tmp, sum)
tmp$is_true <- sapply(tmp$is_true, function(val)
{
if(val > 0)
{
return(1)
}
return(0)
})
#labels must have same order as result
rownames(tmp) <- tmp$prot.id
tmp <- tmp[as.character(result$prot.id), ]
result$label <- tmp$is_true
min_fc <- 0.35
scores <- cbind(colnames(result)[grep("p.val", colnames(result))], colnames(result)[grep("p.adj", colnames(result))])
#print out the scores of the evaluation
apply(scores, 1, function(score)
{
print(score)
tp <- sum( (result[, score[2]] <= 0.05) & (result$label==1) & (abs(result$log2FC) >= min_fc))
real <- sum( (result$label==1))
called <- sum( (result[, score[2]] <= 0.05) & (abs(result$log2FC) >= min_fc))
print(sprintf("PREC: %g", tp / called))
print(sprintf("REC: %g", tp / real))
auroc <- AUC.single(1.0 - result[, score[1]], result$label)
preccc <- list(precision.at.all.recall.levels(1.0 - result[, score[1]], result$label))
auprc <- AUPRC(preccc)
print(sprintf("AUROC: %g", auroc))
print(sprintf("AUPRC: %g", auprc))
print("----------")
})