-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRunNMF.R
69 lines (57 loc) · 2.8 KB
/
RunNMF.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
## This is the third script to run -- must be tweaked (uncommented here and there) to create a
## new NMF object. By default, it'll just load the .RData file described at the bottom.
# Factorizing the matrix:
# Libraries:
# install.packages("foreach")
# install.packages("doParallel")
# install.packages("NMF")
# install.packages("Biobase")
#
# # If this is throwing a warning or erroring out, and you're on an ubuntu box, run the following:
# #!> sudo apt-get install r-bioc-biobase
#
# library(foreach)
# library(doParallel)
# library(NMF)
# install.extras("NMF")
#
## ASSUME factormatrix_clean exists and is already prepped.
# Choose method according to input matrix
# Exploratory NMF Run, perofrming test runs to examine fit of sparse models of ranks 2-20:
# estim.r <- nmf(factormatrix_clean, 2:20, .opt = "v3Pt")
# save(estim.r, file='./outputs/estim.r.RData')
# plot(estim.r)
# consensusmap(estim.r)
# Random baselining NMF run:
# factormatrix_clean.random <- randomize(factormatrix_clean)
# estim.r.random <- nmf(factormatrix_clean.random, 2:20, .opt = "v3Pt")
# save(estim.r.random, file='./outputs/estim.r.random.RData')
#
# # Compare runs:
# plot(estim.r, estim.r.random)
# Run the full-scale estimate:
# factormatrix_clean.3rank <- nmf(factormatrix_clean, 3, .opt = ".v3Pt")
# save(factormatrix_clean.3rank, file='./outputs/factormatrix_clean.3rank.RData')
## Diagnostics:
# 1. check iterative progress ... want to see this dramatically flatten:
# plot(factormatrix_clean.3rank)
# 2. consensus map: within the chart, this should be pretty dramatically either red or blue: light coloration is an indicator of the model's ambiguity, or of mixed-ranking (which may be totally accurate for hte dataset):
# consensusmap(factormatrix_clean.3rank)
# 3. summary of model:
# summary(factormatrix_clean.3rank)
# summary(featureScore(factormatrix_clean.3rank))
# 4. fit evaluation:
# fit(factormatrix_clean.3rank)
# summary(fit(factormatrix_clean.3rank))
# summary(factormatrix_clean.3rank, target = factormatrix_clean)
# 5. Basis Map: Darker colors indicate higher coefficients, and the clusters on the left are colored to indicate the dominant rank within a cluster.
# basismap(factormatrix_clean.3rank)
# 6. Coefficient Map: (read the same as basis map)
# coefmap(factormatrix_clean.3rank)
# 7. Get products:
# write.csv(file = './outputs/test_matrix_3rank_basis.csv', x = basis(factormatrix_clean.3rank))
# write.csv(file = './outputs/test_matrix_3rank_coefs.csv', x = coef(factormatrix_clean.3rank))
# write.csv(file = './outputs/test_matrix_3rank_factor_predictions.csv', x = predict(factormatrix_clean.3rank, prob=TRUE))
# write.csv(file = './outputs/test_matrix_3rank_obs_predictions.csv', x = predict(factormatrix_clean.3rank, what="rows", prob=TRUE))
load("./outputs/factormatrix_clean.3rank.RData")
model.rank <- factormatrix_clean.3rank