-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtsne_outliers.R
102 lines (70 loc) · 3.04 KB
/
tsne_outliers.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
##
## Script for detecting outliers in a t-SNE plot
##
load("../pbmc6k/data/seurat_33k_merged")
library(Seurat)
library(pbapply)
library(FNN)
tsne.neighbours <- function(data, max.distance = 1, cores=4) {
pb <- txtProgressBar(min = 0, max = nrow(data), style = 3)
for (i in 1:nrow(data)) {
# Calculate neighbours within max.distance
neighbours <- data$tSNE_1 > (data[i,"tSNE_1"] - max.distance) &
data$tSNE_1 < (data[i,"tSNE_1"] + max.distance) &
data$tSNE_2 > (data[i,"tSNE_2"] - max.distance) &
data$tSNE_2 < (data[i,"tSNE_2"] + max.distance)
data[i,"neighbours"] <- sum(neighbours) -1 # -1 to exclude current cell
setTxtProgressBar(pb, i)
}
close(pb)
return(data)
}
system.time(pbmc33k.tsne.rot <- tsne.neighbours(data = [email protected], max.distance = 1))
pbmc33k.tsne.rot[1:10,]
plot([email protected][email protected]$tSNE_1,
pch=19, cex=0.4, col = ifelse(pbmc33k.tsne.rot$neighbours < 20 ,'red','green'))
pbmc33k.tsne.rot[1:5,]
[email protected][1:200,1:2]
## WORKSPACE
snn.sum <- rowSums(as.matrix([email protected]))
plot(pbmc33k.tsne.rot$neighbours~snn.sum, pch = 19, cex = 0.4, col = ifelse(pbmc33k.tsne.rot$neighbours < 10 ,'red','green'))
t.cell.data <- FetchData(pbmc33k.merged, c("nUMI", "nGene", "percent.mito"), cells.use = WhichCells(pbmc33k.merged, ident = 13))
t.cell.outliers.data <- FetchData(pbmc33k.merged, c("nUMI", "nGene", "percent.mito"),
cells.use = WhichCells(pbmc33k.merged, cells.use = pbmc33k.tsne.rot$neighbours < 50, ident = 13))
all.cells <- FetchData(pbmc33k.merged, c("nUMI", "nGene", "percent.mito"))
outlier.data <- FetchData(pbmc33k.merged, c("nUMI", "nGene", "percent.mito"),
cells.use = WhichCells(pbmc33k.merged, cells.use = pbmc33k.tsne.rot$neighbours < 10))
hist(outlier.data$nUMI, breaks = 100)
hist(all.cells$nUMI, breaks = 100)
boxplot(outlier.data$nGene, all.cells$nGene)
row.1.not.null.snn <- [email protected][1,[email protected][1,] != 0]
row.1000.not.null.snn <- [email protected][1000,[email protected][1000,] != 0]
not.null.values.snn <- [email protected]
# rowSums(your.matrix != 0)
dim([email protected])
[email protected][1:10,1:10]
k.param <- 30
k.scale <- 25
k <- k.param*k.scale #750
genes.use <- [email protected]
data.use <- as.matrix([email protected][, 1:25])
my.knn <- get.knn(data = data.use, k = k)
my.knn$nn.dist[1:10,1:10]
my.knn$nn.index[1:10,1:10] #Euclidian distance
dim(my.knn$nn.index)
dim(my.knn$nn.dist)
n.cells <- nrow(data.use)
nn.ranked <- cbind(1:n.cells, my.knn$nn.index[, 1:(k.param-1)])
nn.large <- my.knn$nn.index
dim(nn.ranked)
nn.ranked[1:10,1:10]
nn.large[1:10,1:10]
length(row.1.not.null.snn)
length(row.1000.not.null.snn)
head(t.cell.data)
hist(t.cell.data$nUMI, breaks = 200)
hist(t.cell.outliers.data$nUMI, breaks = 200)
mean(t.cell.outliers.data$nUMI)
mean(t.cell.data$nUMI)
boxplot(t.cell.outliers.data$percent.mito, t.cell.data$percent.mito)