forked from Castelo-Branco-lab/Meijer_Agirre_scATACEAE_2020
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultiome_preproc_EAECtrl.R
141 lines (100 loc) · 4.27 KB
/
multiome_preproc_EAECtrl.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
library(Signac)
library(Seurat)
library(EnsDb.Mmusculus.v79)
library(BSgenome.Mmusculus.UCSC.mm10)
library(tidyverse)
set.seed(1234)
#combine together the peak and control that worked 1004 and 1005
###analysis sepparatelly ;
# the 10x hdf5 file contains both data types.
inputdata.10x_CP <- Read10X_h5("outs/filtered_feature_bc_matrix.h5")
# extract RNA and ATAC data
rna_counts_CP <- inputdata.10x_CP$`Gene Expression`
atac_counts_CP <- inputdata.10x_CP$Peaks
# Create Seurat object
EAE_CP <- CreateSeuratObject(counts = rna_counts_CP)
EAE_CP[["percent.mt"]] <- PercentageFeatureSet(EAE_CP, pattern = "^mt-")
#QC atac
grange.counts <- StringToGRanges(rownames(atac_counts_CP), sep = c(":", "-"))
grange.use <- seqnames(grange.counts) %in% standardChromosomes(grange.counts)
atac_counts_CP <- atac_counts_CP[as.vector(grange.use), ]
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
seqlevelsStyle(annotations) <- 'UCSC'
genome(annotations) <- "mm10"
frag.file <- "atac_fragments.tsv.gz"
chrom_assay <- CreateChromatinAssay(
counts = atac_counts_CP,
sep = c(":", "-"),
genome = 'mm10',
fragments = frag.file,
min.cells = 10,
annotation = annotations
)
EAE_CP[["ATAC"]] <- chrom_assay
DefaultAssay(EAE_CP) <- "ATAC"
EAE_CP <- NucleosomeSignal(EAE_CP)
EAE_CP <- TSSEnrichment(EAE_CP)
EAE_CP.subset <- subset(
x = EAE_CP,
subset = nCount_ATAC < 100000 &
nCount_RNA < 25000 &
nCount_ATAC > 500 &
nCount_RNA > 500 &
nucleosome_signal < 2 &
TSS.enrichment > 1 &
percent.mt < 20
)
##peak calling atac
DefaultAssay(EAE_CP.subset) <- "ATAC"
peaks <- CallPeaks(EAE_CP.subset, macs2.path = "/home/eneritz/anaconda3/bin/macs2")
# remove peaks on nonstandard chromosomes and in genomic blacklist regions
peaks <- keepStandardChromosomes(peaks, pruning.mode = "coarse")
peaks <- subsetByOverlaps(x = peaks, ranges = blacklist_mm10, invert = TRUE)
# quantify counts in each peak
macs2_counts <- FeatureMatrix(
fragments = Fragments(EAE_CP.subset),
features = peaks,
cells = colnames(EAE_CP.subset)
)
# create a new assay using the MACS2 peak set and add it to the Seurat object
EAE_CP.subset[["peaks"]] <- CreateChromatinAssay(
counts = macs2_counts,
fragments = frag.file,
annotation = annotations
)
###########################
##############################harmony
# RNA analysis
DefaultAssay(EAE_CP.subset) <- "RNA"
#EAE_CP.subset <- SCTransform(EAE_CP.subset, verbose = FALSE) %>% RunPCA() %>% RunUMAP(dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')
library(harmony)
EAE_CP.subset <- NormalizeData(EAE_CP.subset) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA(verbose = FALSE)
EAE_CP.subset <- RunHarmony(EAE_CP.subset, group.by.vars = "sample")
EAE_CP.subset <- RunUMAP(EAE_CP.subset, reduction = "harmony", dims = 1:30 , reduction.name = "umap.harmony", reduction.key = 'rnaUMAP_')
EAE_CP.subset <- FindNeighbors(EAE_CP.subset, reduction = "harmony", dims = 1:30) %>% FindClusters()
# ATAC analysis
# We exclude the first dimension as this is typically correlated with sequencing depth
DefaultAssay(EAE_CP.subset) <- "peaks"
EAE_CP.subset <- RunTFIDF(EAE_CP.subset)
EAE_CP.subset <- FindTopFeatures(EAE_CP.subset, min.cutoff = 'q0')
EAE_CP.subset <- RunSVD(EAE_CP.subset)
EAE_CP.subset <- RunHarmony(
object = EAE_CP.subset,
group.by.vars = 'sample',
reduction = 'lsi',
assay.use = 'ATAC',
project.dim = FALSE,
reduction.save="harmony.atac"
)
##not integrated
#DefaultAssay(EAE_CP.subset) <- "peaks"
#EAE_CP.subset <- RunTFIDF(EAE_CP.subset)
#EAE_CP.subset <- FindTopFeatures(EAE_CP.subset, min.cutoff = 'q0')
#EAE_CP.subset <- RunSVD(EAE_CP.subset)
#EAE_CP.subset <- RunUMAP(EAE_CP.subset, reduction = 'lsi', dims = 2:30, reduction.name = "umap.atac_nonint", reduction.key = "atacnonintUMAP_")
#integrated
EAE_CP.subset <- RunUMAP(EAE_CP.subset, reduction = 'harmony.atac', dims = 2:30, reduction.name = "umap.atac", reduction.key = "atacharm_")
#multimodal neighbors:
EAE_CP.subset <- FindMultiModalNeighbors(EAE_CP.subset, reduction.list = list("harmony", "harmony.atac"), dims.list = list(1:30, 2:30))
EAE_CP.subset <- RunUMAP(EAE_CP.subset, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_")
EAE_CP.subset <- FindClusters(EAE_CP.subset, graph.name = "wsnn", algorithm = 3, verbose = FALSE)