forked from MBrooks313/R_codes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGOANA.R
70 lines (57 loc) · 2.93 KB
/
GOANA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
GOANA <- function(gene.list, univ, species){
##This function performs Gene Ontology and KEGG enrichment analysis within the limma package. The user supplies a named
##list (or list-of-lists) of ENTREZ gene IDs for clusters (or gene subsets) and a vector of ENTREZ gene IDs to be used as
##a background list (expressed genes). Benjamini-Hochberg false discovery rate (FDR) is calculated. It exports a csv file of the complete enrichment analysis.
##This is the 1st of 3 programs for GO/KEGG enrichment.
##***WARNING*** This function takes several minutes to run depending on how many lists are being analyzed.
##This written by Matthew Brooks, last modified March 23rd, 2017
####USAGE####
# gene.list = list (or list-of-list) of ENTREZ gene IDS.
# univ = list of ENTREZ gene IDs for
# species = 'Hs', 'Mm', or 'Rn'
#
# Example:
# GOANA(gene.list = cl.list, univ=univ, species = "Hs")
#
# Output is a csv file for each item in gene.list ending in "_GOANA.csv"
#############
require(limma)
#Load the species database
if (species == "Hs") {require(org.Hs.eg.db); x<-as.list(org.Hs.egGO2ALLEGS); xx<-as.list(org.Hs.egSYMBOL2EG); k<-"hsa"}
if (species == "Mm") {require(org.Mm.eg.db); x<-as.list(org.Mm.egGO2ALLEGS); xx<-as.list(org.Mm.egSYMBOL2EG); k<-"mmu"}
if (species == "Rn") {require(org.Rn.eg.db); x<-as.list(org.Rn.egGO2ALLEGS); xx<-as.list(org.Rn.egSYMBOL2EG); k<-"rno"}
#if (species == "Dm") require()
#if (species == "Pt") require()
name.list <- names(gene.list) #for list of IDs
#Loop through each list given
for (i in 1:length(name.list)){
list <- gene.list[[i]] #for list of IDs
#Perform Gene Ontology enrichment
go <- goana(list, universe=univ, species=species)
go <- go[order(go$P.DE),]
rank.go <- 1:dim(go)[1]
#Perform Kegg pathway enrichment
kegg.g <- getGeneKEGGLinks(species.KEGG = k, convert = TRUE)
kegg.p <- getKEGGPathwayNames(species.KEGG = k, remove.qualifier = FALSE)
kg <- kegga(list, species=species, gene.pathway = kegg.g, pathway.names = kegg.p)
kg <- kg[order(kg$P.DE),]
rank.kg <- 1:dim(kg)[1]
#Calculate FDR
go$FDR <- go$P.DE*dim(go[go$DE!=0,])[1]/rank.go
kg$FDR <- kg$P.DE*dim(kg[kg$DE!=0,])[1]/rank.kg
#Get gene names of GO enrichment
for (j in 1:dim(go)[1]){
z <- gene.list[[i]][gene.list[[i]] %in% x[row.names(go[j,])][[1]]]
go$Genes[j] <- paste(sort(names(xx[match(z,xx)])), collapse=",")
}
#Get gene names of Kegg enrichment
for (j in 1:dim(kg)[1]){
z <- gsub(".+(\\d....)$", "\\1",row.names(kg[j,]), perl = T)
zz <- gene.list[[i]][gene.list[[i]] %in% kegg.g[grep(z, kegg.g$PathwayID),1]]
kg$Genes[j] <- paste(sort(names(xx[match(zz,xx)])), collapse=",")
}
#Write files
write.csv(go, file=paste(paste(name.list[i], "GOANA", sep="_"),".csv", sep=""))
write.csv(kg, file=paste(paste(name.list[i], "KGANA", sep="_"),".csv", sep=""))
}
}