-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathXCMSConsensusXML.R
99 lines (86 loc) · 4 KB
/
XCMSConsensusXML.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#' Convert an OpenMS consensusXML file into an xcms `xcmsSet` object
#'
#' Reads the `mapList` and `consensusElementList` sections of a consensusXML
#' document and fills the `peaks`, `groups`, `groupidx`, `phenoData`, `rt` and
#' `filepaths` slots of a fresh `xcmsSet`.
#'
#' Every grouped element becomes one row of the peak matrix. The file provides
#' a single m/z, retention time and intensity per element, so `mz`/`mzmin`/
#' `mzmax`, `rt`/`rtmin`/`rtmax` and `into`/`intb`/`maxo` each repeat that one
#' value; `sn` is fixed to 1. Every consensus element becomes one row of the
#' group matrix: the centroid supplies `mzmed`/`rtmed`, the range over its
#' grouped elements supplies the min/max columns, and `featureXMLXCMS` is
#' fixed to 2.
#'
#' @param consensusXMLFile Path to the consensusXML file (anything accepted by
#'   `XML::xmlParse()`).
#' @return An `xcmsSet` object. All samples are assigned to the class `1`, and
#'   the raw and corrected retention times are identical.
#' @note Attaches the XML package as a side effect. Sample indices are the
#'   `map` attribute plus one, while sample names and file paths are taken in
#'   `mapList` document order; this presumably assumes map ids are listed as
#'   0, 1, 2, ... -- TODO confirm against the files being converted.
ConsensusXMLToXCMS <- function(consensusXMLFile = NA) {
  library(XML)

  if (length(consensusXMLFile) == 1L && is.na(consensusXMLFile)) {
    stop("'consensusXMLFile' must be supplied", call. = FALSE)
  }
  # new("xcmsSet") below needs the class definition that xcms provides.
  if (!requireNamespace("xcms", quietly = TRUE)) {
    stop("package 'xcms' is required to build an xcmsSet", call. = FALSE)
  }

  # First direct child of `node` with tag name `tag`; fails loudly if absent.
  first_child <- function(node, tag) {
    idx <- as.numeric(which(xmlSApply(node, xmlName) == tag))
    if (length(idx) == 0L) {
      stop("consensusXML is missing a <", tag, "> element", call. = FALSE)
    }
    node[[idx[1L]]]
  }

  # Value of attribute `key`, or NA when the node does not carry it.
  attr_value <- function(attrs, key) {
    if (is.null(attrs) || !(key %in% names(attrs))) {
      return(NA_character_)
    }
    attrs[[key]]
  }
  attr_number <- function(attrs, key) {
    as.numeric(attr_value(attrs, key))
  }

  # c(min, max) of the non-missing values, or two NAs when there are none.
  na_range <- function(x) {
    x <- x[!is.na(x)]
    if (length(x) == 0L) {
      return(c(NA_real_, NA_real_))
    }
    range(x)
  }

  root <- xmlRoot(xmlParse(consensusXMLFile))

  # Samples ----
  map_list <- first_child(root, "mapList")
  n_maps <- xmlSize(map_list)
  map_paths <- character(n_maps)
  for (j in seq_len(n_maps)) {
    map_paths[j] <- attr_value(xmlAttrs(map_list[[j]]), "name")
  }
  map_names <- tools::file_path_sans_ext(basename(map_paths))

  # Peaks and groups ----
  peak_cols <- c("mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax",
                 "into", "intb", "maxo", "sn", "sample")
  group_cols <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
                  "npeaks", "featureXMLXCMS")

  consensus_list <- first_child(root, "consensusElementList")
  n_consensus <- xmlSize(consensus_list)

  groups <- matrix(NA_real_, nrow = n_consensus, ncol = length(group_cols))
  colnames(groups) <- group_cols
  if (n_consensus > 0) {
    rownames(groups) <- seq_len(n_consensus)
  }

  peak_blocks <- vector("list", n_consensus)
  group_idx <- vector("list", n_consensus)
  next_row <- 1

  for (i in seq_len(n_consensus)) {
    element <- consensus_list[[i]]
    centroid <- xmlAttrs(first_child(element, "centroid"))
    # Look the member list up per element; its position among the children
    # is not guaranteed to be the same for every consensus element.
    members <- first_child(element, "groupedElementList")
    n_members <- xmlSize(members)

    block <- matrix(NA_real_, nrow = n_members, ncol = length(peak_cols),
                    dimnames = list(NULL, peak_cols))
    for (j in seq_len(n_members)) {
      member <- xmlAttrs(members[[j]])
      member_mz <- attr_number(member, "mz")
      member_rt <- attr_number(member, "rt")
      member_it <- attr_number(member, "it")
      # Map ids in the file are zero-based; xcms sample indices are one-based.
      member_sample <- attr_number(member, "map") + 1
      block[j, ] <- c(member_mz, member_mz, member_mz,
                      member_rt, member_rt, member_rt,
                      member_it, member_it, member_it,
                      1, member_sample)
    }
    peak_blocks[[i]] <- block

    # Min/max are taken over all members of the feature at once.
    mz_range <- na_range(block[, "mz"])
    rt_range <- na_range(block[, "rt"])
    groups[i, ] <- c(attr_number(centroid, "mz"), mz_range,
                     attr_number(centroid, "rt"), rt_range,
                     n_members, 2)

    # Rows of the final peak matrix that belong to this consensus feature.
    group_idx[[i]] <- next_row + seq_len(n_members) - 1
    next_row <- next_row + n_members
  }

  peaks <- do.call(rbind, peak_blocks)
  if (is.null(peaks)) {
    peaks <- matrix(NA_real_, nrow = 0, ncol = length(peak_cols))
  }
  colnames(peaks) <- peak_cols
  if (nrow(peaks) > 0) {
    rownames(peaks) <- seq_len(nrow(peaks))
  }

  # Retention times ----
  # Only the retention times of reported features are known, so each sample's
  # vector is the sorted set of distinct feature retention times.
  rt <- list()
  for (sm in unique(peaks[, "sample"])) {
    rt[[sm]] <- sort(unique(peaks[peaks[, "sample"] == sm, "rt"]))
  }

  # Assemble the xcmsSet ----
  phenoData <- data.frame(class = rep(1, n_maps))
  rownames(phenoData) <- map_names

  outputXCMS <- new("xcmsSet")
  outputXCMS@peaks <- peaks
  outputXCMS@groups <- groups
  outputXCMS@groupidx <- group_idx
  outputXCMS@phenoData <- phenoData
  outputXCMS@rt$raw <- rt
  outputXCMS@rt$corrected <- rt
  outputXCMS@filepaths <- map_paths
  outputXCMS
}