forked from EarthSystemDiagnostics/piccr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataHandling.R
103 lines (68 loc) · 3.44 KB
/
DataHandling.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
##===================================================================================
## DATAHANDLING.R:
## FUNCTION TO CALCULATE MEAN AND POOLED STANDARD DEVIATION
## OF THE LAST THREE INJECTIONS OF THE SAMPLE DATA
##
## author: Thomas Muench ([email protected])
##===================================================================================
###########################################################
TakeMean_CalcPooledSD.sample_data <- function(sample_data){
###########################################################
#------------------------------------------------------------------------------------
# get sample data sorted by injections
## maximum injection number
max.inj_no=max(sample_data[,'injection.no'])
## create auxiliary variable containing the numeric data stored in 'sample_data'
## as a matrix
aux=sample_data
aux$sample_names <- NULL
aux=data.matrix(aux)
## concatenate all sample data into a vector corresponding to ascending injection no.s
cbind_injections.sample_data=c(sapply((1:max.inj_no),ReturnInjNoData,aux))
#------------------------------------------------------------------------------------
# partition sample data in a 3D array:
# -> rows = sample number; columns = data type (LineNo, InjNo, O18, H2);
# third dimension = injection number
## dimension of array
dim.sample_data.partitioned=
c(dim(aux)[1]/max.inj_no,dim(aux)[2],max.inj_no)
## partition data in array if file contains more than one sample in total
if (dim.sample_data.partitioned[1]!=1)
sample_data.array=array(cbind_injections.sample_data,dim.sample_data.partitioned)
#------------------------------------------------------------------------------------
# take mean of last three injections/calculate pooled standard deviation
if (dim.sample_data.partitioned[1]==1){
## file contains only one sample in total
## take mean - convert result to array for consistency
sample_data.MeanLast3Inj=array(colMeans(aux),dim=c(1,4))
## calculate pooled sd
pooled.sd=sqrt(apply(aux[,c('delta.O18','delta.H2')],2,var,na.rm=TRUE))
} else {
## get index range of the last three injections
range=((max.inj_no-2):max.inj_no)
## correct range if less than three injections were measured in total
if (max.inj_no<3) range=(1:max.inj_no)
## take mean
sample_data.MeanLast3Inj=apply(sample_data.array[,,range],c(1,2),mean,na.rm=TRUE)
## calculate pooled sd
pooled.sd=sqrt(colMeans(apply(sample_data.array[,c(3,4),range],c(1,2),var,na.rm=TRUE)))
}
## name columns again as these information has been lost in above process
colnames(sample_data.MeanLast3Inj) <- colnames(aux)
names(pooled.sd) <- c('delta.O18','delta.H2')
#------------------------------------------------------------------------------------
# reduce vector of sample names to length of mean sample data
sample_names.mean=
sample_data[which(sample_data[,'injection.no']==max.inj_no),'sample_names']
#------------------------------------------------------------------------------------
# convert data back to data.frame
sample_data=as.data.frame(sample_data.MeanLast3Inj)
sample_data$sample_names=sample_names.mean
## move sample_names back to first column - only for later file output
sample_data=sample_data[c(dim(sample_data)[2],1:(dim(sample_data)[2]-1))]
#------------------------------------------------------------------------------------
# return data
res=list(sample_data=sample_data,pooled.sd=pooled.sd)
return(res)
### END OF FUNCTION ###
}