-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathanova.sh
executable file
·114 lines (93 loc) · 2.27 KB
/
anova.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash
#set -x
set -e
set -u
usage()
{
cat <<EOF >&2
${txtcyn}
Usage:
$0 options${txtrst}
${bldblu}Function${txtrst}:
This script is used to do ANOVA analysis.
It accepts several types of input data.
1. Compute if there is significant difference for every compound in each two groups.
Data matrix
Compound Ita1 Ita2 Chi1 Chi2 Ger1 Ger2
pig 0 0 800 790.66089 200 200.49341
rabbit 283.05845 145.22119 1224.42165 501.76551 1176.32261 497.01935
monkey 5316.20028 5324.68285 8223.78233 6077.86028 10167.074 15420.62318
Group file (two column file, header is needed but their real name will be omitted)
Individual group
Ita1 Ita
Ita2 Ita
Ita3 Ita
Chi1 Chi
Chi2 Chi
Chi3 Chi
Ger1 Ger
Ger2 Ger
Ger3 Ger
The output would be
Compound Ger-Chi Ita-Chi Ita-Ger
monkey 0.177434068907696 0.733945335047997 0.0951758177204322
pig 0 0 0
rabbit 0.997701154807196 0.37525039314285 0.397819955522017
${txtbld}OPTIONS${txtrst}:
-f Data matrix ${bldred}[NECESSARY]${txtrst}
-g Group file. Normally the first column of group file
corresponds with the first column of data matrix to
specify the group information (order not matter).
${bldred}[NECESSARY]${txtrst}
EOF
}
file=
group=
header='TRUE'
while getopts "hf:g:" OPTION
do
case $OPTION in
h)
usage
exit 1
;;
f)
file=$OPTARG
;;
g)
group=$OPTARG
;;
?)
usage
exit 1
;;
esac
done
if [ -z $file ]; then
usage
exit 1
fi
cat <<END >${file}.annnovar.r
library(reshape2)
library(data.table, quietly=T)
test <- read.table("${file}", header=T, row.names=1, quote="")
test\$id <- rownames(test)
test_m <- melt(test, id.vars="id")
grp_file <- read.table("${group}", header=T, row.names = 1, quote="")
colnames(grp_file) <- c("group")
grp_file\$variable <- rownames(grp_file)
merged_data <- merge(test_m, grp_file, by="variable", all.x=TRUE)
all_result <- by(merged_data, INDICES=merged_data\$id, FUN=function(m) {
m <- droplevels(m)
ct_aov <- aov(value ~ group, data=m)
ct_hsd <- TukeyHSD(ct_aov)
result <- as.data.frame(ct_hsd\$group[,4, drop=F])
#colnames(result) <- paste0(unique(m\$id), "_", colnames(result))
colnames(result) <- unique(m\$id)
t(result)
})
result <- do.call(rbind, unname(all_result))
#result
write.table(result, file="${file}.anovar.xls", sep="\t", quote=F)
END
Rscript ${file}.annnovar.r