-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathTaskWatch.py
253 lines (210 loc) · 8.95 KB
/
TaskWatch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import gc
import json
from scipy.stats import f_oneway
from scipy.stats import f as F
import te_f_oneway
class NodeLogcalls:
''' 参数为节点logcalls路径 '''
def __init__(self, path):
with open(path, "r") as f:
self._datas = f.read()
self._datas = self._datas.split('\n')
self._datas = self._datas[:-1] # 最后一行为空白,去掉
# 函数调用信息(self._data为全部的原始数据信息)
self.calls = list()
# logcall的元素为 (函数调用时间, 函数名)
self.logcalls = list()
# called_func 的元素为所有被调用的函数名,没有重复
self.called_func = list()
# 时间窗口集合
self.timewinds_logcalls = list()
self.timewindows_func_count = list()
'''
pure_calls从logcalls中提取所有被调用的函数,
不包含函数调用时间,
用来根据函数名计算函数调用次数
'''
self.pure_calls = list()
self._exec_trim()
self.called_func = self._calc_calledfunc(self.logcalls)
def _exec_trim(self):
''' 整理原始数据, 获得函数调用信息
'''
# i, totals = 0, len(self._datas)
self.calls = self._datas[:]
for k in self.calls:
line = k.split(' ')
self.logcalls.append((line[0], line[-1]))
for k, v in self.logcalls:
self.pure_calls.append(v)
def _calc_calledfunc(self, logcalls=None):
''' 计算被调用的函数
'''
# called_func 的元素为所有被调用的函数名,没有重复
tmp = list()
for e in logcalls:
if e[1] not in tmp:
tmp.append(e[1])
return tmp
def get_calledfunc(self):
''' 获得被调用的函数
'''
# called_func 的元素为所有被调用的函数名,没有重复
return self.called_func
def get_all_calledfunc_count(self, called_func, pure_calls):
''' 获得所有被调用函数以及被调用次数
'''
dict_f = {}
for f in called_func:
dict_f[f] = pure_calls.count(f)
return dict_f
def get_func_count(self, func_name):
''' 获得指定的函数被调用次数, 返回次数
'''
return self.pure_calls.count(func_name)
def get_funcs_count(self, *args):
''' 获得指定函数被调用次数, 返回函数名与次数
'''
foo = dict()
for e in args:
foo[e] = self.get_func_count(e)
def help_gc(self):
""" 帮助GC释放无用的内存, 否则数据太大就会崩掉的
"""
self._datas = None
self.calls = None
gc.collect()
def get_pure_func(self, logcalls):
""" 根据指定logcalls返回所有函数名
"""
tmp = list()
for k, v in self.logcalls:
tmp.append(v)
return tmp
def split_timewindow(self, time_gap):
""" 按时间窗分割数据
"""
self.timewinds_logcalls = list()
start_time = int(self.logcalls[1][0])
gap_start = start_time
cnt_start = 0
cnt = 0
for e in self.logcalls:
cnt += 1
if int(e[0]) > gap_start + time_gap:
cnt = cnt - 1 if cnt > 0 else cnt # 回退一个位置的数据为时间窗口区间最后一个数据
self.timewinds_logcalls.append(self.logcalls[cnt_start:cnt])
cnt_start = cnt
gap_start += time_gap
log_and_pure_calls = list()
for e in self.timewinds_logcalls:
log_and_pure_calls.append((self._calc_calledfunc(e),
self.get_pure_func(e)))
timewindows_func_count = list()
for e in log_and_pure_calls:
timewindows_func_count.append(
self.get_all_calledfunc_count(e[0], e[1]))
self.timewindows_func_count = timewindows_func_count
return timewindows_func_count
def get_statistic_timewind(self, func_name, time_gap):
''' param: func_name 函数/任务名
param: time_gap 时间窗口大小
返回指定函数依据时间窗口获得的统计量
ex: a1 = [12, 14, 16]
a2 = [13, 10, 20] 这种形式
'''
a = list()
for e in self.split_timewindow(time_gap):
if func_name in e:
a.append(e[func_name])
else:
a.append(0)
return a
def save_to_file(self, filename):
with open(filename, 'w') as f:
f.write(
json.dumps(
self.get_all_calledfunc_count(self.called_func,
self.pure_calls)))
# That function use the te_f_oneway as ANOVA analysis written by Zhang Te a nice people.
def get_route_related_fun(nodes, func_name, time_gap, alpha=0.05):
''' param: nodes 所有节点数据
param: func_name 要判断的函数/任务名
param: time_gap 统计的时间窗口大小(ms), 该参数决定样本容量
param: alpha 显著水平
return: True 经过方差分析, 多组数据之间的差异具有统计学意义,
即该函数的调用次数与组间因素(不同路由)的影响有关
False 反之
根据指定alpha返回某函数是否路由相关
该函数基于方差分析, 且没有进行方差齐检验
'''
all_statistic = list()
for e in nodes.values():
all_statistic.append(e.get_statistic_timewind(func_name, time_gap))
f = te_f_oneway.te_f_oneway(*all_statistic)
f_alpha = F.isf(alpha, len(all_statistic)-1, sum([len(e) for e in all_statistic])-len(all_statistic))
return True if f > f_alpha else False
""" That function use the f_oneway as ANOVA analysis provided by scipy library.
def get_route_related_fun(nodes, func_name, time_gap, alpha=0.05):
''' param: nodes 所有节点数据
param: func_name 要判断的函数/任务名
param: time_gap 统计的时间窗口大小(ms), 该参数决定样本容量
param: alpha 显著水平
return: True 经过方差分析, 多组数据之间的差异具有统计学意义,
即该函数的调用次数与组间因素(不同路由)的影响有关
False 反之
根据指定alpha返回某函数是否路由相关
该函数基于方差分析, 且没有进行方差齐检验
'''
all_statistic = list()
for e in nodes.values():
all_statistic.append(e.get_statistic_timewind(func_name, time_gap))
f, p = f_oneway(*all_statistic)
# print f, p
return True if p < alpha else False
"""
def get_axs(num=140):
''' 获得指定数量的子图, 子图数量总是4的倍数 31857
'''
axs = list()
for i in range(1, num / 4 + 1):
plt.figure(i)
for i in range(1, 5):
axs.append(plt.subplot(24 * 10 + i))
if num % 4 != 0:
plt.figure(num / 4 + 1)
for i in range(1, 5):
axs.append(plt.subplot(24 * 10 + i))
return axs
if __name__ == '__main__':
path = "/media/tete/F/TinyOS/wsnA/logcalls-logs/" # 数据路径
result_path = "C:/Users/Administrator/Desktop/tools/cooja/TaskCalls/result/5-result/" # 结果路径
node_amount = 2 #节点数量
nodes = dict() # 存储所有节点的NodeLogcalls实例
for i in range(1, node_amount + 1):
print str(i) + ' node has been created.'
nodes['node_' + str(i)] = NodeLogcalls(path + str(i) + '.txt')
#nodes['node_'+str(i)].help_gc()
#nodes['node_'+str(i)].save_to_file(result_path+'node_'+str(i)+'.txt')
""" 找出调用函数最多的节点, 以及调用函数的数量这样可
以在统计中覆盖到所有的函数,并且构造合适数量的子图
"""
most_func_node = {'node_1': 0}
most_func_node['node_1'] = len(nodes['node_1'].get_calledfunc())
for e in nodes:
foo = nodes[e].get_calledfunc()
if len(foo) > most_func_node[most_func_node.keys()[0]]:
most_func_node = {e: len(foo)}
print 'the number of total functions = ', most_func_node[
most_func_node.keys()[0]]
""" 获取调用函数最多节点的中被调用的所有函数 """
allfunc = list()
allfunc = nodes[most_func_node.keys()[0]].get_calledfunc()
""" x坐标为节点编号, 本例为(1 - 40)"""
x_nodes = [i for i in range(1, node_amount + 1)]
y_logcalls = list()
''' work prefect '''
for e in nodes['node_1'].called_func:
print get_route_related_fun(nodes, e, 500)