-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_explore.py
74 lines (63 loc) · 2.42 KB
/
data_explore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# _*_coding:utf-8_*_
"""
@Time : 2020/8/28 9:35
@Author : ji hao ran
@File : data_explore.py
@Project : waterGame
@Software : PyCharm
"""
import seaborn as sns
from utils import *
# Data: one training set and three validation sets, as returned by
# data_preprocessing(). That function is not defined in this file;
# presumably it comes from `from utils import *` -- TODO confirm.
train_set, valid_set1, valid_set2, valid_set3 = data_preprocessing()
# Root directory for every chart and table written by this script.
output_path = 'E:/waterGame/explore/'
# Descriptive statistics: write one describe() table per dataset as CSV.
for frame, rel_path in (
    (train_set, 'describe/train_set.csv'),
    (valid_set1, 'describe/valid_set1.csv'),
    (valid_set2, 'describe/valid_set2.csv'),
    (valid_set3, 'describe/valid_set3.csv'),
):
    frame.describe().to_csv(output_path + rel_path)
# Time-series plots for the training set: one PNG per (year, variable),
# saved as <output_path>plot/train_set/train_set_<year>_<var>.png.
# NOTE(review): `plt` is not imported in this file; presumably it is
# re-exported by `from utils import *` -- confirm.
#
# Select the non-interactive Agg backend once, before any figure is created.
# The choice does not depend on the loop variables, so it is not repeated
# for every figure.
plt.switch_backend('agg')
series_vars = ['Qi', 'T', 'w', 'Rsum', 'D1', 'D2', 'D3', 'D4', 'D5']
for year in range(2013, 2018):  # years 2013 through 2017 inclusive
    # Row mask for this year; it is the same for every variable, so build it
    # once per year. Reading `.index.year` requires a datetime-like index.
    in_year = train_set.index.year == year
    for var in series_vars:
        title = '_'.join(['train_set', str(year), var])
        fig = plt.figure(figsize=(16, 8))
        plt.plot(train_set.loc[in_year, var], label=var)
        plt.legend()
        plt.title(title)
        plt.savefig(output_path + 'plot/train_set/' + title + '.png')
        # Close this figure explicitly so figures do not pile up in memory.
        plt.close(fig)
# Time-series plots for the validation sets: one PNG per (dataset, variable),
# saved as <output_path>plot/<name>/<name>_<var>.png.
# NOTE(review): `plt` is not imported in this file; presumably it is
# re-exported by `from utils import *` -- confirm.
#
# Select the non-interactive Agg backend once, up front; the call is
# loop-invariant and does not need repeating for every figure.
plt.switch_backend('agg')
for name, data in (
    ('valid_set1', valid_set1),
    ('valid_set2', valid_set2),
    ('valid_set3', valid_set3),
):
    for var in ['Qi', 'T', 'w', 'Rsum', 'D1', 'D2', 'D3', 'D4', 'D5']:
        title = '_'.join([name, var])
        fig = plt.figure(figsize=(16, 8))
        plt.plot(data.loc[:, var], label=var)
        plt.legend()
        plt.title(title)
        plt.savefig(output_path + 'plot/' + name + '/' + title + '.png')
        # Close this figure explicitly so figures do not pile up in memory.
        plt.close(fig)
# Box plots (not implemented yet)
# Histograms (not implemented yet)
# Scatter-plot matrix (currently disabled)
# sns.pairplot(train_set, hue='rainy_season', markers=["o", "s"])
# sns.pairplot(valid_set1, hue='rainy_season', markers=["o", "s"])
# sns.pairplot(valid_set2, hue='rainy_season', markers=["o", "s"])
# sns.pairplot(valid_set3, hue='rainy_season', markers=["o", "s"])
# Correlation analysis: compute each dataset's correlation matrix, write it
# to CSV, then render it as an annotated heatmap.
#
# Correlation matrices (CSV file names kept exactly as before).
train_set_corr = train_set.corr()
train_set_corr.to_csv(output_path + 'corr/train_set.csv')
valid_set1_corr = valid_set1.corr()
valid_set1_corr.to_csv(output_path + 'corr/valid1_set.csv')
valid_set2_corr = valid_set2.corr()
valid_set2_corr.to_csv(output_path + 'corr/valid2_set.csv')
valid_set3_corr = valid_set3.corr()
valid_set3_corr.to_csv(output_path + 'corr/valid3_set.csv')
# Heatmaps. Without an explicit `ax`, sns.heatmap draws into the current
# axes, so four consecutive calls would stack on the same axes and nothing
# would be written to disk. Give each matrix its own figure, save it as a
# PNG next to the matching CSV, and close the figure afterwards.
# NOTE(review): `plt` is not imported in this file; presumably it is
# re-exported by `from utils import *` -- confirm.
for stem, corr in (
    ('train_set', train_set_corr),
    ('valid1_set', valid_set1_corr),
    ('valid2_set', valid_set2_corr),
    ('valid3_set', valid_set3_corr),
):
    # Near-square canvas so the annotated cells stay legible.
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(corr, annot=True, ax=ax)
    ax.set_title(stem)
    fig.savefig(output_path + 'corr/' + stem + '.png')
    plt.close(fig)