-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubmit.py
143 lines (122 loc) · 6.48 KB
/
submit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# -*- coding:utf-8 -*-
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import BertForMultipleChoice
from bertBaseDistribute import BertModelsCustom
import utils
import distribute_utils
from args import init_arg_parser
from DUMA import DUMA
import TerminalMultGPU
args = init_arg_parser()
Param = { # 训练的参数配置
'fold_num': 5, # 五折交叉验证
'seed': 42,
# 'model': 'hfl/chinese-bert-wwm-ext', #预训练模型
'max_len': 256, # 文本截断的最大长度
'epochs': 8,
'train_bs': 8, # batch_size,可根据自己的显存调整
'valid_bs': 8,
'lr': 2e-5, # 学习率
'num_workers': 4,
'accum_iter': 2, # 梯度累积,相当于将batch_size*2
'weight_decay': 1e-4, # 权重衰减,防止过拟合
'device': 0,
'path_data': '/home/zuoyuhui/DataGame/haihuai_RC/data/',
'model': '/home/zuoyuhui/DataGame/haihuai_RC/chinese-bert-wwm-ext',
}
def load_checkpoint(model, checkpoint_PATH, optimizer):
model_CKPT = torch.load(checkpoint_PATH)
model.load_state_dict(model_CKPT['state_dict'])
optimizer.load_state_dict(model_CKPT['optimizer'])
print('loading checkpoint!')
return model, optimizer
def preprocessing_df():
train_df = pd.read_csv(Param['path_data'] + 'train.csv')
test_df = pd.read_csv(Param['path_data'] + 'test.csv')
utils.seed_everything(Param['seed'])
train_df['label'] = train_df['Answer'].apply(lambda x: ['A', 'B', 'C', 'D'].index(x))
test_df['label'] = 0
return train_df, test_df
def test2submit(test_df, model, model_state_name, model_num, model_name, output_name):
device = torch.device('cuda')
test_set = utils.MyDataset(test_df)
test_loader = DataLoader(test_set, batch_size=Param['valid_bs'], collate_fn=TerminalMultGPU.collate_fn,
shuffle=False,
num_workers=Param['num_workers'])
model = model.to(device)
predictions = []
for fold in range(model_num): # 把训练后的五个模型挨个进行预测
y_pred = []
model.load_state_dict(torch.load(model_state_name.format(model_name.split('/')[-1], fold)),
strict=False)
print(model_name.split('/')[-1], fold)
with torch.no_grad():
tk = tqdm(test_loader, total=len(test_loader), position=0, leave=True)
for idx, (input_ids, attention_mask, token_type_ids, y) in enumerate(tk):
input_ids, attention_mask, token_type_ids, y = input_ids.to(device), attention_mask.to(
device), token_type_ids.to(device), y.to(device).long()
output = model(input_ids, attention_mask, token_type_ids)
output = output[0].cpu().numpy()
y_pred.extend(output)
predictions += [y_pred]
predictions = np.mean(predictions, 0).argmax(1) # 将结果按五折进行平均,然后argmax得到label
sub = pd.read_csv(Param['path_data'] + '/sample.csv', dtype=object)
sub['label'] = predictions
sub['label'] = sub['label'].apply(lambda x: ['A', 'B', 'C', 'D'][x])
sub.to_csv(output_name, index=False)
# chinese-bert-wwm-ext_fold_0.pt
if __name__ == '__main__':
train_df, test_df = preprocessing_df()
# train_df.to_csv(args.data_dir + "train_label.csv", index=False)
# train_df.to_csv(args.data_dir + "test_label.csv", index=False)
# test2submit(test_df, model_state_name="spawn_adv_{}_fold_{}.pt", model_num=3, model_name="chinese-bert-wwm-ext",
# output_name="sub_spawn_adv_fold3.csv")
"""头两折是pgd后面两折是fgm"""
# test2submit(test_df, model_state_name="spawn_adv_pgd_fgm_{}_fold_{}.pt", model_num=4,
# model_name="chinese-bert-wwm-ext",
# output_name="sub_spawn_adv_pgd_fgm_fold4.csv")
# test2submit(test_df, model_state_name="spawn_adv_pgd_fgm_{}_fold_{}.pt", model_num=2,
# model_name="chinese-bert-wwm-ext",
# output_name="sub_spawn_adv_pgd_fold2.csv")
# fgm bert 1e-5 single
# test2submit(test_df, model_state_name="spawn_adv_pgd_{}_fold_{}.pt", model_num=5,
# model_name="chinese-bert-wwm-ext",
# output_name="sub_spawn_adv_fgm_fold5.csv")
# test2submit(test_df, model_state_name="roformer/{}_fold_{}_roformer.pt", model_num=5,
# model_name="chinese_roformer_base",
# output_name="roformer_single_fold5.csv")
# bert ema fgm 10epoch
# test2submit(test_df, model_state_name="train/robert_fgm_early_{}_fold_{}.pt", model_num=5,
# model_name="chinese-bert-wwm-ext",
# output_name="bert_ema_10ep_fold5.csv")
# test2submit(test_df, model_state_name="spawn_adv_pgd_{}_fold_{}.pt", model_num=5,
# model_name="chinese-bert-wwm-ext",
# output_name="bert_adv_notr_fold5.csv")
bert_3linear = BertModelsCustom.BertForMultipleChoice.from_pretrained(Param['model'])
roberta = BertModelsCustom.BertForMultipleChoice.from_pretrained("/data2/roberta/hfl_chinese_roberta_wwm_ext")
bert_normal = BertModelsCustom.BertForMultipleChoice.from_pretrained(
"/home/zuoyuhui/DataGame/haihuai_RC/chinese-bert-wwm-ext")
bert_MHA = DUMA.BertForMultipleChoiceMHA(args, "/home/zuoyuhui/DataGame/haihuai_RC/chinese-bert-wwm-ext")
# 加三个全连接层
# test2submit(test_df, bert_3linear, model_state_name="spawn_3linear_{}_fold_{}.pt", model_num=5,
# model_name="chinese-bert-wwm-ext",
# output_name="bert_3linear_fold5_1.csv")
# 数据增强 第二折不收敛 加三个全连接层
# test2submit(test_df, bert_3linear, model_state_name="spawn_adv_pgd_{}_fold_{}.pt", model_num=4,
# model_name="chinese-bert-wwm-ext",
# output_name="bert_enhance1.csv")
# 五折不打乱 句子乱序
# test2submit(test_df, roberta, model_state_name="spawn_adv_pgd_{}_fold_{}.pt", model_num=4,
# model_name="hfl_chinese_roberta_wwm_ext",
# output_name="roberta_4flod.csv")
# pgd
# test2submit(test_df, bert_MHA, model_state_name="./DUMA/bert_pgd_mha_{}_fold_{}.pt", model_num=2,
# model_name="chinese-bert-wwm-ext",
# output_name="bert_pgd_mha_ema_flod2.csv")
test2submit(test_df, bert_normal, model_state_name="./addtest85_{}_fold_{}.pt", model_num=3,
model_name="chinese-bert-wwm-ext",
output_name="bert_pgd_addtest85.csv")