-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathconsts.py
142 lines (123 loc) · 3.81 KB
/
consts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# Fixed file names (no directory component) for per-run artifacts.
# NOTE(review): presumably joined onto an output/checkpoint directory by the
# callers -- confirm against usage.
FILENAME_TEST_RESULT = "result_test.txt"
FILENAME_SCORES = "scores.txt"
FILENAME_BEST_MODEL = "best_model.bin"
FILENAME_PARAMS = "params.json"
# Relative path of the vocabulary file for each tokenizer key.
# NOTE(review): the "22675" suffix is presumably the vocabulary size -- confirm.
VOCAB_FILES = {
    "cangjie": "tokenizers/cangjie_zh_22675.vocab",
    "pinyin": "tokenizers/pinyin_zh_22675.vocab",
    "stroke": "tokenizers/stroke_zh_22675.vocab",
    "wubi": "tokenizers/wubi_zh_22675.vocab",
    "zhengma": "tokenizers/zhengma_zh_22675.vocab",
    "zhuyin": "tokenizers/zhuyin_zh_22675.vocab",
    "raw": "tokenizers/raw_zh_22675.vocab",
    "bert": "tokenizers/bert_chinese_uncased_22675.vocab",
    "pinyin_concat_wubi": "tokenizers/pinyin_concat_wubi_22675.vocab",
}
# Vocabulary paths for the CWS tokenizer variants.
# Each value is a str.format-style template containing a single "{}"
# placeholder; the caller must fill it in before using the path.
# NOTE(review): what the placeholder stands for is not visible here -- confirm
# against the call site.
VOCAB_FILES_CWS = {
    "pinyin": "cws_tokenizers/pinyin_cws_22675_{}.vocab",
    "wubi": "cws_tokenizers/wubi_cws_22675_{}.vocab",
}
# Relative vocabulary paths for the "no_index" tokenizer variants.
VOCAB_FILES_NO_INDEX = {
    "pinyin": "tokenizers/pinyin_no_index_22675.vocab",
    "wubi": "tokenizers/wubi_no_index_22675.vocab",
}
# Relative vocabulary paths for the "shuffled" tokenizer variants.
VOCAB_FILES_SHUFFLED = {
    "wubi": "tokenizers/shuffled_wubi_22675.vocab",
    "pinyin": "tokenizers/shuffled_pinyin_22675.vocab",
}
# Tokenizer type name associated with each tokenizer key.
# NOTE(review): the values look like identifiers resolved to tokenizer classes
# elsewhere in the project -- confirm where they are consumed.
TOKENIZER_TYPES = {
    # Keys that all share the "CommonZh" tokenizer type.
    "cangjie": "CommonZh",
    "pinyin": "CommonZh",
    "stroke": "CommonZh",
    "wubi": "CommonZh",
    "zhengma": "CommonZh",
    "zhuyin": "CommonZh",
    # Keys with a dedicated tokenizer type.
    "raw": "RawZh",
    "bert": "BertZh",
    "pinyin_no_index": "CommonZhNoIndex",
    "wubi_no_index": "CommonZhNoIndex",
    "pinyin_concat_wubi": "PinyinConcatWubi",
}
# Ordered list of model keys.
# NOTE(review): "pinyin_concat_wubi" has entries in VOCAB_FILES and
# TOKENIZER_TYPES but is not listed here -- confirm that is intentional.
MODEL_NAMES = [
    "cangjie",
    "pinyin",
    "stroke",
    "wubi",
    "zhengma",
    "zhuyin",
    "raw",
    "bert",
]
# Name of the selected ("best") checkpoint for each model key.
# NOTE(review): both "ckpt_8804" and "ckpt_8840" occur in this table; confirm
# that neither is a digit transposition of the other.
BEST_CKPTS = {
    "cangjie": "ckpt_8804",  # alternative noted for cangjie: ckpt_7202
    "pinyin": "ckpt_8804",
    "stroke": "ckpt_8804",
    "wubi": "ckpt_8804",  # alternative noted for wubi: ckpt_7992
    "zhengma": "ckpt_8804",
    "zhuyin": "ckpt_7992",  # alternative noted for zhuyin: ckpt_8804
    "raw": "ckpt_8804",  # alternative noted for raw: ckpt_7202
    "bert": "ckpt_8601",
    # CWS models.
    "cws_raw": "ckpt_8804",  # alternative noted for cws_raw: ckpt_7202
    "cws_wubi": "ckpt_8804",  # alternative noted for cws_wubi: ckpt_7993
    "cws_zhuyin": "ckpt_8804",
    "pinyin_concat_wubi": "ckpt_8840",
}
# Selected checkpoint name for each "no_index" model key.
BEST_CKPTS_NO_INDEX = {
    "pinyin": "ckpt_8840",
    "wubi": "ckpt_8840",
}
# Selected checkpoint name for each "shuffled" model key.
BEST_CKPTS_SHUFFLED = {
    "wubi": "ckpt_8840",
    "pinyin": "ckpt_8840",
}
# Selected checkpoint name for each CWS model key.
BEST_CKPTS_CWS = {
    "pinyin": "ckpt_8840",
    "wubi": "ckpt_8840",
}
# Selected checkpoint names for the byte and random-index models.
BEST_CKPT_BYTE = "ckpt_8840"
BEST_CKPT_RANDOM_INDEX = "ckpt_8840"

# Relative checkpoint directories for the same two models.
DIR_CKPTS_BYTE = "checkpoints/ckpts_byte_22675"
DIR_CKPTS_RANDOM_INDEX = "checkpoints/ckpts_random_index_22675"
# Relative checkpoint directories keyed by model name for the "SP" setup.
# NOTE(review): the meaning of "SP" is not visible in this file -- confirm.
# The "bert" directory is the same path that DIR_CKPTS uses for "bert".
DIR_CKPT_SP = {
    "bert": "checkpoints/checkpoints_bert_zh_22675",
    "concat_sep": "checkpoints/checkpoints_concat_sep",
    "raw": "checkpoints/checkpoints_raw_zh",
    "wubi": "checkpoints/checkpoints_wubi_zh",
}
# Relative checkpoint directory for each model key.
# Directory naming is not uniform (some entries carry a "_zh" and/or "_22675"
# suffix, others do not), so every path is spelled out in full.
DIR_CKPTS = {
    "cangjie": "checkpoints/checkpoints_cangjie_22675",
    "pinyin": "checkpoints/checkpoints_pinyin_zh_22675",
    "stroke": "checkpoints/checkpoints_stroke_22675",
    "wubi": "checkpoints/checkpoints_wubi_zh_22675",
    "zhengma": "checkpoints/checkpoints_zhengma_zh_22675",
    "zhuyin": "checkpoints/checkpoints_zhuyin_zh_22675",
    "raw": "checkpoints/checkpoints_raw_zh_22675",
    "bert": "checkpoints/checkpoints_bert_zh_22675",
    "pinyin_concat_wubi": "checkpoints/checkpoints_pinyin_concat_wubi",
}
# Relative checkpoint directory for each CWS model key.
# Disabled entries retained from the original for reference:
#   "raw":    "checkpoints/checkpoints_cws_raw_zh_22675"
#   "wubi":   "checkpoints/checkpoints_cws_wubi_zh_22675"
#   "zhuyin": "checkpoints/checkpoints_cws_zhuyin_zh_22675"
DIR_CKPTS_CWS = {
    "pinyin": "checkpoints/ckpts_pinyin_cws_22675",
    "wubi": "checkpoints/ckpts_wubi_cws_22675",
}
# Relative checkpoint directory for the "long" variant; only "raw" is defined.
DIR_CKPTS_LONG = {
    "raw": "checkpoints/checkpoints_raw_zh_long",
}
# Relative checkpoint directory for each "no_index" model key.
DIR_CKPTS_NO_INDEX = {
    "pinyin": "checkpoints/checkpoints_pinyin_no_index",
    "wubi": "checkpoints/checkpoints_wubi_no_index",
}
# Relative checkpoint directory for each "shuffled" model key.
DIR_CKPTS_SHUFFLED = {
    "wubi": "checkpoints/checkpoints_shuffled_wubi",
    "pinyin": "checkpoints/checkpoints_shuffled_pinyin",
}