train-1.yaml
# YAML file listing configuration parameters
# Paths
paths:
  data:
    base_dir: ../data
    data: cfpb_partial.csv
    debug_data: cfpb_debug.csv
    partial: cfpb.csv
  save_results:
    apply_model: False  # Save model weights [boolean: True/False]
    apply_metric: True  # Save performance metrics [boolean: True/False]
    base_dir: ../logs
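# The relative paths above (../data, ../logs) are presumably resolved from the
# directory the training script is launched from.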
# DEBUG [True or False]; if True the debug_data file is loaded
# Use for pipeline development
debug: False
# DATA
data_info:
  source_fields:
    - Consumer complaint narrative
    - ZIP code
    - Sub-issue
  target: Product
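# source_fields are the input columns and target (Product) is the label to
# predict; the column names appear to come from the public CFPB
# consumer-complaint dataset (hence the cfpb_*.csv files above).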
# Stratification Technique
stratify:
  technique: stratified_kfold
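# stratified_kfold splits the data so each fold approximately preserves the
# class distribution of the target column, which helps with imbalanced labels.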
# Cross-Validation Folds
cv:
  num_folds: 5
  val_folds: [1, 2]  # [list of integers] (start counting at 1)
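# With num_folds: 5 each fold holds roughly 20% of the data; val_folds: [1, 2]
# presumably means folds 1 and 2 are each held out as validation (one run per
# listed fold).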
# Preprocessing
preprocessing:
  apply_techniques:
    - LabelEncoder
  LabelEncoder:
    fields:
      - Product
  OneHotEncoder:
    fields:
      - Product
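# Presumably only the entries listed under apply_techniques are run; the
# OneHotEncoder block is kept as a ready-made option but stays inactive with
# this config.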
# Model and Tokenizer
model_tokenizer:
  base_dir: ../hf_download
  name: bert-base-uncased
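# name is a Hugging Face model ID (bert-base-uncased); base_dir presumably
# points to a local directory where the pretrained weights and tokenizer are
# downloaded/cached.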
# Model
model:
  freeze:
    apply: True
    # Number of layers to freeze starting from layer 1
    num_layers: 10
  # Custom LLM Pooling
  mean_pooling:
    apply: True
  # Gradient checkpointing
  gradient_checkpointing: False
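# freeze.num_layers: 10 keeps the first 10 encoder layers fixed during
# fine-tuning. mean_pooling presumably averages the token embeddings of the
# last hidden state (a common alternative to the [CLS] vector).
# gradient_checkpointing would trade extra compute for lower GPU memory; it is
# left off here.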
# Tokenizer parameters
tokenizer:
  abbreviations:
    - Null
  add_special_tokens: True
  max_length: 512
  padding: True
  truncation: True
  return_tensors: pt
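# max_length: 512 matches the longest sequence bert-base-uncased can encode;
# truncation caps longer inputs at that length, padding keeps each batch
# rectangular, and return_tensors: pt returns PyTorch tensors.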
# Optimizer
optimizer:
  name: AdamW
  lr:
    max: 1.0E-4
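# lr.max (1.0E-4) is presumably the peak learning rate handed to the scheduler
# below; AdamW is the decoupled-weight-decay variant of Adam commonly used for
# BERT fine-tuning.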
# Learning Rate Scheduler
lr_scheduler:
  name: CosineAnnealingLR
  OneCycleLR:
    pct_start: 0.1
  CosineAnnealingLR:
    eta_min: 1.0E-5
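# Presumably only the scheduler named above (CosineAnnealingLR) is instantiated;
# the per-scheduler blocks hold its hyperparameters: pct_start is the warm-up
# fraction of the cycle for OneCycleLR, and eta_min is the minimum learning
# rate the cosine schedule decays to.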
# Model Tuning
epochs: 10
batch_size: 16
num_workers: 8
eval_metric:
  name: loss
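# eval_metric: loss presumably means checkpoints/early stopping are judged on
# validation loss.
#
# A minimal sketch of how this config could be read (assumes PyYAML is
# installed; this snippet is illustrative and not taken from the repo code):
#   import yaml
#   with open("train-1.yaml") as f:
#       cfg = yaml.safe_load(f)
#   print(cfg["model_tokenizer"]["name"])  # bert-base-uncased
#   print(cfg["cv"]["val_folds"])          # [1, 2]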