forked from McGill-NLP/llm2vec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMetaLlama3.json
29 lines (29 loc) · 942 Bytes
/
MetaLlama3.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
{
"model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
"peft_model_name_or_path": "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp",
"bidirectional": true,
"pooling_mode": "mean",
"dataset_name": "E5",
"dataset_file_path": "cache/echo-data",
"remove_unused_columns": false,
"learning_rate": 2e-4,
"num_train_epochs": 3,
"warmup_steps": 300,
"per_device_train_batch_size": 64,
"per_device_eval_batch_size": 64,
"gradient_accumulation_steps": 1,
"do_train": true,
"disable_tqdm": false,
"max_seq_length": 512,
"overwrite_output_dir": true,
"output_dir": "output/mntp-supervised/Meta-Llama-3-8B-Instruct",
"logging_steps": 50,
"save_steps": 200,
"save_only_model": true,
"stop_after_n_steps": 1000,
"lora_r": 16,
"gradient_checkpointing": true,
"torch_dtype": "bfloat16",
"attn_implementation": "flash_attention_2",
"seed": 42
}