From fb5c9960a0c8c62b9210afe7f8d22e0bd41fe027 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Tue, 29 Oct 2024 11:14:58 -0400 Subject: [PATCH 1/4] Adding support for Dolomite + Granite model class Signed-off-by: Mustafa Eyceoz --- src/instructlab/training/main_ds.py | 5 ++++ src/instructlab/training/utils.py | 37 ++++++++++++++++------------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/instructlab/training/main_ds.py b/src/instructlab/training/main_ds.py index ab59282f..78068b7b 100644 --- a/src/instructlab/training/main_ds.py +++ b/src/instructlab/training/main_ds.py @@ -4,6 +4,7 @@ from copy import deepcopy from pathlib import Path import argparse +import json import math import os import re @@ -528,6 +529,10 @@ def main(args): tokenizer = setup_tokenizer(args.model_name_or_path, SPECIAL_TOKENS, CHAT_TEMPLATE) # device = torch.device("cuda", args.local_rank) + with open(Path(args.model_path) / "config.json") as conf_json: + model_conf = json.load(conf_json) + args.model_type = model_conf["model_type"] + #### distributed init ##### torch.cuda.set_device(int(os.environ["LOCAL_RANK"])) args.local_rank = int(os.environ["LOCAL_RANK"]) diff --git a/src/instructlab/training/utils.py b/src/instructlab/training/utils.py index d685d212..d6cef034 100644 --- a/src/instructlab/training/utils.py +++ b/src/instructlab/training/utils.py @@ -10,7 +10,6 @@ from typing import Any, List, Optional import importlib import inspect -import json import logging import os import random @@ -62,17 +61,10 @@ def check_valid_train_args(train_args: TrainingArgs): f"Provided path to model does not exist. Please make sure that you've passed a valid model and that it has appropriate permissions: {train_args.model_path}" ) - if train_args.use_dolomite: - with open(Path(train_args.model_path) / "config.json") as conf_json: - model_conf = json.load(conf_json) - if model_conf["model_type"] == "granite": - raise RuntimeError( - "Converting Granite models to Dolomite format is currently unsupported." - ) - if train_args.disable_flash_attn: - raise RuntimeError( - "ERROR: Trying to use dolomite padding-free transformer without flash attention is not supported" - ) + if train_args.use_dolomite and train_args.disable_flash_attn: + raise RuntimeError( + "ERROR: Trying to use dolomite padding-free transformer without flash attention is not supported" + ) if train_args.is_padding_free: print( @@ -802,10 +794,21 @@ def _get_state_dict_patched(model, unwrap=False): output_dir.mkdir(parents=True, exist_ok=True) if not model.module.config.architectures and convert_dolomite: - model.module.config.architectures = ["LlamaForCausalLM"] - warnings.warn( - f"Adding architectures to ckpt: {model.module.config.architectures}", - ) + arch_added = False + if args.model_type == "llama": + model.module.config.architectures = ["LlamaForCausalLM"] + arch_added = True + elif args.model_type == "granite": + model.module.config.architectures = ["GraniteForCausalLM"] + arch_added = True + if arch_added: + warnings.warn( + f"Adding architectures to ckpt: {model.module.config.architectures}", + ) + else: + warnings.warn( + f"Converting from dolomite, but no architecture field added to config.json", + ) model.module.config.to_json_file(output_config_file) tokenizer.save_pretrained(output_dir) @@ -834,7 +837,7 @@ def _get_state_dict_patched(model, unwrap=False): export_to_huggingface( pretrained_model_name_or_path=tmpdir.name, save_path=final_output_dir, - model_type="llama", + model_type=args.model_type, ) tmpdir.cleanup() From 2c00ef669b310956e1d48c81ec7aed1eaa6269de Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Tue, 29 Oct 2024 13:36:05 -0400 Subject: [PATCH 2/4] Incorrect arg fix Signed-off-by: Mustafa Eyceoz --- src/instructlab/training/main_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/instructlab/training/main_ds.py b/src/instructlab/training/main_ds.py index 78068b7b..7910c341 100644 --- a/src/instructlab/training/main_ds.py +++ b/src/instructlab/training/main_ds.py @@ -529,7 +529,7 @@ def main(args): tokenizer = setup_tokenizer(args.model_name_or_path, SPECIAL_TOKENS, CHAT_TEMPLATE) # device = torch.device("cuda", args.local_rank) - with open(Path(args.model_path) / "config.json") as conf_json: + with open(Path(args.model_name_or_path) / "config.json") as conf_json: model_conf = json.load(conf_json) args.model_type = model_conf["model_type"] From 8852f48f577aa41dc848909ef6cfe7c920aa75ec Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 30 Oct 2024 11:23:47 -0400 Subject: [PATCH 3/4] Bring HF padding-free into dolomite parity Signed-off-by: Mustafa Eyceoz --- src/instructlab/training/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/instructlab/training/utils.py b/src/instructlab/training/utils.py index d6cef034..41b410c7 100644 --- a/src/instructlab/training/utils.py +++ b/src/instructlab/training/utils.py @@ -221,7 +221,7 @@ def pad_collate_fn(batch): input_ids.extend(item["input_ids"].tolist()) labels.extend(item["labels"].tolist()) - position_ids.extend(range(total_len, total_len + item_len)) + position_ids.extend(range(item_len)) total_len += item_len num_loss_counted_tokens += (item["labels"] != -100).sum().item() From 2a9626fff10a4694832e70253746a25d3e72e3cc Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Fri, 1 Nov 2024 16:38:16 -0400 Subject: [PATCH 4/4] Update dolomite requirement Signed-off-by: Mustafa Eyceoz --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2d77d1e1..84da8460 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ numba numpy>=1.23.5,<2.0.0 ; python_version == '3.10' numpy>=1.26.4,<2.0.0 ; python_version != '3.10' rich -instructlab-dolomite>=0.1.1 +instructlab-dolomite>=0.2.0 trl>=0.9.4 peft pydantic>=2.7.0