diff --git a/src/main/arguments/arguments.py b/src/main/arguments/arguments.py
index 4018280..b7bae9e 100644
--- a/src/main/arguments/arguments.py
+++ b/src/main/arguments/arguments.py
@@ -141,7 +141,10 @@ def __init__(self,
                  max_seq_length: int | None = None,
                  overwrite_output: bool = True,
                  neftune_noise_alpha: float = 5.0,
-                 huggingface_auth_token: str | None = None):
+                 huggingface_auth_token: str | None = None,
+                 eval_dataset: str | None = None,
+                 eval_strategy: str | None = None,
+                 eval_steps: int | None = None):
         super(TuneArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, fp32_cpu_offload, is_chat_model, padding_side, use_agent_tokens, additional_vocabulary_tokens, huggingface_auth_token)
         self.r = r
         self.alpha = alpha
@@ -177,6 +180,9 @@ def __init__(self,
         self.max_seq_length = max_seq_length
         self.overwrite_output = overwrite_output
         self.neftune_noise_alpha = neftune_noise_alpha
+        self.eval_dataset = eval_dataset
+        self.eval_strategy = eval_strategy
+        self.eval_steps = eval_steps

     def validate(self) -> None:
         # I know it's bad, I will clean it up eventually
diff --git a/src/main/base/llm_base_module.py b/src/main/base/llm_base_module.py
index daba02e..1607f5d 100644
--- a/src/main/base/llm_base_module.py
+++ b/src/main/base/llm_base_module.py
@@ -65,6 +65,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None:
         optim=arguments.optimizer_type,
         save_strategy=arguments.save_strategy,
         save_steps=arguments.save_steps,
+        eval_steps=arguments.eval_steps,
         logging_strategy=arguments.save_strategy,
         logging_steps=arguments.save_steps,
         save_total_limit=arguments.max_checkpoints,
@@ -81,6 +82,8 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None:
         lr_scheduler_type=arguments.lr_scheduler_type,
         report_to="tensorboard",
         do_eval=arguments.do_eval,
+        eval_strategy=arguments.eval_strategy if arguments.do_eval else 'no',
+        eval_on_start=arguments.do_eval, # TODO - is this ignored by SFTTrainer?
         max_seq_length=arguments.max_seq_length,
         neftune_noise_alpha=arguments.neftune_noise_alpha if arguments.is_instruct_model else None,
@@ -93,6 +96,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None:
         model=model,
         train_dataset=ds['train'],
         args=train_params,
+        eval_dataset=ds['eval'] if arguments.do_eval else None
     )

     model.config.use_cache = False
diff --git a/src/main/main.py b/src/main/main.py
index 609c3fb..c8dc682 100644
--- a/src/main/main.py
+++ b/src/main/main.py
@@ -3,13 +3,14 @@
 from exception.exceptions import main_exception_handler
 from hf.hf_auth import authenticate_with_hf
 from utils.argument_utils import build_and_validate_push_args, build_and_validate_tune_args, build_and_validate_merge_args
+from utils.config_utils import print_serve_mode_config, print_tune_mode_config, print_fine_tune_config
 from serve.llm_executor import build_llm_executor_factory
 from serve.serve import OpenAiLlmServer
 from arguments.arguments import ServerArguments, LlmExecutorFactoryArguments
 import os

 # TODO - Automate this
-version = '2.1.1'
+version = '2.1.2'

 title = f'Torch-Tuner CLI v{version}'
 description = 'This app is a simple CLI to automate the Supervised Fine-Tuning(SFT)(and testing of) of AI Large Language Model(LLM)s with simple text and jsonl on Nvidia GPUs(and Intel/AMD CPUs) using LoRA, Torch and Transformers.'
@@ -33,17 +34,7 @@ def main() -> None:
         print("Is Debug Mode: True")
         print('')
     if args.serve:
-        print("Running in serve mode")
-        print()
-        print("WARNING - Serve mode is currently EXPERIMENTAL and should NEVER be used in a production environment!")
-        print()
-        print(f'Using bf16: {str(args.use_bf_16)}')
-        print(f'Using fp16: {str(args.use_fp_16)}')
-        print(f'Using 8bit: {str(args.use_8bit)}')
-        print(f'Using 4bit: {str(args.use_4bit)}')
-        print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}')
-        print()
-        print(f"Serving {args.serve_model} on port {args.serve_port}")
+        print_serve_mode_config(args)

         authenticate_with_hf(args.huggingface_auth_token)
         model_path = os.path.expanduser(f"{args.output_directory}{os.sep}{args.serve_model}" if (not '/' in args.serve_model and not os.sep in args.serve_model) else args.serve_model)
@@ -61,65 +52,16 @@ def main() -> None:
     lora_scale = round(args.lora_alpha / args.lora_r, 1)
     model_dir = os.path.expanduser(f'{args.output_directory}{os.sep}merged-models{os.sep}{args.new_model}')

+    authenticate_with_hf(args.huggingface_auth_token)
+
     tune_arguments = build_and_validate_tune_args(args)
     merge_arguments = build_and_validate_merge_args(args)
     push_arguments = build_and_validate_push_args(args, model_dir)

-    print('')
-    print(f'Using LLM Type: {tuner.llm_type}')
-
-    print('')
-    print(f'Output Directory: {args.output_directory}')
-    print(f'Base Model: {args.base_model}')
-    print(f'Model Save Directory: {model_dir}')
-    print(f'Training File: {args.training_data_file}')
-
-    print('')
-    print(f'Using CPU Only Tuning: {str(args.cpu_only_tuning)}')
-    print(f'Using tf32: {str(args.use_tf_32)}')
-    print(f'Using bf16: {str(args.use_bf_16)}')
-    print(f'Using fp16: {str(args.use_fp_16)}')
-    print(f'Using 8bit: {str(args.use_8bit)}')
-    print(f'Using 4bit: {str(args.use_4bit)}')
-    print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}')
-
-    print('')
-    print(f'Is Fine-Tuning: {str(args.fine_tune)}')
-    print(f'Is Merging: {str(args.merge)}')
-    print(f'Is Pushing: {str(args.push)}')
-
-    print('')
-    print(f'Is Chat Model: {args.is_chat_model}')
-    print(f'Is Instruct Model: {args.is_instruct_model}')
-    print(f'Using Additional Vocab Tokens: {args.additional_vocabulary_tokens}')
-    print(f'Is LangChain Agent Model: {args.use_agent_tokens}')
+    print_tune_mode_config(args, model_dir, tuner)

     if args.fine_tune:
-        print('')
-        if args.torch_empty_cache_steps is not None:
-            print(f'Empty Torch Cache After {args.torch_empty_cache_steps} Steps')
-
-        print(f'Using Checkpointing: {str(not args.no_checkpoint)}')
-        print(f'Using Max Saves: {str(args.max_saved)}')
-        print(f'Using Batch Size: {str(args.batch_size)}')
-        print(f'Using Save Strategy: {args.save_strategy}')
-        print(f'Using Save Steps: {str(args.save_steps)}')
-        print(f'Using Save Embeddings: {str(args.save_embeddings)}')
-
-        print('')
-        print(f'Epochs: {str(args.epochs)}')
-        print(f'Using LoRA R: {str(args.lora_r)}')
-        print(f'Using LoRA Alpha: {str(args.lora_alpha)}')
-        print(f'LoRA Adapter Scale(alpha/r): {str(lora_scale)}')
-        print(f'Using Optimizer: {args.optimizer_type}')
-        if 'adamw' in args.optimizer_type:
-            print(f'Using Base Learning Rate: {str(args.base_learning_rate)}')
-            print(f'Using Actual Learning Rate(Base Learning Rate * Batch Size): {str(args.base_learning_rate * args.batch_size)}')
-        print(f'Learning Rate Scheduler Type: {str(args.lr_scheduler_type)}')
-        print(f'Using LoRA Dropout: {str(args.lora_dropout)}')
-        print(f'Using Warmup Ratio: {args.warmup_ratio}')
-        print(f'Using Max Sequence Length: {args.max_seq_length}')
-
+        print_fine_tune_config(args, lora_scale, tune_arguments)

     if args.fine_tune:
         print('')
@@ -150,4 +92,7 @@ def main() -> None:
     print(f'{title} COMPLETED')
+
+
+


 main_exception_handler(main, title, args.debug)
diff --git a/src/main/utils/argument_utils.py b/src/main/utils/argument_utils.py
index c3ed0bf..4ad5ef6 100644
--- a/src/main/utils/argument_utils.py
+++ b/src/main/utils/argument_utils.py
@@ -101,7 +101,10 @@ def build_and_validate_tune_args(prog_args) -> TuneArguments:
         max_seq_length=prog_args.max_seq_length,
         overwrite_output=prog_args.overwrite_output,
         neftune_noise_alpha=prog_args.neftune_noise_alpha,
-        huggingface_auth_token=prog_args.huggingface_auth_token
+        huggingface_auth_token=prog_args.huggingface_auth_token,
+        eval_dataset=prog_args.eval_dataset,
+        eval_strategy=prog_args.eval_strategy if prog_args.eval_strategy is not None else prog_args.save_strategy,
+        eval_steps=prog_args.eval_steps if prog_args.eval_steps is not None else prog_args.save_steps
     )
     tune_arguments.validate()
     return tune_arguments
@@ -181,7 +184,7 @@ def _build_program_argument_parser(title: str, description: str) -> ArgumentPars
     parser.add_argument('-tm', '--target-modules', help="Modules to target(CSV List: 'q,k')(OVERRIDES '--target-all-modules' when not None)(default: None)", type=lambda x: _parse_nullable_list_arg(x), default="None")
     parser.add_argument('-tecs', '--torch-empty-cache-steps', help="Empty torch cache after x steps(NEVER empties cache when set to None)(USEFUL to prevent OOM issues)(default: 1)", type=lambda x: _parse_nullable_int_arg(x), default="1")
     parser.add_argument('-cft', '--cpu-only-tuning', default="false", help="Run a fine-tune job on CPU ONLY(default: false)", type=lambda x: _parse_bool_arg(x))
-    parser.add_argument('-hfat', '--huggingface-auth-token', default="false", help="Huggingface auth token(default: None)", type=lambda x: _parse_nullable_arg(x))
+    parser.add_argument('-hfat', '--huggingface-auth-token', default="None", help="Huggingface auth token(default: None)", type=lambda x: _parse_nullable_arg(x))
     parser.add_argument('-ft', '--fine-tune', default="true", help="Run a fine-tune job(default: true)", type=lambda x: _parse_bool_arg(x))
     parser.add_argument('-m', '--merge', default="true",
@@ -229,10 +232,12 @@ def _build_program_argument_parser(title: str, description: str) -> ArgumentPars
     parser.add_argument('-mgn', '--max-gradient-norm', help="Max gradient norm(default: 0.0)", type=float, default=0.0)
     parser.add_argument('-ss', '--save-strategy', help="Save strategy(default: epoch)", default="epoch")
     parser.add_argument('-msl', '--max-seq-length', help="The maximum sequence length to use for the `ConstantLengthDataset` and for automatically creating the Dataset(default: the smaller of the `tokenizer.model_max_length` and `1024`)", type=lambda x: _parse_nullable_int_arg(x), default="None")
-    parser.add_argument('-ssteps', '--save-steps', help="Save after each --save-steps steps(ignored when --save-strategy='epoch')(default: 50)", default=50, type=int)
+    parser.add_argument('-ssteps', '--save-steps', help="Save after each --save-steps steps(ignored when SAVE_STRATEGY='epoch')(default: 50)", default=50, type=int)
     parser.add_argument('-ms', '--max-saved', help="Maximum number of checkpoint saves to keep(this helps prevent filling up disk while tuning)(default: 5)", default=5, type=int)
-    parser.add_argument('-de', '--do-eval', help="Do evaluation on each save step(evaluates model loss after each 'save step')(default: true)", default="true", type=lambda x: _parse_bool_arg(x))
-
+    parser.add_argument('-de', '--do-eval', help="Do evaluation on each save step(default: false)", default="false", type=lambda x: _parse_bool_arg(x))
+    parser.add_argument('-eds', '--eval-dataset', help="Path or HF id of evaluation dataset(defaults to training dataset when set to None)(default: None)", default="None", type=lambda x: _parse_nullable_arg(x))
     parser.add_argument('-llm', '--llm-type', help="LLM Type(default: generic[options: generic, llama])", default="generic")
+    parser.add_argument('-evalstrat', '--eval-strategy', help="Eval strategy('None', 'epoch' or 'steps')(Defaults to SAVE_STRATEGY when set to None)(default: None)", default="None", type=lambda x: _parse_nullable_arg(x))
+    parser.add_argument('-evalsteps', '--eval-steps', help="Steps between evaluations(Ignored when EVAL_STRATEGY is set to 'epoch')(Defaults to SAVE_STEPS when set to None)(default: None)", default="None", type=lambda x: _parse_nullable_int_arg(x))

     return parser
diff --git a/src/main/utils/config_utils.py b/src/main/utils/config_utils.py
new file mode 100644
index 0000000..c44e2fa
--- /dev/null
+++ b/src/main/utils/config_utils.py
@@ -0,0 +1,96 @@
+def print_serve_mode_config(args):
+    print("Running in serve mode")
+    print()
+    print("WARNING - Serve mode is currently EXPERIMENTAL and should NEVER be used in a production environment!")
+    print()
+    print(f'Using bf16: {str(args.use_bf_16)}')
+    print(f'Using fp16: {str(args.use_fp_16)}')
+    print(f'Using 8bit: {str(args.use_8bit)}')
+    print(f'Using 4bit: {str(args.use_4bit)}')
+    print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}')
+    print()
+    print(f"Serving {args.serve_model} on port {args.serve_port}")
+
+
+def print_tune_mode_config(args, model_dir, tuner):
+    print('')
+    print(f'Using LLM Type: {tuner.llm_type}')
+    print('')
+    print(f'Output Directory: {args.output_directory}')
+    print(f'Base Model: {args.base_model}')
+    print(f'Model Save Directory: {model_dir}')
+
+    print('')
+    print(f'Using CPU Only Tuning: {str(args.cpu_only_tuning)}')
+    print(f'Using tf32: {str(args.use_tf_32)}')
+    print(f'Using bf16: {str(args.use_bf_16)}')
+    print(f'Using fp16: {str(args.use_fp_16)}')
+    print(f'Using 8bit: {str(args.use_8bit)}')
+    print(f'Using 4bit: {str(args.use_4bit)}')
+    print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}')
+    print('')
+    print(f'Is Fine-Tuning: {str(args.fine_tune)}')
+    print(f'Is Merging: {str(args.merge)}')
+    print(f'Is Pushing: {str(args.push)}')
+    print('')
+    print(f'Is Chat Model: {args.is_chat_model}')
+    print(f'Is Instruct Model: {args.is_instruct_model}')
+    print(f'Using Additional Vocab Tokens: {args.additional_vocabulary_tokens}')
+    print(f'Is LangChain Agent Model: {args.use_agent_tokens}')
+
+
+def print_fine_tune_config(args, lora_scale, tune_arguments):
+    print('')
+    print(f'Epochs: {str(args.epochs)}')
+    print(f'Using Tuning Dataset: {args.hf_training_dataset_id if args.hf_training_dataset_id is not None else args.training_data_file}')
+
+    if args.torch_empty_cache_steps is not None:
+        print(f'Empty Torch Cache After {args.torch_empty_cache_steps} Steps')
+    print(f'Using Checkpointing: {str(not args.no_checkpoint)}')
+    if not args.no_checkpoint:
+        print(f'Using Max Saved Checkpoints: {args.max_saved}')
+    print(f'Using Batch Size: {str(args.batch_size)}')
+    print(f'Using Save Strategy: {args.save_strategy}')
+    print(f'Using Save Steps: {str(args.save_steps)}')
+    print(f'Using Save Embeddings: {str(args.save_embeddings)}')
+    if args.target_modules is not None:
+        print(f'Targeting Modules: {args.target_modules}')
+    elif args.target_all_modules:
+        print(f'Targeting Modules: ALL')
+    else:
+        print(f'Targeting Modules: LINEAR')
+    print('')
+    print(f'Using LoRA R: {str(args.lora_r)}')
+    print(f'Using LoRA Alpha: {str(args.lora_alpha)}')
+    print(f'LoRA Adapter Scale(alpha/r): {str(lora_scale)}')
+    print(f'Using LoRA Dropout: {str(args.lora_dropout)}')
+    print(f'Using LoRA Bias: {str(args.bias)}')
+    print()
+    print(f'Using Optimizer: {args.optimizer_type}')
+    if 'adamw' in args.optimizer_type:
+        print(f'Using Base Learning Rate: {str(args.base_learning_rate)}')
+        print(
+            f'Using Actual Learning Rate(Base Learning Rate * Batch Size): {str(args.base_learning_rate * args.batch_size)}')
+    print(f'Learning Rate Scheduler Type: {str(args.lr_scheduler_type)}')
+
+    print(f'Using Warmup Ratio: {args.warmup_ratio}')
+    print(f'Using Max Sequence Length: {args.max_seq_length}')
+    print(f'Using Group By Length: {args.group_by_length}')
+    print(f'Using Weight Decay: {args.weight_decay}')
+    print(f'Using Max Gradient Norm: {args.max_gradient_norm}')
+    print(f'Using Gradient Accumulation Steps: {args.gradient_accumulation_steps}')
+    print()
+    print(f'Using Do Eval: {args.do_eval}')
+    if args.do_eval is not None and args.do_eval:
+        print(f'Using Eval Strategy: {tune_arguments.eval_strategy}')
+        if tune_arguments.eval_strategy == 'steps':
+            print(f'Using Eval Steps: {tune_arguments.eval_steps}')
+        if args.eval_dataset is None:
+            print(f'Using Eval Dataset: {args.hf_training_dataset_id if args.hf_training_dataset_id is not None else args.training_data_file}')
+        else:
+            print(f'Using Eval Dataset: {args.eval_dataset}')
+    print()
+    if args.is_instruct_model:
+        print(f'Using NEFTune Noise Alpha: {args.neftune_noise_alpha}')
+
+
diff --git a/src/main/utils/dataset_utils.py b/src/main/utils/dataset_utils.py
index f840148..a58b263 100644
--- a/src/main/utils/dataset_utils.py
+++ b/src/main/utils/dataset_utils.py
@@ -1,3 +1,4 @@
+import os.path
 from typing import Union

 from datasets import load_dataset as load_data_set, DatasetDict, Dataset, IterableDatasetDict, IterableDataset
@@ -7,9 +8,37 @@
 def load_dataset(arguments: TuneArguments) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]:
     """Load dataset for SFT trainer."""
     if arguments.hf_training_dataset_id is not None:
-        return load_data_set(arguments.hf_training_dataset_id, split='train')
+        train_set = load_data_set(arguments.hf_training_dataset_id, split='train')
+        if arguments.do_eval:
+            train_set = _load_eval_ds(arguments, train_set)
+        return train_set
+
     elif arguments.train_file.endswith(".jsonl"):
-        seperator = "/" if not arguments.training_data_dir.endswith("/") else ""
-        return load_data_set("json", data_files={"train": f"{arguments.training_data_dir}{seperator}{arguments.train_file}"})
+        seperator = os.sep if not arguments.training_data_dir.endswith(os.sep) else ""
+        train_set = load_data_set("json", data_files={"train": f"{arguments.training_data_dir}{seperator}{arguments.train_file}"})
+        if arguments.do_eval:
+            train_set = _load_eval_ds(arguments, train_set)
+        return train_set
     else:
-        return load_data_set(arguments.training_data_dir, data_files={"train": arguments.train_file})
\ No newline at end of file
+        train_set = load_data_set(arguments.training_data_dir, data_files={"train": arguments.train_file})
+        if arguments.do_eval:
+            train_set = _load_eval_ds(arguments, train_set)
+        return train_set
+
+
+def _load_eval_ds(arguments: TuneArguments, train_set: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]:
+    if arguments.eval_dataset is None:
+        train_set['eval'] = train_set['train']
+        return train_set
+    if os.path.exists(arguments.eval_dataset) and arguments.eval_dataset.endswith('jsonl'):
+        eval_set = load_data_set("json", data_files={"train": arguments.eval_dataset})
+        train_set['eval'] = eval_set['train']
+        return train_set
+    if os.path.exists(arguments.eval_dataset):
+        eval_set = load_data_set(arguments.eval_dataset.replace(arguments.eval_dataset.split(os.sep)[len(arguments.eval_dataset.split(os.sep)) - 1], ''), data_files={"train": arguments.eval_dataset.split(os.sep)[len(arguments.eval_dataset.split(os.sep)) - 1]})
+        train_set['eval'] = eval_set['train']
+        return train_set
+
+    # Fall back to treating eval_dataset as an HF dataset id when it is not a local path
+    eval_set = load_data_set(arguments.eval_dataset, split='train')
+    train_set['eval'] = eval_set
+    return train_set
\ No newline at end of file
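
Reviewer note: a quick illustration of the eval fallback wiring added in build_and_validate_tune_args above, where --eval-strategy and --eval-steps inherit --save-strategy and --save-steps when left unset. This is a hypothetical, standalone sketch; resolve_eval_settings is not part of the patch and only restates the two ternary expressions so the behaviour is easy to check.

    # Hypothetical sketch only - mirrors the two ternaries in build_and_validate_tune_args;
    # this helper does not exist in the patch.
    def resolve_eval_settings(save_strategy: str, save_steps: int,
                              eval_strategy: str | None, eval_steps: int | None) -> tuple[str, int]:
        # --eval-strategy falls back to --save-strategy, --eval-steps falls back to --save-steps
        strategy = eval_strategy if eval_strategy is not None else save_strategy
        steps = eval_steps if eval_steps is not None else save_steps
        return strategy, steps

    # With the new CLI defaults (--eval-strategy None, --eval-steps None) evaluation simply
    # follows the existing save schedule; explicit values override it.
    assert resolve_eval_settings('epoch', 50, None, None) == ('epoch', 50)
    assert resolve_eval_settings('epoch', 50, 'steps', 100) == ('steps', 100)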