From 9bfbe1a5f905919854726a042772c98af6e3ac72 Mon Sep 17 00:00:00 2001 From: Robert Kirchner Date: Sun, 8 Sep 2024 18:58:37 -0500 Subject: [PATCH 1/5] fix do-eval argument not configured properly - rk --- src/main/arguments/arguments.py | 4 +++- src/main/base/llm_base_module.py | 3 +++ src/main/utils/argument_utils.py | 7 +++--- src/main/utils/dataset_utils.py | 37 ++++++++++++++++++++++++++++---- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/main/arguments/arguments.py b/src/main/arguments/arguments.py index 4018280..6aaeafa 100644 --- a/src/main/arguments/arguments.py +++ b/src/main/arguments/arguments.py @@ -141,7 +141,8 @@ def __init__(self, max_seq_length: int | None = None, overwrite_output: bool = True, neftune_noise_alpha: float = 5.0, - huggingface_auth_token: str | None = None): + huggingface_auth_token: str | None = None, + eval_dataset: str | None = None): super(TuneArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, fp32_cpu_offload, is_chat_model, padding_side, use_agent_tokens, additional_vocabulary_tokens, huggingface_auth_token) self.r = r self.alpha = alpha @@ -177,6 +178,7 @@ def __init__(self, self.max_seq_length = max_seq_length self.overwrite_output = overwrite_output self.neftune_noise_alpha = neftune_noise_alpha + self.eval_dataset = eval_dataset def validate(self) -> None: # I know it's bad, I will clean it up eventually diff --git a/src/main/base/llm_base_module.py b/src/main/base/llm_base_module.py index daba02e..609c52c 100644 --- a/src/main/base/llm_base_module.py +++ b/src/main/base/llm_base_module.py @@ -65,6 +65,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None: optim=arguments.optimizer_type, save_strategy=arguments.save_strategy, save_steps=arguments.save_steps, + eval_steps=arguments.save_steps, logging_strategy=arguments.save_strategy, logging_steps=arguments.save_steps, save_total_limit=arguments.max_checkpoints, @@ -81,6 +82,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None: lr_scheduler_type=arguments.lr_scheduler_type, report_to="tensorboard", do_eval=arguments.do_eval, + eval_strategy=arguments.save_strategy if arguments.do_eval else 'no', # TODO - is this ignored bt SFTTrainer? 
max_seq_length=arguments.max_seq_length, neftune_noise_alpha=arguments.neftune_noise_alpha if arguments.is_instruct_model else None, @@ -93,6 +95,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None: model=model, train_dataset=ds['train'], args=train_params, + eval_dataset=ds['eval'] if arguments.do_eval else None ) model.config.use_cache = False diff --git a/src/main/utils/argument_utils.py b/src/main/utils/argument_utils.py index c3ed0bf..55f1248 100644 --- a/src/main/utils/argument_utils.py +++ b/src/main/utils/argument_utils.py @@ -101,7 +101,8 @@ def build_and_validate_tune_args(prog_args) -> TuneArguments: max_seq_length=prog_args.max_seq_length, overwrite_output=prog_args.overwrite_output, neftune_noise_alpha=prog_args.neftune_noise_alpha, - huggingface_auth_token=prog_args.huggingface_auth_token + huggingface_auth_token=prog_args.huggingface_auth_token, + eval_dataset=prog_args.eval_dataset ) tune_arguments.validate() return tune_arguments @@ -231,8 +232,8 @@ def _build_program_argument_parser(title: str, description: str) -> ArgumentPars parser.add_argument('-msl', '--max-seq-length', help="The maximum sequence length to use for the `ConstantLengthDataset` and for automatically creating the Dataset(default: the smaller of the `tokenizer.model_max_length` and `1024`)", type=lambda x: _parse_nullable_int_arg(x), default="None") parser.add_argument('-ssteps', '--save-steps', help="Save after each --save-steps steps(ignored when --save-strategy='epoch')(default: 50)", default=50, type=int) parser.add_argument('-ms', '--max-saved', help="Maximum number of checkpoint saves to keep(this helps prevent filling up disk while tuning)(default: 5)", default=5, type=int) - parser.add_argument('-de', '--do-eval', help="Do evaluation on each save step(evaluates model loss after each 'save step')(default: true)", default="true", type=lambda x: _parse_bool_arg(x)) - + parser.add_argument('-de', '--do-eval', help="Do evaluation on each save step(evaluates model loss after each 'save step')(default: false)", default="false", type=lambda x: _parse_bool_arg(x)) + parser.add_argument('-eds', '--eval-dataset', help="Path or HF id of evaluation dataset(defaults to training dataset when set to None)(default: None)", default="none", type=lambda x: _parse_nullable_arg(x)) parser.add_argument('-llm', '--llm-type', help="LLM Type(default: generic[options: generic, llama])", default="generic") return parser diff --git a/src/main/utils/dataset_utils.py b/src/main/utils/dataset_utils.py index f840148..a58b263 100644 --- a/src/main/utils/dataset_utils.py +++ b/src/main/utils/dataset_utils.py @@ -1,3 +1,4 @@ +import os.path from typing import Union from datasets import load_dataset as load_data_set, DatasetDict, Dataset, IterableDatasetDict, IterableDataset @@ -7,9 +8,37 @@ def load_dataset(arguments: TuneArguments) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: """Load dataset for SFT trainer.""" if arguments.hf_training_dataset_id is not None: - return load_data_set(arguments.hf_training_dataset_id, split='train') + train_set = load_data_set(arguments.hf_training_dataset_id, split='train') + if arguments.do_eval: + train_set = _load_eval_ds(arguments, train_set) + return train_set + elif arguments.train_file.endswith(".jsonl"): - seperator = "/" if not arguments.training_data_dir.endswith("/") else "" - return load_data_set("json", data_files={"train": f"{arguments.training_data_dir}{seperator}{arguments.train_file}"}) + seperator = os.sep if not 
arguments.training_data_dir.endswith(os.sep) else "" + train_set = load_data_set("json", data_files={"train": f"{arguments.training_data_dir}{seperator}{arguments.train_file}"}) + if arguments.do_eval: + train_set = _load_eval_ds(arguments, train_set) + return train_set else: - return load_data_set(arguments.training_data_dir, data_files={"train": arguments.train_file}) \ No newline at end of file + train_set = load_data_set(arguments.training_data_dir, data_files={"train": arguments.train_file}) + if arguments.do_eval: + train_set = _load_eval_ds(arguments, train_set) + return train_set + + +def _load_eval_ds(arguments: TuneArguments, train_set: Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]) -> Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]: + if arguments.eval_dataset is None: + train_set['eval'] = train_set['train'] + return train_set + if os.path.exists(arguments.eval_dataset) and arguments.eval_dataset.endswith('jsonl'): + eval_set = load_data_set("json", data_files={"train": arguments.eval_dataset}) + train_set['eval'] = eval_set['train'] + return train_set + if os.path.exists(arguments.eval_dataset): + eval_set = load_data_set(arguments.eval_dataset.replace(arguments.eval_dataset.split(os.sep)[len(arguments.eval_dataset.split(os.sep)) - 1], ''), data_files={"train": arguments.eval_dataset.split(os.sep)[len(arguments.eval_dataset.split(os.sep)) - 1]}) + train_set['eval'] = eval_set['train'] + return train_set + + eval_set = load_data_set(arguments.hf_training_dataset_id, split='train') + train_set['eval'] = eval_set['train'] + return train_set \ No newline at end of file From 50afe0f277f38ae4be438a7f2860cc5752bbd597 Mon Sep 17 00:00:00 2001 From: Robert Kirchner Date: Mon, 9 Sep 2024 11:17:26 -0500 Subject: [PATCH 2/5] fix huggingface auth token - rk --- src/main/main.py | 2 ++ src/main/utils/argument_utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/main.py b/src/main/main.py index 609c3fb..ec6e601 100644 --- a/src/main/main.py +++ b/src/main/main.py @@ -61,6 +61,8 @@ def main() -> None: lora_scale = round(args.lora_alpha / args.lora_r, 1) model_dir = os.path.expanduser(f'{args.output_directory}{os.sep}merged-models{os.sep}{args.new_model}') + authenticate_with_hf(args.huggingface_auth_token) + tune_arguments = build_and_validate_tune_args(args) merge_arguments = build_and_validate_merge_args(args) push_arguments = build_and_validate_push_args(args, model_dir) diff --git a/src/main/utils/argument_utils.py b/src/main/utils/argument_utils.py index 55f1248..aa29792 100644 --- a/src/main/utils/argument_utils.py +++ b/src/main/utils/argument_utils.py @@ -182,7 +182,7 @@ def _build_program_argument_parser(title: str, description: str) -> ArgumentPars parser.add_argument('-tm', '--target-modules', help="Modules to target(CSV List: 'q,k')(OVERRIDES '--target-all-modules' when not None)(default: None)", type=lambda x: _parse_nullable_list_arg(x), default="None") parser.add_argument('-tecs', '--torch-empty-cache-steps', help="Empty torch cache after x steps(NEVER empties cache when set to None)(USEFUL to prevent OOM issues)(default: 1)", type=lambda x: _parse_nullable_int_arg(x), default="1") parser.add_argument('-cft', '--cpu-only-tuning', default="false", help="Run a fine-tune job on CPU ONLY(default: false)", type=lambda x: _parse_bool_arg(x)) - parser.add_argument('-hfat', '--huggingface-auth-token', default="false", help="Huggingface auth token(default: None)", type=lambda x: _parse_nullable_arg(x)) + 
parser.add_argument('-hfat', '--huggingface-auth-token', default="None", help="Huggingface auth token(default: None)", type=lambda x: _parse_nullable_arg(x)) parser.add_argument('-ft', '--fine-tune', default="true", help="Run a fine-tune job(default: true)", type=lambda x: _parse_bool_arg(x)) parser.add_argument('-m', '--merge', default="true", From d8373c1dbf500f9c521cfce3cd8ed4c8a7392403 Mon Sep 17 00:00:00 2001 From: Robert Kirchner Date: Tue, 10 Sep 2024 10:19:22 -0500 Subject: [PATCH 3/5] refactor auth config - rk --- src/main/arguments/arguments.py | 6 ++- src/main/base/llm_base_module.py | 5 ++- src/main/main.py | 71 ++++--------------------------- src/main/utils/argument_utils.py | 12 ++++-- src/main/utils/config_utils.py | 72 ++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 71 deletions(-) create mode 100644 src/main/utils/config_utils.py diff --git a/src/main/arguments/arguments.py b/src/main/arguments/arguments.py index 6aaeafa..b7bae9e 100644 --- a/src/main/arguments/arguments.py +++ b/src/main/arguments/arguments.py @@ -142,7 +142,9 @@ def __init__(self, overwrite_output: bool = True, neftune_noise_alpha: float = 5.0, huggingface_auth_token: str | None = None, - eval_dataset: str | None = None): + eval_dataset: str | None = None, + eval_strategy: str | None = None, + eval_steps: int | None = None): super(TuneArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, fp32_cpu_offload, is_chat_model, padding_side, use_agent_tokens, additional_vocabulary_tokens, huggingface_auth_token) self.r = r self.alpha = alpha @@ -179,6 +181,8 @@ def __init__(self, self.overwrite_output = overwrite_output self.neftune_noise_alpha = neftune_noise_alpha self.eval_dataset = eval_dataset + self.eval_strategy = eval_strategy + self.eval_steps = eval_steps def validate(self) -> None: # I know it's bad, I will clean it up eventually diff --git a/src/main/base/llm_base_module.py b/src/main/base/llm_base_module.py index 609c52c..1607f5d 100644 --- a/src/main/base/llm_base_module.py +++ b/src/main/base/llm_base_module.py @@ -65,7 +65,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None: optim=arguments.optimizer_type, save_strategy=arguments.save_strategy, save_steps=arguments.save_steps, - eval_steps=arguments.save_steps, + eval_steps=arguments.eval_steps, logging_strategy=arguments.save_strategy, logging_steps=arguments.save_steps, save_total_limit=arguments.max_checkpoints, @@ -82,7 +82,8 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None: lr_scheduler_type=arguments.lr_scheduler_type, report_to="tensorboard", do_eval=arguments.do_eval, - eval_strategy=arguments.save_strategy if arguments.do_eval else 'no', + eval_strategy=arguments.eval_strategy if arguments.do_eval else 'no', + eval_on_start=arguments.do_eval, # TODO - is this ignored bt SFTTrainer? 
max_seq_length=arguments.max_seq_length, neftune_noise_alpha=arguments.neftune_noise_alpha if arguments.is_instruct_model else None, diff --git a/src/main/main.py b/src/main/main.py index ec6e601..042e334 100644 --- a/src/main/main.py +++ b/src/main/main.py @@ -3,6 +3,7 @@ from exception.exceptions import main_exception_handler from hf.hf_auth import authenticate_with_hf from utils.argument_utils import build_and_validate_push_args, build_and_validate_tune_args, build_and_validate_merge_args +from utils.config_utils import print_serve_mode_config, print_tune_mode_config, print_fine_tune_config from serve.llm_executor import build_llm_executor_factory from serve.serve import OpenAiLlmServer from arguments.arguments import ServerArguments, LlmExecutorFactoryArguments @@ -33,17 +34,7 @@ def main() -> None: print("Is Debug Mode: True") print('') if args.serve: - print("Running in serve mode") - print() - print("WARNING - Serve mode is currently EXPERIMENTAL and should NEVER be used in a production environment!") - print() - print(f'Using bf16: {str(args.use_bf_16)}') - print(f'Using fp16: {str(args.use_fp_16)}') - print(f'Using 8bit: {str(args.use_8bit)}') - print(f'Using 4bit: {str(args.use_4bit)}') - print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}') - print() - print(f"Serving {args.serve_model} on port {args.serve_port}") + print_serve_mode_config(args) authenticate_with_hf(args.huggingface_auth_token) model_path = os.path.expanduser(f"{args.output_directory}{os.sep}{args.serve_model}" if (not '/' in args.serve_model and not os.sep in args.serve_model) else args.serve_model) @@ -67,61 +58,10 @@ def main() -> None: merge_arguments = build_and_validate_merge_args(args) push_arguments = build_and_validate_push_args(args, model_dir) - print('') - print(f'Using LLM Type: {tuner.llm_type}') - - print('') - print(f'Output Directory: {args.output_directory}') - print(f'Base Model: {args.base_model}') - print(f'Model Save Directory: {model_dir}') - print(f'Training File: {args.training_data_file}') - - print('') - print(f'Using CPU Only Tuning: {str(args.cpu_only_tuning)}') - print(f'Using tf32: {str(args.use_tf_32)}') - print(f'Using bf16: {str(args.use_bf_16)}') - print(f'Using fp16: {str(args.use_fp_16)}') - print(f'Using 8bit: {str(args.use_8bit)}') - print(f'Using 4bit: {str(args.use_4bit)}') - print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}') - - print('') - print(f'Is Fine-Tuning: {str(args.fine_tune)}') - print(f'Is Merging: {str(args.merge)}') - print(f'Is Pushing: {str(args.push)}') - - print('') - print(f'Is Chat Model: {args.is_chat_model}') - print(f'Is Instruct Model: {args.is_instruct_model}') - print(f'Using Additional Vocab Tokens: {args.additional_vocabulary_tokens}') - print(f'Is LangChain Agent Model: {args.use_agent_tokens}') + print_tune_mode_config(args, model_dir, tuner) if args.fine_tune: - print('') - if args.torch_empty_cache_steps is not None: - print(f'Empty Torch Cache After {args.torch_empty_cache_steps} Steps') - - print(f'Using Checkpointing: {str(not args.no_checkpoint)}') - print(f'Using Max Saves: {str(args.max_saved)}') - print(f'Using Batch Size: {str(args.batch_size)}') - print(f'Using Save Strategy: {args.save_strategy}') - print(f'Using Save Steps: {str(args.save_steps)}') - print(f'Using Save Embeddings: {str(args.save_embeddings)}') - - print('') - print(f'Epochs: {str(args.epochs)}') - print(f'Using LoRA R: {str(args.lora_r)}') - print(f'Using LoRA Alpha: {str(args.lora_alpha)}') - print(f'LoRA Adapter Scale(alpha/r): 
{str(lora_scale)}') - print(f'Using Optimizer: {args.optimizer_type}') - if 'adamw' in args.optimizer_type: - print(f'Using Base Learning Rate: {str(args.base_learning_rate)}') - print(f'Using Actual Learning Rate(Base Learning Rate * Batch Size): {str(args.base_learning_rate * args.batch_size)}') - print(f'Learning Rate Scheduler Type: {str(args.lr_scheduler_type)}') - print(f'Using LoRA Dropout: {str(args.lora_dropout)}') - print(f'Using Warmup Ratio: {args.warmup_ratio}') - print(f'Using Max Sequence Length: {args.max_seq_length}') - + print_fine_tune_config(args, lora_scale, tune_arguments) if args.fine_tune: print('') @@ -152,4 +92,7 @@ def main() -> None: print(f'{title} COMPLETED') + + + main_exception_handler(main, title, args.debug) diff --git a/src/main/utils/argument_utils.py b/src/main/utils/argument_utils.py index aa29792..4ad5ef6 100644 --- a/src/main/utils/argument_utils.py +++ b/src/main/utils/argument_utils.py @@ -102,7 +102,9 @@ def build_and_validate_tune_args(prog_args) -> TuneArguments: overwrite_output=prog_args.overwrite_output, neftune_noise_alpha=prog_args.neftune_noise_alpha, huggingface_auth_token=prog_args.huggingface_auth_token, - eval_dataset=prog_args.eval_dataset + eval_dataset=prog_args.eval_dataset, + eval_strategy=prog_args.eval_strategy if prog_args.eval_strategy is not None else prog_args.save_strategy, + eval_steps=prog_args.eval_steps if prog_args.eval_steps is not None else prog_args.save_steps ) tune_arguments.validate() return tune_arguments @@ -230,10 +232,12 @@ def _build_program_argument_parser(title: str, description: str) -> ArgumentPars parser.add_argument('-mgn', '--max-gradient-norm', help="Max gradient norm(default: 0.0)", type=float, default=0.0) parser.add_argument('-ss', '--save-strategy', help="Save strategy(default: epoch)", default="epoch") parser.add_argument('-msl', '--max-seq-length', help="The maximum sequence length to use for the `ConstantLengthDataset` and for automatically creating the Dataset(default: the smaller of the `tokenizer.model_max_length` and `1024`)", type=lambda x: _parse_nullable_int_arg(x), default="None") - parser.add_argument('-ssteps', '--save-steps', help="Save after each --save-steps steps(ignored when --save-strategy='epoch')(default: 50)", default=50, type=int) + parser.add_argument('-ssteps', '--save-steps', help="Save after each --save-steps steps(ignored when SAVE_STRATEGY='epoch')(default: 50)", default=50, type=int) parser.add_argument('-ms', '--max-saved', help="Maximum number of checkpoint saves to keep(this helps prevent filling up disk while tuning)(default: 5)", default=5, type=int) - parser.add_argument('-de', '--do-eval', help="Do evaluation on each save step(evaluates model loss after each 'save step')(default: false)", default="false", type=lambda x: _parse_bool_arg(x)) - parser.add_argument('-eds', '--eval-dataset', help="Path or HF id of evaluation dataset(defaults to training dataset when set to None)(default: None)", default="none", type=lambda x: _parse_nullable_arg(x)) + parser.add_argument('-de', '--do-eval', help="Do evaluation on each save step(default: false)", default="false", type=lambda x: _parse_bool_arg(x)) + parser.add_argument('-eds', '--eval-dataset', help="Path or HF id of evaluation dataset(defaults to training dataset when set to None)(default: None)", default="None", type=lambda x: _parse_nullable_arg(x)) parser.add_argument('-llm', '--llm-type', help="LLM Type(default: generic[options: generic, llama])", default="generic") + parser.add_argument('-evalstrat', 
'--eval-strategy', help="Eval strategy('None', 'epoch' or 'steps')(Defaults to SAVE_STRATEGY when set to None)(default: None)", default="None", type=lambda x: _parse_nullable_arg(x)) + parser.add_argument('-evalsteps', '--eval-steps', help="Steps between evaluations(Ignored when EVAL_STRATEGY is set to 'epoch')(Defaults to SAVE_STEPS when set to None)(default: None)", default="None", type=lambda x: _parse_nullable_int_arg(x)) return parser diff --git a/src/main/utils/config_utils.py b/src/main/utils/config_utils.py new file mode 100644 index 0000000..9cdb45a --- /dev/null +++ b/src/main/utils/config_utils.py @@ -0,0 +1,72 @@ +def print_serve_mode_config(args): + print("Running in serve mode") + print() + print("WARNING - Serve mode is currently EXPERIMENTAL and should NEVER be used in a production environment!") + print() + print(f'Using bf16: {str(args.use_bf_16)}') + print(f'Using fp16: {str(args.use_fp_16)}') + print(f'Using 8bit: {str(args.use_8bit)}') + print(f'Using 4bit: {str(args.use_4bit)}') + print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}') + print() + print(f"Serving {args.serve_model} on port {args.serve_port}") + + +def print_tune_mode_config(args, model_dir, tuner): + print('') + print(f'Using LLM Type: {tuner.llm_type}') + print('') + print(f'Output Directory: {args.output_directory}') + print(f'Base Model: {args.base_model}') + print(f'Model Save Directory: {model_dir}') + print(f'Training File: {args.training_data_file}') + print('') + print(f'Using CPU Only Tuning: {str(args.cpu_only_tuning)}') + print(f'Using tf32: {str(args.use_tf_32)}') + print(f'Using bf16: {str(args.use_bf_16)}') + print(f'Using fp16: {str(args.use_fp_16)}') + print(f'Using 8bit: {str(args.use_8bit)}') + print(f'Using 4bit: {str(args.use_4bit)}') + print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}') + print('') + print(f'Is Fine-Tuning: {str(args.fine_tune)}') + print(f'Is Merging: {str(args.merge)}') + print(f'Is Pushing: {str(args.push)}') + print('') + print(f'Is Chat Model: {args.is_chat_model}') + print(f'Is Instruct Model: {args.is_instruct_model}') + print(f'Using Additional Vocab Tokens: {args.additional_vocabulary_tokens}') + print(f'Is LangChain Agent Model: {args.use_agent_tokens}') + + +def print_fine_tune_config(args, lora_scale, tune_arguments): + print('') + if args.torch_empty_cache_steps is not None: + print(f'Empty Torch Cache After {args.torch_empty_cache_steps} Steps') + print(f'Using Checkpointing: {str(not args.no_checkpoint)}') + if not args.no_checkpoint: + print(f'Using Max Saved Checkpoints: {args.max_saved}') + print(f'Using Batch Size: {str(args.batch_size)}') + print(f'Using Save Strategy: {args.save_strategy}') + print(f'Using Save Steps: {str(args.save_steps)}') + print(f'Using Save Embeddings: {str(args.save_embeddings)}') + print('') + print(f'Epochs: {str(args.epochs)}') + print(f'Using LoRA R: {str(args.lora_r)}') + print(f'Using LoRA Alpha: {str(args.lora_alpha)}') + print(f'LoRA Adapter Scale(alpha/r): {str(lora_scale)}') + print(f'Using Optimizer: {args.optimizer_type}') + if 'adamw' in args.optimizer_type: + print(f'Using Base Learning Rate: {str(args.base_learning_rate)}') + print( + f'Using Actual Learning Rate(Base Learning Rate * Batch Size): {str(args.base_learning_rate * args.batch_size)}') + print(f'Learning Rate Scheduler Type: {str(args.lr_scheduler_type)}') + print(f'Using LoRA Dropout: {str(args.lora_dropout)}') + print(f'Using Warmup Ratio: {args.warmup_ratio}') + print(f'Using Max Sequence Length: {args.max_seq_length}') + 
print(f'Using Do Eval: {args.do_eval}') + if args.do_eval is not None and args.do_eval: + print(f'Using Eval Strategy: {tune_arguments.eval_strategy}') + if tune_arguments.eval_strategy == 'steps': + print(f'Using Eval Steps: {tune_arguments.eval_steps}') + print(f'Using Eval Dataset: {args.eval_dataset}') \ No newline at end of file From d5a3d06960d558ea691944a984c9bf30f6ed79a8 Mon Sep 17 00:00:00 2001 From: Robert Kirchner Date: Tue, 10 Sep 2024 10:22:45 -0500 Subject: [PATCH 4/5] bump patch - rk --- src/main/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/main.py b/src/main/main.py index 042e334..c8dc682 100644 --- a/src/main/main.py +++ b/src/main/main.py @@ -10,7 +10,7 @@ import os # TODO - Automate this -version = '2.1.1' +version = '2.1.2' title = f'Torch-Tuner CLI v{version}' description = 'This app is a simple CLI to automate the Supervised Fine-Tuning(SFT)(and testing of) of AI Large Language Model(LLM)s with simple text and jsonl on Nvidia GPUs(and Intel/AMD CPUs) using LoRA, Torch and Transformers.' From 1822a964a7326507746d20407453d07aa35ef45a Mon Sep 17 00:00:00 2001 From: Robert Kirchner Date: Tue, 10 Sep 2024 10:51:01 -0500 Subject: [PATCH 5/5] refactor print config - rk --- src/main/utils/config_utils.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/main/utils/config_utils.py b/src/main/utils/config_utils.py index 9cdb45a..c44e2fa 100644 --- a/src/main/utils/config_utils.py +++ b/src/main/utils/config_utils.py @@ -19,7 +19,7 @@ def print_tune_mode_config(args, model_dir, tuner): print(f'Output Directory: {args.output_directory}') print(f'Base Model: {args.base_model}') print(f'Model Save Directory: {model_dir}') - print(f'Training File: {args.training_data_file}') + print('') print(f'Using CPU Only Tuning: {str(args.cpu_only_tuning)}') print(f'Using tf32: {str(args.use_tf_32)}') @@ -41,6 +41,9 @@ def print_tune_mode_config(args, model_dir, tuner): def print_fine_tune_config(args, lora_scale, tune_arguments): print('') + print(f'Epochs: {str(args.epochs)}') + print(f'Using Tuning Dataset: {args.hf_training_dataset_id if args.hf_training_dataset_id is not None else args.training_data_file}') + if args.torch_empty_cache_steps is not None: print(f'Empty Torch Cache After {args.torch_empty_cache_steps} Steps') print(f'Using Checkpointing: {str(not args.no_checkpoint)}') @@ -50,23 +53,44 @@ def print_fine_tune_config(args, lora_scale, tune_arguments): print(f'Using Save Strategy: {args.save_strategy}') print(f'Using Save Steps: {str(args.save_steps)}') print(f'Using Save Embeddings: {str(args.save_embeddings)}') + if args.target_modules is not None: + print(f'Targeting Modules: {args.target_modules}') + elif args.target_all_modules: + print(f'Targeting Modules: ALL') + else: + print(f'Targeting Modules: LINEAR') print('') - print(f'Epochs: {str(args.epochs)}') print(f'Using LoRA R: {str(args.lora_r)}') print(f'Using LoRA Alpha: {str(args.lora_alpha)}') print(f'LoRA Adapter Scale(alpha/r): {str(lora_scale)}') + print(f'Using LoRA Dropout: {str(args.lora_dropout)}') + print(f'Using LoRA Bias: {str(args.bias)}') + print() print(f'Using Optimizer: {args.optimizer_type}') if 'adamw' in args.optimizer_type: print(f'Using Base Learning Rate: {str(args.base_learning_rate)}') print( f'Using Actual Learning Rate(Base Learning Rate * Batch Size): {str(args.base_learning_rate * args.batch_size)}') print(f'Learning Rate Scheduler Type: {str(args.lr_scheduler_type)}') - print(f'Using LoRA Dropout: 
{str(args.lora_dropout)}') + print(f'Using Warmup Ratio: {args.warmup_ratio}') print(f'Using Max Sequence Length: {args.max_seq_length}') + print(f'Using Group By Length: {args.group_by_length}') + print(f'Using Weight Decay: {args.weight_decay}') + print(f'Using Max Gradient Norm: {args.max_gradient_norm}') + print(f'Using Gradient Accumulation Steps: {args.gradient_accumulation_steps}') + print() print(f'Using Do Eval: {args.do_eval}') if args.do_eval is not None and args.do_eval: print(f'Using Eval Strategy: {tune_arguments.eval_strategy}') if tune_arguments.eval_strategy == 'steps': print(f'Using Eval Steps: {tune_arguments.eval_steps}') - print(f'Using Eval Dataset: {args.eval_dataset}') \ No newline at end of file + if args.eval_dataset is None: + print(f'Using Eval Dataset: {args.hf_training_dataset_id if args.hf_training_dataset_id is not None else args.training_data_file}') + else: + print(f'Using Eval Dataset: {args.eval_dataset }') + print() + if args.is_instruct_model: + print(f'Using NEFTune Noise Alpha: {args.neftune_noise_alpha}') + +
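
Note on the eval wiring added across these patches: the new CLI options (--do-eval, --eval-dataset, --eval-strategy, --eval-steps) flow through TuneArguments into the trainer, and dataset_utils attaches an 'eval' split that falls back to the training data when no eval dataset is given. The sketch below is illustrative only and is not part of the patches: the helper name resolve_eval_split and the file paths are made-up placeholders, and it assumes a local JSONL training file loaded with the Hugging Face `datasets` library.

    # Illustrative sketch, loosely mirroring dataset_utils._load_eval_ds.
    import os
    from datasets import load_dataset, DatasetDict

    def resolve_eval_split(train_files: dict, eval_path: str | None, do_eval: bool) -> DatasetDict:
        # Load the training split; load_dataset("json", data_files=...) returns a DatasetDict.
        ds = load_dataset("json", data_files=train_files)
        if not do_eval:
            return ds
        if eval_path is None:
            # No --eval-dataset given: reuse the training data for evaluation.
            ds["eval"] = ds["train"]
        elif os.path.exists(eval_path) and eval_path.endswith(".jsonl"):
            # Local JSONL file passed via --eval-dataset.
            ds["eval"] = load_dataset("json", data_files={"train": eval_path})["train"]
        else:
            # Otherwise treat --eval-dataset as a Hugging Face dataset id.
            ds["eval"] = load_dataset(eval_path, split="train")
        return ds

    # Example usage (placeholder paths):
    # ds = resolve_eval_split({"train": "data/train.jsonl"}, "data/eval.jsonl", do_eval=True)
    # SFTTrainer(..., train_dataset=ds["train"], eval_dataset=ds["eval"] if do_eval else None)

As configured in argument_utils.py, --eval-strategy falls back to --save-strategy and --eval-steps falls back to --save-steps when left unset, so evaluation follows the checkpoint cadence by default.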