Merge pull request #47 from rjojjr/create-windows-os-installer-script
Release v2.1.1
rjojjr authored Sep 8, 2024
2 parents 6bdd6b9 + 5acd55c commit 2516435
Showing 11 changed files with 100 additions and 30 deletions.
26 changes: 18 additions & 8 deletions README.md
@@ -33,20 +33,26 @@ installed on the host. I would like to add CPU based tuning in the near future.

#### Install the Torch Tuner CLI

You can install the torch tuner CLI as a system-wide application on any Linux OS(and Mac OS)(Windows support coming soon[although this will probably work on WSL(Windows Subsystem for Linux), which you should probably be using anyway])
with [this script](scripts/install-torch-tuner.sh) if you don't want to have to mess with python or the repository in general. After installation,
You can install the torch tuner CLI as a system-wide application on any OS (including Windows,
although the Linux script will probably work on WSL (Windows Subsystem for Linux), which you should probably be using anyway)
with [this script](scripts/install-torch-tuner.sh) (or [this script for Windows](scripts/win/install-torch-tuner.bat))
if you don't want to mess with Python or the repository in general. After installation,
you can run the CLI with the `torch-tuner` command.

**NOTE** - You must run the script with the `sudo` command.
**NOTE** - You must run the script with root/admin privileges.

You can download the latest installer script from [Github](https://raw.githubusercontent.com/rjojjr/torch-tuner/master/scripts/install-torch-tuner.sh)
You can download the latest installer script from [Github](https://github.com/rjojjr/torch-tuner)
and execute it with the following single command for your platform:

```shell
# Linux(and WSL)
wget -O - https://raw.githubusercontent.com/rjojjr/torch-tuner/master/scripts/install-torch-tuner.sh | sudo bash

# Windows(non-WSL) (requires git & python3.11 already installed on target machine)
curl -sSL https://raw.githubusercontent.com/rjojjr/torch-tuner/master/scripts/win/install-torch-tuner.bat -o install-torch-tuner.bat && install-torch-tuner.bat && del install-torch-tuner.bat
```

**NOTE** - If the installer script fails with OS level python dependency errors, and you are using Debian Linux,
**NOTE** - If the Unix installer script fails with OS-level Python dependency errors and you are using Debian Linux,
try running the script with the `--install-apt-deps` flag. Otherwise, install the missing OS packages (python3, pip and python3-venv)
and run the torch-tuner CLI installer script again.
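
On Debian-based systems, for example, the missing packages can usually be installed with something like the following (a sketch; assumes `apt` and sudo access):

```shell
# Debian/Ubuntu - install the OS-level python packages the installer expects
sudo apt-get update
sudo apt-get install -y python3 python3-pip python3-venv
```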

@@ -59,7 +65,11 @@ You can update the installed torch-tuner CLI instance at any time by running the
You can uninstall the torch-tuner CLI by running the uninstaller script:

```shell
sudo bash /usr/local/torch-tuner/scripts/uninstall-torch-tuner.sh
# Linux(and MacOS/WSL)
sudo bash /usr/local/torch-tuner/scripts/uninstall-torch-tuner.sh

# Windows
"%UserProfile%\.local\torch-tuner\scripts\win\uninstall-torch-tuner.bat"
```

#### Merging your LoRA Adapter
@@ -135,7 +145,7 @@ python src/main/main.py \
--lora-alpha 32

# A Real Example with CLI Installed
torch-tuner \
--base-model meta-llama/Meta-Llama-3-8B-Instruct \
--new-model llama-tuned \
--training-data-dir /path/to/data \
@@ -163,7 +173,7 @@ python src/main/main.py \
--serve-port 8080

# When the Torch Tuner CLI is installed
torch-tuner \
--serve true \
--serve-model llama-tuned \
--serve-port 8080
7 changes: 5 additions & 2 deletions ROADMAP.md
@@ -21,7 +21,7 @@ but in the meantime I will track work/needs/bugs/requests here.

- Add production wrapper to LLM REST server
- ~~Add ability to provide special/regular tokens to model vocabulary~~
- Add Windows OS support
- ~~Add Windows OS support~~
- ~~Add support for non-llama models~~
- This is mostly satisfied with the addition of the 'GENERIC' LLM type
- ~~Mistral~~
@@ -43,4 +43,7 @@ but in the meantime I will track work/needs/bugs/requests here.
- Add multi-gpu support
- Add support for ignored OpenAI request properties
- Add embeddings endpoint to serve mode
- Add ability to serve models on CPU
- Add ability to serve models on CPU
- Add ability to use JSON configs instead of CLI arguments
- Add ability to change the served model via an API endpoint
2 changes: 2 additions & 0 deletions requirements.txt
@@ -1,4 +1,6 @@
# TODO - Lock down dep. versions?
numpy
scipy
torch
torchvision
torchaudio
31 changes: 31 additions & 0 deletions scripts/win/install-torch-tuner.bat
@@ -0,0 +1,31 @@
@echo off
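rem NOTE: run this script from an elevated (Administrator) prompt - it writes the launcher into C:\Windows\System32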
echo "Installing Torch Tuner CLI"

pushd .

echo "Preparing installation directory"
if exist "%UserProfile%\AppData\Local\torch-tuner" @RD /S /Q "%UserProfile%\AppData\Local\torch-tuner"
cd "%UserProfile%\AppData\Local"
echo "Getting latest CLI from github"
git clone https://github.com/rjojjr/torch-tuner.git

cd torch-tuner

echo "Installing python dependencies"

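rem Install the python dependencies into an isolated venv so they do not touch the system Python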
python3.11 -m venv .\.venv && call .\.venv\Scripts\activate.bat
python3.11 -m pip install -I -r requirements.txt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us
call deactivate

echo "Finalizing install"

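rem 'echo f' answers xcopy's file-or-directory prompt; copying the launcher into System32 puts 'torch-tuner' on the PATH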
echo f | xcopy /f /y scripts\win\torch-tuner.bat C:\Windows\System32\torch-tuner.bat

icacls "%UserProfile%\AppData\Local\torch-tuner" /grant Users:F
attrib -s C:\Windows\System32\torch-tuner.bat

popd

echo "Torch Tuner CLI installed successfully!"
echo "You can now access the Torch Tuner CLI with the 'torch-tuner' command."

5 changes: 5 additions & 0 deletions scripts/win/torch-tuner.bat
@@ -0,0 +1,5 @@
@echo off
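rem Thin launcher: activate the CLI's venv, forward all arguments to main.py, then deactivate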
call "%UserProfile%\AppData\Local\torch-tuner\.venv\Scripts\activate.bat"
set args=%*
python3.11 "%UserProfile%\AppData\Local\torch-tuner\src\main\main.py" %args%
call deactivate
7 changes: 7 additions & 0 deletions scripts/win/uninstall-torch-tuner.bat
@@ -0,0 +1,7 @@
@echo off
echo "Uninstalling Torch Tuner CLI"

del /s /q "%UserProfile%\AppData\Local\torch-tuner"
rmdir /s /q "%UserProfile%\AppData\Local\torch-tuner"
del /q C:\Windows\System32\torch-tuner.bat
echo "Uninstalled Torch Tuner CLI"
21 changes: 13 additions & 8 deletions src/main/arguments/arguments.py
@@ -24,14 +24,15 @@ def validate(self) -> None:
class LlmArguments(CliArguments):
"""Base LLM load parameters."""

def __init__(self, model: str, use_4bit: bool = False, use_8bit: bool = False, is_fp16: bool = False, is_bf16: bool = False, fp32_cpu_offload: bool = False, padding_side: str | None = 'right'):
def __init__(self, model: str, use_4bit: bool = False, use_8bit: bool = False, is_fp16: bool = False, is_bf16: bool = False, fp32_cpu_offload: bool = False, padding_side: str | None = 'right', huggingface_auth_token: str | None = None):
self.model = model
self.use_4bit = use_4bit
self.use_8bit = use_8bit
self.is_fp16 = is_fp16
self.is_bf16 = is_bf16
self.fp32_cpu_offload = fp32_cpu_offload
self.padding_side = padding_side
self.huggingface_auth_token = huggingface_auth_token

def validate(self) -> None:
if self.use_4bit and self.use_8bit:
@@ -49,7 +50,7 @@ class TunerFunctionArguments(CliArguments):

def __init__(self, new_model: str, is_fp16: bool = False, is_bf16: bool = False, use_4bit: bool = False, use_8bit: bool = False,
fp32_cpu_offload: bool = False, is_chat_model: bool = True,
padding_side: str | None = 'right', use_agent_tokens: bool = False, additional_vocabulary_tokens: list | None = None):
padding_side: str | None = 'right', use_agent_tokens: bool = False, additional_vocabulary_tokens: list | None = None, huggingface_auth_token: str | None = None):
self.new_model = new_model
self.is_fp16 = is_fp16
self.is_bf16 = is_bf16
@@ -60,6 +61,7 @@ def __init__(self, new_model: str, is_fp16: bool = False, is_bf16: bool = False,
self.padding_side = padding_side
self.use_agent_tokens = use_agent_tokens
self.additional_vocabulary_tokens = additional_vocabulary_tokens
self.huggingface_auth_token = huggingface_auth_token

def validate(self) -> None:
if self.use_4bit and self.use_8bit:
@@ -138,8 +140,9 @@ def __init__(self,
hf_training_dataset_id: str | None = None,
max_seq_length: int | None = None,
overwrite_output: bool = True,
neftune_noise_alpha: float = 5.0):
super(TuneArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, fp32_cpu_offload, is_chat_model, padding_side, use_agent_tokens, additional_vocabulary_tokens)
neftune_noise_alpha: float = 5.0,
huggingface_auth_token: str | None = None):
super(TuneArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, fp32_cpu_offload, is_chat_model, padding_side, use_agent_tokens, additional_vocabulary_tokens, huggingface_auth_token)
self.r = r
self.alpha = alpha
self.epochs = epochs
@@ -224,8 +227,9 @@ def __init__(self,
padding_side: str | None = 'right',
use_agent_tokens: bool = False,
additional_vocabulary_tokens: list | None = None,
overwrite_output: bool = True):
super(MergeArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, is_chat_model=is_chat_model, padding_side=padding_side, use_agent_tokens=use_agent_tokens, additional_vocabulary_tokens=additional_vocabulary_tokens)
overwrite_output: bool = True,
huggingface_auth_token: str | None = None):
super(MergeArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, is_chat_model=is_chat_model, padding_side=padding_side, use_agent_tokens=use_agent_tokens, additional_vocabulary_tokens=additional_vocabulary_tokens, huggingface_auth_token=huggingface_auth_token)
self.base_model = base_model
self.output_dir = output_dir
self.overwrite_output = overwrite_output
@@ -257,8 +261,9 @@ def __init__(self,
is_chat_model: bool = True,
padding_side: str | None = 'right',
use_agent_tokens: bool = False,
additional_vocabulary_tokens: list | None = None):
super(PushArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, is_chat_model=is_chat_model, padding_side=padding_side, use_agent_tokens=use_agent_tokens, additional_vocabulary_tokens=additional_vocabulary_tokens)
additional_vocabulary_tokens: list | None = None,
huggingface_auth_token: str | None = None):
super(PushArguments, self).__init__(new_model, is_fp16, is_bf16, use_4bit, use_8bit, is_chat_model=is_chat_model, padding_side=padding_side, use_agent_tokens=use_agent_tokens, additional_vocabulary_tokens=additional_vocabulary_tokens, huggingface_auth_token=huggingface_auth_token)
self.model_dir = model_dir
self.public_push = public_push

1 change: 1 addition & 0 deletions src/main/base/llm_base_module.py
@@ -99,6 +99,7 @@ def fine_tune_base(arguments: TuneArguments, tokenizer, base_model) -> None:

# TODO - FIXME - There is a warning from checkpointing that I believe is due to the underlying torch impl.
if os.path.exists(output_dir) and not arguments.no_checkpoint:
print('Loading checkpoint')
model.gradient_checkpointing_enable()
last_checkpoint = get_last_checkpoint(output_dir)
train.train(resume_from_checkpoint=last_checkpoint)
6 changes: 3 additions & 3 deletions src/main/hf/hf_auth.py
@@ -3,10 +3,10 @@
from exception.exceptions import HuggingfaceAuthException


def authenticate_with_hf() -> None:
"""Authenticate with Huggingface using `HUGGING_FACE_TOKEN` environment variable."""
def authenticate_with_hf(auth_token: str | None = None) -> None:
"""Authenticate with Huggingface"""
print('Authenticating with Huggingface')
try:
login(os.environ.get('HUGGING_FACE_TOKEN'))
login(os.environ.get('HUGGING_FACE_TOKEN') if auth_token is None else auth_token)
except Exception as e:
raise HuggingfaceAuthException(f'error authenticating with huggingface: {str(e)}')
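
With this change, the Huggingface auth token can be passed explicitly instead of relying on the `HUGGING_FACE_TOKEN` environment variable, and an explicit token takes precedence. A minimal usage sketch with the CLI installed (the token value is a placeholder):

```shell
# Option 1: environment variable (previous behavior, still supported)
export HUGGING_FACE_TOKEN=hf_your_token_here
torch-tuner --serve true --serve-model llama-tuned --serve-port 8080

# Option 2: explicit flag (new in this release; overrides the environment variable)
torch-tuner --serve true --serve-model llama-tuned --serve-port 8080 \
  --huggingface-auth-token hf_your_token_here
```
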
6 changes: 3 additions & 3 deletions src/main/main.py
@@ -9,7 +9,7 @@
import os

# TODO - Automate this
version = '2.1.0'
version = '2.1.1'

title = f'Torch-Tuner CLI v{version}'
description = 'This app is a simple CLI that automates Supervised Fine-Tuning (SFT) and testing of AI Large Language Models (LLMs) with simple text and jsonl data on Nvidia GPUs (and Intel/AMD CPUs) using LoRA, Torch and Transformers.'
@@ -44,6 +44,8 @@ def main() -> None:
print(f'Using fp32 CPU Offload: {str(args.fp32_cpu_offload)}')
print()
print(f"Serving {args.serve_model} on port {args.serve_port}")

authenticate_with_hf(args.huggingface_auth_token)
model_path = os.path.expanduser(f"{args.output_directory}{os.sep}{args.serve_model}" if (not '/' in args.serve_model and not os.sep in args.serve_model) else args.serve_model)
llm_factory_args = LlmExecutorFactoryArguments(model=model_path, use_4bit=args.use_4bit, use_8bit=args.use_8bit, is_fp16=args.use_fp_16, is_bf16=args.use_bf_16, padding_side=args.padding_side)
llm_executor_factory = build_llm_executor_factory(llm_factory_args)
@@ -63,8 +65,6 @@ def main() -> None:
merge_arguments = build_and_validate_merge_args(args)
push_arguments = build_and_validate_push_args(args, model_dir)

authenticate_with_hf()

print('')
print(f'Using LLM Type: {tuner.llm_type}')

18 changes: 12 additions & 6 deletions src/main/utils/argument_utils.py
@@ -18,7 +18,8 @@ def build_and_validate_push_args(prog_args, model_dir: str):
public_push=prog_args.public_push,
padding_side=prog_args.padding_side,
use_agent_tokens=prog_args.use_agent_tokens,
additional_vocabulary_tokens=prog_args.additional_vocabulary_tokens
additional_vocabulary_tokens=prog_args.additional_vocabulary_tokens,
huggingface_auth_token=prog_args.huggingface_auth_token
)
push_arguments.validate()
return push_arguments
@@ -39,12 +40,13 @@ def build_and_validate_merge_args(prog_args) -> MergeArguments:
use_8bit=prog_args.use_8bit,
is_bf16=prog_args.use_bf_16,
is_fp16=prog_args.use_fp_16,
output_dir=os.path.expanduser(prog_args.output_directory),
output_dir=os.path.expanduser(prog_args.output_directory) if prog_args.output_directory is not None else None,
padding_side=prog_args.padding_side,
use_agent_tokens=prog_args.use_agent_tokens,
additional_vocabulary_tokens=prog_args.additional_vocabulary_tokens,
is_chat_model=prog_args.is_chat_model or (prog_args.training_data_file is not None and prog_args.training_data_file.endswith(".jsonl")),
overwrite_output=prog_args.overwrite_output
overwrite_output=prog_args.overwrite_output,
huggingface_auth_token=prog_args.huggingface_auth_token
)
merge_arguments.validate()
return merge_arguments
@@ -58,7 +60,7 @@ def build_and_validate_tune_args(prog_args) -> TuneArguments:
tune_arguments = TuneArguments(
base_model=prog_args.base_model,
new_model=prog_args.new_model,
training_data_dir=os.path.expanduser(prog_args.training_data_dir),
training_data_dir=os.path.expanduser(prog_args.training_data_dir) if prog_args.training_data_dir is not None else None,
train_file=prog_args.training_data_file,
r=prog_args.lora_r,
alpha=prog_args.lora_alpha,
@@ -98,7 +100,8 @@ def build_and_validate_tune_args(prog_args) -> TuneArguments:
hf_training_dataset_id=prog_args.hf_training_dataset_id,
max_seq_length=prog_args.max_seq_length,
overwrite_output=prog_args.overwrite_output,
neftune_noise_alpha=prog_args.neftune_noise_alpha
neftune_noise_alpha=prog_args.neftune_noise_alpha,
huggingface_auth_token=prog_args.huggingface_auth_token
)
tune_arguments.validate()
return tune_arguments
@@ -119,9 +122,11 @@ def do_initial_arg_validation(args):
raise ArgumentValidationException("'merge-only' cannot be used when both 'merge' and 'push' are set to 'false'")
if args.fine_tune and args.epochs <= 0:
raise ArgumentValidationException("cannot tune when epochs is set to <= 0")
if args.fine_tune and (args.hf_training_dataset_id is None) and (not os.path.exists(args.training_data_dir) or not os.path.exists(
if args.fine_tune and (args.hf_training_dataset_id is None) and (args.training_data_dir is None or args.training_data_file is None or not os.path.exists(args.training_data_dir) or not os.path.exists(
f'{args.training_data_dir}/{args.training_data_file}')):
raise ArgumentValidationException('training data directory or file not found')
if args.new_model is None:
raise ArgumentValidationException("'--new-model' CLI argument must be provided")


def parse_arguments(title: str, description: str):
@@ -176,6 +181,7 @@ def _build_program_argument_parser(title: str, description: str) -> ArgumentParser:
parser.add_argument('-tm', '--target-modules', help="Modules to target(CSV List: 'q,k')(OVERRIDES '--target-all-modules' when not None)(default: None)", type=lambda x: _parse_nullable_list_arg(x), default="None")
parser.add_argument('-tecs', '--torch-empty-cache-steps', help="Empty torch cache after x steps(NEVER empties cache when set to None)(USEFUL to prevent OOM issues)(default: 1)", type=lambda x: _parse_nullable_int_arg(x), default="1")
parser.add_argument('-cft', '--cpu-only-tuning', default="false", help="Run a fine-tune job on CPU ONLY(default: false)", type=lambda x: _parse_bool_arg(x))
parser.add_argument('-hfat', '--huggingface-auth-token', default="None", help="Huggingface auth token(default: None)", type=lambda x: _parse_nullable_arg(x))

parser.add_argument('-ft', '--fine-tune', default="true", help="Run a fine-tune job(default: true)", type=lambda x: _parse_bool_arg(x))
parser.add_argument('-m', '--merge', default="true",
