Commit af19b0f

Merge branch 'main' into rigl
ohaijen authored Nov 20, 2023
2 parents 5b22a49 + 1011fbc commit af19b0f
Showing 14 changed files with 143 additions and 25 deletions.
7 changes: 7 additions & 0 deletions docker/Dockerfile
@@ -65,6 +65,8 @@ ENV PATH="${VENV}/bin:$PATH"
 ENV PIP_DEFAULT_TIMEOUT=200
 ARG VERSION
 ARG MODE=""
+ARG BRANCH
+
 RUN \
 if [ -n "$BRANCH" ] ; then \
 echo Installing from BRANCH && \
@@ -92,6 +94,8 @@ ENV PATH="${VENV}/bin:$PATH"
 ENV PIP_DEFAULT_TIMEOUT=200
 ARG VERSION
 ARG MODE
+ARG BRANCH
+
 RUN \
 if [ -n "$BRANCH" ] ; then \
 echo Installing from BRANCH && \
@@ -115,6 +119,8 @@ ENV PATH="${VENV}/bin:$PATH"
 ENV PIP_DEFAULT_TIMEOUT=200
 ARG VERSION
 ARG MODE
+ARG BRANCH
+
 RUN \
 if [ -n "$BRANCH" ] ; then \
 echo Installing from BRANCH with editable mode && \
@@ -141,5 +147,6 @@ ARG VENV
 COPY --from=build $VENV $VENV
 ENV PATH="${VENV}/bin:$PATH"
 HEALTHCHECK CMD python -c 'import sparseml'
+RUN pip list | grep sparseml
 CMD bash

14 changes: 7 additions & 7 deletions setup.py
@@ -63,17 +63,17 @@

 _onnxruntime_deps = ["onnxruntime>=1.0.0"]
 _clip_deps = ["open_clip_torch==2.20.0"]
-supported_torch_version = "torch>=1.7.0,<=2.0"
+supported_torch_version = "torch>=1.7.0,<2.2"
 _pytorch_deps = [
     supported_torch_version,
     "gputils",
 ]
 _pytorch_all_deps = _pytorch_deps + [
-    "torchvision>=0.3.0,<=0.15.1",
+    "torchvision>=0.3.0,<0.17",
     "torchaudio<=2.0.1",
 ]
 _pytorch_vision_deps = _pytorch_deps + [
-    "torchvision>=0.3.0,<=0.15.1",
+    "torchvision>=0.3.0,<0.17",
     "opencv-python<=4.6.0.66",
 ]
 _transformers_deps = _pytorch_deps + [
@@ -103,9 +103,9 @@
     "black==22.12.0",
     "flake8==3.9.2",
     "isort==5.8.0",
-    "m2r2~=0.2.7",
+    "m2r2>=0.2.7",
     "mistune<3,>=2.0.3",
-    "myst-parser~=0.14.0",
+    "myst-parser>=0.14.0",
     "rinohtype~=0.4.2",
     "sphinx~=3.5.0",
     "sphinx-copybutton~=0.3.0",
@@ -114,8 +114,8 @@
     "sphinx-pydantic~=0.1.0",
     "sphinx-rtd-theme~=0.5.0",
     "wheel>=0.36.2",
-    "pytest~=6.2.0",
-    "pytest-mock~=3.6.0",
+    "pytest>=6.0.0",
+    "pytest-mock>=3.6.0",
     "flaky~=3.7.0",
     "sphinx-rtd-theme",
     "docutils<0.17",
8 changes: 1 addition & 7 deletions src/sparseml/modifiers/obcq/pytorch.py
@@ -46,7 +46,6 @@ class SparseGPTModifierPyTorch(SparseGPTModifier):

     model: Any = None
     device_: str = "cuda:0"
-    finalization_kwargs_: Optional[Dict] = None
     layer_prefix_: Optional[str] = None

     def on_initialize(self, state: "State", **kwargs) -> bool:
@@ -61,14 +60,12 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
         self.on_initialize_structure(state, **kwargs)
         if self.quantization_modifier_:
             self.quantization_modifier_.initialize(state, **kwargs)
-        self.finalization_kwargs_ = {}
         modifiable_model = state.model
         calibration_dataloader = state.data.calib
         device = state.hardware.device

         self.initialize_obcq(modifiable_model, device)
-        extras = self.apply_obcq(calibration_dataloader)
-        self.finalization_kwargs_.update(extras)
+        self.apply_obcq(calibration_dataloader)

         return True

@@ -99,7 +96,6 @@ def apply_obcq(
         Run OBCQ on the loaded model, using dataloader as calibration data
         :param dataloader: calibration data for OBCQ
-        :return: compression outputs used for finalization
         """
         accum_kwargs = {"dataloader": dataloader}

@@ -147,8 +143,6 @@ def apply_obcq(
             layer_kwargs = layer_compressor.compress(dev=self.device_, **accum_kwargs)
             accum_kwargs.update(layer_kwargs)

-        return extras
-
     def on_finalize(self, state: "State", **kwargs) -> bool:
         """
         disable the observers used by the OBCQ algorithm and set kv-cache configuration
3 changes: 2 additions & 1 deletion src/sparseml/modifiers/obcq/utils/sparsegpt.py
@@ -199,7 +199,8 @@ def fasterprune(
         _LOGGER.debug(torch.sum((self.layer(self._inp1) - self.out1) ** 2))
         _LOGGER.debug(torch.sum(Losses))

-        torch.cuda.synchronize()
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
         _LOGGER.info("time %.2f" % (time.time() - tick))
         _LOGGER.info("error %.2f" % torch.sum(Losses).item())

1 change: 1 addition & 0 deletions src/sparseml/modifiers/smoothquant/pytorch.py
@@ -127,6 +127,7 @@ def _calibrate(self, model: ModifiableModelPyTorch, calibration_dataloader: List
             calibration_dataloader,
             self.num_calibration_steps,
             self.calibration_function,
+            self.device_,
         )

         # remove the hooks now that we are done calibrating
2 changes: 1 addition & 1 deletion src/sparseml/pytorch/base.py
@@ -49,7 +49,7 @@


 _TORCH_MIN_VERSION = "1.0.0"
-_TORCH_MAX_VERSION = os.environ.get("MAX_TORCH", "2.0.100")
+_TORCH_MAX_VERSION = os.environ.get("MAX_TORCH", "2.1.10")


 def check_torch_install(
1 change: 1 addition & 0 deletions src/sparseml/transformers/data/__init__.py
@@ -15,6 +15,7 @@
 # flake8: noqa
 from .base_llm import TransformersDataset
 from .c4 import *
+from .evolcodealpaca import *
 from .gsm8k import *
 from .open_platypus import *
 from .ptb import *
56 changes: 56 additions & 0 deletions src/sparseml/transformers/data/evolcodealpaca.py
@@ -0,0 +1,56 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torch.nn import Module

from sparseml.transformers.data.base_llm import TransformersDataset


@TransformersDataset.register(name="evolcodealpaca")
class EvolCodeAlpaca(TransformersDataset):
    def __init__(
        self,
        model: Module,
        seqlen: int,
        nsamples: int,
        seed: int = 0,
        split: str = "train",
        split_percent_to_use: float = 1.0,
    ):
        super().__init__(
            model=model,
            seqlen=seqlen,
            nsamples=nsamples,
            path="theblackcat102/evol-codealpaca-v1",
            name=None,
            seed=seed,
            split=split,
            use_max_tokens=False,
            split_percent_to_use=split_percent_to_use,
        )

        processed_data = []
        for sample in self._data:
            processed_sample = (
                "Below is an instruction that describes a "
                "programming task. Write a program that appropriately "
                "completes the request.\n\n### Instruction:\n{instruction}"
                "\n\n### Response:\n"
            ).format(instruction=sample["instruction"])

            if "output" in sample:
                processed_sample += sample["output"]
            processed_data.append(processed_sample)

        self.create_dataloader(processed_data)
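
For orientation: importing this module (now triggered by the data/__init__.py change above) runs the @TransformersDataset.register decorator, so the dataset becomes selectable by name. Below is a minimal construction sketch, not part of the commit; all constructor values are illustrative placeholders, and the model argument is assumed (as in the sibling dataset classes) to be the model path from which the tokenizer is built:

# Hedged usage sketch (not part of this commit). The constructor values are
# placeholders; "model" is assumed to be a Hugging Face model path used to
# build the tokenizer, mirroring the sibling dataset classes.
from sparseml.transformers.data.evolcodealpaca import EvolCodeAlpaca

dataset = EvolCodeAlpaca(
    model="facebook/opt-1.3b",  # placeholder model path
    seqlen=2048,                # placeholder sequence length
    nsamples=128,               # placeholder number of calibration samples
)
# __init__ has already formatted the prompts and called
# create_dataloader(processed_data), so the calibration batches are ready.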
41 changes: 40 additions & 1 deletion src/sparseml/transformers/sparsification/obcq/obcq.py
@@ -18,6 +18,7 @@
 from pathlib import Path
 from typing import Optional

+import torch
 from torch.nn import Module
 from transformers import AutoConfig

@@ -38,6 +39,7 @@
 _LOGGER = logging.getLogger(__name__)
 SUPPORTED_DATASETS = TransformersDataset.registered_names()
 SUPPORTED_MODELS = ["opt", "llama", "mistral"]
+SUPPORTED_PRECISION = ["auto", "half", "full", "float16", "bfloat16", "float32"]


 def one_shot(
@@ -47,6 +49,7 @@ def one_shot(
     device: str = "cuda:0",
     deploy_dir: Optional[str] = ".",
     recipe_file: Optional[str] = None,
+    precision: str = "auto",
     eval_data: Optional[str] = None,
     do_save: Optional[bool] = False,
 ) -> Module:
@@ -59,6 +62,7 @@
     :param device: Device (cuda:index or cpu) to use for computation
     :param deploy_dir: The output directory to save the model to
     :param recipe_file: recipe containing SparseGPT configuration
+    :param precision: precision to load model as, either auto, half or full
     :param eval_data: dataset to use for perplexity evaluation, or none to skip
     :param do_save: whether to save the output model to disk
@@ -71,6 +75,10 @@
     if deploy_dir.exists():
         raise RuntimeError(f"deploy_dir={deploy_dir} already exists")

+    # fallback to cpu if cuda not available
+    device = _fallback_to_cpu(device)
+    _LOGGER.info(f"Running one_shot on device {device}")
+
     # Load the configuration from the model path
     config = AutoConfig.from_pretrained(model_path)
     model_type = config.model_type.lower()
@@ -88,7 +96,8 @@
         forward_fn = llama_forward
     else:
         raise ValueError(f"model_path={model_path} should be one of {SUPPORTED_MODELS}")
-    model = model_loader_fn(model_path)
+    torch_dtype = _parse_dtype(precision)
+    model = model_loader_fn(model_path, torch_dtype=torch_dtype)

     if dataset_name not in SUPPORTED_DATASETS:
         raise ValueError(
@@ -137,6 +146,18 @@
     return model


+def _parse_dtype(dtype_arg):
+    dtype = "auto"  # get precision from model by default
+    if dtype_arg == "half" or dtype_arg == "float16":
+        dtype = torch.float16
+    elif dtype_arg == "bfloat16":
+        dtype = torch.bfloat16
+    elif dtype_arg == "full" or dtype_arg == "float32":
+        dtype = torch.float32
+
+    return dtype
+
+
 def _save(model, tokenizer, save_path, recipe_path):
     model.save_pretrained(save_path)
     tokenizer.save_pretrained(save_path)
@@ -147,6 +168,16 @@ def _save(model, tokenizer, save_path, recipe_path):
         fp.write(load_recipe_yaml_str(recipe_path))


+def _fallback_to_cpu(device):
+    if "cuda" in device and not torch.cuda.is_available():
+        _LOGGER.warning(
+            f"Requested {device} but CUDA is not available, falling back to CPU"
+        )
+        return "cpu"
+
+    return device
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()

@@ -163,6 +194,13 @@ def _save(model, tokenizer, save_path, recipe_path):
     parser.add_argument("--device", type=str, default="cuda:0")
     parser.add_argument("--deploy-dir", type=str, default=".")
     parser.add_argument("--recipe", type=str, default=None)
+    parser.add_argument(
+        "--precision",
+        type=str,
+        choices=SUPPORTED_PRECISION,
+        default="auto",
+        help="Precision to cast model weights to, default to auto",
+    )
     parser.add_argument(
         "--eval", type=str, default=None, help="Optional dataset for perplexity eval"
     )
@@ -179,6 +217,7 @@ def _save(model, tokenizer, save_path, recipe_path):
         num_samples=args.nsamples,
         device=args.device,
         recipe_file=args.recipe,
+        precision=args.precision,
         eval_data=args.eval,
         do_save=args.save,
     )
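
Taken together, the new precision argument and the CPU fallback can also be exercised straight from Python. A hedged sketch follows; model_path, dataset_name, and num_samples are real parameter names visible in this diff, while both paths are placeholders:

# Hedged sketch of the new one_shot options (not part of this commit);
# the two paths below are placeholders.
from sparseml.transformers.sparsification.obcq.obcq import one_shot

model = one_shot(
    model_path="/path/to/llama/checkpoint",  # placeholder checkpoint path
    dataset_name="evolcodealpaca",           # dataset registered in this commit
    num_samples=128,
    device="cuda:0",            # _fallback_to_cpu() rewrites this to "cpu"
                                # with a warning when CUDA is unavailable
    recipe_file="recipe.yaml",  # placeholder SparseGPT recipe
    precision="half",           # _parse_dtype("half") -> torch.float16
)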
@@ -42,6 +42,7 @@ def opt_forward(model: Module, data_loader: List, device: str, nsamples: int = N
         dataloader=data_loader,
         device=device,
         nsamples=nsamples,
+        target_ids=["attention_mask"],
         layer_prefix="decoder",
     )
     buffer = [b[0] for b in cached_inputs.pop("inputs")]
@@ -95,6 +96,7 @@ def llama_forward(model: Module, data_loader: List, device: str, nsamples: int =
         dataloader=data_loader,
         device=device,
         nsamples=nsamples,
+        target_ids=["attention_mask", "position_ids"],
         layer_prefix=None,
     )
     buffer = [b[0] for b in cached_inputs.pop("inputs")]