Skip to content

Commit

Permalink
Offloading Bug Fix (#58)
Browse the repository at this point in the history
* fix fstring

* fix offloaded sparsity calculation
  • Loading branch information
Sara Adkins authored Aug 6, 2024
1 parent 0a0a2de commit 066d1e4
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions — src/llmcompressor/transformers/compression/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Dict, List, Optional, Union

from accelerate.accelerator import get_state_dict_offloaded_model
import psutil
import torch
from accelerate import infer_auto_device_map, init_empty_weights
Expand Down Expand Up @@ -73,10 +73,12 @@ def infer_sparsity_structure_from_model(model: torch.nn.Module) -> Optional[str]
structures = {"2:4"}
for sparsity_structure in structures:
linear_modules = get_linear_layers(model)
offloaded_params = get_state_dict_offloaded_model(model)

linear_modules_with_sparsity_structure = [
tensor_follows_mask_structure(layer.weight)
for layer in tqdm(
linear_modules.values(),
tensor_follows_mask_structure(offloaded_params[f"{name}.weight"])
for name in tqdm(
linear_modules.keys(),
desc="Checking whether model follows "
f"{sparsity_structure} sparsity structure",
)
Expand Down Expand Up @@ -199,7 +201,7 @@ def calculate_offload_device_map(
available_gpus = torch.cuda.device_count()
if available_gpus < num_gpus:
raise ValueError(
"Requested {num_gpus} GPUs but only {available_gpus} are available."
f"Requested {num_gpus} GPUs but only {available_gpus} are available."
)
max_gpu_memory = [max_gpu_memory] * num_gpus

Expand Down

0 comments on commit 066d1e4

Please sign in to comment.