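"""Quantize a torchvision MobileNetV2 to ESP-DL format with PPQ.

The script downloads the pretrained model and a small ImageNet calibration
set, builds a quantization setting (optionally with mixed-precision
dispatching or layerwise equalization), quantizes the model with
espdl_quantize_torch, and evaluates the quantized graph on the calibration
images.
"""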
import os
import urllib.request
import zipfile
from typing import Iterable, List, Optional, Tuple

import torch
import torch.nn as nn
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset
from torchvision.models.mobilenetv2 import MobileNet_V2_Weights

from datasets.imagenet_util import evaluate_ppq_module_with_imagenet
from ppq import QuantizationSetting, QuantizationSettingFactory
from ppq.api import espdl_quantize_torch, get_target_platform


def convert_relu6_to_relu(model: nn.Module) -> nn.Module:
    """Recursively replace every nn.ReLU6 in the model with nn.ReLU."""
    for child_name, child in model.named_children():
        if isinstance(child, nn.ReLU6):
            setattr(model, child_name, nn.ReLU())
        else:
            convert_relu6_to_relu(child)
    return model


def quant_setting_mobilenet_v2(
    model: nn.Module,
    optim_quant_method: Optional[List[str]] = None,
) -> Tuple[QuantizationSetting, nn.Module]:
    """Build a quantization setting for MobileNetV2, optionally with extra optimizations.

    Args:
        model (nn.Module): the float model to be quantized.
        optim_quant_method (Optional[List[str]]): supported values are
            'MixedPrecision_quantization' and 'LayerwiseEqualization_quantization'.
            - 'MixedPrecision_quantization': layers that show larger errors under
              8-bit quantization are dispatched to 16-bit quantization. Remove or
              add layers according to your needs.
            - 'LayerwiseEqualization_quantization': applies the weight-equalization
              strategy proposed by Markus Nagel et al.; see
              https://openaccess.thecvf.com/content_ICCV_2019/papers/Nagel_Data-Free_Quantization_Through_Weight_Equalization_and_Bias_Correction_ICCV_2019_paper.pdf
              for details. Because MobileNetV2 uses ReLU6, it is converted to
              ReLU for better precision.

    Returns:
        Tuple[QuantizationSetting, nn.Module]: the quantization setting and the
        (possibly modified) model.
    """
    quant_setting = QuantizationSettingFactory.espdl_setting()
    if optim_quant_method is not None:
        if "MixedPrecision_quantization" in optim_quant_method:
            # These layers show larger errors in 8-bit quantization, so they are
            # dispatched to 16-bit quantization. Remove or add layers as needed.
            quant_setting.dispatching_table.append(
                "/features/features.1/conv/conv.0/conv.0.0/Conv",
                get_target_platform(TARGET, 16),
            )
            quant_setting.dispatching_table.append(
                "/features/features.1/conv/conv.0/conv.0.2/Clip",
                get_target_platform(TARGET, 16),
            )
        elif "LayerwiseEqualization_quantization" in optim_quant_method:
            # Layerwise equalization.
            quant_setting.equalization = True
            quant_setting.equalization_setting.iterations = 4
            quant_setting.equalization_setting.value_threshold = 0.4
            quant_setting.equalization_setting.opt_level = 2
            quant_setting.equalization_setting.interested_layers = None
            # Replace ReLU6 with ReLU.
            model = convert_relu6_to_relu(model)
        else:
            raise ValueError(
                "Please set optim_quant_method correctly. Supported values: "
                "'MixedPrecision_quantization', 'LayerwiseEqualization_quantization'"
            )
    return quant_setting, model
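

# Note: the layer names in the dispatching table above (for example
# "/features/features.1/conv/conv.0/conv.0.0/Conv") are the names in the
# graph that PPQ traces from the torch model. Running the quantizer with
# error_report=True (as done below) lists layerwise quantization errors
# together with the exact names to use. A hedged sketch of dispatching one
# more layer, with a placeholder op name:
#
#   quant_setting.dispatching_table.append(
#       "<op_name_from_error_report>", get_target_platform(TARGET, 16)
#   )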


def collate_fn1(x: Tuple) -> torch.Tensor:
    """Stack the image tensors of a batch of (image, label) samples."""
    return torch.cat([sample[0].unsqueeze(0) for sample in x], dim=0)


def collate_fn2(batch: torch.Tensor) -> torch.Tensor:
    """Move an already-collated batch to the target device."""
    return batch.to(DEVICE)


def report_hook(blocknum, blocksize, total):
    """Print download progress; urlretrieve may report total as -1 when unknown."""
    downloaded = blocknum * blocksize
    if total > 0:
        percent = min(downloaded / total * 100, 100.0)
        print(f"\rDownloading calibration dataset: {percent:.2f}%", end="")


if __name__ == "__main__":
    BATCH_SIZE = 32
    INPUT_SHAPE = [3, 224, 224]
    DEVICE = "cpu"  # 'cuda' or 'cpu'; if you use cuda, make sure cuda is available
    TARGET = "esp32p4"
    NUM_OF_BITS = 8
    ESPDL_MODEL_PATH = "models/torch/mobilenet_v2.espdl"
    CALIB_DIR = "./imagenet"

    # Download the pretrained mobilenet_v2 from torchvision and the calibration dataset.
    model = torchvision.models.mobilenet.mobilenet_v2(
        weights=MobileNet_V2_Weights.IMAGENET1K_V1
    )
    model = model.to(DEVICE)

    imagenet_url = "https://dl.espressif.com/public/imagenet_calib.zip"
    os.makedirs(CALIB_DIR, exist_ok=True)
    if not os.path.exists("imagenet_calib.zip"):
        urllib.request.urlretrieve(
            imagenet_url, "imagenet_calib.zip", reporthook=report_hook
        )
    if not os.path.exists(os.path.join(CALIB_DIR, "calib")):
        # The archive extracts into <CALIB_DIR>/calib.
        with zipfile.ZipFile("imagenet_calib.zip", "r") as zip_file:
            zip_file.extractall(CALIB_DIR)
    CALIB_DIR = os.path.join(CALIB_DIR, "calib")
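
    # datasets.ImageFolder (used below) expects the standard torchvision
    # layout: one sub-directory per class under CALIB_DIR, each holding the
    # images of that class.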

    # -------------------------------------------
    # Prepare Calibration Dataset
    # -------------------------------------------
    if os.path.exists(CALIB_DIR):
        print(f"load imagenet calibration dataset from directory: {CALIB_DIR}")
        dataset = datasets.ImageFolder(
            CALIB_DIR,
            transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(
                        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                    ),
                ]
            ),
        )
        # Calibrate on the first 1024 images.
        dataset = Subset(dataset, indices=list(range(1024)))
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=4,
            pin_memory=False,
            collate_fn=collate_fn1,
        )
    else:
        # Random calibration data, only for debugging.
        print("load random calibration dataset")

        def load_random_calibration_dataset() -> Iterable:
            return [torch.rand(size=INPUT_SHAPE) for _ in range(BATCH_SIZE)]

        # Create a calibration dataloader from the random data.
        dataloader = DataLoader(
            dataset=load_random_calibration_dataset(),
            batch_size=BATCH_SIZE,
            shuffle=False,
        )
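
    # With BATCH_SIZE = 32 and calib_steps = 32 (below), calibration walks
    # exactly the 1024 images selected above (32 * 32 = 1024).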

    # -------------------------------------------
    # Quantize Torch Model
    # -------------------------------------------
    # Create a setting for quantizing your network with ESP-DL. If you don't
    # need extra quantization optimization, pass None as the second argument
    # of quant_setting_mobilenet_v2.
    # Example: using LayerwiseEqualization_quantization.
    quant_setting, model = quant_setting_mobilenet_v2(
        model, ["LayerwiseEqualization_quantization"]
    )
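
    # To try mixed precision instead, the call would be:
    #   quant_setting, model = quant_setting_mobilenet_v2(
    #       model, ["MixedPrecision_quantization"]
    #   )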
    quant_ppq_graph = espdl_quantize_torch(
        model=model,
        espdl_export_file=ESPDL_MODEL_PATH,
        calib_dataloader=dataloader,
        calib_steps=32,
        input_shape=[1] + INPUT_SHAPE,
        target=TARGET,
        num_of_bits=NUM_OF_BITS,
        collate_fn=collate_fn2,
        setting=quant_setting,
        device=DEVICE,
        error_report=True,
        skip_export=False,
        export_test_values=False,
        verbose=1,
    )
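
    # espdl_quantize_torch returns the quantized PPQ graph. Because
    # skip_export=False, the ESP-DL model is also written to ESPDL_MODEL_PATH,
    # and error_report=True requests a layerwise quantization error analysis
    # whose layer names can feed the mixed-precision dispatching table above.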

    # -------------------------------------------
    # Evaluate Quantized Model
    # -------------------------------------------
    # The calibration images double as the validation set here.
    evaluate_ppq_module_with_imagenet(
        model=quant_ppq_graph,
        imagenet_validation_dir=CALIB_DIR,
        batchsize=BATCH_SIZE,
        device=DEVICE,
        verbose=1,
    )