post_training_pruning.py

"""
This module quantizes a PyTorch model using post-training quantization and pruning.
"""
import copy
from os.path import isfile

import torch

from model.resnet import get_model
from prune import make_sparse
from quantization.post_training import quantize_static_fx
from validation import validate

# Load the FP32 baseline and keep an untouched copy for comparison.
model = get_model(num_classes=10)
checkpoint = "weights/original_model.pt"
model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
model_orig = copy.deepcopy(model)

checkpoint_quantized_prune = "weights/quantized_prune_model.pt"
if isfile(checkpoint_quantized_prune):
    # Reuse the cached TorchScript artifact if it already exists.
    model_quantized_prune = torch.jit.load(checkpoint_quantized_prune)
else:
    # Prune in place, quantize, then trace and save for later runs
    # (illustrative sketches of both helpers appear below).
    make_sparse(model)
    model_quantized_prune = quantize_static_fx(model)
    traced = torch.jit.trace(model_quantized_prune, torch.rand((1, 3, 224, 224)))
    torch.jit.save(traced, checkpoint_quantized_prune)
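

# For reference, a minimal sketch of what `make_sparse` (imported from the
# local `prune` module, not shown here) might do, assuming unstructured L1
# magnitude pruning via torch.nn.utils.prune. The name `make_sparse_sketch`
# and the `amount` default are illustrative assumptions, not the project's API.
def make_sparse_sketch(model, amount=0.3):
    import torch.nn.utils.prune as prune

    for module in model.modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            # Zero out the smallest-magnitude weights of each layer ...
            prune.l1_unstructured(module, name="weight", amount=amount)
            # ... and bake the mask into the weight tensor permanently.
            prune.remove(module, "weight")
    return model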

# Validate both models on CPU and report the speed/accuracy trade-off.
device = torch.device("cpu")
accuracy, loss, inference_time = validate(model_orig, device, n_total=100)
accuracy_quantized, loss_quantized, inference_time_quantized = validate(
    model_quantized_prune, device, n_total=100
)
print(f"Original model accuracy: {accuracy:.4f}, loss: {loss:.4f}, "
      f"inference time: {inference_time:.2f}ms")
print(f"Pruned + quantized model accuracy: {accuracy_quantized:.4f}, "
      f"loss: {loss_quantized:.4f}, "
      f"inference time: {inference_time_quantized:.2f}ms")