trt_infer.py
'''
Inference by means of TensorRT
'''
import numpy as np
import imageio
import pycuda.driver as cuda
import pycuda.autoinit # For automatic creation and cleanup of CUDA context
import tensorrt as trt
import utils
__author__ = "Dmitry Korobchenko ([email protected])"
### Settings
ENGINE_PATH = 'data/engine.plan' # ADJUST
CLASSES = ['Cat', 'Dog'] # ADJUST
CROP_SIZE = (224, 224) # ADJUST
INPUT_DATA_TYPE = np.float32 # ADJUST
MEASURE_TIME = True # ADJUST
CALC_VAL_ACCURACY = True # ADJUST
### Load TensorRT engine
trt_logger = trt.Logger(trt.Logger.INFO)
runtime = trt.Runtime(trt_logger)
with open(ENGINE_PATH, "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())
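# A serialized engine is specific to the TensorRT version and GPU it was built
# for; deserialize_cuda_engine() returns None if the plan cannot be loaded.
assert engine is not None, 'Failed to load engine from {}'.format(ENGINE_PATH)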
### Prepare TRT execution context, CUDA stream and necessary buffers
context = engine.create_execution_context()
stream = cuda.Stream()
host_in = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=INPUT_DATA_TYPE)
host_out = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=INPUT_DATA_TYPE)
device_in = cuda.mem_alloc(host_in.nbytes)
device_out = cuda.mem_alloc(host_out.nbytes)
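# The buffers above assume a single input at binding index 0 and a single output
# at binding index 1 (typical for a classification engine with one input and one
# output). Page-locked host memory is used so the async copies below can run
# asynchronously on the CUDA stream. Newer TensorRT releases deprecate the
# binding-index API in favour of the tensor-name API (e.g. get_tensor_shape());
# this script keeps the older binding-index style.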
### Load and prepare input image
def prepare_image(img_in, crop_size):
    img = utils.resize_and_crop(img_in, crop_size)
    img = img.astype(INPUT_DATA_TYPE)
    img = img.transpose(2, 0, 1) # to CHW
    return img
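# NOTE: prepare_image() only resizes, crops and transposes to CHW. If the model
# was trained with additional preprocessing (mean subtraction, scaling, etc.),
# that must be replicated here or baked into the network.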
INPUT_IMAGE_PATH = 'img/cat.png' # ADJUST
img = imageio.imread(INPUT_IMAGE_PATH, pilmode='RGB')
img = prepare_image(img, CROP_SIZE)
### Run inference
def infer(img):
    bindings = [int(device_in), int(device_out)]
    np.copyto(host_in, img.ravel())                       # flatten image into pinned host buffer
    cuda.memcpy_htod_async(device_in, host_in, stream)    # host -> device
    context.execute_async(bindings=bindings, stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(host_out, device_out, stream)  # device -> host
    stream.synchronize()                                  # wait for the stream to finish
    return host_out
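# NOTE: execute_async() is the implicit-batch-dimension API. If the engine was
# built with an explicit batch dimension (the default in newer TensorRT
# releases), context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
# would be used here instead.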
out = infer(img)
print('Input : {}'.format(INPUT_IMAGE_PATH))
print('Output: {}'.format(out))
print('Prediction: {}'.format(CLASSES[np.argmax(out)]))
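# Optional: turn the raw scores into class probabilities. This assumes the
# engine outputs unnormalized logits (one value per class); skip this if the
# network already ends with a softmax layer.
probs = np.exp(out - np.max(out))
probs /= np.sum(probs)
print('Probabilities: {}'.format(dict(zip(CLASSES, probs.tolist()))))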
### Measure execution time
if MEASURE_TIME:
    import time

    TIMEIT_N_SKIP = 10 # ADJUST
    TIMEIT_N_RUN = 20  # ADJUST

    infer_time_arr = []
    for _ in range(TIMEIT_N_SKIP):  # warm-up runs, not timed
        out = infer(img)
    for _ in range(TIMEIT_N_RUN):
        time_start = time.time()
        out = infer(img)
        infer_time_arr.append(time.time() - time_start)

    print('Inference time: {:.3f} +- {:.3f} ms (Avg over {} runs, {} skipped)'.format(
        np.mean(infer_time_arr)*1000.,
        np.std(infer_time_arr)*1000.,
        TIMEIT_N_RUN, TIMEIT_N_SKIP))
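    # Note: these timings include the host<->device copies and the stream
    # synchronization inside infer(), i.e. end-to-end latency per image rather
    # than pure GPU compute time.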
### Calculate ImageNet validation accuracy
if CALC_VAL_ACCURACY:
    import data_provider

    image_list, label_list = data_provider.prepare_sample_list(
        '/imagenet/val/', '/imagenet/val.txt', classes=[281, 239])

    correct = 0
    for img_fpath, label in zip(image_list, label_list):
        img = imageio.imread(img_fpath, pilmode='RGB')
        img = prepare_image(img, CROP_SIZE)
        out = infer(img)
        if np.argmax(out) == label:
            correct += 1
    accuracy = float(correct) / len(image_list)

    print('ImageNet validation accuracy: {}'.format(accuracy))