Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

ONNX support #42

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 5 additions & 21 deletions profiling.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,20 @@
import yaml
import argparse
import torch
import yaml

from utils.all_utils_landec import build_lane_detection_model as build_lane_model
from utils.all_utils_semseg import build_segmentation_model, load_checkpoint
from tools.profiling_utils import init_lane, init_seg, speed_evaluate_real, speed_evaluate_simple, model_profile
import torch
from tools.onnx_utils import add_basic_arguments

if __name__ == '__main__':
# Settings
parser = argparse.ArgumentParser(description='PyTorch Auto-drive')
parser.add_argument('--height', type=int, default=288,
help='Image input height (default: 288)')
parser.add_argument('--width', type=int, default=800,
help='Image input width (default: 800)')
parser.add_argument('--dataset', type=str, default='tusimple',
help='Profile on TuSimple (tusimple) / CULane (culane) (default: tusimple)')
parser.add_argument('--method', type=str, default='baseline',
help='method selection (lstr/scnn/resa/sad/baseline) (default: baseline)')
parser.add_argument('--backbone', type=str, default='erfnet',
help='backbone selection (erfnet/enet/vgg16/resnet18s/resnet18/resnet34/resnet50/resnet101)'
'(default: erfnet)')
parser.add_argument('--task', type=str, default='lane',
help='task selection (lane/seg)')
add_basic_arguments(parser)
parser.add_argument('--mode', type=str, default='simple',
help='Profiling mode (simple/real)')
parser.add_argument('--model', type=str, default='deeplabv3',
help='Model selection (fcn/erfnet/deeplabv2/deeplabv3/enet) (default: deeplabv3)')
parser.add_argument('--times', type=int, default=1,
help='Select test times')
parser.add_argument('--encoder-only', action='store_true', default=False,
help='Only train the encoder. ENet trains encoder and decoder separately (default: False)')
parser.add_argument('--continue-from', type=str, default=None,
help='Continue training from a previous checkpoint')
args = parser.parse_args()
lane_need_interpolate = ['baseline', 'scnn', 'sad', 'resa']
seg_need_interpolate = ['fcn', 'deeplabv2', 'deeplabv3']
Expand Down
96 changes: 96 additions & 0 deletions to_onnx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Convert only the pt model part

import argparse
import torch
import yaml
import fcntl

from utils.all_utils_landec import build_lane_detection_model as build_lane_model
from utils.all_utils_landec import init, test_one_set, fast_evaluate
from utils.all_utils_semseg import build_segmentation_model, load_checkpoint
from tools.onnx_utils import add_basic_arguments, pt_to_onnx, test_conversion, MINIMAL_OPSET_VERSIONS, get_ort_session


if __name__ == '__main__':
    # Settings
    parser = argparse.ArgumentParser(description='PyTorch Auto-drive')
    add_basic_arguments(parser)
    args = parser.parse_args()
    with open('configs.yaml', 'r') as f:  # Safer and cleaner than box/EasyDict
        configs = yaml.load(f, Loader=yaml.Loader)
    input_sizes = (args.height, args.width)

    # Build the network for the selected task
    if args.task == 'lane':
        num_classes = configs[configs['LANE_DATASETS'][args.dataset]]['NUM_CLASSES']
        net = build_lane_model(args, num_classes)
    elif args.task == 'seg':
        num_classes = configs[configs['SEGMENTATION_DATASETS'][args.dataset]]['NUM_CLASSES']
        net, _, _, _ = build_segmentation_model(configs, args, num_classes, 0, input_sizes)
    else:
        raise ValueError('Task must be lane or seg! Not {}'.format(args.task))

    device = torch.device('cpu')
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    print(device)
    net.to(device)
    if args.continue_from is not None:
        load_checkpoint(net=net, optimizer=None, lr_scheduler=None, filename=args.continue_from)
    else:
        raise ValueError('Must provide a weight file by --continue-from')
    torch.manual_seed(7)
    mean = configs['GENERAL']['MEAN']
    std = configs['GENERAL']['STD']
    # NOTE(review): only lane datasets are accepted here, so a valid
    # segmentation dataset would be rejected even for --task=seg — confirm intent.
    if args.dataset not in configs['LANE_DATASETS'].keys():
        raise ValueError('Unknown lane dataset: {}'.format(args.dataset))

    # Dummy input used to trace the model during ONNX export.
    # BUG FIX: input_sizes is the flat (height, width) tuple built above, so the
    # original `real_height, real_width = input_sizes[0]` tried to unpack an int
    # and raised TypeError; unpack the tuple itself.
    real_height, real_width = input_sizes
    dummy = torch.randn(1, 3, real_height, real_width, device=device, requires_grad=False)
    # Convert
    onnx_filename = args.continue_from[:args.continue_from.rfind('.')] + '.onnx'
    op_v = 9  # default opset; some methods/models require newer operators
    if args.task == 'lane' and args.method in MINIMAL_OPSET_VERSIONS.keys():
        op_v = MINIMAL_OPSET_VERSIONS[args.method]
    if args.task == 'seg' and args.model in MINIMAL_OPSET_VERSIONS.keys():
        op_v = MINIMAL_OPSET_VERSIONS[args.model]
    # TODO: directly load xxx.onnx without converting
    pt_to_onnx(net, dummy, onnx_filename, opset_version=op_v)

    if args.verify == 'no':
        print("The model has been converted.")
    elif args.verify == 'simple':
        # Compare PyTorch and ONNX outputs on a random tensor
        test_conversion(net, onnx_filename, dummy)
    elif args.verify == 'real':
        # Verify on the real dataset (lane detection only)
        num_classes = configs[configs['LANE_DATASETS'][args.dataset]]['NUM_CLASSES']
        input_sizes = configs[configs['LANE_DATASETS'][args.dataset]]['SIZES']
        gap = configs[configs['LANE_DATASETS'][args.dataset]]['GAP']
        ppl = configs[configs['LANE_DATASETS'][args.dataset]]['PPL']
        thresh = configs[configs['LANE_DATASETS'][args.dataset]]['THRESHOLD']
        weights = configs[configs['LANE_DATASETS'][args.dataset]]['WEIGHTS']
        base = configs[configs['LANE_DATASETS'][args.dataset]]['BASE_DIR']
        max_lane = configs[configs['LANE_DATASETS'][args.dataset]]['MAX_LANE']
        ort_net = get_ort_session(onnx_filename)

        # onnx inference
        if args.state in (1, 2, 3):
            data_loader = init(batch_size=args.batch_size, state=args.state, dataset=args.dataset,
                               input_sizes=input_sizes, mean=mean, std=std, base=base, workers=args.workers,
                               method=args.method)
            load_checkpoint(net=net, optimizer=None, lr_scheduler=None, filename=args.continue_from)
            if args.state == 1:  # Validate with mean IoU
                _, x = fast_evaluate(loader=data_loader, device=device, net=ort_net,
                                     num_classes=num_classes, output_size=input_sizes[0],
                                     is_mixed_precision=args.mixed_precision)
                with open('log.txt', 'a') as f:
                    # Safe writing with locks
                    fcntl.flock(f, fcntl.LOCK_EX)
                    f.write(args.exp_name + ' validation: ' + str(x) + '\n')
                    fcntl.flock(f, fcntl.LOCK_UN)
            else:  # Test with official scripts later (so just predict lanes here)
                test_one_set(net=ort_net, device=device, loader=data_loader, is_mixed_precision=args.mixed_precision,
                             gap=gap, input_sizes=input_sizes, ppl=ppl, thresh=thresh, dataset=args.dataset,
                             method=args.method, max_lane=max_lane, exp_name=args.exp_name, deploy='onnx')
        else:
            raise ValueError('--state must be 1/2/3 when --verify=real, got {}'.format(args.state))
    # NOTE(review): any other --verify value silently does nothing after export.

# python to_onnx.py --state=2 --continue-from=vgg16_baseline_tusimple_20210223.pt --dataset=tusimple --method=baseline --backbone=vgg16 --batch-size=1 --mixed-precision --task=lane --exp-name=none_onnx_test --verify=real
102 changes: 102 additions & 0 deletions tools/onnx_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Convert only the pt model part

import onnx
import onnxruntime as ort
import numpy as np
import torch

# Lowest ONNX opset version each lane method / segmentation model needs for
# export; anything not listed here is exported with the default opset 9.
MINIMAL_OPSET_VERSIONS = {
    # Others use 9
    'lstr': 11,
    'resa': 12,
    'scnn': 11
}


def add_basic_arguments(p):
    """Attach the command-line options shared by the ONNX scripts to parser ``p``.

    Every option has a default, so ``p.parse_args([])`` succeeds.
    """
    # (flag, keyword arguments) pairs — declared as data so the full option
    # set is easy to scan and extend in one place.
    specs = [
        ('--height', dict(type=int, default=288,
                          help='Image input height (default: 288)')),
        ('--width', dict(type=int, default=800,
                         help='Image input width (default: 800)')),
        ('--dataset', dict(type=str, default='tusimple',
                           help='Profile on TuSimple (tusimple) / CULane (culane) (default: tusimple)')),
        ('--method', dict(type=str, default='baseline',
                          help='method selection (lstr/scnn/sad/baseline) (default: baseline)')),
        ('--backbone', dict(type=str, default='erfnet',
                            help='backbone selection (erfnet/enet/vgg16/resnet18s/resnet18/resnet34/resnet50/resnet101)'
                                 '(default: erfnet)')),
        ('--task', dict(type=str, default='lane',
                        help='task selection (lane/seg)')),
        ('--model', dict(type=str, default='deeplabv3',
                         help='Model selection (fcn/erfnet/deeplabv2/deeplabv3/enet) (default: deeplabv3)')),
        ('--encoder-only', dict(action='store_true', default=False,
                                help='Only train the encoder. ENet trains encoder and decoder separately (default: False)')),
        ('--continue-from', dict(type=str, default=None,
                                 help='Continue training from a previous checkpoint')),
        ('--batch-size', dict(type=int, default=8,
                              help='input batch size. Recommend 4 times the training batch size in testing (default: 8)')),
        ('--mixed-precision', dict(action='store_true', default=False,
                                   help='Enable mixed precision training (default: False)')),
        ('--state', dict(type=int, default=0,
                         help='Conduct validation(3)/final test(2)/fast validation(1)/normal training(0) (default: 0)')),
        ('--workers', dict(type=int, default=10,
                           help='Number of workers (threads) when loading data.'
                                'Recommend value for training: batch_size / 2 (default: 10)')),
        ('--exp-name', dict(type=str, default='',
                            help='Name of experiment')),
        ('--verify', dict(type=str, default='real',
                          help='no: without verification/real: process the whole dataset/simple: process a random tensor')),
    ]
    for flag, kwargs in specs:
        p.add_argument(flag, **kwargs)


def pt_to_onnx(net, dummy, filename, opset_version=9):
net.eval()
torch.onnx.export(net, dummy, filename, verbose=True, input_names=['input1'], output_names=['output1'],
opset_version=opset_version)


@torch.no_grad()
def test_conversion(pt_net, onnx_filename, dummy):
    """Sanity-check an exported ONNX model against its PyTorch source.

    Runs both models on ``dummy`` (on CPU) and asserts that the mean absolute
    difference stays below 0.1% of the mean output magnitude.
    Assumes ``pt_net`` returns a dict of tensors whose iteration order matches
    the ONNX output list.
    """
    pt_net.eval()
    dummy = dummy.cpu()
    pt_net = pt_net.cpu()
    pt_out = pt_net(dummy)

    # Structural validation of the exported file before comparing numbers
    model = onnx.load(onnx_filename)
    onnx.checker.check_model(model)
    onnx.helper.printable_graph(model.graph)
    session = ort.InferenceSession(onnx_filename)
    onnx_out = session.run(None, {'input1': dummy.numpy()})

    # Per-output mean absolute error and reference magnitude
    per_out_diff = []
    per_out_mag = []
    for (_, ref), candidate in zip(pt_out.items(), onnx_out):
        per_out_diff.append(np.abs((candidate - ref.numpy())).mean())
        per_out_mag.append(ref.abs().mean().item())
    diff = sum(per_out_diff, 0.0) / len(onnx_out)
    avg = sum(per_out_mag, 0.0) / len(onnx_out)
    diff_percentage = diff / avg * 100
    print('Average diff: {}\nAverage diff (%): {}'.format(diff, diff_percentage))
    assert diff_percentage < 0.1, 'Diff over 0.1%, please check for special operators!'


def get_ort_session(onnx_filename):
    """Validate ``onnx_filename`` and return an onnxruntime InferenceSession for it."""
    # Show whether onnxruntime will run on CPU or GPU
    print(ort.get_device())
    # Structural check before building the session; raises if the model is malformed
    loaded = onnx.load(onnx_filename)
    onnx.checker.check_model(loaded)
    onnx.helper.printable_graph(loaded.graph)  # NOTE(review): return value unused
    return ort.InferenceSession(onnx_filename)


def to_numpy(tensor):
    """Convert a torch tensor to a numpy array.

    Detaches from the autograd graph first when the tensor requires grad,
    and always moves the data to CPU.
    """
    if tensor.requires_grad:
        return tensor.detach().cpu().numpy()
    return tensor.cpu().numpy()
9 changes: 9 additions & 0 deletions torchvision_models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@
from torch import nn


def is_tracing() -> bool:
    """Return True iff a torch.jit trace is currently being recorded.

    Some PyTorch versions return a non-bool from torch.jit.is_tracing();
    fall back to the private C binding in that case.
    https://github.com/pytorch/pytorch/issues/42448
    """
    result = torch.jit.is_tracing()
    return result if isinstance(result, bool) else torch._C._is_tracing()


class IntermediateLayerGetter(nn.ModuleDict):
"""
Module wrapper that returns intermediate layers from a model
Expand Down
45 changes: 31 additions & 14 deletions torchvision_models/common_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import torch.nn as nn
from torch.nn import functional as F

from ._utils import is_tracing


class non_bottleneck_1d(nn.Module):
def __init__(self, chann, dropprob, dilated):
Expand Down Expand Up @@ -164,20 +166,35 @@ def _adjust_initializations(self, num_channels=128):
def forward(self, input):
    # Directional message passing over the feature map (SCNN-style): each
    # row/column receives a convolved, ReLU-gated message from its neighbour,
    # sweeping down, up, right, then left.
    # Assumes NCHW layout: dim 2 is height (rows), dim 3 is width (columns)
    # — TODO confirm against callers.
    # NOTE: `output` aliases `input`, and both branches write into it slice by
    # slice, so this mutates the caller's tensor in place.
    output = input

    if is_tracing():
        # PyTorch index+add_ will be ignored in traced graph
        # (out-of-place .add + slice assignment keeps the ops visible to the
        # tracer, e.g. for ONNX export)
        # Down
        for i in range(1, output.shape[2]):
            output[:, :, i:i + 1, :] = output[:, :, i:i + 1, :].add(F.relu(self.conv_d(output[:, :, i - 1:i, :])))
        # Up
        for i in range(output.shape[2] - 2, 0, -1):
            output[:, :, i:i + 1, :] = output[:, :, i:i + 1, :].add(F.relu(self.conv_u(output[:, :, i + 1:i + 2, :])))
        # Right
        for i in range(1, output.shape[3]):
            output[:, :, :, i:i + 1] = output[:, :, :, i:i + 1].add(F.relu(self.conv_r(output[:, :, :, i - 1:i])))
        # Left
        for i in range(output.shape[3] - 2, 0, -1):
            output[:, :, :, i:i + 1] = output[:, :, :, i:i + 1].add(F.relu(self.conv_l(output[:, :, :, i + 1:i + 2])))
    else:
        # First one remains unchanged (according to the original paper), why not add a relu afterwards?
        # Update and send to next
        # Down
        for i in range(1, output.shape[2]):
            output[:, :, i:i + 1, :].add_(F.relu(self.conv_d(output[:, :, i - 1:i, :])))
        # Up
        for i in range(output.shape[2] - 2, 0, -1):
            output[:, :, i:i + 1, :].add_(F.relu(self.conv_u(output[:, :, i + 1:i + 2, :])))
        # Right
        for i in range(1, output.shape[3]):
            output[:, :, :, i:i + 1].add_(F.relu(self.conv_r(output[:, :, :, i - 1:i])))
        # Left
        for i in range(output.shape[3] - 2, 0, -1):
            output[:, :, :, i:i + 1].add_(F.relu(self.conv_l(output[:, :, :, i + 1:i + 2])))

    return output

Expand Down
26 changes: 21 additions & 5 deletions utils/all_utils_landec.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ujson as json
import numpy as np
from tqdm import tqdm
from collections import OrderedDict
if torch.__version__ >= '1.6.0':
from torch.cuda.amp import autocast, GradScaler
else:
Expand Down Expand Up @@ -359,11 +360,24 @@ def fast_evaluate(net, device, loader, is_mixed_precision, output_size, num_clas

# A unified inference function, for segmentation-based lane detection methods
@torch.no_grad()
def lane_as_segmentation_inference(net, inputs, input_sizes, gap, ppl, thresh, dataset, max_lane=0, forward=True):
def lane_as_segmentation_inference(net, inputs, input_sizes, gap, ppl, thresh, dataset, max_lane=0, forward=True,
deploy='pt'):
# Assume net and images are on the same device
# images: B x C x H x W
# Return: a list of lane predictions on each image
outputs = net(inputs) if forward else inputs # Support no forwarding inside this function
# deploy: pt(pytorch)/onnx(onnx runtime)/trt(tensorrt)
if deploy == 'pt':
outputs = net(inputs) if forward else inputs # Support no forwarding inside this function
elif deploy == 'onnx':

onnx_inputs = inputs.detach().cpu().numpy() if inputs.requires_grad else inputs.cpu().numpy()
onnx_out = net.run(None, {'input1': onnx_inputs})
outputs = OrderedDict()
outputs['out'] = torch.from_numpy(onnx_out[0]).to(inputs.device)
outputs['lane'] = torch.from_numpy(onnx_out[1]).to(inputs.device)
elif deploy == 'trt':
# TODO: support the tensorrt
pass
prob_map = torch.nn.functional.interpolate(outputs['out'], size=input_sizes[0], mode='bilinear',
align_corners=True).softmax(dim=1)
existence_conf = outputs['lane'].sigmoid()
Expand All @@ -386,19 +400,21 @@ def lane_as_segmentation_inference(net, inputs, input_sizes, gap, ppl, thresh, d
# Adapted from harryhan618/SCNN_Pytorch
@torch.no_grad()
def test_one_set(net, device, loader, is_mixed_precision, input_sizes, gap, ppl, thresh, dataset,
method='baseline', max_lane=0, exp_name=None):
method='baseline', max_lane=0, exp_name=None, deploy='pt'):
# Predict on 1 data_loader and save predictions for the official script
# sizes: [input size, test original size, ...]
# max_lane = 0 -> unlimited number of lanes

all_lanes = []
net.eval()
# onnx runtime does not have .eval()
if deploy == 'pt':
net.eval()
for images, filenames in tqdm(loader):
images = images.to(device)
with autocast(is_mixed_precision):
if method in ['baseline', 'scnn', 'resa']:
batch_coordinates = lane_as_segmentation_inference(net, images, input_sizes, gap, ppl, thresh, dataset,
max_lane)
max_lane, deploy=deploy)
else:
batch_coordinates = net.inference(images, input_sizes, gap, ppl, dataset, max_lane)

Expand Down