Hello, can you tell me why the boxes are displayed incorrectly?
Picture of problem:
Inference code: infer.cpp
#include <iostream>
#include <memory>
#include <cmath>
#include <stdexcept>
#include <vector>
#include <chrono>

#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

#include <cuda_runtime.h>

#include "../../csrc/engine.h"

using namespace std;
using namespace cv;

int main(int argc, char *argv[]) {
    if (argc < 3 || argc > 4) {
        cerr << "Usage: " << argv[0] << " engine.plan image.jpg [<OUTPUT>.png]" << endl;
        return 1;
    }

    cout << "Loading engine..." << endl;
    auto engine = std::make_unique<odtk::Engine>(argv[1]);

    cout << "Preparing data..." << endl;
    auto image = imread(argv[2], IMREAD_COLOR);
    auto inputSize = engine->getInputSize();
    cv::resize(image, image, Size(inputSize[1], inputSize[0]));
    cv::Mat pixels;
    image.convertTo(pixels, CV_32FC3, 1.0 / 255, 0);

    int channels = 3;
    vector<float> img;
    vector<float> data (channels * inputSize[0] * inputSize[1]);

    if (pixels.isContinuous())
        img.assign((float*)pixels.datastart, (float*)pixels.dataend);
    else {
        cerr << "Error reading image " << argv[2] << endl;
        return -1;
    }

    vector<float> mean {0.485, 0.456, 0.406};
    vector<float> std {0.229, 0.224, 0.225};

    for (int c = 0; c < channels; c++) {
        for (int j = 0, hw = inputSize[0] * inputSize[1]; j < hw; j++) {
            data[c * hw + j] = (img[channels * j + 2 - c] - mean[c]) / std[c];
        }
    }

    // Create device buffers
    void *data_d, *classes_d, *scores_d, *boxes_d;
    auto num_det = engine->getMaxDetections();
    cout << "Max Detections: " << num_det << endl;
    engine->getBindingDimensions();
    vector<int> out_sizes = engine->getBindingSizes();
    cudaMalloc(&data_d, 12 * channels * inputSize[0] * inputSize[1]);
    cudaMalloc(&classes_d, out_sizes[0]);
    cudaMalloc(&scores_d, out_sizes[1]);
    cudaMalloc(&boxes_d, out_sizes[2]);

    // Copy image to device
    size_t dataSize = data.size() * sizeof(float);
    cudaMemcpy(data_d, data.data(), dataSize, cudaMemcpyHostToDevice);

    // Run inference n times
    cout << "Running inference..." << endl;
    const int count = 100;
    auto start = chrono::steady_clock::now();
    vector<void *> buffers = { data_d, classes_d, scores_d, boxes_d };
    for (int i = 0; i < count; i++) {
        engine->infer(buffers, 1);
    }
    auto stop = chrono::steady_clock::now();
    auto timing = chrono::duration_cast<chrono::duration<double>>(stop - start);
    cout << "Took " << timing.count() / count << " seconds per inference." << endl;

    cudaFree(data_d);

    // Get back the bounding boxes
    unique_ptr<float[]> classes(new float[out_sizes[0]]);
    unique_ptr<float[]> scores(new float[out_sizes[1]]);
    unique_ptr<float[]> boxes(new float[out_sizes[2]]);
    cudaMemcpy(classes.get(), classes_d, out_sizes[0], cudaMemcpyDeviceToHost);
    cudaMemcpy(scores.get(), scores_d, out_sizes[1], cudaMemcpyDeviceToHost);
    cudaMemcpy(boxes.get(), boxes_d, out_sizes[2], cudaMemcpyDeviceToHost);

    cudaFree(classes_d);
    cudaFree(scores_d);
    cudaFree(boxes_d);

    for (int i = 0; i < num_det; i++) {
        // Show results over the confidence threshold
        if (scores[i] >= 0.9f) {
            float x1 = boxes[i*4+0];
            float y1 = boxes[i*4+1];
            float x2 = boxes[i*4+2];
            float y2 = boxes[i*4+3];
            cout << "Found box {" << x1 << ", " << y1 << ", " << x2 << ", " << y2
                 << "} with score " << scores[i] << " and class " << round(abs(classes[i])) << endl;

            // Draw bounding box on image
            cv::rectangle(image, Point(x1, y1), Point(x2, y2), cv::Scalar(0, 255, 0));
        }
    }

    // Write image
    string out_file = argc == 4 ? string(argv[3]) : "detections.png";
    cout << "Saving result to " << out_file << endl;
    imwrite(out_file, image);

    return 0;
}
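For reference on the buffer handling above: getBindingSizes() (see engine.cpp below) multiplies the binding dimensions together, so out_sizes holds element counts, while cudaMalloc and cudaMemcpy take sizes in bytes. A minimal sketch of that conversion, assuming all three outputs are float32 as they are read back above (sketch only, not the posted code):

// Sketch only: converting element counts from getBindingSizes() into the
// byte sizes that cudaMalloc/cudaMemcpy expect, assuming float32 outputs.
size_t classesBytes = out_sizes[0] * sizeof(float);
size_t scoresBytes  = out_sizes[1] * sizeof(float);
size_t boxesBytes   = out_sizes[2] * sizeof(float);
cudaMalloc(&classes_d, classesBytes);
cudaMalloc(&scores_d, scoresBytes);
cudaMalloc(&boxes_d, boxesBytes);
// ...
cudaMemcpy(classes.get(), classes_d, classesBytes, cudaMemcpyDeviceToHost);
cudaMemcpy(scores.get(), scores_d, scoresBytes, cudaMemcpyDeviceToHost);
cudaMemcpy(boxes.get(), boxes_d, boxesBytes, cudaMemcpyDeviceToHost);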
engine.cpp
#include "engine.h" #include <iostream> #include <fstream> #include <NvOnnxConfig.h> #include <NvOnnxParser.h> #include "plugins/DecodePlugin.h" #include "plugins/NMSPlugin.h" #include "plugins/DecodeRotatePlugin.h" #include "plugins/NMSRotatePlugin.h" #include "calibrator.h" #include <stdio.h> #include <string> using namespace nvinfer1; using namespace nvonnxparser; using namespace std; namespace odtk { class Logger : public ILogger { public: Logger(bool verbose) : _verbose(verbose) { } void log(Severity severity, const char *msg) noexcept override { if (_verbose || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE))) cout << msg << endl; } private: bool _verbose{false}; }; void Engine::_load(const string &path) { /// read a serialized file ifstream file(path, ios::in | ios::binary); if (!file) { cout << "read serialized file failed\n"; std::exit(1); } file.seekg(0, std::ios::end); const int length = file.tellg(); file.clear(); file.seekg(0, ios::beg); std::shared_ptr<char> data(new char[length], std::default_delete<char[]>()); file.read(data.get(), length); file.close(); cout << "model size: " << length << endl; /// Initialization of the engine _engine = std::unique_ptr<ICudaEngine>(_runtime->deserializeCudaEngine(data.get(), length, nullptr)); } void Engine::_prepare() { _context = std::unique_ptr<IExecutionContext>(_engine->createExecutionContext()); _context->setOptimizationProfileAsync(0, _stream); cudaStreamCreate(&_stream); } Engine::Engine(const string &engine_path, bool verbose) { Logger logger(verbose); _runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger)); _load(engine_path); _prepare(); } Engine::~Engine() { if (_stream) cudaStreamDestroy(_stream); } Engine::Engine(const char *onnx_model, size_t onnx_size, const vector<int>& dynamic_batch_opts, string precision, float score_thresh, int top_n, const vector<vector<float>>& anchors, bool rotated, float nms_thresh, int detections_per_im, const vector<string>& calibration_images, string model_name, string calibration_table, bool verbose, size_t workspace_size) { Logger logger(verbose); _runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger)); bool fp16 = precision.compare("FP16") == 0; bool int8 = precision.compare("INT8") == 0; // Create builder auto builder = std::unique_ptr<IBuilder>(createInferBuilder(logger)); auto builderConfig = std::unique_ptr<IBuilderConfig>(builder->createBuilderConfig()); // Allow use of FP16 layers when running in INT8 if(fp16 || int8) builderConfig->setFlag(BuilderFlag::kFP16); builderConfig->setMaxWorkspaceSize(workspace_size); // Parse ONNX FCN cout << "Building " << precision << " core model..." 
<< endl; const auto flags = 1U << static_cast<int>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); auto network = std::unique_ptr<INetworkDefinition>(builder->createNetworkV2(flags)); auto parser = std::unique_ptr<IParser>(createParser(*network, logger)); parser->parse(onnx_model, onnx_size); auto input = network->getInput(0); auto inputDims = input->getDimensions(); auto profile = builder->createOptimizationProfile(); auto inputName = input->getName(); auto profileDimsmin = Dims4{dynamic_batch_opts[0], inputDims.d[1], inputDims.d[2], inputDims.d[3]}; auto profileDimsopt = Dims4{dynamic_batch_opts[1], inputDims.d[1], inputDims.d[2], inputDims.d[3]}; auto profileDimsmax = Dims4{dynamic_batch_opts[2], inputDims.d[1], inputDims.d[2], inputDims.d[3]}; profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, profileDimsmin); profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kOPT, profileDimsopt); profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMAX, profileDimsmax); if(profile->isValid()) builderConfig->addOptimizationProfile(profile); std::unique_ptr<Int8EntropyCalibrator> calib; if (int8) { builderConfig->setFlag(BuilderFlag::kINT8); // Calibration is performed using kOPT values of the profile. // Calibration batch size must match this profile. builderConfig->setCalibrationProfile(profile); ImageStream stream(dynamic_batch_opts[1], inputDims, calibration_images); calib = std::unique_ptr<Int8EntropyCalibrator>(new Int8EntropyCalibrator(stream, model_name, calibration_table)); builderConfig->setInt8Calibrator(calib.get()); } // Add decode plugins cout << "Building accelerated plugins..." << endl; vector<DecodePlugin> decodePlugins; vector<DecodeRotatePlugin> decodeRotatePlugins; vector<ITensor *> scores, boxes, classes; auto nbOutputs = network->getNbOutputs(); for (int i = 0; i < nbOutputs / 2; i++) { auto classOutput = network->getOutput(i); auto boxOutput = network->getOutput(nbOutputs / 2 + i); auto outputDims = classOutput->getDimensions(); int scale = inputDims.d[2] / outputDims.d[2]; auto decodePlugin = DecodePlugin(score_thresh, top_n, anchors[i], scale); auto decodeRotatePlugin = DecodeRotatePlugin(score_thresh, top_n, anchors[i], scale); decodePlugins.push_back(decodePlugin); decodeRotatePlugins.push_back(decodeRotatePlugin); vector<ITensor *> inputs = {classOutput, boxOutput}; auto layer = (!rotated) ? network->addPluginV2(inputs.data(), inputs.size(), decodePlugin) \ : network->addPluginV2(inputs.data(), inputs.size(), decodeRotatePlugin); scores.push_back(layer->getOutput(0)); boxes.push_back(layer->getOutput(1)); classes.push_back(layer->getOutput(2)); } // Cleanup outputs for (int i = 0; i < nbOutputs; i++) { auto output = network->getOutput(0); network->unmarkOutput(*output); } // Concat tensors from each feature map vector<ITensor *> concat; for (auto tensors : {scores, boxes, classes}) { auto layer = network->addConcatenation(tensors.data(), tensors.size()); concat.push_back(layer->getOutput(0)); } // Add NMS plugin auto nmsPlugin = NMSPlugin(nms_thresh, detections_per_im); auto nmsRotatePlugin = NMSRotatePlugin(nms_thresh, detections_per_im); auto layer = (!rotated) ? 
network->addPluginV2(concat.data(), concat.size(), nmsPlugin) \ : network->addPluginV2(concat.data(), concat.size(), nmsRotatePlugin); vector<string> names = {"scores", "boxes", "classes"}; for (int i = 0; i < layer->getNbOutputs(); i++) { auto output = layer->getOutput(i); network->markOutput(*output); output->setName(names[i].c_str()); } // Build engine cout << "Applying optimizations and building TRT CUDA engine..." << endl; _plan = std::unique_ptr<IHostMemory>(builder->buildSerializedNetwork(*network, *builderConfig)); } void Engine::save(const string &path) { cout << "Writing to " << path << "..." << endl; ofstream file(path, ios::out | ios::binary); file.write(reinterpret_cast<const char*>(_plan->data()), _plan->size()); } void Engine::infer(vector<void *> &buffers, int batch){ auto dims = _engine->getBindingDimensions(0); _context->setBindingDimensions(0, Dims4(batch, dims.d[1], dims.d[2], dims.d[3])); _context->enqueueV2(buffers.data(), _stream, nullptr); cudaStreamSynchronize(_stream); } vector<int> Engine::getInputSize() { auto dims = _engine->getBindingDimensions(0); return {dims.d[2], dims.d[3]}; } int Engine::getMaxBatchSize() { return _engine->getMaxBatchSize(); } int Engine::getMaxDetections() { return _engine->getBindingDimensions(1).d[1]; } void Engine::getBindingDimensions() { for (int i = 0; i < _engine->getNbBindings(); ++i) { nvinfer1::Dims bindingDims = _engine->getBindingDimensions(i); std::cout << "\nBinding " << i << ":\n" << " Dimensions: "; for (int j = 0; j < bindingDims.nbDims; ++j) { std::cout << bindingDims.d[j] << " "; } } std::cout << std::endl; } vector<int> Engine::getBindingSizes() { vector<int> vec_sizes; outDims0 = _engine->getBindingDimensions(1); for (int j = 0; j < outDims0.nbDims; j++) { outSize0 *= abs(outDims0.d[j]); } vec_sizes.push_back(outSize0); vector<nvinfer1::Dims> vec_dims; outDims1 = _engine->getBindingDimensions(2); for (int j = 0; j < outDims1.nbDims; j++) { outSize1 *= abs(outDims1.d[j]); } vec_sizes.push_back(outSize1); outDims2 = _engine->getBindingDimensions(3); for (int j = 0; j < outDims2.nbDims; j++) { outSize2 *= abs(outDims2.d[j]); } vec_sizes.push_back(outSize2); cout << "Размеры выходов:" << endl; for (int i=0; i<vec_sizes.size(); ++i) { cout << vec_sizes[i] << endl; } return vec_sizes; } int Engine::getStride() { return 1; } }
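To make it easier to match each device buffer in infer.cpp to the "scores", "boxes" and "classes" outputs marked above, here is a small sketch of a hypothetical printBindings() helper (not part of the posted class) that prints each binding's name and direction next to its dimensions, using the same TensorRT binding API that engine.cpp already calls (getNbBindings, getBindingDimensions):

// Sketch only: hypothetical helper for inspecting engine bindings by name.
void printBindings(const nvinfer1::ICudaEngine &engine) {
    for (int i = 0; i < engine.getNbBindings(); ++i) {
        nvinfer1::Dims dims = engine.getBindingDimensions(i);
        std::cout << "Binding " << i << " (" << engine.getBindingName(i) << ", "
                  << (engine.bindingIsInput(i) ? "input" : "output") << "):";
        for (int j = 0; j < dims.nbDims; ++j) std::cout << " " << dims.d[j];
        std::cout << std::endl;
    }
}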