當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

TensorRT API自定义trt网络结构

發布時間：2024/1/18 编程问答 31 豆豆

生活随笔收集整理的這篇文章主要介紹了 TensorRT API自定义trt网络结构，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

這個后續有時間進一步整理

pth轉wts

若使用tensorrt加載wts格式，需將模型訓練的pt、pth、ckpt等格式權重轉換為wts。

def checkpint2wts(pth_path, wts_file):
    """Convert a PyTorch checkpoint into the plain-text ``.wts`` weight format.

    The output file starts with one line holding the number of tensors. Each
    following line describes one tensor: its name, its element count, and then
    every element written as the hex encoding of its big-endian float32 bytes.

    Args:
        pth_path: Path of the checkpoint file. The loaded object must contain a
            ``"model_state"`` entry mapping tensor names to tensors.
        wts_file: Destination path of the ``.wts`` file, e.g. ``result.wts``.

    Raises:
        KeyError: If the checkpoint has no ``"model_state"`` entry.
    """
    # map_location="cpu" lets a checkpoint saved on a GPU be converted on a
    # machine without CUDA; the values are moved to the CPU below anyway.
    checkpoint = torch.load(pth_path, map_location="cpu")
    model_state_dict = checkpoint["model_state"]
    with open(wts_file, 'w') as f:
        f.write('{}\n'.format(len(model_state_dict.keys())))
        for k, v in model_state_dict.items():
            # detach() so tensors that still track gradients can be exported.
            vr = v.detach().reshape(-1).cpu().numpy()
            f.write('{} {} '.format(k, len(vr)))
            for vv in vr:
                f.write(' ')
                # Big-endian float32 bytes as 8 hex digits; the C++ loader
                # parses each token back with std::hex into a uint32_t.
                f.write(struct.pack('>f', float(vv)).hex())
            f.write('\n')

API自定義網絡層

讀取wts轉map

wts轉tensorrt的原理

從wts文件把weight給load出來，存到一個map里，key是網絡每層的名稱，value就是對應的權重
利用tensorrt的API把網絡重建出來，同時導入key對應的value，也就是weightMap的形式
定義網絡的輸出，設置內存空間
build engine

std::map<std::string, Weights> loadWeights(const std::string file) {std::cout << "Loading weights: " << file << std::endl;std::map<std::string, Weights> weightMap;// Open weights filestd::ifstream input(file);assert(input.is_open() && "Unable to load weight file.");// Read number of weight blobsint32_t count;input >> count;assert(count > 0 && "Invalid weight map file.");while (count--){Weights wt{DataType::kFLOAT, nullptr, 0};uint32_t size;// Read name and type of blobstd::string name;input >> name >> std::dec >> size;wt.type = DataType::kFLOAT;// Load blobuint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));for (uint32_t x = 0, y = size; x < y; ++x){input >> std::hex >> val[x];}wt.values = val;wt.count = size;weightMap[name] = wt;}return weightMap; }

接下來用tensorrt api搭建trt網絡模型：

voxelGeneratorlayer層

先定義add_voxel_generator函數，傳入PluginFieldCollection結構體所需的參數，調用addPluginV2給network添加voxelGeneratorlayer算子。這里的voxelGeneratorlayer是自定義的算子；此處給出的代碼不涉及卷積操作，因此沒有用到上一步讀取出的weight map。

/**
 * Add the custom "VoxelGeneratorPlugin" (version "1") layer to a network.
 *
 * The creator is looked up in the plugin registry, its advertised field names
 * are matched against the arguments of this function, and the resulting plugin
 * is attached to `network` with `point_data` and `point_size` as its inputs.
 *
 * @param network                 Network definition the layer is added to.
 * @param point_data              First input tensor of the plugin layer.
 * @param point_size              Second input tensor of the plugin layer.
 * @param max_point_num_per_voxel Value for the "max_point_num_per_voxel" field.
 * @param max_voxel_num           Value for the "max_voxel_num" field.
 * @param voxel_size_x,voxel_size_y,voxel_size_z
 *                                Packed in x, y, z order into "voxel_size".
 * @param x_min,x_max,y_min,y_max,z_min,z_max
 *                                Packed as {x_min, y_min, z_min, x_max, y_max,
 *                                z_max} into "point_cloud_range".
 * @param point_features_num      Value for the "point_feature_num" field.
 * @return The plugin layer returned by addPluginV2.
 */
nvinfer1::IPluginV2Layer *add_voxel_generator(
    nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *point_data,
    nvinfer1::ITensor *point_size, int max_point_num_per_voxel,
    int max_voxel_num, float voxel_size_x, float voxel_size_y,
    float voxel_size_z, float x_min, float x_max, float y_min, float y_max,
    float z_min, float z_max, int point_features_num) {
  // Stack storage replaces the former malloc/free pairs. The arrays stay alive
  // until after addPluginV2, which is as long as the heap buffers lived.
  float voxel_size[3] = {voxel_size_x, voxel_size_y, voxel_size_z};
  float point_cloud_range[6] = {x_min, y_min, z_min, x_max, y_max, z_max};

  // Look up the creator of the custom operator by name and version.
  auto voxelGeneratorcreator =
      getPluginRegistry()->getPluginCreator("VoxelGeneratorPlugin", "1");
  assert(voxelGeneratorcreator != nullptr &&
         "VoxelGeneratorPlugin version 1 is not registered.");

  const nvinfer1::PluginFieldCollection *voxelGeneratorpluginData =
      voxelGeneratorcreator->getFieldNames();
  const nvinfer1::PluginField *fields = voxelGeneratorpluginData->fields;
  const int nbFields = voxelGeneratorpluginData->nbFields;

  // Build one PluginField for every field name the creator advertises and
  // that this function knows how to fill.
  std::vector<nvinfer1::PluginField> new_pluginData_list;
  for (int i = 0; i < nbFields; ++i) {
    const char *attr_name = fields[i].name;
    std::cout << attr_name << std::endl;
    if (!strcmp(attr_name, "max_point_num_per_voxel")) {
      new_pluginData_list.emplace_back(nvinfer1::PluginField(
          "max_point_num_per_voxel", &(max_point_num_per_voxel),
          nvinfer1::PluginFieldType::kINT32, 1));
    } else if (!strcmp(attr_name, "max_voxel_num")) {
      new_pluginData_list.emplace_back(
          nvinfer1::PluginField("max_voxel_num", &(max_voxel_num),
                                nvinfer1::PluginFieldType::kINT32, 1));
    } else if (!strcmp(attr_name, "point_cloud_range")) {
      // Length is the number of entries: six floats, not one.
      new_pluginData_list.emplace_back(
          nvinfer1::PluginField("point_cloud_range", point_cloud_range,
                                nvinfer1::PluginFieldType::kFLOAT32, 6));
    } else if (!strcmp(attr_name, "point_feature_num")) {
      new_pluginData_list.emplace_back(
          nvinfer1::PluginField("point_feature_num", &(point_features_num),
                                nvinfer1::PluginFieldType::kINT32, 1));
    } else if (!strcmp(attr_name, "voxel_size")) {
      // Length is the number of entries: three floats, not one.
      new_pluginData_list.emplace_back(
          nvinfer1::PluginField("voxel_size", voxel_size,
                                nvinfer1::PluginFieldType::kFLOAT32, 3));
    }
  }

  // The collection only borrows the vector's storage, so a local object is
  // enough; the previous malloc'd collection was never freed.
  nvinfer1::PluginFieldCollection newPluginFieldCollection{};
  newPluginFieldCollection.nbFields =
      static_cast<int>(new_pluginData_list.size());
  newPluginFieldCollection.fields = new_pluginData_list.data();

  // createPlugin builds the plugin object from the collected fields.
  nvinfer1::IPluginV2 *pluginObj_voxelGenerator =
      voxelGeneratorcreator->createPlugin("voxelGeneratorlayer",
                                          &newPluginFieldCollection);
  assert(pluginObj_voxelGenerator != nullptr &&
         "Failed to create VoxelGeneratorPlugin.");

  nvinfer1::ITensor *inputTensors_voxelgenerator[] = {point_data, point_size};
  auto voxelGenerator = network->addPluginV2(inputTensors_voxelgenerator, 2,
                                             *pluginObj_voxelGenerator);

  // The local plugin object is released here, exactly as the original did
  // right after addPluginV2.
  pluginObj_voxelGenerator->destroy();
  return voxelGenerator;
}

3d稀疏卷積層

繼續添加3d稀疏卷積層，算子也是自定義的。

需要指定輸入數據的名字、數據類型、維度，同時必須給network添加輸出，通過調用markOutput完成

// Creat the engine using only the API and not any parser. ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder,IBuilderConfig *config) {const auto explicitBatch =1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);auto network = builder->createNetworkV2(explicitBatch);ITensor *point_data = network->addInput(INPUT_POINTS, DataType::kFLOAT,Dims3{1, MAX_POINT_NUM, 5});Dims dims1;dims1.d[0] = 1;dims1.nbDims = 1;ITensor *point_size =network->addInput(INPUT_POINTS_SIZE, DataType::kINT32, dims1);assert(point_data);assert(point_size);auto voxelGenerator = add_voxel_generator(network, point_data, point_size, MAX_POINT_NUM_PER_VOXEL, MAX_VOXEL_NUM,VOXEL_SIZE_X, VOXEL_SIZE_Y, VOXEL_SIZE_Z, X_MIN, X_MAX, Y_MIN, Y_MAX,Z_MIN, Z_MAX, POINT_FEATURES_NUM);voxelGenerator->getOutput(0)->setName("voxel_feature");network->markOutput(*voxelGenerator->getOutput(0));voxelGenerator->getOutput(1)->setName("coords");network->markOutput(*voxelGenerator->getOutput(1));voxelGenerator->getOutput(2)->setName("voxel_num");network->markOutput(*voxelGenerator->getOutput(2));// Build engineconfig->setMaxWorkspaceSize(1600 * (1 << 20)); // 16MB #ifdef USE_FP16config->setFlag(BuilderFlag::kFP16); #endifstd::cout << "Building engine, please wait for a while..." << std::endl;ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);std::cout << "Build engine successfully!" << std::endl;network->destroy();return engine; }

得到engine后，可以將engine序列化保存為文件輸出

總結

以上是生活随笔為你收集整理的TensorRT API自定义trt网络结构的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：网站域名怎么注册？域名选择注意事项有哪些
下一篇： OpenCV--图像的基本表示方法