TensorRT API自定义trt网络结构
                                                            生活随笔
收集整理的這篇文章主要介紹了
                                TensorRT API自定义trt网络结构
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.                        
                                這個后續有時間進一步整理
pth轉wts
若使用tensorrt加載wts格式,需將模型訓練的pt、pth、ckpt等格式權重轉換為wts。
def checkpint2wts(pth_path, wts_file):
    """Convert a PyTorch checkpoint into the plain-text ``.wts`` weight format.

    The module-level names ``torch`` and ``struct`` must already be imported.

    Output layout (one record per line):
      * first line: the number of tensors in the state dict;
      * then, per tensor: ``<name> <element_count>`` followed by every element
        encoded as the hex string of its big-endian IEEE-754 float32 bytes.

    Args:
        pth_path: Path of the checkpoint file (.pt / .pth / .ckpt). The loaded
            object must be a mapping with a ``"model_state"`` entry that holds
            the state dict.
        wts_file: Destination path of the generated file, e.g. ``result.wts``.

    Raises:
        KeyError: If the checkpoint has no ``"model_state"`` entry.
    """
    # NOTE(review): torch.load unpickles the file; only use it on checkpoints
    # from a trusted source.
    # map_location="cpu" lets checkpoints saved on a GPU be converted on a
    # machine without CUDA; the values are moved to the CPU below anyway.
    checkpoint = torch.load(pth_path, map_location="cpu")
    model_state_dict = checkpoint["model_state"]

    with open(wts_file, 'w') as f:
        f.write('{}\n'.format(len(model_state_dict)))
        for k, v in model_state_dict.items():
            # detach() so tensors that still require grad can become numpy arrays.
            vr = v.detach().reshape(-1).cpu().numpy()
            f.write('{} {} '.format(k, len(vr)))
            for vv in vr:
                f.write(' ')
                f.write(struct.pack('>f', float(vv)).hex())
            f.write('\n')


# --- Next section of the article: custom network layers built with the TensorRT API ---
讀取wts轉map
wts轉tensorrt的原理
- 從wts文件把weight給load出來,存到一個map里,key是網絡每層的名稱,value就是對應的權重
- 利用tensorrt的API把網絡重建出來,同時導入key對應的value,也就是weightMap的形式
- 定義網絡的輸出,設置內存空間
- build engine
接下來用tensorrt api搭建trt網絡模型:
voxelGeneratorlayer層
先定義add_voxel_generator函數,傳入PluginFieldCollection結構體所需的參數,再調用addPluginV2給network添加voxelGeneratorlayer算子。這里的voxelGeneratorlayer是自定義算子;下面給出的代碼不涉及卷積操作,因此沒有用到上一步讀取出的weight map。
/**
 * @brief Adds the custom "VoxelGeneratorPlugin" (version "1") layer to a network.
 *
 * Looks the plugin creator up in the plugin registry, fills a
 * PluginFieldCollection with the attributes the creator advertises through
 * getFieldNames(), creates the plugin and attaches it to @p network with
 * @p point_data and @p point_size as its two inputs.
 *
 * @param network                  Network definition that receives the layer.
 * @param point_data               First plugin input.
 * @param point_size               Second plugin input.
 * @param max_point_num_per_voxel  Value of the "max_point_num_per_voxel" field.
 * @param max_voxel_num            Value of the "max_voxel_num" field.
 * @param voxel_size_x,voxel_size_y,voxel_size_z
 *                                 Packed as {x, y, z} into the "voxel_size" field.
 * @param x_min,x_max,y_min,y_max,z_min,z_max
 *                                 Packed as {x_min, y_min, z_min, x_max, y_max,
 *                                 z_max} into the "point_cloud_range" field.
 * @param point_features_num       Value of the "point_feature_num" field.
 * @return The layer returned by addPluginV2, or nullptr when the plugin
 *         creator is not registered or the plugin could not be created.
 */
nvinfer1::IPluginV2Layer *add_voxel_generator(
    nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *point_data,
    nvinfer1::ITensor *point_size, int max_point_num_per_voxel,
    int max_voxel_num, float voxel_size_x, float voxel_size_y,
    float voxel_size_z, float x_min, float x_max, float y_min, float y_max,
    float z_min, float z_max, int point_features_num) {
  // Attribute buffers live on the stack: they only have to stay valid until
  // this function returns (the previous heap buffers were freed at that same
  // point), and nothing can leak on an early return.
  float voxel_size[3] = {voxel_size_x, voxel_size_y, voxel_size_z};
  float point_cloud_range[6] = {x_min, y_min, z_min, x_max, y_max, z_max};

  // Fetch the creator of the custom operator by name and version.
  auto *voxelGeneratorcreator =
      getPluginRegistry()->getPluginCreator("VoxelGeneratorPlugin", "1");
  if (voxelGeneratorcreator == nullptr) {
    std::cerr << "VoxelGeneratorPlugin (version 1) is not registered"
              << std::endl;
    return nullptr;
  }

  // Only hand over the attributes the creator says it understands.
  std::vector<nvinfer1::PluginField> new_pluginData_list;
  const nvinfer1::PluginFieldCollection *voxelGeneratorpluginData =
      voxelGeneratorcreator->getFieldNames();
  if (voxelGeneratorpluginData != nullptr) {
    const nvinfer1::PluginField *fields = voxelGeneratorpluginData->fields;
    const int nbFields = voxelGeneratorpluginData->nbFields;
    for (int i = 0; i < nbFields; ++i) {
      const char *attr_name = fields[i].name;
      std::cout << attr_name << std::endl;
      if (!strcmp(attr_name, "max_point_num_per_voxel")) {
        new_pluginData_list.emplace_back(nvinfer1::PluginField(
            "max_point_num_per_voxel", &max_point_num_per_voxel,
            nvinfer1::PluginFieldType::kINT32, 1));
      } else if (!strcmp(attr_name, "max_voxel_num")) {
        new_pluginData_list.emplace_back(
            nvinfer1::PluginField("max_voxel_num", &max_voxel_num,
                                  nvinfer1::PluginFieldType::kINT32, 1));
      } else if (!strcmp(attr_name, "point_cloud_range")) {
        // The length is the number of entries in the buffer (6 floats).
        new_pluginData_list.emplace_back(
            nvinfer1::PluginField("point_cloud_range", point_cloud_range,
                                  nvinfer1::PluginFieldType::kFLOAT32, 6));
      } else if (!strcmp(attr_name, "point_feature_num")) {
        new_pluginData_list.emplace_back(
            nvinfer1::PluginField("point_feature_num", &point_features_num,
                                  nvinfer1::PluginFieldType::kINT32, 1));
      } else if (!strcmp(attr_name, "voxel_size")) {
        // The length is the number of entries in the buffer (3 floats).
        new_pluginData_list.emplace_back(
            nvinfer1::PluginField("voxel_size", voxel_size,
                                  nvinfer1::PluginFieldType::kFLOAT32, 3));
      }
    }
  }

  // A stack-allocated collection replaces the malloc'd one that was never freed.
  nvinfer1::PluginFieldCollection newPluginFieldCollection{};
  newPluginFieldCollection.nbFields =
      static_cast<int>(new_pluginData_list.size());
  newPluginFieldCollection.fields = new_pluginData_list.data();

  // The creator's createPlugin builds the VoxelGeneratorPlugin object.
  nvinfer1::IPluginV2 *pluginObj_voxelGenerator =
      voxelGeneratorcreator->createPlugin("voxelGeneratorlayer",
                                          &newPluginFieldCollection);
  if (pluginObj_voxelGenerator == nullptr) {
    std::cerr << "Failed to create VoxelGeneratorPlugin" << std::endl;
    return nullptr;
  }

  nvinfer1::ITensor *inputTensors_voxelgenerator[] = {point_data, point_size};
  auto *voxelGenerator = network->addPluginV2(inputTensors_voxelgenerator, 2,
                                              *pluginObj_voxelGenerator);
  // Released right after the layer is added, exactly as before.
  // NOTE(review): presumably the network keeps its own copy - confirm.
  pluginObj_voxelGenerator->destroy();
  return voxelGenerator;
}

// --- Next section of the article: 3D sparse convolution layer ---
繼續添加3d稀疏卷積層,算子也是自定義的。
需要指定輸入數據的名字、數據類型、維度,同時必須給network添加輸出,通過調用markOutput完成
// Creat the engine using only the API and not any parser. ICudaEngine *createEngine(unsigned int maxBatchSize, IBuilder *builder,IBuilderConfig *config) {const auto explicitBatch =1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);auto network = builder->createNetworkV2(explicitBatch);ITensor *point_data = network->addInput(INPUT_POINTS, DataType::kFLOAT,Dims3{1, MAX_POINT_NUM, 5});Dims dims1;dims1.d[0] = 1;dims1.nbDims = 1;ITensor *point_size =network->addInput(INPUT_POINTS_SIZE, DataType::kINT32, dims1);assert(point_data);assert(point_size);auto voxelGenerator = add_voxel_generator(network, point_data, point_size, MAX_POINT_NUM_PER_VOXEL, MAX_VOXEL_NUM,VOXEL_SIZE_X, VOXEL_SIZE_Y, VOXEL_SIZE_Z, X_MIN, X_MAX, Y_MIN, Y_MAX,Z_MIN, Z_MAX, POINT_FEATURES_NUM);voxelGenerator->getOutput(0)->setName("voxel_feature");network->markOutput(*voxelGenerator->getOutput(0));voxelGenerator->getOutput(1)->setName("coords");network->markOutput(*voxelGenerator->getOutput(1));voxelGenerator->getOutput(2)->setName("voxel_num");network->markOutput(*voxelGenerator->getOutput(2));// Build engineconfig->setMaxWorkspaceSize(1600 * (1 << 20)); // 16MB #ifdef USE_FP16config->setFlag(BuilderFlag::kFP16); #endifstd::cout << "Building engine, please wait for a while..." << std::endl;ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);std::cout << "Build engine successfully!" << std::endl;network->destroy();return engine; }得到engine后,可以將engine序列化保存為文件輸出
總結
以上是生活随笔為你收集整理的TensorRT API自定义trt网络结构的全部內容,希望文章能夠幫你解決所遇到的問題。
 
                            
                        - 上一篇: 网站域名怎么注册?域名选择注意事项有哪些
- 下一篇: OpenCV--图像的基本表示方法
