當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

OpenCL编程实例: 向量计算

發布時間：2023/12/9 编程问答 31 豆豆

生活随笔收集整理的這篇文章主要介紹了 OpenCL编程实例: 向量计算小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

GPGPU OpenCL編程步驟與簡單實例

轉自: http://www.cnblogs.com/xudong-bupt/p/3582780.html

1.OpenCL概念

　　OpenCL是一個為異構平臺編寫程序的框架，此異構平臺可由CPU、GPU或其他類型的處理器組成。OpenCL由一門用于編寫kernels （在OpenCL設備上運行的函數）的語言（基于C99）和一組用于定義并控制平臺的API組成。

　　OpenCL提供了兩種層面的并行機制：任務并行與數據并行。

2.OpenCL與CUDA的區別

　　不同點：OpenCL是通用的異構平臺編程語言，為了兼顧不同設備，使用繁瑣。

　　　　　　CUDA是nvidia公司發明的專門在其GPGPU上的編程的框架，使用簡單，好入門。

　　相同點：都是基于任務并行與數據并行。

3.OpenCL的編程步驟

　　（1）Discover and initialize the platforms

　　　　調用兩次clGetPlatformIDs函數，第一次獲取可用的平臺數量，第二次獲取一個可用的平臺。

　　（2）Discover and initialize the devices

　　　　調用兩次clGetDeviceIDs函數，第一次獲取可用的設備數量，第二次獲取一個可用的設備。

　　（3）Create a context(調用clCreateContext函數）

　　　　上下文context可能會管理多個設備device。

　　（4）Create a command queue(調用clCreateCommandQueue函數）

　　　　一個設備device對應一個command queue。

　　　　上下文context將命令發送到設備對應的command queue，設備就可以執行命令隊列里的命令。

　　（5）Create device buffers(調用clCreateBuffer函數）

　　　　Buffer中保存的是數據對象，就是設備執行程序需要的數據保存在其中。

　　　　Buffer由上下文context創建，這樣上下文管理的多個設備就會共享Buffer中的數據。

　　（6）Write host data to device buffers(調用clEnqueueWriteBuffer函數）

　　（7）Create and compile the program

　　　　創建程序對象，程序對象就代表你的程序源文件或者二進制代碼數據。

　　（8）Create the kernel(調用clCreateKernel函數）

　　　　根據你的程序對象，生成kernel對象，表示設備程序的入口。

　　（9）Set the kernel arguments(調用clSetKernelArg函數）

　　（10）Configure the work-item structure(設置worksize）

　　　　配置work-item的組織形式（維數，group組成等）

　　（11）Enqueue the kernel for execution(調用clEnqueueNDRangeKernel函數）

　　　　將kernel對象，以及work-item參數放入命令隊列中進行執行。

　　（12）Read the output buffer back to the host(調用clEnqueueReadBuffer函數）

　　（13）Release OpenCL resources（至此結束整個運行過程）

4.說明

　　OpenCL中的核函數必須單列一個文件。

　　OpenCL的編程一般步驟就是上面的13步，太長了，以至于要想做個向量加法都是那么困難。

　　不過上面的步驟前3步一般是固定的，可以單獨寫在一個.h/.cpp文件中，其他的一般也不會有什么大的變化。

5.程序實例，向量運算

5.1通用前3個步驟，生成一個文件

　　tool.h

1 #ifndef TOOLH 2 #define TOOLH 3 4 #include <CL/cl.h> 5 #include <string.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <iostream> 9 #include <string> 10 #include <fstream> 11 using namespace std; 12 13 /** convert the kernel file into a string */ 14 int convertToString(const char *filename, std::string& s); 15 16 /**Getting platforms and choose an available one.*/ 17 int getPlatform(cl_platform_id &platform); 18 19 /**Step 2:Query the platform and choose the first GPU device if has one.*/ 20 cl_device_id *getCl_device_id(cl_platform_id &platform); 21 22 #endif View Code

　　tool.cpp

1 #include <CL/cl.h> 2 #include <string.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <iostream> 6 #include <string> 7 #include <fstream> 8 #include "tool.h" 9 using namespace std; 10 11 /** convert the kernel file into a string */ 12 int convertToString(const char *filename, std::string& s) 13 { 14 size_t size; 15 char* str; 16 std::fstream f(filename, (std::fstream::in | std::fstream::binary)); 17 18 if(f.is_open()) 19 { 20 size_t fileSize; 21 f.seekg(0, std::fstream::end); 22 size = fileSize = (size_t)f.tellg(); 23 f.seekg(0, std::fstream::beg); 24 str = new char[size+1]; 25 if(!str) 26 { 27 f.close(); 28 return 0; 29 } 30 31 f.read(str, fileSize); 32 f.close(); 33 str[size] = '\0'; 34 s = str; 35 delete[] str; 36 return 0; 37 } 38 cout<<"Error: failed to open file\n:"<<filename<<endl; 39 return -1; 40 } 41 42 /**Getting platforms and choose an available one.*/ 43 int getPlatform(cl_platform_id &platform) 44 { 45 platform = NULL;//the chosen platform 46 47 cl_uint numPlatforms;//the NO. of platforms 48 cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms); 49 if (status != CL_SUCCESS) 50 { 51 cout<<"Error: Getting platforms!"<<endl; 52 return -1; 53 } 54 55 /**For clarity, choose the first available platform. */ 56 if(numPlatforms > 0) 57 { 58 cl_platform_id* platforms = 59 (cl_platform_id* )malloc(numPlatforms* sizeof(cl_platform_id)); 60 status = clGetPlatformIDs(numPlatforms, platforms, NULL); 61 platform = platforms[0]; 62 free(platforms); 63 } 64 else 65 return -1; 66 } 67 68 /**Step 2:Query the platform and choose the first GPU device if has one.*/ 69 cl_device_id *getCl_device_id(cl_platform_id &platform) 70 { 71 cl_uint numDevices = 0; 72 cl_device_id *devices=NULL; 73 cl_int status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); 74 if (numDevices > 0) //GPU available. 
75 { 76 devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id)); 77 status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); 78 } 79 return devices; 80 } View Code

5.2核函數文件

　　HelloWorld_Kernel.cl

1 __kernel void helloworld(__global double* in, __global double* out) 2 { 3 int num = get_global_id(0); 4 out[num] = in[num] / 2.4 *(in[num]/6) ; 5 } View Code

5.3主函數文件

　　HelloWorld.cpp

1 //For clarity,error checking has been omitted. 2 #include <CL/cl.h> 3 #include "tool.h" 4 #include <string.h> 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <iostream> 8 #include <string> 9 #include <fstream> 10 using namespace std; 11 12 int main(int argc, char* argv[]) 13 { 14 cl_int status; 15 /**Step 1: Getting platforms and choose an available one(first).*/ 16 cl_platform_id platform; 17 getPlatform(platform); 18 19 /**Step 2:Query the platform and choose the first GPU device if has one.*/ 20 cl_device_id *devices=getCl_device_id(platform); 21 22 /**Step 3: Create context.*/ 23 cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL); 24 25 /**Step 4: Creating command queue associate with the context.*/ 26 cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL); 27 28 /**Step 5: Create program object */ 29 const char *filename = "HelloWorld_Kernel.cl"; 30 string sourceStr; 31 status = convertToString(filename, sourceStr); 32 const char *source = sourceStr.c_str(); 33 size_t sourceSize[] = {strlen(source)}; 34 cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); 35 36 /**Step 6: Build program. 
*/ 37 status=clBuildProgram(program, 1,devices,NULL,NULL,NULL); 38 39 /**Step 7: Initial input,output for the host and create memory objects for the kernel*/ 40 const int NUM=512000; 41 double* input = new double[NUM]; 42 for(int i=0;i<NUM;i++) 43 input[i]=i; 44 double* output = new double[NUM]; 45 46 cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, (NUM) * sizeof(double),(void *) input, NULL); 47 cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , NUM * sizeof(double), NULL, NULL); 48 49 /**Step 8: Create kernel object */ 50 cl_kernel kernel = clCreateKernel(program,"helloworld", NULL); 51 52 /**Step 9: Sets Kernel arguments.*/ 53 status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); 54 status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&outputBuffer); 55 56 /**Step 10: Running the kernel.*/ 57 size_t global_work_size[1] = {NUM}; 58 cl_event enentPoint; 59 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &enentPoint); 60 clWaitForEvents(1,&enentPoint); ///wait 61 clReleaseEvent(enentPoint); 62 63 /**Step 11: Read the cout put back to host memory.*/ 64 status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, NUM * sizeof(double), output, 0, NULL, NULL); 65 cout<<output[NUM-1]<<endl; 66 67 /**Step 12: Clean the resources.*/ 68 status = clReleaseKernel(kernel);//*Release kernel. 69 status = clReleaseProgram(program); //Release the program object. 70 status = clReleaseMemObject(inputBuffer);//Release mem object. 71 status = clReleaseMemObject(outputBuffer); 72 status = clReleaseCommandQueue(commandQueue);//Release Command queue. 73 status = clReleaseContext(context);//Release context. 74 75 if (output != NULL) 76 { 77 free(output); 78 output = NULL; 79 } 80 81 if (devices != NULL) 82 { 83 free(devices); 84 devices = NULL; 85 } 86 return 0; 87 } View Code

編譯、鏈接、執行：

　　g++ -I /opt/AMDAPP/include/ -o A *.cpp -lOpenCL ; ./A

分類: GPU/OpenCL 好文要頂關注我收藏該文旭東的博客
關注 - 12
粉絲 - 269 +加關注 0 0 « 上一篇：去掉linux 系統vi中出現^M字符的方法
» 下一篇：GPGPU OpenCL 獲取設備信息

posted on 2014-03-06 17:37 旭東的博客閱讀(10882) 評論(0) 編輯收藏

總結

以上是生活随笔為你收集整理的OpenCL编程实例: 向量计算的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： UNIX网络编程--读书笔记
下一篇： POJ 2676 Sudoku【Danc

3atv精品不卡视频,97人人超碰国产精品最新,中文字幕av一区二区三区人妻少妇,久久久精品波多野结衣,日韩一区二区三区精品

编程问答

OpenCL编程实例: 向量计算

GPGPU OpenCL編程步驟與簡單實例

總結