盒子滤波/均值滤波NEON优化
生活随笔
收集整理的這篇文章主要介紹了
盒子滤波/均值滤波NEON优化
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
GitHub 鏈接:https://github.com/XhtZz/boxfilter
/*
 * Box filter: every output pixel is the sum of the input pixels inside the
 * (2*radius+1) x (2*radius+1) window centred on it, with the window clipped
 * at the image borders.
 *
 * Four implementations of the same filter are kept, from slowest to fastest:
 *   1. brute force             O(width * height * (2r+1)^2)
 *   2. separable row/column    O(width * height * (2r+1) * 2)
 *   3. running sums            O(width * height * 4), independent of radius
 *   4. running sums whose vertical pass uses NEON intrinsics when available
 * Define BOXFILTER_VERSION (1..4) before this point to choose which one the
 * public boxfilter() runs; the default is 4.
 *
 * NOTE(review): the result is stored in an 8-bit buffer, so only the low
 * 8 bits of each window sum survive (sum modulo 256). That matches the
 * narrowing store of the original listing; a true mean filter would have to
 * divide by the clipped window area or write to a wider output type.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#define BOXFILTER_HAVE_NEON 1
#else
#define BOXFILTER_HAVE_NEON 0
#endif

#ifndef BOXFILTER_VERSION
#define BOXFILTER_VERSION 4
#endif

/* The listing uses `uchar` without declaring it; declare it here. */
typedef unsigned char uchar;

/* Signature shared by the "accumulate one row into another" helpers. */
typedef void (*bf_row_op)(uint32_t *acc, const uint32_t *row, int n);

/* Smaller of two ints. */
static int bf_min(int a, int b)
{
    return a < b ? a : b;
}

/* First index of the window around pos, clipped to 0. */
static int bf_window_lo(int pos, int radius)
{
    return pos > radius ? pos - radius : 0;
}

/* Last index (inclusive) of the window around pos, clipped to n - 1. */
static int bf_window_hi(int pos, int radius, int n)
{
    /* Written as a comparison against the remaining room so that
     * pos + radius is never evaluated when it could overflow. */
    return radius < n - 1 - pos ? pos + radius : n - 1;
}

/* Allocate width*height uint32_t values; NULL on overflow or exhaustion. */
static uint32_t *bf_alloc_plane(int width, int height)
{
    const size_t w = (size_t)width;
    const size_t h = (size_t)height;

    if (w > SIZE_MAX / sizeof(uint32_t) / h) {
        return NULL;
    }
    return malloc(w * h * sizeof(uint32_t));
}

/* acc[i] += row[i], plain C. */
static void bf_row_add_scalar(uint32_t *acc, const uint32_t *row, int n)
{
    for (int i = 0; i < n; ++i) {
        acc[i] += row[i];
    }
}

/* acc[i] -= row[i], plain C. */
static void bf_row_sub_scalar(uint32_t *acc, const uint32_t *row, int n)
{
    for (int i = 0; i < n; ++i) {
        acc[i] -= row[i];
    }
}

/* acc[i] += row[i], four lanes at a time when NEON is available. */
static void bf_row_add_simd(uint32_t *acc, const uint32_t *row, int n)
{
    int i = 0;

#if BOXFILTER_HAVE_NEON
    for (; i <= n - 4; i += 4) {
        const uint32x4_t a = vld1q_u32(acc + i);
        const uint32x4_t b = vld1q_u32(row + i);
        vst1q_u32(acc + i, vaddq_u32(a, b));
    }
#endif
    /* Remaining 0..3 elements (or the whole row without NEON). */
    for (; i < n; ++i) {
        acc[i] += row[i];
    }
}

/* acc[i] -= row[i], four lanes at a time when NEON is available. */
static void bf_row_sub_simd(uint32_t *acc, const uint32_t *row, int n)
{
    int i = 0;

#if BOXFILTER_HAVE_NEON
    for (; i <= n - 4; i += 4) {
        const uint32x4_t a = vld1q_u32(acc + i);
        const uint32x4_t b = vld1q_u32(row + i);
        vst1q_u32(acc + i, vsubq_u32(a, b));
    }
#endif
    for (; i < n; ++i) {
        acc[i] -= row[i];
    }
}

/*
 * Version 1: brute force. Visits every pixel of every window.
 * Needs no scratch memory, so it also serves as the fallback when the
 * faster versions cannot allocate theirs.
 */
static void boxfilter_v1_naive(const uchar *image, uchar *dst,
                               int width, int height, int radius)
{
    for (int h = 0; h < height; ++h) {
        const int top = bf_window_lo(h, radius);
        const int bottom = bf_window_hi(h, radius, height);

        for (int w = 0; w < width; ++w) {
            const int left = bf_window_lo(w, radius);
            const int right = bf_window_hi(w, radius, width);
            /* Unsigned so that wrap-around is well defined; only the low
             * 8 bits reach dst anyway. */
            uint32_t sum = 0;

            for (int i = top; i <= bottom; ++i) {
                const uchar *row = image + (size_t)i * (size_t)width;
                for (int j = left; j <= right; ++j) {
                    sum += row[j];
                }
            }
            dst[(size_t)h * (size_t)width + (size_t)w] = (uchar)sum;
        }
    }
}

/*
 * Version 2: separable. First sums each horizontal window into a scratch
 * plane, then sums vertical windows of that plane.
 * Returns 0 on success, -1 if the scratch plane could not be allocated.
 */
static int boxfilter_v2_separable(const uchar *image, uchar *dst,
                                  int width, int height, int radius)
{
    uint32_t *temp = bf_alloc_plane(width, height);

    if (temp == NULL) {
        return -1;
    }

    /* Horizontal pass: one window sum per pixel, along its row. */
    for (int h = 0; h < height; ++h) {
        const uchar *src = image + (size_t)h * (size_t)width;
        uint32_t *out = temp + (size_t)h * (size_t)width;

        for (int w = 0; w < width; ++w) {
            const int left = bf_window_lo(w, radius);
            const int right = bf_window_hi(w, radius, width);
            uint32_t sum = 0;

            for (int j = left; j <= right; ++j) {
                sum += src[j];
            }
            out[w] = sum;
        }
    }

    /* Vertical pass: sum the row results down each column. */
    for (int h = 0; h < height; ++h) {
        const int top = bf_window_lo(h, radius);
        const int bottom = bf_window_hi(h, radius, height);
        uchar *out = dst + (size_t)h * (size_t)width;

        for (int w = 0; w < width; ++w) {
            uint32_t sum = 0;

            for (int i = top; i <= bottom; ++i) {
                sum += temp[(size_t)i * (size_t)width + (size_t)w];
            }
            out[w] = (uchar)sum;
        }
    }

    free(temp);
    return 0;
}

/*
 * Horizontal running sums: moving one pixel to the right adds the column
 * that enters the window and subtracts the one that leaves it.
 * rx must satisfy 0 <= rx <= width - 1.
 */
static void bf_horizontal_running_sums(const uchar *image, uint32_t *temp,
                                       int width, int height, int rx)
{
    for (int h = 0; h < height; ++h) {
        const uchar *src = image + (size_t)h * (size_t)width;
        uint32_t *out = temp + (size_t)h * (size_t)width;
        uint32_t sum = 0;

        /* Window of column 0 covers columns [0, rx]. */
        for (int i = 0; i <= rx; ++i) {
            sum += src[i];
        }
        out[0] = sum;

        for (int w = 1; w < width; ++w) {
            if (w < width - rx) {
                sum += src[w + rx];      /* column entering on the right */
            }
            if (w > rx) {
                sum -= src[w - rx - 1];  /* column leaving on the left */
            }
            out[w] = sum;
        }
    }
}

/*
 * Vertical running sums over the horizontal results. acc holds one value
 * per column and must be zero-filled on entry. ry must satisfy
 * 0 <= ry <= height - 1.
 */
static void bf_vertical_running_sums(const uint32_t *temp, uchar *dst,
                                     uint32_t *acc, int width, int height,
                                     int ry, bf_row_op add_row,
                                     bf_row_op sub_row)
{
    /* Window of row 0 covers rows [0, ry]. */
    for (int h = 0; h <= ry; ++h) {
        add_row(acc, temp + (size_t)h * (size_t)width, width);
    }
    for (int w = 0; w < width; ++w) {
        dst[w] = (uchar)acc[w];
    }

    for (int h = 1; h < height; ++h) {
        uchar *out = dst + (size_t)h * (size_t)width;

        if (h < height - ry) {
            /* row entering at the bottom */
            add_row(acc, temp + (size_t)(h + ry) * (size_t)width, width);
        }
        if (h > ry) {
            /* row leaving at the top */
            sub_row(acc, temp + (size_t)(h - ry - 1) * (size_t)width, width);
        }
        for (int w = 0; w < width; ++w) {
            out[w] = (uchar)acc[w];
        }
    }
}

/*
 * Shared driver for versions 3 and 4: allocates the scratch buffers, runs
 * both running-sum passes and releases the buffers again.
 * Returns 0 on success, -1 if a buffer could not be allocated.
 */
static int bf_running_sum_filter(const uchar *image, uchar *dst,
                                 int width, int height, int radius,
                                 bf_row_op add_row, bf_row_op sub_row)
{
    /* A radius reaching past the far border selects the same pixels as one
     * that just reaches it, so clamping does not change the result. */
    const int rx = bf_min(radius, width - 1);
    const int ry = bf_min(radius, height - 1);
    int rc = -1;
    uint32_t *temp = bf_alloc_plane(width, height);
    uint32_t *acc = calloc((size_t)width, sizeof *acc);

    if (temp != NULL && acc != NULL) {
        bf_horizontal_running_sums(image, temp, width, height, rx);
        bf_vertical_running_sums(temp, dst, acc, width, height, ry,
                                 add_row, sub_row);
        rc = 0;
    }

    free(acc);
    free(temp);
    return rc;
}

/* Version 3: running sums in both directions, plain C. */
static int boxfilter_v3_running_sum(const uchar *image, uchar *dst,
                                    int width, int height, int radius)
{
    return bf_running_sum_filter(image, dst, width, height, radius,
                                 bf_row_add_scalar, bf_row_sub_scalar);
}

/*
 * Version 4: as version 3, but the vertical pass adds and subtracts whole
 * rows with NEON intrinsics when the target provides them.
 */
static int boxfilter_v4_neon(const uchar *image, uchar *dst,
                             int width, int height, int radius)
{
    return bf_running_sum_filter(image, dst, width, height, radius,
                                 bf_row_add_simd, bf_row_sub_simd);
}

/*
 * Box-filter an 8-bit single-channel image.
 *
 * image   source pixels, row-major, width * height bytes; not modified
 * dst     destination, width * height bytes; must not overlap image
 * width   pixels per row
 * height  number of rows
 * radius  half window size; the window is (2*radius+1) pixels square and is
 *         clipped at the borders. A negative radius is treated as 0.
 *
 * Nothing is written when a pointer is NULL or a dimension is not positive.
 * If the selected version cannot allocate its scratch memory, the result is
 * computed by the allocation-free brute-force version instead.
 */
void boxfilter(const uchar *image, uchar *dst, int width, int height, int radius)
{
    int rc;

    if (image == NULL || dst == NULL || width <= 0 || height <= 0) {
        return;
    }
    if (radius < 0) {
        radius = 0;
    }

    switch (BOXFILTER_VERSION) {
    case 1:
        rc = -1; /* routes straight to the brute-force path below */
        break;
    case 2:
        rc = boxfilter_v2_separable(image, dst, width, height, radius);
        break;
    case 3:
        rc = boxfilter_v3_running_sum(image, dst, width, height, radius);
        break;
    default:
        rc = boxfilter_v4_neon(image, dst, width, height, radius);
        break;
    }

    if (rc != 0) {
        boxfilter_v1_naive(image, dst, width, height, radius);
    }
}
總結
以上是生活随笔為你收集整理的盒子滤波/均值滤波NEON优化的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: ARM汇编优化技巧
- 下一篇: x的平方根—leetcode69