C# vs C++ 全局照明渲染性能比试
512x512像素,每像素1000采樣,C#版本渲染時間為40分47秒
最近有多篇討論程序語言趨勢的博文,其中談及到C#的性能問題。本人之前未做過相關測試,自己的回覆流于理論猜測,所以花了點時間做個簡單實驗,比較C#和C++的性能。
實驗內容
趙姐夫在此回覆認為,C#比C/C++慢,主要在于.Net平臺的垃圾回收(garbage collection, GC)機制。若是計算密集型應用,C#和C++產生的原生代碼,速度應該相差不大。我對此半信半疑。想到之前看過一個用99行C++代碼實現的全局照明(global illumination, GI)渲染程序smallpt?,是純計算密集的。而且在運算期間,若用C#實現,基本上連GC都可以不用。因此,就把該99行代碼移植至C#。
此渲染器的一些特點如下:
- 使用蒙地卡羅路徑追蹤(Monte Carlo path-tracing)來產生全局照明效果
- 支持三種雙向反射分布函數(bidirectional reflectance distribution function, BRDF): 鏡射(specular)、漫射(diffuse)和玻璃(即純折射的介質)
- 從漫射光源產生柔和陰影(soft shadow)
- 使用2x2超采樣(super-sampling)去實現反鋸齒
- 使用OpenMP作并行運算,充份利用多核性能
當中的術語及技術,之后可能會于圖形學博文系列里探討。本文主要以性能為題。
C++版本
以下是C++版本代碼,作了些許修改。修改地方加上了MILO注譯。
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | #include <math.h>?? // smallpt, a Path Tracer by Kevin Beason, 2008 #include <stdlib.h> // Make : g++ -O3 -fopenmp smallpt.cpp -o smallpt #include <stdio.h>? //??????? Remove "-fopenmp" for g++ version < 4.2 #include <time.h>???? // MILO #include "erand48.inc"? // MILO #define M_PI 3.141592653589793238462643 // MILO struct?Vec {??????? // Usage: time ./smallpt 5000 && xv image.ppm ??double?x, y, z;????????????????? // position, also color (r,g,b) ??Vec(double?x_=0, double?y_=0, double?z_=0){ x=x_; y=y_; z=z_; } ??Vec operator+(const?Vec &b) const?{ return?Vec(x+b.x,y+b.y,z+b.z); } ??Vec operator-(const?Vec &b) const?{ return?Vec(x-b.x,y-b.y,z-b.z); } ??Vec operator*(double?b) const?{ return?Vec(x*b,y*b,z*b); } ??Vec mult(const?Vec &b) const?{ return?Vec(x*b.x,y*b.y,z*b.z); } ??Vec& norm(){ return?*this?= *this?* (1/sqrt(x*x+y*y+z*z)); } ??double?dot(const?Vec &b) const?{ return?x*b.x+y*b.y+z*b.z; } // cross: ??Vec operator%(const?Vec &b){return?Vec(y*b.z-z*b.y,z*b.x-x*b.z,x*b.y-y*b.x);} }; struct?Ray { Vec o, d; Ray(const?Vec &o_, const?Vec &d_) : o(o_), d(d_) {} }; enum?Refl_t { DIFF, SPEC, REFR };? // material types, used in radiance() struct?Sphere { ??double?rad;?????? // radius ??Vec p, e, c;????? // position, emission, color ??Refl_t refl;????? // reflection type (DIFFuse, SPECular, REFRactive) ??Sphere(double?rad_, Vec p_, Vec e_, Vec c_, Refl_t refl_): ????rad(rad_), p(p_), e(e_), c(c_), refl(refl_) {} ??double?intersect(const?Ray &r) const?{ // returns distance, 0 if nohit ????Vec op = p-r.o; // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0 ????double?t, eps=1e-4, b=op.dot(r.d), det=b*b-op.dot(op)+rad*rad; ????if?(det<0) return?0; else?det=sqrt(det); ????return?(t=b-det)>eps ? t : ((t=b+det)>eps ? t : 0); ??} }; Sphere spheres[] = {//Scene: radius, position, emission, color, material ??Sphere(1e5, Vec( 1e5+1,40.8,81.6), Vec(),Vec(.75,.25,.25),DIFF),//Left ??Sphere(1e5, Vec(-1e5+99,40.8,81.6),Vec(),Vec(.25,.25,.75),DIFF),//Rght ??Sphere(1e5, Vec(50,40.8, 1e5),???? Vec(),Vec(.75,.75,.75),DIFF),//Back ??Sphere(1e5, Vec(50,40.8,-1e5+170), Vec(),Vec(),?????????? DIFF),//Frnt ??Sphere(1e5, Vec(50, 1e5, 81.6),??? Vec(),Vec(.75,.75,.75),DIFF),//Botm ??Sphere(1e5, Vec(50,-1e5+81.6,81.6),Vec(),Vec(.75,.75,.75),DIFF),//Top ??Sphere(16.5,Vec(27,16.5,47),?????? Vec(),Vec(1,1,1)*.999, SPEC),//Mirr ??Sphere(16.5,Vec(73,16.5,78),?????? Vec(),Vec(1,1,1)*.999, REFR),//Glas ??Sphere(600, Vec(50,681.6-.27,81.6),Vec(12,12,12),? Vec(), DIFF) //Lite }; inline?double?clamp(double?x){ return?x<0 ? 0 : x>1 ? 1 : x; } inline?int?toInt(double?x){ return?int(pow(clamp(x),1/2.2)*255+.5); } inline?bool?intersect(const?Ray &r, double?&t, int?&id){ ??double?n=sizeof(spheres)/sizeof(Sphere), d, inf=t=1e20; ??for(int?i=int(n);i--;) if((d=spheres[i].intersect(r))&&d<t){t=d;id=i;} ??return?t<inf; } Vec radiance(const?Ray &r, int?depth, unsigned short?*Xi){ ??double?t;?????????????????????????????? // distance to intersection ??int?id=0;?????????????????????????????? // id of intersected object ??if?(!intersect(r, t, id)) return?Vec(); // if miss, return black ??const?Sphere &obj = spheres[id];??????? // the hit object ??Vec x=r.o+r.d*t, n=(x-obj.p).norm(), nl=n.dot(r.d)<0?n:n*-1, f=obj.c; ??double?p = f.x>f.y && f.x>f.z ? f.x : f.y>f.z ? f.y : f.z; // max refl ??if?(++depth>5) if?(erand48(Xi)<p) f=f*(1/p); else?return?obj.e; //R.R. ??if?(depth > 100) return?obj.e; // MILO ??if?(obj.refl == DIFF){????????????????? // Ideal DIFFUSE reflection ????double?r1=2*M_PI*erand48(Xi), r2=erand48(Xi), r2s=sqrt(r2); ????Vec w=nl, u=((fabs(w.x)>.1?Vec(0,1):Vec(1))%w).norm(), v=w%u; ????Vec d = (u*cos(r1)*r2s + v*sin(r1)*r2s + w*sqrt(1-r2)).norm(); ????return?obj.e + f.mult(radiance(Ray(x,d),depth,Xi)); ??} else?if?(obj.refl == SPEC)??????????? // Ideal SPECULAR reflection ????return?obj.e + f.mult(radiance(Ray(x,r.d-n*2*n.dot(r.d)),depth,Xi)); ??Ray reflRay(x, r.d-n*2*n.dot(r.d));???? // Ideal dielectric REFRACTION ??bool?into = n.dot(nl)>0;??????????????? // Ray from outside going in? ??double?nc=1, nt=1.5, nnt=into?nc/nt:nt/nc, ddn=r.d.dot(nl), cos2t; ??if?((cos2t=1-nnt*nnt*(1-ddn*ddn))<0)??? // Total internal reflection ????return?obj.e + f.mult(radiance(reflRay,depth,Xi)); ??Vec tdir = (r.d*nnt - n*((into?1:-1)*(ddn*nnt+sqrt(cos2t)))).norm(); ??double?a=nt-nc, b=nt+nc, R0=a*a/(b*b), c = 1-(into?-ddn:tdir.dot(n)); ??double?Re=R0+(1-R0)*c*c*c*c*c,Tr=1-Re,P=.25+.5*Re,RP=Re/P,TP=Tr/(1-P); ??return?obj.e + f.mult(depth>2 ? (erand48(Xi)<P ??? // Russian roulette ????radiance(reflRay,depth,Xi)*RP:radiance(Ray(x,tdir),depth,Xi)*TP) : ????radiance(reflRay,depth,Xi)*Re+radiance(Ray(x,tdir),depth,Xi)*Tr); } int?main(int?argc, char?*argv[]){ ??clock_t?start = clock(); // MILO ??int?w=512, h=512, samps = argc==2 ? atoi(argv[1])/4 : 250; // # samples ??Ray cam(Vec(50,52,295.6), Vec(0,-0.042612,-1).norm()); // cam pos, dir ??Vec cx=Vec(w*.5135/h), cy=(cx%cam.d).norm()*.5135, r, *c=new?Vec[w*h]; #pragma omp parallel for schedule(dynamic, 1) private(r)?????? // OpenMP ??for?(int?y=0; y<h; y++){?????????????????????? // Loop over image rows ????fprintf(stderr,"\rRendering (%d spp) %5.2f%%",samps*4,100.*y/(h-1)); ????unsigned short?Xi[3]={0,0,y*y*y}; // MILO ????for?(unsigned short?x=0; x<w; x++)?? // Loop cols ??????for?(int?sy=0, i=(h-y-1)*w+x; sy<2; sy++)???? // 2x2 subpixel rows ????????for?(int?sx=0; sx<2; sx++, r=Vec()){??????? // 2x2 subpixel cols ??????????for?(int?s=0; s<samps; s++){ ????????????double?r1=2*erand48(Xi), dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1); ????????????double?r2=2*erand48(Xi), dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2); ????????????Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) + ????????????????????cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d; ????????????r = r + radiance(Ray(cam.o+d*140,d.norm()),0,Xi)*(1./samps); ??????????} // Camera rays are pushed ^^^^^ forward to start in interior ??????????c[i] = c[i] + Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25; ????????} ??} ??printf("\n%f sec\n", (float)(clock() - start)/CLOCKS_PER_SEC); // MILO ??FILE?*f = fopen("image.ppm", "w");???????? // Write image to PPM file. ??fprintf(f, "P3\n%d %d\n%d\n", w, h, 255); ??for?(int?i=0; i<w*h; i++) ????fprintf(f,"%d %d %d ", toInt(c[i].x), toInt(c[i].y), toInt(c[i].z)); } |
由于Visual C++沒有erand48()函數,便在網上找到一個PostreSQL的實現?。此外,為了比較公平,分別測試使用和禁用OpenMP情況下的性能。
本人亦為了顯示C++特有的能力,另外作一個版本,采用微軟DirectX SDK中的C++ XNA數學庫進行SIMD矢量加速(XNA Game Studio也有.Net用的XNA數學庫,但本文并沒有使用)。由于XNA數學庫采用單精度浮點數,對這個特別場景(6面墻壁其實是由6個巨大的球體組成)有超出精度范圍的問題。因此,我對這版本里的場境稍作修改。又因為erand48()函數是傳回雙精度的隨機數,多次轉換比較慢,此版本就換了之前博文使用的LCG實現。
C#版本
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | using?System; using?System.IO; namespace?smallpt_cs { struct?Vec {??????? // Usage: time ./smallpt 5000 && xv image.ppm ???public?double?x,y,z;???????????????? // position,also color (r,g,b) ???public?Vec(double?x_,double?y_,double?z_) {x=x_;y=y_;z=z_;} ???public?static?Vec operator?+(Vec a,Vec b) {return?new?Vec(a.x+b.x,a.y+b.y,a.z+b.z);} ???public?static?Vec operator?-(Vec a,Vec b) {return?new?Vec(a.x-b.x,a.y-b.y,a.z-b.z);} ???public?static?Vec operator?*(Vec a,double?b) {return?new?Vec(a.x*b,a.y*b,a.z*b);} ???public?Vec mult(Vec b) { return?new?Vec(x*b.x,y*b.y,z*b.z);} ???public?Vec norm() { return?this=this*(1/Math.Sqrt(x*x+y*y+z*z));} ???public?double?dot(Vec b) { return?x*b.x+y*b.y+z*b.z;}//cross: ???public?static?Vec operator?%(Vec a,Vec b) { return?new?Vec(a.y*b.z-a.z*b.y,a.z*b.x-a.x*b.z,a.x*b.y-a.y*b.x);} } enum?Refl_t { DIFF,SPEC,REFR }; // material types,used in radiance() struct?Ray { public?Vec o,d;public?Ray(Vec o_,Vec d_) { o=o_;d=d_;} } class?Sphere { ??public?double?rad;????? // radius ??public?Vec p,e,c;???? // position,emission,color ??public?Refl_t refl;???? // reflection type (DIFFuse,SPECular,REFRactive) ??public?Sphere(double?rad_,Vec p_,Vec e_,Vec c_,Refl_t refl_) { ????rad=rad_;p=p_;e=e_;c=c_;refl=refl_; ??} ??public?double?intersect(Ray r) ??{ // returns distance,0 if nohit ????Vec op=p-r.o;// Solve t^2*d.d+2*t*(o-p).d+(o-p).(o-p)-R^2=0 ????double?t,eps=1e-4,b=op.dot(r.d),det=b*b-op.dot(op)+rad*rad; ????if?(det<0) return?0;else?det=Math.Sqrt(det); ????return?(t=b-det) > eps?t : ((t=b+det) > eps?t : 0); ??} }; class?smallpt { ??static?Random random=new?Random(); ??static?double?erand48() { return?random.NextDouble();} ??static?Sphere[] spheres={//Scene: radius,position,emission,color,material ????new?Sphere(1e5,new?Vec( 1e5+1,40.8,81.6), new?Vec(),new?Vec(.75,.25,.25),Refl_t.DIFF),//Left ????new?Sphere(1e5,new?Vec(-1e5+99,40.8,81.6),new?Vec(),new?Vec(.25,.25,.75),Refl_t.DIFF),//Rght ????new?Sphere(1e5,new?Vec(50,40.8,1e5),????? new?Vec(),new?Vec(.75,.75,.75),Refl_t.DIFF),//Back ????new?Sphere(1e5,new?Vec(50,40.8,-1e5+170), new?Vec(),new?Vec(),?????????? Refl_t.DIFF),//Frnt ????new?Sphere(1e5,new?Vec(50,1e5,81.6),????? new?Vec(),new?Vec(.75,.75,.75),Refl_t.DIFF),//Botm ????new?Sphere(1e5,new?Vec(50,-1e5+81.6,81.6),new?Vec(),new?Vec(.75,.75,.75),Refl_t.DIFF),//Top ????new?Sphere(16.5,new?Vec(27,16.5,47),????? new?Vec(),new?Vec(1,1,1)*.999, Refl_t.SPEC),//Mirr ????new?Sphere(16.5,new?Vec(73,16.5,78),????? new?Vec(),new?Vec(1,1,1)*.999, Refl_t.REFR),//Glas ????new?Sphere(600,new?Vec(50,681.6-.27,81.6),new?Vec(12,12,12), new?Vec(),? Refl_t.DIFF) //Lite ??}; ??static?double?clamp(double?x) { return?x<0?0 : x > 1?1 : x;} ??static?int?toInt(double?x) { return?(int)(Math.Pow(clamp(x),1 / 2.2)*255+.5);} ??static?bool?intersect(Ray r,ref?double?t,ref?int?id) { ????double?d,inf=t=1e20; ????for?(int?i=spheres.Length-1;i >= 0;i--) ??????if?((d=spheres[i].intersect(r)) != 0 && d<t) { t=d;id=i;} ????return?t<inf; ??} ??static?Vec radiance(Ray r,int?depth) { ????double?t=0;????????????????????????????? // distance to intersection ????int?id=0;????????????????????????????? // id of intersected object ????if?(!intersect(r,ref?t,ref?id)) return?new?Vec();// if miss,return black ????Sphere obj=spheres[id];?????? // the hit object ????Vec x=r.o+r.d*t,n=(x-obj.p).norm(),nl=n.dot(r.d)<0?n:n*-1,f=obj.c; ????double?p=f.x>f.y&&f.x>f.z?f.x:f.y>f.z?f.y:f.z;//max refl ????if?(++depth > 5) if?(erand48()<p) f=f*(1 / p);else?return?obj.e;//R.R. ????if?(depth > 100) return?obj.e; ????if?(obj.refl == Refl_t.DIFF) {????????????????? // Ideal DIFFUSE reflection ??????double?r1=2*Math.PI*erand48(),r2=erand48(),r2s=Math.Sqrt(r2); ??????Vec w=nl,u=((Math.Abs(w.x)>.1?new?Vec(0,1,0):new?Vec(1,0,0))%w).norm(),v=w%u; ??????Vec d=(u*Math.Cos(r1)*r2s+v*Math.Sin(r1)*r2s+w*Math.Sqrt(1-r2)).norm(); ??????return?obj.e+f.mult(radiance(new?Ray(x,d),depth)); ????} ????else?if?(obj.refl == Refl_t.SPEC)??????????? // Ideal SPECULAR reflection ??????return?obj.e+f.mult(radiance(new?Ray(x,r.d-n*2*n.dot(r.d)),depth)); ????Ray reflRay=new?Ray(x,r.d-n*2*n.dot(r.d));//IdealdielectricREFRACTION ????bool?into=n.dot(nl) > 0;?????????????? // Ray from outside going in? ????double?nc=1,nt=1.5,nnt=into?nc / nt : nt / nc,ddn=r.d.dot(nl),cos2t; ????if?((cos2t=1-nnt*nnt*(1-ddn*ddn))<0)??? //Total internal reflection ??????return?obj.e+f.mult(radiance(reflRay,depth)); ????Vec tdir=(r.d*nnt-n*((into?1:-1)*(ddn*nnt+Math.Sqrt(cos2t)))).norm(); ????double?a=nt-nc,b=nt+nc,R0=a*a/(b*b),c=1-(into?-ddn:tdir.dot(n)); ????double?Re=R0+(1-R0)*c*c*c*c*c,Tr=1-Re,P=.25+.5*Re,RP=Re/P,TP=Tr/(1-P); ????return?obj.e+f.mult(depth > 2?(erand48()<P?? // Russian roulette ??????radiance(reflRay,depth)*RP:radiance(new?Ray(x,tdir),depth)*TP): ??????radiance(reflRay,depth)*Re+radiance(new?Ray(x,tdir),depth)*Tr); ??} ??public?static?void?Main(string[] args) { ????DateTime start=DateTime.Now; ????int?w=256,h=256,samps=args.Length==2?int.Parse(args[1])/4:25;// # samples ????Ray cam=new?Ray(new?Vec(50,52,295.6),new?Vec(0,-0.042612,-1).norm());//cam pos,dir ????Vec cx=new?Vec(w*.5135/h,0,0),cy=(cx%cam.d).norm()*.5135,r;Vec[] c=new?Vec[w*h]; ????for?(int?y=0;y<h;y++) {??????????????????????? // Loop over image rows ??????Console.Write("\rRendering ({0}spp) {1:F2}%",samps*4,100.0*y/(h-1)); ??????for?(int?x=0;x<w;x++)?? // Loop cols ????????for?(int?sy=0,i=(h-y-1)*w+x;sy<2;sy++)???? // 2x2 subpixel rows ??????????for?(int?sx=0;sx<2;sx++) {?????????????? // 2x2 subpixel cols ????????????r=new?Vec(); ????????????for?(int?s=0;s<samps;s++) { ??????????????double?r1=2*erand48(),dx=r1<1?Math.Sqrt(r1)-1:1-Math.Sqrt(2-r1); ??????????????double?r2=2*erand48(),dy=r2<1?Math.Sqrt(r2)-1:1-Math.Sqrt(2-r2); ??????????????Vec d=cx*(((sx+.5+dx)/2+x)/w-.5)+ ????????????????????cy*(((sy+.5+dy)/2+y)/h-.5)+cam.d; ??????????????r=r+radiance(new?Ray(cam.o+d*140,d.norm()),0)*(1.0/samps); ????????????} // Camera rays are pushed ^^^^^ forward to start in interior ????????????c[i]=c[i]+new?Vec(clamp(r.x),clamp(r.y),clamp(r.z))*.25; ??????????} ????} ????Console.WriteLine("\n{0} sec",(DateTime.Now-start).TotalSeconds); ????using?(StreamWriter sw=new?StreamWriter("image.ppm")) { ??????sw.Write("P3\r\n{0} {1}\r\n{2}\r\n",w,h,255); ??????for?(int?i=0;i<w*h;i++) ????????sw.Write("{0} {1} {2}\r\n",toInt(c[i].x),toInt(c[i].y),toInt(c[i].z)); ??????sw.Close(); ????} ??} } } |
Vec和Ray需要不斷在計算中產生實例,所以設它們為struct,struct在C#代表值類型(value type),ibpp在堆棧上高效分配內存的,不需使用GC。渲染時,Sphere是只讀對象,因此用class作為引用類型(reference type)去避免不必要的復制。
實驗結果和分析
實驗環境是Visual Studio 2008/.Net Framework 3.5編譯,Intel I7 920 (4核、超線程)。渲染512x512解像度,每像素100個采樣。結果如下:
| ? | 測試版本 | 需時(秒) |
| (a) | C++ | 45.548 |
| (b) | C# | 61.044 |
| (c) | C++ SIMD | 20.500 |
| (d) | C++(OpenMP) | 7.397 |
| (e) | C++ SIMD(OpenMP) | 3.470 |
| (f)* | C++ LCG | 17.365 |
| (g)* | C# LCG | 59.623 |
| (h)* | C++ LCG (OpenMP) | 3.427 |
*2010/6/23 加入(f)(g)(h),見更新1
最基本,應比較(a)和(b)。兩者皆使用單線程。 C++版本性能比C#版本快大約34%。這其實已遠遠超出我對C#/.Net的期望,沒想到用JIT代碼的運行速度,能這么接近傳統的編譯方式。
采用SIMD的C++版本(c),雖然仍未大量優化,但性能比沒有SIMD的版本高122%,比C#版本高接近兩倍。不過,采用SIMD后,數值運算的精確度變低,所以這比較只能作為參考。
采用OpenMP能活用i7的8個邏輯核心。使用OpenMP的非SIMD(d)和SIMD(e)版本,分別比沒使用OpenMP的版本(a)和(c),性能各為6.16倍和5.9倍。這已經很接近理想值8,說明這應用能充分利用CPU并行性。而OpenMP強大的地方,在于只需加入1句編譯器#pragma指令就能自動并行。
結語
雖然本文的實驗只能反映個別情況。但實驗可以說明,在某些應用上,C#/.Net的性能可以非常貼近C++,差別小于一個數量級。
本文實驗所用的程序代碼,有不少進一步優化的空間,源代碼可于這里下載。有興趣的朋友也可把代碼移植至Java及其他語言。
最后,本人認為,各種平臺和語言,都有其適用時機。作為程序員,最理想是認識各種技術,以及認清每個技術的特長、短處,以便為應用找到最好的配撘。
更新
總結
以上是生活随笔為你收集整理的C# vs C++ 全局照明渲染性能比试的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 用JavaScript玩转计算机图形学(
- 下一篇: 用JavaScript玩转游戏物理(一)