// (c) 2008 Steven Gratton // Guided by examples from the AMD Stream SDK // Note the assignment of the same resource // to both an input and output buffer of a kernel // in order to allow the factorization to // be done in place // Performance is better than the code using // a global buffer but still needs improving... #include <iostream> #include <iomanip> #include <string> #include <ctime> #include <cmath> #include "cal.h" #include "calcl.h" #include "cal_ext.h" #include "cal_ext_counter.h" #include "newcholforweb.h" std::string ILcheck= "il_ps_2_0\n" "ret_dyn\n" "end\n"; using namespace std; static PFNCALCTXCREATECOUNTER calCtxCreateCounterExt; static PFNCALCTXDESTROYCOUNTER calCtxDestroyCounterExt; static PFNCALCTXBEGINCOUNTER calCtxBeginCounterExt; static PFNCALCTXENDCOUNTER calCtxEndCounterExt; static PFNCALCTXGETCOUNTER calCtxGetCounterExt; void maketestmat(int n,float* mat) { for (int i=0;i<n;i++){ mat[i*n+i]=(float)(i+1); // mat[i*n+i]=(float) i*n+i+1; for (int j=i+1;j<n;j++){ mat[i*n+j]=.1f; mat[j*n+i]=.1f; // mat[i*n+j]=(float) i*n+j; // mat[j*n+i]=(float) i*n+j; } } } void splitmat(int width,int height,float* mat,float* split0,float* split1,float* split2,float* split3) { for (int i=0;i<(height/4);i++){ for (int j=0;j<width;j++){ split0[i*width+j]=mat[width*4*i+j]; split1[i*width+j]=mat[width*(4*i+1)+j]; split2[i*width+j]=mat[width*(4*i+2)+j]; split3[i*width+j]=mat[width*(4*i+3)+j]; } } } void unsplitmat(int width,int height,float* mat,float* split0,float* split1,float* split2,float* split3) { for (int i=0;i<(height/4);i++){ for (int j=0;j<width;j++){ mat[width*4*i+j]=split0[i*width+j]; mat[width*(4*i+1)+j]=split1[i*width+j]; mat[width*(4*i+2)+j]=split2[i*width+j]; mat[width*(4*i+3)+j]=split3[i*width+j]; } } } void dispmat(int width,int height,float* mat) { for (int i=0;i<height;i++){ for (int j=0;j<width;j++){ printf("%10.6f ",mat[i*width+j]); if (!((j+1)&0x3)) printf(" "); } printf("\n"); if (!((i+1)&0x3)) printf("\n"); } printf("\n"); } void dispdiag(int width,int height,float* mat) { for (int i=0;i<min(width,height);i++){ printf("%10.6f ",mat[i*width+i]); if (!((i+1)&0x3)) printf(" "); if (!((i+1)&0x1f)) printf("\n"); } printf("\n"); } void dispmattr(int width,int height,float* mat) { for (int i=0;i<height;i++){ for (int j=0;j<width;j++){ printf("%10.6f ",mat[j*width+i]); if (!((j+1)&0x3)) printf(" "); } printf("\n"); if (!((i+1)&0x3)) printf("\n"); } printf("\n"); } void copytogpu(int width,int height,float* cpumat,CALresource gpumat) { float* gpuptr=NULL; CALuint gpupitch=0; calResMap((CALvoid**)&gpuptr, &gpupitch, gpumat, 0); cout << "pitch=" << gpupitch << endl; for (int i = 0; i < height; ++i) { float* tmp = &gpuptr[i * gpupitch*4]; for(int j = 0; j < width; ++j) { tmp[j]=cpumat[i*width+j]; } } calResUnmap(gpumat); } void zeroongpu(int width,int height,CALresource gpumat) { float* gpuptr=NULL; CALuint gpupitch=0; calResMap((CALvoid**)&gpuptr, &gpupitch, gpumat, 0); memset(gpuptr,0,height*gpupitch*4*sizeof(float)); calResUnmap(gpumat); } void copytocpu(int width,int height,float* cpumat,CALresource gpumat) { float* gpuptr=NULL; CALuint gpupitch=0; calResMap((CALvoid**)&gpuptr, &gpupitch, gpumat, 0); for (int i = 0; i < height; ++i) { float* tmp = &gpuptr[i * gpupitch*4]; for(int j = 0; j < width; ++j) { cpumat[i*width+j]=tmp[j]; } } calResUnmap(gpumat); } int main(int argc, char** argv) { int n=8192; int n4=n/4; if((n%4)!=0) { cout << "Sorry, only matrices with size a multiple \ of 4 are supported." << endl; return 1;} float* cpumat=new float[n*n]; float* cpupart0=new float[n*n4]; float* cpupart1=new float[n*n4]; float* cpupart2=new float[n*n4]; float* cpupart3=new float[n*n4]; maketestmat(n,cpumat); // dispmat(n,n,cpumat); splitmat(n,n,cpumat,cpupart0,cpupart1,cpupart2,cpupart3); // dispmat(n,n4,cpupart0); //dispmat(n,n4,cpupart1); //dispmat(n,n4,cpupart2); //dispmat(n,n4,cpupart3); //unsplitmat(n,n,cpumat,cpupart0,cpupart1,cpupart2,cpupart3); //dispmat(n,n,cpumat); std::string kernel0 = choltopleft; std::string kernel1 = cholstrip; std::string kernel2 = cholcopy; std::string kernel3 = cholupdate; // kernel0=ILcheck; // kernel1=ILcheck; // kernel2=ILcheck; // kernel3=ILcheck; calInit(); CALuint numDevices = 0; calDeviceGetCount(&numDevices); cout << "Num devices =" << numDevices << endl; CALdevice device = 0; calDeviceOpen(&device, 0); CALdeviceinfo info; calDeviceGetInfo(&info, 0); if (calExtSupported((CALextid)CAL_EXT_COUNTERS) != CAL_RESULT_OK) { return 1; } if (calExtGetProc((CALextproc*)&calCtxCreateCounterExt, (CALextid)CAL_EXT_COUNTERS, "calCtxCreateCounter")) { return 1; } if (calExtGetProc((CALextproc*)&calCtxDestroyCounterExt, (CALextid)CAL_EXT_COUNTERS, "calCtxDestroyCounter")) { return 1; } if (calExtGetProc((CALextproc*)&calCtxBeginCounterExt, (CALextid)CAL_EXT_COUNTERS, "calCtxBeginCounter")) { return 1; } if (calExtGetProc((CALextproc*)&calCtxEndCounterExt, (CALextid)CAL_EXT_COUNTERS, "calCtxEndCounter")) { return 1; } if (calExtGetProc((CALextproc*)&calCtxGetCounterExt, (CALextid)CAL_EXT_COUNTERS, "calCtxGetCounter")) { return 1; } CALcontext ctx = 0; calCtxCreate(&ctx, device); CALobject obj0 = NULL; CALimage image0 = NULL; CALlanguage lang0 = CAL_LANGUAGE_IL; if (calclCompile(&obj0, lang0, kernel0.c_str(), info.target) != CAL_RESULT_OK) { fprintf(stdout, "Kernel0 compilation failed. Exiting.\n"); return 1; } else { cout << "kernel0 compiled fine" << endl; }; if (calclLink(&image0, &obj0, 1) != CAL_RESULT_OK) { fprintf(stdout, "Kernel0 linking failed. Exiting.\n"); return 1; } CALobject obj1 = NULL; CALimage image1 = NULL; CALlanguage lang1 = CAL_LANGUAGE_IL; if (calclCompile(&obj1, lang1, kernel1.c_str(), info.target) != CAL_RESULT_OK) { fprintf(stdout, "Kernel1 compilation failed. Exiting.\n"); return 1; } else { cout << "kernel1 compiled fine" << endl; }; if (calclLink(&image1, &obj1, 1) != CAL_RESULT_OK) { fprintf(stdout, "Kernel1 linking failed. Exiting.\n"); return 1; } CALobject obj2 = NULL; CALimage image2 = NULL; CALlanguage lang2 = CAL_LANGUAGE_IL; if (calclCompile(&obj2, lang2, kernel2.c_str(), info.target) != CAL_RESULT_OK) { fprintf(stdout, "Kernel2 compilation failed. Exiting.\n"); return 1; } else { cout << "kernel2 compiled fine" << endl; }; if (calclLink(&image2, &obj2, 1) != CAL_RESULT_OK) { fprintf(stdout, "Kernel2 linking failed. Exiting.\n"); return 1; } CALobject obj3 = NULL; CALimage image3 = NULL; CALlanguage lang3 = CAL_LANGUAGE_IL; if (calclCompile(&obj3, lang3, kernel3.c_str(), info.target) != CAL_RESULT_OK) { fprintf(stdout, "Kernel3 compilation failed. Exiting.\n"); return 1; } else { cout << "kernel3 compiled fine" << endl; }; if (calclLink(&image3, &obj3, 1) != CAL_RESULT_OK) { fprintf(stdout, "Kernel3 linking failed. Exiting.\n"); return 1; } cout << "after compiles..." << endl; CALresource part0=0; if(calResAllocLocal2D(&part0, device,n4,n4, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("part0 resource allocation failed.\n"); } else { cout << "part0 fine." << endl; } CALresource part1=0; if(calResAllocLocal2D(&part1, device, n4,n4, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("part1 resource allocation failed.\n"); } else { cout << "part1 fine." << endl; } CALresource part2=0; if(calResAllocLocal2D(&part2, device, n4,n4, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("part2 resource allocation failed.\n"); } else { cout << "part2 fine." << endl; } CALresource part3=0; if(calResAllocLocal2D(&part3, device, n4,n4, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("part3 resource allocation failed.\n"); } else { cout << "part3 fine." << endl; } /* cout << "to the gpu and back..." << endl; copytogpu(n,n4,cpupart0,part0); copytocpu(n,n4,cpupart0,part0); dispmat(n,n4,cpupart0); */ copytogpu(n,n4,cpupart0,part0); copytogpu(n,n4,cpupart1,part1); copytogpu(n,n4,cpupart2,part2); copytogpu(n,n4,cpupart3,part3); CALmem part0mem=0; CALmem part1mem=0; CALmem part2mem=0; CALmem part3mem=0; calCtxGetMem(&part0mem, ctx, part0); calCtxGetMem(&part1mem, ctx, part1); calCtxGetMem(&part2mem, ctx, part2); calCtxGetMem(&part3mem, ctx, part3); CALresource extra0=0; if(calResAllocLocal2D(&extra0, device,n4,1, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("extra0 resource allocation failed.\n"); } else { cout << "extra0 fine." << endl; } CALresource extra1=0; if(calResAllocLocal2D(&extra1, device, n4,1, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("extra1 resource allocation failed.\n"); } else { cout << "extra1 fine." << endl; } CALresource extra2=0; if(calResAllocLocal2D(&extra2, device, n4,1, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("extra2 resource allocation failed.\n"); } else { cout << "extra2 fine." << endl; } CALresource extra3=0; if(calResAllocLocal2D(&extra3, device, n4,1, CAL_FORMAT_FLOAT_4, 0) !=CAL_RESULT_OK) { printf("extra3 resource allocation failed.\n"); } else { cout << "extra3 fine." << endl; } zeroongpu(n,1,extra0); zeroongpu(n,1,extra1); zeroongpu(n,1,extra2); zeroongpu(n,1,extra3); CALmem extra0mem=0; CALmem extra1mem=0; CALmem extra2mem=0; CALmem extra3mem=0; calCtxGetMem(&extra0mem, ctx, extra0); calCtxGetMem(&extra1mem, ctx, extra1); calCtxGetMem(&extra2mem, ctx, extra2); calCtxGetMem(&extra3mem, ctx, extra3); CALresource const2=0; if(calResAllocLocal1D(&const2, device, 1, CAL_FORMAT_FLOAT_1, 0) !=CAL_RESULT_OK) { printf("const2 resource allocation failed.\n"); } else { cout << "const2 fine." << endl; } CALmem const2mem=0; calCtxGetMem(&const2mem,ctx,const2); CALmodule module0 = 0; CALmodule module1 = 0; CALmodule module2 = 0; CALmodule module3 = 0; calModuleLoad(&module0, ctx, image0); calModuleLoad(&module1, ctx, image1); calModuleLoad(&module2, ctx, image2); calModuleLoad(&module3, ctx, image3); CALfunc func0 = 0; CALfunc func1 = 0; CALfunc func2 = 0; CALfunc func3 = 0; CALname inname0formod0=0; CALname inname1formod0=0; CALname inname2formod0=0; CALname inname3formod0=0; CALname inname0formod1=0; CALname inname1formod1=0; CALname inname2formod1=0; CALname inname3formod1=0; CALname inname0formod2=0; CALname inname1formod2=0; CALname inname2formod2=0; CALname inname3formod2=0; CALname inname0formod3=0; CALname inname1formod3=0; CALname inname2formod3=0; CALname inname3formod3=0; CALname inname4formod3=0; CALname inname5formod3=0; CALname inname6formod3=0; CALname inname7formod3=0; CALname outname0formod0=0; CALname outname1formod0=0; CALname outname2formod0=0; CALname outname3formod0=0; CALname outname0formod1=0; CALname outname1formod1=0; CALname outname2formod1=0; CALname outname3formod1=0; CALname outname0formod2=0; CALname outname1formod2=0; CALname outname2formod2=0; CALname outname3formod2=0; CALname outname0formod3=0; CALname outname1formod3=0; CALname outname2formod3=0; CALname outname3formod3=0; CALname constname0formod2=0; calModuleGetEntry(&func0, ctx, module0, "main"); calModuleGetEntry(&func1, ctx, module1, "main"); calModuleGetEntry(&func2, ctx, module2, "main"); calModuleGetEntry(&func3, ctx, module3, "main"); calModuleGetName(&inname0formod0, ctx, module0, "i0"); calModuleGetName(&inname1formod0, ctx, module0, "i1"); calModuleGetName(&inname2formod0, ctx, module0, "i2"); calModuleGetName(&inname3formod0, ctx, module0, "i3"); calModuleGetName(&inname0formod1, ctx, module1, "i0"); calModuleGetName(&inname1formod1, ctx, module1, "i1"); calModuleGetName(&inname2formod1, ctx, module1, "i2"); calModuleGetName(&inname3formod1, ctx, module1, "i3"); calModuleGetName(&inname0formod2, ctx, module2, "i0"); calModuleGetName(&inname1formod2, ctx, module2, "i1"); calModuleGetName(&inname2formod2, ctx, module2, "i2"); calModuleGetName(&inname3formod2, ctx, module2, "i3"); calModuleGetName(&inname0formod3, ctx, module3, "i0"); calModuleGetName(&inname1formod3, ctx, module3, "i1"); calModuleGetName(&inname2formod3, ctx, module3, "i2"); calModuleGetName(&inname3formod3, ctx, module3, "i3"); calModuleGetName(&inname4formod3, ctx, module3, "i4"); calModuleGetName(&inname5formod3, ctx, module3, "i5"); calModuleGetName(&inname6formod3, ctx, module3, "i6"); calModuleGetName(&inname7formod3, ctx, module3, "i7"); calModuleGetName(&outname0formod0, ctx, module0, "o0"); calModuleGetName(&outname1formod0, ctx, module0, "o1"); calModuleGetName(&outname2formod0, ctx, module0, "o2"); calModuleGetName(&outname3formod0, ctx, module0, "o3"); calModuleGetName(&outname0formod1, ctx, module1, "o0"); calModuleGetName(&outname1formod1, ctx, module1, "o1"); calModuleGetName(&outname2formod1, ctx, module1, "o2"); calModuleGetName(&outname3formod1, ctx, module1, "o3"); calModuleGetName(&outname0formod2, ctx, module2, "o0"); calModuleGetName(&outname1formod2, ctx, module2, "o1"); calModuleGetName(&outname2formod2, ctx, module2, "o2"); calModuleGetName(&outname3formod2, ctx, module2, "o3"); calModuleGetName(&outname0formod3, ctx, module3, "o0"); calModuleGetName(&outname1formod3, ctx, module3, "o1"); calModuleGetName(&outname2formod3, ctx, module3, "o2"); calModuleGetName(&outname3formod3, ctx, module3, "o3"); calModuleGetName(&constname0formod2, ctx, module2, "cb0"); calCtxSetMem(ctx, inname0formod0, part0mem); calCtxSetMem(ctx, inname1formod0, part1mem); calCtxSetMem(ctx, inname2formod0, part2mem); calCtxSetMem(ctx, inname3formod0, part3mem); calCtxSetMem(ctx, inname0formod1, part0mem); calCtxSetMem(ctx, inname1formod1, part1mem); calCtxSetMem(ctx, inname2formod1, part2mem); calCtxSetMem(ctx, inname3formod1, part3mem); calCtxSetMem(ctx, inname0formod2, part0mem); calCtxSetMem(ctx, inname1formod2, part1mem); calCtxSetMem(ctx, inname2formod2, part2mem); calCtxSetMem(ctx, inname3formod2, part3mem); calCtxSetMem(ctx, inname0formod3, part0mem); calCtxSetMem(ctx, inname1formod3, part1mem); calCtxSetMem(ctx, inname2formod3, part2mem); calCtxSetMem(ctx, inname3formod3, part3mem); calCtxSetMem(ctx, inname4formod3, extra0mem); calCtxSetMem(ctx, inname5formod3, extra1mem); calCtxSetMem(ctx, inname6formod3, extra2mem); calCtxSetMem(ctx, inname7formod3, extra3mem); calCtxSetMem(ctx, outname0formod0, part0mem); calCtxSetMem(ctx, outname1formod0, part1mem); calCtxSetMem(ctx, outname2formod0, part2mem); calCtxSetMem(ctx, outname3formod0, part3mem); calCtxSetMem(ctx, outname0formod1, part0mem); calCtxSetMem(ctx, outname1formod1, part1mem); calCtxSetMem(ctx, outname2formod1, part2mem); calCtxSetMem(ctx, outname3formod1, part3mem); calCtxSetMem(ctx, outname0formod2, extra0mem); calCtxSetMem(ctx, outname1formod2, extra1mem); calCtxSetMem(ctx, outname2formod2, extra2mem); calCtxSetMem(ctx, outname3formod2, extra3mem); calCtxSetMem(ctx, outname0formod3, part0mem); calCtxSetMem(ctx, outname1formod3, part1mem); calCtxSetMem(ctx, outname2formod3, part2mem); calCtxSetMem(ctx, outname3formod3, part3mem); calCtxSetMem(ctx, constname0formod2,const2mem); CALevent e = 0; cout << "Just before running, " << calGetErrorString() << "." << endl; volatile clock_t gputime; gputime=clock(); int pos=0; // cout << "just before while loop" << endl; CALcounter idleCounter; if (calCtxCreateCounterExt(&idleCounter, ctx, CAL_COUNTER_IDLE) != CAL_RESULT_OK) { return 1; } CALcounter cacheCounter; if (calCtxCreateCounterExt(&cacheCounter, ctx, CAL_COUNTER_INPUT_CACHE_HIT_RATE) != CAL_RESULT_OK) { return 1; } if (calCtxBeginCounterExt(ctx, idleCounter) != CAL_RESULT_OK) { return 1; } if (calCtxBeginCounterExt(ctx, cacheCounter) != CAL_RESULT_OK) { return 1; } float* constdata=NULL; CALuint constpitch=0; while (pos<(n4-1)) { // cout << "For pos=" << pos << endl; calResMap((void**)&constdata,&constpitch,const2,0); *constdata=(float) pos; calResUnmap(const2); CALdomain domain0 = {pos, pos, 1, 1}; CALdomain domain1 = {pos+1, pos, n4-pos-1, 1}; CALdomain domain2 = {pos, 0, n4-pos,1}; CALdomain domain3 = {pos+1, pos+1, n4-pos-1, n4-pos-1}; calCtxRunProgram(&e, ctx, func0, &domain0); // while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);e=0; calCtxFlush(ctx); //cout<< " after 0" << endl; calCtxRunProgram(&e, ctx, func1, &domain1); //while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);e=0; calCtxFlush(ctx); //cout << " after 1" << endl; calCtxRunProgram(&e, ctx, func2, &domain2); //don't seem to need this wait... //while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);e=0; calCtxFlush(ctx); //cout << " after 2" << endl; calCtxRunProgram(&e, ctx, func3, &domain3); // while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);e=0; calCtxFlush(ctx); //cout << " after 3" << endl; //cout << " " << calGetErrorString()<<endl; pos++; } // cout << "Out of loop" << endl; CALdomain domain0 = {pos,pos,1,1}; calCtxRunProgram(&e, ctx, func0, &domain0); while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING); //cout << calGetErrorString(); //cout << ": done kernel0" << endl; if (calCtxEndCounterExt(ctx, idleCounter) != CAL_RESULT_OK) { return 1; } if (calCtxEndCounterExt(ctx, cacheCounter) != CAL_RESULT_OK) { return 1; } gputime=clock()-gputime; CALfloat idlePercentage = 0.0f; if (calCtxGetCounterExt(&idlePercentage, ctx, idleCounter) != CAL_RESULT_OK) { return 1; } CALfloat cachePercentage = 0.0f; if (calCtxGetCounterExt(&cachePercentage, ctx, cacheCounter) != CAL_RESULT_OK) { return 1; } cout << "gpu time=" << gputime/1.e6f <<" s." <<endl; printf("Idle percentage: %0.2f%% Cache hit rate: %0.2f%%\n", idlePercentage * 100.0f, cachePercentage * 100.0f); if (calCtxDestroyCounterExt(ctx, idleCounter) != CAL_RESULT_OK) { return 1; } if (calCtxDestroyCounterExt(ctx, cacheCounter) != CAL_RESULT_OK) { return 1; } cout << "After calculation, " << calGetErrorString() << "." << endl; /* float* cpuextramat=new float[n*4]; float* cpuextra0= new float[n]; float* cpuextra1= new float[n]; float* cpuextra2= new float[n]; float* cpuextra3= new float[n]; copytocpu(n,1,cpuextra0,extra0); copytocpu(n,1,cpuextra1,extra1); copytocpu(n,1,cpuextra2,extra2); copytocpu(n,1,cpuextra3,extra3); unsplitmat(n,4,cpuextramat,cpuextra0,cpuextra1,cpuextra2,cpuextra3); dispmat(n,4,cpuextramat); delete[] cpuextramat; delete[] cpuextra0; delete[] cpuextra1; delete[] cpuextra2; delete[] cpuextra3; */ copytocpu(n,n4,cpupart0,part0); copytocpu(n,n4,cpupart1,part1); copytocpu(n,n4,cpupart2,part2); copytocpu(n,n4,cpupart3,part3); unsplitmat(n,n,cpumat,cpupart0,cpupart1,cpupart2,cpupart3); //dispmat(n,n,cpumat); dispdiag(n,n,cpumat); delete[] cpumat; delete[] cpupart0; delete[] cpupart1; delete[] cpupart2; delete[] cpupart3; calModuleUnload(ctx, module0); calModuleUnload(ctx, module1); calModuleUnload(ctx, module2); calModuleUnload(ctx, module3); calclFreeImage(image0); calclFreeObject(obj0); calclFreeImage(image1); calclFreeObject(obj1); calclFreeImage(image2); calclFreeObject(obj2); calclFreeImage(image3); calclFreeObject(obj3); calCtxReleaseMem(ctx,part0mem); calCtxReleaseMem(ctx,part1mem); calCtxReleaseMem(ctx,part2mem); calCtxReleaseMem(ctx,part3mem); calCtxReleaseMem(ctx,extra0mem); calCtxReleaseMem(ctx,extra1mem); calCtxReleaseMem(ctx,extra2mem); calCtxReleaseMem(ctx,extra3mem); calCtxReleaseMem(ctx,const2mem); calResFree(part0); calResFree(part1); calResFree(part2); calResFree(part3); calResFree(extra0); calResFree(extra1); calResFree(extra2); calResFree(extra3); calResFree(const2); calCtxDestroy(ctx); calDeviceClose(device); calShutdown(); return 0; }