// (c) 2008 Steven Gratton // Guided by examples from the AMD Stream SDK // Note the assignment of the same resource // to both an input and output buffer of a kernel // in order to allow the factorization to // be done in place // Performance is better than the code using // a global buffer but still needs improving... #include #include #include #include #include #include "cal.h" #include "calcl.h" #include "cal_ext.h" #include "cal_ext_counter.h" #include "newcholforweb.h" std::string ILcheck= "il_ps_2_0\n" "ret_dyn\n" "end\n"; using namespace std; static PFNCALCTXCREATECOUNTER calCtxCreateCounterExt; static PFNCALCTXDESTROYCOUNTER calCtxDestroyCounterExt; static PFNCALCTXBEGINCOUNTER calCtxBeginCounterExt; static PFNCALCTXENDCOUNTER calCtxEndCounterExt; static PFNCALCTXGETCOUNTER calCtxGetCounterExt; void maketestmat(int n,float* mat) { for (int i=0;i