CUDA: Changed the overlap test to work with the current version of the API.
author Lukasz Wesolowski <wesolwsk@talent.cs.uiuc.edu>
Wed, 24 Feb 2010 22:14:19 +0000 (16:14 -0600)
committer Lukasz Wesolowski <wesolwsk@talent.cs.uiuc.edu>
Wed, 24 Feb 2010 22:14:19 +0000 (16:14 -0600)
examples/charm++/cuda/gpuManager/overlapTestGPUManager/overlapTest.C
examples/charm++/cuda/gpuManager/overlapTestGPUManager/overlapTest.cu

index c94c46c9a96ff0733ce1a66350a4a925a6a7e17b..68830a1b3b19cb201cb7e022dbf8802dcbcebe43 100644 (file)
@@ -73,6 +73,15 @@ void Workers::complete() {
   int size = matrixSize * matrixSize * sizeof(ElementType); 
   memcpy(C, h_C, size); 
 
+  for (int i=0; i<matrixSize; i++) {
+    for (int j=0; j<matrixSize; j++) {
+      C[i*matrixSize + j] = 0; 
+      for (int k=0; k<matrixSize; k++) {
+       C[i*matrixSize + j] += A[i*matrixSize +k] * B[k * matrixSize + j];
+      }
+    }
+  }
+
 #ifdef DEBUG
   CkPrintf("[%d] A\n", thisIndex); 
   for (int i=0; i<matrixSize; i++) {
index ef83ac30e7dcbc7e6576b0df4d4bcc19f23faf10..4a00f57559aaf7db19f3812dc90409a6f700cec6 100644 (file)
 __global__ void
 matrixMul(float* C, float* A, float* B, int wA, int wB)
 {
-  for (int i=0; i<1000000; i++) {
-    C[blockIdx.x * BLOCK_SIZE + threadIdx.x] ++; 
-    C[blockIdx.x * BLOCK_SIZE + threadIdx.x] --; 
-  }
-  /*
     // Block index
     int bx = blockIdx.x;
     int by = blockIdx.y;
@@ -86,7 +81,6 @@ matrixMul(float* C, float* A, float* B, int wA, int wB)
     // each thread writes one element
     int c = wB * BLOCK_SIZE * by + BLOCK_SIZE * bx;
     C[c + wB * ty + tx] = Csub;
-  */
 }
 
 void hostMemorySetup(int matrixSize, ElementType **h_A_ptr, 
@@ -114,9 +108,16 @@ void hostMemorySetup(int matrixSize, ElementType **h_A_ptr,
 }
 
 void hostMemoryCleanup(ElementType *h_A, ElementType *h_B, ElementType *h_C) {
+
+  delayedFree(h_A); 
+  delayedFree(h_B); 
+  delayedFree(h_C);
+
+  /*
   cudaFreeHost(h_A); 
   cudaFreeHost(h_B); 
-  cudaFreeHost(h_C);
+  cudaFreeHost(h_C); 
+  */
 }
 
 void cudaMatMul(int matrixSize, ElementType *h_A, ElementType *h_B,