CUDA: (1) Added a call to the GPU Manager progress function immediately after executing
author: Lukasz Wesolowski <wesolwsk@talent.cs.uiuc.edu>
Thu, 4 Mar 2010 20:42:39 +0000 (14:42 -0600)
committer: Lukasz Wesolowski <wesolwsk@talent.cs.uiuc.edu>
Thu, 4 Mar 2010 20:42:39 +0000 (14:42 -0600)
callbacks, to increase GPU throughput, and (2) modified the condition for executing cudaFree calls
so that it also waits for completion of pending GPU transfer-in requests (data_in_stream).

src/arch/cuda/hybridAPI/cuda-hybrid-api.cu

index d10e5695426270b861012a6c14b613ee7e8ff538..eec793c726bf0ce4aea1b264ddc982592dec6970 100644 (file)
@@ -161,7 +161,7 @@ void pinnedMallocHost(pinnedMemReq *reqs) {
     free(reqs->sizes);
 
     CUDACallbackManager(reqs->callbackFn);
-
+    gpuProgressFn(); 
   }
   else {
     pinnedMemQueue[pinnedMemQueueIndex].hostPtrs = reqs->hostPtrs;
@@ -768,7 +768,9 @@ void gpuProgressFn() {
       */
   }
   if (head->state == TRANSFERRING_OUT) {
-    if (cudaStreamQuery(data_out_stream) == cudaSuccess && cudaStreamQuery(kernel_stream) == cudaSuccess){
+    if (cudaStreamQuery(data_in_stream) == cudaSuccess &&
+       cudaStreamQuery(data_out_stream) == cudaSuccess && 
+       cudaStreamQuery(kernel_stream) == cudaSuccess){
       freeMemory(head); 
 #ifdef GPU_PROFILE
       gpuEvents[dataCleanupIndex].endTime = cutGetTimerValue(timerHandle);
@@ -800,6 +802,7 @@ void gpuProgressFn() {
 
       dequeue(wrQueue);
       CUDACallbackManager(head->callbackFn);
+      gpuProgressFn(); 
     }
   }
 }