Adding performance benchmarks for CmiReduce and Broadcast in commbench
author Nikhil Jain <nikhil@illinois.edu>
Tue, 27 Dec 2011 00:09:47 +0000 (18:09 -0600)
committer Nikhil Jain <nikhil@illinois.edu>
Tue, 27 Dec 2011 00:09:47 +0000 (18:09 -0600)
tests/converse/commbench/Makefile
tests/converse/commbench/broadcast.c [new file with mode: 0644]
tests/converse/commbench/commbench.c
tests/converse/commbench/memoryAccess.c
tests/converse/commbench/reduction.c [new file with mode: 0644]

index 68bd4200a1c8c1644b6d2577d4df637be3a8fe1e..aeb2bfc5e12c21fe807a20e93509fd93b61e54d2 100644 (file)
@@ -1,6 +1,7 @@
 CHARMC=../../../bin/charmc $(OPTS)
 
-OBJS=memoryAccess.o commbench.o overhead.o timer.o proc.o smputil.o pingpong.o ctxt.o
+OBJS=memoryAccess.o commbench.o overhead.o timer.o proc.o smputil.o pingpong.o \
+    broadcast.o reduction.o ctxt.o
 
 all: pgm
 
@@ -31,6 +32,12 @@ smputil.o: smputil.c
 pingpong.o: pingpong.c
        $(CHARMC) pingpong.c
 
+broadcast.o: broadcast.c
+       $(CHARMC) broadcast.c
+
+reduction.o: reduction.c
+       $(CHARMC) reduction.c
+
 ctxt.o: ctxt.c
        $(CHARMC) ctxt.c
 
diff --git a/tests/converse/commbench/broadcast.c b/tests/converse/commbench/broadcast.c
new file mode 100644 (file)
index 0000000..dc78221
--- /dev/null
@@ -0,0 +1,259 @@
+/*****************************************************************************
+ *  Benchmark to measure performance of CmiSyncBroadcast
+ *  
+ *  Does two types of benchmarking-
+ *
+ *  1. A flurry of Bcasts followed by a reduction
+ *
+ *  2. Singleton broadcast followed by reduction (clocks synchronized
+ *                                                across processors)
+ *
+ *  Author- Nikhil Jain
+ *  Date- Dec/26/2011
+ *
+ *****************************************************************************/
+
+#include "converse.h"
+#include "commbench.h"
+
+typedef double* pdouble;
+
+CpvStaticDeclare(int, numiter);
+CpvStaticDeclare(int, nextidx);
+CpvStaticDeclare(int, bcast_handler);
+CpvStaticDeclare(int, bcast_reply);
+CpvStaticDeclare(int, bcast_central);
+CpvStaticDeclare(int, reduction_handler);
+CpvStaticDeclare(int, sync_starter);
+CpvStaticDeclare(int, sync_reply);
+CpvStaticDeclare(double, starttime);
+CpvStaticDeclare(double, lasttime);
+CpvStaticDeclare(pdouble, timediff);
+CpvStaticDeclare(int, currentPe);
+
+static struct testdata { /* one row per benchmarked payload size */
+  int size;     /* payload bytes; messages are allocated as CmiMsgHeaderSizeBytes+size */
+  int numiter;  /* number of iterations run for this size */
+  double time;  /* measured time for this size; print_results divides it by numiter */
+} sizes[] = {
+  {4,       1024,      0.0},
+  {16,      1024,      0.0},
+  {64,      1024,      0.0},
+  {256,     1024,      0.0},
+  {1024,    1024,      0.0},
+  {4096,    1024,      0.0},
+  {16384,   1024,      0.0},
+  {65536,   1024,      0.0},
+  {262144,  1024,      0.0},
+  {1048576, 1024,      0.0},
+  {-1,      -1,        0.0}, /* sentinel: size == -1 ends the table */
+};
+
+typedef struct _timemsg { /* carries a wall-clock sample to PE 0 */
+      char head[CmiMsgHeaderSizeBytes]; /* header area, CmiMsgHeaderSizeBytes bytes */
+      double time; /* CmiWallTimer() value taken by the sender */
+      int srcpe;   /* sender's PE; in this file only bcast_reply fills it in */
+} *ptimemsg;
+
+typedef struct _timemsg timemsg;
+
+static char *sync_outstr = /* print_results format: benchmark label, seconds, payload bytes */
+"[broadcast] (%s) %le seconds per %d bytes\n"
+;
+
+static void * reduceMessage(int *size, void *data, void **remote, int count) 
+{ /* merge function passed to CmiReduce: ignores size, remote and count and returns data unchanged */
+  return data;
+}
+
+static void print_results(char *func)
+{ /* prints one line per sizes[] row: time/numiter and the payload size, labelled with func */
+  int i=0;
+
+  while(sizes[i].size != (-1)) { /* stop at the sentinel row */
+    CmiPrintf(sync_outstr, func, sizes[i].time/sizes[i].numiter, sizes[i].size);
+    i++;
+  }
+}
+
+static void bcast_handler(void *msg)
+{ /* first benchmark: counts broadcast arrivals; PE 0 re-broadcasts, and the numiter-th arrival contributes to the reduction */
+  int idx = CpvAccess(nextidx);
+  void *red_msg;
+
+  CpvAccess(numiter)++;
+  if(CpvAccess(numiter)<sizes[idx].numiter) {
+    if(CmiMyPe() == 0) {
+      CmiSyncBroadcastAll(CmiMsgHeaderSizeBytes+sizes[idx].size, msg);
+    }
+  } else {
+    red_msg = CmiAlloc(CmiMsgHeaderSizeBytes); /* header-only contribution */
+    CmiSetHandler(red_msg, CpvAccess(reduction_handler));
+    CmiReduce(red_msg, CmiMsgHeaderSizeBytes, reduceMessage);
+    if(CmiMyPe() != 0) { /* PE 0 updates nextidx/numiter in reduction_handler instead */
+      CpvAccess(nextidx) = idx + 1;
+      CpvAccess(numiter) = 0;
+    }
+  }
+  CmiFree(msg); /* release the received message on every path and every PE, as the other handlers do */
+}
+
+static void reduction_handler(void *msg) 
+{ /* handler set on the CmiReduce message: records the elapsed time for the current size, then starts the next size or the clock probes */
+  int i=0;
+  int idx = CpvAccess(nextidx);
+  EmptyMsg emsg;
+
+  sizes[idx].time = CmiWallTimer() - CpvAccess(starttime);
+  CmiFree(msg);
+  CpvAccess(numiter) = 0;
+  idx++;
+  if(sizes[idx].size == (-1)) { /* sentinel reached: every size has been measured */
+    print_results("Consecutive CmiSyncBroadcastAll");
+    CpvAccess(nextidx) = 0;
+    CpvAccess(numiter) = 0;
+    while(sizes[i].size != (-1)) { /* clear the times so bcast_central can accumulate into them */
+      sizes[i].time = 0;
+      i++;
+    }
+    CmiSetHandler(&emsg, CpvAccess(sync_reply));
+    CpvAccess(lasttime) = CmiWallTimer(); 
+    CmiSyncSend(CpvAccess(currentPe), sizeof(EmptyMsg), &emsg); /* first clock probe; lasttime holds its send time */
+    return;
+  } else {
+    CpvAccess(nextidx) = idx;
+    msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[idx].size);
+    CmiSetHandler(msg, CpvAccess(bcast_handler));
+    CpvAccess(starttime) = CmiWallTimer();
+    CmiSyncBroadcastAll(CmiMsgHeaderSizeBytes+sizes[idx].size, msg);
+    CmiFree(msg);
+  }
+}
+   
+static void sync_starter(void *msg) 
+{ /* receives the clock sample of PE currentPe, stores its offset, then probes the next PE or starts the timed broadcasts */
+  EmptyMsg emsg;    
+  ptimemsg tmsg = (ptimemsg)msg;
+
+  double midTime = (CmiWallTimer() + CpvAccess(lasttime))/2; /* midpoint between probe send time and reply arrival */
+  CpvAccess(timediff)[CpvAccess(currentPe)] = midTime - tmsg->time; /* local midpoint minus the remote sample */
+  CmiFree(msg);
+
+  CpvAccess(currentPe)++;
+  if(CpvAccess(currentPe) < CmiNumPes()) { /* more PEs left to probe */
+    CmiSetHandler(&emsg, CpvAccess(sync_reply));
+    CpvAccess(lasttime) = CmiWallTimer(); 
+    CmiSyncSend(CpvAccess(currentPe), sizeof(EmptyMsg), &emsg);
+  } else { /* all offsets recorded: send the first timed broadcast */
+    msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[0].size);
+    CmiSetHandler(msg, CpvAccess(bcast_reply));
+    CpvAccess(currentPe) = 0; /* reused by bcast_central as a reply counter */
+    CpvAccess(starttime) = CmiWallTimer();
+    CmiSyncBroadcastAll(CmiMsgHeaderSizeBytes+sizes[0].size, msg);
+    CmiFree(msg);
+  }
+}
+
+static void sync_reply(void *msg) 
+{ /* answers a clock probe: sends this PE's CmiWallTimer() to sync_starter on PE 0 (srcpe is not filled in) */
+  ptimemsg tmsg = (ptimemsg)CmiAlloc(sizeof(timemsg));
+  tmsg->time = CmiWallTimer();
+
+  CmiFree(msg);
+  CmiSetHandler(tmsg, CpvAccess(sync_starter));
+  CmiSyncSend(0, sizeof(timemsg), tmsg);
+  CmiFree(tmsg);
+}
+static void bcast_reply(void *msg)
+{ /* on arrival of a timed broadcast: reports this PE's arrival time and PE number to bcast_central on PE 0 */
+  ptimemsg tmsg = (ptimemsg)CmiAlloc(sizeof(timemsg));
+  tmsg->time = CmiWallTimer();
+  tmsg->srcpe = CmiMyPe();
+  CmiFree(msg);
+  CmiSetHandler(tmsg, CpvAccess(bcast_central));
+  CmiSyncSend(0, sizeof(timemsg), tmsg);
+  CmiFree(tmsg);
+}
+
+static void bcast_central(void *msg)
+{ /* collects the arrival reports of one broadcast, keeps the largest offset-corrected latency, and starts the next broadcast */
+  EmptyMsg emsg;
+  ptimemsg tmsg = (ptimemsg)msg;
+  if(CpvAccess(currentPe) == 0) { /* first report for this broadcast */
+    CpvAccess(lasttime) = tmsg->time - CpvAccess(starttime) + 
+                          CpvAccess(timediff)[tmsg->srcpe];
+  } else if((tmsg->time - CpvAccess(starttime) + 
+    CpvAccess(timediff)[tmsg->srcpe]) > CpvAccess(lasttime)) { /* later reports only raise the maximum */
+    CpvAccess(lasttime) = tmsg->time - CpvAccess(starttime) +
+                          CpvAccess(timediff)[tmsg->srcpe];
+  }
+  CmiFree(msg);
+  CpvAccess(currentPe)++; /* used here as the count of reports received */
+  if(CpvAccess(currentPe) == CmiNumPes()) { /* every PE has reported */
+    sizes[CpvAccess(nextidx)].time += CpvAccess(lasttime);
+    CpvAccess(numiter)++;
+    if(CpvAccess(numiter)<sizes[CpvAccess(nextidx)].numiter) { /* repeat the same size */
+      msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size);
+      CpvAccess(currentPe) = 0;
+      CmiSetHandler(msg, CpvAccess(bcast_reply));
+      CpvAccess(starttime) = CmiWallTimer();
+      CmiSyncBroadcastAll(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size, msg);
+      CmiFree(msg);
+    } else {
+      CpvAccess(numiter) = 0;
+      CpvAccess(nextidx)++;
+      if(sizes[CpvAccess(nextidx)].size == (-1)) { /* sentinel: benchmark finished */
+        print_results("CmiSyncBroadcastAll");
+        CmiSetHandler(&emsg, CpvAccess(ack_handler)); /* ack_handler is not declared in this file; presumably from commbench.h */
+        CmiSyncSend(0, sizeof(EmptyMsg), &emsg);
+        return;
+      } else { /* move on to the next size */
+        msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size);
+        CpvAccess(currentPe) = 0;
+        CmiSetHandler(msg, CpvAccess(bcast_reply));
+        CpvAccess(starttime) = CmiWallTimer();
+        CmiSyncBroadcastAll(CmiMsgHeaderSizeBytes+sizes[CpvAccess(nextidx)].size, 
+                            msg);
+        CmiFree(msg);
+      }
+    }
+  }
+}
+
+void broadcast_init(void)
+{ /* starts the benchmark: records starttime and broadcasts the first message of size sizes[0].size to bcast_handler */
+  void *msg;
+
+  msg = CmiAlloc(CmiMsgHeaderSizeBytes+sizes[0].size);
+  CmiSetHandler(msg, CpvAccess(bcast_handler));
+  CpvAccess(starttime) = CmiWallTimer();
+  CmiSyncBroadcastAll(CmiMsgHeaderSizeBytes+sizes[0].size, msg);
+  CmiFree(msg);
+}
+
+void broadcast_moduleinit(void)
+{ /* setup for the broadcast benchmark: initialises the Cpv variables, allocates timediff and registers the handlers */
+  CpvInitialize(int, numiter);
+  CpvInitialize(int, nextidx);
+  CpvInitialize(double, starttime);
+  CpvInitialize(double, lasttime);
+  CpvInitialize(pdouble, timediff); 
+  CpvInitialize(int, currentPe);
+  CpvInitialize(int, bcast_handler);
+  CpvInitialize(int, bcast_reply);
+  CpvInitialize(int, bcast_central);
+  CpvInitialize(int, reduction_handler);
+  CpvInitialize(int, sync_starter);
+  CpvInitialize(int, sync_reply);
+  CpvAccess(numiter) = 0;
+  CpvAccess(nextidx) = 0;
+  CpvAccess(currentPe) = 0;
+  CpvAccess(timediff) = (pdouble)malloc(CmiNumPes()*sizeof(double)); /* one clock offset per PE; result is not checked */
+  CpvAccess(bcast_handler) = CmiRegisterHandler((CmiHandler)bcast_handler); /* each Cpv stores the index returned by CmiRegisterHandler */
+  CpvAccess(bcast_reply) = CmiRegisterHandler((CmiHandler)bcast_reply);
+  CpvAccess(bcast_central) = CmiRegisterHandler((CmiHandler)bcast_central);
+  CpvAccess(reduction_handler) = CmiRegisterHandler((CmiHandler)reduction_handler);
+  CpvAccess(sync_starter) = CmiRegisterHandler((CmiHandler)sync_starter);
+  CpvAccess(sync_reply) = CmiRegisterHandler((CmiHandler)sync_reply);
+}
index e4b0e53502971673a4d3fb614fc4df4ed9757063..f39e44a30fe2c8d9ff095103bd8537e85cb55356 100644 (file)
@@ -26,6 +26,8 @@ extern void timer_init(void);
 extern void proc_init(void);
 extern void smputil_init(void);
 extern void pingpong_init(void);
+extern void broadcast_init(void);
+extern void reduction_init(void);
 extern void ctxt_init(void);
 
 extern void memoryAccess_moduleinit(void);
@@ -34,6 +36,8 @@ extern void timer_moduleinit(void);
 extern void proc_moduleinit(void);
 extern void smputil_moduleinit(void);
 extern void pingpong_moduleinit(void);
+extern void broadcast_moduleinit(void);
+extern void reduction_moduleinit(void);
 extern void ctxt_moduleinit(void);
 
 struct testinfo
@@ -48,6 +52,8 @@ struct testinfo
   { "proc",      proc_init,      proc_moduleinit },
   { "smputil",   smputil_init,   smputil_moduleinit },
   { "pingpong",  pingpong_init,  pingpong_moduleinit },
+  { "broadcast", broadcast_init, broadcast_moduleinit },
+  { "reduction", reduction_init, reduction_moduleinit },
   { "ctxt",      ctxt_init,      ctxt_moduleinit },
   { 0,0,0 },
 };
index 078973e7d0e776435136a432a75d2c42bf1975a4..2c2a299abcc135581fbc4cbb6ce23b4d3c8da5a7 100755 (executable)
@@ -1,3 +1,19 @@
+/***************************************************************************
+ *
+ *  Benchmark to measure performance of CmiAlloc/CmiFree and traversals
+ *  of associated memory
+ *
+ *  Two types of benchmarking have been done-
+ *
+ *  1. A flurry of operations of same type and on same size
+ *
+ *  2. A random but commutatively organized mix of operations on a range
+ *     of data size.
+ *
+ *  Author- Nikhil Jain
+ *
+ ***************************************************************************/
+
 #include <converse.h>
 #include "commbench.h"
 
diff --git a/tests/converse/commbench/reduction.c b/tests/converse/commbench/reduction.c
new file mode 100644 (file)
index 0000000..061be80
--- /dev/null
@@ -0,0 +1,234 @@
+/*****************************************************************************
+ *
+ *  Benchmarks to measure performance of CmiReduce
+ *
+ *  Clocks are synchronized first up, followed by singleton CmiReduce
+ *  after which the performance is measured by message collection at
+ *  a central point.
+ *
+ *
+ *  Author- Nikhil Jain
+ *
+ *****************************************************************************/
+
+
+#include "converse.h"
+#include "commbench.h"
+
+typedef double* pdouble;
+
+CpvStaticDeclare(int, numiter);
+CpvStaticDeclare(int, nextidx);
+CpvStaticDeclare(int, reduction_starter);
+CpvStaticDeclare(int, reduction_handler);
+CpvStaticDeclare(int, reduction_central);
+CpvStaticDeclare(int, sync_starter);
+CpvStaticDeclare(int, sync_reply);
+CpvStaticDeclare(int, flip);
+CpvStaticDeclare(double, starttime);
+CpvStaticDeclare(double, endtime);
+CpvStaticDeclare(double, lasttime);
+CpvStaticDeclare(pdouble, timediff);
+CpvStaticDeclare(int, currentPe);
+
+// Equals the largest size in sizes[] and bounds varmsg.contribution; update it when adding larger sizes
+#define MAXSIZE 1048576
+
+static struct testdata { /* one row per benchmarked contribution size */
+  int size;     /* contribution bytes; CmiReduce is called with CmiMsgHeaderSizeBytes+size */
+  int numiter;  /* number of reductions run for this size */
+  double time;  /* accumulated time; print_results divides it by numiter */
+} sizes[] = {
+  {4,       1024,      0.0},
+  {16,      1024,      0.0},
+  {64,      1024,      0.0},
+  {256,     1024,      0.0},
+  {1024,    1024,      0.0},
+  {4096,    1024,      0.0},
+  {16384,   1024,      0.0},
+  {65536,   1024,      0.0},
+  {262144,  1024,      0.0},
+  {1048576, 1024,      0.0},
+  {-1,      -1,        0.0}, /* sentinel: size == -1 ends the table */
+};
+
+typedef struct _varmsg { /* reduction contribution, sized for the largest entry of sizes[] */
+    char head[CmiMsgHeaderSizeBytes]; /* header area, CmiMsgHeaderSizeBytes bytes */
+    char contribution[MAXSIZE];       /* payload; never written in this file */
+} *varmsg;
+
+typedef struct _timemsg { /* carries a wall-clock value to PE 0 */
+      char head[CmiMsgHeaderSizeBytes]; /* header area, CmiMsgHeaderSizeBytes bytes */
+      double time; /* sender's clock value */
+      int srcpe;   /* sender's PE; in this file only reduction_starter fills it in */
+} *ptimemsg;
+
+typedef struct _timemsg timemsg;
+
+
+
+static char *sync_outstr = /* print_results format: benchmark label, seconds, contribution bytes */
+"[reduction] (%s) %le seconds per %d bytes\n"
+;
+
+static void * reduceMessage(int *size, void *data, void **remote, int count) 
+{ /* merge function passed to CmiReduce: ignores size, remote and count and returns data unchanged */
+  return data;
+}
+
+static void print_results(char *func)
+{ /* prints one line per sizes[] row: time/numiter and the contribution size, labelled with func */
+  int i=0;
+
+  while(sizes[i].size != (-1)) { /* stop at the sentinel row */
+    CmiPrintf(sync_outstr, func, sizes[i].time/sizes[i].numiter, sizes[i].size);
+    i++;
+  }
+}
+
+static void reduction_starter(void *msg)
+{ /* alternates on flip: start a timed CmiReduce (flip == 0) or report the recorded start time to PE 0 (flip == 1) */
+  int idx = CpvAccess(nextidx);
+  varmsg red_msg;
+  ptimemsg tmsg;
+  CmiFree(msg);
+
+  if(CpvAccess(flip)) { /* second call: reduction_handler broadcasts it once the reduction has arrived */
+    tmsg = (ptimemsg)CmiAlloc(sizeof(timemsg));
+    tmsg->time = CpvAccess(starttime);
+    tmsg->srcpe = CmiMyPe();
+    CmiSetHandler(tmsg, CpvAccess(reduction_central));
+    CmiSyncSend(0, sizeof(timemsg), tmsg);
+    CmiFree(tmsg);
+    CpvAccess(flip) = 0;
+  } else {
+    red_msg = (varmsg)CmiAlloc(sizeof(struct _varmsg)); /* always MAXSIZE-sized; only header+size bytes are reduced */
+    CmiSetHandler(red_msg, CpvAccess(reduction_handler));
+    CpvAccess(starttime) = CmiWallTimer();
+    CmiReduce(red_msg, CmiMsgHeaderSizeBytes+sizes[idx].size, reduceMessage);
+    CpvAccess(flip) = 1;
+    if(CmiMyPe() != 0) { /* PE 0 advances numiter/nextidx in reduction_central instead */
+      CpvAccess(numiter)++;
+      if(CpvAccess(numiter) == sizes[idx].numiter) {
+        CpvAccess(nextidx) = idx + 1;
+        CpvAccess(numiter) = 0;
+      }
+    }
+  }
+}
+
+static void reduction_handler(void *msg) 
+{ /* handler set on the CmiReduce message: timestamps its arrival and asks every PE for its start time */
+  CpvAccess(endtime) = CmiWallTimer(); /* taken first, before any other work */
+  EmptyMsg emsg;
+
+  CmiFree(msg);
+  CmiSetHandler(&emsg, CpvAccess(reduction_starter));
+  CmiSyncBroadcastAll(sizeof(EmptyMsg), &emsg); /* reaches reduction_starter with flip == 1 */
+}
+
+static void reduction_central(void *msg)
+{ /* collects the start-time reports of one reduction, keeps the largest offset-corrected duration, and starts the next reduction */
+  EmptyMsg emsg;
+  ptimemsg tmsg = (ptimemsg)msg;
+  if(CpvAccess(currentPe) == 0) { /* first report for this reduction */
+    CpvAccess(lasttime) = CpvAccess(endtime) - tmsg->time -
+                          CpvAccess(timediff)[tmsg->srcpe];
+  } else if((CpvAccess(endtime) - tmsg->time - 
+    CpvAccess(timediff)[tmsg->srcpe]) > CpvAccess(lasttime)) { /* later reports only raise the maximum */
+    CpvAccess(lasttime) = CpvAccess(endtime) - tmsg->time -
+                          CpvAccess(timediff)[tmsg->srcpe];
+  }
+  CmiFree(msg);
+  CpvAccess(currentPe)++; /* used here as the count of reports received */
+  if(CpvAccess(currentPe) == CmiNumPes()) { /* every PE has reported */
+    sizes[CpvAccess(nextidx)].time += CpvAccess(lasttime);
+    CpvAccess(numiter)++;
+    if(CpvAccess(numiter)<sizes[CpvAccess(nextidx)].numiter) { /* repeat the same size */
+      CpvAccess(currentPe) = 0;
+      CmiSetHandler(&emsg, CpvAccess(reduction_starter));
+      CmiSyncBroadcastAll(sizeof(EmptyMsg), &emsg);
+    } else {
+      CpvAccess(numiter) = 0;
+      CpvAccess(nextidx)++;
+      if(sizes[CpvAccess(nextidx)].size == (-1)) { /* sentinel: benchmark finished */
+        print_results("CmiReduce");
+        CmiSetHandler(&emsg, CpvAccess(ack_handler)); /* ack_handler is not declared in this file; presumably from commbench.h */
+        CmiSyncSend(0, sizeof(EmptyMsg), &emsg);
+        return;
+      } else { /* move on to the next size */
+        CpvAccess(currentPe) = 0;
+        CmiSetHandler(&emsg, CpvAccess(reduction_starter));
+        CmiSyncBroadcastAll(sizeof(EmptyMsg), &emsg);
+      }
+    }
+  }
+}
+
+static void sync_starter(void *msg) 
+{ /* receives the clock sample of PE currentPe, stores its offset, then probes the next PE or starts the reductions */
+  EmptyMsg emsg;    
+  ptimemsg tmsg = (ptimemsg)msg;
+
+  double midTime = (CmiWallTimer() + CpvAccess(lasttime))/2; /* midpoint between probe send time and reply arrival */
+  CpvAccess(timediff)[CpvAccess(currentPe)] = midTime - tmsg->time; /* local midpoint minus the remote sample */
+  CmiFree(msg);
+
+  CpvAccess(currentPe)++;
+  if(CpvAccess(currentPe) < CmiNumPes()) { /* more PEs left to probe */
+    CmiSetHandler(&emsg, CpvAccess(sync_reply));
+    CpvAccess(lasttime) = CmiWallTimer(); 
+    CmiSyncSend(CpvAccess(currentPe), sizeof(EmptyMsg), &emsg);
+  } else { /* all offsets recorded: trigger the first reduction on every PE */
+    CmiSetHandler(&emsg, CpvAccess(reduction_starter));
+    CpvAccess(currentPe) = 0; /* reused by reduction_central as a reply counter */
+    CmiSyncBroadcastAll(sizeof(EmptyMsg), &emsg);
+  }
+}
+
+static void sync_reply(void *msg) 
+{ /* answers a clock probe: sends this PE's CmiWallTimer() to sync_starter on PE 0 (srcpe is not filled in) */
+  ptimemsg tmsg = (ptimemsg)CmiAlloc(sizeof(timemsg));
+  tmsg->time = CmiWallTimer();
+
+  CmiFree(msg);
+  CmiSetHandler(tmsg, CpvAccess(sync_starter));
+  CmiSyncSend(0, sizeof(timemsg), tmsg);
+  CmiFree(tmsg);
+}
+
+void reduction_init(void)
+{ /* starts the benchmark with the clock probes: records the send time and sends the first probe to PE currentPe */
+  EmptyMsg emsg;
+
+  CmiSetHandler(&emsg, CpvAccess(sync_reply));
+  CpvAccess(lasttime) = CmiWallTimer();
+  CmiSyncSend(CpvAccess(currentPe),sizeof(EmptyMsg), &emsg);
+}
+
+void reduction_moduleinit(void)
+{ /* setup for the reduction benchmark: initialises the Cpv variables, allocates timediff and registers the handlers */
+  CpvInitialize(int, numiter);
+  CpvInitialize(int, nextidx);
+  CpvInitialize(int, flip);
+  CpvInitialize(int, currentPe);
+  CpvInitialize(double, starttime);
+  CpvInitialize(double, lasttime); /* types match the CpvStaticDeclare lines of this file */
+  CpvInitialize(double, endtime);
+  CpvInitialize(pdouble, timediff);
+  CpvInitialize(int, sync_starter);
+  CpvInitialize(int, sync_reply);
+  CpvInitialize(int, reduction_starter);
+  CpvInitialize(int, reduction_handler);
+  CpvInitialize(int, reduction_central);
+  CpvAccess(numiter) = 0;
+  CpvAccess(nextidx) = 0;
+  CpvAccess(currentPe) = 0;
+  CpvAccess(flip) = 0; /* first reduction_starter call starts a reduction */
+  CpvAccess(timediff) = (pdouble)malloc(CmiNumPes()*sizeof(double)); /* one clock offset per PE; result is not checked */
+  CpvAccess(reduction_starter) = CmiRegisterHandler((CmiHandler)reduction_starter); /* each Cpv stores the index returned by CmiRegisterHandler */
+  CpvAccess(reduction_handler) = CmiRegisterHandler((CmiHandler)reduction_handler);
+  CpvAccess(reduction_central) = CmiRegisterHandler((CmiHandler)reduction_central);
+  CpvAccess(sync_starter) = CmiRegisterHandler((CmiHandler)sync_starter);
+  CpvAccess(sync_reply) = CmiRegisterHandler((CmiHandler)sync_reply);
+}