Example: Simple Charm++ matrix-matrix multiply
author Phil Miller <mille121@illinois.edu>
Tue, 8 May 2012 19:01:17 +0000 (14:01 -0500)
committer Phil Miller <mille121@illinois.edu>
Tue, 8 May 2012 19:21:19 +0000 (14:21 -0500)
examples/charm++/matmul/Makefile [new file with mode: 0644]
examples/charm++/matmul/matmul.C [new file with mode: 0644]
examples/charm++/matmul/matmul.ci [new file with mode: 0644]

diff --git a/examples/charm++/matmul/Makefile b/examples/charm++/matmul/Makefile
new file mode 100644 (file)
index 0000000..5ed5c70
--- /dev/null
@@ -0,0 +1,21 @@
+# Builds the Charm++ matmul example; extra charmc flags go in OPTS.
+CHARMC=../../../bin/charmc
+
+# Route make's implicit .C -> .o rule through charmc.
+CXX=$(CHARMC)
+CXXFLAGS=$(OPTS)
+
+BINARY=matmul
+
+$(BINARY): matmul.o
+       $(CHARMC) -o $@ $< $(OPTS) -lblas
+
+# Interface translation: charmc turns matmul.ci into the generated headers.
+matmul.o: matmul.C matmul.decl.h
+matmul.decl.h: matmul.ci
+       $(CHARMC) $<
+
+# -f keeps clean from failing when nothing has been built yet.
+.PHONY: clean
+clean:
+       rm -f *.o *.decl.h *.def.h $(BINARY)
diff --git a/examples/charm++/matmul/matmul.C b/examples/charm++/matmul/matmul.C
new file mode 100644 (file)
index 0000000..fb3067a
--- /dev/null
@@ -0,0 +1,81 @@
+#include <cblas.h>
+#include "matmul.decl.h"
+
+// Readonly proxy to the main chare; assigned in Main's constructor.
+CProxy_Main mainProxy;
+
+// Main chare: parses the command line, creates the three block arrays,
+// launches the multiply and reports the elapsed wall-clock time.
+class Main : public CBase_Main {
+  double startTime;
+  unsigned int blockSize, numBlocks;
+  CProxy_Block a, b, c;
+public:
+  // Expects argv[1] = blockSize and argv[2] = numBlocks, both positive;
+  // aborts with the usage string otherwise.
+  Main(CkArgMsg* m) {
+    if (m->argc <= 2)
+      CkAbort("Usage: matmul blockSize numBlocks");
+
+    // Parse as signed so zero or negative input is rejected instead of
+    // wrapping around in the unsigned members.
+    const int blockSizeArg = atoi(m->argv[1]);
+    const int numBlocksArg = atoi(m->argv[2]);
+    delete m;  // argument message is no longer needed
+    if (blockSizeArg <= 0 || numBlocksArg <= 0)
+      CkAbort("Usage: matmul blockSize numBlocks");
+    blockSize = blockSizeArg;
+    numBlocks = numBlocksArg;
+
+    mainProxy = thisProxy;
+
+    a = CProxy_Block::ckNew(blockSize, numBlocks, numBlocks, numBlocks);
+    b = CProxy_Block::ckNew(blockSize, numBlocks, numBlocks, numBlocks);
+    c = CProxy_Block::ckNew(blockSize, numBlocks, numBlocks, numBlocks);
+
+    startTime = CkWallTimer();
+
+    // a and b send their tiles to c; c multiplies and reduces to done().
+    a.pdgemmSendInput(c, true);
+    b.pdgemmSendInput(c, false);
+    c.pdgemmRun(1.0, 0.0, CkCallback(CkReductionTarget(Main, done), thisProxy));
+  }
+
+  // Reduction target: prints the timing summary and ends the run.
+  void done() {
+    double endTime = CkWallTimer();
+    CkPrintf("Matrix multiply of %u blocks with %u elements each (%u^2) finished in %f seconds\n",
+             numBlocks, blockSize, numBlocks*blockSize, endTime - startTime);
+    CkExit();
+  }
+};
+
+// One blockSize x blockSize tile of a matrix, stored contiguously in data.
+class Block : public CBase_Block {
+  // block is the loop index of the forall in pdgemmRun (see matmul.ci).
+  unsigned int blockSize, numBlocks, block;
+  double* data;
+  Block_SDAG_CODE
+  public:
+  // Allocates the tile and fills it with drand48() values.
+  Block(unsigned int blockSize_, unsigned int numBlocks_)
+    : blockSize(blockSize_), numBlocks(numBlocks_), block(0), data(NULL)
+  {
+    __sdag_init();
+    const unsigned int elems = blockSize * blockSize;
+    data = new double[elems];
+    for (unsigned int i = 0; i < elems; ++i)
+      data[i] = drand48();
+  }
+
+  // Migration constructor: leaves the element empty but safe to destroy.
+  // NOTE(review): no pup() is defined, so the tile is not carried across
+  // migration -- confirm migration is never enabled for this example.
+  Block(CkMigrateMessage*)
+    : blockSize(0), numBlocks(0), block(0), data(NULL) {}
+
+  // Releases the tile allocated by the constructor.
+  ~Block() { delete[] data; }
+};
+
+#include "matmul.def.h"
diff --git a/examples/charm++/matmul/matmul.ci b/examples/charm++/matmul/matmul.ci
new file mode 100644 (file)
index 0000000..054332e
--- /dev/null
@@ -0,0 +1,74 @@
+mainmodule matmul {
+  readonly CProxy_Main mainProxy;
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry [reductiontarget] void done();
+  };
+
+  array [2D] Block {
+    entry Block(unsigned int blockSize, unsigned int numBlocks);
+
+    // Sends this tile to the same-index element of output: as an A operand
+    // tagged with thisIndex.x when aOrB is true, otherwise as a B operand
+    // tagged with thisIndex.y.
+    entry void pdgemmSendInput(CProxy_Block output, bool aOrB) {
+      atomic {
+        if (aOrB)
+          output[thisIndex].inputA(thisIndex.x, data, blockSize, true);
+        else
+          output[thisIndex].inputB(thisIndex.y, data, blockSize, true);
+      }
+    };
+
+    // Accumulates data = beta * data + alpha * sum(A tile * B tile) over
+    // numBlocks matching inputA/inputB pairs, then contributes to done.
+    entry void pdgemmRun(double alpha, double beta, CkCallback done) {
+      // Apply beta exactly once, before any product is added. Handing it to
+      // every dgemm call would rescale (for beta == 0, erase) the partial
+      // sums already accumulated by other iterations.
+      atomic {
+        const unsigned int elems = blockSize * blockSize;
+        if (beta == 0.0)
+          for (unsigned int i = 0; i < elems; ++i)
+            data[i] = 0.0;
+        else
+          for (unsigned int i = 0; i < elems; ++i)
+            data[i] *= beta;
+      }
+      forall [block] (0:numBlocks-1,1) {
+        when
+          inputA[block](int blockIdA, double blockA[blockSizeA*blockSizeA],
+                        unsigned int blockSizeA, bool fromSourceA),
+          inputB[block](int blockIdB, double blockB[blockSizeB*blockSizeB],
+                        unsigned int blockSizeB, bool fromSourceB) atomic {
+          CkAssert(blockSizeA == blockSizeB);
+          CkAssert(blockSizeA == blockSize);
+          cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+                      blockSize, blockSize, blockSize,
+                      alpha,
+                      blockA, blockSize, blockB, blockSize,
+                      1.0, data, blockSize);
+          // Pass the A tile on to the next element along x.
+          if (fromSourceA || ((blockIdA + numBlocks - 1) % numBlocks != thisIndex.x)) {
+            int destX = (thisIndex.x + 1) % numBlocks;
+            int destY = thisIndex.y;
+            thisProxy(destX, destY).inputA(blockIdA, blockA, blockSizeA);
+          }
+          // Pass the B tile on to the next element along y.
+          if (fromSourceB || ((blockIdB + numBlocks - 1) % numBlocks != thisIndex.y)) {
+            int destX = thisIndex.x;
+            int destY = (thisIndex.y + 1) % numBlocks;
+            thisProxy(destX, destY).inputB(blockIdB, blockB, blockSizeB);
+          }
+        }
+      }
+      atomic {
+        contribute(done);
+      }
+    };
+
+    // Operand deliveries consumed by the when clause in pdgemmRun.
+    entry void inputA(int blockIdA, double blockA[blockSizeA*blockSizeA], unsigned int blockSizeA, bool fromSource = false);
+    entry void inputB(int blockIdB, double blockB[blockSizeB*blockSizeB], unsigned int blockSizeB, bool fromSource = false);
+  };
+};