Update NQueens program. This program can be a benchmark test for seed balancer
authorYanhua Yanhua <sun51@illinois.edu>
Wed, 28 Jul 2010 17:25:32 +0000 (12:25 -0500)
committerYanhua Yanhua <sun51@illinois.edu>
Wed, 28 Jul 2010 17:25:32 +0000 (12:25 -0500)
examples/charm++/NQueen/Makefile
examples/charm++/NQueen/main.C
examples/charm++/NQueen/main.ci
examples/charm++/NQueen/main.h
examples/charm++/NQueen/nqueen.C
examples/charm++/NQueen/nqueen.h
src/conv-ldb/cldb.prioritycentralized.c
src/conv-ldb/cldb.prioritycentralized.h
src/conv-ldb/cldb.rand.c
src/conv-ldb/priorityqueue.c
src/conv-ldb/topology.C

index eb87b0d570e6c8e8e12de1ac1db66dcafb7bb184..485e4c8c268929676ba009daab5c0c3e8a0c3435 100644 (file)
@@ -1,26 +1,22 @@
-CHARMDIR = /expand/home/jessie/PPL/git/charm/mpi-linux-x86_64
-#CHARMDIR = /Users/yanhuasun/PPL/charm/charm-6.1.2/multicore-darwin-x86_64
-CHARMC = $(CHARMDIR)/bin/charmc $(OPTS) 
+CHARMC = ../../../bin/charmc $(OPTS)
 default: all
-all: nqueen nqueen_prj nqueen_greedy nqueen_refine nqueen_spray nqueen_neighbor
+all: nqueen nqueen_prj nqueen_rand  nqueen_neighbor nqueen_neighbor.prj
 
 nqueen : main.o nqueen.o counter.o
        $(CHARMC) -language charm++ -o nqueen main.o nqueen.o counter.o
 
-nqueen_spray : main.o nqueen.o counter.o
-       $(CHARMC) -language charm++ -balance spray -o nqueen_spray main.o nqueen.o counter.o
+nqueen_rand : main.o nqueen.o counter.o
+       $(CHARMC) -language charm++ -balance rand -o nqueen_rand main.o nqueen.o counter.o
 
 nqueen_neighbor : main.o nqueen.o counter.o
        $(CHARMC) -language charm++ -balance neighbor -o nqueen_neighbor main.o nqueen.o counter.o
 
 nqueen_prj : main.o nqueen.o counter.o
-       $(CHARMC) -language charm++ -tracemode projections -o nqueen_prj main.o nqueen.o counter.o
+       $(CHARMC) -language charm++ -tracemode projections -balance rand -o nqueen_prj main.o nqueen.o counter.o
 
-nqueen_greedy : main.o nqueen.o counter.o
-       $(CHARMC) -language charm++ -balancer GreedyCommLB  -o nqueen_greedy main.o nqueen.o counter.o
+nqueen_neighbor.prj : main.o nqueen.o counter.o
+       $(CHARMC) -language charm++ -tracemode projections -balance neighbor -o nqueen_neighbor.prj main.o nqueen.o counter.o
 
-nqueen_refine : main.o nqueen.o counter.o
-       $(CHARMC) -language charm++ -balancer RefineLB  -o nqueen_refine main.o nqueen.o counter.o
 main.o : main.C main.h counter.decl.h counter.def.h  main.decl.h main.def.h
        $(CHARMC) -o main.o main.C
 
@@ -36,10 +32,13 @@ main.decl.h main.def.h : main.ci
 counter.decl.h counter.def.h : counter.ci
        $(CHARMC) counter.ci
 
+test:
+        ./charmrun +p128 ./nqueen 17 5 
+
 clean:
        rm -f main.decl.h main.def.h main.o
        rm -f counter.decl.h counter.def.h nqueen.o
-       rm -f nqueen nqueen_refine nqueen_greedy nqueen_prj nqueen_rand nqueen_spray nqueen_neighbor charmrun
+       rm -f nqueen nqueen_refine nqueen_greedy nqueen_prj nqueen_rand nqueen_spray nqueen_neighbor charmrun nqueen_neighbor.prj
        rm *.o *.def.h *.decl.h
        rm *.log *.sts *.projrc
 
index e37ea5d9122170c32868f13843d86bc5e9901eae..dc33171a03cdbf23a0aa2a821fd70566eb25a6ea 100644 (file)
@@ -5,7 +5,6 @@
  * Copyright by PPL
  * problem report sun51@uiuc.edu
 */
-
 #include "main.decl.h"
 #include "main.h"
 #include "nqueen.h"
@@ -27,17 +26,67 @@ Main::Main(CkArgMsg* msg)
 
     CkPrintf(" Usage: nqueen numQueen grainsize (%d, g=%d, np=%d)\n", numQueens, grainsize, CkNumPes());
 
-    CkVec<int> initStatus(numQueens);
-    for(int i=0; i<numQueens; i++)
-        initStatus[i] = -1;
-
     /* timer */
     mainhandle=thishandle;
-   
-    QueenState *qmsg = new QueenState;
-    qmsg->row = -1; 
-    CProxy_NQueen::ckNew(qmsg);
     
+    
+    int odd = numQueens&1;
+    int board_minus = numQueens - 1;
+    mask = (1<<numQueens) - 1;
+    int bitfield = 0;
+    int half = numQueens >> 1;
+    int numrows = 1;
+    int col;
+    int pos;
+    int neg;
+    int lsb;
+
+    bitfield  = (1 << half) -1;
+    for(;;)
+    {
+        if(bitfield == 0)
+            break;
+        lsb = -((signed)bitfield) & bitfield; 
+        QueenState *qmsg = new QueenState;
+        qmsg->aQueenBitRes= lsb;
+        qmsg->aQueenBitCol = 0|lsb;
+        qmsg->aQueenBitNegDiag = (0|lsb) >> 1;
+        qmsg->aQueenBitPosDiag = (0|lsb) << 1;
+        //qmsg->bitfield = mask & ~(qmsg->aQueenBitCol[numrows] | qmsg->aQueenBitNegDiag[numrows] | qmsg->aQueenBitPosDiag[numrows]);
+        qmsg->numrows = 1; 
+        //CkSetQueueing(qmsg, CK_QUEUEING_ILIFO);
+        CProxy_NQueen::ckNew(qmsg);
+        bitfield &= ~lsb;
+    }
+
+    if(odd == 1)
+    {
+        bitfield = 1 << (numQueens >> 1);
+        numrows = 1; /* prob. already 0 */
+        /* The first row just has one queen (in the middle column).*/
+        col = bitfield;
+        /* Now do the next row.  Only set bits in half of it, because we'll
+         * * * *                flip the results over the "Y-axis".  */
+        neg = (bitfield >> 1);
+        pos = (bitfield << 1);
+        bitfield = (bitfield - 1) >> 1;
+        for(;;)
+        {
+            if(bitfield == 0)
+                break;
+            lsb = -((signed)bitfield) & bitfield; 
+
+            QueenState *qmsg = new QueenState;
+            qmsg->aQueenBitRes = lsb;
+            qmsg->aQueenBitCol = col|lsb;
+            qmsg->aQueenBitNegDiag = (neg|lsb)>>1;
+            qmsg->aQueenBitPosDiag = (pos|lsb)<<1;
+            qmsg->numrows = 2; 
+            //CkSetQueueing(qmsg, CK_QUEUEING_ILIFO);
+            CProxy_NQueen::ckNew(qmsg);
+            bitfield &= ~lsb;
+        }
+    }
     starttimer = CkTimer();
     counterGroup = counterInit();
     CkStartQD(CkIndex_Main::Quiescence1((DUMMYMSG *)0), &mainhandle);
@@ -50,7 +99,7 @@ void Main::Quiescence1(DUMMYMSG *msg)
     int numSolutions = CProxy_counter(counterGroup).ckLocalBranch()->getTotalCount();
     double endtimer = CkTimer();
     CkPrintf("There are %d Solutions to %d queens. Time=%f\n",
-        numSolutions, numQueens, endtimer-starttimer);
+        2*numSolutions, numQueens, endtimer-starttimer);
     CkExit();
 }
 
index 78e32029d4565be2b896eb5c0439e63e8f837f24..ae3621c8b1c0843fd2b830f69fe693e2c73bfa56 100644 (file)
@@ -4,7 +4,7 @@ mainmodule main {
     readonly int numQueens;
     readonly int grainsize;
     readonly CkGroupID counterGroup;
-    
+    readonly int mask;    
 
     message QueenState; 
     message DUMMYMSG;
@@ -17,6 +17,7 @@ mainmodule main {
 
     chare NQueen {
         entry NQueen(QueenState *);
+        entry void sequentialSolve( QueenState *msg);
     };
 
 };
index 13a56f87c7280c8b97f6cd708278d555c8b7b11d..656a60ef16a48a0fbb26201b822ff58ce42ce980 100644 (file)
@@ -1,6 +1,6 @@
 #ifndef _MAIN_H_
 #define _MAIN_H_
-
+/* readonly */ int mask;
 /* readonly */ int numQueens; 
 /* readonly */ int grainsize;
 /* readonly */ CkGroupID counterGroup;
index b3dc39bbea24f387fc1a13827db632218bad46cc..6bcf3d69df166ba40c4571208c7bc5deee33a8ef 100644 (file)
@@ -11,95 +11,120 @@ NQueen::NQueen(CkMigrateMessage *msg) {}
 
 NQueen::NQueen(QueenState *m)
 {
-    current_row = m->row;
+    int lsb;
+    int n = m->numrows;
+    int bitfield;// = m->bitfield;
+    int mask; 
+    int current_row = m->numrows+1;
     QueenState *new_msg;
+    
+    mask = (1 << numQueens ) - 1;
+    bitfield = mask & ~(m->aQueenBitCol | m->aQueenBitNegDiag | m->aQueenBitPosDiag);
+
     /*for all possible places in next row */
-    if(current_row == numQueens-1)
+    if(n == numQueens-1)
     {
-        CProxy_counter(counterGroup).ckLocalBranch()->increment();
-        //printSolution(m->queenInWhichColum);
-        return;
+        if(bitfield!=0){
+            CProxy_counter(counterGroup).ckLocalBranch()->increment();
+        }
+            return;
     }
-
-    bool conflict = false;
-    for(int i_colm=0; i_colm< numQueens; i_colm++)
+    // the next step is reasonable. fire a new child
+    for(;;)
     {
-       /* conflict check (row+1, i_colm)*/
-        conflict = false;
-        for(int j_row=0; j_row <= current_row; j_row++)
+    
+        if(bitfield == 0)
         {
-            //conflict for the same colum
-            if(m->queenInWhichColum[j_row] == i_colm || i_colm - m->queenInWhichColum[j_row] == (current_row+1 - j_row) || i_colm - m->queenInWhichColum[j_row] == -(current_row+1 - j_row) )
-            {
-                conflict = true;
-                break;
-            }
+            break;
         }
-        if(conflict){
-            continue;
+        lsb = -((signed)bitfield)&bitfield;
+        new_msg = new QueenState;
+        new_msg->aQueenBitRes = lsb;
+        new_msg->aQueenBitCol = m->aQueenBitCol | lsb;
+        new_msg->aQueenBitNegDiag = (m->aQueenBitNegDiag |lsb) >> 1;
+        new_msg->aQueenBitPosDiag = (m->aQueenBitPosDiag | lsb) << 1;
+        new_msg->numrows = current_row;
+        bitfield &= ~lsb;
+        if(current_row < grainsize)
+        {
+            //CkSetQueueing(new_msg, CK_QUEUEING_ILIFO);
+            CProxy_NQueen::ckNew(new_msg);
         }else
         {
-             // the next step is reasonable. fire a new child
-            if(numQueens - current_row > grainsize)
-            {
-                new_msg = new QueenState;
-                new_msg->row = current_row+1;
-                for(int __i=0; __i<current_row+1;__i++)
-                    new_msg->queenInWhichColum[__i] = m->queenInWhichColum[__i];
-                new_msg->queenInWhichColum[current_row+1] = i_colm; 
-                CProxy_NQueen::ckNew(new_msg);
-            }else
-            {
-                //sequential solve
-                sequentialSolve(m->queenInWhichColum, current_row+1, i_colm);
-            }
+            //sequential solve
+            thisProxy.sequentialSolve(new_msg);
         }
     }
+    //CkPrintf(" level = %d\n", current_row+1);
 }
 
-void NQueen::sequentialSolve(int queenInWhichColum[], int row, int colum)
+void NQueen::sequentialSolve( QueenState *m)
 {
 
-    queenInWhichColum[row] = colum;
-    if(row == numQueens -1){
-        CProxy_counter(counterGroup).ckLocalBranch()->increment();
-        //CkPrintf("[%d, %d]\n", row, colum);
-        //printSolution(queenInWhichColum);
-        return;
-    }   
-    bool conflict = false;
-    for(int i_colm=0; i_colm< numQueens; i_colm++)
+    int aQueenBitRes[MAX_BOARDSIZE];
+    int aQueenBitCol[MAX_BOARDSIZE];
+    int aQueenBitPosDiag[MAX_BOARDSIZE];
+    int aQueenBitNegDiag[MAX_BOARDSIZE];
+    int aStack[MAX_BOARDSIZE + 2]; /* we use a stack instead of recursion */
+    int n = m->numrows;
+    register int* pnStack;
+    register int numrows = m->numrows; /* numrows redundant - could use stack */
+    register unsigned int lsb; /* least significant bit */
+    register unsigned int bitfield; /* bits which are set mark possible positions for a queen */
+    int board_minus = numQueens - 1; /* board size - 1 */
+    int mask = (1 << numQueens) - 1; /* if board size is N, mask consists of N 1's */
+    aStack[0] = -1;
+    
+    pnStack = aStack + 1;
+
+    aQueenBitCol[n] = m->aQueenBitCol;
+    aQueenBitNegDiag[n] = m->aQueenBitNegDiag;
+    aQueenBitPosDiag[n] = m->aQueenBitPosDiag;
+    bitfield = mask & ~(m->aQueenBitCol | m->aQueenBitNegDiag | m->aQueenBitPosDiag);
+    /* this is the critical loop */
+    for (;;)
     {
-        /* conflict check (row+1, i_colm)*/        /*all existing queens */
-        conflict = false;
-        for(int j_row=0; j_row <= row; j_row++)
+        /* could use 
+         * lsb = bitfield ^ (bitfield & (bitfield -1)); 
+         * to get first (least sig) "1" bit, but that's slower. */
+        lsb = -((signed)bitfield) & bitfield; /* this assumes a 2's complement architecture */
+        if (0 == bitfield)
         {
-            //conflict for the same colum
-            if(queenInWhichColum[j_row] == i_colm || i_colm-queenInWhichColum[j_row] == (row+1 - j_row) || i_colm - queenInWhichColum[j_row] == -(row+1 - j_row) )
-            {
-                conflict = true;
-                break;
+            bitfield = *--pnStack; /* get prev. bitfield from stack */
+            if (pnStack == aStack) { /* if sentinel hit.... */
+                break ;
             }
-        }
-        // all colums are checked
-        if(conflict){
+            --numrows;
             continue;
-        }else
-        {
-            /* the last row , get  a solution. stop */
-             /* the next step is reasonable. fire a new child */
-                sequentialSolve(queenInWhichColum, row+1, i_colm);
         }
+        bitfield &= ~lsb; /* toggle off this bit so we don't try it again */
 
+        aQueenBitRes[numrows] = lsb; /* save the result */
+        if (numrows < board_minus) /* we still have more rows to process? */
+        {
+            n = numrows++;
+            aQueenBitCol[numrows] = aQueenBitCol[n] | lsb;
+            aQueenBitNegDiag[numrows] = (aQueenBitNegDiag[n] | lsb) >> 1;
+            aQueenBitPosDiag[numrows] = (aQueenBitPosDiag[n] | lsb) << 1;
+            *pnStack++ = bitfield;
+            /* We can't consider positions for the queen which are in the same
+             * * column, same positive diagonal, or same negative diagonal as another
+             * queen already on the board. */
+            bitfield = mask & ~(aQueenBitCol[numrows] | aQueenBitNegDiag[numrows] | aQueenBitPosDiag[numrows]);
+            continue;
+        }
+        else{
+            /* printtable(board_size, aQueenBitRes, g_numsolutions + 1);  */
+            CProxy_counter(counterGroup).ckLocalBranch()->increment();
+            bitfield = *--pnStack;
+            --numrows;
+            continue;
+        }
     }
+
 }
 void NQueen::printSolution(int queens[])
 {
-    for(int i=0; i<numQueens; i++)
-    {
-        CkPrintf("[%d, %d] ", i, queens[i]); 
-    }
-    CkPrintf("\n");
 }
 
 /*
index b757fcdf7352910f23a1d84e6fccbc88d4854bde..b2e2609a4c360aaa2c647a3ec8faea71ea0b8884 100644 (file)
@@ -2,33 +2,34 @@
 #define _NQueenState_H_
 
 #define MAX 20
+#define MAX_BOARDSIZE 30
+
+class QueenState: public CMessage_QueenState {
+
+public:
+    int aQueenBitRes;//[MAX_BOARDSIZE];
+    int aQueenBitCol;//[MAX_BOARDSIZE];
+    int aQueenBitPosDiag;//[MAX_BOARDSIZE];
+    int aQueenBitNegDiag;//[MAX_BOARDSIZE];
+    
+    //int bitfield;
+    int numrows;
+};
+
+class DUMMYMSG : public CMessage_DUMMYMSG {
+public:
+};
+
 class NQueen : public CBase_NQueen {
 
 public:
 
     NQueen(QueenState *);
     NQueen(CkMigrateMessage *msg);
+    void sequentialSolve( QueenState *msg);
     
 private:
-    //place another queen
-
     void printSolution(int[]);
-    void sequentialSolve(int[], int row, int colum);
 
-    //indicate which colum has a queen for each row
-    int current_row;
-
-};
-
-class QueenState: public CMessage_QueenState {
-
-public:
-    int row;
-    int queenInWhichColum[MAX]; 
 };
-
-class DUMMYMSG : public CMessage_DUMMYMSG {
-public:
-};
-
 #endif
index 5cb411c5bce256de7e2400dbd73644dadbaf331b..f65797dc96af471f97084e658b14c0277abec527 100644 (file)
@@ -30,8 +30,6 @@ CpvDeclare(int,  CldstorecharemsgHandlerIndex);
 CpvDeclare(int, CldHigherPriorityComesHandlerIndex);
 CpvDeclare(int, CldReadytoExecHandlerIndex);
 CpvDeclare(void*, CldRequestQueue);
-CpvDeclare(CldPriorityToken, headsentinel);
-CpvDeclare(priormsg, nexttoexec);
 
 void LoadNotifyFn(int l)
 {
@@ -71,7 +69,7 @@ inline void SendTasktoPe(int receiver, void *msg)
     CpvAccess(CldSlavesPriorityQueue)[receiver].load = new_load;
 
 #if YH_DEBUG
-    CmiPrintf(" sending this msg with prior %u  to processor %d len=%d \n", *prioptr, receiver, len);
+    CmiPrintf(" P%d====>P%d sending this msg with prior %u  to processor %d len=%d \n", CmiMyPe(), receiver, *prioptr, receiver, len);
 #endif
 }
 /* master processor , what to do when receive a new msg from network */
@@ -97,7 +95,7 @@ static void CldStoreCharemsg(void *msg)
     /* find the processor with the highest priority */
     int i=0; 
     unsigned int _max_ = *prioptr;
-    int index = 0;
+    int index = 1;
     double max_evaluation = 0;
     int old_load;
     //check which processor has underload and also the task priority is lower than the msg priority
@@ -167,18 +165,16 @@ static void CldAskLoadHandler(requestmsg *msg)
 #if YH_DEBUG
     CmiPrintf(" Step 6 :%f %d<======= getrequest  from processor queue current size=%d, notidle=%d, load=%d\n", CmiTimer(), receiver, heap_size( &CpvAccess(CldManagerLoadQueue)), msg->notidle, CpvAccess(CldSlavesPriorityQueue)[receiver].load);
 #endif
-    if(msg->notidle){
-        if(old_load == 0 || old_load == 1)
-        {
-            CpvAccess(CldSlavesPriorityQueue)[receiver].average_priority = _U_INT_MAX;
-            CpvAccess(CldSlavesPriorityQueue)[receiver].load = 0;
-        }else
-        {
-            new_load = old_load - 1;
-            CpvAccess(CldSlavesPriorityQueue)[receiver].load = new_load;
-            CpvAccess(CldSlavesPriorityQueue)[receiver].average_priority = old_average_prior/new_load * old_load - msg->priority/new_load;
-        }
-    } 
+    if(!msg->notidle || old_load == 0 || old_load == 1)
+    {
+        CpvAccess(CldSlavesPriorityQueue)[receiver].average_priority = _U_INT_MAX;
+        CpvAccess(CldSlavesPriorityQueue)[receiver].load = 0;
+    }else
+    {
+        new_load = old_load - 1;
+        CpvAccess(CldSlavesPriorityQueue)[receiver].load = new_load;
+        CpvAccess(CldSlavesPriorityQueue)[receiver].average_priority = old_average_prior/new_load * old_load - msg->priority/new_load;
+    }
    
     old_load = CpvAccess(CldSlavesPriorityQueue)[receiver].load;
     if(old_load < THRESHOLD_LOAD)
@@ -211,8 +207,6 @@ static void CldEndIdle(void *dummy)
 
 static void CldStillIdle(void *dummy, double curT)
 {
-
-
     if(CmiMyPe() == 0) 
     {
 #if YH_DEBUG
@@ -233,92 +227,42 @@ static void CldStillIdle(void *dummy, double curT)
     double now = curT;
     double lt = cldData->lastCheck;
    
-    
-    CldPriorityToken  head = CpvAccess(headsentinel);
-    CldPriorityToken exectoken;
-    unsigned int priority = _U_INT_MAX; 
-    exectoken = CpvAccess(headsentinel)->succ;
-    if(exectoken)
-    {
-        CldRestoreHandler(exectoken->msg); 
-        CmiHandleMessage(exectoken->msg);
-        head->succ= exectoken->succ;
-        CmiFree(exectoken);
-        cldData->load = cldData->load - 1;
-        msg.notidle = 1;
+    cldData->load  = 0;
+    msg.notidle = 0;
+    if ((lt!=-1 && now-lt< PERIOD*0.001) ) return;
 #if YH_DEBUG
-        CmiPrintf("Step 1000: processor %d finishes processing one msg =>Send request for task,  last check time %f, current time=%f, notidle11=%d, load=%d\n", CmiMyPe(), lt, now, msg.notidle, cldData->load);
+    CmiPrintf("Step 1000: processor %d task is already zero ", CmiMyPe());
 #endif
-    }else
-    {
-        cldData->load  = 0;
-        msg.notidle = 0;
-        if ((lt!=-1 && now-lt< PERIOD*0.001) ) return;
-#if YH_DEBUG
-        CmiPrintf("Step 1000: processor %d task is already zero ", CmiMyPe());
-#endif
-    }
 
-    if(cldData->load >= THRESHOLD_LOAD)
-    {
-#if YH_DEBUG
-        CmiPrintf(" worker processor %d finishes processing one msg =>Send request for task,  last check time %f, current time=%f, load=%d\n", CmiMyPe(), lt, now, cldData->load);
-#endif
-        return;
-    }
     cldData->lastCheck = now;
     msg.from_pe = CmiMyPe();
     CmiSetHandler(&msg, CpvAccess(CldAskLoadHandlerIndex));
 
     cldData->sent = 1;
-    /* fixme */
-    //CmiBecomeImmediate(&msg);
-    //msg.to_rank = CmiRankOf(0);
 #if YH_DEBUG
     CmiPrintf("Step 1000: processor %d task is already zero sentidle=%d", CmiMyPe(), (&msg)->notidle);
 #endif
     CmiSyncSend(0, sizeof(requestmsg), &msg);
 }
-/*
-void CldReadytoExec(CldPriorityToken readytoken)
+void CldReadytoExec(void *msg)
 {
-    CldPriorityToken  head = CpvAccess(headsentinel);
-    CldPriorityToken exectoken;
-    unsigned int priority = _U_INT_MAX; 
-    exectoken = CpvAccess(headsentinel)->succ;
-#if YH_DEBUG
-    CmiPrintf("Step 4: processor %d, task is ready to run with priority=%u, Timer=%f\n", CmiMyPe(), readytoken->priority, CmiTimer());
-#endif
 
-    if(exectoken)
-    {
-        priority = exectoken->priority;
-#if YH_DEBUG
-    CmiPrintf("Step 4-1: processor %d, task is ready to run with priority=%u, Timer=%f\n", CmiMyPe(), priority, CmiTimer());
-#endif
-        CldRestoreHandler(exectoken->msg); 
-        CmiHandleMessage(exectoken->msg);
-        head->succ= exectoken->succ;
-        CmiFree(exectoken);
-    }
-#if YH_DEBUG
-    CmiPrintf("Step 4-2: processor %d, task is done Timer=%f\n", CmiMyPe(), CmiTimer());
-#endif
+    CldProcInfo  cldData = CpvAccess(CldData);
+    CldRestoreHandler(msg);
+    CmiHandleMessage(msg);
+    cldData->load = cldData->load - 1;
+
+    requestmsg r_msg;
+
+    r_msg.notidle = 1;
+    r_msg.from_pe = CmiMyPe();
+    CmiSetHandler(&r_msg, CpvAccess(CldAskLoadHandlerIndex));
+    CmiSyncSend(0, sizeof(requestmsg), &r_msg);
+
 #if YH_DEBUG
-    CmiPrintf("Step 5: processor %d, send request to processor 0 to ask for load Timer=%f\n", CmiMyPe(), CmiTimer());
+    CmiPrintf(" Step final: message is handled on processor %d, task left=%d", CmiMyPe(), cldData->load);
 #endif
-    requestmsg updatemsg;
-
-    CldInfoFn ifn;
-    CldPackFn pfn;
-    int len, queueing, priobits; 
-    updatemsg.priority = priority;
-    updatemsg.from_pe = CmiMyPe();
-    CmiSetHandler(&updatemsg, CpvAccess(CldAskLoadHandlerIndex));
-    //CmiBecomeImmediate(&updatemsg);
-    CmiSyncSend(0, sizeof(requestmsg), &updatemsg);
 }
-*/
 void HigherPriorityWork(void *msg)
 {
     //wrap this msg with token and put it into token queue
@@ -327,39 +271,16 @@ void HigherPriorityWork(void *msg)
     CldPackFn pfn;
     int len, queueing, priobits; 
     unsigned int *prioptr;
-    CldPriorityToken priortoken;
     CldProcInfo  cldData = CpvAccess(CldData);
     ifn = (CldInfoFn)CmiHandlerToFunction(CmiGetInfo(msg));
     ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
+    CldRestoreHandler(msg);
+    CldSwitchHandler(msg, CpvAccess(CldReadytoExecHandlerIndex));
+    CsdEnqueueGeneral(msg, queueing, priobits, prioptr);
+    cldData->load = cldData->load  + 1;
 
 #if YH_DEBUG
-        CmiPrintf(" Step 3:  processor %d, Task arrives and sort the task first, prior=%u Timer=%f\n", CmiMyPe(), *prioptr, CmiTimer());
-#endif
-    priortoken = (CldPriorityToken) CmiAlloc(sizeof(struct CldPriorityToken_s));
-    priortoken->msg = msg;
-    priortoken->priority = *prioptr; 
-    CldPriorityToken head = CpvAccess(headsentinel);
-    CldPriorityToken  tmptoken = head;//
-    CldPriorityToken  predtoken;
-    if(head->succ == NULL)
-    {
-        head->succ = priortoken;
-        priortoken->succ = NULL;
-        cldData->load = 1;
-    }else  //insert this token into queue accord to prior smaller ->higher
-    {
-        do{
-            predtoken = tmptoken;
-            tmptoken = tmptoken->succ;
-        }while(tmptoken != NULL && tmptoken->priority<= *prioptr);
-        priortoken->succ = tmptoken;
-        predtoken->succ = priortoken;
-        cldData->load = cldData->load  + 1;
-    }
-    //CmiSetHandler(priortoken, CpvAccess(CldReadytoExecHandlerIndex));
-    //CmiSyncSend(CmiMyPe(), sizeof(struct CldPriorityToken_s), priortoken); 
-#if YH_DEBUG
-        CmiPrintf(" Step 3-1:  processor %d, Task has been inserted and re-Queue prior=%u Timer=%f, enqueue load=%d\n", CmiMyPe(), *prioptr, CmiTimer(), cldData->load);
+    CmiPrintf(" Step 3:  processor %d, Task arrives and put it into charm++ queue, prior=%u Timer=%f\n", CmiMyPe(), *prioptr, CmiTimer());
 #endif
 }
 
@@ -390,6 +311,10 @@ void CldEnqueue(int pe, void *msg, int infofn)
               pfn(&msg);
               ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
           }
+#if YH_DEBUG
+          CmiPrintf(" Step 1-1: Creation New msg on pe%d ==> p0  priority=%u Timer:%f (msg len=%d)\n", CmiMyPe(),  *prioptr, CmiTimer(), len);
+#endif
+
           CmiSyncSendAndFree(0, len, msg);
       }
   }else if((pe == CmiMyPe()) || (CmiNumPes() == 1) ) {
@@ -466,8 +391,7 @@ void  CldOtherInit()
       CpvAccess(CldSlavesPriorityQueue) = (CldSlavePriorInfo*)CmiAlloc(sizeof(CldSlavePriorInfo) * numpes);
       int i=0;
       for(i=0; i<numpes; i++){
-          CpvAccess(CldSlavesPriorityQueue)[i].priority_1 = _U_INT_MAX;
-          CpvAccess(CldSlavesPriorityQueue)[i].priority_2 = _U_INT_MAX;
+          CpvAccess(CldSlavesPriorityQueue)[i].average_priority = _U_INT_MAX;
           CpvAccess(CldSlavesPriorityQueue)[i].load = 0;
       }
   }
@@ -486,18 +410,11 @@ void CldModuleInit(char **argv)
   CpvInitialize(MsgHeap, CldManagerLoadQueue);
   CpvInitialize(CldSlavePriorInfo*, CldSlavesPriorityQueue);
   CpvInitialize(void*, CldRequestQueue);
-  CpvInitialize(priormsg, nexttoexec);
-  CpvInitialize(CldPriorityToken, headsentinel);
-
-
-  CpvAccess(headsentinel) = (CldPriorityToken) CmiAlloc(sizeof(struct CldPriorityToken_s));
-  CpvAccess(headsentinel)->succ = NULL;
-  CpvAccess(nexttoexec).priority = _U_INT_MAX;
 
   CpvAccess(CldstorecharemsgHandlerIndex) = CmiRegisterHandler(CldStoreCharemsg);
   CpvAccess(CldHigherPriorityComesHandlerIndex) = CmiRegisterHandler(HigherPriorityWork);
   CpvAccess(CldAskLoadHandlerIndex) = CmiRegisterHandler((CmiHandler)CldAskLoadHandler);
-  //CpvAccess(CldReadytoExecHandlerIndex) = CmiRegisterHandler((CmiHandler)CldReadytoExec);
+  CpvAccess(CldReadytoExecHandlerIndex) = CmiRegisterHandler((CmiHandler)CldReadytoExec);
   CpvAccess(CldRequestQueue) = (void *)CqsCreate();
   CldModuleGeneralInit(argv);
   
index 390c87841827981795fd0346c24014272a9a164d..db2a2c52df401e892ce863121ea7fef8d12e61dd 100644 (file)
@@ -53,25 +53,8 @@ typedef struct CldProcPriorInfo_s {
 typedef struct CldSlavePriorInfo_s {
     int pe;
     double average_priority;
-    int priority_1;
-    int priority_2;
+    //int priority_1;
+    //int priority_2;
     int load;
 } CldSlavePriorInfo;
 
-typedef struct CldPriorityToken_s {
-
-    char msg_header[CmiMsgHeaderSizeBytes];
-    unsigned int priority;
-    struct CldPriorityToken_s *pred;
-    struct CldPriorityToken_s *succ;
-    void* msg;
-} *CldPriorityToken;
-
-typedef struct CldPriorityTokenInfo_s {
-  int tokenhandleridx;
-  int load; /* number of items in doubly-linked circle besides sentinel */
-  CldPriorityToken head;
-  CldPriorityToken tail;
-} *CldPriorityTokenInfo;
-
-
index 9c3ff836501faa7cca7c20ae5d504897cb3d56b9..29f6298732da0babd28d251a0f8e2818f98c75bd 100644 (file)
@@ -89,6 +89,7 @@ void CldEnqueue(int pe, void *msg, int infofn)
   if (pe == CmiMyPe() && !CmiImmIsRunning()) {
     ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
     /* CsdEnqueueGeneral is not thread or SIGIO safe */
+    //CmiPrintf("   myself processor %d ==> %d, length=%d Timer:%f , priori=%d \n", CmiMyPe(), pe, len, CmiWallTimer(), *prioptr);
     CsdEnqueueGeneral(msg, queueing, priobits, prioptr);
   } else {
     ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
@@ -101,7 +102,7 @@ void CldEnqueue(int pe, void *msg, int infofn)
     if (pe==CLD_BROADCAST) { CmiSyncBroadcastAndFree(len, msg); }
     else if (pe==CLD_BROADCAST_ALL) { CmiSyncBroadcastAllAndFree(len, msg); }
     else {
-        CmiPrintf(" on processor %d Send msg to pe:%d, length=%d Timer:%f \n", CmiMyPe(), pe, len, CmiWallTimer());
+        //CmiPrintf("   processor %d ==> %d, length=%d Timer:%f , priori=%d \n", CmiMyPe(), pe, len, CmiWallTimer(), *prioptr);
         CmiSyncSendAndFree(pe, len, msg);
     }
   }
index 204ecdce8239584a762251280ed46a364f0f2920..1ec2643de39df0709db18865dde164da799dcb0f 100644 (file)
@@ -14,6 +14,7 @@ The implemementation is based on Heapsort as described in
 typedef struct priormsg_s {
     unsigned int priority;
     int     pe;
+    int     load;
     void* msg;
 } priormsg;
 
index 391f267c3bc1fcd1c11d9639e0cb55b142bf5ca3..c6544bdd3df353fa291895c3276aab4d8fbc8210 100644 (file)
@@ -144,8 +144,12 @@ public:
   virtual int max_neighbors() { return npes - 1; }
 
   virtual void neighbors(int mype, int* _n, int &nb){
-               CkPrintf("neighbors:Nothing in here..\n");
-       }
+      nb = 0;
+      for(int i=1; i<=ppn; i++)
+      {
+          _n[nb++] = (mype+i)%npes;
+      }
+  }
        
        int get_hop_count(int src,int dest){
                
@@ -168,11 +172,23 @@ typedef LBTopo_smp_n<1> LBTopo_smp_n_1;
 typedef LBTopo_smp_n<2> LBTopo_smp_n_2;
 typedef LBTopo_smp_n<3> LBTopo_smp_n_3;
 typedef LBTopo_smp_n<4> LBTopo_smp_n_4;
+typedef LBTopo_smp_n<5> LBTopo_smp_n_5;
+typedef LBTopo_smp_n<6> LBTopo_smp_n_6;
+typedef LBTopo_smp_n<7> LBTopo_smp_n_7;
+typedef LBTopo_smp_n<8> LBTopo_smp_n_8;
+typedef LBTopo_smp_n<9> LBTopo_smp_n_9;
+typedef LBTopo_smp_n<10> LBTopo_smp_n_10;
 
 LBTOPO_MACRO(LBTopo_smp_n_1)
 LBTOPO_MACRO(LBTopo_smp_n_2)
 LBTOPO_MACRO(LBTopo_smp_n_3)
 LBTOPO_MACRO(LBTopo_smp_n_4)
+LBTOPO_MACRO(LBTopo_smp_n_5)
+LBTOPO_MACRO(LBTopo_smp_n_6)
+LBTOPO_MACRO(LBTopo_smp_n_7)
+LBTOPO_MACRO(LBTopo_smp_n_8)
+LBTOPO_MACRO(LBTopo_smp_n_9)
+LBTOPO_MACRO(LBTopo_smp_n_10)
 
 
 // ring
@@ -681,6 +697,9 @@ typedef LBTopo_torus_nd<4> LBTopo_torus_nd_4;
 typedef LBTopo_torus_nd<5> LBTopo_torus_nd_5;
 typedef LBTopo_torus_nd<6> LBTopo_torus_nd_6;
 typedef LBTopo_torus_nd<7> LBTopo_torus_nd_7;
+typedef LBTopo_torus_nd<8> LBTopo_torus_nd_8;
+typedef LBTopo_torus_nd<9> LBTopo_torus_nd_9;
+typedef LBTopo_torus_nd<10> LBTopo_torus_nd_10;
 
 LBTOPO_MACRO(LBTopo_torus_nd_1)
 LBTOPO_MACRO(LBTopo_torus_nd_2)
@@ -689,7 +708,176 @@ LBTOPO_MACRO(LBTopo_torus_nd_4)
 LBTOPO_MACRO(LBTopo_torus_nd_5)
 LBTOPO_MACRO(LBTopo_torus_nd_6)
 LBTOPO_MACRO(LBTopo_torus_nd_7)
+LBTOPO_MACRO(LBTopo_torus_nd_8)
+LBTOPO_MACRO(LBTopo_torus_nd_9)
+LBTOPO_MACRO(LBTopo_torus_nd_10)
+
+//  TORUS ND  and SMP Awareness
+//  added by Yanhua Sun 
+
+template <int dimension>
+class LBTopo_torus_nd_smp: public LBTopology {
+private:
+  // inherited int npes;
+  int* Cardinality;
+  int  VirtualNodeCount;
+  int* TempCo;
+  int  ppn;
+  int  NumOfNodes;
+private:
+  int GetNeighborID(int ProcessorID, int number) {
+    CmiAssert(number>=0 && number<max_neighbors());
+    CmiAssert(ProcessorID>=0 && ProcessorID<npes);
+    int neighborId; 
+    int nodeId = CmiPhysicalNodeID(ProcessorID);
+    
+    get_node_coordinates(nodeId, TempCo);
+
+    int index = number/2;
+    int displacement = (number%2)? -1: 1;
+    do{
+      TempCo[index] = (TempCo[index] + displacement + Cardinality[index]) % Cardinality[index];
+      get_node_id(TempCo, &nodeId);
+    } while (nodeId >= NumOfNodes);
+    neighborId = CmiGetFirstPeOnPhysicalNode(nodeId);
+    return neighborId;
+  }
+public:
+  LBTopo_torus_nd_smp(int p): LBTopology(p) /*inherited :npes(p) */ {
+    int i;
+    CmiAssert(dimension>=1 && dimension<=32);
+    CmiAssert(p>=1);
+
+    int ppn = CmiNumPesOnPhysicalNode(0);
+    int NumOfNodes = CmiNumPhysicalNodes();
+
+    Cardinality = new int[dimension];
+    TempCo = new int[dimension];
+    double pp = NumOfNodes;
+    for(i=0;i<dimension;i++) {
+      Cardinality[i] = (int)ceil(pow(pp,1.0/(dimension-i))-1e-5);
+      pp = pp / Cardinality[i];
+    }
+    VirtualNodeCount = 1;
+    for(i=0;i<dimension;i++) {
+      VirtualNodeCount *= Cardinality[i];
+    }
+
+    CmiPrintf(" ppn=%d, NumOfNodes=%d\n", ppn, NumOfNodes);
+  }
+  ~LBTopo_torus_nd_smp() {
+    delete[] Cardinality;
+    delete[] TempCo;
+  }
+  virtual int max_neighbors() {
+    return dimension*2 + ppn - 1;
+  }
+  virtual void neighbors(int mype, int* _n, int &nb) {
+    nb = 0;
+    int *nodePeList; 
+    int numpes;
+    int rank = CmiPhysicalRank(mype);
+    int node = CmiPhysicalNodeID(mype);
+    int _ppn_ = CmiNumPesOnPhysicalNode(node);
+    CmiGetPesOnPhysicalNode(node, &nodePeList, &numpes); 
+    CmiPrintf(" 22222222222222ppn=%d, NumOfNodes=%d, rank=%d, node=%d, numpes=%d\n", _ppn_, NumOfNodes, rank, node, numpes);
+   
+    for(int i=0; i<numpes; i++)
+    {
+        int _pid = nodePeList[i];
+        if(_pid != mype)
+        {
+             _n[nb] = _pid;
+             nb++;
+        }
+    }
+
+    /* for inter-node communication */
+    if(mype == CmiGetFirstPeOnPhysicalNode(node))
+    {
+        for(int j=0; j<dimension*2; j++)
+        {
+            _n[nb] = (mype+1)%npes;//GetNeighborID(mype, j);
+            /* the first processors in other nodes */
+            if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
+        }
+    }
+
+  CmiPrintf(" Yes my neighbor = %d ppn=%d, NumOfNodes=%d, rank=%d, node=%d, numpes=%d\n", nb, _ppn_, NumOfNodes, rank, node, numpes);
+  }
+  virtual int get_dimension() {
+    return dimension;
+  }
+  virtual bool get_node_coordinates(int node_id, int* node_coordinates) {
+    CmiAssert(node_id>=0 && node_id<VirtualNodeCount);
+    CmiAssert( node_coordinates != NULL );
+    for(int i=0;i<dimension;i++) {
+      node_coordinates[i] = node_id % Cardinality[i];
+      node_id = node_id / Cardinality[i];
+    }
+    return true;
+  }
+  virtual bool get_node_id(const int* node_coordinates, int* node_id) {
+    int i;
+    CmiAssert( node_coordinates != NULL );
+    CmiAssert( node_id != NULL );
+    for(i=dimension-1;i>=0;i--) 
+      CmiAssert( 0<=node_coordinates[i] && node_coordinates[i]<Cardinality[i]);
+    (*node_id) = 0;
+    for(i=dimension-1;i>=0;i--) {
+      (*node_id) = (*node_id)* Cardinality[i] + node_coordinates[i];
+    }
+    return true;
+  }
+  //Note: if abs(difference)*2 = cardinality, the difference is set to zero
+  virtual bool coordinate_difference(const int* my_coordinates, const int* target_coordinates, int* difference) { 
+    CmiAssert( my_coordinates != NULL);
+    CmiAssert( target_coordinates != NULL);
+    CmiAssert( difference != NULL);
+    for(int i=0;i<dimension;i++) {
+      difference[i] = target_coordinates[i] - my_coordinates[i];
+      if (abs(difference[i])*2 > Cardinality[i]) {
+        difference[i] += (difference[i]>0) ? -Cardinality[i] : Cardinality[i];
+      } else if (abs(difference[i])*2 == Cardinality[i]) {
+        difference[i] = 0;
+      }
+    }
+    return true;
+  }
+  //Note: if abs(difference)*2 = cardinality, the difference is set to zero
+  virtual bool coordinate_difference(int my_processor_id, int target_processor_id, int* difference) { 
+    CmiAssert( difference != NULL);
+    int my_coordinates[dimension];
+    int target_coordinates[dimension];
+    get_processor_coordinates(my_processor_id, my_coordinates);
+    get_processor_coordinates(target_processor_id, target_coordinates);
+    coordinate_difference(my_coordinates, target_coordinates, difference);
+    return true;
+  }
+};
+
 
+typedef LBTopo_torus_nd_smp<1> LBTopo_torus_nd_smp_1;
+typedef LBTopo_torus_nd_smp<2> LBTopo_torus_nd_smp_2;
+typedef LBTopo_torus_nd_smp<3> LBTopo_torus_nd_smp_3;
+typedef LBTopo_torus_nd_smp<4> LBTopo_torus_nd_smp_4;
+typedef LBTopo_torus_nd_smp<5> LBTopo_torus_nd_smp_5;
+typedef LBTopo_torus_nd_smp<6> LBTopo_torus_nd_smp_6;
+typedef LBTopo_torus_nd_smp<7> LBTopo_torus_nd_smp_7;
+typedef LBTopo_torus_nd_smp<8> LBTopo_torus_nd_smp_8;
+typedef LBTopo_torus_nd_smp<9> LBTopo_torus_nd_smp_9;
+typedef LBTopo_torus_nd_smp<10> LBTopo_torus_nd_smp_10;
+
+LBTOPO_MACRO(LBTopo_torus_nd_smp_1)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_2)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_3)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_4)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_5)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_6)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_7)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_8)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_9)
+LBTOPO_MACRO(LBTopo_torus_nd_smp_10)
 
 
 //Torus ND with unequal number of processors in each dimension
@@ -1002,6 +1190,19 @@ public:
     lbTopos.push_back(new LBTopoMap("torus_nd_5", createLBTopo_torus_nd_5));
     lbTopos.push_back(new LBTopoMap("torus_nd_6", createLBTopo_torus_nd_6));
     lbTopos.push_back(new LBTopoMap("torus_nd_7", createLBTopo_torus_nd_7));
+    lbTopos.push_back(new LBTopoMap("torus_nd_8", createLBTopo_torus_nd_8));
+    lbTopos.push_back(new LBTopoMap("torus_nd_9", createLBTopo_torus_nd_9));
+    lbTopos.push_back(new LBTopoMap("torus_nd_10", createLBTopo_torus_nd_10));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_1", createLBTopo_torus_nd_smp_1));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_2", createLBTopo_torus_nd_smp_2));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_3", createLBTopo_torus_nd_smp_3));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_4", createLBTopo_torus_nd_smp_4));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_5", createLBTopo_torus_nd_smp_5));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_6", createLBTopo_torus_nd_smp_6));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_7", createLBTopo_torus_nd_smp_7));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_8", createLBTopo_torus_nd_smp_8));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_9", createLBTopo_torus_nd_smp_9));
+    lbTopos.push_back(new LBTopoMap("torus_nd_smp_10", createLBTopo_torus_nd_smp_10));
     lbTopos.push_back(new LBTopoMap("itorus_nd_1", createLBTopo_itorus_nd_1));
     lbTopos.push_back(new LBTopoMap("itorus_nd_2", createLBTopo_itorus_nd_2));
     lbTopos.push_back(new LBTopoMap("itorus_nd_3", createLBTopo_itorus_nd_3));
@@ -1025,6 +1226,12 @@ public:
     lbTopos.push_back(new LBTopoMap("smp_n_2", createLBTopo_smp_n_2));
     lbTopos.push_back(new LBTopoMap("smp_n_3", createLBTopo_smp_n_3));
     lbTopos.push_back(new LBTopoMap("smp_n_4", createLBTopo_smp_n_4));
+    lbTopos.push_back(new LBTopoMap("smp_n_5", createLBTopo_smp_n_5));
+    lbTopos.push_back(new LBTopoMap("smp_n_6", createLBTopo_smp_n_6));
+    lbTopos.push_back(new LBTopoMap("smp_n_7", createLBTopo_smp_n_7));
+    lbTopos.push_back(new LBTopoMap("smp_n_8", createLBTopo_smp_n_8));
+    lbTopos.push_back(new LBTopoMap("smp_n_9", createLBTopo_smp_n_9));
+    lbTopos.push_back(new LBTopoMap("smp_n_10", createLBTopo_smp_n_10));
   }
   ~LBTopoVec() {
     for (int i=0; i<lbTopos.length(); i++)