--- branches/mmeineke/OOPSE/libmdtools/mpiSimulation.cpp 2003/03/21 17:42:12 377 +++ trunk/OOPSE/libmdtools/mpiSimulation.cpp 2004/06/01 21:45:22 1217 @@ -1,27 +1,30 @@ #ifdef IS_MPI - -#include -#include +#include +#include +#include +#include #include -#include #include "mpiSimulation.hpp" #include "simError.h" #include "fortranWrappers.hpp" +#include "randomSPRNG.hpp" - - - mpiSimulation* mpiSim; mpiSimulation::mpiSimulation(SimInfo* the_entryPlug) { entryPlug = the_entryPlug; - mpiPlug = new mpiSimData; + parallelData = new mpiSimData; - mpiPlug->numberProcessors = MPI::COMM_WORLD.Get_size(); - mpiPlug->myNode = worldRank; - + MPI_Comm_size(MPI_COMM_WORLD, &(parallelData->nProcessors) ); + parallelData->myNode = worldRank; + + MolToProcMap = new int[entryPlug->n_mol]; + MolComponentType = new int[entryPlug->n_mol]; + AtomToProcMap = new int[entryPlug->n_atoms]; + GroupToProcMap = new int[entryPlug->ngroup]; + mpiSim = this; wrapMeSimParallel( this ); } @@ -29,176 +32,391 @@ mpiSimulation::~mpiSimulation(){ mpiSimulation::~mpiSimulation(){ - delete mpiPlug; + delete[] MolToProcMap; + delete[] MolComponentType; + delete[] AtomToProcMap; + delete[] GroupToProcMap; + + delete parallelData; // perhaps we should let fortran know the party is over. } +void mpiSimulation::divideLabor( ){ - -int* mpiSimulation::divideLabor( void ){ - - int* globalIndex; - int nComponents; MoleculeStamp** compStamps; + randomSPRNG *myRandom; int* componentsNmol; + int* AtomsPerProc; + int* GroupsPerProc; double numerator; double denominator; double precast; + double x, y, a; + int old_atoms, add_atoms, new_atoms; + int old_groups, add_groups, new_groups; int nTarget; - int molIndex, atomIndex, compIndex, compStart; + int molIndex, atomIndex, groupIndex; int done; - int nLocal, molLocal; - int i, index; - int smallDiff, bigDiff; + int i, j, loops, which_proc; + int nmol_global, nmol_local; + int ngroups_global, ngroups_local; + int natoms_global, natoms_local; + int ncutoff_groups, nAtomsInGroups; + int local_index; + int baseSeed = entryPlug->getSeed(); + CutoffGroupStamp* cg; - int testSum; - nComponents = entryPlug->nComponents; compStamps = entryPlug->compStamps; componentsNmol = entryPlug->componentsNmol; + AtomsPerProc = new int[parallelData->nProcessors]; + GroupsPerProc = new int[parallelData->nProcessors]; + + parallelData->nAtomsGlobal = entryPlug->n_atoms; + parallelData->nBondsGlobal = entryPlug->n_bonds; + parallelData->nBendsGlobal = entryPlug->n_bends; + parallelData->nTorsionsGlobal = entryPlug->n_torsions; + parallelData->nSRIGlobal = entryPlug->n_SRI; + parallelData->nGroupsGlobal = entryPlug->ngroup; + parallelData->nMolGlobal = entryPlug->n_mol; - mpiPlug->nAtomsGlobal = entryPlug->n_atoms; - mpiPlug->nBondsGlobal = entryPlug->n_bonds; - mpiPlug->nBendsGlobal = entryPlug->n_bends; - mpiPlug->nTorsionsGlobal = entryPlug->n_torsions; - mpiPlug->nSRIGlobal = entryPlug->n_SRI; - mpiPlug->nMolGlobal = entryPlug->n_mol; + myRandom = new randomSPRNG( baseSeed ); - numerator = (double) entryPlug->n_atoms; - denominator = (double) mpiPlug->numberProcessors; - precast = numerator / denominator; - nTarget = (int)( precast + 0.5 ); - - molIndex = 0; - atomIndex = 0; - compIndex = 0; - compStart = 0; - for( i=0; i<(mpiPlug->numberProcessors-1); i++){ + a = 3.0 * (double)parallelData->nMolGlobal / (double)parallelData->nAtomsGlobal; + + // Initialize things that we'll send out later: + for (i = 0; i < parallelData->nProcessors; i++ ) { + AtomsPerProc[i] = 0; + GroupsPerProc[i] = 0; + } + for (i = 0; i < 
i <
parallelData->nMolGlobal; i++ ) { + // default to an error condition: + MolToProcMap[i] = -1; + MolComponentType[i] = -1; + } + for (i = 0; i < parallelData->nAtomsGlobal; i++ ) { + // default to an error condition: + AtomToProcMap[i] = -1; + } + for (i = 0; i < parallelData->nGroupsGlobal; i++ ) { + // default to an error condition: + GroupToProcMap[i] = -1; + } - done = 0; - nLocal = 0; - molLocal = 0; + if (parallelData->myNode == 0) { + numerator = (double) entryPlug->n_atoms; + denominator = (double) parallelData->nProcessors; + precast = numerator / denominator; + nTarget = (int)( precast + 0.5 ); - if( i == mpiPlug->myNode ){ - mpiPlug->myMolStart = molIndex; - mpiPlug->myAtomStart = atomIndex; + // Build the array of molecule component types first + molIndex = 0; + for (i=0; i < nComponents; i++) { + for (j=0; j < componentsNmol[i]; j++) { + MolComponentType[molIndex] = i; + molIndex++; + } } + + atomIndex = 0; + groupIndex = 0; + + for (i = 0; i < molIndex; i++ ) { + + done = 0; + loops = 0; + + while( !done ){ + loops++; + + // Pick a processor at random + + which_proc = (int) (myRandom->getRandom() * parallelData->nProcessors); + + // How many atoms does this processor have? + + old_atoms = AtomsPerProc[which_proc]; + add_atoms = compStamps[MolComponentType[i]]->getNAtoms(); + new_atoms = old_atoms + add_atoms; + + old_groups = GroupsPerProc[which_proc]; + ncutoff_groups = compStamps[MolComponentType[i]]->getNCutoffGroups(); + nAtomsInGroups = 0; + for (j = 0; j < ncutoff_groups; j++) { + cg = compStamps[MolComponentType[i]]->getCutoffGroup(j); + nAtomsInGroups += cg->getNMembers(); + } + add_groups = add_atoms - nAtomsInGroups + ncutoff_groups; + new_groups = old_groups + add_groups; + + // If we've been through this loop too many times, we need + // to just give up and assign the molecule to this processor + // and be done with it. + + if (loops > 100) { + sprintf( painCave.errMsg, + "I've tried 100 times to assign molecule %d to a " + " processor, but can't find a good spot.\n" + "I'm assigning it at random to processor %d.\n", + i, which_proc); + painCave.isFatal = 0; + simError(); + + MolToProcMap[i] = which_proc; + AtomsPerProc[which_proc] += add_atoms; + for (j = 0 ; j < add_atoms; j++ ) { + AtomToProcMap[atomIndex] = which_proc; + atomIndex++; + } + GroupsPerProc[which_proc] += add_groups; + for (j=0; j < add_groups; j++) { + GroupToProcMap[groupIndex] = which_proc; + groupIndex++; + } + done = 1; + continue; + } - while( !done ){ - - if( (molIndex-compStart) >= componentsNmol[compIndex] ){ - compStart = molIndex; - compIndex++; - continue; - } + // If we can add this molecule to this processor without sending + // it above nTarget, then go ahead and do it: + + if (new_atoms <= nTarget) { + MolToProcMap[i] = which_proc; + AtomsPerProc[which_proc] += add_atoms; + for (j = 0 ; j < add_atoms; j++ ) { + AtomToProcMap[atomIndex] = which_proc; + atomIndex++; + } + GroupsPerProc[which_proc] += add_groups; + for (j=0; j < add_groups; j++) { + GroupToProcMap[groupIndex] = which_proc; + groupIndex++; + } + done = 1; + continue; + } - nLocal += compStamps[compIndex]->getNAtoms(); - atomIndex += compStamps[compIndex]->getNAtoms(); - molIndex++; - molLocal++; + + // The only situation left is when new_atoms > nTarget. 
We + // want to accept this with some probability that dies off the + // farther we are from nTarget + + // roughly: x = new_atoms - nTarget + // Pacc(x) = exp(- a * x) + // where a = penalty / (average atoms per molecule) + + x = (double) (new_atoms - nTarget); + y = myRandom->getRandom(); - if ( nLocal == nTarget ) done = 1; - - else if( nLocal < nTarget ){ - smallDiff = nTarget - nLocal; - } - else if( nLocal > nTarget ){ - bigDiff = nLocal - nTarget; - - if( bigDiff < smallDiff ) done = 1; - else{ - molIndex--; - molLocal--; - atomIndex -= compStamps[compIndex]->getNAtoms(); - nLocal -= compStamps[compIndex]->getNAtoms(); - done = 1; - } + if (y < exp(- a * x)) { + MolToProcMap[i] = which_proc; + AtomsPerProc[which_proc] += add_atoms; + for (j = 0 ; j < add_atoms; j++ ) { + AtomToProcMap[atomIndex] = which_proc; + atomIndex++; + } + GroupsPerProc[which_proc] += add_groups; + for (j=0; j < add_groups; j++) { + GroupToProcMap[groupIndex] = which_proc; + groupIndex++; + } + done = 1; + continue; + } else { + continue; + } + } } + + + // Spray out this nonsense to all other processors: + + //std::cerr << "node 0 mol2proc = \n"; + //for (i = 0; i < parallelData->nMolGlobal; i++) + // std::cerr << i << "\t" << MolToProcMap[i] << "\n"; + + MPI_Bcast(MolToProcMap, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(AtomToProcMap, parallelData->nAtomsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupToProcMap, parallelData->nGroupsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(MolComponentType, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(AtomsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + } else { + + // Listen to your marching orders from processor 0: - if( i == mpiPlug->myNode ){ - mpiPlug->myMolEnd = (molIndex - 1); - mpiPlug->myAtomEnd = (atomIndex - 1); - mpiPlug->myNlocal = nLocal; - mpiPlug->myMol = molLocal; - } + MPI_Bcast(MolToProcMap, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); - numerator = (double)( entryPlug->n_atoms - atomIndex ); - denominator = (double)( mpiPlug->numberProcessors - (i+1) ); - precast = numerator / denominator; - nTarget = (int)( precast + 0.5 ); + MPI_Bcast(AtomToProcMap, parallelData->nAtomsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupToProcMap, parallelData->nGroupsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(MolComponentType, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(AtomsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + + } - - if( mpiPlug->myNode == mpiPlug->numberProcessors-1 ){ - mpiPlug->myMolStart = molIndex; - mpiPlug->myAtomStart = atomIndex; - nLocal = 0; - molLocal = 0; - while( compIndex < nComponents ){ - - if( (molIndex-compStart) >= componentsNmol[compIndex] ){ - compStart = molIndex; - compIndex++; - continue; - } + // Let's all check for sanity: - nLocal += compStamps[compIndex]->getNAtoms(); - atomIndex += compStamps[compIndex]->getNAtoms(); - molIndex++; - molLocal++; - } - - mpiPlug->myMolEnd = (molIndex - 1); - mpiPlug->myAtomEnd = (atomIndex - 1); - mpiPlug->myNlocal = nLocal; - mpiPlug->myMol = molLocal; + nmol_local = 0; + for (i = 0 ; i < parallelData->nMolGlobal; i++ ) { + if (MolToProcMap[i] == parallelData->myNode) { + nmol_local++; + } } + natoms_local = 0; + for (i = 0; i < 
i <
parallelData->nAtomsGlobal; i++) { + if (AtomToProcMap[i] == parallelData->myNode) { + natoms_local++; + } + } - MPI_Allreduce( &nLocal, &testSum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); - - if( mpiPlug->myNode == 0 ){ - if( testSum != entryPlug->n_atoms ){ - sprintf( painCave.errMsg, - "The summ of all nLocals, %d, did not equal the total number of atoms, %d.\n", - testSum, entryPlug->n_atoms ); - painCave.isFatal = 1; - simError(); + ngroups_local = 0; + for (i = 0; i < parallelData->nGroupsGlobal; i++) { + if (GroupToProcMap[i] == parallelData->myNode) { + ngroups_local++; } } + MPI_Allreduce(&nmol_local,&nmol_global,1,MPI_INT,MPI_SUM, + MPI_COMM_WORLD); + + MPI_Allreduce(&natoms_local,&natoms_global,1,MPI_INT, + MPI_SUM, MPI_COMM_WORLD); + + MPI_Allreduce(&ngroups_local,&ngroups_global,1,MPI_INT, + MPI_SUM, MPI_COMM_WORLD); + + if( nmol_global != entryPlug->n_mol ){ + sprintf( painCave.errMsg, + "The sum of all nmol_local, %d, did not equal the " + "total number of molecules, %d.\n", + nmol_global, entryPlug->n_mol ); + painCave.isFatal = 1; + simError(); + } + + if( natoms_global != entryPlug->n_atoms ){ + sprintf( painCave.errMsg, + "The sum of all natoms_local, %d, did not equal the " + "total number of atoms, %d.\n", + natoms_global, entryPlug->n_atoms ); + painCave.isFatal = 1; + simError(); + } + + if( ngroups_global != entryPlug->ngroup ){ + sprintf( painCave.errMsg, + "The sum of all ngroups_local, %d, did not equal the " + "total number of cutoffGroups, %d.\n", + ngroups_global, entryPlug->ngroup ); + painCave.isFatal = 1; + simError(); + } + sprintf( checkPointMsg, "Successfully divided the molecules among the processors.\n" ); MPIcheckPoint(); - // lets create the identity array + parallelData->nMolLocal = nmol_local; + parallelData->nAtomsLocal = natoms_local; + parallelData->nGroupsLocal = ngroups_local; - globalIndex = new int[mpiPlug->myNlocal]; - index = mpiPlug->myAtomStart; - for( i=0; imyNlocal; i++){ - globalIndex[i] = index; - index++; + globalAtomIndex.resize(parallelData->nAtomsLocal); + globalToLocalAtom.resize(parallelData->nAtomsGlobal); + local_index = 0; + for (i = 0; i < parallelData->nAtomsGlobal; i++) { + if (AtomToProcMap[i] == parallelData->myNode) { + globalAtomIndex[local_index] = i; + + globalToLocalAtom[i] = local_index; + local_index++; + + } + else + globalToLocalAtom[i] = -1; } - return globalIndex; + globalGroupIndex.resize(parallelData->nGroupsLocal); + globalToLocalGroup.resize(parallelData->nGroupsGlobal); + local_index = 0; + for (i = 0; i < parallelData->nGroupsGlobal; i++) { + if (GroupToProcMap[i] == parallelData->myNode) { + globalGroupIndex[local_index] = i; + + globalToLocalGroup[i] = local_index; + local_index++; + + } + else + globalToLocalGroup[i] = -1; + } + + globalMolIndex.resize(parallelData->nMolLocal); + globalToLocalMol.resize(parallelData->nMolGlobal); + local_index = 0; + for (i = 0; i < parallelData->nMolGlobal; i++) { + if (MolToProcMap[i] == parallelData->myNode) { + globalMolIndex[local_index] = i; + globalToLocalMol[i] = local_index; + local_index++; + } + else + globalToLocalMol[i] = -1; + } + } void mpiSimulation::mpiRefresh( void ){ int isError, i; - int *globalIndex = new int[mpiPlug->myNlocal]; + int *localToGlobalAtomIndex = new int[parallelData->nAtomsLocal]; + int *localToGlobalGroupIndex = new int[parallelData->nGroupsLocal]; - for(i=0; imyNlocal; i++) globalIndex[i] = entryPlug->atoms[i]->getGlobalIndex(); + // Fortran indexing needs to be increased by 1 in order to get the 2 + // languages to not barf + for(i 
= 0; i < parallelData->nAtomsLocal; i++) + localToGlobalAtomIndex[i] = globalAtomIndex[i] + 1; + + for(i = 0; i < parallelData->nGroupsLocal; i++) + localToGlobalGroupIndex[i] = globalGroupIndex[i] + 1; isError = 0; - setFsimParallel( mpiPlug, &(entryPlug->n_atoms), globalIndex, &isError ); + + setFsimParallel( parallelData, + &(parallelData->nAtomsLocal), localToGlobalAtomIndex, + &(parallelData->nGroupsLocal), localToGlobalGroupIndex, + &isError ); + if( isError ){ sprintf( painCave.errMsg, @@ -207,8 +425,10 @@ void mpiSimulation::mpiRefresh( void ){ simError(); } - delete[] globalIndex; + delete[] localToGlobalGroupIndex; + delete[] localToGlobalAtomIndex; + sprintf( checkPointMsg, " mpiRefresh successful.\n" ); MPIcheckPoint();
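
Note (illustration, not part of the patch): the new divideLabor() spreads molecules over processors with a simple Monte Carlo rule. A molecule goes to a randomly chosen processor outright if that keeps the processor at or below the target atom count nTarget; an over-target choice is accepted with probability exp(-a*x), where x is the overshoot in atoms and a = 3 * nMolGlobal / nAtomsGlobal (a penalty of 3 divided by the average atoms per molecule); after 100 failed attempts the molecule is forced onto the current candidate. The sketch below is a minimal, self-contained rendering of that acceptance rule only: cutoff groups, the MPI broadcasts, and the randomSPRNG generator are omitted, and the function name assignMolecules plus the use of std::mt19937 are assumptions made for the sketch, not code from the patch.

    #include <cmath>
    #include <vector>
    #include <random>

    // Sketch of the divideLabor() load-balancing rule: returns a
    // molecule-to-processor map, assuming atomsPerMol is non-empty.
    std::vector<int> assignMolecules(const std::vector<int>& atomsPerMol,
                                     int nProcs, unsigned seed) {
      std::mt19937 gen(seed);
      std::uniform_real_distribution<double> uniform(0.0, 1.0);

      int nMol = (int) atomsPerMol.size();
      int nAtoms = 0;
      for (int i = 0; i < nMol; i++) nAtoms += atomsPerMol[i];

      // Ideal atoms per processor, and the penalty constant
      // a = 3 / (average atoms per molecule), as in the patch.
      int nTarget = (int)((double) nAtoms / (double) nProcs + 0.5);
      double a = 3.0 * (double) nMol / (double) nAtoms;

      std::vector<int> molToProc(nMol, -1);
      std::vector<int> atomsPerProc(nProcs, 0);

      for (int i = 0; i < nMol; i++) {
        for (int loops = 1; ; loops++) {
          int proc = (int)(uniform(gen) * nProcs);            // pick a processor at random
          int newAtoms = atomsPerProc[proc] + atomsPerMol[i];
          double x = (double)(newAtoms - nTarget);            // overshoot above the target

          // Accept if we stay at or below the target, if we have already
          // tried 100 times, or with probability exp(-a * x) otherwise.
          if (newAtoms <= nTarget || loops > 100 ||
              uniform(gen) < std::exp(-a * x)) {
            molToProc[i] = proc;
            atomsPerProc[proc] += atomsPerMol[i];
            break;
          }
        }
      }
      return molToProc;
    }

Because processors are drawn uniformly at random and overloads are penalized exponentially in the overshoot, the per-processor atom counts stay close to nTarget without any global sorting of molecules, which is the behavior the patch's warning message and the 100-attempt cutoff guard against in pathological cases.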