--- branches/mmeineke/OOPSE/libmdtools/mpiSimulation.cpp 2003/03/21 17:42:12 377 +++ trunk/OOPSE/libmdtools/mpiSimulation.cpp 2004/06/01 21:45:22 1217 @@ -1,27 +1,30 @@ #ifdef IS_MPI - -#include -#include +#include +#include +#include +#include #include -#include #include "mpiSimulation.hpp" #include "simError.h" #include "fortranWrappers.hpp" +#include "randomSPRNG.hpp" - - - mpiSimulation* mpiSim; mpiSimulation::mpiSimulation(SimInfo* the_entryPlug) { entryPlug = the_entryPlug; - mpiPlug = new mpiSimData; + parallelData = new mpiSimData; - mpiPlug->numberProcessors = MPI::COMM_WORLD.Get_size(); - mpiPlug->myNode = worldRank; - + MPI_Comm_size(MPI_COMM_WORLD, &(parallelData->nProcessors) ); + parallelData->myNode = worldRank; + + MolToProcMap = new int[entryPlug->n_mol]; + MolComponentType = new int[entryPlug->n_mol]; + AtomToProcMap = new int[entryPlug->n_atoms]; + GroupToProcMap = new int[entryPlug->ngroup]; + mpiSim = this; wrapMeSimParallel( this ); } @@ -29,176 +32,391 @@ mpiSimulation::~mpiSimulation(){ mpiSimulation::~mpiSimulation(){ - delete mpiPlug; + delete[] MolToProcMap; + delete[] MolComponentType; + delete[] AtomToProcMap; + delete[] GroupToProcMap; + + delete parallelData; // perhaps we should let fortran know the party is over. } +void mpiSimulation::divideLabor( ){ - -int* mpiSimulation::divideLabor( void ){ - - int* globalIndex; - int nComponents; MoleculeStamp** compStamps; + randomSPRNG *myRandom; int* componentsNmol; + int* AtomsPerProc; + int* GroupsPerProc; double numerator; double denominator; double precast; + double x, y, a; + int old_atoms, add_atoms, new_atoms; + int old_groups, add_groups, new_groups; int nTarget; - int molIndex, atomIndex, compIndex, compStart; + int molIndex, atomIndex, groupIndex; int done; - int nLocal, molLocal; - int i, index; - int smallDiff, bigDiff; + int i, j, loops, which_proc; + int nmol_global, nmol_local; + int ngroups_global, ngroups_local; + int natoms_global, natoms_local; + int ncutoff_groups, nAtomsInGroups; + int local_index; + int baseSeed = entryPlug->getSeed(); + CutoffGroupStamp* cg; - int testSum; - nComponents = entryPlug->nComponents; compStamps = entryPlug->compStamps; componentsNmol = entryPlug->componentsNmol; + AtomsPerProc = new int[parallelData->nProcessors]; + GroupsPerProc = new int[parallelData->nProcessors]; + + parallelData->nAtomsGlobal = entryPlug->n_atoms; + parallelData->nBondsGlobal = entryPlug->n_bonds; + parallelData->nBendsGlobal = entryPlug->n_bends; + parallelData->nTorsionsGlobal = entryPlug->n_torsions; + parallelData->nSRIGlobal = entryPlug->n_SRI; + parallelData->nGroupsGlobal = entryPlug->ngroup; + parallelData->nMolGlobal = entryPlug->n_mol; - mpiPlug->nAtomsGlobal = entryPlug->n_atoms; - mpiPlug->nBondsGlobal = entryPlug->n_bonds; - mpiPlug->nBendsGlobal = entryPlug->n_bends; - mpiPlug->nTorsionsGlobal = entryPlug->n_torsions; - mpiPlug->nSRIGlobal = entryPlug->n_SRI; - mpiPlug->nMolGlobal = entryPlug->n_mol; + myRandom = new randomSPRNG( baseSeed ); - numerator = (double) entryPlug->n_atoms; - denominator = (double) mpiPlug->numberProcessors; - precast = numerator / denominator; - nTarget = (int)( precast + 0.5 ); - - molIndex = 0; - atomIndex = 0; - compIndex = 0; - compStart = 0; - for( i=0; i<(mpiPlug->numberProcessors-1); i++){ + a = 3.0 * (double)parallelData->nMolGlobal / (double)parallelData->nAtomsGlobal; + + // Initialize things that we'll send out later: + for (i = 0; i < parallelData->nProcessors; i++ ) { + AtomsPerProc[i] = 0; + GroupsPerProc[i] = 0; + } + for (i = 0; i < 
i <
parallelData->nMolGlobal; i++ ) { + // default to an error condition: + MolToProcMap[i] = -1; + MolComponentType[i] = -1; + } + for (i = 0; i < parallelData->nAtomsGlobal; i++ ) { + // default to an error condition: + AtomToProcMap[i] = -1; + } + for (i = 0; i < parallelData->nGroupsGlobal; i++ ) { + // default to an error condition: + GroupToProcMap[i] = -1; + } - done = 0; - nLocal = 0; - molLocal = 0; + if (parallelData->myNode == 0) { + numerator = (double) entryPlug->n_atoms; + denominator = (double) parallelData->nProcessors; + precast = numerator / denominator; + nTarget = (int)( precast + 0.5 ); - if( i == mpiPlug->myNode ){ - mpiPlug->myMolStart = molIndex; - mpiPlug->myAtomStart = atomIndex; + // Build the array of molecule component types first + molIndex = 0; + for (i=0; i < nComponents; i++) { + for (j=0; j < componentsNmol[i]; j++) { + MolComponentType[molIndex] = i; + molIndex++; + } } + + atomIndex = 0; + groupIndex = 0; + + for (i = 0; i < molIndex; i++ ) { + + done = 0; + loops = 0; + + while( !done ){ + loops++; + + // Pick a processor at random + + which_proc = (int) (myRandom->getRandom() * parallelData->nProcessors); + + // How many atoms does this processor have? + + old_atoms = AtomsPerProc[which_proc]; + add_atoms = compStamps[MolComponentType[i]]->getNAtoms(); + new_atoms = old_atoms + add_atoms; + + old_groups = GroupsPerProc[which_proc]; + ncutoff_groups = compStamps[MolComponentType[i]]->getNCutoffGroups(); + nAtomsInGroups = 0; + for (j = 0; j < ncutoff_groups; j++) { + cg = compStamps[MolComponentType[i]]->getCutoffGroup(j); + nAtomsInGroups += cg->getNMembers(); + } + add_groups = add_atoms - nAtomsInGroups + ncutoff_groups; + new_groups = old_groups + add_groups; + + // If we've been through this loop too many times, we need + // to just give up and assign the molecule to this processor + // and be done with it. + + if (loops > 100) { + sprintf( painCave.errMsg, + "I've tried 100 times to assign molecule %d to a " + " processor, but can't find a good spot.\n" + "I'm assigning it at random to processor %d.\n", + i, which_proc); + painCave.isFatal = 0; + simError(); + + MolToProcMap[i] = which_proc; + AtomsPerProc[which_proc] += add_atoms; + for (j = 0 ; j < add_atoms; j++ ) { + AtomToProcMap[atomIndex] = which_proc; + atomIndex++; + } + GroupsPerProc[which_proc] += add_groups; + for (j=0; j < add_groups; j++) { + GroupToProcMap[groupIndex] = which_proc; + groupIndex++; + } + done = 1; + continue; + } - while( !done ){ - - if( (molIndex-compStart) >= componentsNmol[compIndex] ){ - compStart = molIndex; - compIndex++; - continue; - } + // If we can add this molecule to this processor without sending + // it above nTarget, then go ahead and do it: + + if (new_atoms <= nTarget) { + MolToProcMap[i] = which_proc; + AtomsPerProc[which_proc] += add_atoms; + for (j = 0 ; j < add_atoms; j++ ) { + AtomToProcMap[atomIndex] = which_proc; + atomIndex++; + } + GroupsPerProc[which_proc] += add_groups; + for (j=0; j < add_groups; j++) { + GroupToProcMap[groupIndex] = which_proc; + groupIndex++; + } + done = 1; + continue; + } - nLocal += compStamps[compIndex]->getNAtoms(); - atomIndex += compStamps[compIndex]->getNAtoms(); - molIndex++; - molLocal++; + + // The only situation left is when new_atoms > nTarget. 
We + // want to accept this with some probability that dies off the + // farther we are from nTarget + + // roughly: x = new_atoms - nTarget + // Pacc(x) = exp(- a * x) + // where a = penalty / (average atoms per molecule) + + x = (double) (new_atoms - nTarget); + y = myRandom->getRandom(); - if ( nLocal == nTarget ) done = 1; - - else if( nLocal < nTarget ){ - smallDiff = nTarget - nLocal; - } - else if( nLocal > nTarget ){ - bigDiff = nLocal - nTarget; - - if( bigDiff < smallDiff ) done = 1; - else{ - molIndex--; - molLocal--; - atomIndex -= compStamps[compIndex]->getNAtoms(); - nLocal -= compStamps[compIndex]->getNAtoms(); - done = 1; - } + if (y < exp(- a * x)) { + MolToProcMap[i] = which_proc; + AtomsPerProc[which_proc] += add_atoms; + for (j = 0 ; j < add_atoms; j++ ) { + AtomToProcMap[atomIndex] = which_proc; + atomIndex++; + } + GroupsPerProc[which_proc] += add_groups; + for (j=0; j < add_groups; j++) { + GroupToProcMap[groupIndex] = which_proc; + groupIndex++; + } + done = 1; + continue; + } else { + continue; + } + } } + + + // Spray out this nonsense to all other processors: + + //std::cerr << "node 0 mol2proc = \n"; + //for (i = 0; i < parallelData->nMolGlobal; i++) + // std::cerr << i << "\t" << MolToProcMap[i] << "\n"; + + MPI_Bcast(MolToProcMap, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(AtomToProcMap, parallelData->nAtomsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupToProcMap, parallelData->nGroupsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(MolComponentType, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(AtomsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + } else { + + // Listen to your marching orders from processor 0: - if( i == mpiPlug->myNode ){ - mpiPlug->myMolEnd = (molIndex - 1); - mpiPlug->myAtomEnd = (atomIndex - 1); - mpiPlug->myNlocal = nLocal; - mpiPlug->myMol = molLocal; - } + MPI_Bcast(MolToProcMap, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); - numerator = (double)( entryPlug->n_atoms - atomIndex ); - denominator = (double)( mpiPlug->numberProcessors - (i+1) ); - precast = numerator / denominator; - nTarget = (int)( precast + 0.5 ); + MPI_Bcast(AtomToProcMap, parallelData->nAtomsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupToProcMap, parallelData->nGroupsGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(MolComponentType, parallelData->nMolGlobal, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(AtomsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Bcast(GroupsPerProc, parallelData->nProcessors, + MPI_INT, 0, MPI_COMM_WORLD); + + } - - if( mpiPlug->myNode == mpiPlug->numberProcessors-1 ){ - mpiPlug->myMolStart = molIndex; - mpiPlug->myAtomStart = atomIndex; - nLocal = 0; - molLocal = 0; - while( compIndex < nComponents ){ - - if( (molIndex-compStart) >= componentsNmol[compIndex] ){ - compStart = molIndex; - compIndex++; - continue; - } + // Let's all check for sanity: - nLocal += compStamps[compIndex]->getNAtoms(); - atomIndex += compStamps[compIndex]->getNAtoms(); - molIndex++; - molLocal++; - } - - mpiPlug->myMolEnd = (molIndex - 1); - mpiPlug->myAtomEnd = (atomIndex - 1); - mpiPlug->myNlocal = nLocal; - mpiPlug->myMol = molLocal; + nmol_local = 0; + for (i = 0 ; i < parallelData->nMolGlobal; i++ ) { + if (MolToProcMap[i] == parallelData->myNode) { + nmol_local++; + } } + natoms_local = 0; + for (i = 0; i < 
i <
parallelData->nAtomsGlobal; i++) { + if (AtomToProcMap[i] == parallelData->myNode) { + natoms_local++; + } + } - MPI_Allreduce( &nLocal, &testSum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); - - if( mpiPlug->myNode == 0 ){ - if( testSum != entryPlug->n_atoms ){ - sprintf( painCave.errMsg, - "The summ of all nLocals, %d, did not equal the total number of atoms, %d.\n", - testSum, entryPlug->n_atoms ); - painCave.isFatal = 1; - simError(); + ngroups_local = 0; + for (i = 0; i < parallelData->nGroupsGlobal; i++) { + if (GroupToProcMap[i] == parallelData->myNode) { + ngroups_local++; } } + MPI_Allreduce(&nmol_local,&nmol_global,1,MPI_INT,MPI_SUM, + MPI_COMM_WORLD); + + MPI_Allreduce(&natoms_local,&natoms_global,1,MPI_INT, + MPI_SUM, MPI_COMM_WORLD); + + MPI_Allreduce(&ngroups_local,&ngroups_global,1,MPI_INT, + MPI_SUM, MPI_COMM_WORLD); + + if( nmol_global != entryPlug->n_mol ){ + sprintf( painCave.errMsg, + "The sum of all nmol_local, %d, did not equal the " + "total number of molecules, %d.\n", + nmol_global, entryPlug->n_mol ); + painCave.isFatal = 1; + simError(); + } + + if( natoms_global != entryPlug->n_atoms ){ + sprintf( painCave.errMsg, + "The sum of all natoms_local, %d, did not equal the " + "total number of atoms, %d.\n", + natoms_global, entryPlug->n_atoms ); + painCave.isFatal = 1; + simError(); + } + + if( ngroups_global != entryPlug->ngroup ){ + sprintf( painCave.errMsg, + "The sum of all ngroups_local, %d, did not equal the " + "total number of cutoffGroups, %d.\n", + ngroups_global, entryPlug->ngroup ); + painCave.isFatal = 1; + simError(); + } + sprintf( checkPointMsg, "Successfully divided the molecules among the processors.\n" ); MPIcheckPoint(); - // lets create the identity array + parallelData->nMolLocal = nmol_local; + parallelData->nAtomsLocal = natoms_local; + parallelData->nGroupsLocal = ngroups_local; - globalIndex = new int[mpiPlug->myNlocal]; - index = mpiPlug->myAtomStart; - for( i=0; imyNlocal; i++){ - globalIndex[i] = index; - index++; + globalAtomIndex.resize(parallelData->nAtomsLocal); + globalToLocalAtom.resize(parallelData->nAtomsGlobal); + local_index = 0; + for (i = 0; i < parallelData->nAtomsGlobal; i++) { + if (AtomToProcMap[i] == parallelData->myNode) { + globalAtomIndex[local_index] = i; + + globalToLocalAtom[i] = local_index; + local_index++; + + } + else + globalToLocalAtom[i] = -1; } - return globalIndex; + globalGroupIndex.resize(parallelData->nGroupsLocal); + globalToLocalGroup.resize(parallelData->nGroupsGlobal); + local_index = 0; + for (i = 0; i < parallelData->nGroupsGlobal; i++) { + if (GroupToProcMap[i] == parallelData->myNode) { + globalGroupIndex[local_index] = i; + + globalToLocalGroup[i] = local_index; + local_index++; + + } + else + globalToLocalGroup[i] = -1; + } + + globalMolIndex.resize(parallelData->nMolLocal); + globalToLocalMol.resize(parallelData->nMolGlobal); + local_index = 0; + for (i = 0; i < parallelData->nMolGlobal; i++) { + if (MolToProcMap[i] == parallelData->myNode) { + globalMolIndex[local_index] = i; + globalToLocalMol[i] = local_index; + local_index++; + } + else + globalToLocalMol[i] = -1; + } + } void mpiSimulation::mpiRefresh( void ){ int isError, i; - int *globalIndex = new int[mpiPlug->myNlocal]; + int *localToGlobalAtomIndex = new int[parallelData->nAtomsLocal]; + int *localToGlobalGroupIndex = new int[parallelData->nGroupsLocal]; - for(i=0; imyNlocal; i++) globalIndex[i] = entryPlug->atoms[i]->getGlobalIndex(); + // Fortran indexing needs to be increased by 1 in order to get the 2 + // languages to not barf + for(i 
= 0; i < parallelData->nAtomsLocal; i++) + localToGlobalAtomIndex[i] = globalAtomIndex[i] + 1; + + for(i = 0; i < parallelData->nGroupsLocal; i++) + localToGlobalGroupIndex[i] = globalGroupIndex[i] + 1; isError = 0; - setFsimParallel( mpiPlug, &(entryPlug->n_atoms), globalIndex, &isError ); + + setFsimParallel( parallelData, + &(parallelData->nAtomsLocal), localToGlobalAtomIndex, + &(parallelData->nGroupsLocal), localToGlobalGroupIndex, + &isError ); + if( isError ){ sprintf( painCave.errMsg, @@ -207,8 +425,10 @@ void mpiSimulation::mpiRefresh( void ){ simError(); } - delete[] globalIndex; + delete[] localToGlobalGroupIndex; + delete[] localToGlobalAtomIndex; + sprintf( checkPointMsg, " mpiRefresh successful.\n" ); MPIcheckPoint();
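
Note (illustration, not part of the patch): the new divideLabor() spreads molecules over processors with a simple Monte Carlo rule. A molecule goes to a randomly chosen processor outright if that keeps the processor at or below the target atom count nTarget; an over-target choice is accepted with probability exp(-a*x), where x is the overshoot in atoms and a = 3 * nMolGlobal / nAtomsGlobal (a penalty of 3 divided by the average atoms per molecule); after 100 failed attempts the molecule is forced onto the current candidate. The sketch below is a minimal, self-contained rendering of that acceptance rule only: cutoff groups, the MPI broadcasts, and the randomSPRNG generator are omitted, and the function name assignMolecules plus the use of std::mt19937 are assumptions made for the sketch, not code from the patch.

    #include <cmath>
    #include <vector>
    #include <random>

    // Sketch of the divideLabor() load-balancing rule: returns a
    // molecule-to-processor map, assuming atomsPerMol is non-empty.
    std::vector<int> assignMolecules(const std::vector<int>& atomsPerMol,
                                     int nProcs, unsigned seed) {
      std::mt19937 gen(seed);
      std::uniform_real_distribution<double> uniform(0.0, 1.0);

      int nMol = (int) atomsPerMol.size();
      int nAtoms = 0;
      for (int i = 0; i < nMol; i++) nAtoms += atomsPerMol[i];

      // Ideal atoms per processor, and the penalty constant
      // a = 3 / (average atoms per molecule), as in the patch.
      int nTarget = (int)((double) nAtoms / (double) nProcs + 0.5);
      double a = 3.0 * (double) nMol / (double) nAtoms;

      std::vector<int> molToProc(nMol, -1);
      std::vector<int> atomsPerProc(nProcs, 0);

      for (int i = 0; i < nMol; i++) {
        for (int loops = 1; ; loops++) {
          int proc = (int)(uniform(gen) * nProcs);            // pick a processor at random
          int newAtoms = atomsPerProc[proc] + atomsPerMol[i];
          double x = (double)(newAtoms - nTarget);            // overshoot above the target

          // Accept if we stay at or below the target, if we have already
          // tried 100 times, or with probability exp(-a * x) otherwise.
          if (newAtoms <= nTarget || loops > 100 ||
              uniform(gen) < std::exp(-a * x)) {
            molToProc[i] = proc;
            atomsPerProc[proc] += atomsPerMol[i];
            break;
          }
        }
      }
      return molToProc;
    }

Because processors are drawn uniformly at random and overloads are penalized exponentially in the overshoot, the per-processor atom counts stay close to nTarget without any global sorting of molecules, which is the behavior the patch's warning message and the 100-attempt cutoff guard against in pathological cases.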