| 63 |
|
#include <iomanip> |
| 64 |
|
|
| 65 |
|
#include <omp.h> |
| 66 |
+ |
//#include <time.h> |
| 67 |
+ |
#include <sys/time.h> |
| 68 |
|
|
| 69 |
|
using namespace std; |
| 70 |
|
namespace OpenMD { |
| 71 |
+ |
|
| 72 |
+ |
//long int elapsedTime = 0; |
| 73 |
|
|
| 74 |
|
ForceManager::ForceManager(SimInfo * info) : |
| 75 |
|
info_(info) { |
| 586 |
|
RealType sw, dswdr, swderiv; |
| 587 |
|
vector<int> atomListColumn, atomListRow, atomListLocal; |
| 588 |
|
|
| 589 |
+ |
/* Defines local interaction data to each thread */ |
| 590 |
|
InteractionDataPrv idatPrv; |
| 591 |
|
|
| 592 |
|
SelfData sdat; |
| 628 |
|
neighborMatW = fDecomp_->buildLayerBasedNeighborList(); |
| 629 |
|
} |
| 630 |
|
|
| 631 |
+ |
/* Eager initialization */ |
| 632 |
+ |
/* Initializes InteractionManager before force calculations */ |
| 633 |
+ |
interactionMan_->initializeOMP(); |
| 634 |
+ |
/* Initializes forces used in simulation before force calculations */ |
| 635 |
+ |
interactionMan_->initNonbondedForces(); |
| 636 |
+ |
|
| 637 |
|
vector<CutoffGroup *>::iterator cg1; |
| 638 |
|
vector<CutoffGroup *>::iterator cg2; |
| 639 |
|
|
| 640 |
< |
// int nThreads = 2; |
| 641 |
< |
int chunkSize = cgs.size() / (omp_get_num_threads() * 20); |
| 640 |
> |
/* Defines local accumulators for each thread */ |
| 641 |
> |
vector<Vector3d> forceLcl; |
| 642 |
> |
Vector3d fatom1Lcl; |
| 643 |
> |
Mat3x3d tauLcl; |
| 644 |
> |
potVec potLcl; |
| 645 |
|
|
| 646 |
< |
// printf("before omp loop\n"); |
| 647 |
< |
#pragma omp parallel /*num_threads(nThreads)*/ default(none) shared(curSnapshot, iLoop, cgs, chunkSize) \ |
| 648 |
< |
private(cg1, cg2, cuts, d_grp, rgrpsq, rCutSq, idatPrv, vij, fij, in_switching_region, dswdr, rgrp, \ |
| 649 |
< |
atomListRow, atomListColumn, atom1, atom2, topoDist, d, r2, swderiv, fg, mf, dag) |
| 646 |
> |
/* Defines the size of chunks into which the loop iterations will be split */ |
| 647 |
> |
int chunkSize = cgs.size() / (omp_get_max_threads() * 5); |
| 648 |
> |
|
| 649 |
> |
/*struct timeval tv, tv2; |
| 650 |
> |
|
| 651 |
> |
gettimeofday(&tv, NULL);*/ |
| 652 |
> |
|
| 653 |
> |
/* Defines the parallel region and the list of variables that should be shared and private to each thread */ |
| 654 |
> |
#pragma omp parallel default(none) shared(curSnapshot, iLoop, cgs, chunkSize, config) \ |
| 655 |
> |
private(cg1, cg2, cuts, d_grp, rgrpsq, rCutSq, idatPrv, vij, fij, in_switching_region, \ |
| 656 |
> |
dswdr, rgrp, atomListRow, atomListColumn, atom1, atom2, topoDist, d, r2, swderiv, fg, mf, \ |
| 657 |
> |
dag, tauLcl, forceLcl, fatom1Lcl, potLcl) |
| 658 |
|
{ |
| 659 |
|
idatPrv.shiftedPot = (cutoffMethod_ == SHIFTED_POTENTIAL) ? true : false; |
| 660 |
|
idatPrv.shiftedForce = (cutoffMethod_ == SHIFTED_FORCE) ? true : false; |
| 661 |
+ |
forceLcl = vector<Vector3d>(config->force.size()); |
| 662 |
+ |
tauLcl *= 0; |
| 663 |
|
|
| 664 |
< |
// printf("Thread %d\n", omp_get_thread_num()); |
| 665 |
< |
#pragma omp for schedule(dynamic, chunkSize) |
| 664 |
> |
/* Spreads force calculations between threads. Each thread receives chunkSize portion of the CutoffGroups. */ |
| 665 |
> |
#pragma omp for schedule(dynamic, chunkSize) |
| 666 |
|
for (cg1 = cgs.begin(); cg1 < cgs.end(); ++cg1) |
| 667 |
|
{ |
| 668 |
+ |
/* Iterates between neighbors of the CutoffGroup */ |
| 669 |
|
for (cg2 = neighborMatW[(*cg1)->getGlobalIndex()].begin(); cg2 < neighborMatW[(*cg1)->getGlobalIndex()].end(); ++cg2) |
| 670 |
|
{ |
| 671 |
|
|
| 688 |
|
fij = V3Zero; |
| 689 |
|
} |
| 690 |
|
|
| 691 |
< |
in_switching_region = switcher_->getSwitch(rgrpsq, /*sw*/idatPrv.sw, dswdr, rgrp); |
| 691 |
> |
in_switching_region = switcher_->getSwitch(rgrpsq, idatPrv.sw, dswdr, rgrp); |
| 692 |
|
|
| 693 |
|
// printf("in_switching_region:%d\trgrpsq:%f\t*idatPrv.sw:%f\tdswdr:%f\trgrp:%f\n", (in_switching_region == false ? 0 : 1), rgrpsq, idatPrv.sw, dswdr, rgrp); |
| 694 |
|
|
| 698 |
|
for (vector<int>::iterator ia = atomListRow.begin(); ia != atomListRow.end(); ++ia) |
| 699 |
|
{ |
| 700 |
|
atom1 = (*ia); |
| 701 |
+ |
fatom1Lcl = V3Zero; |
| 702 |
|
|
| 703 |
|
for (vector<int>::iterator jb = atomListColumn.begin(); jb != atomListColumn.end(); ++jb) |
| 704 |
|
{ |
| 705 |
|
atom2 = (*jb); |
| 706 |
|
|
| 681 |
– |
// printf("atom1:%d atom2:%d\n", atom1, atom2); |
| 707 |
|
if (!fDecomp_->skipAtomPair(atom1, atom2)) |
| 708 |
|
{ |
| 709 |
|
idatPrv.vpair = 0.0; |
| 722 |
|
{ |
| 723 |
|
idatPrv.d = d_grp; |
| 724 |
|
idatPrv.r2 = rgrpsq; |
| 725 |
< |
// cerr << "dgrp = " << d_grp << ":" << __LINE__ << "\n"; |
| 725 |
> |
//cerr << "dgrp = " << d_grp << ":" << __LINE__ << "\n"; |
| 726 |
|
} else |
| 727 |
|
{ |
| 728 |
|
d = fDecomp_->getInteratomicVector(atom1, atom2); |
| 729 |
|
curSnapshot->wrapVector(d); |
| 730 |
|
r2 = d.lengthSquare(); |
| 731 |
< |
// cerr << "datm = " << d << ":" << __LINE__ << "\n"; |
| 731 |
> |
//cerr << "datm = " << d << ":" << __LINE__ << "\n"; |
| 732 |
|
idatPrv.d = d; |
| 733 |
|
idatPrv.r2 = r2; |
| 734 |
|
} |
| 735 |
|
|
| 736 |
< |
// printf("idatPrv.d x:%f\ty:%f\tz:%f\tidatPrv.r2:%f\n", (idatPrv.d).x(), (idatPrv.d).y(), (idatPrv.d).z(), idatPrv.r2); |
| 737 |
< |
|
| 713 |
< |
// cerr << "idat.d = " << *(idat.d) << ":" << __LINE__ << "\n"; |
| 736 |
> |
//printf("idatPrv.d x:%f\ty:%f\tz:%f\tidatPrv.r2:%f\n", (idatPrv.d).x(), (idatPrv.d).y(), (idatPrv.d).z(), idatPrv.r2); |
| 737 |
> |
//cerr << "idat.d = " << *(idat.d) << ":" << __LINE__ << "\n"; |
| 738 |
|
idatPrv.rij = sqrt((idatPrv.r2)); |
| 739 |
< |
// cerr << "idat.rij = " << *(idat.rij) << "\n"; |
| 739 |
> |
//cerr << "idat.rij = " << *(idat.rij) << "\n"; |
| 740 |
|
|
| 717 |
– |
#pragma omp critical |
| 718 |
– |
{ |
| 719 |
– |
interactionMan_->initializeOMP(); |
| 720 |
– |
} |
| 721 |
– |
|
| 741 |
|
if (iLoop == PREPAIR_LOOP) |
| 742 |
|
{ |
| 743 |
|
interactionMan_->doPrePairOMP(idatPrv); |
| 744 |
|
} else |
| 745 |
|
{ |
| 746 |
|
interactionMan_->doPairOMP(idatPrv); |
| 728 |
– |
fDecomp_->unpackInteractionDataOMP(idatPrv, atom1, atom2); |
| 747 |
|
|
| 748 |
< |
// cerr << "d = " << *(idat.d) << "\tv=" << vpair << "\tf=" << f1 << ":" << __LINE__ << "\n"; |
| 749 |
< |
// printf("d x:%f y:%f z:%f vpair:%f f1 x:%f y:%f z:%f\n", idatPrv.d.x(), idatPrv.d.y(), idatPrv.d.z(), idatPrv.vpair, idatPrv.f1.x(), idatPrv.f1.y(), idatPrv.f1.z()); |
| 750 |
< |
#pragma omp critical |
| 751 |
< |
{ |
| 752 |
< |
vij += idatPrv.vpair; |
| 753 |
< |
fij += idatPrv.f1; |
| 754 |
< |
tau -= outProduct(idatPrv.d, idatPrv.f1); |
| 755 |
< |
|
| 756 |
< |
// printf("vij:%f fij x:%f y:%f z:%f\n", vij, fij.x(), fij.y(), fij.z()); |
| 757 |
< |
} |
| 748 |
> |
/* Accumulates potential and forces in local arrays */ |
| 749 |
> |
potLcl += idatPrv.pot; |
| 750 |
> |
fatom1Lcl += idatPrv.f1; |
| 751 |
> |
forceLcl[atom2] -= idatPrv.f1; |
| 752 |
> |
//cerr << "d = " << *(idat.d) << "\tv=" << vpair << "\tf=" << f1 << ":" << __LINE__ << "\n"; |
| 753 |
> |
//printf("d x:%f y:%f z:%f vpair:%f f1 x:%f y:%f z:%f\n", idatPrv.d.x(), idatPrv.d.y(), idatPrv.d.z(), idatPrv.vpair, idatPrv.f1.x(), idatPrv.f1.y(), idatPrv.f1.z()); |
| 754 |
> |
vij += idatPrv.vpair; |
| 755 |
> |
fij += idatPrv.f1; |
| 756 |
> |
tauLcl -= outProduct(idatPrv.d, idatPrv.f1); |
| 757 |
> |
//printf("vij:%f fij x:%f y:%f z:%f\n", vij, fij.x(), fij.y(), fij.z()); |
| 758 |
|
} |
| 759 |
|
} |
| 760 |
|
} |
| 761 |
+ |
/* We can accumulate force for current CutoffGroup in shared memory without worring about read after write bugs*/ |
| 762 |
+ |
config->force[atom1] += fatom1Lcl; |
| 763 |
|
} |
| 764 |
|
|
| 765 |
|
if (iLoop == PAIR_LOOP) |
| 772 |
|
|
| 773 |
|
if (atomListRow.size() == 1 && atomListColumn.size() == 1) |
| 774 |
|
{ |
| 775 |
< |
#pragma omp critical |
| 756 |
< |
{ |
| 757 |
< |
tau -= outProduct(idatPrv.d, fg); |
| 758 |
< |
} |
| 775 |
> |
tauLcl -= outProduct(idatPrv.d, fg); |
| 776 |
|
} |
| 777 |
|
|
| 778 |
|
for (vector<int>::iterator ia = atomListRow.begin(); ia != atomListRow.end(); ++ia) |
| 782 |
|
// fg is the force on atom ia due to cutoff group's |
| 783 |
|
// presence in switching region |
| 784 |
|
fg = swderiv * d_grp * mf; |
| 785 |
< |
fDecomp_->addForceToAtomRowOMP(atom1, fg); |
| 785 |
> |
#pragma omp critical (forceLck) |
| 786 |
> |
{ |
| 787 |
> |
fDecomp_->addForceToAtomRow(atom1, fg); |
| 788 |
> |
} |
| 789 |
|
|
| 790 |
|
if (atomListRow.size() > 1) |
| 791 |
|
{ |
| 794 |
|
// find the distance between the atom |
| 795 |
|
// and the center of the cutoff group: |
| 796 |
|
dag = fDecomp_->getAtomToGroupVectorRow(atom1, (*cg1)->getGlobalIndex()); |
| 797 |
< |
#pragma omp critical |
| 778 |
< |
{ |
| 779 |
< |
tau -= outProduct(dag, fg); |
| 780 |
< |
} |
| 797 |
> |
tauLcl -= outProduct(dag, fg); |
| 798 |
|
} |
| 799 |
|
} |
| 800 |
|
} |
| 805 |
|
// fg is the force on atom jb due to cutoff group's |
| 806 |
|
// presence in switching region |
| 807 |
|
fg = -swderiv * d_grp * mf; |
| 808 |
< |
fDecomp_->addForceToAtomColumn(atom2, fg); |
| 808 |
> |
#pragma omp critical (forceLck) |
| 809 |
> |
{ |
| 810 |
> |
fDecomp_->addForceToAtomColumn(atom2, fg); |
| 811 |
> |
} |
| 812 |
|
|
| 813 |
|
if (atomListColumn.size() > 1) |
| 814 |
|
{ |
| 817 |
|
// find the distance between the atom |
| 818 |
|
// and the center of the cutoff group: |
| 819 |
|
dag = fDecomp_->getAtomToGroupVectorColumn(atom2, (*cg2)->getGlobalIndex()); |
| 820 |
< |
#pragma omp critical |
| 801 |
< |
{ |
| 802 |
< |
tau -= outProduct(dag, fg); |
| 803 |
< |
} |
| 820 |
> |
tauLcl -= outProduct(dag, fg); |
| 821 |
|
} |
| 822 |
|
} |
| 823 |
|
} |
| 829 |
|
} |
| 830 |
|
} |
| 831 |
|
}// END: omp for loop |
| 832 |
< |
// printf("after omp loop\n"); |
| 833 |
< |
} |
| 832 |
> |
/* Critical region which accumulates forces from local to shared arrays */ |
| 833 |
> |
#pragma omp critical (forceAdd) |
| 834 |
> |
{ |
| 835 |
> |
for (int currAtom = 0; currAtom < config->force.size(); ++currAtom) |
| 836 |
> |
{ |
| 837 |
> |
config->force[currAtom] += forceLcl[currAtom]; |
| 838 |
> |
} |
| 839 |
|
|
| 840 |
+ |
tau -= tauLcl; |
| 841 |
+ |
*(fDecomp_->getPairwisePotential()) += potLcl; |
| 842 |
+ |
} |
| 843 |
+ |
}// END: omp parallel region |
| 844 |
+ |
|
| 845 |
+ |
/*gettimeofday(&tv2, NULL); |
| 846 |
+ |
|
| 847 |
+ |
elapsedTime += 1000000 * (tv2.tv_sec - tv.tv_sec) |
| 848 |
+ |
+ (tv2.tv_usec - tv.tv_usec); |
| 849 |
+ |
|
| 850 |
+ |
Globals *simParams_ = info_->getSimParams(); |
| 851 |
+ |
|
| 852 |
+ |
if(curSnapshot->getTime() >= simParams_->getRunTime() - 1) |
| 853 |
+ |
{ |
| 854 |
+ |
printf("Force calculation time: %ld [us]\n", elapsedTime); |
| 855 |
+ |
}*/ |
| 856 |
+ |
|
| 857 |
|
if (iLoop == PREPAIR_LOOP) |
| 858 |
|
{ |
| 859 |
|
if (info_->requiresPrepair()) |