--- trunk/src/rnemd/RNEMD.cpp	2013/11/01 19:31:41	1940
+++ trunk/src/rnemd/RNEMD.cpp	2014/02/28 13:25:13	1971
@@ -424,10 +424,10 @@ namespace OpenMD {
       OutputData angularVelocity;
       angularVelocity.units = "angstroms^2/fs";
       angularVelocity.title =  "AngularVelocity";  
-      angularVelocity.dataType = "RealType";
+      angularVelocity.dataType = "Vector3d";  // NOTE(review): units string above predates omega = I^-1 * L; confirm
       angularVelocity.accumulator.reserve(nBins_);
       for (int i = 0; i < nBins_; i++) 
-        angularVelocity.accumulator.push_back( new Accumulator() );
+        angularVelocity.accumulator.push_back( new VectorAccumulator() );
       data_[ANGULARVELOCITY] = angularVelocity;
       outputMap_["ANGULARVELOCITY"] = ANGULARVELOCITY;
 
@@ -623,11 +623,11 @@ namespace OpenMD {
     StuntDouble* sd;
 
     RealType min_val;
-    bool min_found = false;   
+    int min_found = 0;   // int rather than bool: reduced below as MPI_INT
     StuntDouble* min_sd;
 
     RealType max_val;
-    bool max_found = false;
+    int max_found = 0;   // int rather than bool: reduced below as MPI_INT
     StuntDouble* max_sd;
 
     for (sd = seleManA_.beginSelected(selei); sd != NULL; 
@@ -682,7 +682,7 @@ namespace OpenMD {
       if (!max_found) {
         max_val = value;
         max_sd = sd;
-        max_found = true;
+        max_found = 1;
       } else {
         if (max_val < value) {
           max_val = value;
@@ -744,7 +744,7 @@ namespace OpenMD {
       if (!min_found) {
         min_val = value;
         min_sd = sd;
-        min_found = true;
+        min_found = 1;
       } else {
         if (min_val > value) {
           min_val = value;
@@ -754,15 +754,18 @@ namespace OpenMD {
     }
     
 #ifdef IS_MPI    
-    int worldRank = MPI::COMM_WORLD.Get_rank();
-    
-    bool my_min_found = min_found;
-    bool my_max_found = max_found;
+    int worldRank;
+    MPI_Comm_rank( MPI_COMM_WORLD, &worldRank);
+    // Keep this rank's flags; min_found/max_found receive the logical OR:
+    int my_min_found = min_found;
+    int my_max_found = max_found;
 
     // Even if we didn't find a minimum, did someone else?
-    MPI::COMM_WORLD.Allreduce(&my_min_found, &min_found, 1, MPI::BOOL, MPI::LOR);
+    MPI_Allreduce(&my_min_found, &min_found, 1, MPI_INT, MPI_LOR, 
+                  MPI_COMM_WORLD);
     // Even if we didn't find a maximum, did someone else?
-    MPI::COMM_WORLD.Allreduce(&my_max_found, &max_found, 1, MPI::BOOL, MPI::LOR);
+    MPI_Allreduce(&my_max_found, &max_found, 1, MPI_INT, MPI_LOR, 
+                  MPI_COMM_WORLD);
 #endif
 
     if (max_found && min_found) {
@@ -781,8 +784,8 @@ namespace OpenMD {
       min_vals.rank = worldRank;    
       
       // Who had the minimum?
-      MPI::COMM_WORLD.Allreduce(&min_vals, &min_vals, 
-                                1, MPI::REALTYPE_INT, MPI::MINLOC);
+      MPI_Allreduce(MPI_IN_PLACE, &min_vals,  // in place: MPI forbids aliased send/recv buffers
+                    1, MPI_REALTYPE_INT, MPI_MINLOC, MPI_COMM_WORLD);
       min_val = min_vals.val;
       
       if (my_max_found) {
@@ -793,8 +796,8 @@ namespace OpenMD {
       max_vals.rank = worldRank;    
       
       // Who had the maximum?
-      MPI::COMM_WORLD.Allreduce(&max_vals, &max_vals, 
-                                1, MPI::REALTYPE_INT, MPI::MAXLOC);
+      MPI_Allreduce(MPI_IN_PLACE, &max_vals,  // in place: MPI forbids aliased send/recv buffers
+                    1, MPI_REALTYPE_INT, MPI_MAXLOC, MPI_COMM_WORLD);
       max_val = max_vals.val;
 #endif
       
@@ -854,13 +857,13 @@ namespace OpenMD {
           
           Vector3d min_vel;
           Vector3d max_vel = max_sd->getVel();
-          MPI::Status status;
+          MPI_Status status;  // a real object: MPI_Sendrecv writes the receive status into it
 
           // point-to-point swap of the velocity vector
-          MPI::COMM_WORLD.Sendrecv(max_vel.getArrayPointer(), 3, MPI::REALTYPE,
-                                   min_vals.rank, 0, 
-                                   min_vel.getArrayPointer(), 3, MPI::REALTYPE,
-                                   min_vals.rank, 0, status);
+          MPI_Sendrecv(max_vel.getArrayPointer(), 3, MPI_REALTYPE,
+                       min_vals.rank, 0, 
+                       min_vel.getArrayPointer(), 3, MPI_REALTYPE,
+                       min_vals.rank, 0, MPI_COMM_WORLD, &status);
           
           switch(rnemdFluxType_) {
           case rnemdKE :
@@ -871,11 +874,11 @@ namespace OpenMD {
               Vector3d max_angMom = max_sd->getJ();
               
               // point-to-point swap of the angular momentum vector
-              MPI::COMM_WORLD.Sendrecv(max_angMom.getArrayPointer(), 3, 
-                                       MPI::REALTYPE, min_vals.rank, 1, 
-                                       min_angMom.getArrayPointer(), 3, 
-                                       MPI::REALTYPE, min_vals.rank, 1, 
-                                       status);
+              MPI_Sendrecv(max_angMom.getArrayPointer(), 3, 
+                           MPI_REALTYPE, min_vals.rank, 1, 
+                           min_angMom.getArrayPointer(), 3, 
+                           MPI_REALTYPE, min_vals.rank, 1, 
+                           MPI_COMM_WORLD, &status);
               
               max_sd->setJ(min_angMom);
 	    }
@@ -900,13 +903,13 @@ namespace OpenMD {
           
           Vector3d max_vel;
           Vector3d min_vel = min_sd->getVel();
-          MPI::Status status;
+          MPI_Status status;  // a real object: MPI_Sendrecv writes the receive status into it
           
           // point-to-point swap of the velocity vector
-          MPI::COMM_WORLD.Sendrecv(min_vel.getArrayPointer(), 3, MPI::REALTYPE,
-                                   max_vals.rank, 0, 
-                                   max_vel.getArrayPointer(), 3, MPI::REALTYPE,
-                                   max_vals.rank, 0, status);
+          MPI_Sendrecv(min_vel.getArrayPointer(), 3, MPI_REALTYPE,
+                       max_vals.rank, 0, 
+                       max_vel.getArrayPointer(), 3, MPI_REALTYPE,
+                       max_vals.rank, 0, MPI_COMM_WORLD, &status);
           
           switch(rnemdFluxType_) {
           case rnemdKE :
@@ -917,11 +920,11 @@ namespace OpenMD {
               Vector3d max_angMom;
               
               // point-to-point swap of the angular momentum vector
-              MPI::COMM_WORLD.Sendrecv(min_angMom.getArrayPointer(), 3, 
-                                       MPI::REALTYPE, max_vals.rank, 1, 
-                                       max_angMom.getArrayPointer(), 3, 
-                                       MPI::REALTYPE, max_vals.rank, 1, 
-                                       status);
+              MPI_Sendrecv(min_angMom.getArrayPointer(), 3, 
+                           MPI_REALTYPE, max_vals.rank, 1, 
+                           max_angMom.getArrayPointer(), 3, 
+                           MPI_REALTYPE, max_vals.rank, 1, 
+                           MPI_COMM_WORLD, &status);
               
               min_sd->setJ(max_angMom);
             }
@@ -1090,22 +1093,22 @@ namespace OpenMD {
     Kcw *= 0.5;
 
 #ifdef IS_MPI
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Phx, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Phy, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Phz, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Pcx, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Pcy, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Pcz, 1, MPI::REALTYPE, MPI::SUM);
+    MPI_Allreduce(MPI_IN_PLACE, &Phx, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Phy, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Phz, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Pcx, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Pcy, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Pcz, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
 
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Khx, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Khy, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Khz, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Khw, 1, MPI::REALTYPE, MPI::SUM);
+    MPI_Allreduce(MPI_IN_PLACE, &Khx, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Khy, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Khz, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Khw, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
 
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Kcx, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Kcy, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Kcz, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Kcw, 1, MPI::REALTYPE, MPI::SUM);
+    MPI_Allreduce(MPI_IN_PLACE, &Kcx, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Kcy, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Kcz, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Kcw, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
 #endif
 
     //solve coldBin coeff's first
@@ -1582,18 +1585,22 @@ namespace OpenMD {
     Kc *= 0.5;
     
 #ifdef IS_MPI
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Ph[0], 3, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Pc[0], 3, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Lh[0], 3, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Lc[0], 3, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Mh, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Kh, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Mc, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &Kc, 1, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, Ih.getArrayPointer(), 9, 
-                              MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, Ic.getArrayPointer(), 9, 
-                              MPI::REALTYPE, MPI::SUM);
+    MPI_Allreduce(MPI_IN_PLACE, &Ph[0], 3, MPI_REALTYPE, MPI_SUM, 
+                  MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Pc[0], 3, MPI_REALTYPE, MPI_SUM, 
+                  MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Lh[0], 3, MPI_REALTYPE, MPI_SUM, 
+                  MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Lc[0], 3, MPI_REALTYPE, MPI_SUM, 
+                  MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Mh, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Kh, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Mc, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &Kc, 1, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, Ih.getArrayPointer(), 9, 
+                  MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, Ic.getArrayPointer(), 9, 
+                  MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
 #endif
     
 
@@ -1882,12 +1889,19 @@ namespace OpenMD {
     int selei(0);
     StuntDouble* sd;
     int binNo;
+    RealType mass;   // mass of the current StuntDouble
+    Vector3d vel;    // its velocity; reused later for the per-bin mean velocity
+    Vector3d rPos;   // its position relative to coordinateOrigin_
+    RealType KE;     // translational kinetic energy, 0.5 * m * |v|^2
+    Vector3d L;      // angular momentum about the origin, m * (r x v)
+    Mat3x3d I;       // this particle's inertia-tensor contribution
+    RealType r2;     // |rPos|^2
 
     vector<RealType> binMass(nBins_, 0.0);
-    vector<RealType> binPx(nBins_, 0.0);
-    vector<RealType> binPy(nBins_, 0.0);
-    vector<RealType> binPz(nBins_, 0.0);
+    vector<Vector3d> binP(nBins_, V3Zero);   // summed linear momentum per bin
     vector<RealType> binOmega(nBins_, 0.0);
+    vector<Vector3d> binL(nBins_, V3Zero);   // summed angular momentum per bin
+    vector<Mat3x3d>  binI(nBins_);   // NOTE(review): relies on Mat3x3d() being all zeros; confirm
     vector<RealType> binKE(nBins_, 0.0);
     vector<int> binDOF(nBins_, 0);
     vector<int> binCount(nBins_, 0);
@@ -1927,44 +1941,50 @@ namespace OpenMD {
         binNo = int(rPos.length() / binWidth_);
       }
 
+      mass = sd->getMass();
+      vel = sd->getVel();
+      rPos = sd->getPos() - coordinateOrigin_;
+      KE = 0.5 * mass * vel.lengthSquare();
+      L = mass * cross(rPos, vel);
+      I = outProduct(rPos, rPos) * (-mass);  // point mass: I = m (r^2 E - r r^T), so L = I * omega
+      r2 = rPos.lengthSquare();
+      I(0, 0) += mass * r2;
+      I(1, 1) += mass * r2;
+      I(2, 2) += mass * r2;
 
-      RealType mass = sd->getMass();
-      Vector3d vel = sd->getVel();
-      Vector3d rPos = sd->getPos() - coordinateOrigin_;
       // Project the relative position onto a plane perpendicular to
       // the angularMomentumFluxVector:
-      Vector3d rProj = rPos - dot(rPos, u) * u;
+      // Vector3d rProj = rPos - dot(rPos, u) * u;
       // Project the velocity onto a plane perpendicular to the
       // angularMomentumFluxVector:
-      Vector3d vProj = vel  - dot(vel, u) * u;
+      // Vector3d vProj = vel  - dot(vel, u) * u;
       // Compute angular velocity vector (should be nearly parallel to
       // angularMomentumFluxVector
-      Vector3d aVel = cross(rProj, vProj);
-      
+      // Vector3d aVel = cross(rProj, vProj);
+
       if (binNo >= 0 && binNo < nBins_)  {
         binCount[binNo]++;
         binMass[binNo] += mass;
-        binPx[binNo] += mass*vel.x();
-        binPy[binNo] += mass*vel.y();
-        binPz[binNo] += mass*vel.z();
-        binOmega[binNo] += dot(aVel, u);
-        binKE[binNo] += 0.5 * (mass * vel.lengthSquare());
+        binP[binNo] += mass*vel;
+        binKE[binNo] += KE;
+        binI[binNo] += I;
+        binL[binNo] += L;
         binDOF[binNo] += 3;
         
         if (sd->isDirectional()) {
           Vector3d angMom = sd->getJ();
-          Mat3x3d I = sd->getI();
+          Mat3x3d Ia = sd->getI();
           if (sd->isLinear()) {
             int i = sd->linearAxis();
             int j = (i + 1) % 3;
             int k = (i + 2) % 3;
-            binKE[binNo] += 0.5 * (angMom[j] * angMom[j] / I(j, j) + 
-                                   angMom[k] * angMom[k] / I(k, k));
+            binKE[binNo] += 0.5 * (angMom[j] * angMom[j] / Ia(j, j) + 
+                                   angMom[k] * angMom[k] / Ia(k, k));
             binDOF[binNo] += 2;
           } else {
-            binKE[binNo] += 0.5 * (angMom[0] * angMom[0] / I(0, 0) +
-                                   angMom[1] * angMom[1] / I(1, 1) +
-                                   angMom[2] * angMom[2] / I(2, 2));
+            binKE[binNo] += 0.5 * (angMom[0] * angMom[0] / Ia(0, 0) +
+                                   angMom[1] * angMom[1] / Ia(1, 1) +
+                                   angMom[2] * angMom[2] / Ia(2, 2));
             binDOF[binNo] += 3;
           }
         }
@@ -1972,26 +1992,30 @@ namespace OpenMD {
     }
     
 #ifdef IS_MPI
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binCount[0],
-			      nBins_, MPI::INT, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binMass[0],
-			      nBins_, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binPx[0],
-			      nBins_, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binPy[0],
-			      nBins_, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binPz[0],
-			      nBins_, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binOmega[0],
-			      nBins_, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binKE[0],
-			      nBins_, MPI::REALTYPE, MPI::SUM);
-    MPI::COMM_WORLD.Allreduce(MPI::IN_PLACE, &binDOF[0],
-			      nBins_, MPI::INT, MPI::SUM);
+    // Scalar tallies are contiguous per-bin arrays, so each one is summed
+    // across ranks with a single collective instead of one call per bin.
+    MPI_Allreduce(MPI_IN_PLACE, &binCount[0],
+                  nBins_, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &binMass[0],
+                  nBins_, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &binKE[0],
+                  nBins_, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    MPI_Allreduce(MPI_IN_PLACE, &binDOF[0],
+                  nBins_, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
+
+    // Vector and tensor tallies are summed bin by bin through their
+    // component arrays (3 and 9 values respectively).
+    for (int i = 0; i < nBins_; i++) {
+      MPI_Allreduce(MPI_IN_PLACE, binP[i].getArrayPointer(),
+                    3, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+      MPI_Allreduce(MPI_IN_PLACE, binL[i].getArrayPointer(),
+                    3, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+      MPI_Allreduce(MPI_IN_PLACE, binI[i].getArrayPointer(),
+                    9, MPI_REALTYPE, MPI_SUM, MPI_COMM_WORLD);
+    }
 #endif
 
-    Vector3d vel;
-    RealType omega;
+    Vector3d omega;
     RealType den;
     RealType temp;
     RealType z;
@@ -2008,10 +2032,11 @@ namespace OpenMD {
         den = binMass[i] * 3.0 * PhysicalConstants::densityConvert
           / (4.0 * M_PI * (pow(router,3) - pow(rinner,3)));
       }
-      vel.x() = binPx[i] / binMass[i];
-      vel.y() = binPy[i] / binMass[i];
-      vel.z() = binPz[i] / binMass[i];
-      omega = binOmega[i] / binCount[i];
+      // NOTE(review): both averages are formed even for bins that stayed
+      // empty (binMass[i] is 0 there); confirm that is harmless.
+      vel = binP[i] / binMass[i];
+      // Bin angular velocity from the summed tallies, omega = I^-1 * L:
+      omega = binI[i].inverse() * binL[i];
 
       if (binCount[i] > 0) {
         // only add values if there are things to add
@@ -2034,7 +2059,7 @@ namespace OpenMD {
               dynamic_cast<VectorAccumulator *>(data_[j].accumulator[i])->add(vel);
               break;
             case ANGULARVELOCITY:  
-              dynamic_cast<Accumulator *>(data_[j].accumulator[i])->add(omega);
+              dynamic_cast<VectorAccumulator *>(data_[j].accumulator[i])->add(omega);
               break;
             case DENSITY:
               dynamic_cast<Accumulator *>(data_[j].accumulator[i])->add(den);
@@ -2081,7 +2106,9 @@ namespace OpenMD {
     
 #ifdef IS_MPI
     // If we're the root node, should we print out the results
-    int worldRank = MPI::COMM_WORLD.Get_rank();
+    int worldRank;
+    MPI_Comm_rank( MPI_COMM_WORLD, &worldRank);
+
     if (worldRank == 0) {
 #endif
       rnemdFile_.open(rnemdFileName_.c_str(), std::ios::out | std::ios::trunc );