| 1 | /* | 
| 2 | * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved. | 
| 3 | * | 
| 4 | * The University of Notre Dame grants you ("Licensee") a | 
| 5 | * non-exclusive, royalty free, license to use, modify and | 
| 6 | * redistribute this software in source and binary code form, provided | 
| 7 | * that the following conditions are met: | 
| 8 | * | 
| 9 | * 1. Acknowledgement of the program authors must be made in any | 
| 10 | *    publication of scientific results based in part on use of the | 
| 11 | *    program.  An acceptable form of acknowledgement is citation of | 
| 12 | *    the article in which the program was described (Matthew | 
| 13 | *    A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher | 
| 14 | *    J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented | 
| 15 | *    Parallel Simulation Engine for Molecular Dynamics," | 
| 16 | *    J. Comput. Chem. 26, pp. 252-271 (2005)) | 
| 17 | * | 
| 18 | * 2. Redistributions of source code must retain the above copyright | 
| 19 | *    notice, this list of conditions and the following disclaimer. | 
| 20 | * | 
| 21 | * 3. Redistributions in binary form must reproduce the above copyright | 
| 22 | *    notice, this list of conditions and the following disclaimer in the | 
| 23 | *    documentation and/or other materials provided with the | 
| 24 | *    distribution. | 
| 25 | * | 
| 26 | * This software is provided "AS IS," without a warranty of any | 
| 27 | * kind. All express or implied conditions, representations and | 
| 28 | * warranties, including any implied warranty of merchantability, | 
| 29 | * fitness for a particular purpose or non-infringement, are hereby | 
| 30 | * excluded.  The University of Notre Dame and its licensors shall not | 
| 31 | * be liable for any damages suffered by licensee as a result of | 
| 32 | * using, modifying or distributing the software or its | 
| 33 | * derivatives. In no event will the University of Notre Dame or its | 
| 34 | * licensors be liable for any lost revenue, profit or data, or for | 
| 35 | * direct, indirect, special, consequential, incidental or punitive | 
| 36 | * damages, however caused and regardless of the theory of liability, | 
| 37 | * arising out of the use of or inability to use software, even if the | 
| 38 | * University of Notre Dame has been advised of the possibility of | 
| 39 | * such damages. | 
| 40 | */ | 
| 41 |  | 
| 42 | #include <sys/time.h> | 
| 43 | #include <string.h> | 
| 44 | #include <stdio.h> | 
| 45 | #include <stdlib.h> | 
| 46 | #include <math.h> | 
| 47 |  | 
| 48 | #ifdef IS_MPI | 
| 49 | #include <mpi.h> | 
| 50 |  | 
| 51 | #include "brains/mpiSimulation.hpp" | 
| 52 | #endif //is_mpi | 
| 53 |  | 
| 54 | #include "config.h" | 
| 55 | #include "utils/simError.h" | 
| 56 | #include "profiling/mdProfile.hpp" | 
| 57 |  | 
| 58 | namespace mdProfileSpace { | 
| 59 |  | 
| 60 | class ProfileString{ | 
| 61 | public: | 
| 62 | char myName[MAX_PROFILE_NAMELENGTH]; | 
| 63 | }; | 
| 64 |  | 
| 65 | ProfileString theNames[N_PROFILES]; | 
| 66 |  | 
| 67 | struct timeval startTime[N_PROFILES]; | 
| 68 | struct timeval endTime[N_PROFILES]; | 
| 69 |  | 
| 70 | double accumTime[N_PROFILES]; | 
| 71 |  | 
| 72 | #ifdef IS_MPI | 
| 73 | double globalTime[N_PROFILES]; | 
| 74 | #endif //is_mpi | 
| 75 |  | 
| 76 |  | 
| 77 | } | 
| 78 |  | 
| 79 | extern "C"{ | 
| 80 |  | 
| 81 | void F90_FUNC(gettimes, GETTIMES)(double* forceTime, | 
| 82 | double* commTime); | 
| 83 | } | 
| 84 |  | 
| 85 |  | 
| 86 | using namespace mdProfileSpace; | 
| 87 |  | 
| 88 |  | 
| 89 | void initProfile( void ){ | 
| 90 |  | 
| 91 | int i; | 
| 92 |  | 
| 93 | for( i=0;i<N_PROFILES;i++ ){ | 
| 94 |  | 
| 95 | accumTime[i] = 0.0; | 
| 96 |  | 
| 97 | #ifdef IS_MPI | 
| 98 | globalTime[i] = 0.0; | 
| 99 | #endif //is_mpi | 
| 100 | } | 
| 101 |  | 
| 102 | strncpy( theNames[pro1].myName, "Integrator->integrateStep()", MAX_PROFILE_NAMELENGTH ); | 
| 103 | strncpy( theNames[pro2].myName, "Integrator->writes and stats", MAX_PROFILE_NAMELENGTH ); | 
| 104 | strncpy( theNames[pro3].myName, "Integrator->preMove", MAX_PROFILE_NAMELENGTH ); | 
| 105 | strncpy( theNames[pro4].myName, "Integrator->moveA", MAX_PROFILE_NAMELENGTH ); | 
| 106 | strncpy( theNames[pro5].myName, "Integrator->CalcForce", MAX_PROFILE_NAMELENGTH ); | 
| 107 | strncpy( theNames[pro6].myName, "Integrator->moveB", MAX_PROFILE_NAMELENGTH ); | 
| 108 | strncpy( theNames[pro7].myName, "shortRange force calc", MAX_PROFILE_NAMELENGTH ); | 
| 109 | strncpy( theNames[pro8].myName, "fortran force calc", MAX_PROFILE_NAMELENGTH ); | 
| 110 | } | 
| 111 |  | 
| 112 |  | 
| 113 | void startProfile( proNames theProfile ){ | 
| 114 | struct timezone tz; | 
| 115 |  | 
| 116 | gettimeofday( &startTime[theProfile], &tz ); | 
| 117 | } | 
| 118 |  | 
| 119 | void endProfile( proNames theProfile ){ | 
| 120 | struct timezone tz; | 
| 121 | double startVal, endVal; | 
| 122 |  | 
| 123 | gettimeofday( &endTime[theProfile], &tz ); | 
| 124 |  | 
| 125 | startVal = (double)startTime[theProfile].tv_sec | 
| 126 | + (double)startTime[theProfile].tv_usec / 1000000.0; | 
| 127 |  | 
| 128 | endVal = (double)endTime[theProfile].tv_sec | 
| 129 | + (double)endTime[theProfile].tv_usec / 1000000.0; | 
| 130 |  | 
| 131 | accumTime[theProfile] += endVal - startVal; | 
| 132 | } | 
| 133 |  | 
| 134 |  | 
| 135 | void writeProfiles( void ){ | 
| 136 |  | 
| 137 | int i; | 
| 138 | double totalTime; | 
| 139 | double percentTime[N_PROFILES]; | 
| 140 | int days, hours, minutes, secs, msecs; | 
| 141 | double donkey; | 
| 142 |  | 
| 143 | double forceTime, commTime; | 
| 144 |  | 
| 145 | #ifdef IS_MPI | 
| 146 | int j; | 
| 147 |  | 
| 148 | MPI_Status istatus; | 
| 149 |  | 
| 150 | double nodeTime, nodeForceTime, nodeCommTime; | 
| 151 | double nodeAccum[N_PROFILES]; | 
| 152 | double nodePercent[N_PROFILES]; | 
| 153 |  | 
| 154 | double globalTime, globalForceTime, globalCommTime; | 
| 155 | double globalAccum[N_PROFILES]; | 
| 156 | double globalPercent[N_PROFILES]; | 
| 157 | #endif // is_mpi | 
| 158 |  | 
| 159 |  | 
| 160 | #ifndef IS_MPI // single processor version | 
| 161 |  | 
| 162 | totalTime = 0.0; | 
| 163 | for(i=0;i<N_PROFILES;i++) | 
| 164 | totalTime += accumTime[i]; | 
| 165 |  | 
| 166 | for(i=0;i<N_PROFILES;i++) | 
| 167 | percentTime[i] = accumTime[i] / totalTime; | 
| 168 |  | 
| 169 | fprintf(stdout, | 
| 170 | "  Time Spent      Percent Time                        Name\n" | 
| 171 | "--------------  ----------------   -----------------------------------------\n" | 
| 172 | ); | 
| 173 |  | 
| 174 | for(i=0;i<N_PROFILES;i++){ | 
| 175 | fprintf(stdout, | 
| 176 | " %12G    %14G     %40s\n", | 
| 177 | accumTime[i], | 
| 178 | percentTime[i], | 
| 179 | theNames[i].myName ); | 
| 180 | } | 
| 181 |  | 
| 182 | days = (int)floor( totalTime / 86400 ); | 
| 183 | donkey = totalTime - 86400 * days; | 
| 184 |  | 
| 185 | hours = (int)floor( donkey / 3600 ); | 
| 186 | donkey -= hours * 3600; | 
| 187 |  | 
| 188 | minutes = (int)floor( donkey / 60 ); | 
| 189 | donkey -= minutes * 60; | 
| 190 |  | 
| 191 | secs = (int)donkey; | 
| 192 | msecs = (int)( (donkey - secs) * 1000 ); | 
| 193 |  | 
| 194 | F90_FUNC(gettimes, GETTIMES)(&forceTime, &commTime); | 
| 195 |  | 
| 196 | fprintf( stdout, | 
| 197 | "----------------------------------------------------------------------------\n" | 
| 198 | "  Total Time = %03d:%02d:%02d:%02d.%03d ( %G sec )\n" | 
| 199 | "\n" | 
| 200 | "  From Fortran: forceTime = %G secs; communicationTime = %G secs.\n", | 
| 201 | days, | 
| 202 | hours, | 
| 203 | minutes, | 
| 204 | secs, | 
| 205 | msecs, | 
| 206 | totalTime, | 
| 207 | forceTime, | 
| 208 | commTime); | 
| 209 |  | 
| 210 | #else // the parrallel version | 
| 211 |  | 
| 212 | if( worldRank == 0 ){ | 
| 213 |  | 
| 214 | double *nodeTots = new double[mpiSim->getNProcessors()]; | 
| 215 | double *nodePercentTots = new double[mpiSim->getNProcessors()]; | 
| 216 |  | 
| 217 | totalTime = 0.0; | 
| 218 | for(i=0;i<N_PROFILES;i++) | 
| 219 | totalTime += accumTime[i]; | 
| 220 |  | 
| 221 | for(i=0;i<N_PROFILES;i++) | 
| 222 | percentTime[i] = accumTime[i] / totalTime; | 
| 223 |  | 
| 224 | fprintf(stdout, | 
| 225 | "\n" | 
| 226 | "----------------------------------------------------------------------------\n" | 
| 227 | "  Output from Node %d:   \n" | 
| 228 | "\n" | 
| 229 | "  Time Spent      Percent Time                        Name\n" | 
| 230 | "--------------  ----------------   -----------------------------------------\n", | 
| 231 | worldRank); | 
| 232 |  | 
| 233 | for(i=0;i<N_PROFILES;i++){ | 
| 234 | fprintf(stdout, | 
| 235 | " %12G    %14G     %40s\n", | 
| 236 | accumTime[i], | 
| 237 | percentTime[i], | 
| 238 | theNames[i].myName ); | 
| 239 | } | 
| 240 |  | 
| 241 | days = (int)floor( totalTime / 86400 ); | 
| 242 | donkey = totalTime - 86400 * days; | 
| 243 |  | 
| 244 | hours = (int)floor( donkey / 3600 ); | 
| 245 | donkey -= hours * 3600; | 
| 246 |  | 
| 247 | minutes = (int)floor( donkey / 60 ); | 
| 248 | donkey -= minutes * 60; | 
| 249 |  | 
| 250 | secs = (int)donkey; | 
| 251 | msecs = (int)( (donkey - secs) * 1000 ); | 
| 252 |  | 
| 253 | F90_FUNC(gettimes, GETTIMES)(&forceTime, &commTime); | 
| 254 |  | 
| 255 | fprintf( stdout, | 
| 256 | "----------------------------------------------------------------------------\n" | 
| 257 | "  Total Time = %03d:%02d:%02d:%02d.%03d ( %G sec )\n" | 
| 258 | "\n" | 
| 259 | "  From Fortran: forceTime = %G secs; communicationTime = %G secs.\n", | 
| 260 | days, | 
| 261 | hours, | 
| 262 | minutes, | 
| 263 | secs, | 
| 264 | msecs, | 
| 265 | totalTime, | 
| 266 | forceTime, | 
| 267 | commTime); | 
| 268 |  | 
| 269 | // now the rest of the nodes | 
| 270 |  | 
| 271 | nodeTots[0] = totalTime; | 
| 272 |  | 
| 273 | globalTime = totalTime; | 
| 274 | globalForceTime = forceTime; | 
| 275 | globalCommTime = commTime; | 
| 276 | for(i=0;i<N_PROFILES;i++) | 
| 277 | globalAccum[i] = accumTime[i]; | 
| 278 |  | 
| 279 |  | 
| 280 | for(j=1;j<mpiSim->getNProcessors();j++){ | 
| 281 |  | 
| 282 | nodeTime = 0.0; | 
| 283 |  | 
| 284 | MPI_Recv(nodeAccum, N_PROFILES, MPI_DOUBLE, j, | 
| 285 | 1, MPI_COMM_WORLD, &istatus ); | 
| 286 |  | 
| 287 | MPI_Recv(&nodeForceTime, 1, MPI_DOUBLE, j, | 
| 288 | 1, MPI_COMM_WORLD, &istatus ); | 
| 289 | MPI_Recv(&nodeCommTime, 1, MPI_DOUBLE, j, | 
| 290 | 1, MPI_COMM_WORLD, &istatus ); | 
| 291 |  | 
| 292 | for(i=0;i<N_PROFILES;i++){ | 
| 293 | nodeTime += nodeAccum[i]; | 
| 294 | } | 
| 295 |  | 
| 296 | for(i=0;i<N_PROFILES;i++) | 
| 297 | nodePercent[i] = nodeAccum[i] / nodeTime; | 
| 298 |  | 
| 299 | fprintf(stdout, | 
| 300 | "\n" | 
| 301 | "----------------------------------------------------------------------------\n" | 
| 302 | "  Output from Node %d:   \n" | 
| 303 | "\n" | 
| 304 | "  Time Spent      Percent Time                        Name\n" | 
| 305 | "--------------  ----------------   -----------------------------------------\n", | 
| 306 | j); | 
| 307 |  | 
| 308 | for(i=0;i<N_PROFILES;i++){ | 
| 309 | fprintf(stdout, | 
| 310 | " %12G    %14G     %40s\n", | 
| 311 | nodeAccum[i], | 
| 312 | nodePercent[i], | 
| 313 | theNames[i].myName ); | 
| 314 | } | 
| 315 |  | 
| 316 | days = (int)floor( nodeTime / 86400 ); | 
| 317 | donkey = nodeTime - 86400 * days; | 
| 318 |  | 
| 319 | hours = (int)floor( donkey / 3600 ); | 
| 320 | donkey -= hours * 3600; | 
| 321 |  | 
| 322 | minutes = (int)floor( donkey / 60 ); | 
| 323 | donkey -= minutes * 60; | 
| 324 |  | 
| 325 | secs = (int)donkey; | 
| 326 | msecs = (int)( (donkey - secs) * 1000 ); | 
| 327 |  | 
| 328 | fprintf( stdout, | 
| 329 | "----------------------------------------------------------------------------\n" | 
| 330 | "  Total Time = %03d:%02d:%02d:%02d.%03d ( %G sec )\n" | 
| 331 | "\n" | 
| 332 | "  From Fortran: forceTime = %G secs; communicationTime = %G secs.\n", | 
| 333 | days, | 
| 334 | hours, | 
| 335 | minutes, | 
| 336 | secs, | 
| 337 | msecs, | 
| 338 | nodeTime, | 
| 339 | nodeForceTime, | 
| 340 | nodeCommTime); | 
| 341 |  | 
| 342 | for(i=0;i<N_PROFILES;i++) | 
| 343 | globalAccum[i] += nodeAccum[i]; | 
| 344 |  | 
| 345 | globalTime += nodeTime; | 
| 346 | globalForceTime += nodeForceTime; | 
| 347 | globalCommTime += nodeCommTime; | 
| 348 | nodeTots[j] = nodeTime; | 
| 349 | } | 
| 350 |  | 
| 351 | // print out the totals | 
| 352 |  | 
| 353 | for(j=0;j<mpiSim->getNProcessors();j++) | 
| 354 | nodePercentTots[j] = nodeTots[j] / globalTime; | 
| 355 |  | 
| 356 | for(i=0;i<N_PROFILES;i++) | 
| 357 | globalPercent[i] = globalAccum[i] / globalTime; | 
| 358 |  | 
| 359 | fprintf(stdout, | 
| 360 | "\n" | 
| 361 | "----------------------------------------------------------------------------\n" | 
| 362 | "  Total Across Nodes\n" | 
| 363 | "\n" | 
| 364 | "  Time Spent      Percent Time                        Name\n" | 
| 365 | "--------------  ----------------   -----------------------------------------\n", | 
| 366 | j); | 
| 367 |  | 
| 368 | for(i=0;i<N_PROFILES;i++){ | 
| 369 | fprintf(stdout, | 
| 370 | " %12G    %14G     %40s\n", | 
| 371 | globalAccum[i], | 
| 372 | globalPercent[i], | 
| 373 | theNames[i].myName ); | 
| 374 | } | 
| 375 | fprintf(stdout, | 
| 376 | "\n" | 
| 377 | "\n" ); | 
| 378 |  | 
| 379 | for(j=0;j<mpiSim->getNProcessors();j++){ | 
| 380 |  | 
| 381 | fprintf(stdout, | 
| 382 | " %12G    %14G     node %d\n", | 
| 383 | nodeTots[j], | 
| 384 | nodePercentTots[j], | 
| 385 | j ); | 
| 386 | } | 
| 387 |  | 
| 388 | days = (int)floor( globalTime / 86400 ); | 
| 389 | donkey = nodeTime - 86400 * days; | 
| 390 |  | 
| 391 |  | 
| 392 | hours = (int)floor( donkey / 3600 ); | 
| 393 | donkey -= hours * 3600; | 
| 394 |  | 
| 395 | minutes = (int)floor( donkey / 60 ); | 
| 396 | donkey -= minutes * 60; | 
| 397 |  | 
| 398 | secs = (int)donkey; | 
| 399 | msecs = (int)( (donkey - secs) * 1000 ); | 
| 400 |  | 
| 401 | fprintf( stdout, | 
| 402 | "----------------------------------------------------------------------------\n" | 
| 403 | "  Total Time = %03d:%02d:%02d:%02d.%03d ( %G sec )\n" | 
| 404 | "\n" | 
| 405 | "  From Fortran: forceTime = %G secs; communicationTime = %G secs.\n", | 
| 406 | days, | 
| 407 | hours, | 
| 408 | minutes, | 
| 409 | secs, | 
| 410 | msecs, | 
| 411 | globalTime, | 
| 412 | globalForceTime, | 
| 413 | globalCommTime); | 
| 414 | } | 
| 415 |  | 
| 416 | else{ | 
| 417 |  | 
| 418 | for(j=1;j<mpiSim->getNProcessors();j++){ | 
| 419 |  | 
| 420 | if( worldRank == j ){ | 
| 421 |  | 
| 422 | F90_FUNC(gettimes, GETTIMES)(&forceTime, &commTime); | 
| 423 |  | 
| 424 | MPI_Send( accumTime, N_PROFILES, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD ); | 
| 425 | MPI_Send( &forceTime, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD ); | 
| 426 | MPI_Send( &commTime, 1, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD ); | 
| 427 | } | 
| 428 | } | 
| 429 | } | 
| 430 |  | 
| 431 | #endif // is_mpi | 
| 432 |  | 
| 433 |  | 
| 434 | } |