Scrambler  1
timing.f90
Go to the documentation of this file.
00001 !#########################################################################
00002 !               
00003 !    Copyright (C) 2003-2012 Department of Physics and Astronomy,
00004 !                            University of Rochester,
00005 !                            Rochester, NY
00006 !
00007 !    timing.f90 is part of AstroBEAR.
00008 !
00009 !    AstroBEAR is free software: you can redistribute it and/or modify    
00010 !    it under the terms of the GNU General Public License as published by 
00011 !    the Free Software Foundation, either version 3 of the License, or    
00012 !    (at your option) any later version.
00013 !
00014 !    AstroBEAR is distributed in the hope that it will be useful, 
00015 !    but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 !    GNU General Public License for more details.
00018 !
00019 !    You should have received a copy of the GNU General Public License
00020 !    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
00021 !
00022 !#########################################################################
00023 MODULE Timing
00024    USE GlobalDeclarations
00025    USE HyperbolicDeclarations
00026    IMPLICIT NONE
00027    SAVE
! Integer handles used to index the module-level Timers array; TimerInit stores
! a text label for each one in Timers(handle)%description.
!
! The handles are declared in three statements (instead of a single 65-line
! continued statement) so that every statement stays below the 39-continuation-line
! limit of Fortran 95.  All numeric values are unchanged:
!    1..32 : i*  handles (the range StageLow..StageHigh of this module)
!   33..56 : ii* handles (value 41 is not assigned to any name)
!   57..64 : remaining i* handles
! MaxTimers is the extent of the Timers array and equals the largest handle.
INTEGER, PARAMETER :: &
     iInitInfos                            = 1,  &
     iProlongateParentsData                = 2,  &
     iChildMaskOverlaps                    = 3,  &
     iUpdateOverlaps                       = 4,  &
     iApplyOverlaps                        = 5,  &
     iAfterOverlaps                        = 6,  &
     iParticleUpdate                       = 7,  &
     iApplyPhysicalBCs                     = 8,  &
     iSetErrFlags                          = 9,  &
     iAgeNodesChildren                     = 10, &
     iBackUpNodes                          = 11, &
     iCreateChildrens                      = 12, &
     iInheritOverlapsOldChildren           = 13, &
     iInheritNeighborsChildren             = 14, &
     iInheritOverlapsNewChildren           = 15, &
     iInheritOldNodeOverlapsChildren       = 16, &
     iInheritNewNodeOverlapsChildren       = 17, &
     iScheduledAdvanceGrids                = 18, &
     iAdvanceGrids                         = 19, &
     iRestrictionFixups                    = 20, &
     iAMR                                  = 21, &
     iElliptic                             = 22, &
     iPrintAdvance                         = 23, &
     iApplyChildrenData                    = 24, &
     iCompleteAdvancedGrids                = 25, &
     iUpdateChildMasks                     = 26, &
     iSyncFluxes                           = 27, &
     iAccumulateFluxes                     = 28, &
     iNullifyNeighbors                     = 29, &
     iCoarsenDataForParents                = 30, &
     iClearParentProcs                     = 31, &
     iAfterFixups                          = 32

INTEGER, PARAMETER :: &
     iiRecvGridsFromParents                = 33, &
     iiSendGridsToChildren                 = 34, &
     iiRecvParentsData                     = 35, &
     iiSendChildrenData                    = 36, &
     iiRecvOverlapsNeighbors               = 37, &
     iiSendOverlapsNeighbors               = 38, &
     iiRecvOldNodeOverlaps                 = 39, &
     iiSendOverlapsToOldNodesChildren      = 40, &
     iiSendOverlapsToNodesOldChildren      = 42, &
     iiRecvOverlaps                        = 43, &
     iiSendOverlaps                        = 44, &
     iiRecvNeighboringChildren             = 45, &
     iiSendNeighboringChildren             = 46, &
     iiRecvOverlappingChildrenFromNewNodes = 47, &
     iiSendOverlappingChildrenToOldNodes   = 48, &
     iiRecvOverlappingChildrenFromOldNodes = 49, &
     iiSendOverlappingChildrenToNewNodes   = 50, &
     iiRecvChildrenData                    = 51, &
     iiSendParentsData                     = 52, &
     iiRecvFluxes                          = 53, &
     iiSendFluxes                          = 54, &
     iiRecvEllipticData                    = 55, &
     iiSendEllipticData                    = 56

INTEGER, PARAMETER :: &
     iWaitingAdvances                      = 57, &
     iBackUpData                           = 58, &
     iApplyEllipticBCs                     = 59, &
     iDistributeChildrens                  = 60, &
     iProcessData                          = 61, &
     iWriteData                            = 62, &
     iBarrier                              = 63, &
     iTestBadCFL                           = 64, &
     MaxTimers                             = 64
00093 
00094 
! Inclusive handle ranges within the timer table.  Nothing in this module reads
! them; presumably other units loop over StageLow..StageHigh and CommLow..CommHigh.
! NOTE(review): the ii* handles of this module run from 33 through 56
! (iiSendEllipticData = 56), so CommHigh = 55 leaves the last one out of the
! range -- confirm whether that is intentional before changing it.
INTEGER, PARAMETER :: StageLow = 1,  StageHigh = 32
INTEGER, PARAMETER :: CommLow  = 33, CommHigh  = 55
! One stopwatch with an independent slot per level index (-2..MaxDepth).
!   LastStarted(n) : MPI_Wtime() reading stored by the most recent StartTimer for slot n
!   Accumulator(n) : sum of the intervals added by StopTimer for slot n
!   description    : text label printed by WriteStats
! Every component is default-initialized, so a timer that is stopped without
! having been started reads a defined value (0) instead of undefined memory.
TYPE TimerDef
   REAL(8) :: LastStarted(-2:MaxDepth)=0
   REAL(8) :: Accumulator(-2:MaxDepth)=0
   CHARACTER(LEN=40) :: description=''
END TYPE TimerDef
00104 
! Timer storage.  Timers holds one TimerDef per handle 1..MaxTimers and is the
! table used by StartTimer, StopTimer, TimerInit and WriteStats.  AdvanceTimer and
! AdvancePredictor are stand-alone timers: this module only reduces and prints
! their accumulators in WriteStats, so they are presumably filled elsewhere.
TYPE(TimerDef), DIMENSION(MaxTimers) :: Timers
TYPE(TimerDef), PUBLIC :: AdvanceTimer, AdvancePredictor
00108 
00109 CONTAINS
00110 
! Record the start time of timer `index` for level slot `n`.
!
!   index : timer handle (one of the i*/ii* constants, 1..MaxTimers)
!   n     : level slot, must lie in -2..MaxDepth (the bounds of LastStarted)
!
! When lTimingLog is set, a line with the time since InitTime and the integer
! code 100*(n+2)+index+MPI_ID is appended to the timer log for every handle
! except iAMR.  The clock is read after the log write so that the cost of the
! write is not part of the interval being measured.  No bounds checking is done.
SUBROUTINE StartTimer(index,n)
   INTEGER, INTENT(IN) :: index, n
   IF (lTimingLog .AND. index /= iAMR) write(TIMER_LOG_HANDLE,*) MPI_Wtime()-InitTime, 100*(n+2)+index+MPI_ID
   Timers(index)%LastStarted(n)=MPI_Wtime()
END SUBROUTINE StartTimer
00117 
! Stop timer `index` for level slot `n` and add the elapsed time to its accumulator.
!
!   index : timer handle (one of the i*/ii* constants, 1..MaxTimers)
!   n     : level slot, must lie in -2..MaxDepth (the bounds of Accumulator)
!
! The clock is read once, before the optional log write, so the logged time and
! the accumulated interval use the same reading and the write itself is not timed.
! When lTimingLog is set, the log line has the same layout as in StartTimer
! (time since InitTime, then 100*(n+2)+index+MPI_ID); handle iAMR is not logged.
SUBROUTINE StopTimer(index,n)
   INTEGER, INTENT(IN) :: index, n
   REAL(8) :: tStop
   tStop=MPI_Wtime()
   IF (lTimingLog .AND. index /= iAMR) write(TIMER_LOG_HANDLE,*) tStop-InitTime, 100*(n+2)+index+MPI_ID
   ! Form the (small) interval first: adding the raw clock value to the accumulator
   ! and subtracting afterwards would round at the magnitude of the clock value.
   Timers(index)%Accumulator(n)=Timers(index)%Accumulator(n)+(tStop-Timers(index)%LastStarted(n))
END SUBROUTINE StopTimer
00124 
! Initialise the timer table.
!
! Assigns the text label of every named timer handle, zeroes all accumulators of
! Timers, opens the per-rank timing log when lTimingLog is set, and sets InitTime
! to the MPI_Wtime() reading summed over MPI_COMM_WORLD and divided by MPI_NP.
! Collective: it calls MPI_ALLREDUCE, so every rank has to call it.
!
! The log file is named timer_<rank>.log with the rank zero-padded to at least
! four digits (timer_0007.log).  The I0.4 edit descriptor widens automatically, so
! ranks >= 10000 get distinct names instead of all collapsing onto 'timer_****.log'
! as a fixed-width I4.4 field would make them do.
SUBROUTINE TimerInit()
   INTEGER :: i, iErr
   CHARACTER(LEN=12) :: RankTag
   CHARACTER(LEN=32) :: FILENAME
   Timers(iInitInfos)%description                            = 'InitInfos'
   Timers(iProlongateParentsData)%description                = 'ProlongateParentsData'
   Timers(iChildMaskOverlaps)%description                    = 'ChildMaskOverlaps'
   Timers(iUpdateOverlaps)%description                       = 'UpdateOverlaps'
   Timers(iApplyOverlaps)%description                        = 'ApplyOverlaps'
   Timers(iAfterOverlaps)%description                        = 'AfterOverlaps'
   Timers(iParticleUpdate)%description                       = 'ParticleUpdate'
   Timers(iApplyPhysicalBCs)%description                     = 'ApplyPhysicalBCs'
   Timers(iApplyEllipticBCs)%description                     = 'ApplyEllipticBCs'
   Timers(iSetErrFlags)%description                          = 'SetErrFlags'
   Timers(iAgeNodesChildren)%description                     = 'AgeNodesChildren'
   Timers(iBackUpNodes)%description                          = 'BackUpNodes'
   Timers(iCreateChildrens)%description                      = 'CreateChildrens'
   Timers(iDistributeChildrens)%description                  = 'DistributeChildrens'
   Timers(iInheritOverlapsOldChildren)%description           = 'InheritOverlapsOldChildren'
   Timers(iInheritNeighborsChildren)%description             = 'InheritNeighborsChildren'
   Timers(iInheritOverlapsNewChildren)%description           = 'InheritOverlapsNewChildren'
   Timers(iInheritOldNodeOverlapsChildren)%description       = 'InheritOldNodeOverlapsChildren'
   Timers(iInheritNewNodeOverlapsChildren)%description       = 'InheritNewNodeOverlapsChildren'
   Timers(iScheduledAdvanceGrids)%description                = 'ScheduleAdvanceGrids'
   Timers(iAdvanceGrids)%description                         = 'AdvanceGrids'
   Timers(iCompleteAdvancedGrids)%description                = 'CompleteAdvancedGrids'
   Timers(iWaitingAdvances)%description                      = 'WaitingAdvances'
   Timers(iElliptic)%description                             = 'Elliptic'
   Timers(iPrintAdvance)%description                         = 'PrintAdvance'
   Timers(iApplyChildrenData)%description                    = 'ApplyChildrenData'
   Timers(iRestrictionFixups)%description                    = 'RestrictionFixups'
   Timers(iUpdateChildMasks)%description                     = 'UpdateChildMasks'
   Timers(iSyncFluxes)%description                           = 'SyncFluxes'
   Timers(iAccumulateFluxes)%description                     = 'AccumulateFluxes'
   Timers(iNullifyNeighbors)%description                     = 'NullifyNeighbors'
   Timers(iCoarsenDataForParents)%description                = 'CoarsenDataForParents'
   Timers(iClearParentProcs)%description                     = 'ClearParentProcs'
   Timers(iAfterFixups)%description                          = 'AfterFixups'
   Timers(iiRecvGridsFromParents)%description                = 'RecvGridsFromParents'
   Timers(iiSendGridsToChildren)%description                 = 'SendGridsToChildren'
   Timers(iiRecvParentsData)%description                     = 'RecvParentsData'
   Timers(iiSendChildrenData)%description                    = 'SendChildrenData'
   Timers(iiRecvOverlapsNeighbors)%description               = 'RecvOverlapsNeighbors'
   Timers(iiSendOverlapsNeighbors)%description               = 'SendOverlapsNeighbors'
   Timers(iiRecvOldNodeOverlaps)%description                 = 'RecvOldNodeOverlaps'
   Timers(iiSendOverlapsToOldNodesChildren)%description      = 'SendOverlapsToOldNodesChildren'
   Timers(iiSendOverlapsToNodesOldChildren)%description      = 'SendOverlapsToNodesOldChildren'
   Timers(iiRecvOverlaps)%description                        = 'RecvOverlaps'
   Timers(iiSendOverlaps)%description                        = 'SendOverlaps'
   Timers(iiRecvNeighboringChildren)%description             = 'RecvNeighboringChildren'
   Timers(iiSendNeighboringChildren)%description             = 'SendNeighboringChildren'
   Timers(iiRecvOverlappingChildrenFromNewNodes)%description = 'RecvOverlappingChildrenFromNewNodes'
   Timers(iiSendOverlappingChildrenToOldNodes)%description   = 'SendOverlappingChildrenToOldNodes'
   Timers(iiRecvOverlappingChildrenFromOldNodes)%description = 'RecvOverlappingChildrenFromOldNodes'
   Timers(iiSendOverlappingChildrenToNewNodes)%description   = 'SendOverlappingChildrenToNewNodes'
   Timers(iiRecvChildrenData)%description                    = 'RecvChildrenData'
   Timers(iiSendParentsData)%description                     = 'SendParentsData'
   Timers(iiRecvFluxes)%description                          = 'RecvFluxes'
   Timers(iiSendFluxes)%description                          = 'SendFluxes'
   Timers(iiRecvEllipticData)%description                    = 'RecvEllipticData'
   Timers(iiSendEllipticData)%description                    = 'SendEllipticData'
   Timers(iAMR)%description                                  = 'AMR'
   Timers(iProcessData)%description                          = 'ProcessData'
   Timers(iWriteData)%description                            = 'WriteData'
   Timers(iBarrier)%description                              = 'Barrier'
   Timers(iTestBadCFL)%description                           = 'TestBadCFL'

   ! Handles without a label above (iBackUpData, and the unassigned slot 41)
   ! keep the empty default description.
   DO i=1,MaxTimers
      Timers(i)%Accumulator=0
   END DO

   IF (lTimingLog) THEN
      ! Identical output to I4.4 for ranks 0..9999; grows instead of overflowing beyond that.
      write(RankTag,'(I0.4)') MPI_ID
      FILENAME='timer_'//TRIM(RankTag)//'.log'
      OPEN (UNIT=TIMER_LOG_HANDLE, file=TRIM(FILENAME), status="unknown")
      write(TIMER_LOG_HANDLE, '(A,A)') '# Timer_', TRIM(RankTag)
   END IF

   ! Use one common time origin on all ranks: the mean of the local clock readings.
   InitTime=MPI_WTime()
   CALL MPI_ALLREDUCE(MPI_IN_PLACE, InitTime, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
   InitTime=InitTime/MPI_NP
END SUBROUTINE TimerInit
00202 
00203 
! Reduce the timer accumulators over MPI_COMM_WORLD and print the timing report.
!
! Collective: every rank must call it (it issues several MPI_ALLREDUCE calls);
! only rank 0 (MPI_ID == 0) writes to standard output.  Steps:
!   1. Sum every Timers(:)%Accumulator over all ranks and divide by MPI_NP.
!   2. On rank 0, turn the iAMR row into per-level exclusive times by subtracting
!      the next finer level's value, then print four percentage tables.
!   3. Sum AdvancePredictor, AdvanceTimer and the cell-update count over all ranks
!      and print them, followed by a one-line 'stats' summary, on rank 0.
!   4. Close the timer log if lTimingLog is set.
! Side effect: AdvancePredictor%Accumulator and AdvanceTimer%Accumulator are
! overwritten in place with their global sums.
! NOTE(review): the percentage tables divide by iAMR times; a level (or a whole
! run) whose iAMR accumulator is zero prints NaN/Infinity there.
SUBROUTINE WriteStats()
   INTEGER :: iErr, i
   REAL(8) :: TotalAccumulators(1:MaxTimers,-2:MaxDepth)
   REAL(8) :: TotalAmrTime          ! inclusive iAMR time at BaseLevel (set on rank 0 only)
   REAL(8) :: AmrTimeAllLevels      ! sum of the exclusive per-level iAMR times (rank 0 only)
   REAL(8) :: LocalCellUpdates, TotalCellUpdates

   DO i=1, MaxTimers
      TotalAccumulators(i,:)=Timers(i)%Accumulator
   END DO

   CALL MPI_ALLREDUCE(MPI_IN_PLACE, TotalAccumulators, SIZE(TotalAccumulators), MPI_DOUBLE_PRECISION, &
        MPI_SUM, MPI_COMM_WORLD, iErr)
   ! Convert the divisor in double precision (plain REAL() would go through single precision).
   TotalAccumulators=TotalAccumulators/REAL(MPI_NP,8)

   !   CALL MPI_ALLREDUCE(MPI_IN_PLACE, NumCellUpdatesByLevel, MaxLevel+1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, iErr)
   !   CALL MPI_ALLREDUCE(MPI_IN_PLACE, EffectiveCellUpdatesByLevel, MaxLevel+1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, iErr)

   IF (MPI_ID == 0) THEN
      TotalAmrTime=TotalAccumulators(iAMR,BaseLevel)
      ! Ascending order matters: level i must be reduced by the still unmodified value of level i+1.
      DO i=BaseLevel,MaxLevel-1
         TotalAccumulators(iAMR,i) = TotalAccumulators(iAMR,i) - TotalAccumulators(iAMR,i+1)
      END DO
      AmrTimeAllLevels=SUM(TotalAccumulators(iAMR,BaseLevel:MaxLevel))

      write(*,*) ' ========== Relative Times overall =========== '
      DO i=1,MaxTimers
         write(*,'(A42, 20F12.2)') Timers(i)%description, TotalAccumulators(i,BaseLevel:MaxLevel) /TotalAmrTime*100d0
      END DO

      write(*,*) ' ========== Relative Times Across all levels =========== '
      DO i=1, MaxTimers
         write(*,'(A42, 20F12.2)') Timers(i)%description, SUM(TotalAccumulators(i,BaseLevel:MaxLevel))/TotalAmrTime*100d0
      END DO

      write(*,*) ' ========== Relative Times within Level =========== '
      DO i=1,MaxTimers
         write(*,'(A42, 20F12.2)') Timers(i)%description, &
              TotalAccumulators(i,BaseLevel:MaxLevel)/TotalAccumulators(iAMR,BaseLevel:MaxLevel)*100d0
      END DO

      ! Cover the same level range as the denominator and as the tables above, so
      ! that the printed shares add up to 100% (the loop used to start at level 1).
      write(*,*) ' ========== Relative Times of each Level =========== '
      DO i=BaseLevel,MaxLevel
         write(*,'(A38, I4, 20F12.2)') "Level ", i, TotalAccumulators(iAMR,i)/AmrTimeAllLevels*100d0
      END DO

      !      write(*,*) ' ========== Cells by level ==============='
      !      DO i=0,MaxLevel
      !         write(*,'(I4,2I13)') i, NumCellUpdatesByLevel(i), EffectiveCellUpdatesByLevel(i)
      !      END DO

      !      write(*,*) ' ========== Filling Fraction By Level =========== '
      !      DO i=1,MaxLevel
      !         write(*,'(I4,2F13.3)') i, REAL(NumCellUpdatesByLevel(i))/REAL(NumCellUpdatesByLevel(i-1)*2**(nDim+1)), &
      !              REAL(EffectiveCellUpdatesByLevel(i))/REAL(NumCellUpdatesByLevel(i-1)*2**(nDim+1))
      !      END DO

      write(*,*) "Total AMR Time=", AmrTimeAllLevels
      !      write(*,*) "Total WorkLoad=", SUM(NumCellUpdatesByLevel(:)), SUM(EffectiveCellUpdatesByLevel(:))
   END IF

   ! A count of MaxLevel+3 elements starting at lower bound -2 covers slots -2..MaxLevel.
   CALL MPI_ALLREDUCE(MPI_IN_PLACE, AdvancePredictor%Accumulator, MaxLevel+3, MPI_DOUBLE_PRECISION, &
        MPI_SUM, MPI_COMM_WORLD, iErr)
   CALL MPI_ALLREDUCE(MPI_IN_PLACE, AdvanceTimer%Accumulator, MaxLevel+3, MPI_DOUBLE_PRECISION, &
        MPI_SUM, MPI_COMM_WORLD, iErr)

   ! Hand MPI a real variable as send buffer rather than an expression temporary.
   LocalCellUpdates=REAL(SUM(InternalCellUpdates), 8)
   CALL MPI_ALLREDUCE(LocalCellUpdates, TotalCellUpdates, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)

   IF (MPI_ID == 0) THEN
      write(*,'(A,16E25.16)') "Predicted work load", AdvancePredictor%Accumulator(0:MaxLevel)
      write(*,'(A,16E25.16)') "Actual advance work", AdvanceTimer%Accumulator(0:MaxLevel)

      write(*,'(A6,9A13,2A8)') 'stats', 'AdvanceTimer', 'BarrierTimer', 'CPUTime', 'OtherTime', 'CellUpdates', &
           'iThreaded', 'LevelBalance', 'LevelBalance', 'MaxLevel', 'mx', 'MPI_NP'
      ! The global cell-update count can exceed HUGE of a default integer (about 2.1e9),
      ! so round it to an 8-byte integer; the I13 field prints either kind.
      write(*,'(A6,4E13.4,I13,I13,2E13.4,I13,2I8)') 'stats', &
           SUM(TotalAccumulators(iAdvanceGrids,:)), &
           SUM(TotalAccumulators(iBarrier,:)), &
           TotalAmrTime, &
           TotalAmrTime-SUM(TotalAccumulators(iBarrier,:))-SUM(AdvanceTimer%Accumulator(0:MaxLevel))/MPI_NP, &
           NINT(TotalCellUpdates, KIND=8), iThreaded, LevelBalance, MaxLevel, Gmx(1), MPI_NP
   END IF

   IF (lTimingLog) THEN
      CLOSE(TIMER_LOG_HANDLE)
   END IF

END SUBROUTINE WriteStats
00284 END MODULE Timing
00285 !   REAL(8), DIMENSION(4) :: EfficiencyStats
00286 !   DO i=1,MaxTimers
00287 !      write(*,'(
00288 !   PRINT *
00289 !   PRINT *, "PROC ", MPI_id, " COMMUNICATION STAGES"
00290 !!   DO n = 1, nStages
00291 !      PRINT "('Proc ', i2, ' stage ', i2, ' times = ', 9f9.3, ' seconds.')", MPI_id, n, stage_times(:,n)
00292 !   END DO
00293 
00294 !   PRINT *
00295 !   PRINT *, "PROC ", MPI_ID, " SERIAL STAGES"
00296 
00297 !   PRINT "('Proc ', i2, ' ClearParentProcs times               = ', 9f9.3, ' seconds.')", MPI_id, tClearParentProcs
00298 !   PRINT "('Proc ', i2, ' AfterFixups times                    = ', 9f9.3, ' seconds.')", MPI_id, tAfterFixups
00299 !   PRINT "('Proc ', i2, ' AdvanceEfficiency                    = ', 9f9.3, ' %.')", MPI_id, 100d0*(1d0-sum(tPrintAdvance)/sum(tCompleteAdvancedGrids+tWaitingAdvances))
00300 !   PRINT "('Proc ', i2, ' ExpectedEfficiency                   = ', 9f9.3, ' %.')", MPI_id, 100d0*(1d0-sum(TimeWastedByLevel)/sum(AccumulatedWorkDoneByLevel))
00301 !   EfficiencyStats=(/sum(TimeWastedByLevel),sum(AccumulatedWorkDoneByLevel),sum(tPrintAdvance),sum(tCompleteAdvancedGrids+tWaitingAdvances)/)
00302 !   CALL MPI_ALLREDUCE(MPI_IN_PLACE, EfficiencyStats, 4, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
00303 !   PRINT "('Proc ', i2, ' ExpectedOverallEfficiency            = ', 9f9.3, ' %.')", MPI_id, 100d0*(1d0-EfficiencyStats(1)/EfficiencyStats(2))
00304 !   PRINT "('Proc ', i2, ' ActualOverallEfficiency              = ', 9f9.3, ' %.')", MPI_id, 100d0*(1d0-EfficiencyStats(3)/EfficiencyStats(4))
00305 ! Shut down the MPI job.
00306 
 All Classes Files Functions Variables