Scrambler
1
|
!#########################################################################
!
!    Copyright (C) 2003-2012 Department of Physics and Astronomy,
!                            University of Rochester,
!                            Rochester, NY
!
!    timing.f90 is part of AstroBEAR.
!
!    AstroBEAR is free software: you can redistribute it and/or modify
!    it under the terms of the GNU General Public License as published by
!    the Free Software Foundation, either version 3 of the License, or
!    (at your option) any later version.
!
!    AstroBEAR is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
!
!#########################################################################

!> Named wall-clock timers, accumulated per timer index and per level.
!!
!! Each timer stores, for every level in -2:MaxDepth, the MPI_Wtime() value
!! of its last start and the total time accumulated between start/stop
!! pairs.  WriteStats averages the accumulators over all ranks and prints
!! several relative-time tables on rank 0.
!!
!! Names used here but not declared in this module (lTimingLog,
!! TIMER_LOG_HANDLE, InitTime, MPI_ID, MPI_NP, MaxDepth, MaxLevel,
!! BaseLevel, InternalCellUpdates, iThreaded, LevelBalance, Gmx and the MPI
!! symbols) are expected to be provided by the USEd modules.
MODULE Timing
   USE GlobalDeclarations
   USE HyperbolicDeclarations
   IMPLICIT NONE
   SAVE

   ! Timer indices into Timers(:).  The list is split into three PARAMETER
   ! statements so that no single statement needs more continuation lines
   ! than older Fortran standards allow.
   INTEGER, PARAMETER ::                               &
        iInitInfos                            = 1,     &
        iProlongateParentsData                = 2,     &
        iChildMaskOverlaps                    = 3,     &
        iUpdateOverlaps                       = 4,     &
        iApplyOverlaps                        = 5,     &
        iAfterOverlaps                        = 6,     &
        iParticleUpdate                       = 7,     &
        iApplyPhysicalBCs                     = 8,     &
        iSetErrFlags                          = 9,     &
        iAgeNodesChildren                     = 10,    &
        iBackUpNodes                          = 11,    &
        iCreateChildrens                      = 12,    &
        iInheritOverlapsOldChildren           = 13,    &
        iInheritNeighborsChildren             = 14,    &
        iInheritOverlapsNewChildren           = 15,    &
        iInheritOldNodeOverlapsChildren       = 16,    &
        iInheritNewNodeOverlapsChildren       = 17,    &
        iScheduledAdvanceGrids                = 18,    &
        iAdvanceGrids                         = 19,    &
        iRestrictionFixups                    = 20,    &
        iAMR                                  = 21,    &
        iElliptic                             = 22,    &
        iPrintAdvance                         = 23,    &
        iApplyChildrenData                    = 24,    &
        iCompleteAdvancedGrids                = 25,    &
        iUpdateChildMasks                     = 26,    &
        iSyncFluxes                           = 27,    &
        iAccumulateFluxes                     = 28,    &
        iNullifyNeighbors                     = 29,    &
        iCoarsenDataForParents                = 30,    &
        iClearParentProcs                     = 31,    &
        iAfterFixups                          = 32

   ! NOTE: index 41 is not assigned to any name; Timers(41) keeps an empty
   ! description and a zero accumulator unless something else writes to it.
   INTEGER, PARAMETER ::                               &
        iiRecvGridsFromParents                = 33,    &
        iiSendGridsToChildren                 = 34,    &
        iiRecvParentsData                     = 35,    &
        iiSendChildrenData                    = 36,    &
        iiRecvOverlapsNeighbors               = 37,    &
        iiSendOverlapsNeighbors               = 38,    &
        iiRecvOldNodeOverlaps                 = 39,    &
        iiSendOverlapsToOldNodesChildren      = 40,    &
        iiSendOverlapsToNodesOldChildren      = 42,    &
        iiRecvOverlaps                        = 43,    &
        iiSendOverlaps                        = 44,    &
        iiRecvNeighboringChildren             = 45,    &
        iiSendNeighboringChildren             = 46,    &
        iiRecvOverlappingChildrenFromNewNodes = 47,    &
        iiSendOverlappingChildrenToOldNodes   = 48,    &
        iiRecvOverlappingChildrenFromOldNodes = 49,    &
        iiSendOverlappingChildrenToNewNodes   = 50,    &
        iiRecvChildrenData                    = 51,    &
        iiSendParentsData                     = 52,    &
        iiRecvFluxes                          = 53,    &
        iiSendFluxes                          = 54,    &
        iiRecvEllipticData                    = 55,    &
        iiSendEllipticData                    = 56

   INTEGER, PARAMETER ::                               &
        iWaitingAdvances                      = 57,    &
        iBackUpData                           = 58,    &
        iApplyEllipticBCs                     = 59,    &
        iDistributeChildrens                  = 60,    &
        iProcessData                          = 61,    &
        iWriteData                            = 62,    &
        iBarrier                              = 63,    &
        iTestBadCFL                           = 64,    &
        MaxTimers                             = 64

   ! Index ranges; not referenced inside this module.
   ! NOTE(review): the ii* send/recv indices run from 33 to 56, but CommHigh
   ! is 55 - confirm whether iiSendEllipticData (56) is excluded on purpose.
   INTEGER, PARAMETER :: CommLow  = 33
   INTEGER, PARAMETER :: CommHigh = 55
   INTEGER, PARAMETER :: StageLow = 1
   INTEGER, PARAMETER :: StageHigh= 32

   !> One timer: last start time and accumulated time for each level.
   TYPE TimerDef
      REAL(8) :: LastStarted(-2:MaxDepth)
      REAL(8) :: Accumulator(-2:MaxDepth)=0
      CHARACTER(LEN=40) :: description=''
   END TYPE TimerDef

   TYPE(TimerDef) :: Timers(MaxTimers)
   TYPE(TimerDef), PUBLIC :: AdvanceTimer
   TYPE(TimerDef), PUBLIC :: AdvancePredictor

CONTAINS

   !> Record the current MPI_Wtime() as the start time of timer `index` on
   !! level `n`.
   !! When lTimingLog is set and the timer is not iAMR, a record holding the
   !! time since InitTime and the code 100*(n+2)+index+MPI_ID is also
   !! written to TIMER_LOG_HANDLE.
   !! @param index timer index, 1..MaxTimers
   !! @param n     level, -2..MaxDepth
   SUBROUTINE StartTimer(index,n)
      INTEGER, INTENT(IN) :: index, n
      IF (lTimingLog .AND. index /= iAMR) &
           write(TIMER_LOG_HANDLE,*) MPI_Wtime()-InitTime, 100*(n+2)+index+MPI_ID
      ! write(*,'(A,I6,A,A,A,I3,A,E15.5)') 'Processor ', MPI_ID, ' Starting ', &
      !      Timers(index)%description, ' on level ', n, ' at ', MPI_Wtime()-InitTime
      Timers(index)%LastStarted(n)=MPI_Wtime()
   END SUBROUTINE StartTimer

   !> Add the time elapsed since the matching StartTimer call to the
   !! accumulator of timer `index` on level `n`.
   !! Writes the same kind of log record as StartTimer when lTimingLog is
   !! set and the timer is not iAMR.
   !! @param index timer index, 1..MaxTimers
   !! @param n     level, -2..MaxDepth
   SUBROUTINE StopTimer(index,n)
      INTEGER, INTENT(IN) :: index, n
      IF (lTimingLog .AND. index /= iAMR) &
           write(TIMER_LOG_HANDLE,*) MPI_Wtime()-InitTime, 100*(n+2)+index+MPI_ID
      ! write(*,'(A,I6,A,A,A,I3,A,E15.5)') 'Processor ', MPI_ID, ' Stopping ', &
      !      Timers(index)%description, ' on level ', n, ' at ', MPI_Wtime()-InitTime
      Timers(index)%Accumulator(n)=Timers(index)%Accumulator(n)+MPI_Wtime()-Timers(index)%LastStarted(n)
   END SUBROUTINE StopTimer

   !> Label every named timer, zero all accumulators, optionally open the
   !! per-rank timing log, and set InitTime.
   !! Contains an MPI_ALLREDUCE over MPI_COMM_WORLD, so every rank of that
   !! communicator has to call it.
   SUBROUTINE TimerInit()
      INTEGER :: i, iErr
      CHARACTER(LEN=14) :: FILENAME   ! "timer_" + 4 digits + ".log" = 14 characters

      Timers(iInitInfos)%description = 'InitInfos'
      Timers(iProlongateParentsData)%description = 'ProlongateParentsData'
      Timers(iChildMaskOverlaps)%description = 'ChildMaskOverlaps'
      Timers(iUpdateOverlaps)%description = 'UpdateOverlaps'
      Timers(iApplyOverlaps)%description = 'ApplyOverlaps'
      Timers(iAfterOverlaps)%description = 'AfterOverlaps'
      Timers(iParticleUpdate)%description = 'ParticleUpdate'
      Timers(iApplyPhysicalBCs)%description = 'ApplyPhysicalBCs'
      Timers(iApplyEllipticBCs)%description = 'ApplyEllipticBCs'
      Timers(iSetErrFlags)%description = 'SetErrFlags'
      Timers(iAgeNodesChildren)%description = 'AgeNodesChildren'
      Timers(iBackUpNodes)%description = 'BackUpNodes'
      ! Previously the only named timer left without a label.
      Timers(iBackUpData)%description = 'BackUpData'
      Timers(iCreateChildrens)%description = 'CreateChildrens'
      Timers(iDistributeChildrens)%description = 'DistributeChildrens'
      Timers(iInheritOverlapsOldChildren)%description = 'InheritOverlapsOldChildren'
      Timers(iInheritNeighborsChildren)%description = 'InheritNeighborsChildren'
      Timers(iInheritOverlapsNewChildren)%description = 'InheritOverlapsNewChildren'
      Timers(iInheritOldNodeOverlapsChildren)%description = 'InheritOldNodeOverlapsChildren'
      Timers(iInheritNewNodeOverlapsChildren)%description = 'InheritNewNodeOverlapsChildren'
      Timers(iScheduledAdvanceGrids)%description = 'ScheduleAdvanceGrids'
      Timers(iAdvanceGrids)%description = 'AdvanceGrids'
      Timers(iCompleteAdvancedGrids)%description = 'CompleteAdvancedGrids'
      Timers(iWaitingAdvances)%description = 'WaitingAdvances'
      Timers(iElliptic)%description = 'Elliptic'
      Timers(iPrintAdvance)%description = 'PrintAdvance'
      Timers(iApplyChildrenData)%description = 'ApplyChildrenData'
      Timers(iRestrictionFixups)%description = 'RestrictionFixups'
      Timers(iUpdateChildMasks)%description = 'UpdateChildMasks'
      Timers(iSyncFluxes)%description = 'SyncFluxes'
      Timers(iAccumulateFluxes)%description = 'AccumulateFluxes'
      Timers(iNullifyNeighbors)%description = 'NullifyNeighbors'
      Timers(iCoarsenDataForParents)%description = 'CoarsenDataForParents'
      Timers(iClearParentProcs)%description = 'ClearParentProcs'
      Timers(iAfterFixups)%description = 'AfterFixups'
      Timers(iiRecvGridsFromParents)%description = 'RecvGridsFromParents'
      Timers(iiSendGridsToChildren)%description = 'SendGridsToChildren'
      Timers(iiRecvParentsData)%description = 'RecvParentsData'
      Timers(iiSendChildrenData)%description = 'SendChildrenData'
      Timers(iiRecvOverlapsNeighbors)%description = 'RecvOverlapsNeighbors'
      Timers(iiSendOverlapsNeighbors)%description = 'SendOverlapsNeighbors'
      Timers(iiRecvOldNodeOverlaps)%description = 'RecvOldNodeOverlaps'
      Timers(iiSendOverlapsToOldNodesChildren)%description = 'SendOverlapsToOldNodesChildren'
      Timers(iiSendOverlapsToNodesOldChildren)%description = 'SendOverlapsToNodesOldChildren'
      Timers(iiRecvOverlaps)%description = 'RecvOverlaps'
      Timers(iiSendOverlaps)%description = 'SendOverlaps'
      Timers(iiRecvNeighboringChildren)%description = 'RecvNeighboringChildren'
      Timers(iiSendNeighboringChildren)%description = 'SendNeighboringChildren'
      Timers(iiRecvOverlappingChildrenFromNewNodes)%description = 'RecvOverlappingChildrenFromNewNodes'
      Timers(iiSendOverlappingChildrenToOldNodes)%description = 'SendOverlappingChildrenToOldNodes'
      Timers(iiRecvOverlappingChildrenFromOldNodes)%description = 'RecvOverlappingChildrenFromOldNodes'
      Timers(iiSendOverlappingChildrenToNewNodes)%description = 'SendOverlappingChildrenToNewNodes'
      Timers(iiRecvChildrenData)%description = 'RecvChildrenData'
      Timers(iiSendParentsData)%description = 'SendParentsData'
      Timers(iiRecvFluxes)%description = 'RecvFluxes'
      Timers(iiSendFluxes)%description = 'SendFluxes'
      Timers(iiRecvEllipticData)%description = 'RecvEllipticData'
      Timers(iiSendEllipticData)%description = 'SendEllipticData'
      Timers(iAMR)%description = 'AMR'
      Timers(iProcessData)%description = 'ProcessData'
      Timers(iWriteData)%description = 'WriteData'
      Timers(iBarrier)%description = 'Barrier'
      Timers(iTestBadCFL)%description = 'TestBadCFL'

      DO i=1,MaxTimers
         Timers(i)%Accumulator=0
      END DO

      IF (lTimingLog) THEN
         ! One log file per rank, named timer_NNNN.log from the rank id.
         write(FILENAME,'(A6,I4.4,A4)') "timer_",MPI_ID,".log"
         OPEN (UNIT=TIMER_LOG_HANDLE, file=FILENAME, status="unknown")
         write(TIMER_LOG_HANDLE, '(A,I4.4)') '# Timer_', MPI_ID
      END IF

      ! InitTime becomes the sum of every rank's MPI_WTime() divided by
      ! MPI_NP, so all ranks share the same reference time.
      InitTime=MPI_WTime()
      CALL MPI_ALLREDUCE(MPI_IN_PLACE, InitTime, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
      InitTime=InitTime/MPI_NP
   END SUBROUTINE TimerInit


   !> Reduce the timers over all ranks and print the timing tables on rank 0.
   !! Contains MPI_ALLREDUCE calls over MPI_COMM_WORLD, so every rank of that
   !! communicator has to call it.  Closes the timing log when lTimingLog is
   !! set.
   SUBROUTINE WriteStats()
      INTEGER :: iErr, i
      REAL(8) :: TotalAccumulators(1:MaxTimers,-2:MaxDepth)
      REAL(8) :: TotalAmrTime
      REAL(8) :: TotalCellUpdates

      DO i=1, MaxTimers
         TotalAccumulators(i,:)=Timers(i)%Accumulator
      END DO

      ! Sum over ranks, then divide by MPI_NP to obtain the per-rank average.
      CALL MPI_ALLREDUCE(MPI_IN_PLACE, TotalAccumulators, (MaxDepth+3)*MaxTimers, &
           MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
      TotalAccumulators=TotalAccumulators/REAL(MPI_NP)


      ! CALL MPI_ALLREDUCE(MPI_IN_PLACE, NumCellUpdatesByLevel, MaxLevel+1, MPI_INTEGER, &
      !      MPI_SUM, MPI_COMM_WORLD, iErr)
      ! CALL MPI_ALLREDUCE(MPI_IN_PLACE, EffectiveCellUpdatesByLevel, MaxLevel+1, MPI_INTEGER, &
      !      MPI_SUM, MPI_COMM_WORLD, iErr)

      IF (MPI_ID == 0) THEN
         TotalAmrTime=TotalAccumulators(iAMR,BaseLevel)
         ! Subtract the next level's iAMR entry from each level's entry.
         ! NOTE(review): presumably a level's AMR timer also spans the time
         ! spent on finer levels, making this a per-level exclusive time -
         ! confirm against the callers of StartTimer(iAMR, ...).
         DO i=BaseLevel,MaxLevel-1
            TotalAccumulators(iAMR,i) = TotalAccumulators(iAMR,i) - TotalAccumulators(iAMR,i+1)
         END DO

         write(*,*) ' ========== Relative Times overall =========== '
         DO i=1,MaxTimers
            write(*,'(A42, 20F12.2)') Timers(i)%description, &
                 TotalAccumulators(i,BaseLevel:MaxLevel) /TotalAmrTime*100d0
         END DO


         write(*,*) ' ========== Relative Times Across all levels =========== '
         DO i=1, MaxTimers
            write(*,'(A42, 20F12.2)') Timers(i)%description, &
                 SUM(TotalAccumulators(i,BaseLevel:MaxLevel))/TotalAmrTime*100d0
         END DO


         write(*,*) ' ========== Relative Times within Level =========== '
         DO i=1,MaxTimers
            write(*,'(A42, 20F12.2)') Timers(i)%description, &
                 TotalAccumulators(i,BaseLevel:MaxLevel)/TotalAccumulators(iAMR,BaseLevel:MaxLevel)*100d0
         END DO

         write(*,*) ' ========== Relative Times of each Level =========== '
         ! NOTE(review): this loop starts at level 1 while the tables above
         ! cover BaseLevel:MaxLevel - confirm that skipping the coarser
         ! levels here is intended.
         DO i=1,MaxLevel
            write(*,'(A38, I4, 20F12.2)') "Level ", i, &
                 TotalAccumulators(iAMR,i)/SUM(TotalAccumulators(iAMR,BaseLevel:MaxLevel))*100d0
         END DO


         ! write(*,*) ' ========== Cells by level ==============='
         ! DO i=0,MaxLevel
         !    write(*,'(I4,2I13)') i, NumCellUpdatesByLevel(i), EffectiveCellUpdatesByLevel(i)
         ! END DO

         ! write(*,*) ' ========== Filling Fraction By Level =========== '
         ! DO i=1,MaxLevel
         !    write(*,'(I4,2F13.3)') i, &
         !         REAL(NumCellUpdatesByLevel(i))/REAL(NumCellUpdatesByLevel(i-1)*2**(nDim+1)), &
         !         REAL(EffectiveCellUpdatesByLevel(i))/REAL(NumCellUpdatesByLevel(i-1)*2**(nDim+1))
         ! END DO

         write(*,*) "Total AMR Time=", SUM(TotalAccumulators(iAMR,BaseLevel:MaxLevel))
         ! write(*,*) "Total WorkLoad=", SUM(NumCellUpdatesByLevel(:)), SUM(EffectiveCellUpdatesByLevel(:))



      END IF

      ! These two reductions sum over ranks without averaging; MaxLevel+3
      ! elements cover levels -2..MaxLevel of each accumulator.
      CALL MPI_ALLREDUCE(MPI_IN_PLACE, AdvancePredictor%Accumulator, MaxLevel+3, &
           MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)

      CALL MPI_ALLREDUCE(MPI_IN_PLACE, AdvanceTimer%Accumulator, MaxLevel+3, &
           MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
      CALL MPI_ALLREDUCE(REAL(SUM(InternalCellUpdates), 8),TotalCellUpdates, 1, &
           MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)

      IF (MPI_ID == 0) THEN
         write(*,'(A,16E25.16)') "Predicted work load", AdvancePredictor%Accumulator(0:MaxLevel)
         write(*,'(A,16E25.16)') "Actual advance work", AdvanceTimer%Accumulator(0:MaxLevel)

         write(*,'(A6,9A13,2A8)') 'stats', 'AdvanceTimer', 'BarrierTimer', 'CPUTime', 'OtherTime', &
              'CellUpdates', 'iThreaded', 'LevelBalance', 'LevelBalance', 'MaxLevel', 'mx', 'MPI_NP'
         ! The global cell-update count is rounded to an 8-byte integer: a
         ! default-kind NINT overflows above roughly 2.1e9 updates, although
         ! the I13 field is wide enough for far larger counts.
         write(*,'(A6,4E13.4,I13,I13,2E13.4,I13,2I8)') 'stats', &
              SUM(TotalAccumulators(iAdvanceGrids,:)), &
              SUM(TotalAccumulators(iBarrier,:)), &
              TotalAmrTime, &
              TotalAmrTime-SUM(TotalAccumulators(iBarrier,:))-SUM(AdvanceTimer%Accumulator(0:MaxLevel))/MPI_NP, &
              NINT(TotalCellUpdates, KIND=8), iThreaded, LevelBalance, MaxLevel, Gmx(1), MPI_NP

      END IF
      IF (lTimingLog) THEN
         CLOSE(TIMER_LOG_HANDLE)
      END IF

   END SUBROUTINE WriteStats
END MODULE Timing
! Commented-out legacy reporting code, kept for reference:
!    REAL(8), DIMENSION(4) :: EfficiencyStats
!    DO i=1,MaxTimers
!       write(*,'(
!    PRINT *
!    PRINT *, "PROC ", MPI_id, " COMMUNICATION STAGES"
!!    DO n = 1, nStages
!       PRINT "('Proc ', i2, ' stage ', i2, ' times = ', 9f9.3, ' seconds.')", MPI_id, n, stage_times(:,n)
!    END DO

!    PRINT *
!    PRINT *, "PROC ", MPI_ID, " SERIAL STAGES"

!    PRINT "('Proc ', i2, ' ClearParentProcs times = ', 9f9.3, ' seconds.')", MPI_id, tClearParentProcs
!    PRINT "('Proc ', i2, ' AfterFixups times = ', 9f9.3, ' seconds.')", MPI_id, tAfterFixups
!    PRINT "('Proc ', i2, ' AdvanceEfficiency = ', 9f9.3, ' %.')", MPI_id, &
!         100d0*(1d0-sum(tPrintAdvance)/sum(tCompleteAdvancedGrids+tWaitingAdvances))
!    PRINT "('Proc ', i2, ' ExpectedEfficiency = ', 9f9.3, ' %.')", MPI_id, &
!         100d0*(1d0-sum(TimeWastedByLevel)/sum(AccumulatedWorkDoneByLevel))
!    EfficiencyStats=(/sum(TimeWastedByLevel),sum(AccumulatedWorkDoneByLevel),sum(tPrintAdvance), &
!         sum(tCompleteAdvancedGrids+tWaitingAdvances)/)
!    CALL MPI_ALLREDUCE(MPI_IN_PLACE, EfficiencyStats, 4, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, iErr)
!    PRINT "('Proc ', i2, ' ExpectedOverallEfficiency = ', 9f9.3, ' %.')", MPI_id, &
!         100d0*(1d0-EfficiencyStats(1)/EfficiencyStats(2))
!    PRINT "('Proc ', i2, ' ActualOverallEfficiency = ', 9f9.3, ' %.')", MPI_id, &
!         100d0*(1d0-EfficiencyStats(3)/EfficiencyStats(4))
!  Shut down the MPI job.