!#########################################################################
!
!    Copyright (C) 2003-2012 Department of Physics and Astronomy,
!                            University of Rochester,
!                            Rochester, NY
!
!    distribution_control.f90 is part of AstroBEAR.
!
!    AstroBEAR is free software: you can redistribute it and/or modify
!    it under the terms of the GNU General Public License as published by
!    the Free Software Foundation, either version 3 of the License, or
!    (at your option) any later version.
!
!    AstroBEAR is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
!
!#########################################################################
MODULE DistributionControl
  USE GlobalDeclarations
  USE DataDeclarations
  USE TreeDeclarations
  USE DistributionDeclarations
  USE Scheduling
  USE DataInfoOps
  USE Timing
  IMPLICIT NONE

  REAL(KIND=qPREC), PARAMETER :: Safety_Fact=.9d0
  REAL(KIND=qPREC) :: TotalNewWorkLoad, CoarserWorkLoad
  REAL(KIND=qPREC) :: MeanWorkLoadByLevel(-1:MaxDepth)=0
  REAL(KIND=qPREC) :: SplitFactor(-2:MaxDepth)=1d0
  LOGICAL :: lDistVerbose=.false.

CONTAINS

  SUBROUTINE DistributionInit
    INTEGER :: temp, n
!    ALLOCATE(MyRemainingWorkLoad(-2:MaxLevel))
    ALLOCATE(ParentProcs(-1:MaxLevel))
    ALLOCATE(ChildProcs(-2:MaxLevel-1))
!    MyRemainingWorkLoad=0
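    ! Note on the Hilbert setup below (a reading of the code, not original
    ! documentation): the Rootn loop chooses the smallest power of two that
    ! covers the largest base-grid dimension, so 2**Rootn is the side of the
    ! Hilbert lattice, and di, dj, dk rescale level-0 cell indices onto that
    ! lattice for the closedformhilbert2d/3d calls in HilbertValue.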
    levels(-2)%MPI_COMM=MPI_COMM_WORLD
    DO n = -1, MaxLevel
      NULLIFY(ParentProcs(n)%p)
      NULLIFY(ChildProcs(n-1)%p)
    END DO
    IF (MPI_ID /= 0) THEN
      ALLOCATE(ParentProcs(-1)%p(1))
      ParentProcs(-1)%p=0
    END IF
    temp=1
    DO Rootn=1,1000
      temp=temp*2
      IF (temp >= maxval(levels(0)%mX)) EXIT
    END DO
    di=REAL(2**Rootn)/REAL(levels(0)%mX(1))
    dj=REAL(2**Rootn)/REAL(levels(0)%mX(2))
    dk=REAL(2**Rootn)/REAL(levels(0)%mX(3))

  END SUBROUTINE DistributionInit


  SUBROUTINE GetLevelLoad(n)
    INTEGER :: n, i
    TYPE(NodeDefList), POINTER :: nodelist
    TYPE(NodeDef), POINTER :: node
    WorkLoadByLevelPerStep(n,:)=0
    nodelist=>Nodes(n)%p
    IF (n < 0) RETURN

    DO WHILE (associated(nodelist))
      node=>nodelist%self
      DO i=1, levels(n)%steps
        node%info%CostPerGrid(i) = GetMyCosts(node%info,i)
      END DO
      WorkLoadByLevelPerStep(n,1:levels(n)%steps)=WorkLoadByLevelPerStep(n,1:levels(n)%steps)+node%info%CostPerGrid(1:levels(n)%steps)
      nodelist=>nodelist%next
    END DO
    MeanWorkLoadByLevelPerStep(n)=sum(WorkLoadByLevelPerStep(n,1:levels(n)%steps))/real(levels(n)%steps)

  END SUBROUTINE GetLevelLoad

  SUBROUTINE ClearCompletedWorkLoads(n)
    INTEGER :: n
    WorkDoneByLevel(n)=0d0
  END SUBROUTINE ClearCompletedWorkLoads


  SUBROUTINE SortNodes(n)
    INTEGER :: n, nNodes, i
    INTEGER, DIMENSION(:,:,:), POINTER :: nodegrids
    TYPE(NodeDefList), POINTER :: SortedNodes, LastSortedNode, temp, nodelist
    nNodes=NodeCount(Nodes(n)%p)
    IF (nNodes <= 1) RETURN
    NULLIFY(SortedNodes, LastSortedNode)
    ALLOCATE(nodegrids(3,2,nNodes))
    nodelist=>Nodes(n)%p
    i=0
    DO WHILE (associated(nodelist))
      i=i+1
      nodegrids(:,:,i)=nodelist%self%box%mGlobal
      nodelist=>nodelist%next
    END DO
    CALL HilbertSort(nodegrids, n)
    nodelist=>Nodes(n)%p
    i=1

    DO WHILE (i <= nNodes)
      IF (ALL(nodegrids(:,:,i) == nodelist%self%box%mGlobal)) THEN
        CALL AddNodeToList(nodelist%self, LastSortedNode, SortedNodes)
        i=i+1
      END IF
      nodelist=>nodelist%next
      IF (.NOT. (ASSOCIATED(nodelist))) nodelist=>Nodes(n)%p
    END DO
    temp=>Nodes(n)%p
    Nodes(n)%p=>SortedNodes
    LastLocalNode(n)%p=>LastSortedNode
    CALL ClearNodeList(temp)

    DEALLOCATE(nodegrids)
  END SUBROUTINE SortNodes


  SUBROUTINE DistributeChildrens(n, lsplitOpt)
    INTEGER :: n
    TYPE(NodeDefList), POINTER :: nodelist, children, childlist
    TYPE(NodeDef), POINTER :: node, child, newchild
    INTEGER :: cp, pp, i, iErr, j, l, nChildren, m, test, k, childsteps_remaining, nextproc
    INTEGER, DIMENSION(:), ALLOCATABLE :: parentproc, childproc
    REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: childtimes, ChildLoad, newload, maxchildtimes
    REAL(KIND=qPREC), DIMENSION(:,:), ALLOCATABLE :: workloadsbyprocessor, parentoverlaps, childoverlaps
    REAL(KIND=qPREC) :: MyWorkLoads(3), TotalWorkLoad, MeanWorkLoad, LocalCost, childoverlap, parentoverlap, ChildCostRemaining, temp, TotalLocalCost
    LOGICAL, OPTIONAL :: lsplitOpt
    LOGICAL :: lsplit
    REAL(KIND=qPREC), DIMENSION(:), POINTER :: splitweight, maxsplitweight
    INTEGER, DIMENSION(:), POINTER :: splitproc
    INTEGER, DIMENSION(:,:,:), POINTER :: newgrids, childgrids, oldgrids
    TYPE(NodeBox), POINTER :: child_box
    INTEGER, DIMENSION(:), ALLOCATABLE :: req
    INTEGER, DIMENSION(:), ALLOCATABLE :: steps_remaining
    INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status
    REAL(KIND=qPREC) :: TotalExcessPerProc, DesiredExcessPerProc, SplitExcessPerProc, alpha
    CALL StartTimer(iDistributeChildrens, n)


    !First get local cost of new children

    nodelist=>Nodes(n)%p
    LocalCost=0

    DO WHILE (associated(nodelist))
      node=>nodelist%self
      children=>node%children
      DO WHILE (ASSOCIATED(children))
        LocalCost=LocalCost+max(GetChildCosts(children%self%box%mGlobal,n+1), 1e-20) !product(children%self%box%mGlobal(:,2)-children%self%box%mGlobal(:,1)+1)*levels(n+1)%steps
        children=>children%next
      END DO
      nodelist=>nodelist%next
    END DO


    !Take care of single processor stuff before returning
    IF (MPI_NP == 1 .AND. n <= FinestLevel) THEN
      IF (LocalCost > 0) THEN
        FinestLevel=n+1
      ELSE
        FinestLevel=min(n,FinestLevel)
      END IF
      RestartStep=lRequestRestart
      CALL StopTimer(iDistributeChildrens, n)
      RETURN
    END IF


    !Determine whether or not we are splitting
    IF (Present(lsplitopt)) THEN !On restarts we don't split grids to better distribute until the data has been read in
      lsplit=lsplitOpt
    ELSE IF (n == -2) THEN !Never split level -2
      lsplit=.false.
    ELSE
      lsplit=.true. !Default behavior is to split - based on min and max values
    END IF


    !Allocate space for 3 quantities for each processor. (Coarser Level Work Remaining, New Child Loads, RestartFlag)
    ALLOCATE(WorkLoadsByProcessor(3,MPI_NP))

    !Calculate Coarser level work remaining per level n step
    IF (iThreaded == NON_THREADED .OR. n < -1) THEN ! .OR. n < 0) THEN
      MyWorkLoads(1) = 0d0 !Shouldn't have any coarser work left
    ELSE
      ! Calculate steps remaining on each level
      ALLOCATE(steps_remaining(-1:MaxLevel))
      steps_remaining=0
      DO i=-1, MaxLevel
        steps_remaining(i) = sum((levels(-1:i)%steps-levels(-1:i)%step) * 2**(/((i-j),j=-1,i)/))+1
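        ! Reading of the formula above (assuming the factor-of-2 time refinement
        ! encoded in the 2**(i-j) weights): each step still to be taken on a
        ! coarser level j counts as 2**(i-j) level-i steps, and the trailing +1
        ! counts the current step.  Illustrative, made-up numbers: if level -1
        ! is done, level 0 has 1 of 2 steps left and level 1 has both of its 2
        ! steps left, then steps_remaining(1) = 1*2 + 2*1 + 1 = 5.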
!        write(*,*) 'levels%steps', levels(-1:MaxLevel)%step
!        IF (MPI_ID == 0) write(*,*) i, steps_remaining(i), (levels(0:i)%steps-levels(0:i)%step), 2**(/((i-j),j=0,i)/)
      END DO

      ! Determine projected coarser work that needs to be done per level n step
      MyWorkLoads(1) = SUM(MeanWorkLoadByLevelPerStep(-1:n)*steps_remaining(-1:n)-WorkDoneByLevel(-1:n)) / steps_remaining(n)


      IF (lDistVerbose) THEN
        DO i=0, MPI_NP
          IF (MPI_ID == i) THEN
            write(*,*) '**********************************************************'
            write(*,'(A,I4,A,E13.5,A,E13.5)') 'Processor ', MPI_ID, ' has a coarser work load of ', MyWorkLoads(1), ' and a local child cost of ', LocalCost
            write(*,'(A50, 10E13.5)') 'WorkLoadByLevelPerStep', MeanWorkLoadByLevelPerStep
            write(*,'(A50, 10I4)') 'Steps Remaining', steps_remaining(-1:MaxLevel)
            write(*,'(A50, 10E13.5)') 'Work Done By Level', WorkDoneByLevel(-1:n)
            write(*,'(A50, 10E13.5)') 'Remaining projected work load by level', MeanWorkLoadByLevelPerStep(-1:n)*steps_remaining(-1:n) - WorkDoneByLevel(-1:n)
            write(*,'(A50, 10E13.5)') 'Remaining projected work load to balance this step', (MeanWorkLoadByLevelPerStep(-1:n)*steps_remaining(-1:n) - WorkDoneByLevel(-1:n))/steps_remaining(n)

          END IF
          CALL MPI_BARRIER(MPI_COMM_WORLD, iErr)
        END DO
      END IF

    END IF
    MyWorkLoads(2) = LocalCost
    MyWorkLoads(3)=merge(1d0,0d0,lRequestRestart)
    CALL StartTimer(iBarrier, n)
    CALL MPI_ALLGather(MyWorkLoads, 3, MPI_DOUBLE_PRECISION, WorkLoadsByProcessor, 3, MPI_DOUBLE_PRECISION, levels(n)%MPI_COMM, iErr)
    CALL StopTimer(iBarrier, n)

    ! Adjust Finest level depending on whether or not any processor created children
    FinestLevel=merge(n+1, min(n, FinestLevel), SUM(WorkLoadsByProcessor(2,:)) > 0 .AND. n <= FinestLevel)

    ! Determine if any processor has requested restarts
    RestartStep=ANY(WorkLoadsByProcessor(3,:) == 1d0)

    !Calculate total new work load, total coarser work load, and update MeanWorkLoadByLevel for child level
    TotalNewWorkLoad=SUM(WorkLoadsByProcessor(2,:))
    CoarserWorkLoad=SUM(WorkLoadsByProcessor(1,:))

    MeanWorkLoadByLevel(n+1)=TotalNewWorkLoad/REAL(MPI_NP)*splitfactor(n) !not very accurate as it does not account for splitting


    !now we have \eta_l^p and c_l^p and we want to balance \eta_l^p+childsteps_remaining*c_l^p

    !Calculate total new projected remaining workload for child level and all coarser levels and the average
    TotalWorkLoad=CoarserWorkLoad+TotalNewWorkLoad
    MeanWorkLoad=TotalWorkLoad/REAL(MPI_NP, 8)

    IF (iThreaded == NON_THREADED .OR. n < -1) THEN !Don't use finer level excesses to adjust splitting since approach is non threaded
      TotalExcessPerProc=0d0
    ELSE
      TotalExcessPerProc=sum(MeanWorkLoadByLevel(n+2:MaxLevel)*steps_remaining(n+1:MaxLevel-1))/steps_remaining(n) !How much finer work can we assume will be available to balance
    END IF


    DesiredExcessPerProc=LevelBalance(1)*TotalExcessPerProc !Adjust split points based on degree of global breadth first approach
    SplitExcessPerProc=(LevelBalance(2)-LevelBalance(1))*TotalExcessPerProc ! Determine excess per processor allowed above the split point

    ! Calculate partition points for new load
    ALLOCATE(ChildLoad(0:MPI_NP), NewLoad(0:MPI_NP))

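    ! Sketch of the partitioning that follows (illustrative, made-up numbers):
    ! NewLoad(0:MPI_NP) are cumulative split points of the desired child
    ! workload per processor (mean load minus that processor's remaining
    ! coarser work), while ChildLoad(0:MPI_NP) are cumulative split points of
    ! where the new child cost currently resides.  With 2 processors, desired
    ! shares 6 and 4 and current child costs 9 and 1, NewLoad=(0,6,10) and
    ! ChildLoad=(0,9,10); processor 0's current interval [0,9) overlaps the
    ! desired intervals [0,6) and [6,10), so it keeps 6 units and hands 3 to
    ! processor 1, which is exactly what the parentoverlap/childoverlap tests
    ! below compute.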
    IF (LevelBalance(2) == 0d0 .OR. iThreaded == NON_THREADED .OR. n < -1) THEN
      ChildLoad(0)=0
      NewLoad(0)=0
      DO i=1, MPI_NP
        NewLoad(i)=NewLoad(i-1)+MeanWorkLoad-WorkLoadsByProcessor(1,i) !Split points for omega^{n+1}
        ChildLoad(i)=ChildLoad(i-1)+WorkLoadsByProcessor(2,i) !Split points for c_p
      END DO
    ELSE
      ChildLoad(0)=0
      NewLoad(0)=0
      temp=0
      DO i=1, MPI_NP
        NewLoad(i)=NewLoad(i-1)+max(MeanWorkLoad-WorkLoadsByProcessor(1,i)+DesiredExcessPerProc,0d0) !Split points for omega^{n+1}
        temp=temp+MeanWorkLoad-WorkLoadsByProcessor(1,i)+DesiredExcessPerProc !Split points for omega^{n+1}
        ChildLoad(i)=ChildLoad(i-1)+WorkLoadsByProcessor(2,i) !Split points for c_p
      END DO
      NewLoad=NewLoad*temp/NewLoad(MPI_NP)

      IF (lDistVerbose .AND. MPI_ID == 0) THEN
        write(*,*) 'loads for level', n
        write(*,'(A,10E15.6)') 'Coarser Work Loads = ', WorkLoadsByProcessor(1,:)
        write(*,'(A,10E15.6)') 'Predicted Finer Work Loads = ', sum(MeanWorkLoadByLevel(n+2:MaxLevel)*steps_remaining(n+1:MaxLevel-1))/steps_remaining(n)
        write(*,'(A,10E15.6)') 'New Work Loads = ', WorkLoadsByProcessor(2,:)
        write(*,'(A,10E15.6)') 'New Load Partitions = ', NewLoad(:)
        write(*,'(A,10E15.6)') 'Current Load Partitions = ', ChildLoad(:)
      END IF
    END IF


    ALLOCATE(parentproc(MPI_NP), childproc(MPI_NP), childtimes(MPI_NP), maxchildtimes(MPI_NP))

    childproc=-1
    childtimes=0d0
    maxchildtimes=0d0

    !Do partitioning and identify child & parent processors
    cp = 0
    pp = 0
    DO i=1, MPI_NP
      parentoverlap=min(NewLoad(MPI_ID+1), ChildLoad(i))-max(NewLoad(MPI_ID), ChildLoad(i-1))
      childoverlap=min(ChildLoad(MPI_ID+1), NewLoad(i))-max(ChildLoad(MPI_ID), NewLoad(i-1))

      IF (childoverlap > 0) THEN
        cp=cp+1
        childproc(cp)=i-1
        childtimes(cp)=childoverlap
        maxchildtimes(cp)=childoverlap*(1d0+SplitExcessPerProc/(NewLoad(i)-NewLoad(i-1))) !Distribute excess per proc in proportion to the relative work load desired to be assigned to that child proc
      END IF
      IF (parentoverlap > 0) THEN
        pp=pp+1
        parentproc(pp)=i-1
      END IF
    END DO



    IF (lDistVerbose) THEN
      DO i=0, MPI_NP
        IF (MPI_ID == i) THEN
          write(*,*) 'parent procs for processor ', MPI_ID, ' = ', parentproc(1:pp)
          write(*,*) 'child procs and times for processor ', MPI_ID, ' = ', childproc(1:cp), childtimes(1:cp)
        END IF
        CALL MPI_BARRIER(MPI_COMM_WORLD, iErr)
      END DO

      IF (MPI_ID == 0) THEN
        write(*,'(A,8E13.2)') 'New Load =', NewLoad
        write(*,'(A,8E13.2)') 'Current Load =', ChildLoad
        ALLOCATE(parentoverlaps(MPI_NP, MPI_NP), childoverlaps(MPI_NP, MPI_NP))
        DO i=1, MPI_NP
          DO j=1, MPI_NP
            parentoverlaps(i,j)=min(NewLoad(j), ChildLoad(i))-max(NewLoad(j-1), ChildLoad(i-1))
            childoverlaps(i,j)=min(ChildLoad(j), NewLoad(i))-max(ChildLoad(j-1), NewLoad(i-1))
          END DO
        END DO
!        write(*,*) 'parentoverlaps'
!        write(*,'(2E13.2)') parentoverlaps
!        write(*,*) 'childoverlaps'
!        write(*,'(2E13.2)') childoverlaps
        DEALLOCATE(parentoverlaps, childoverlaps)
      END IF
    END IF

    !Rebuild ParentProcs and ChildProcs lists
    IF (ASSOCIATED(ParentProcs(n+1)%p)) DEALLOCATE(ParentProcs(n+1)%p)
    IF (ASSOCIATED(ChildProcs(n)%p)) DEALLOCATE(ChildProcs(n)%p)
    IF (pp > 0) THEN
      ALLOCATE(ParentProcs(n+1)%p(pp))
      ParentProcs(n+1)%p=parentproc(1:pp)
    END IF
    IF (cp > 0) THEN
      ALLOCATE(ChildProcs(n)%p(cp))
      ChildProcs(n)%p=childproc(1:cp)
    END IF


    DEALLOCATE(ChildLoad, NewLoad, WorkLoadsByProcessor, parentproc, childproc)
    IF (ALLOCATED(steps_remaining)) DEALLOCATE(steps_remaining)



    ! Now that we know how much workload we would like to assign to each child processor, as well as the maximum we can assign before splitting, it is time to distribute the workload.
    ! We can either do an ordered distribution where processor assignments follow a strict Hilbert ordering,
    ! or we can do a local knapsack-type algorithm.

    ! We have a list of nodes - and their children to distribute
    ! We have a list of available childprocs
    ! and we have a list of desired work load allocations (childtimes) as well as a maximum work load allocation (maxchildtimes)


    LocalCost=0d0 !Recalculate local costs for split children

    IF (lKnapSack) THEN
      ! Assign heaviest child nodes first to best matching bin (one with the desired workload closest to actual and with a maximum work load that is greater)
      ! If none is available then we will have to split the grid into at least two pieces...
      ! So then check for best matching pairs that will accomplish the same
      ! Then triplets... and so on...
      ! Then split grids based on desired weights
      ! Then move to next smallest grid and repeat...

      write(*,*) 'Local Knapsack algorithm not yet implemented'

    ELSE
      ! Go through children in order and processors in order, assigning children and splitting if a child assignment would overload the child processor

!      ALLOCATE(splitweight(MPI_NP), splitproc(MPI_NP), maxsplitweight(MPI_NP))
      nextproc = 1
      nodelist=>Nodes(n)%p

      DO WHILE (associated(nodelist))
        node=>nodelist%self
        children=>node%children !back up the current childlist so it can be destroyed later
        childlist=>node%children !Child list pointer
        NULLIFY(node%children) !Destroy the current node's child list so it can be rebuilt
        NULLIFY(node%lastchild)

        DO WHILE (ASSOCIATED(childlist))
          child=>childlist%self
          ChildCostRemaining=max(GetChildCosts(childlist%self%box%mGlobal, n+1),1e-20)
          IF (cp == 0) THEN !Random check that should not be needed... unless profile.data gives negative workloads...
            write(*,*) MPI_ID, 'have child but no child processors', childlist%self%box%mGlobal, 'perhaps profile.data has negative entries?'
            STOP
          END IF
          i=nextproc
          j=nextproc
          DO WHILE (SUM(MaxChildTimes(i:j)) < ChildCostRemaining)
            IF (j == cp) EXIT
            j=j+1
          END DO
          IF (lDistVerbose) write(*,*) MPI_ID, ' splitting into ', j-i+1 , ' pieces'

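          ! Illustrative, made-up numbers for the loop above: if this child
          ! grid costs 10 units and the upcoming child processors have
          ! MaxChildTimes of 4, 4 and 4, then j advances until the window i:j
          ! can absorb the cost (three processors here), so the grid is split
          ! into at most j-i+1 = 3 pieces below.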
          ! Now we know the minimum number of subgrids required
          ALLOCATE(splitproc(1:j-i+1))
          ALLOCATE(splitweight(1:j-i+1))

          splitproc=ChildProcs(n)%p(i:j)

          IF (SUM(ChildTimes(i:j)) >= ChildCostRemaining) THEN !Now let's use the desired times for i:j-1 and adjust j to match
            splitweight(1:j-i)=ChildTimes(i:j-1)
            splitweight(j-i+1)=ChildCostRemaining-SUM(ChildTimes(i:j-1))
            nextproc=j !increase i to the next processor with any remaining time
            IF (lDistVerbose) write(*,*) MPI_ID, 'shrinking last grid only'
          ELSE

            !Now we want to adjust the splits so they are closer to the desired values instead of the maximum values
            !Each processor has a range from x to X and we know that X+Y+Z > c'
            !We want x'+y'+z'=c' where x < x' < X and y < y' < Y and z < z' < Z
            !Take x+y+z=c and X+Y+Z=C and calculate alpha = (c'-c)/(C-c) where 0 < alpha < 1
            !Then x'=x+alpha*(X-x), y'=y+alpha*(Y-y) and z'=z+alpha*(Z-z)

            IF (SUM(MaxChildTimes(i:j)-ChildTimes(i:j)) < 1e-10) THEN
              alpha=1d0
            ELSE
              alpha=(ChildCostRemaining-SUM(ChildTimes(i:j)))/SUM(MaxChildTimes(i:j)-ChildTimes(i:j))
            END IF
            IF (lDistVerbose) write(*,*) MPI_ID, 'stretching all grids', alpha
            splitweight=ChildTimes(i:j)+(MaxChildTimes(i:j)-ChildTimes(i:j))*alpha
            nextproc=min(j+1,cp)
          END IF
          ChildTimes(i:j)=ChildTimes(i:j)-splitweight
          MaxChildTimes(i:j)=MaxChildTimes(i:j)-splitweight
          IF (ChildTimes(j) <= 0d0) nextproc=min(j+1,cp)
!          write(*,*) MPI_ID, 'i,j,i-j+1', i, j, j-i+1
          j=j-i+1
          IF (.NOT. lsplit) THEN ! We can't split child
            ALLOCATE(newgrids(3,2,1))
            newgrids(:,:,1)=child%box%mGlobal
            splitproc(1)=ChildProcs(n)%p(i-1+sum(maxloc(splitweight(1:j)))) !Assign child to childproc with the maximum availability
            j=1
!            write(*,*) MPI_ID, 'not splitting', splitweight(1:j), splitproc(1)
          ELSE
            ALLOCATE(newgrids(3,2,j)) ! Calculate new set of smaller grids that have the correct weights
!            write(*,*) 'splitting', child%box%mGlobal
            DO
              newgrids=0
              IF (lDistVerbose) write(*,*) MPI_ID, ' split weights ', splitweight(1:j)
              CALL HilbertSplit(leveldown(child%box%mGlobal, n+1), splitweight(1:j), newgrids, (/1,j/), n)
!              write(*,'(4I6)') newgrids(1:2,1:2,1:j)
              l=j
              m=1
              DO WHILE (m <= l)
                IF (newgrids(1,1,m) == 0) THEN
!                  write(*,*) MPI_ID, ' removing ', m, j, l
                  l=l-1
                  IF (l == 0) THEN
                    write(*,*) 'huh'
                    STOP
                  END IF
                  newgrids(:,:,m:l)=newgrids(:,:,m+1:l+1)
                  splitweight(m:l)=splitweight(m+1:l+1)
                  splitproc(m:l)=splitproc(m+1:l+1)
                  m=m-1
                END IF
                m=m+1
              END DO
              IF (l == j) EXIT !No grids to remove from splitting
!              write(*,*) MPI_ID, 'reducing number of child grids to ', l
              j=l !reduce number of grids to split
            END DO
            DO m=1,j
              newgrids(:,:,m)=levelup(newgrids(:,:,m), n)
            END DO
          END IF

          DO l=1,j
            !Do some sanity checks
            IF (ALL(newgrids(:,:,l)==0)) CYCLE !Unable to split into the desired number of pieces so remove unused procs and adjust weights

            !Just checking that hilbert split didn't return any illegal new grids
            IF (ANY(newgrids(:,1,l) < child%box%mGlobal(:,1)) .OR. ANY(newgrids(:,2,l) > child%box%mGlobal(:,2))) THEN
              write(*,*) l, newgrids(:,:,l), child%box%mGlobal
              write(*,*) "error in distribution", lsplit
              write(*,'(A,6I4)') "childgrid=", child%box%mGlobal
              write(*,'(A,6I4)') "splitgrid=", newgrids(:,:,l)
              STOP
            END IF
            IF (ANY(newgrids(1:nDim,1,l) > newgrids(1:nDim,2,l))) THEN
              write(*,*) 'illegal grid'
              write(*,*) newgrids(:,:,l)
              STOP
            END IF
            IF (splitproc(l) < 0 .OR. splitproc(l) > MPI_NP-1) THEN
              write(*,*) 'illegal target processor', splitproc(l)
              STOP
            END IF

            !Create child
            NULLIFY(child_box)
            CALL CreateNodeBox(newgrids(:,:,l), child_box, splitproc(l))
            IF (lDistVerbose) write(*,*) MPI_ID, ' creating child ', child_box
            LocalCost=LocalCost+max(GetChildCosts(child_box%mGlobal,n+1), 1e-20)
            NULLIFY(newchild)
            CALL AddNode(n+1, child_box, newchild)
            CALL AddParent(newchild, node)
            CALL AddChild(node, newchild)
            CALL DestroyNodeBox(child_box)

          END DO

          DEALLOCATE(newgrids)
          DEALLOCATE(splitproc, splitweight)
          childlist=>childlist%next
        END DO



        CALL DestroyNodeList(children) !Destroy old childlist


        !Setup childgrids list like newsubgrids
        IF (n >= 0) THEN
          nchildren=NodeCount(node%children)
          IF (nchildren > 0) THEN
            ALLOCATE(childgrids(3,2,nChildren))
            children=>node%children
            j=0
            DO WHILE (ASSOCIATED(children))
              j=j+1
              childgrids(:,:,j)=children%self%box%mGlobal
              children=>children%next
            END DO
          ELSE
            NULLIFY(childgrids)
          END IF
          CALL AllocChildFixups(node%info, childgrids)
          IF (ASSOCIATED(childgrids)) DEALLOCATE(childgrids)
        END IF
        nodelist=>nodelist%next
      END DO

!      IF (lSplit) THEN
!        CALL MPI_ALLREDUCE(LocalCost, TotalLocalCost, 1, MPI_DOUBLE_PRECISION, MPI_SUM, levels(n)%MPI_COMM, ierr)

        ! Since MeanWorkLoadByLevel(n+1) = TotalLocalCostEst*splitfactor(n)/MPI_NP
        ! And we want splitfactor(n)=TotalLocalCost/TotalLocalCostEst
!        splitfactor(n) = splitfactor(n)*(TotalLocalCost/REAL(MPI_NP))/MeanWorkLoadByLevel(n+1)
!        IF (MPI_ID == 0) write(*,*) 'Splitfactor for level ', n, ' = ', splitfactor(n)
        ! Now we can correct the workload
!        MeanWorkLoadByLevel(n+1)=TotalLocalCost/MPI_NP
!      END IF

    END IF

    DEALLOCATE(childtimes, maxchildtimes)

    CALL StopTimer(iDistributeChildrens, n)
  END SUBROUTINE DistributeChildrens


  FUNCTION HilbertValue(mGlobal, level)
    REAL(KIND=qPREC) :: HilbertValue
    INTEGER :: i, j, k, n, level, tempi, tempj, tempk
    INTEGER, DIMENSION(:,:) :: mGlobal
    INTEGER :: indices(3,2), sum_mGlobal(3)
    INTEGER :: h
    HilbertValue=0
    n=Rootn+max(level,0)
    sum_mGlobal(1:nDim)=sum(mGlobal(1:nDim,:),2)
    DO i=1,nDim
      IF (modulo(sum_mGlobal(i),2) == 0) THEN
        indices(i,1)=floor(half*sum_mGlobal(i))
        indices(i,2)=indices(i,1)+1
      ELSE
        indices(i,:)=sum_mGlobal(i)/2
      END IF
    END DO
    IF (nDim == 2) THEN
      DO i=indices(1,1), indices(1,2) !, ncells(1)
        DO j=indices(2,1), indices(2,2) !, ncells(1)
          tempi=(i-1)*di
          tempj=(j-1)*dj
          CALL closedformhilbert2d(n,tempi,tempj,h)
          HilbertValue=HilbertValue+h
        END DO
      END DO
    ELSEIF (nDim == 3) THEN
      DO i=indices(1,1), indices(1,2) !, ncells(1)
        DO j=indices(2,1), indices(2,2) !, ncells(1)
          DO k=indices(3,1), indices(3,2)
            tempi=(i-1)*di
            tempj=(j-1)*dj
            tempk=(k-1)*dk
            CALL closedformhilbert3d(n,tempi,tempj,tempk,h)
            HilbertValue=HilbertValue+h
          END DO
        END DO
      END DO
    END IF
    HilbertValue=HilbertValue/REAL(product(indices(1:nDim,2)-indices(1:nDim,1)+1))
  END FUNCTION HilbertValue


  SUBROUTINE HilbertSort(childgrids,level)
    INTEGER, DIMENSION(:,:,:), POINTER, INTENT(INOUT) :: childgrids
    INTEGER, DIMENSION(:,:,:), POINTER :: sortedchildgrids
    REAL(KIND=qPREC), DIMENSION(:), POINTER :: childgridhilbertvalues
    INTEGER, INTENT(IN) :: level
    INTEGER :: n, i, index
    n=size(childgrids,3)
    IF (n == 1) RETURN
!    write(*,*) "HilbertSort", n
    ALLOCATE(sortedchildgrids(3,2,n), childgridhilbertvalues(n))
    DO i=1,n
      IF (ALL(childgrids(:,:,i) == 0)) THEN
        childgridhilbertvalues(i)=-1
      ELSE
        childgridhilbertvalues(i)=HilbertValue(childgrids(:,:,i),level)
      END IF
    END DO
!    write(*,*) childgridhilbertvalues
    DO i=1,n
      index=minloc(childgridhilbertvalues,DIM=1)
      sortedchildgrids(:,:,i)=childgrids(:,:,index)
      childgridhilbertvalues(index)=huge(1.0)
    END DO
    DEALLOCATE(childgrids)
    childgrids=>sortedchildgrids
    NULLIFY(sortedchildgrids)
    DEALLOCATE(childgridhilbertvalues)
    NULLIFY(childgridhilbertvalues)
  END SUBROUTINE HilbertSort


  RECURSIVE SUBROUTINE HilbertSplit(childgrid, weights, newgrids, indx, level)
    INTEGER, DIMENSION(:,:), INTENT(IN) :: childgrid
    REAL(KIND=qPREC), DIMENSION(:), INTENT(IN) :: weights
    REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: accweight
    INTEGER, DIMENSION(:,:,:), POINTER, INTENT(OUT) :: newgrids
    REAL(KIND=qPREC) :: leftweight, rightweight, subgridweights(2), totalweight, HilbertDiff(3,2), HilbertLeft, HilbertRight, cost(3,2), effective_cost(3,2)
    INTEGER, DIMENSION(3,2) :: leftmGlobal, rightmGlobal, subgridleft, subgridright
    INTEGER, DIMENSION(2) :: indx
    INTEGER :: ngrids, i1, i, j, splitpoint(3,2), level, loc(2)
!    IF (SIZE(weights) == 0) THEN
!      write(*,*) 'weights are zero sized'
!      STOP
!    END IF
    IF (any(weights==0)) THEN
      write(*,*) "weights are zero", weights, MPI_ID
      STOP
    END IF
    ngrids=indx(2)-indx(1)+1
!    write(*,'(A,6I4,20E13.2)') "HilbertSplit", childgrid, weights
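    ! Summary of the recursion below (a reading of the code): the processor
    ! range indx(1):indx(2) is cut at i1, the index whose cumulative weight is
    ! closest to half of the total; the grid is then cut once, trying each
    ! dimension and each ordering of the two halves, at the point proportional
    ! to the left/right weight (rounded to respect MinimumGridPoints).  The cut
    ! with the smallest estimated per-unit-weight advance cost is kept, with
    ! the Hilbert-value difference entering only as a small tie-breaker in
    ! effective_cost, and HilbertSplit then recurses on the two halves.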
    IF (indx(2) < indx(1)) THEN
      write(*,*) "err in HilbertSplit", indx
      write(*,*) childgrid
      write(*,*) weights
      write(*,*) level
      STOP
    END IF
    IF (ANY(childgrid(1:nDim,2) < childgrid(1:nDim,1))) THEN
      newgrids(:,:,indx(1):indx(2))=0
      RETURN
    ELSE
      IF (ngrids == 1) THEN
        newgrids(:,:,indx(1))=childgrid
      ELSE

        totalweight=sum(weights(indx(1):indx(2)))
        IF (ngrids==2) THEN !get processor split point
          i1=indx(1)
          leftweight=weights(indx(1))/totalweight
        ELSE
          ALLOCATE(accweight(indx(1):indx(2)))

          totalweight=sum(weights(indx(1):indx(2)))
          accweight(indx(1))=weights(indx(1))
          DO i=indx(1)+1,indx(2)
            accweight(i)=accweight(i-1)+weights(i)
          END DO
          i1=indx(1)-1+sum(minloc(abs(accweight/totalweight-half))) !gets split point that is closest to half
          leftweight=accweight(i1)/totalweight !left weight
          DEALLOCATE(accweight)
        END IF
        rightweight=1d0-leftweight !right weight

        !Now we know where to split the processors
        !and the relative sizes of the left and right grids
        !Now we just need to decide along which direction to split and which half is the new left and the new right

        leftmGlobal=childgrid
        rightmGlobal=childgrid
        subgridweights=(/leftweight,rightweight/)

        DO i=1,nDim
          DO j=1,2
            splitpoint(i,j)=childgrid(i,1)-1+nint((childgrid(i,2)-childgrid(i,1)+1)*subgridweights(j))
            IF (splitpoint(i,j) >= childgrid(i,1)-1+MinimumGridPoints .AND. splitpoint(i,j) <= childgrid(i,2)-MinimumGridPoints) THEN
              leftmGlobal(i,2)=splitpoint(i,j)
              rightmGlobal(i,1)=splitpoint(i,j)+1
              HilbertLeft=HilbertValue(leftmGlobal,level)
              HilbertRight=HilbertValue(rightmGlobal,level)
              HilbertDiff(i,j)=(-1)**(j-1)*(HilbertRight-HilbertLeft)

            ELSE IF (childgrid(i,2)-childgrid(i,1)+1 < 2*MinimumGridPoints) THEN !Can't split
              splitpoint(i,j)=childgrid(i,1)-1+(childgrid(i,2)-childgrid(i,1)+1)*nint(subgridweights(j))
              leftmGlobal(i,2)=splitpoint(i,j)
              rightmGlobal(i,1)=splitpoint(i,j)+1
              HilbertDiff(i,j)=0 !We don't want to favor the additional load imbalancing

            ELSE !Need to round splitpoint to multiple of MinimumGridPoints
              IF (splitpoint(i,j) < childgrid(i,1)-1+MinimumGridPoints) THEN
                splitpoint(i,j)=childgrid(i,1)-1+MinimumGridPoints*min(nint((childgrid(i,2)-childgrid(i,1)+1) * &
                     subgridweights(j)/MinimumGridPoints),1)

              ELSE !splitpoint > childgrid(i,2)-MinimumGridPoints
                splitpoint(i,j)=childgrid(i,2)-MinimumGridPoints*min(nint((childgrid(i,2)-childgrid(i,1)+1) * &
                     subgridweights(3-j)/MinimumGridPoints),1)
              END IF
              leftmGlobal(i,2)=splitpoint(i,j)
              rightmGlobal(i,1)=splitpoint(i,j)+1
              HilbertLeft=HilbertValue(leftmGlobal,level)
              HilbertRight=HilbertValue(rightmGlobal,level)
              HilbertDiff(i,j)=(-1)**(j-1)*(HilbertRight-HilbertLeft)
            END IF


            IF (ngrids == 2) THEN
              IF (j==1) THEN
                cost(i,j)=max(ChildAdvanceCost(leftmGlobal, level)/leftweight, ChildAdvanceCost(rightmGlobal,level)/rightweight)
              ELSE
                cost(i,j)=max(ChildAdvanceCost(leftmGlobal, level)/rightweight, ChildAdvanceCost(rightmGlobal, level)/leftweight)
              END IF
            ELSE
              IF (j == 1) THEN
                cost(i,j)=max(ChildAdvanceCost(nint((leftmGlobal)/REAL(i1-indx(1)+1)**(1d0/nDim)),level)/leftweight, ChildAdvanceCost(nint((rightmGlobal)/REAL(indx(2)-i1)**(1d0/nDim)),level)/rightweight)
              ELSE
                cost(i,j)=max(ChildAdvanceCost(nint((rightmGlobal)/REAL(i1-indx(1)+1)**(1d0/nDim)),level)/rightweight, ChildAdvanceCost(nint((leftmGlobal)/REAL(indx(2)-i1)**(1d0/nDim)),level)/leftweight)
              END IF
            END IF


          END DO
          leftmGlobal(i,2)=childgrid(i,2)
          rightmGlobal(i,1)=childgrid(i,1)

        END DO


        !Rescale HilbertDiffs to go from 0 to +- 1
        HilbertDiff(1:nDim,:)=HilbertDiff(1:nDim,:) / max(maxval(abs(HilbertDiff(1:nDim,:))), 1d-10)
        cost(1:nDim,:)=cost(1:nDim,:)/max(minval(cost(1:nDim,:)),1d-10)
        effective_cost(1:nDim,:)=cost(1:nDim,:)-.00001*HilbertDiff(1:nDim,:)

!        write(*,'(A,6E15.6)') 'effective_cost=', effective_cost
!        write(*,'(A,6E15.6)') 'cost =', cost
!        write(*,'(A,6I15)') 'splitpoints=', splitpoint

        loc=minloc(effective_cost(1:nDim,:))
        i=loc(1)
        j=loc(2)

        IF (splitpoint(i,j) < childgrid(i,1)-1 .OR. splitpoint(i,j) > childgrid(i,2)) THEN
          write(*,*) 'illegal split point', splitpoint, effective_cost, i, j, childgrid
          STOP
        END IF

        leftmGlobal(i,2)=splitpoint(i,j)
        rightmGlobal(i,1)=splitpoint(i,j)+1

        IF (j==1) THEN
          subgridleft=leftmGlobal
          subgridright=rightmGlobal
        ELSE
          subgridleft=rightmGlobal
          subgridright=leftmGlobal
        END IF
!        write(*,'(A,6I4,A,6I4,A,6I4,A,F12.2)') "Split ", childgrid, " into ", subgridleft, " and ", subgridright, "with HDiff=", HilbertDiff(i,j)

!        write(*,*) indx(1),i1,indx(2)
        IF (ANY(subgridleft(1:nDim,2) < subgridleft(1:nDim,1))) THEN

          IF (leftweight == rightweight) THEN
            CALL HilbertSplit(subgridright,weights,newgrids,(/indx(1),i1/),level)
            CALL HilbertSplit(subgridleft,weights,newgrids,(/i1+1,indx(2)/),level)
          ELSE
            CALL HilbertSplit(subgridright,weights,newgrids,(/i1+1,indx(2)/),level)
            CALL HilbertSplit(subgridleft,weights,newgrids,(/indx(1),i1/),level)
          END IF
        ELSE
          CALL HilbertSplit(subgridright,weights,newgrids,(/i1+1,indx(2)/),level)
          CALL HilbertSplit(subgridleft,weights,newgrids,(/indx(1),i1/),level)
        END IF
      END IF
    END IF
  END SUBROUTINE HilbertSplit


  SUBROUTINE ClearParentProcs(level)
    INTEGER :: level
    RETURN !Currently disabled: the cleanup below is never reached
    IF (ASSOCIATED(ParentProcs(level)%p)) THEN
      DEALLOCATE(ParentProcs(level)%p)
      NULLIFY(ParentProcs(level)%p)
    END IF

  END SUBROUTINE ClearParentProcs

END MODULE DistributionControl