distribution_control.f90
!#########################################################################
!
!    Copyright (C) 2003-2012 Department of Physics and Astronomy,
!                            University of Rochester,
!                            Rochester, NY
!
!    distribution_control.f90 is part of AstroBEAR.
!
!    AstroBEAR is free software: you can redistribute it and/or modify
!    it under the terms of the GNU General Public License as published by
!    the Free Software Foundation, either version 3 of the License, or
!    (at your option) any later version.
!
!    AstroBEAR is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
!
!#########################################################################
MODULE DistributionControl
   USE GlobalDeclarations
   USE DataDeclarations
   USE TreeDeclarations
   USE DistributionDeclarations
   USE Scheduling
   USE DataInfoOps
   USE Timing
   IMPLICIT NONE


   REAL(KIND=qPREC), PARAMETER :: Safety_Fact=.9d0
   REAL(KIND=qPREC) :: TotalNewWorkLoad, CoarserWorkLoad
   REAL(KIND=qPREC) :: MeanWorkLoadByLevel(-1:MaxDepth)=0
   REAL(KIND=qPREC) :: SplitFactor(-2:MaxDepth)=1d0
   LOGICAL :: lDistVerbose=.false.

CONTAINS


   SUBROUTINE DistributionInit
      INTEGER :: temp, n
!      ALLOCATE(MyRemainingWorkLoad(-2:MaxLevel))
      ALLOCATE(ParentProcs(-1:MaxLevel))
      ALLOCATE(ChildProcs(-2:MaxLevel-1))
!      MyRemainingWorkLoad=0
      levels(-2)%MPI_COMM=MPI_COMM_WORLD
      DO n = -1, MaxLevel
         NULLIFY(ParentProcs(n)%p)
         NULLIFY(ChildProcs(n-1)%p)
      END DO
      IF (MPI_ID /= 0) THEN
         ALLOCATE(ParentProcs(-1)%p(1))
         ParentProcs(-1)%p=0
      END IF
      temp=1
      DO Rootn=1,1000
         temp=temp*2
         if (temp >= maxval(levels(0)%mX)) exit
      END DO
      di=REAL(2**Rootn)/REAL(levels(0)%mX(1))
      dj=REAL(2**Rootn)/REAL(levels(0)%mX(2))
      dk=REAL(2**Rootn)/REAL(levels(0)%mX(3))
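      ! Rootn is the smallest power-of-two exponent such that 2**Rootn covers the
      ! largest dimension of the level-0 base grid, and di, dj, dk rescale base-grid
      ! cell indices onto that 2**Rootn lattice used by the Hilbert-curve routines.
      ! As a purely illustrative example (hypothetical base grid), mX=(/100,80,60/)
      ! would give Rootn=7 (2**7=128 >= 100) and di=128/100, dj=128/80, dk=128/60.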

   END SUBROUTINE DistributionInit

   SUBROUTINE GetLevelLoad(n)
      INTEGER :: n,i
      TYPE(NodeDefList), POINTER :: nodelist
      TYPE(NodeDef), POINTER :: node
      WorkLoadByLevelPerStep(n,:)=0
      nodelist=>Nodes(n)%p
      IF (n < 0) RETURN

      DO WHILE (associated(nodelist))
         node=>nodelist%self
         DO i=1, levels(n)%steps
            node%info%CostPerGrid(i) = GetMyCosts(node%info,i)
         END DO
         WorkLoadByLevelPerStep(n,1:levels(n)%steps)=WorkLoadByLevelPerStep(n,1:levels(n)%steps)+node%info%CostPerGrid(1:levels(n)%steps)
         nodelist=>nodelist%next
      END DO
      MeanWorkLoadByLevelPerStep(n)=sum(WorkLoadByLevelPerStep(n,1:levels(n)%steps))/real(levels(n)%steps)
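      ! MeanWorkLoadByLevelPerStep(n) is this rank's level-n cost averaged over the
      ! level's substeps; DistributeChildrens later uses it, together with the number
      ! of steps remaining on each level, to project how much coarser work is pending.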

   END SUBROUTINE GetLevelLoad

   SUBROUTINE ClearCompletedWorkLoads(n)
      INTEGER :: n
      WorkDoneByLevel(n)=0d0
   END SUBROUTINE ClearCompletedWorkLoads


   SUBROUTINE SortNodes(n)
      INTEGER :: n, nNodes, i
      INTEGER, DIMENSION(:,:,:), POINTER :: nodegrids
      TYPE(NodeDefList), POINTER :: SortedNodes, LastSortedNode, temp, nodelist
      nNodes=NodeCount(Nodes(n)%p)
      IF (nNodes <= 1) RETURN
      NULLIFY(SortedNodes, LastSortedNode)
      ALLOCATE(nodegrids(3,2,nNodes))
      nodelist=>Nodes(n)%p
      i=0
      DO WHILE (associated(nodelist))
         i=i+1
         nodegrids(:,:,i)=Nodelist%self%box%mGlobal
         nodelist=>nodelist%next
      END DO
      CALL HilbertSort(nodegrids, n)
      nodelist=>Nodes(n)%p
      i=1

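      ! Rebuild the node list in Hilbert order: repeatedly walk the original list,
      ! wrapping back to its head when the end is reached, and append each node
      ! once its box matches the next entry of the sorted grid array.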
      DO WHILE (i <= nNodes)
         IF (ALL(nodegrids(:,:,i) == nodelist%self%box%mGlobal)) THEN
            CALL AddNodeToList(nodelist%self, LastSortedNode, SortedNodes)
            i=i+1
         END IF
         nodelist=>nodelist%next
         IF (.NOT. (ASSOCIATED(nodelist))) nodelist=>Nodes(n)%p
      END DO
      temp=>Nodes(n)%p
      Nodes(n)%p=>SortedNodes
      LastLocalNode(n)%p=>LastSortedNode
      CALL ClearNodeList(temp)

      DEALLOCATE(nodegrids)
   END SUBROUTINE SortNodes


   SUBROUTINE DistributeChildrens(n, lsplitOpt)
      INTEGER :: n
      TYPE(NodeDefList), POINTER :: nodelist, children, childlist
      TYPE(NodeDef), POINTER :: node, child, newchild
      INTEGER :: cp, pp, i, iErr, j, l, nChildren, m, test, k, childsteps_remaining, nextproc
      INTEGER, DIMENSION(:), ALLOCATABLE :: parentproc, childproc
      REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: childtimes, ChildLoad, newload, maxchildtimes
      REAL(KIND=qPREC), DIMENSION(:,:), ALLOCATABLE :: workloadsbyprocessor, parentoverlaps, childoverlaps
      REAL(KIND=qPREC) :: MyWorkLoads(3), TotalWorkLoad, MeanWorkLoad, LocalCost, childoverlap, parentoverlap, ChildCostRemaining, temp, TotalLocalCost
      LOGICAL, OPTIONAL :: lsplitOpt
      LOGICAL :: lsplit
      REAL(KIND=qPREC), DIMENSION(:), POINTER :: splitweight, maxsplitweight
      INTEGER, DIMENSION(:), POINTER :: splitproc
      INTEGER, DIMENSION(:,:,:), POINTER :: newgrids, childgrids, oldgrids
      TYPE(NodeBox), POINTER :: child_box
      INTEGER, DIMENSION(:), ALLOCATABLE :: req
      INTEGER, DIMENSION(:), ALLOCATABLE :: steps_remaining
      INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status
      REAL(KIND=qPREC) :: TotalExcessPerProc, DesiredExcessPerProc, SplitExcessPerProc, alpha
      CALL StartTimer(iDistributeChildrens, n)


      !First get local cost of new children

      nodelist=>Nodes(n)%p
      localcost=0

      DO WHILE (associated(nodelist))
         node=>nodelist%self
         children=>node%children
         DO WHILE (ASSOCIATED(children))
            LocalCost=LocalCost+max(GetChildCosts(children%self%box%mGlobal,n+1), 1e-20) !product(children%self%box%mGlobal(:,2)-children%self%box%mGlobal(:,1)+1)*levels(n+1)%steps
            children=>children%next
         END DO
         nodelist=>nodelist%next
      END DO


      !Take care of single processor stuff before returning
      IF (MPI_NP == 1 .AND. n <= FinestLevel) THEN
         IF (LocalCost > 0) THEN
            FinestLevel=n+1
         ELSE
            FinestLevel=min(n,FinestLevel)
         END IF
         RestartStep=lRequestRestart
         CALL StopTimer(iDistributeChildrens, n)
         RETURN
      END IF


      !Determine whether or not we are splitting
      IF (Present(lsplitopt)) THEN !On restarts we don't split grids to better distribute until the data has been read in
         lsplit=lsplitOpt
      ELSE IF (n == -2) THEN !Never split level -2
         lsplit=.false.
      ELSE
         lsplit=.true. !Default behavior is to split - based on min and max values
      END IF


      !Allocate space for 3 quantities for each processor.  (Coarser Level Work Remaining, New Child Loads, RestartFlag)
      ALLOCATE(WorkLoadsByProcessor(3,MPI_NP))

      !Calculate Coarser level work remaining per level n step
      IF (iThreaded == NON_THREADED .OR. n < -1) THEN ! .OR. n < 0) THEN
         MyWorkLoads(1) = 0d0 !Shouldn't have any coarser work left
      ELSE
         ! Calculate steps remaining on each level
         ALLOCATE(steps_remaining(-1:MaxLevel))
         steps_remaining=0
         DO i=-1, MaxLevel
            steps_remaining(i) = sum((levels(-1:i)%steps-levels(-1:i)%step) * 2**(/((i-j),j=-1,i)/))+1
            !            write(*,*) 'levels%steps', levels(-1:MaxLevel)%step
            !            IF (MPI_ID == 0) write(*,*) i, steps_remaining(i), (levels(0:i)%steps-levels(0:i)%step), 2**(/((i-j),j=0,i)/)
         END DO
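         ! steps_remaining(i) = sum over levels l=-1..i of (steps_l - step_l)*2**(i-l) + 1,
         ! i.e. the number of level-i substeps still to be taken before the current
         ! root step completes (the +1 presumably counting the substep in progress).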

         ! Determine projected coarser work that needs to be done per level n step
         MyWorkLoads(1) = SUM(MeanWorkLoadByLevelPerStep(-1:n)*steps_remaining(-1:n)-WorkDoneByLevel(-1:n)) / steps_remaining(n)


         IF (lDistVerbose) THEN
            DO i=0, MPI_NP
               IF (MPI_ID == i) THEN
                  write(*,*) '**********************************************************'
                  write(*,'(A,I4,A,E13.5,A,E13.5)') 'Processor ', MPI_ID, ' has a coarser work load of ', MyWorkLoads(1), ' and a local child cost of ', LocalCost
                  write(*,'(A50, 10E13.5)') 'WorkLoadByLevelPerStep', MeanWorkLoadByLevelPerStep
                  write(*,'(A50, 10I4)') 'Steps Remaining', steps_remaining(-1:MaxLevel)
                  write(*,'(A50, 10E13.5)') 'Work Done By Level', WorkDoneByLevel(-1:n)
                  write(*,'(A50, 10E13.5)') 'Remaining projected work load by level', MeanWorkLoadByLevelPerStep(-1:n)*steps_remaining(-1:n) - WorkDoneByLevel(-1:n)
                  write(*,'(A50, 10E13.5)') 'Remaining projected work load to balance this step', (MeanWorkLoadByLevelPerStep(-1:n)*steps_remaining(-1:n) - WorkDoneByLevel(-1:n))/steps_remaining(n)

               END IF
               CALL MPI_BARRIER(MPI_COMM_WORLD, iErr)
            END DO
         END IF

      END IF
      MyWorkLoads(2) = LocalCost
      MyWorkLoads(3)=merge(1d0,0d0,lRequestRestart)
      CALL StartTimer(iBarrier, n)
      CALL MPI_ALLGather(MyWorkLoads, 3, MPI_DOUBLE_PRECISION, WorkLoadsByProcessor, 3, MPI_DOUBLE_PRECISION, levels(n)%MPI_COMM, iErr)
      CALL StopTimer(iBarrier, n)

      ! Adjust Finest level depending on whether or not any processor created children
      FinestLevel=merge(n+1, min(n, FinestLevel), SUM(WorkLoadsByProcessor(2,:)) > 0 .AND. n <= FinestLevel)

      ! Determine if any processor has requested restarts
      RestartStep=ANY(WorkLoadsByProcessor(3,:) == 1d0)

      !Calculate total new work load, total coarser work load, and update mean workloadbylevel for child level
      TotalNewWorkLoad=SUM(WorkLoadsByProcessor(2,:))
      CoarserWorkLoad=SUM(WorkLoadsByProcessor(1,:))

      MeanWorkLoadByLevel(n+1)=TotalNewWorkLoad/REAL(MPI_NP)*splitfactor(n) !not very accurate as it does not account for splitting


      !now we have \eta_l^p and c_l^p and we want to balance \eta_l^p+childsteps_remaining*c_l^p

      !Calculate total new projected remaining workload for child level and all coarser levels and the average
      TotalWorkLoad=CoarserWorkLoad+TotalNewWorkLoad
      MeanWorkLoad=TotalWorkLoad/REAL(MPI_NP, 8)

      IF (iThreaded == NON_THREADED .OR. n < -1) THEN !Don't use finer level excesses to adjust splitting since approach is non threaded
         TotalExcessPerProc=0d0
      ELSE
         TotalExcessPerProc=sum(MeanWorkLoadByLevel(n+2:MaxLevel)*steps_remaining(n+1:MaxLevel-1))/steps_remaining(n)  !How much finer work can we assume will be available to balance
      END IF


      DesiredExcessPerProc=LevelBalance(1)*TotalExcessPerProc !Adjust split points based on degree of global breadth first approach
      SplitExcessPerProc=(LevelBalance(2)-LevelBalance(1))*TotalExcessPerProc ! Determine excess per processor allowed above the split point

      ! Calculate partition points for new load
      ALLOCATE(ChildLoad(0:MPI_NP), NewLoad(0:MPI_NP))



      IF (LevelBalance(2) == 0d0 .OR. iThreaded == NON_THREADED .OR. n < -1) THEN
         ChildLoad(0)=0
         NewLoad(0) = 0
         DO i=1, MPI_NP
            NewLoad(i)=NewLoad(i-1)+MeanWorkLoad-WorkLoadsByProcessor(1,i)  !Split points for omega^{n+1}
            ChildLoad(i)=ChildLoad(i-1)+WorkLoadsByProcessor(2,i)       !Split points for c_p
         END DO
      ELSE
         ChildLoad(0)=0
         NewLoad(0) = 0
         temp=0
         DO i=1, MPI_NP
            NewLoad(i)=NewLoad(i-1)+max(MeanWorkLoad-WorkLoadsByProcessor(1,i)+DesiredExcessPerProc,0d0)     !Split points for omega^{n+1}
            temp=temp+MeanWorkLoad-WorkLoadsByProcessor(1,i)+DesiredExcessPerProc     !Split points for omega^{n+1}
            ChildLoad(i)=ChildLoad(i-1)+WorkLoadsByProcessor(2,i)      !Split points for c_p
         END DO
         NewLoad=NewLoad*temp/NewLoad(MPI_NP)
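         ! The max(...,0d0) clipping above can inflate the running sum relative to the
         ! unclipped target (accumulated in temp), so the partition points are rescaled
         ! to keep the intended total while preserving their relative spacing.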

         IF (lDistVerbose .AND. MPI_ID == 0) THEN
            write(*,*) 'loads for level', n
            write(*,'(A,10E15.6)') 'Coarser Work Loads  = ',  WorkLoadsByProcessor(1,:)
            write(*,'(A,10E15.6)') 'Predicted Finer Work Loads  = ',  sum(MeanWorkLoadByLevel(n+2:MaxLevel)*steps_remaining(n+1:MaxLevel-1))/steps_remaining(n)
            write(*,'(A,10E15.6)') 'New Work Loads = ', WorkLoadsByProcessor(2,:)
            write(*,'(A,10E15.6)') 'New Load Partitions = ', NewLoad(:)
            write(*,'(A,10E15.6)') 'Current Load Partitions = ', ChildLoad(:)
         END IF
      END IF


      ALLOCATE(parentproc(MPI_NP), childproc(MPI_NP), childtimes(MPI_NP), maxchildtimes(MPI_NP))

      childproc=-1
      childtimes=0d0
      maxchildtimes=0d0

      !Do partitioning and identify child & parent processors
      cp = 0
      pp = 0
      DO i=1, MPI_NP
         parentoverlap=min(NewLoad(MPI_ID+1), ChildLoad(i))-max(NewLoad(MPI_ID), ChildLoad(i-1))
         childoverlap=min(ChildLoad(MPI_ID+1), NewLoad(i))-max(ChildLoad(MPI_ID), NewLoad(i-1))

         IF (childoverlap > 0) THEN
            cp=cp+1
            childproc(cp)=i-1
            childtimes(cp)=childoverlap
            maxchildtimes(cp)=childoverlap*(1d0+SplitExcessPerProc/(NewLoad(i)-NewLoad(i-1))) !Distribute excess per proc in proportion to the relative work load desired to be assigned to that child proc
         END IF
         IF (parentoverlap > 0) THEN
            pp=pp+1
            parentproc(pp)=i-1
         END IF
      END DO
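      ! Rank q is one of this processor's child processors when the target interval
      ! [NewLoad(q), NewLoad(q+1)] overlaps this processor's slice of the current child
      ! load [ChildLoad(MPI_ID), ChildLoad(MPI_ID+1)]; the overlap length (childtimes)
      ! is the amount of new child work intended for rank q. Parent processors are
      ! identified the same way with the roles of the two partitions reversed.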



      IF (lDistVerbose) THEN
         DO i=0, MPI_NP
            IF (MPI_ID == i) THEN
               write(*,*) 'parent procs for processor ', MPI_ID, ' = ', parentproc(1:pp)
               write(*,*) 'child procs and times for processor ', MPI_ID, ' = ', childproc(1:cp), childtimes(1:cp)
            END IF
            CALL MPI_BARRIER(MPI_COMM_WORLD, iErr)
         END DO

         IF (MPI_ID == 0) THEN
            write(*,'(A,8E13.2)') 'New Load =', NewLoad
            write(*,'(A,8E13.2)') 'Current Load =', ChildLoad
            ALLOCATE(parentoverlaps(MPI_NP, MPI_NP), childoverlaps(MPI_NP, MPI_NP))
            DO i=1, MPI_NP
               DO j=1, MPI_NP
                  parentoverlaps(i,j)=min(NewLoad(j), ChildLoad(i))-max(NewLoad(j-1), ChildLoad(i-1))
                  childoverlaps(i,j)=min(ChildLoad(j), NewLoad(i))-max(ChildLoad(j-1), NewLoad(i-1))
               END DO
            END DO
            !            write(*,*) 'parentoverlaps'
            !            write(*,'(2E13.2)') parentoverlaps
            !            write(*,*) 'childoverlaps'
            !            write(*,'(2E13.2)') childoverlaps
            DEALLOCATE(parentoverlaps, childoverlaps)
         END IF
      END IF

      !Rebuild ParentProcs and ChildProcs lists
      IF (ASSOCIATED(ParentProcs(n+1)%p)) DEALLOCATE(ParentProcs(n+1)%p)
      IF (ASSOCIATED(ChildProcs(n)%p)) DEALLOCATE(ChildProcs(n)%p)
      IF (pp > 0) THEN
         ALLOCATE(ParentProcs(n+1)%p(pp))
         ParentProcs(n+1)%p=parentproc(1:pp)
      END IF
      IF (cp > 0) THEN
         ALLOCATE(ChildProcs(n)%p(cp))
         ChildProcs(n)%p=childproc(1:cp)
      END IF


      DEALLOCATE(ChildLoad, NewLoad, WorkLoadsByProcessor, parentproc, childproc)
      IF (ALLOCATED(steps_remaining)) DEALLOCATE(steps_remaining)



      ! Now that we know how much workload we would like to assign to each child processor, as well as the maximum we can assign before splitting, it is time to distribute the workload.
      ! We can either do an ordered distribution where processor assignments follow a strict Hilbert ordering
      ! Or we can do a local knapsack type algorithm

      ! We have a list of nodes - and their children to distribute
      ! We have a list of available childprocs
      ! and we have a list of desired work load allocations (childtimes) as well as a maximum work load allocation (maxchildtimes)


      LocalCost=0d0 !Recalculate local costs for split children

      IF (lKnapSack) THEN
         ! Assign heaviest child nodes first to best matching bin (one with the desired workload closest to actual and with a maximum work load that is greater)
         ! If none is available then we will have to split grid into at least two pieces...
         ! So then check for best matching pairs that will accomplish the same
         ! Then triplets... and so on....
         ! Then split grids based on desired weights
         ! Then move to next smallest grid and repeat...

         write(*,*) 'Local Knapsack algorithm not yet implemented'

      ELSE
         ! Go through children in order and processors in order, assigning children and splitting if a child assignment would overload the child processor

         !         ALLOCATE(splitweight(MPI_NP), splitproc(MPI_NP), maxsplitweight(MPI_NP))
         nextproc = 1
         nodelist=>Nodes(n)%p

         DO WHILE (associated(nodelist))
            node=>nodelist%self
            children=>node%children             !back up the current child list so it can be destroyed later
            childlist=>node%children            !Child list pointer
            NULLIFY(node%children)              !Destroy current node's child list so it can be rebuilt
            NULLIFY(node%lastchild)

            DO WHILE (ASSOCIATED(childlist))
               child=>childlist%self
               ChildCostRemaining=max(GetChildCosts(childlist%self%box%mGlobal, n+1),1e-20)
               IF (cp == 0) THEN !Sanity check that should not be needed... unless profile.data gives negative workloads...
                  write(*,*) MPI_ID, 'have child but no child processors', childlist%self%box%mGlobal, 'perhaps profile.data has negative entries?'
                  STOP
               END IF
               i=nextproc
               j=nextproc
               DO WHILE (SUM(MaxChildTimes(i:j)) < ChildCostRemaining)
                  IF (j == cp) EXIT
                  j=j+1
               END DO
               IF (lDistVerbose) write(*,*) MPI_ID, ' splitting into ', j-i+1 , ' pieces'

               ! Now we know the minimum number of subgrids required
               ALLOCATE(splitproc(1:j-i+1))
               ALLOCATE(splitweight(1:j-i+1))

               splitproc=ChildProcs(n)%p(i:j)

               IF (SUM(ChildTimes(i:j)) >= ChildCostRemaining) THEN !Now let's use the desired times for i:j-1 and adjust j to match
                  splitweight(1:j-i)=ChildTimes(i:j-1)
                  splitweight(j-i+1)=ChildCostRemaining-SUM(ChildTimes(i:j-1))
                  nextproc=j !the next child starts at processor j, which still has time remaining
                  IF (lDistVerbose) write(*,*) MPI_ID, 'shrinking last grid only'
               ELSE

                  !Now we want to adjust the splits so they are closer to the desired values instead of the maximum values
                  !Each processor has a range from x to X and we know that X+Y+Z > c'
                  !We want x'+y'+z'=c' where x < x' < X and y < y' < Y and z<z'<Z
                  !Take x+y+z=c and X+Y+Z=C and calculate alpha = (c'-c)/(C-c) where 0 < alpha < 1
                  !Then x'=x+alpha*(X-x), y'=y+alpha*(Y-y) and z'=z+alpha*(Z-z)

                  IF (SUM(MaxChildTimes(i:j)-ChildTimes(i:j)) < 1e-10) THEN
                     alpha=1d0
                  ELSE
                     alpha=(ChildCostRemaining-SUM(ChildTimes(i:j)))/SUM(MaxChildTimes(i:j)-ChildTimes(i:j))
                  END IF
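                  !For example (purely illustrative numbers): with desired times (2,3),
                  !maximum times (4,5) and a remaining child cost of 7, c=5, C=9 and
                  !alpha=(7-5)/(9-5)=0.5, giving split weights (3,4) which sum to 7.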
                  IF (lDistVerbose) write(*,*) MPI_ID, 'stretching all grids', alpha
                  splitweight=ChildTimes(i:j)+(MaxChildTimes(i:j)-ChildTimes(i:j))*alpha
                  nextproc=min(j+1,cp)
               END IF
               ChildTimes(i:j)=ChildTimes(i:j)-splitweight
               MaxChildTimes(i:j)=MaxChildTimes(i:j)-splitweight
               IF (ChildTimes(j) <= 0d0) nextproc=min(j+1,cp)
               !               write(*,*) MPI_ID, 'i,j,i-j+1', i, j, j-i+1
               j=j-i+1
               IF (.NOT. lsplit) THEN                         ! We can't split child
                  ALLOCATE(newgrids(3,2,1))
                  newgrids(:,:,1)=child%box%mGlobal
                  splitproc(1)=ChildProcs(n)%p(i-1+sum(maxloc(splitweight(1:j)))) !Assign child to childproc with the maximum availability
                  j=1
                  !                  write(*,*) MPI_ID, 'not splitting', splitweight(1:j), splitproc(1)
               ELSE
                  ALLOCATE(newgrids(3,2,j))                   ! Calculate new set of smaller grids that have the correct weights
                  !                 write(*,*) 'splitting', child%box%mGlobal
                  DO
                     newgrids=0
                     IF (lDistVerbose) write(*,*) MPI_ID, ' split weights ', splitweight(1:j)
                     CALL HilbertSplit(leveldown(child%box%mGlobal, n+1), splitweight(1:j), newgrids, (/1,j/), n)
                     !                     write(*,'(4I6)') newgrids(1:2,1:2,1:j)
                     l=j
                     m=1
                     DO WHILE (m <= l)
                        IF (newgrids(1,1,m) == 0) THEN
                           !                          write(*,*) MPI_ID, ' removing ', m, j, l
                           l=l-1
                           IF (l == 0) THEN
                              write(*,*) 'HilbertSplit returned no usable subgrids'
                              STOP
                           END IF
                           newgrids(:,:,m:l)=newgrids(:,:,m+1:l+1)
                           splitweight(m:l)=splitweight(m+1:l+1)
                           splitproc(m:l)=splitproc(m+1:l+1)
                           m=m-1
                        END IF
                        m=m+1
                     END DO
                     IF (l == j) EXIT !No grids to remove from splitting
                     !                    write(*,*) MPI_ID, 'reducing number of child grids to ', l
                     j=l !reduce number of grids to split
                  END DO
                  DO m=1,j
                     newgrids(:,:,m)=levelup(newgrids(:,:,m), n)
                  END DO
               END IF

               DO l=1,j
                  !Do some sanity checks
                  IF (ALL(newgrids(:,:,l)==0)) CYCLE !Unable to split into the desired number of pieces so remove unused procs and adjust weights

                  !Just checking that HilbertSplit didn't return any illegal new grids
                  IF (ANY(newgrids(:,1,l) < child%box%mGlobal(:,1)) .OR. ANY(newgrids(:,2,l) > child%box%mGlobal(:,2))) THEN
                     write(*,*) l, newgrids(:,:,l), child%box%mGlobal
                     write(*,*) "error in distribution", lsplit
                     write(*,'(A,6I4)') "childgrid=", child%box%mGlobal
                     write(*,'(A,6I4)') "splitgrid=", newgrids(:,:,l)
                     STOP
                  END IF
                  IF (ANY(newgrids(1:nDim,1,l) > newgrids(1:nDim,2,l))) THEN
                     write(*,*) 'illegal grid'
                     write(*,*) newgrids(:,:,l)
                     STOP
                  END IF
                  IF (splitproc(l) < 0 .OR. splitproc(l) > MPI_NP-1) THEN
                     write(*,*) 'illegal target processor', splitproc(l)
                     STOP
                  END IF

                  !Create child
                  NULLIFY(child_box)
                  CALL CreateNodeBox(newgrids(:,:,l), child_box, splitproc(l))
                  IF (lDistVerbose) write(*,*) MPI_ID, ' creating child ', child_box
                  LocalCost=LocalCost+max(GetChildCosts(child_box%mGlobal,n+1), 1e-20)
                  NULLIFY(newchild)
                  CALL AddNode(n+1, child_box, newchild)
                  CALL AddParent(newchild, node)
                  CALL AddChild(node, newchild)
                  CALL DestroyNodeBox(child_box)

               END DO

               DEALLOCATE(newgrids)
               DEALLOCATE(splitproc, splitweight)
               childlist=>childlist%next
            END DO



            CALL DestroyNodeList(children)         !Destroy old childlist


            !Setup childgrids list like newsubgrids
            IF (n >= 0) THEN
               nchildren=NodeCount(node%children)
               IF (nchildren > 0) THEN
                  ALLOCATE(childgrids(3,2,nChildren))
                  children=>node%children
                  j=0
                  DO WHILE (ASSOCIATED(children))
                     j=j+1
                     childgrids(:,:,j)=children%self%box%mGlobal
                     children=>children%next
                  END DO
               ELSE
                  NULLIFY(childgrids)
               END IF
               CALL AllocChildFixups(node%info, childgrids)
               IF (ASSOCIATED(childgrids)) DEALLOCATE(childgrids)
            END IF
            nodelist=>nodelist%next
         END DO

         !         IF (lSplit) THEN
         !            CALL MPI_ALLREDUCE(LocalCost, TotalLocalCost, 1, MPI_DOUBLE_PRECISION, MPI_SUM, levels(n)%MPI_COMM, ierr)

         ! Since MeanWorkLoadByLevel(n+1) = TotalLocalCostEst*splitfactor(n)/MPI_NP
         ! And we want splitfactor(n)=TotalLocalCost/TotalLocalCostEst
         !            splitfactor(n) = splitfactor(n)*(TotalLocalCost/REAL(MPI_NP))/MeanWorkLoadByLevel(n+1)
         !            IF (MPI_ID == 0) write(*,*) 'Splitfactor for level ', n, ' = ', splitfactor(n)
         ! Now we can correct the workload
         !            MeanWorkLoadByLevel(n+1)=TotalLocalCost/MPI_NP
         !         END IF


      END IF

      DEALLOCATE(childtimes, maxchildtimes)

      CALL StopTimer(iDistributeChildrens, n)
   END SUBROUTINE DistributeChildrens


   FUNCTION HilbertValue(mGlobal, level)
      REAL(KIND=qPREC) :: HilbertValue
      INTEGER :: i,j,k,n, level,tempi,tempj,tempk
      INTEGER, DIMENSION(:,:) :: mGlobal
      INTEGER :: indices(3,2), sum_mGlobal(3)
      INTEGER :: h
      HilbertValue=0
      n=Rootn+max(level,0)
      sum_mGlobal(1:nDim)=sum(mGlobal(1:nDim,:),2)
      DO i=1,nDim
         IF (modulo(sum_mGlobal(i),2) == 0) THEN
            indices(i,1)=floor(half*sum_mGlobal(i))
            indices(i,2)=indices(i,1)+1
         ELSE
            indices(i,:)=sum_mGlobal(i)/2
         END IF
      END DO
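      ! indices(:,1:2) bracket the cell(s) closest to the grid's center in each
      ! dimension; the loops below average the Hilbert key of those center cells
      ! (rescaled by di, dj, dk onto the 2**n lattice), so each grid is ordered by
      ! the Hilbert index of its center.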
      IF (nDim == 2) THEN
         DO i=indices(1,1), indices(1,2) !, ncells(1)
            DO j=indices(2,1), indices(2,2) !, ncells(1)
               tempi=(i-1)*di
               tempj=(j-1)*dj
               CALL closedformhilbert2d(n,tempi,tempj,h)
               HilbertValue=HilbertValue+h
            END DO
         END DO
      ELSEIF (nDim == 3) THEN
         DO i=indices(1,1), indices(1,2) !, ncells(1)
            DO j=indices(2,1), indices(2,2) !, ncells(1)
               DO k=indices(3,1), indices(3,2)
                  tempi=(i-1)*di
                  tempj=(j-1)*dj
                  tempk=(k-1)*dk
                  CALL closedformhilbert3d(n,tempi,tempj,tempk,h)
                  HilbertValue=HilbertValue+h
               END DO
            END DO
         END DO
      END IF
      HilbertValue=HilbertValue/REAL(product(indices(1:nDim,2)-indices(1:nDim,1)+1))
   END FUNCTION HilbertValue

   SUBROUTINE HilbertSort(childgrids,level)
      INTEGER, DIMENSION(:,:,:), POINTER, INTENT(INOUT) :: childgrids
      INTEGER, DIMENSION(:,:,:), POINTER :: sortedchildgrids
      REAL(KIND=qPREC), DIMENSION(:), POINTER :: childgridhilbertvalues
      INTEGER, INTENT(IN) :: level
      INTEGER :: n,i,index
      n=size(childgrids,3)
      IF (n == 1) RETURN
      !      write(*,*) "HilbertSort", n
      ALLOCATE(sortedchildgrids(3,2,n), childgridhilbertvalues(n))
      DO i=1,n
         IF (ALL(childgrids(:,:,i) == 0)) THEN
            childgridhilbertvalues(i)=-1
         ELSE
            childgridhilbertvalues(i)=HilbertValue(childgrids(:,:,i),level)
         END IF
      END DO
      !      write(*,*) childgridhilbertvalues
      DO i=1,n
         index=minloc(childgridhilbertvalues,DIM=1)
         sortedchildgrids(:,:,i)=childgrids(:,:,index)
         childgridhilbertvalues(index)=huge(1.0)
      END DO
      DEALLOCATE(childgrids)
      childgrids=>sortedchildgrids
      NULLIFY(sortedchildgrids)
      DEALLOCATE(childgridhilbertvalues)
      NULLIFY(childgridhilbertvalues)
   END SUBROUTINE HilbertSort
00659 
00660 
00668    RECURSIVE SUBROUTINE HilbertSplit(childgrid, weights, newgrids,indx,level)
00669       INTEGER, DIMENSION(:,:), INTENT(IN) :: childgrid
00670       REAL(KIND=qPREC), DIMENSION(:), INTENT(IN) :: weights
00671       REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: accweight
00672       INTEGER, DIMENSION(:,:,:), POINTER, INTENT(OUT) :: newgrids
00673       REAL(KIND=qPREC) :: leftweight, rightweight, subgridweights(2),totalweight,HilbertDiff(3,2),HilbertLeft,HilbertRight,cost(3,2), effective_cost(3,2)
00674       INTEGER, DIMENSION(3,2) :: leftmGlobal,rightmGlobal,subgridleft,subgridright
00675       INTEGER, DIMENSION(2) :: indx
00676       INTEGER ::ngrids,i1,i,j,splitpoint(3,2),level, loc(2)
00677       !      IF (SIZE(weights) == 0) THEN
00678       !         write(*,*) 'weights are zero sized'
00679       !         STOP
00680       !      END IF
00681       IF (any(weights==0)) THEN
00682          write(*,*) "weights are zero", weights, MPI_ID
00683          STOP
00684       END IF
00685       ngrids=indx(2)-indx(1)+1 
00686       !          write(*,'(A,6I4,20E13.2)') "HilbertSplit", childgrid, weights
00687       IF (indx(2) < indx(1)) THEN
00688          write(*,*) "err in HilbertSplit", indx
00689          write(*,*) childgrid
00690          write(*,*) weights
00691          write(*,*) level
00692          STOP
00693       END IF
00694       IF (ANY(childgrid(1:nDim,2) < childgrid(1:nDim,1))) THEN
00695          newgrids(:,:,indx(1):indx(2))=0
00696          RETURN
00697       ELSE
00698          IF (ngrids == 1) THEN
00699             newgrids(:,:,indx(1))=childgrid
00700          ELSE
00701 
00702             totalweight=sum(weights(indx(1):indx(2)))
00703             IF (ngrids==2) THEN !get processor split point
00704                i1=indx(1)
00705                leftweight=weights(indx(1))/totalweight
00706             ELSE
00707                ALLOCATE(accweight(indx(1):indx(2)))
00708 
00709                totalweight=sum(weights(indx(1):indx(2)))
00710                accweight(indx(1))=weights(indx(1))
00711                DO i=indx(1)+1,indx(2)
00712                   accweight(i)=accweight(i-1)+weights(i)
00713                END DO
00714                i1=indx(1)-1+sum(minloc(abs(accweight/totalweight-half))) !gets split point that is closest to half
00715                leftweight=accweight(i1)/totalweight !left weight
00716                DEALLOCATE(accweight)
00717             END IF
00718             rightweight=1d0-leftweight !right weight
00719 
00720             !Now we know where to split the processors
00721             !and the relative sizes of the left and right grids
00722             !Now we just need to decide along which direction to split and which half is the new left and the new right
00723 
00724             leftmGlobal=childgrid
00725             rightmGlobal=childgrid
00726             subgridweights=(/leftweight,rightweight/)

            DO i=1,nDim
               DO j=1,2
                  splitpoint(i,j)=childgrid(i,1)-1+nint((childgrid(i,2)-childgrid(i,1)+1)*subgridweights(j))
                  IF (splitpoint(i,j) >= childgrid(i,1)-1+MinimumGridPoints .AND. splitpoint(i,j) <= childgrid(i,2)-MinimumGridPoints) THEN
                     leftmGlobal(i,2)=splitpoint(i,j)
                     rightmGlobal(i,1)=splitpoint(i,j)+1
                     HilbertLeft=HilbertValue(leftmGlobal,level)
                     HilbertRight=HilbertValue(rightmGlobal,level)
                     HilbertDiff(i,j)=(-1)**(j-1)*(HilbertRight-HilbertLeft)

                  ELSE IF (childgrid(i,2)-childgrid(i,1)+1 < 2*MinimumGridPoints) THEN !Can't split
                     splitpoint(i,j)=childgrid(i,1)-1+(childgrid(i,2)-childgrid(i,1)+1)*nint(subgridweights(j))
                     leftmGlobal(i,2)=splitpoint(i,j)
                     rightmGlobal(i,1)=splitpoint(i,j)+1
                     HilbertDiff(i,j)=0 !We don't want to favor the additional load imbalancing

                  ELSE !Need to round splitpoint to multiple of MinimumGridPoints
                     IF (splitpoint(i,j) < childgrid(i,1)-1+MinimumGridPoints) THEN
                        splitpoint(i,j)=childgrid(i,1)-1+MinimumGridPoints*min(nint((childgrid(i,2)-childgrid(i,1)+1) * &
                             subgridweights(j)/MinimumGridPoints),1)

                     ELSE !splitpoint > childgrid(i,2)-MinimumGridPoints
                        splitpoint(i,j)=childgrid(i,2)-MinimumGridPoints*min(nint((childgrid(i,2)-childgrid(i,1)+1) * &
                             subgridweights(3-j)/MinimumGridPoints),1)
                     END IF
                     leftmGlobal(i,2)=splitpoint(i,j)
                     rightmGlobal(i,1)=splitpoint(i,j)+1
                     HilbertLeft=HilbertValue(leftmGlobal,level)
                     HilbertRight=HilbertValue(rightmGlobal,level)
                     HilbertDiff(i,j)=(-1)**(j-1)*(HilbertRight-HilbertLeft)
                  END IF


                  IF (ngrids == 2) THEN
                     IF (j==1) THEN
                        cost(i,j)=max(ChildAdvanceCost(leftmGlobal, level)/leftweight, ChildAdvanceCost(rightmGlobal,level)/rightweight)
                     ELSE
                        cost(i,j)=max(ChildAdvanceCost(leftmGlobal, level)/rightweight, ChildAdvanceCost(rightmGlobal, level)/leftweight)
                     END IF
                  ELSE
                     IF (j == 1) THEN
                        cost(i,j)=max(ChildAdvanceCost(nint((leftmGlobal)/REAL(i1-indx(1)+1)**(1d0/nDim)),level)/leftweight, ChildAdvanceCost(nint((rightmGlobal)/REAL(indx(2)-i1)**(1d0/nDim)),level)/rightweight)
                     ELSE
                        cost(i,j)=max(ChildAdvanceCost(nint((rightmGlobal)/REAL(i1-indx(1)+1)**(1d0/nDim)),level)/rightweight, ChildAdvanceCost(nint((leftmGlobal)/REAL(indx(2)-i1)**(1d0/nDim)),level)/leftweight)
                     END IF
                  END IF


               END DO
               leftmGlobal(i,2)=childgrid(i,2)
               rightmGlobal(i,1)=childgrid(i,1)

            END DO


            !Rescale HilbertDiffs to go from 0 to +- 1
            HilbertDiff(1:nDim,:)=HilbertDiff(1:nDim,:) / max(maxval(abs(HilbertDiff(1:nDim,:))), 1d-10)
            cost(1:nDim,:)=cost(1:nDim,:)/max(minval(cost(1:nDim,:)),1d-10)
            effective_cost(1:nDim,:)=cost(1:nDim,:)-.00001*HilbertDiff(1:nDim,:)
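            !The split direction and orientation are chosen mainly to minimize the
            !estimated advance cost; the tiny HilbertDiff term only breaks near-ties,
            !favoring splits whose first piece comes earlier along the Hilbert curve.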

            !            write(*,'(A,6E15.6)') 'effective_cost=', effective_cost
            !            write(*,'(A,6E15.6)') 'cost =', cost
            !            write(*,'(A,6I15)') 'splitpoints=', splitpoint

            loc=minloc(effective_cost(1:nDim,:))
            i=loc(1)
            j=loc(2)

            IF (splitpoint(i,j) < childgrid(i,1)-1 .OR. splitpoint(i,j) > childgrid(i,2)) THEN
               write(*,*) 'illegal split point', splitpoint, effective_cost, i, j, childgrid
               STOP
            END IF

            leftmGlobal(i,2)=splitpoint(i,j)
            rightmGlobal(i,1)=splitpoint(i,j)+1

            IF (j==1) THEN
               subgridleft=leftmGlobal
               subgridright=rightmGlobal
            ELSE
               subgridleft=rightmGlobal
               subgridright=leftmGlobal
            END IF
            !            write(*,'(A,6I4,A,6I4,A,6I4,A,F12.2)') "Split ", childgrid, " into ",subgridleft, " and ", subgridright, "with HDiff=", HilbertDiff(i,j)

            !            write(*,*) indx(1),i1,indx(2)
            IF (ANY(subgridleft(1:ndim,2) < subgridleft(1:nDim,1))) THEN

               IF (leftweight == rightweight) THEN
                  CALL HilbertSplit(subgridright,weights,newgrids,(/indx(1),i1/),level)
                  CALL HilbertSplit(subgridleft,weights,newgrids,(/i1+1,indx(2)/),level)
               ELSE
                  CALL HilbertSplit(subgridright,weights,newgrids,(/i1+1,indx(2)/),level)
                  CALL HilbertSplit(subgridleft,weights,newgrids,(/indx(1),i1/),level)
               END IF
            ELSE
               CALL HilbertSplit(subgridright,weights,newgrids,(/i1+1,indx(2)/),level)
               CALL HilbertSplit(subgridleft,weights,newgrids,(/indx(1),i1/),level)
            END IF
         END IF
      END IF
   END SUBROUTINE HilbertSplit



   SUBROUTINE ClearParentProcs(level)
      INTEGER :: level
      RETURN !Routine is currently disabled; the cleanup below is never reached
      IF (ASSOCIATED(ParentProcs(level)%p)) THEN
         DEALLOCATE(ParentProcs(level)%p)
         NULLIFY(ParentProcs(level)%p)
      END IF

   END SUBROUTINE ClearParentProcs

END MODULE DistributionControl