!#########################################################################
!		
!    Copyright (C) 2003-2012 Department of Physics and Astronomy,
!                            University of Rochester,
!                            Rochester, NY
!
!    scheduling.f90 is part of AstroBEAR.
!
!    AstroBEAR is free software: you can redistribute it and/or modify	  
!    it under the terms of the GNU General Public License as published by 
!    the Free Software Foundation, either version 3 of the License, or    
!    (at your option) any later version.
!
!    AstroBEAR is distributed in the hope that it will be useful, 
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
!
!#########################################################################
!> @file scheduling.f90
!! @brief Main file for module Scheduling

!> @defgroup Scheduling Scheduling
!! @brief Module for calculating the waiting times to allow for global load balancing
!! @ingroup AMR

!> Module for calculating the waiting times to allow for global load balancing
!! @ingroup Scheduling
MODULE Scheduling

   USE GlobalDeclarations
   USE TreeDeclarations
   USE HyperbolicDeclarations
   USE Timing
!   USE HyperBolicControl

   IMPLICIT NONE
!   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: WorkLoadByLevel

   ! Module-level scheduling state. All arrays below are allocated in SchedulingInit.

   ! Work load indexed by (level, step); allocated as (-1:MaxLevel, 2) and zeroed in SchedulingInit.
   ! CalcEstimatedWaitingTimes reads element (j, levels(j)%step) for each level j.
   REAL(KIND=qPREC), DIMENSION(:,:), ALLOCATABLE :: WorkLoadByLevelPerStep
   ! Per-level array allocated as (-1:MaxLevel) and zeroed in SchedulingInit.
   ! NOTE(review): presumably the mean over steps of WorkLoadByLevelPerStep - not computed in this file, confirm.
   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: MeanWorkLoadByLevelPerStep
   ! Per-level node lists, allocated as (-1:MaxLevel) in SchedulingInit; not otherwise used in this file.
   TYPE(pNodeDeflist), DIMENSION(:), ALLOCATABLE :: AdvanceNodeListBylevel
!   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: WaitingTimesByLevel
!   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: AdvanceTimesByLevel
!   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: WaitingTimes
   ! Per-level target synchronization time, allocated as (0:MaxLevel).
   ! Set by CalcEstimatedWaitingTimes as (mpi_wtime() - StartTime) plus an estimate of the work remaining.
   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: SyncTime
   ! Per-level accumulator, allocated as (0:MaxLevel) and zeroed in SchedulingInit; updated outside this file.
   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: AccumulatedWorkDoneByLevel
!   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: TimeWastedByLevel
!   REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE  :: TimeUsedBySolver  !Values used for partial updates

    ! Not referenced anywhere in this file.
    ! NOTE(review): presumably a time budget handed to the solver - confirm against its users.
    REAL(KIND=qPREC) :: TimeAvailableToSolver
   
   SAVE
CONTAINS

   !> Calculate the estimated synchronization time for each level - called before every maxlevel advance
   !!
   !! For every level i from the finest level that is on its first step (MinLevel) up to MaxLevel,
   !! two local estimates are formed from the work still outstanding on this processor:
   !!   - tSynchronize(i,1): work left on levels i..MaxLevel times a safety factor
   !!                        (the earliest this processor is likely to be ready), and
   !!   - tSynchronize(i,2): work left on all levels 0..MaxLevel
   !!                        (the longest this processor would like to wait before redistributing).
   !! Both are reduced over MPI_COMM_WORLD (maximum of the first, minimum of the second) and the
   !! result is stored, offset by the elapsed wall-clock time, in SyncTime(MinLevel:MaxLevel).
   !! @param n level; passed to the iBarrier timer calls that bracket the global reduction
   SUBROUTINE CalcEstimatedWaitingTimes(n)
      INTEGER :: n
      INTEGER :: iErr, i, j
      INTEGER :: MinLevel
      ! Margin applied to the finer-level work estimate
      REAL(KIND=qPREC), PARAMETER :: safety_fact = 1.1d0
      REAL(KIND=qPREC), DIMENSION(:), ALLOCATABLE :: WorkRemaining
      REAL(KIND=qPREC), DIMENSION(:,:), ALLOCATABLE :: tSynchronize
      REAL(KIND=qPREC) :: MyFinerWorkLeft, MyCoarserWorkLeft, tElapsed

      ! First find the minimum level that will be going into a waiting advance before another
      ! call to CalcEstimatedWaitingTimes: the finest level that is currently on its first step.
      ! That level still has to complete its second step before the next coarser level
      ! completes its own step and goes into a waiting advance.
      DO MinLevel = MaxLevel, 0, -1
         IF (levels(MinLevel)%step == 1) EXIT
      END DO

      ! If no level is on its first step the loop above ends with MinLevel = -1, which is below
      ! the lower bound of SyncTime (allocated 0:MaxLevel in SchedulingInit). Clamp to the root
      ! level so that only valid entries are computed and stored.
      MinLevel = MAX(MinLevel, 0)

      ! Work still outstanding on this processor for the current step of every level
      ALLOCATE(WorkRemaining(0:MaxLevel))
      DO j = 0, MaxLevel
         WorkRemaining(j) = WorkLoadByLevelPerStep(j, levels(j)%step) - WorkDoneByLevel(j)
      END DO

      ALLOCATE(tSynchronize(MinLevel:MaxLevel, 2))
      DO i = MinLevel, MaxLevel
         MyFinerWorkLeft   = SUM(WorkRemaining(i:MaxLevel))
         MyCoarserWorkLeft = SUM(WorkRemaining(0:i-1))   ! zero-sized section (sum = 0) when i == 0
         tSynchronize(i,1) = MyFinerWorkLeft*safety_fact            ! earliest i'll likely be ready
         tSynchronize(i,2) = MyFinerWorkLeft + MyCoarserWorkLeft    ! longest i would like to wait before redistributing
      END DO

      ! Design note: the "ready for sure" estimate alone is not used as the target, because when
      ! it exceeds what a processor can fill with coarser work that processor may sit idle.

      ! Negate the second column so that a single MPI_MAX reduction yields the maximum of
      ! column 1 and the minimum of column 2 (the longest any processor can wait).
      tSynchronize(:,2) = -tSynchronize(:,2)

      CALL StartTimer(iBarrier, n)
      CALL MPI_ALLReduce(MPI_IN_PLACE, tSynchronize, SIZE(tSynchronize), MPI_DOUBLE_PRECISION, &
                         MPI_MAX, MPI_COMM_WORLD, iErr)
      CALL StopTimer(iBarrier, n)
      tSynchronize(:,2) = -tSynchronize(:,2)

      ! Now tSynchronize(:,1) is the earliest everyone will be ready
      ! and tSynchronize(:,2) is the latest everyone can stay busy.

      ! Sample the clock once so that every level is measured against the same time base
      tElapsed = mpi_wtime() - StartTime

      DO i = MinLevel, MaxLevel
         IF (tSynchronize(i,2) > tSynchronize(i,1)) THEN
            ! No problem: everyone can stay busy until all are ready
            SyncTime(i) = tElapsed + tSynchronize(i,1)
         ELSE
            ! Somebody runs out of work before everyone is ready: take the average of the two
            SyncTime(i) = tElapsed + half*SUM(tSynchronize(i,1:2))
         END IF
      END DO

      DEALLOCATE(WorkRemaining, tSynchronize)

   END SUBROUTINE CalcEstimatedWaitingTimes
   

   !> Takes a vector and subtracts it from its own maximum value.
   !! On return maxdiff(k) = maxval(x) - x(k) for every element k.
   !! @param maxdiff output vector; must have the same extent as x
   !! @param x input vector
   SUBROUTINE GetMaxDiff(maxdiff, x)
     REAL(KIND=qPREC), DIMENSION(:), INTENT(OUT) :: maxdiff
     REAL(KIND=qPREC), DIMENSION(:), INTENT(IN)  :: x
     ! The scalar maximum is broadcast over the whole-array subtraction
     maxdiff = MAXVAL(x) - x
   END SUBROUTINE GetMaxDiff


   !> Allocate and initialise the necessary arrays for the scheduling module
   !!
   !! Every array is indexed by level. The work-load and node-list arrays start at level -1,
   !! the accumulated-work and synchronization-time arrays at level 0. The second index of
   !! WorkLoadByLevelPerStep is the step number (1 or 2).
   SUBROUTINE SchedulingInit()

      ALLOCATE(AccumulatedWorkDoneByLevel(0:MaxLevel))
      ALLOCATE(WorkLoadByLevelPerStep(-1:MaxLevel,2))
      ALLOCATE(MeanWorkLoadByLevelPerStep(-1:MaxLevel))
      ALLOCATE(AdvanceNodeListByLevel(-1:MaxLevel))
      ALLOCATE(SyncTime(0:MaxLevel))

      WorkLoadByLevelPerStep=0d0
      MeanWorkLoadByLevelPerStep=0d0
      AccumulatedWorkDoneByLevel=0d0
      ! CalcEstimatedWaitingTimes only fills SyncTime from its MinLevel upwards, so give the
      ! coarser entries a defined value instead of leaving them uninitialised.
      SyncTime=0d0

   END SUBROUTINE SchedulingInit

END MODULE Scheduling




