!#########################################################################
!		
!    Copyright (C) 2003-2012 Department of Physics and Astronomy,
!                            University of Rochester,
!                            Rochester, NY
!
!    pth_declarations.f90 is part of AstroBEAR.
!
!    AstroBEAR is free software: you can redistribute it and/or modify	  
!    it under the terms of the GNU General Public License as published by 
!    the Free Software Foundation, either version 3 of the License, or    
!    (at your option) any later version.
!
!    AstroBEAR is distributed in the hope that it will be useful, 
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
!
!#########################################################################
MODULE PthDeclarations
   USE GlobalDeclarations
   USE HyperbolicDeclarations
   USE Timing
   IMPLICIT NONE
   include 'fpth.f'
   INTEGER, PARAMETER :: nAdvanceSleeps=4, AdvanceSleepTime=1
   INTEGER, PARAMETER :: nControlSleeps=1, ControlSleepTime=1
   TYPE IntStatus
      integer, POINTER :: val
   END type IntStatus
   INTEGER, DIMENSION(-2:MaxDepth) :: threadarg
   REAL(8) :: NextBarrierTime
   INTEGER :: returnstatus(-2:MaxDepth)
   TYPE(fpth_t) :: control_thread, advance_threads(-2:MaxDepth), comm_thread
   TYPE(fpth_attr_t) :: control_attribute, advance_attributes(-2:MaxDepth), comm_attribute
   INTEGER :: Advance_StackSize=10*1024*1024 !100 mB

   LOGICAL, DIMENSION(-2:MaxDepth) :: lAdvanceActive, lAdvanceCompleted
   LOGICAL :: lControlActive


   INTEGER  :: pVerBose=0
   INTEGER, PARAMETER :: nIters=0

   INTEGER, PARAMETER :: TESTANY=0, TESTALL=1, TESTONE=2, TESTNONE=3

   INTEGER, SAVE :: Curr_TestType=TESTNONE
   INTEGER, SAVE, POINTER, DIMENSION(:) :: Curr_req_array
   INTEGER, SAVE, POINTER :: Curr_req=>NULL()
   INTEGER, SAVE, POINTER, DIMENSION(:) :: Curr_mpi_status
   INTEGER, SAVE, POINTER :: Curr_ierr=>NULL()
   LOGICAL, SAVE, POINTER :: curr_success=>NULL()
   INTEGER, SAVE, POINTER :: curr_req_index=>NULL()
   INTEGER, SAVE, POINTER :: Curr_nreqs=>NULL()

   INTEGER, PARAMETER :: commint=-1

   ! routines by which the control thread yields
   PUBLIC :: pth_MPI_WAIT, pth_MPI_WAITANY, pth_MPI_WAITALL, pth_MPI_BARRIER, pth_MPI_BARRIER2, pth_MPI_BARRIER3

   ! routine by which the advance threads yield
   PUBLIC :: yield
   
contains



   SUBROUTINE pth_MPI_WAIT(mpi_request, mpi_status, ierr)
      INTEGER,TARGET :: mpi_request, ierr, mpi_status(MPI_STATUS_SIZE)
      INTEGER :: yield_err, ierr2, n
      TYPE(IntStatus) :: status
      LOGICAL, TARGET :: success
!      CALL MPI_WAIT(mpi_request, mpi_status, ierr)
!      write(*,*) 'MPIW', mpi_request, mpi_status, ierr
!      RETURN

      IF (ANY(lAdvanceActive(0:MaxLevel))) THEN
!         write(*,*) 'waiting',
         success=.false.
         CALL MPI_TEST(mpi_request, success, mpi_status, ierr)
         IF (.not. success) THEN
            write(*,*) 'waiting for request', mpi_request
            Curr_req=>mpi_request
            Curr_mpi_status=>mpi_status
            Curr_ierr=>ierr
            Curr_TestType=TESTONE
            Curr_success=>success
!            write(*,'(20I)') 0, MPI_ID, loc(curr_req), loc(curr_success), loc(curr_ierr), loc(curr_mpi_status)
!            write(*,'(20I)') 0, TESTONE, MPI_ID, curr_req, curr_success, curr_ierr, curr_mpi_status
            DO WHILE (.NOT. success .AND. ANY(lAdvanceActive(0:MaxLevel)))              
               IF (pVerbose >= 2) writE(*,*) 'wait yielding'
               DO n=MaxLevel, 0, -1
                  IF (lAdvanceActive(n)) EXIT
               END DO
               IF (pVerbose >=2 ) write(*,*) 'yielding to advance thread', n
               CALL fpth_yield(advance_threads(n), ierr)
               CALL pth_checkerr(ierr, 'fpth_yield')
!               CALL fpth_yield_any(yield_err)
               IF (pVerbose >= 2) write(*,*) 'wait resuming'
            END DO
            !            write(*,*) 'wait resuming'
!            CALL fpth_spawn(comm_thread, comm_attribute, yield, commint)
!            CALl yield(0)
!            CALL fpth_join(comm_thread, status, ierr2)

            IF (.NOT. success) THEN
!               write(*,*) 'waiting'
               CALL StartTimer(iBarrier, 0)
               CALL MPI_WAIT(mpi_request, mpi_status, ierr)
               CALL StopTimer(iBarrier, 0)
            END IF
            NULLIFY(Curr_req, Curr_mpi_status, Curr_ierr, Curr_success)
            Curr_TestType=TESTNONE
!            write(*,*) 'reseetting curr_testtype'
         END IF
!         write(*,*) 'completed', mpi_request, mpi_status, ierr
      ELSE
         IF (pverbose >= 2) write(*,*) 'waiting mpi_wait   '
         CALL StartTimer(iBarrier, 0)
         CALL MPI_WAIT(mpi_request, mpi_status, ierr)
         CALL StopTimer(iBarrier, 0)
      END IF
      
   END SUBROUTINE pth_MPI_WAIT


   SUBROUTINE pth_MPI_WAITANY(nreqs, req_array, req_index, mpi_status, iErr)
      INTEGER, TARGET :: iErr, nreqs, req_index, mpi_status(MPI_STATUS_SIZE)
      INTEGER, POINTER, DIMENSION(:) :: req_array
      LOGICAL, TARGET :: success
      INTEGER :: yield_err, ierr2, n
      TYPE(IntStatus) :: status
      IF (ANY(lAdvanceActive(0:MaxLevel))) THEN
         success=.false.
         CALL MPI_TESTANY(nreqs, req_array, req_index, success, mpi_status, iErr)
         IF (.NOT. success) THEN
!            write(*,*) 'failed to find waiting for request', mpi_request
            
            Curr_nreqs=>nreqs
            Curr_req_array=>req_array
            Curr_req_index=>req_index
            Curr_mpi_status=>mpi_status
            Curr_ierr=>ierr
            !            write(*,*) 'setting currtesttype to', TESTANY, loc(threadarg(-1)), threadarg(-1)
            Curr_TestType=TESTANY
            Curr_success=>success
!            write(*,*) 'commint=', commint
!            CALL fpth_spawn(comm_thread, comm_attribute, yield, threadarg(-1))
!            CALl yield(0)
!            CALL fpth_join(comm_thread, status, ierr2)

!            CALL yield(0)
!            write(*,'(20I)') 0, MPI_ID, loc(curr_req_array), loc(curr_nreqs), loc(curr_success), loc(curr_ierr), loc(curr_mpi_status), loc(curr_req_index)
            !            write(*,'(20I)') 0, TESTANY, MPI_ID, curr_req_array, curr_nreqs, curr_success, curr_ierr, curr_mpi_status, curr_req_index
            DO WHILE (.NOT. success .AND. ANY(lAdvanceActive(0:MaxLevel)))              
               IF (pVerbose >= 2) write(*,*) 'wait any yielding', lAdvanceActive(0:MaxLevel)
               DO n=MaxLevel, 0, -1
                  IF (lAdvanceActive(n)) EXIT
               END DO
               IF (pVerbose >=2 ) write(*,*) 'yielding to advance thread', n
               CALL fpth_yield(advance_threads(n), ierr)
               CALL pth_checkerr(ierr, 'fpth_yield')
!               CALL fpth_yield_any(yield_err)
               IF (pVerbose >= 2) write(*,*) 'wait any resuming'
            END DO
            IF (.NOT. success) THEN
               !               write(*,*) 'waitanying'
               CALL StartTimer(iBarrier,0)
               CALL MPI_WAITANY(nreqs, req_array, req_index, mpi_status, iErr)
               CALL StopTimer(iBarrier,0)
            END IF
               
            NULLIFY(Curr_req_array, Curr_mpi_status, Curr_ierr, Curr_success, Curr_req_index, Curr_nreqs)
            Curr_TestType=TESTNONE
!            write(*,*) 'reseetting curr_testtype'
         END IF
         
      ELSE
         !         IF (pverbose >= 2) write(*,*) 'waiting mpi_waitany'
         CALL StartTimer(iBarrier,0)
         CALL MPI_WAITANY(nreqs, req_array, req_index, mpi_status, iErr)
         CALL StopTimer(iBarrier,0)
      END IF
   END SUBROUTINE pth_MPI_WAITANY

   SUBROUTINE pth_MPI_WAITALL(nreqs, req_array, iErr)
      INTEGER, TARGET :: iErr, nreqs, req_index
      INTEGER, POINTER, DIMENSION(:) :: req_array
      INTEGER :: yield_err, ierr2, n
      TYPE(IntStatus) :: status
      LOGICAL, TARGET :: success
      IF (ANY(lAdvanceActive(0:MaxLevel))) THEN
         success=.false.
         CALL MPI_TESTALL(nreqs, req_array, success, MPI_STATUSES_IGNORE, iErr)
         IF (.NOT. success) THEN
            Curr_req_array=>req_array
            Curr_ierr=>ierr
            Curr_TestType=TESTALL
            Curr_success=>success
            Curr_nreqs=>nreqs
!            CALL fpth_spawn(comm_thread, comm_attribute, yield, threadarg(-1))
!            CALl yield(0)
!            CALL fpth_join(comm_thread, status, ierr2)
 
!           CALl yield(0) 

!            write(*,'(20I)') 0, MPI_ID, loc(curr_req_array), loc(curr_nreqs), loc(curr_success), loc(curr_ierr)
!            write(*,'(20I)') 0, TESTALL, MPI_ID, curr_req_array, curr_nreqs, curr_success, curr_ierr
            DO WHILE (.NOT. success .AND. ANY(lAdvanceActive(0:MaxLevel)))              
               IF (pVerbose >= 2) write(*,*) 'wait all yielding', lAdvanceActive(0:MaxLevel)
               DO n=MaxLevel, 0, -1
                  IF (lAdvanceActive(n)) EXIT
               END DO
               IF (pVerbose >=2 ) write(*,*) 'yielding to advance thread', n
               CALL fpth_yield(advance_threads(n), ierr)
               CALL pth_checkerr(ierr, 'fpth_yield')
!               CALL fpth_yield_any(yield_err)
               IF (pVerbose >= 2) write(*,*) 'wait all resuming'
            END DO
            IF (.NOT. success) THEN
!               write(*,*) 'waitalling'
               CALL StartTimer(iBarrier,0)
               CALL MPI_WAITALL(nreqs, req_array, MPI_STATUSES_IGNORE, iErr)
               CALL StopTimer(iBarrier,0)
            END IF
            NULLIFY(Curr_req_array, Curr_ierr, Curr_success, Curr_nreqs)
            Curr_TestType=TESTNONE
!            write(*,*) 'reseetting curr_testtype'
         END IF
      ELSE
         IF (pverbose >= 2) write(*,*) 'waiting mpi_waitall'
         CALL StartTimer(iBarrier,0)
         CALL MPI_WAITALL(nreqs, req_array, MPI_STATUSES_IGNORE, iErr)
         CALL StopTimer(iBarrier,0)
      END IF
   END SUBROUTINE pth_MPI_WAITALL

   SUBROUTINE pth_MPI_BARRIER(comm, ierr)
      INTEGER :: iErr, nreqs, req_index,i
      INTEGER, POINTER, DIMENSION(:) :: req_array
      INTEGER :: dummy_int
      INTEGER :: pth_BarrierTag=160000
      INTEGER :: comm
      !This subroutine needs to perform a nonblocking mpi_barrier call.  One way to do this is to have every processor send a small message to one processor - and have that one processor then send a message back to everyone.

      !      IF (MPI_ID == 0) THEN
      IF (MPI_NP == 1) RETURN

!     IF (ANY(lAdvanceActive(0:MaxLevel))) THEN
         nreqs=2*(MPI_NP-1)
         ALLOCATE(req_array(nreqs))
         DO i=1, MPI_NP-1
            CALL MPI_IRECV(dummy_int, 1, MPI_INTEGER, modulo(MPI_ID+i, MPI_NP), pth_BarrierTag, MPI_COMM_WORLD, req_array(i), iErr)
         END DO
         DO i=1, MPI_NP-1
            CALL MPI_ISEND(dummy_int, 1, MPI_INTEGER, modulo(MPI_ID+i, MPI_NP), pth_BarrierTag, MPI_COMM_WORLD, req_array(MPI_NP-1+i), iErr)
         END DO
         CALL StartTimer(iBarrier,0)
         CALL pth_MPI_WAITALL(nreqs, req_array, iErr)
         CALL StopTimer(iBarrier,0)
!     ELSE
!        write(*,*) 'waiting mpi_barrier', lAdvanceActive(0:MaxLevel)
!        CALL MPI_BARRIER(comm, ierr)
!     END IF
   END SUBROUTINE pth_MPI_BARRIER

   SUBROUTINE pth_MPI_BARRIER2(comm, ierr)
      INTEGER :: iErr, nreqs, req_index,i
      INTEGER, POINTER, DIMENSION(:) :: req_array
      INTEGER :: dummy_int
      INTEGER :: pth_BarrierTag=16000
      INTEGER :: comm
      !This subroutine needs to perform a nonblocking mpi_barrier call.  One way to do this is to have every processor send a small message to one processor - and have that one processor then send a message back to everyone.

      IF (MPI_ID == 0) THEN
         nreqs=MPI_NP-1
         ALLOCATE(req_array(nreqs))
         DO i=1, MPI_NP-1
            CALL MPI_IRECV(dummy_int, 1, MPI_INTEGER, i, pth_BarrierTag, MPI_COMM_WORLD, req_array(i), iErr)
         END DO
         CALL pth_MPI_WAITALL(nreqs, req_array, iErr)

         DO i=1, MPI_NP-1
            CALL MPI_ISEND(dummy_int, 1, MPI_INTEGER, i, pth_BarrierTag, MPI_COMM_WORLD, req_array(i), iErr)
         END DO
         CALL pth_MPI_WAITALL(nreqs, req_array, iErr)
      ELSE
         nreqs=2
         ALLOCATE(req_array(nreqs))
         CALL MPI_IRECV(dummy_int, 1, MPI_INTEGER, 0, pth_BarrierTag, MPI_COMM_WORLD, req_array(1), iErr)
         CALL MPI_ISEND(dummy_int, 1, MPI_INTEGER, 0, pth_BarrierTag, MPI_COMM_WORLD, req_array(2), iErr)
         CALL pth_MPI_WAITALL(nreqs, req_array, iErr)
      END IF
      DEALLOCATE(req_array)      
   END SUBROUTINE Pth_MPI_BARRIER2


   SUBROUTINE pth_MPI_BARRIER3(comm, ierr)
      REAL(8) :: current_time
      INTEGER :: comm, ierr
      INTEGER :: n, yield_err
      IF (NextBarrierTime > MPI_WTIME()) THEN
         IF (ANY(lAdvanceActive(0:MaxLevel))) THEN
               DO n=MaxLevel, 0, -1
                  IF (lAdvanceActive(n)) EXIT
               END DO
               CALL fpth_yield(advance_threads(n), ierr)
               CALL pth_checkerr(ierr, 'fpth_yield')
!            CALL fpth_yield_any(yield_err)
         END IF
      END IF
   END SUBROUTINE pth_MPI_BARRIER3

   SUBROUTINE yield(n)
      INTEGER :: ierr
      INTEGER, INTENT(IN) :: n      
      TYPE(fpth_t) :: my_thread
      LOGICAL :: lRepeat
      REAL(8) :: tused
      IF (lControlActive) THEN !We are waiting to join the current thread         
         IF (ASSOCIATED(Curr_success)) THEN !Curr_TestType /= TESTNONE)

            ! update time for modifying redistributions..
            tused = mpi_wtime()-t_startadvance(n)
            WorkDoneByLevel(n)=WorkDoneByLevel(n)+tused
            WorkDoneByGrid(n)=WorkDoneByGrid(n)+tused

            SELECT CASE(Curr_TestType)
            CASE(TESTONE)
               CALL MPI_TEST(Curr_req, Curr_success, Curr_mpi_status, Curr_ierr)
            CASE(TESTANY)
               CALL MPI_TESTANY(Curr_nreqs,Curr_req_array, Curr_req_index, Curr_success, Curr_mpi_status, Curr_iErr)               
            CASE(TESTALL)
               CALL MPI_TESTALL(Curr_nreqs, Curr_req_array, Curr_success, MPI_STATUSES_IGNORE, Curr_iErr)               
            END SELECT
  
            IF (Curr_success) THEN !Request has completed
               IF (pVerbose >= 1) write(*,*) 'yielding to control thread'
               CALL StopTimer(iAdvanceGrids, n)
               CALL fpth_yield(control_thread, ierr)
               CALL pth_checkerr(ierr, 'fpth_yield')
               CALL StartTimer(iAdvanceGrids, n)
               IF (pVerbose >= 1) write(*,*) 'advance thread resuming on level ', n
            END IF
            t_startadvance(n)=mpi_wtime()
         ELSE ! the control thread is active but there is no active request?

!            we maybe waiting for a certain walltime

            write(*,*) 'whoops in pth_declarations'
            STOP
!            IF (MPI_WTIME() > NextBarrierTime) CALL fpth_yield(control_thread, ierr)
!            CALL pth_checkerr(ierr, 'fpth_yield')

         END IF
      END IF
!      write(*,*) 'continuing advance'
   END SUBROUTINE yield

   SUBROUTINE pth_CheckErr(iErr, errString)
      CHARACTER(*) :: errString
      INTEGER :: iErr

      IF(iErr/=1) THEN
         PRINT'(A,A)','*** Threads returned an error on ',errString
         PRINT'(A,I8)','    with error code ',iErr
         STOP
      END IF
    END SUBROUTINE Pth_CheckErr


END MODULE PthDeclarations
