!#########################################################################
!		
!    Copyright (C) 2003-2012 Department of Physics and Astronomy,
!                            University of Rochester,
!                            Rochester, NY
!
!    data_level_comms.f90 is part of AstroBEAR.
!
!    AstroBEAR is free software: you can redistribute it and/or modify	  
!    it under the terms of the GNU General Public License as published by 
!    the Free Software Foundation, either version 3 of the License, or    
!    (at your option) any later version.
!
!    AstroBEAR is distributed in the hope that it will be useful, 
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU General Public License for more details.
!
!    You should have received a copy of the GNU General Public License
!    along with AstroBEAR.  If not, see <http://www.gnu.org/licenses/>.
!
!#########################################################################
!> @file data_level_comms.f90
!! @brief Main file for module DataLevelComms

!> @defgroup DataComms Data Communications
!! @ingroup DataOps

!> @defgroup DataLevelComms Data Level Communications
!> @brief Main module for scheduling AMR related data communication
!! @ingroup DataComms

!> Main module for scheduling AMR related data communication
!! @ingroup DataLevelComms
MODULE DataLevelComms
   USE CommunicationDeclarations
   USE DataParsing
   USE Timing
   IMPLICIT NONE


   ! ProlongateParentsData
   PUBLIC PostSendChildrenData, CompSendChildrenData, PostRecvParentsData, CompRecvParentsData

   ! ApplyOverlaps
   PUBLIC PostSendOverlaps, CompSendOverlaps, PostRecvOverlaps, CompRecvOverlaps

   ! ApplyChildrenData
   PUBLIC PostSendParentsData, CompSendParentsData, PostRecvChildrenData, CompRecvChildrenData

   ! ApplyInitialChildrenData
   PUBLIC PostSendParentsInitialData, CompSendParentsInitialData, PostRecvInitialChildrenData, CompRecvInitialChildrenData

   ! SyncFluxes
   PUBLIC PostSendFluxes, CompSendFluxes, PostRecvFluxes, CompRecvFluxes

   PUBLIC PostSendGenericData, CompSendGenericData, PostRecvGenericData, CompRecvGenericData

CONTAINS

   !> @name Routines required for ProlongateParentsData
   !! @{

   !> Precalculates the buffer size needed to send child data to each processor.
   !! @param level The level of the nodes doing the sending.
   !! @param proc_buffer_sizes A 0:np-1 array of integers to hold the accumulated buffer size for each processor.
   SUBROUTINE SendChildrenData_LevelPrecalc(level, proc_buffer_sizes)

      INTEGER :: level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      TYPE(NodeDefList), POINTER :: node_cursor, child_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_child
      INTEGER :: owner      ! MPI rank recorded on the current child's box

      ! Every rank starts with an empty tally; a zero entry also marks "no message to this rank yet".
      proc_buffer_sizes = 0

      ! Visit every (node, child) pair on this level.
      node_cursor => Nodes(level)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         child_cursor => cur_node%children
         DO
            IF (.NOT. ASSOCIATED(child_cursor)) EXIT
            cur_child => child_cursor%self
            owner = cur_child%box%MPI_id

            ! Children held by this rank need no message, so they contribute nothing.
            IF (owner /= MPI_ID) THEN

               ! The first payload bound for a rank also pays for that message's termination box.
               IF (proc_buffer_sizes(owner) == 0) THEN
                  proc_buffer_sizes(owner) = TERMINATION_BOX_BYTES
               END IF

               ! Accumulate the payload cost of this child.
               proc_buffer_sizes(owner) = proc_buffer_sizes(owner) &
                                        + SendChildrenData_Precalculate(level, cur_node, cur_child)
            END IF

            child_cursor => child_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

   END SUBROUTINE SendChildrenData_LevelPrecalc


   !> Creates the send-stage child-data message group for level n, packs the data for every
   !! child owned by another processor, appends the termination box and closes the group.
   !! @param n The level of the nodes sending data to their children.
   SUBROUTINE PostSendChildrenData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_cursor, child_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_child
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: buf_sizes

      CALL StartTimer(iiSendChildrenData, n)

      CALL CreateMessageGroup(StageMessageGroups(iSendChildrenData,n)%p, TRANSMIT_CHILD_DATA, STAGE_SEND, n)
      group => StageMessageGroups(iSendChildrenData,n)%p

      ! First pass: work out how many bytes each destination rank's message needs.
      ! buf_sizes takes on the 0:MPI_np-1 bounds of its target, so it is indexed directly by rank.
      buf_sizes => size_store
      buf_sizes = 0
      CALL SendChildrenData_LevelPrecalc(n, buf_sizes)

      ! Second pass: assemble the actual messages to be sent.
      node_cursor => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         child_cursor => cur_node%children
         DO
            IF (.NOT. ASSOCIATED(child_cursor)) EXIT
            cur_child => child_cursor%self

            ! Only children held by another rank are sent.
            IF (cur_child%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(group, cur_child%box%MPI_ID, msg, &
                                            buf_sizes(cur_child%box%MPI_id))
               CALL SendChildrenData(msg, cur_node, cur_child)
            END IF

            child_cursor => child_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

      CALL SendTerminationBox(group)
      CALL CloseMessageGroup(group)

      NULLIFY(buf_sizes)

      CALL StopTimer(iiSendChildrenData, n)

   END SUBROUTINE PostSendChildrenData


   !> Finishes the child-data send stage for level n by destroying its message group.
   !! @param n The level whose iSendChildrenData message group is destroyed.
   SUBROUTINE CompSendChildrenData(n)
      INTEGER :: n
      ! Time spent here is charged to the same timer that PostSendChildrenData uses.
      CALL StartTimer(iiSendChildrenData, n)
      ! NOTE(review): presumably DestroyMessageGroup waits for outstanding sends before freeing them -- confirm.
      CALL DestroyMessageGroup(StageMessageGroups(iSendChildrenData,n)%p)
      CALL StopTimer(iiSendChildrenData, n)    
   END SUBROUTINE CompSendChildrenData


   !> Precalculates the buffer size needed to receive parent data from each processor.
   !! @param parent_level The level of the parent nodes; the receiving nodes are on level parent_level + 1.
   !! @param proc_buffer_sizes A 0:np-1 array of integers to hold the accumulated buffer size for each processor.
   SUBROUTINE RecvParentsData_LevelPrecalc(parent_level, proc_buffer_sizes)

      INTEGER :: parent_level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      TYPE(NodeDefList), POINTER :: nodelist
      TYPE(NodeDef), POINTER :: node
      TYPE(NodeDef), POINTER :: parent


      ! A zero entry doubles as the "nothing expected from this rank yet" marker used below.
      proc_buffer_sizes = 0

      ! We want to loop over the nodes in the child level, so be sure to use parent_level + 1.
      nodelist => Nodes(parent_level + 1)%p

      DO WHILE (ASSOCIATED(nodelist))
          node => nodelist%self
          parent => node%parent

          ! Parents held by this rank do not send a message, so only remote parents are counted.
          IF (parent%box%MPI_id /= MPI_id) THEN

              ! If the parent processor's accumulator is empty, add the message overhead.  In this case,
              ! that's just the termination box.
              IF (proc_buffer_sizes(parent%box%MPI_id) == 0) &
                  proc_buffer_sizes(parent%box%MPI_id) = TERMINATION_BOX_BYTES

              ! Add this node's parent payload to the accumulator.
              proc_buffer_sizes(parent%box%MPI_id) = proc_buffer_sizes(parent%box%MPI_id) + &
                                                     RecvParentsData_Precalculate(parent_level, node)

          END IF

          nodelist => nodelist%next

      END DO

   END SUBROUTINE RecvParentsData_LevelPrecalc

   !> Creates the receive-stage child-data message group for level n and requests one message
   !! from every processor that holds the parent of a node on this level.
   !! @param n The level of the nodes receiving data; their parents are on level n-1.
   SUBROUTINE PostRecvParentsData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_parent
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: buf_sizes

      CALL StartTimer(iiRecvParentsData, n)

      ! The group is stored under level n but created with the parent level (n-1).
      CALL CreateMessageGroup(StageMessageGroups(iRecvParentsData,n)%p, TRANSMIT_CHILD_DATA, STAGE_RECV, n-1)
      group => StageMessageGroups(iRecvParentsData,n)%p

      ! Precalculate the buffer sizes needed to receive parent data.  The parent level (n-1) is passed
      ! because most of the data parsing calculations are performed from the parent level's perspective.
      buf_sizes => size_store
      buf_sizes = 0
      CALL RecvParentsData_LevelPrecalc(n-1, buf_sizes)

      node_cursor => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self
         cur_parent => cur_node%parent

         ! Only parents held by another rank deliver their data through a message.
         IF (cur_parent%box%MPI_ID /= MPI_ID) THEN
            CALL ExtractMessageFromGroup(group, cur_parent%box%MPI_ID, msg, &
                                         buf_sizes(cur_parent%box%MPI_id))
         END IF

         node_cursor => node_cursor%next
      END DO

      NULLIFY(buf_sizes)

      CALL StopTimer(iiRecvParentsData, n)
   END SUBROUTINE PostRecvParentsData

   !> Finishes the parent-data receive stage for level n: hands each message returned by
   !! MGBlockOnFirstMessages to RecvParentsData, then destroys the message group.
   !! @param n The level of the nodes receiving data from their parents.
   SUBROUTINE CompRecvParentsData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg

      CALL StartTimer(iiRecvParentsData, n)

      group => StageMessageGroups(iRecvParentsData,n)%p

      ! Keep asking the group for messages until it returns an unassociated pointer.
      DO
         CALL MGBlockOnFirstMessages(group, msg)
         IF (.NOT. ASSOCIATED(msg)) EXIT
         CALL RecvParentsData(msg)
      END DO

      ! Pass the table entry itself (not the local alias) to DestroyMessageGroup.
      CALL DestroyMessageGroup(StageMessageGroups(iRecvParentsData,n)%p)

      CALL StopTimer(iiRecvParentsData, n)
   END SUBROUTINE CompRecvParentsData
   !> @}

   !> @name Routines required for ApplyOverlaps
   !! @{

   !> Calculate the buffer sizes needed to handle this processor's overlap sends for this level.
   !! @param level The level of the overlaps to be sent.
   !! @param proc_buffer_sizes An array of buffer sizes for each processor.  Will be populated by this subroutine.
   !! @param lUseNew If true, the nodes in Nodes(level) are scanned for overlaps; otherwise those in OldNodes(level).
   SUBROUTINE SendOverlaps_LevelPrecalc(level, proc_buffer_sizes, lUseNew)

      INTEGER :: level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      LOGICAL :: lUseNew

      TYPE(NodeDefList), POINTER :: node_cursor, overlap_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_overlap
      INTEGER :: owner      ! MPI rank recorded on the current overlap's box

      ! A zero entry doubles as the "no message to this rank yet" marker used below.
      proc_buffer_sizes = 0

      ! Pick the node list whose overlaps are scanned.
      IF (.NOT. lUseNew) THEN
         node_cursor => OldNodes(level)%p
      ELSE
         node_cursor => Nodes(level)%p
      END IF

      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         overlap_cursor => cur_node%overlaps
         DO
            IF (.NOT. ASSOCIATED(overlap_cursor)) EXIT
            cur_overlap => overlap_cursor%self
            owner = cur_overlap%box%MPI_id

            ! Overlaps held by this rank need no message.
            IF (owner /= MPI_ID) THEN

               ! The first cost charged to a rank also pays the transfer overhead, which for
               ! SendOverlaps is just a termination box.
               IF (proc_buffer_sizes(owner) == 0) THEN
                  proc_buffer_sizes(owner) = TERMINATION_BOX_BYTES
               END IF

               ! Add the cost of the current node-overlap pair to the message's buffer size.
               proc_buffer_sizes(owner) = proc_buffer_sizes(owner) &
                                        + SendOverlapData_Precalculate(level, cur_node, cur_overlap)
            END IF

            overlap_cursor => overlap_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

   END SUBROUTINE SendOverlaps_LevelPrecalc


   !> Creates the send-stage overlap message group for level n, packs the overlap data for every
   !! overlap owned by another processor, appends the termination box and closes the group.
   !! @param n The level of the overlaps to be sent.
   !! @param lUseNewOpt Optional.  If present, selects the node list to scan: Nodes(n) when true,
   !!        OldNodes(n) when false.  If absent, Nodes(n) is used when levels(n)%step == 2 or when
   !!        lRegridLevel(n) is false, and OldNodes(n) otherwise.
   SUBROUTINE PostSendOverlaps(n, lUseNewOpt)
      INTEGER :: n
      LOGICAL, OPTIONAL :: lUseNewOpt

      TYPE(StageMessageGroup), POINTER :: MessageGroup
      TYPE(PackedMessage), POINTER :: message
      TYPE(NodeDefList), POINTER :: nodelist, overlaplist
      TYPE(NodeDef), POINTER :: node, overlap
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: bufsize_array
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      LOGICAL :: lUseNew

      CALL StartTimer(iiSendOverlaps,n)

      CALL CreateMessageGroup(StageMessageGroups(iSendOverlaps,n)%p, TRANSMIT_OVERLAP_DATA,STAGE_SEND,n)
      MessageGroup=>StageMessageGroups(iSendOverlaps,n)%p

      ! Resolve which node list supplies the overlaps: the caller's choice if given, else the default rule.
      IF (PRESENT(lUseNewOpt)) THEN
         lUseNew=lUseNewOpt
      ELSE
         lUseNew = (levels(n)%step == 2 .OR. .NOT. lRegridLevel(n))
      END IF
      IF (lUseNew) THEN
         nodelist=>Nodes(n)%p
      ELSE
         nodelist=>OldNodes(n)%p
      END IF

      ! Obtain the buffer sizes for overlap transfers on this level.  The precalc routine zeroes the
      ! array itself and is given the same lUseNew, so it scans the same list as the loop below.
      proc_buffer_sizes => bufsize_array
      CALL SendOverlaps_LevelPrecalc(n, proc_buffer_sizes, lUseNew)

      DO WHILE (ASSOCIATED(nodelist))
         node=>nodelist%self
         overlaplist=>node%overlaps

         DO WHILE (ASSOCIATED(overlaplist))
            overlap=>overlaplist%self

            ! Only overlaps held by another rank are sent.
            IF (overlap%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(MessageGroup, &
                                            overlap%box%MPI_ID, &
                                            message, &
                                            proc_buffer_sizes(overlap%box%MPI_id))

               CALL SendOverlapData(message, node, overlap)
            END IF

            overlaplist=>overlaplist%next
         END DO

         nodelist=>nodelist%next
      END DO

      CALL SendTerminationBox(MessageGroup)
      CALL CloseMessageGroup(MessageGroup)

      NULLIFY(proc_buffer_sizes)

      CALL StopTimer(iiSendOverlaps,n)

   END SUBROUTINE PostSendOverlaps


   !> Finishes the overlap send stage for level n by destroying its message group.
   !! @param n The level whose iSendOverlaps message group is destroyed.
   SUBROUTINE CompSendOverlaps(n)
      INTEGER :: n
      ! Time spent here is charged to the same timer that PostSendOverlaps uses.
      CALL StartTimer(iiSendOverlaps, n)
      ! NOTE(review): presumably DestroyMessageGroup waits for outstanding sends before freeing them -- confirm.
      CALL DestroyMessageGroup(StageMessageGroups(iSendOverlaps,n)%p)
      CALL StopTimer(iiSendOverlaps, n)   
   END SUBROUTINE CompSendOverlaps


   !> Calculate the buffer sizes needed to handle this processor's overlap receives for this level.
   !! @param level The level of the overlaps to be received.
   !! @param proc_buffer_sizes An array of buffer sizes for each processor.  Will be populated by this subroutine.
   SUBROUTINE RecvOverlaps_LevelPrecalc(level, proc_buffer_sizes)

      INTEGER :: level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      TYPE(NodeDefList), POINTER :: node_cursor, overlap_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_overlap
      INTEGER :: owner      ! MPI rank recorded on the current overlap's box

      ! A zero entry doubles as the "nothing expected from this rank yet" marker used below.
      proc_buffer_sizes = 0

      node_cursor => Nodes(level)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         overlap_cursor => cur_node%overlaps
         DO
            IF (.NOT. ASSOCIATED(overlap_cursor)) EXIT
            cur_overlap => overlap_cursor%self
            owner = cur_overlap%box%MPI_id

            ! Overlaps held by this rank do not arrive through a message.
            IF (owner /= MPI_ID) THEN

               ! The first cost charged to a rank also pays the transfer overhead, which here
               ! is just a termination box.
               IF (proc_buffer_sizes(owner) == 0) THEN
                  proc_buffer_sizes(owner) = TERMINATION_BOX_BYTES
               END IF

               ! Add the cost of the current node-overlap pair to the message's buffer size.
               proc_buffer_sizes(owner) = proc_buffer_sizes(owner) &
                                        + RecvOverlapData_Precalculate(level, cur_node, cur_overlap)
            END IF

            overlap_cursor => overlap_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

   END SUBROUTINE RecvOverlaps_LevelPrecalc


   !> Creates the receive-stage overlap message group for level n and requests one message from
   !! every processor that holds an overlap of a node in Nodes(n).
   !! @param n The level of the overlaps to be received.
   SUBROUTINE PostRecvOverlaps(n)
      INTEGER :: n
      TYPE(StageMessageGroup), POINTER :: MessageGroup
      TYPE(PackedMessage), POINTER :: message
      TYPE(NodeDefList), POINTER :: nodelist, overlaplist
      TYPE(NodeDef), POINTER :: node, overlap

      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: bufsize_array
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      CALL StartTimer(iiRecvOverlaps,n)

      proc_buffer_sizes => bufsize_array

      CALL CreateMessageGroup(StageMessageGroups(iRecvOverlaps,n)%p, TRANSMIT_OVERLAP_DATA,STAGE_RECV,n)
      MessageGroup=>StageMessageGroups(iRecvOverlaps,n)%p
      nodelist=>Nodes(n)%p

      ! Populate the buffer size array with the size of the buffer needed to receive from each
      ! processor.  The precalc routine zeroes the array itself.
      CALL RecvOverlaps_LevelPrecalc(n, proc_buffer_sizes)

      DO WHILE (ASSOCIATED(nodelist))
         node=>nodelist%self
         overlaplist=>node%overlaps

         DO WHILE (ASSOCIATED(overlaplist))
            overlap=>overlaplist%self

            ! Only overlaps held by another rank arrive through a message.
            IF (overlap%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(MessageGroup, &
                                            overlap%box%MPI_ID, &
                                            message, &
                                            proc_buffer_sizes(overlap%box%MPI_id))
            END IF

            overlaplist=>overlaplist%next
         END DO

         nodelist=>nodelist%next
      END DO

      NULLIFY(proc_buffer_sizes)

      CALL StopTimer(iiRecvOverlaps,n)
   END SUBROUTINE PostRecvOverlaps



   !> Finishes the overlap receive stage for level n: hands each message returned by
   !! MGBlockOnFirstMessages to RecvOverlaps, then destroys the message group.
   !! @param n The level of the overlaps being received.
   SUBROUTINE CompRecvOverlaps(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg

      CALL StartTimer(iiRecvOverlaps, n)

      group => StageMessageGroups(iRecvOverLaps,n)%p

      ! Keep asking the group for messages until it returns an unassociated pointer.
      DO
         CALL MGBlockOnFirstMessages(group, msg)
         IF (.NOT. ASSOCIATED(msg)) EXIT
         CALL RecvOverlaps(msg)
      END DO

      ! Pass the table entry itself (not the local alias) to DestroyMessageGroup.
      CALL DestroyMessageGroup(StageMessageGroups(iRecvOverLaps,n)%p)

      CALL StopTimer(iiRecvOverlaps, n)
   END SUBROUTINE CompRecvOverlaps
   !> @}


   !> @name Routines required for ApplyChildrenData
   !! @{

   !> Precalculate the buffer sizes for each ParentData message this processor will send on this level.
   !! @param child_level The level of the nodes sending data to their parents.
   !! @param proc_buffer_sizes A buffer array that will be filled with precalculations for each processor.
   SUBROUTINE SendParentsData_LevelPrecalc(child_level, proc_buffer_sizes)

      INTEGER :: child_level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      TYPE(NodeDefList), POINTER :: node_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_parent
      INTEGER :: owner      ! MPI rank recorded on the current parent's box

      ! A zero entry doubles as the "no message to this rank yet" marker used below.
      proc_buffer_sizes = 0

      ! Walk the nodes on this level, cost out what each sends to its parent, and total the costs
      ! per processor.  The totals are the buffer sizes PostSendParentsData() needs for each processor.
      node_cursor => Nodes(child_level)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self
         cur_parent => cur_node%parent
         owner = cur_parent%box%MPI_id

         ! Parents held by this rank need no message.
         IF (owner /= MPI_ID) THEN

            ! The first cost charged to a processor also pays the overhead: one termination box.
            IF (proc_buffer_sizes(owner) == 0) THEN
               proc_buffer_sizes(owner) = TERMINATION_BOX_BYTES
            END IF

            ! Add the cost of this node's parent data to the processor buffer size.
            proc_buffer_sizes(owner) = proc_buffer_sizes(owner) &
                                     + SendParentsData_Precalculate(child_level, cur_parent, cur_node)
         END IF

         node_cursor => node_cursor%next
      END DO

   END SUBROUTINE SendParentsData_LevelPrecalc


   !> Creates the send-stage parent-data message group for level n, packs the data of every node
   !! whose parent is owned by another processor, appends the termination box and closes the group.
   !! @param n The level of the nodes sending data to their parents (the parents are on level n-1).
   SUBROUTINE PostSendParentsData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_parent
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: buf_sizes

      CALL StartTimer(iiSendParentsData, n)

      ! The group is stored under level n but created with the parent level (n-1).
      CALL CreateMessageGroup(StageMessageGroups(iSendParentsData,n)%p, TRANSMIT_PARENT_DATA, STAGE_SEND, n-1)
      group => StageMessageGroups(iSendParentsData,n)%p

      ! Precalculate the buffer sizes for each message this routine will send.
      buf_sizes => size_store
      buf_sizes = 0
      CALL SendParentsData_LevelPrecalc(n, buf_sizes)

      node_cursor => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self
         cur_parent => cur_node%parent

         ! Only nodes whose parent is held by another rank send a message.
         IF (cur_parent%box%MPI_ID /= MPI_ID) THEN
            CALL ExtractMessageFromGroup(group, cur_parent%box%MPI_ID, msg, &
                                         buf_sizes(cur_parent%box%MPI_id))
            CALL SendParentsData(msg, cur_node)
         END IF

         node_cursor => node_cursor%next
      END DO

      CALL SendTerminationBox(group)
      CALL CloseMessageGroup(group)

      NULLIFY(buf_sizes)

      CALL StopTimer(iiSendParentsData, n)
   END SUBROUTINE PostSendParentsData

   !> Finishes the parent-data send stage for level n by destroying its message group.
   !! @param n The level whose iSendParentsData message group is destroyed.
   SUBROUTINE CompSendParentsData(n) 
      INTEGER :: n
      ! Time spent here is charged to the same timer that PostSendParentsData uses.
      CALL StartTimer(iiSendParentsData, n)
      ! NOTE(review): presumably DestroyMessageGroup waits for outstanding sends before freeing them -- confirm.
      CALL DestroyMessageGroup(StageMessageGroups(iSendParentsData,n)%p)
      CALL StopTimer(iiSendParentsData, n)           

   END SUBROUTINE CompSendParentsData

   !> Calculate the size of the buffers required to receive data from this level's children.
   !! @param parent_level The level of the parents who will be receiving the data.
   !! @param proc_buffer_sizes A pointer to an MPI_np-sized array which will hold the size of the precalculated buffers.
   SUBROUTINE RecvChildrenData_LevelPrecalc(parent_level, proc_buffer_sizes)

       INTEGER :: parent_level
       INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

       TYPE(NodeDefList), POINTER :: nodelist
       TYPE(NodeDefList), POINTER :: childlist
       TYPE(NodeDef), POINTER :: node
       TYPE(NodeDef), POINTER :: child


       nodelist=>Nodes(parent_level)%p

       ! Start every processor's tally from zero, as the other *_LevelPrecalc routines do.  The
       ! "first cost" test below relies on untouched entries being exactly zero, so this routine
       ! must not depend on the caller having cleared the array.
       proc_buffer_sizes = 0

       DO WHILE (ASSOCIATED(nodelist))
          node=>nodelist%self
          childlist=>node%children

          DO WHILE (ASSOCIATED(childlist))

             child=>childlist%self

             ! Children held by this rank do not deliver their data through a message.
             IF (child%box%MPI_ID /= MPI_ID) THEN
                 ! If this is the first cost to be calculated for this processor, then add the overhead cost
                 ! to this processor's buffer size.  In this case, the overhead is just one termination box.
                 IF (proc_buffer_sizes(child%box%MPI_id) == 0) &
                     proc_buffer_sizes(child%box%MPI_id) = TERMINATION_BOX_BYTES

                 ! Add the cost of this child's data to the processor buffer size.
                 proc_buffer_sizes(child%box%MPI_id) = proc_buffer_sizes(child%box%MPI_id) + &
                                                        RecvChildrenData_Precalculate(parent_level, node, child)
             END IF

             childlist=>childlist%next
          END DO

          nodelist=>nodelist%next
       END DO
   END SUBROUTINE RecvChildrenData_LevelPrecalc

   !> Creates the receive-stage parent-data message group for level n and requests one message
   !! from every processor that holds a child of a node on this level.
   !! @param n The level of the (parent) nodes receiving data from their children.
   SUBROUTINE PostRecvChildrenData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_cursor, child_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_child
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: buf_sizes

      CALL StartTimer(iiRecvChildrenData, n)

      CALL CreateMessageGroup(StageMessageGroups(iRecvChildrenData,n)%p, TRANSMIT_PARENT_DATA, STAGE_RECV, n)
      group => StageMessageGroups(iRecvChildrenData,n)%p

      ! Calculate the size required for each message's buffer.  The array is cleared here
      ! before it is handed to the precalc routine.
      buf_sizes => size_store
      buf_sizes = 0
      CALL RecvChildrenData_LevelPrecalc(n, buf_sizes)

      node_cursor => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         child_cursor => cur_node%children
         DO
            IF (.NOT. ASSOCIATED(child_cursor)) EXIT
            cur_child => child_cursor%self

            ! Only children held by another rank deliver their data through a message.
            IF (cur_child%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(group, cur_child%box%MPI_ID, msg, &
                                            buf_sizes(cur_child%box%MPI_id))
            END IF

            child_cursor => child_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

      NULLIFY(buf_sizes)

      CALL StopTimer(iiRecvChildrenData, n)

   END SUBROUTINE PostRecvChildrenData

   !> Finishes the children-data receive stage for level n: hands each message returned by
   !! MGBlockOnFirstMessages to RecvChildrenData, then destroys the message group.
   !! @param n The level of the (parent) nodes receiving data from their children.
   SUBROUTINE CompRecvChildrenData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg

      CALL StartTimer(iiRecvChildrenData, n)

      group => StageMessageGroups(iRecvChildrenData,n)%p

      ! Keep asking the group for messages until it returns an unassociated pointer.
      DO
         CALL MGBlockOnFirstMessages(group, msg)
         IF (.NOT. ASSOCIATED(msg)) EXIT
         CALL RecvChildrenData(msg)
      END DO

      ! Pass the table entry itself (not the local alias) to DestroyMessageGroup.
      CALL DestroyMessageGroup(StageMessageGroups(iRecvChildrenData,n)%p)

      CALL StopTimer(iiRecvChildrenData, n)
   END SUBROUTINE CompRecvChildrenData
   !> @}

   !> @name Routines required for ApplyInitialChildrenData
   !! @{


   !> Precalculate the buffer sizes for each ParentData message this processor will send on this level on the initial step.
   !! @param child_level The level of the nodes sending data to their parents.
   !! @param proc_buffer_sizes A buffer array that will be filled with precalculations for each processor.
   SUBROUTINE SendParentsInitialData_LevelPrecalc(child_level, proc_buffer_sizes)

      INTEGER :: child_level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      TYPE(NodeDefList), POINTER :: node_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_parent
      INTEGER :: owner      ! MPI rank recorded on the current parent's box

      ! A zero entry doubles as the "no message to this rank yet" marker used below.
      proc_buffer_sizes = 0

      ! Walk the nodes on this level, cost out what each sends to its parent, and total the costs
      ! per processor.  The totals are the buffer sizes PostSendParentsInitialData() needs for
      ! each processor.
      node_cursor => Nodes(child_level)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self
         cur_parent => cur_node%parent
         owner = cur_parent%box%MPI_id

         ! Parents held by this rank need no message.
         IF (owner /= MPI_ID) THEN

            ! The first cost charged to a processor also pays the overhead: one termination box.
            IF (proc_buffer_sizes(owner) == 0) THEN
               proc_buffer_sizes(owner) = TERMINATION_BOX_BYTES
            END IF

            ! Add the cost of this node's parent data to the processor buffer size.
            proc_buffer_sizes(owner) = proc_buffer_sizes(owner) &
                                     + SendParentsInitialData_Precalculate(child_level, cur_node)
         END IF

         node_cursor => node_cursor%next
      END DO

   END SUBROUTINE SendParentsInitialData_LevelPrecalc

   !> Initial-data variant of PostSendParentsData: creates the send-stage parent-data message group
   !! for level n, packs the initial data of every node whose parent is owned by another processor,
   !! appends the termination box and closes the group.
   !! @param n The level of the nodes sending data to their parents (the parents are on level n-1).
   SUBROUTINE PostSendParentsInitialData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_parent
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: buf_sizes

      ! This routine uses the regular SendParentsData timer and message-group slot.
      CALL StartTimer(iiSendParentsData, n)
      CALL CreateMessageGroup(StageMessageGroups(iSendParentsData,n)%p, TRANSMIT_PARENT_DATA, STAGE_SEND, n-1)
      group => StageMessageGroups(iSendParentsData,n)%p

      ! Pre-calculate the buffer sizes required for the initial parent data send.
      buf_sizes => size_store
      buf_sizes = 0
      CALL SendParentsInitialData_LevelPrecalc(n, buf_sizes)

!PRINT "(' PostSendParentsInitialData(', i2, ', ', i2, ')::buffer sizes = [', 16i6, '].')", MPI_id, n, buf_sizes

      node_cursor => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self
         cur_parent => cur_node%parent

         ! Only nodes whose parent is held by another rank send a message.
         IF (cur_parent%box%MPI_ID /= MPI_ID) THEN
            CALL ExtractMessageFromGroup(group, cur_parent%box%MPI_ID, msg, &
                                         buf_sizes(cur_parent%box%MPI_id))
            CALL SendParentsInitialData(msg, cur_node)
         END IF

         node_cursor => node_cursor%next
      END DO

      CALL SendTerminationBox(group)
      CALL CloseMessageGroup(group)

      NULLIFY(buf_sizes)

      CALL StopTimer(iiSendParentsData, n)
   END SUBROUTINE PostSendParentsInitialData

   !> Finishes the initial parent-data send stage for level n by destroying its message group.
   !! @param n The level whose iSendParentsData message group is destroyed.
   SUBROUTINE CompSendParentsInitialData(n) 
      INTEGER :: n

      ! Uses the same timer and message-group slot as PostSendParentsInitialData.
      CALL StartTimer(iiSendParentsData, n)

      ! NOTE(review): presumably DestroyMessageGroup waits for outstanding sends before freeing them -- confirm.
      CALL DestroyMessageGroup(StageMessageGroups(iSendParentsData,n)%p)

      CALL StopTimer(iiSendParentsData, n)

   END SUBROUTINE CompSendParentsInitialData


   !> Calculate the size of the buffers required to receive data from this level's children on the first timestep.
   !! @param parent_level The level of the parents who will be receiving the data.
   !! @param proc_buffer_sizes A pointer to an MPI_np-sized array which will hold the size of the precalculated buffers.
   SUBROUTINE RecvInitialChildrenData_LevelPrecalc(parent_level, proc_buffer_sizes)

      INTEGER :: parent_level
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes

      TYPE(NodeDefList), POINTER :: node_cursor, child_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_child
      INTEGER :: owner      ! MPI rank recorded on the current child's box

      ! A zero entry doubles as the "nothing expected from this rank yet" marker used below.
      proc_buffer_sizes = 0

      node_cursor => Nodes(parent_level)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         child_cursor => cur_node%children
         DO
            IF (.NOT. ASSOCIATED(child_cursor)) EXIT
            cur_child => child_cursor%self
            owner = cur_child%box%MPI_id

            ! Children held by this rank do not deliver their data through a message.
            IF (owner /= MPI_ID) THEN

               ! The first cost charged to a processor also pays the overhead: one termination box.
               IF (proc_buffer_sizes(owner) == 0) THEN
                  proc_buffer_sizes(owner) = TERMINATION_BOX_BYTES
               END IF

               ! Add the cost of this child's data to the processor buffer size.
               proc_buffer_sizes(owner) = proc_buffer_sizes(owner) &
                                        + RecvInitialChildrenData_Precalculate(parent_level, cur_node, cur_child)
            END IF

            child_cursor => child_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

   END SUBROUTINE RecvInitialChildrenData_LevelPrecalc

   !> Initial-data variant of PostRecvChildrenData: creates the receive-stage parent-data message
   !! group for level n and requests one message from every processor that holds a child of a node
   !! on this level.
   !! @param n The level of the (parent) nodes receiving data from their children.
   SUBROUTINE PostRecvInitialChildrenData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_cursor, child_cursor
      TYPE(NodeDef), POINTER :: cur_node, cur_child
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: buf_sizes

      ! This routine uses the regular RecvChildrenData timer and message-group slot.
      CALL StartTimer(iiRecvChildrenData, n)

      CALL CreateMessageGroup(StageMessageGroups(iRecvChildrenData,n)%p, TRANSMIT_PARENT_DATA, STAGE_RECV, n)
      group => StageMessageGroups(iRecvChildrenData,n)%p

      ! Calculate the size required for each message's buffer.
      buf_sizes => size_store
      buf_sizes = 0
      CALL RecvInitialChildrenData_LevelPrecalc(n, buf_sizes)

!PRINT "('PostRecvInitialChildrenData(', i2, ', ', i2, ')::buffer sizes = [', 16i6, '].')", MPI_id, n, buf_sizes

      node_cursor => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_cursor)) EXIT
         cur_node => node_cursor%self

         child_cursor => cur_node%children
         DO
            IF (.NOT. ASSOCIATED(child_cursor)) EXIT
            cur_child => child_cursor%self

            ! Only children held by another rank deliver their data through a message.
            IF (cur_child%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(group, cur_child%box%MPI_ID, msg, &
                                            buf_sizes(cur_child%box%MPI_id))
            END IF

            child_cursor => child_cursor%next
         END DO

         node_cursor => node_cursor%next
      END DO

      NULLIFY(buf_sizes)

      CALL StopTimer(iiRecvChildrenData, n)

   END SUBROUTINE PostRecvInitialChildrenData

   !> Processes the messages of the level-n initial children data receive group, then destroys it.
   !! Each message handed back by MGBlockOnFirstMessages is passed to RecvInitialChildrenData;
   !! the loop ends when MGBlockOnFirstMessages returns a disassociated message pointer.
   !! @param n The level whose StageMessageGroups(iRecvChildrenData,n) group is processed.
   SUBROUTINE CompRecvInitialChildrenData(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: msg_group
      TYPE(PackedMessage), POINTER :: msg

      ! Same slot as iRecvInitialChildData.
      msg_group => StageMessageGroups(iRecvChildrenData,n)%p

      DO
         CALL MGBlockOnFirstMessages(msg_group, msg)
         IF (.NOT. ASSOCIATED(msg)) EXIT
         CALL RecvInitialChildrenData(msg)
      END DO

      ! Hand the stored pointer itself (not the local alias) to the destroy routine.
      CALL DestroyMessageGroup(StageMessageGroups(iRecvChildrenData,n)%p)

   END SUBROUTINE CompRecvInitialChildrenData

   !> @}

   !> @name Routines required for SyncFluxes
   !! @{

   !> Computes the buffer sizes this processor needs to send a level's flux data to its neighbors.
   !! All entries are first reset to zero.  For each neighbor of a level node whose box belongs to
   !! another processor, that processor's entry receives TERMINATION_BOX_BYTES once (the first time
   !! it is touched) plus the value of SendFluxes_Precalculate for the node-neighbor pair.
   !! @param level The level of the data being sent.
   !! @param proc_buffer_sizes An integer array of range [0:np-1] that will be populated with buffer sizes.
   SUBROUTINE SendFluxes_LevelPrecalc(level, proc_buffer_sizes)

      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      INTEGER :: level

      TYPE(NodeDefList), POINTER :: node_item, nbr_item
      TYPE(NodeDef), POINTER :: node, nbr

      proc_buffer_sizes = 0

      node_item => Nodes(level)%p
      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT
         node => node_item%self

         nbr_item => node%neighbors
         DO
            IF (.NOT. ASSOCIATED(nbr_item)) EXIT
            nbr => nbr_item%self

            ! Neighbors owned by this processor contribute nothing.
            IF (nbr%box%MPI_ID /= MPI_ID) THEN

               ! A zero entry means this processor has not been charged yet, so start it off with
               ! the fixed overhead of a single termination box.
               IF (proc_buffer_sizes(nbr%box%MPI_id) == 0) THEN
                  proc_buffer_sizes(nbr%box%MPI_id) = TERMINATION_BOX_BYTES
               END IF

               ! Charge the cost of this node-neighbor pair's flux data.
               proc_buffer_sizes(nbr%box%MPI_id) = proc_buffer_sizes(nbr%box%MPI_id) + &
                                                   SendFluxes_Precalculate(level, node, nbr)
            END IF

            nbr_item => nbr_item%next
         END DO

         node_item => node_item%next
      END DO

   END SUBROUTINE SendFluxes_LevelPrecalc

   !> Builds and fills the send-side flux message group for level n.
   !! Returns immediately when Nodes(n)%p is not associated.  Otherwise creates
   !! StageMessageGroups(iSendFluxes,n) as a STAGE_SEND group, and for every neighbor whose box
   !! belongs to another processor extracts that processor's message (sized by
   !! SendFluxes_LevelPrecalc) and calls SendFluxes on it.  Finally calls SendTerminationBox and
   !! CloseMessageGroup on the group.
   !! @param n The level whose flux data is sent.
   SUBROUTINE PostSendFluxes(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: msg_group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_item, nbr_item
      TYPE(NodeDef), POINTER :: node, nbr
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: peer_sizes

      ! No node list on this level: skip the stage entirely (CompSendFluxes applies the same test).
      IF (.NOT. ASSOCIATED(Nodes(n)%p)) RETURN

      CALL StartTimer(iiSendFluxes, n)

      CALL CreateMessageGroup(StageMessageGroups(iSendFluxes,n)%p, TRANSMIT_FLUX_DATA, STAGE_SEND, n)
      msg_group => StageMessageGroups(iSendFluxes,n)%p

      ! Per-processor buffer sizes for this level.
      peer_sizes => size_store
      peer_sizes = 0
      CALL SendFluxes_LevelPrecalc(n, peer_sizes)

      node_item => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT
         node => node_item%self

         nbr_item => node%neighbors
         DO
            IF (.NOT. ASSOCIATED(nbr_item)) EXIT
            nbr => nbr_item%self

            ! Only neighbors owned by a different processor are sent anything.
            IF (nbr%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(msg_group, nbr%box%MPI_ID, msg, peer_sizes(nbr%box%MPI_id))
               CALL SendFluxes(msg, node, nbr)
            END IF

            nbr_item => nbr_item%next
         END DO

         node_item => node_item%next
      END DO

      CALL SendTerminationBox(msg_group)
      CALL CloseMessageGroup(msg_group)

      NULLIFY(peer_sizes)

      CALL StopTimer(iiSendFluxes, n)

   END SUBROUTINE PostSendFluxes

   !> Destroys the send-side flux message group for level n, timed under iiSendFluxes.
   !! Does nothing when Nodes(n)%p is not associated, matching the guard in PostSendFluxes.
   !! @param n The level whose StageMessageGroups(iSendFluxes,n) group is destroyed.
   SUBROUTINE CompSendFluxes(n)
      INTEGER :: n

      IF (ASSOCIATED(Nodes(n)%p)) THEN
         CALL StartTimer(iiSendFluxes, n)
         CALL DestroyMessageGroup(StageMessageGroups(iSendFluxes,n)%p)
         CALL StopTimer(iiSendFluxes, n)
      END IF

   END SUBROUTINE CompSendFluxes

   !> Computes the buffer sizes this processor needs to receive a level's flux data from its neighbors.
   !! All entries are first reset to zero.  For each neighbor of a level node whose box belongs to
   !! another processor, that processor's entry receives TERMINATION_BOX_BYTES once (the first time
   !! it is touched) plus the value of RecvFluxes_Precalculate for the node-neighbor pair.
   !! @param level The level of the data being received.
   !! @param proc_buffer_sizes An integer array of range [0:np-1] that will be populated with buffer sizes.
   SUBROUTINE RecvFluxes_LevelPrecalc(level, proc_buffer_sizes)

      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      INTEGER :: level

      TYPE(NodeDefList), POINTER :: node_item, nbr_item
      TYPE(NodeDef), POINTER :: node, nbr

      proc_buffer_sizes = 0

      node_item => Nodes(level)%p
      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT
         node => node_item%self

         nbr_item => node%neighbors
         DO
            IF (.NOT. ASSOCIATED(nbr_item)) EXIT
            nbr => nbr_item%self

            ! Neighbors owned by this processor contribute nothing.
            IF (nbr%box%MPI_ID /= MPI_ID) THEN

               ! A zero entry means this processor has not been charged yet, so start it off with
               ! the fixed overhead of a single termination box.
               IF (proc_buffer_sizes(nbr%box%MPI_id) == 0) THEN
                  proc_buffer_sizes(nbr%box%MPI_id) = TERMINATION_BOX_BYTES
               END IF

               ! Charge the cost of this node-neighbor pair's flux data.
               proc_buffer_sizes(nbr%box%MPI_id) = proc_buffer_sizes(nbr%box%MPI_id) + &
                                                   RecvFluxes_Precalculate(level, node, nbr)
            END IF

            nbr_item => nbr_item%next
         END DO

         node_item => node_item%next
      END DO

   END SUBROUTINE RecvFluxes_LevelPrecalc

   !> Sets up the receive-side flux message group for level n.
   !! Returns immediately when Nodes(n)%p is not associated.  Otherwise creates
   !! StageMessageGroups(iRecvFluxes,n) as a STAGE_RECV group and extracts one message for every
   !! neighbor whose box belongs to another processor, sized by RecvFluxes_LevelPrecalc.
   !! The group is later drained and destroyed by CompRecvFluxes.
   !! @param n The level whose flux data is received.
   SUBROUTINE PostRecvFluxes(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: msg_group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_item, nbr_item
      TYPE(NodeDef), POINTER :: nbr
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: peer_sizes

      ! No node list on this level: skip the stage entirely (CompRecvFluxes applies the same test).
      IF (.NOT. ASSOCIATED(Nodes(n)%p)) RETURN

      CALL StartTimer(iiRecvFluxes, n)

      CALL CreateMessageGroup(StageMessageGroups(iRecvFluxes,n)%p, TRANSMIT_FLUX_DATA, STAGE_RECV, n)
      msg_group => StageMessageGroups(iRecvFluxes,n)%p

      ! Per-processor buffer sizes for this level.
      peer_sizes => size_store
      peer_sizes = 0
      CALL RecvFluxes_LevelPrecalc(n, peer_sizes)

      node_item => Nodes(n)%p
      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT

         nbr_item => node_item%self%neighbors
         DO
            IF (.NOT. ASSOCIATED(nbr_item)) EXIT
            nbr => nbr_item%self

            ! Only neighbors owned by a different processor need a message.
            IF (nbr%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(msg_group, nbr%box%MPI_ID, msg, peer_sizes(nbr%box%MPI_id))
            END IF

            nbr_item => nbr_item%next
         END DO

         node_item => node_item%next
      END DO

      NULLIFY(peer_sizes)

      CALL StopTimer(iiRecvFluxes, n)

   END SUBROUTINE PostRecvFluxes

   !> Processes the messages of the level-n flux receive group, then destroys the group.
   !! Does nothing when Nodes(n)%p is not associated, matching the guard in PostRecvFluxes.
   !! Each message handed back by MGBlockOnFirstMessages is passed to RecvFluxes; the loop ends
   !! when MGBlockOnFirstMessages returns a disassociated message pointer.
   !! @param n The level whose StageMessageGroups(iRecvFluxes,n) group is processed.
   SUBROUTINE CompRecvFluxes(n)
      INTEGER :: n

      TYPE(StageMessageGroup), POINTER :: msg_group
      TYPE(PackedMessage), POINTER :: msg

      IF (.NOT. ASSOCIATED(Nodes(n)%p)) RETURN

      CALL StartTimer(iiRecvFluxes, n)

      msg_group => StageMessageGroups(iRecvFluxes,n)%p

      DO
         CALL MGBlockOnFirstMessages(msg_group, msg)
         IF (.NOT. ASSOCIATED(msg)) EXIT
         CALL RecvFluxes(msg)
      END DO

      ! Hand the stored pointer itself (not the local alias) to the destroy routine.
      CALL DestroyMessageGroup(StageMessageGroups(iRecvFluxes,n)%p)

      CALL StopTimer(iiRecvFluxes, n)

   END SUBROUTINE CompRecvFluxes
   !> @}

   !> @name Routines required for ApplyGenericOverlaps
   !! @{


   !> Computes the buffer sizes this processor needs to receive a level's generic data.
   !! All entries are first reset to zero.  The nodes of Nodes(level) are walked; for each one either
   !! its neighbors list or its overlaps list is scanned (see lNeighbors).  Every list entry whose box
   !! belongs to another processor adds TERMINATION_BOX_BYTES to that processor's entry once (the
   !! first time it is touched) plus the value of RecvGenericData_Precalculate for the pair.
   !! @param level The level of the data being received.
   !! @param fields An integer array containing the indices of the fields to be received.
   !! @param nghost The number of ghost cells to consider in an overlap.
   !! @param proc_buffer_sizes An integer array of range [0:np-1] that will be populated with buffer sizes.
   !! @param lPeriodic Three logical flags forwarded unchanged to RecvGenericData_Precalculate.
   !! @param lNeighbors If true scan each node's neighbors list, otherwise its overlaps list.
   SUBROUTINE RecvGenericData_LevelPrecalc(level, fields, nghost, proc_buffer_sizes, lPeriodic, lNeighbors)

      INTEGER :: level
      INTEGER, DIMENSION(:) :: fields
      INTEGER :: nghost
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      LOGICAL, DIMENSION(3) :: lPeriodic
      LOGICAL :: lNeighbors

      TYPE(NodeDefList), POINTER :: node_item, peer_item
      TYPE(NodeDef), POINTER :: node, peer

      proc_buffer_sizes = 0

      node_item => Nodes(level)%p
      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT
         node => node_item%self

         ! Pick which of the node's lists supplies the communication partners.
         IF (.NOT. lNeighbors) THEN
            peer_item => node%overlaps
         ELSE
            peer_item => node%neighbors
         END IF

         DO
            IF (.NOT. ASSOCIATED(peer_item)) EXIT
            peer => peer_item%self

            ! Partners owned by this processor contribute nothing.
            IF (peer%box%MPI_ID /= MPI_ID) THEN

               ! A zero entry means this processor has not been charged yet, so start it off with
               ! the fixed overhead of a single termination box.
               IF (proc_buffer_sizes(peer%box%MPI_id) == 0) THEN
                  proc_buffer_sizes(peer%box%MPI_id) = TERMINATION_BOX_BYTES
               END IF

               ! Charge this node-partner pair's buffer cost to the owning processor.
               proc_buffer_sizes(peer%box%MPI_id) = proc_buffer_sizes(peer%box%MPI_id) + &
                   RecvGenericData_Precalculate(level, node, peer, fields, nghost, lPeriodic)
            END IF

            peer_item => peer_item%next
         END DO

         node_item => node_item%next
      END DO

   END SUBROUTINE RecvGenericData_LevelPrecalc

   !> Sets up the receive-side generic-data message group for level n.
   !! Creates StageMessageGroups(iRecvGenericData,n) as a STAGE_RECV group and extracts one message
   !! for every entry of each node's neighbors (or overlaps) list whose box belongs to another
   !! processor, sized by RecvGenericData_LevelPrecalc.  The group is later drained and destroyed
   !! by CompRecvGenericData.
   !! @param n The level whose generic data is received.
   !! @param fields An integer array containing the indices of the fields to be received.
   !! @param nghost The number of ghost cells to consider in an overlap.
   !! @param lPeriodic Three logical flags forwarded unchanged to RecvGenericData_LevelPrecalc.
   !! @param lNeighbors If true scan each node's neighbors list, otherwise its overlaps list.
   SUBROUTINE PostRecvGenericData(n, fields, nghost, lPeriodic, lNeighbors)
      INTEGER :: n
      TYPE(StageMessageGroup), Pointer ::MessageGroup
      TYPE(PackedMessage), POINTER :: message
      TYPE(NodeDefList), POINTER :: nodelist,neighborlist
      TYPE(NodeDef), POINTER :: node,neighbor
      LOGICAL, DIMENSION(3) :: lPeriodic

      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: bufsize_array
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      INTEGER :: nghost
      INTEGER, DIMENSION(:) :: fields
      LOGICAL :: lNeighbors

      CALL CreateMessageGroup(StageMessageGroups(iRecvGenericData,n)%p, TRANSMIT_GENERIC_DATA,STAGE_RECV,n)
      MessageGroup=>StageMessageGroups(iRecvGenericData,n)%p
      nodelist=>Nodes(n)%p 

      ! The precalc routine takes a pointer dummy, so the automatic array is handed over through a
      ! pointer that keeps its 0-based bounds.
      proc_buffer_sizes => bufsize_array
      proc_buffer_sizes = 0

      ! Obtain buffer sizes for this level.
      CALL RecvGenericData_LevelPrecalc(n, fields, nghost, proc_buffer_sizes, lPeriodic, lNeighbors)

      DO WHILE (associated(nodelist))
         node=>nodelist%self 
         IF (lNeighbors) THEN
            neighborlist=>node%neighbors
         ELSE
            neighborlist=>node%overlaps
         END IF
         DO WHILE (associated(neighborlist))
            neighbor=>neighborlist%self
            ! Only partners owned by a different processor need a message.
            IF (neighbor%box%MPI_ID /= MPI_ID)  THEN
               CALL ExtractMessageFromGroup(MessageGroup, &
                                            neighbor%box%MPI_ID, &
                                            message, &
                                            proc_buffer_sizes(neighbor%box%MPI_id))
            END IF
            neighborlist=>neighborlist%next
         END DO
         nodelist=>nodelist%next
      END DO

      ! Drop the association with the local automatic array before it goes out of scope,
      ! as the other Post* routines in this module do.
      NULLIFY(proc_buffer_sizes)

   END SUBROUTINE PostRecvGenericData

   !> Computes the buffer sizes this processor needs to send a level's generic data.
   !! All entries are first reset to zero.  With lNeighbors true the nodes of Nodes(level) are walked
   !! and each node's neighbors list is scanned; with lNeighbors false the nodes of OldNodes(level)
   !! are walked and each node's overlaps list is scanned.  Every list entry whose box belongs to
   !! another processor adds TERMINATION_BOX_BYTES to that processor's entry once (the first time it
   !! is touched) plus the value of SendGenericData_Precalculate for the pair.
   !! @param level The level of the data being sent.
   !! @param fields An integer array containing the indices of the fields to be sent.
   !! @param nghost The number of ghost cells to consider in an overlap.
   !! @param proc_buffer_sizes An integer array of range [0:np-1] that will be populated with buffer sizes.
   !! @param lPeriodic Three logical flags forwarded unchanged to SendGenericData_Precalculate.
   !! @param lNeighbors Selects Nodes/neighbors (true) or OldNodes/overlaps (false).
   SUBROUTINE SendGenericData_LevelPrecalc(level, fields, nghost, proc_buffer_sizes, lPeriodic, lNeighbors)

      INTEGER :: level
      INTEGER, DIMENSION(:) :: fields
      INTEGER :: nghost
      INTEGER, POINTER, DIMENSION(:) :: proc_buffer_sizes
      LOGICAL, DIMENSION(3) :: lPeriodic
      LOGICAL :: lNeighbors

      TYPE(NodeDefList), POINTER :: node_item, peer_item
      TYPE(NodeDef), POINTER :: node, peer

      proc_buffer_sizes = 0

      ! Choose the node list that acts as the sender side.
      IF (.NOT. lNeighbors) THEN
         node_item => OldNodes(level)%p
      ELSE
         node_item => Nodes(level)%p
      END IF

      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT
         node => node_item%self

         ! Pick which of the node's lists supplies the communication partners.
         IF (.NOT. lNeighbors) THEN
            peer_item => node%overlaps
         ELSE
            peer_item => node%neighbors
         END IF

         DO
            IF (.NOT. ASSOCIATED(peer_item)) EXIT
            peer => peer_item%self

            ! Partners owned by this processor contribute nothing.
            IF (peer%box%MPI_ID /= MPI_ID) THEN

               ! A zero entry means this processor has not been charged yet, so start it off with
               ! the fixed overhead of a single termination box.
               IF (proc_buffer_sizes(peer%box%MPI_id) == 0) THEN
                  proc_buffer_sizes(peer%box%MPI_id) = TERMINATION_BOX_BYTES
               END IF

               ! Charge this node-partner pair's buffer cost to the owning processor.
               proc_buffer_sizes(peer%box%MPI_id) = proc_buffer_sizes(peer%box%MPI_id) + &
                   SendGenericData_Precalculate(level, node, peer, fields, nghost, lPeriodic)
            END IF

            peer_item => peer_item%next
         END DO

         node_item => node_item%next
      END DO

   END SUBROUTINE SendGenericData_LevelPrecalc


   !> Builds and fills the send-side generic-data message group for level n.
   !! Creates StageMessageGroups(iSendGenericData,n) as a STAGE_SEND group.  With lNeighbors true the
   !! nodes of Nodes(n) and their neighbors lists are walked; otherwise the nodes of OldNodes(n) and
   !! their overlaps lists.  For every list entry whose box belongs to another processor the matching
   !! message is extracted (sized by SendGenericData_LevelPrecalc) and SendGenericData is called on it.
   !! Finally calls SendTerminationBox and CloseMessageGroup on the group.
   !! @param n The level whose generic data is sent.
   !! @param fields An integer array containing the indices of the fields to be sent.
   !! @param nghost The number of ghost cells to consider in an overlap.
   !! @param lPeriodic Three logical flags forwarded unchanged to the precalc and send routines.
   !! @param lNeighbors Selects Nodes/neighbors (true) or OldNodes/overlaps (false).
   SUBROUTINE PostSendGenericData(n,fields,nghost, lPeriodic, lNeighbors)
      INTEGER :: n
      INTEGER, DIMENSION(:) :: fields
      INTEGER :: nghost
      LOGICAL, DIMENSION(3) :: lPeriodic
      LOGICAL :: lNeighbors

      TYPE(StageMessageGroup), POINTER :: msg_group
      TYPE(PackedMessage), POINTER :: msg
      TYPE(NodeDefList), POINTER :: node_item, peer_item
      TYPE(NodeDef), POINTER :: node, peer
      INTEGER, TARGET, DIMENSION(0:MPI_np-1) :: size_store
      INTEGER, POINTER, DIMENSION(:) :: peer_sizes

      CALL CreateMessageGroup(StageMessageGroups(iSendGenericData,n)%p, TRANSMIT_GENERIC_DATA, STAGE_SEND, n)
      msg_group => StageMessageGroups(iSendGenericData,n)%p

      ! Per-processor buffer sizes for this level.
      peer_sizes => size_store
      peer_sizes = 0
      CALL SendGenericData_LevelPrecalc(n, fields, nghost, peer_sizes, lPeriodic, lNeighbors)

      ! Choose the node list that acts as the sender side.
      IF (.NOT. lNeighbors) THEN
         node_item => OldNodes(n)%p
      ELSE
         node_item => Nodes(n)%p
      END IF

      DO
         IF (.NOT. ASSOCIATED(node_item)) EXIT
         node => node_item%self

         ! Pick which of the node's lists supplies the communication partners.
         IF (.NOT. lNeighbors) THEN
            peer_item => node%overlaps
         ELSE
            peer_item => node%neighbors
         END IF

         DO
            IF (.NOT. ASSOCIATED(peer_item)) EXIT
            peer => peer_item%self

            ! Only partners owned by a different processor are sent anything.
            IF (peer%box%MPI_ID /= MPI_ID) THEN
               CALL ExtractMessageFromGroup(msg_group, peer%box%MPI_ID, msg, peer_sizes(peer%box%MPI_id))
               CALL SendGenericData(msg, node, peer, fields, nghost, lPeriodic)
            END IF

            peer_item => peer_item%next
         END DO

         node_item => node_item%next
      END DO

      CALL SendTerminationBox(msg_group)
      CALL CloseMessageGroup(msg_group)

      NULLIFY(peer_sizes)

   END SUBROUTINE PostSendGenericData

   !> Processes the messages of the level-n generic-data receive group, then destroys the group.
   !! Each message handed back by MGBlockOnFirstMessages is passed to RecvGenericData together with
   !! the field list; the loop ends when MGBlockOnFirstMessages returns a disassociated pointer.
   !! @param n The level whose StageMessageGroups(iRecvGenericData,n) group is processed.
   !! @param fields An integer array of field indices, forwarded unchanged to RecvGenericData.
   SUBROUTINE CompRecvGenericData(n,fields) 
      INTEGER :: n
      INTEGER, DIMENSION(:) :: fields
      TYPE(StageMessageGroup), Pointer ::MessageGroup
      TYPE(PackedMessage), POINTER :: message

      MessageGroup=>StageMessageGroups(iRecvGenericData,n)%p

      CALL MGBlockOnFirstMessages(MessageGroup,message)
      DO WHILE (ASSOCIATED(message))
         CALL RecvGenericData(message,fields)
         CALL MGBlockOnFirstMessages(MessageGroup, message)
      END DO

      ! Hand the stored pointer itself (not the local alias) to the destroy routine.
      CALL DestroyMessageGroup(StageMessageGroups(iRecvGenericData,n)%p)

   END SUBROUTINE CompRecvGenericData

   !> Destroys the send-side generic-data message group for level n.
   !! This is the group created by PostSendGenericData in StageMessageGroups(iSendGenericData,n).
   !! @param n The level whose send group is destroyed.
   SUBROUTINE CompSendGenericData(n)
      INTEGER :: n
      CALL DestroyMessageGroup(StageMessageGroups(iSendGenericData,n)%p)
   END SUBROUTINE CompSendGenericData
  !> @}
 END MODULE DataLevelComms


