MODULE LayoutComms
   USE LayoutDeclarations
   USE GlobalDeclarations
   USE TreeDeclarations
   USE DataDeclarations
   USE CommunicationDeclarations
   USE Fields

   IMPLICIT NONE

   INTERFACE LoadFieldIntoLayout
      MODULE PROCEDURE LoadFieldIntoLayoutC, LoadFieldIntoLayoutR
   END INTERFACE LoadFieldIntoLayout

   INTERFACE UnLoadFieldFromLayout
      MODULE PROCEDURE UnLoadFieldFromLayoutC, UnLoadFieldFromLayoutR
   END INTERFACE UnLoadFieldFromLayout

   INTERFACE TransferLayouts
      MODULE PROCEDURE TransferLayoutsC, TransferLayoutsR
   END INTERFACE TransferLayouts


CONTAINS
   !> Use the tree structure on a given level to populate the layout
   SUBROUTINE LoadFieldIntoLayoutC(layout, data, FieldID, level_opt)
      TYPE(LayoutDef) :: layout
      INTEGER :: FieldID(:)
      INTEGER :: level
      COMPLEX(8), DIMENSION(:,:,:,:), POINTER :: data
      INTEGER :: mO(3,2), mB(3,2), mC(3,2)
      REAL(8), DIMENSION(:,:,:,:), ALLOCATABLE :: buffer
      CHARACTER, DIMENSION(:), ALLOCATABLE :: mpi_buffer
      INTEGER :: iErr
      INTEGER :: status(MPI_STATUS_SIZE)
      TYPE(NodeDefList), POINTER :: nodelist
      TYPE(NodeDef), POINTER :: node
      INTEGER :: buffsize
      INTEGER :: i,j,k,l,m,remote_proc, nfields
      INTEGER, OPTIONAL :: level_opt
      INTEGER :: level_min, level_max

      IF (PRESENT(level_opt)) THEN
         level_min=level_opt
         level_max=level_opt
      ELSE
         level_min=0
         level_max=layout%level
      END IF
      mC=1
      mB=layout%mB(MPI_ID,:,:)
      nfields=size(data,4)
      DO level=level_min, level_max
         buffsize=0
         nodelist=>Nodes(level)%p       
         DO WHILE (ASSOCIATED(NodeList))
            buffsize=buffsize+6*PACK_INTEGER_SIZE
            buffsize=buffsize+PRODUCT(nodelist%self%info%mx(1:nDim))*PACK_DOUBLE_SIZE*nfields
            buffsize=buffsize+MPI_BSEND_OVERHEAD
            nodelist=>nodelist%next
         END DO
         buffsize=buffsize+6*PACK_INTEGER_SIZE+MPI_BSEND_OVERHEAD
         ALLOCATE(mpi_buffer(buffsize))
         DO l=1, MPI_NP
            CALL MPI_Buffer_attach(mpi_buffer, buffsize, iErr)
            remote_proc=modulo(MPI_ID+l, MPI_NP)
            nodelist=>Nodes(level)%p
            DO WHILE (ASSOCIATED(nodelist))
               node=>nodelist%self
               mO(:,:)=LevelDown(layout%mB(remote_proc,:,:),layout%level, level)
               mO(:,1)=max(mO(:,1), node%box%mGlobal(:,1))
               mO(:,2)=min(mO(:,2), node%box%mGlobal(:,2))
               IF (ALL(mO(:,2) >= mO(:,1))) THEN
                  ALLOCATE(buffer(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2),nfields))
                  CALL MPI_BSEND(mO, 6, MPI_INTEGER, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
                  DO i=mO(1,1),mO(1,2)
                     DO j=mO(2,1), mO(2,2)
                        DO k=mO(3,1), mO(3,2)
                           DO m=1,nfields
                              buffer(i,j,k,m)=GetField(node%Info,i-node%box%mGlobal(1,1)+1, j-node%box%mGlobal(2,1)+1, k-node%box%mGlobal(3,1)+1, FieldID(m))
                           END DO
                        END DO
                     END DO
                  END DO
                  CALL MPI_BSEND(buffer, size(buffer), MPI_DOUBLE_PRECISION, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
                  DEALLOCATE(buffer)
               END IF
               nodelist=>nodelist%next
            END DO
            CALL MPI_BSEND(TERMINATIONBOX, 6, MPI_INTEGER, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)    
            remote_proc=modulo(MPI_ID-l, MPI_NP)
            DO
               CALL MPI_RECV(mO, 6, MPI_INTEGER, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, status, iErr)
               IF (ALL(mO == TERMINATIONBOX)) THEN
                  EXIT
               ELSE
                  ALLOCATE(buffer(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2), nfields))
                  CALL MPI_RECV(buffer, size(buffer), MPI_DOUBLE_PRECISION, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, status, iErr)
                  IF (layout%level == level) THEN
                     data(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2),1:nfields)=buffer
                  ELSE
                     DO i=mO(1,1), mO(1,2)
                        mC(1,:)=MapToLevel(i, level, layout%level)
                        mC(1,1)=max(mC(1,1), mB(1,1))
                        mC(1,2)=min(mC(1,2), mB(1,2))
                        DO j=mO(2,1), mO(2,2)
                           IF (nDim >= 2) THEN
                              mC(2,:)=MapToLevel(j, level, layout%level)
                              mC(2,1)=max(mC(2,1), mB(2,1))
                              mC(2,2)=min(mC(2,2), mB(2,2))
                           END IF
                           DO k=mO(3,1), mO(3,2)
                              IF (nDim >= 3) THEN
                                 mC(3,:)=MapToLevel(k, level, layout%level)
                                 mC(3,1)=max(mC(3,1), mB(3,1))
                                 mC(3,2)=min(mC(3,2), mB(3,2))                               
                              END IF
                              DO m=1,nFields
                                 data(mC(1,1):mC(1,2), mC(2,1):mC(2,2), mC(3,1):mC(3,2), m)=buffer(i,j,k,m)
                              END DO
                           END DO
                        END DO
                     END DO
                  END IF
                  DEALLOCATE(buffer)
               END IF
            END DO
            CALL MPI_Buffer_detach(mpi_buffer, buffsize, iErr)
         END DO
         DEALLOCATE(mpi_buffer)
      END DO
   END SUBROUTINE LoadFieldIntoLayoutC



   !> Use the tree structure on a given level to populate the layout
   SUBROUTINE UnLoadFieldFromLayoutC(layout, data, FieldID, lPeriodic, rmbc)
      TYPE(LayoutDef) :: layout
      INTEGER :: FieldID(:,:)
      INTEGER :: level
      INTEGER :: rmbc
      LOGICAL, DIMENSION(:) :: lPeriodic
      COMPLEX(8), DIMENSION(:,:,:,:) :: data
      INTEGER :: mS(3,2), mB(3,2), mT(3,2)
      INTEGER, POINTER, DIMENSION(:,:,:) :: mTs, mSs
      INTEGER :: nOverlaps
      COMPLEX(8), DIMENSION(:,:,:,:), ALLOCATABLE :: buffer
      CHARACTER, DIMENSION(:), ALLOCATABLE :: mpi_buffer
      INTEGER :: iErr
      INTEGER :: status(MPI_STATUS_SIZE)
      TYPE(NodeDefList), POINTER :: nodelist
      TYPE(NodeDef), POINTER :: node
      INTEGER :: buffsize
      INTEGER :: i,j,k,l, recv_proc, send_proc
      INTEGER :: request, nfields
      nfields=size(FieldID,1)
      DO l=1, MPI_NP
         recv_proc=modulo(MPI_ID+l, MPI_NP)
         send_proc=modulo(MPI_ID-l, MPI_NP)
         mB=layout%mB(recv_proc,:,:)
         ALLOCATE(buffer(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2),nfields))
         CALL MPI_IRECV(buffer, size(buffer), MPI_DOUBLE_COMPLEX, recv_proc, LAYOUT_TAG, MPI_COMM_WORLD, request, iErr)

         CALL MPI_SEND(data, size(data), MPI_DOUBLE_COMPLEX, send_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
         CALL MPI_WAIT(request, status, iErr)

         nodelist=>Nodes(layout%level)%p
         DO WHILE (ASSOCIATED(nodelist))
            node=>nodelist%self
            CALL CalcOverlaps(node%box%mGlobal(:,:), layout%mB(recv_proc,:,:), mTs, mSs, nOverlaps, layout%level, lPeriodic, rmbc)
            IF (nOverlaps > 0) THEN
               DO j=1,nOverlaps
                  mS=mSs(j,:,:)+spread(layout%mB(recv_proc,:,1),2,2)-1
                  mT=mTs(j,:,:)
                  !          mO(1:nDim,1)=max(layout%mB(recv_proc,1:nDim,1), node%box%mGlobal(1:nDim,1)-rmbc)
                  !          mO(1:nDim,2)=min(layout%mB(recv_proc,1:nDim,2), node%box%mGlobal(1:nDim,2)+rmbc)
                  !          IF (ALL(mO(:,2) >= mO(:,1))) THEN
                  !             ip=mO-spread(node%box%mGlobal(:,1),2,2)+1
                  DO i=1,nfields
                     IF (FieldID(i,1) /= 0) THEN
                        node%info%q(mT(1,1):mT(1,2), mT(2,1):mT(2,2), mT(3,1):mT(3,2), FieldiD(i,1))= &
                             REAL(buffer(mS(1,1):mS(1,2), mS(2,1):mS(2,2), mS(3,1):mS(3,2),i))
                     END IF
                     IF (FieldID(i,2) /= 0) THEN
                        node%info%q(mT(1,1):mT(1,2), mT(2,1):mT(2,2), mT(3,1):mT(3,2), FieldiD(i,2))= &
                             AIMAG(buffer(mS(1,1):mS(1,2), mS(2,1):mS(2,2), mS(3,1):mS(3,2),i))
                     END IF
                  END DO
               END DO
               DEALLOCATE(mTs, mSs)
            END IF
            nodelist=>nodelist%next
         END DO
         DEALLOCATE(buffer)
      END DO
   END SUBROUTINE UnLoadFieldFromLayoutC


   !> Use the tree structure on a given level to populate the layout
   SUBROUTINE TransferLayoutsC(layout_src, layout_trg, data, newdata_opt)
      TYPE(LayoutDef) :: layout_src, layout_trg
      COMPLEX(8), DIMENSION(:,:,:,:), POINTER :: data, newdata
      COMPLEX(8), DIMENSION(:,:,:,:), POINTER, OPTIONAL :: newdata_opt
      INTEGER :: mO(3,2), mB(3,2)
      COMPLEX(8), DIMENSION(:,:,:,:), ALLOCATABLE :: recvbuffer, sendbuffer
      INTEGER :: iErr
      INTEGER :: status(MPI_STATUS_SIZE)
      INTEGER :: l,send_proc, recv_proc, isrc, itrg
      INTEGER :: request, nfields
      mB=layout_trg%mB(MPI_ID,:,:)
      nfields=size(data,4)
      IF (PRESENT(newdata_opt)) THEN
         newdata=>newdata_opt
      ELSE
         ALLOCATE(newdata(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2),nfields))
      END IF
      DO l=1, MPI_NP
         send_proc=modulo(MPI_ID+l, MPI_NP)
         recv_proc=modulo(MPI_ID-l, MPI_NP)
         mO(:,1)=max(layout_trg%mB(MPI_ID,:,1), layout_src%mB(recv_proc,:,1))
         mO(:,2)=min(layout_trg%mB(MPI_ID,:,2), layout_src%mB(recv_proc,:,2))
         IF (ALL(mO(:,2) >= mO(:,1))) THEN
            ALLOCATE(recvbuffer(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2), nfields))
            CALL MPI_IRECV(recvbuffer, size(recvbuffer), MPI_DOUBLE_COMPLEX, recv_proc, LAYOUT_TAG, MPI_COMM_WORLD, request, iErr)
         END IF
         mB(:,1)=max(layout_src%mB(MPI_ID,:,1), layout_trg%mB(send_proc,:,1))
         mB(:,2)=min(layout_src%mB(MPI_ID,:,2), layout_trg%mB(send_proc,:,2))
         IF (ALL(mB(:,2) >= mB(:,1))) THEN
            ALLOCATE(sendbuffer(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2), nfields))
            sendbuffer=data(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2),:)
            CALL MPI_SEND(sendbuffer, size(sendbuffer), MPI_DOUBLE_COMPLEX, send_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
            DEALLOCATE(sendbuffer)
         END IF
         IF (ALL(mO(:,2) >= mO(:,1))) THEN
            CALL MPI_WAIT(request, status, iErr)
            newdata(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2),:)=recvbuffer
            DEALLOCATE(recvbuffer)
         END IF
      END DO
      IF (.NOT. PRESENT(newdata_opt)) THEN
         DEALLOCATE(data)
         data=>newdata
      END IF
   END SUBROUTINE TransferLayoutsC



   !> Use the tree structure on a given level to populate the layout
   SUBROUTINE LoadFieldIntoLayoutR(layout, data, FieldID, level_opt)
      TYPE(LayoutDef) :: layout
      INTEGER :: FieldID(:)
      INTEGER :: level
      REAL(8), DIMENSION(:,:,:,:), POINTER :: data
      INTEGER :: mO(3,2), mB(3,2), mC(3,2)
      REAL(8), DIMENSION(:,:,:,:), ALLOCATABLE :: buffer
      CHARACTER, DIMENSION(:), ALLOCATABLE :: mpi_buffer
      INTEGER :: iErr
      INTEGER :: status(MPI_STATUS_SIZE)
      TYPE(NodeDefList), POINTER :: nodelist
      TYPE(NodeDef), POINTER :: node
      INTEGER :: buffsize
      INTEGER :: i,j,k,l,m,remote_proc, nfields
      INTEGER, OPTIONAL :: level_opt
      INTEGER :: level_min, level_max

      IF (PRESENT(level_opt)) THEN
         level_min=level_opt
         level_max=level_opt
      ELSE
         level_min=0
         level_max=layout%level
      END IF
      mB=layout%mB(MPI_ID,:,:)
      mC=1
      nfields=size(data,4)
      DO level=level_min, level_max
         nodelist=>Nodes(level)%p
         buffsize=0
         DO WHILE (ASSOCIATED(NodeList))
            buffsize=buffsize+6*PACK_INTEGER_SIZE
            buffsize=buffsize+PRODUCT(nodelist%self%info%mx(1:nDim)+2)*PACK_DOUBLE_SIZE*nfields
            buffsize=buffsize+MPI_BSEND_OVERHEAD
            nodelist=>nodelist%next
         END DO
         buffsize=buffsize+6*PACK_INTEGER_SIZE+MPI_BSEND_OVERHEAD
         ALLOCATE(mpi_buffer(buffsize))
         DO l=1, MPI_NP
            CALL MPI_Buffer_attach(mpi_buffer, buffsize, iErr)
            remote_proc=modulo(MPI_ID+l, MPI_NP)
            nodelist=>Nodes(level)%p
            DO WHILE (ASSOCIATED(nodelist))
               node=>nodelist%self
               mO(:,:)=LevelDown(layout%mB(remote_proc,:,:),layout%level, level)
               mO(:,1)=max(mO(:,1), node%box%mGlobal(:,1))
               mO(:,2)=min(mO(:,2), node%box%mGlobal(:,2))
               IF (ALL(mO(:,2) >= mO(:,1))) THEN
                  ALLOCATE(buffer(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2),nfields))
                  CALL MPI_BSEND(mO, 6, MPI_INTEGER, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
                  DO i=mO(1,1),mO(1,2)
                     DO j=mO(2,1), mO(2,2)
                        DO k=mO(3,1), mO(3,2)
                           DO m=1,nfields
                              buffer(i,j,k,m)=GetField(node%Info,i-node%box%mGlobal(1,1)+1, j-node%box%mGlobal(2,1)+1, k-node%box%mGlobal(3,1)+1, FieldID(m))
                           END DO
                        END DO
                     END DO
                  END DO
                  CALL MPI_BSEND(buffer, size(buffer), MPI_DOUBLE_PRECISION, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
                  DEALLOCATE(buffer)
               END IF
               nodelist=>nodelist%next
            END DO
            CALL MPI_BSEND(TERMINATIONBOX, 6, MPI_INTEGER, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)    
            remote_proc=modulo(MPI_ID-l, MPI_NP)
            DO
               CALL MPI_RECV(mO, 6, MPI_INTEGER, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, status, iErr)
               IF (ALL(mO == TERMINATIONBOX)) THEN
                  EXIT
               ELSE
                  ALLOCATE(buffer(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2), nfields))
                  CALL MPI_RECV(buffer, size(buffer), MPI_DOUBLE_PRECISION, remote_proc, LAYOUT_TAG, MPI_COMM_WORLD, status, iErr)
                  IF (layout%level == level) THEN
                     data(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2),:)=buffer
                  ELSE
                     DO i=mO(1,1), mO(1,2)
                        mC(1,:)=MapToLevel(i, level, layout%level)
                        mC(1,1)=max(mC(1,1), mB(1,1))
                        mC(1,2)=min(mC(1,2), mB(1,2))
                        DO j=mO(2,1), mO(2,2)
                           IF (nDim >= 2) THEN
                              mC(2,:)=MapToLevel(j, level, layout%level)
                              mC(2,1)=max(mC(2,1), mB(2,1))
                              mC(2,2)=min(mC(2,2), mB(2,2))
                           END IF
                           DO k=mO(3,1), mO(3,2)
                              IF (nDim >= 3) THEN
                                 mC(3,:)=MapToLevel(k, level, layout%level)
                                 mC(3,1)=max(mC(3,1), mB(3,1))
                                 mC(3,2)=min(mC(3,2), mB(3,2))
                              END IF
                              DO m=1,nFields
                                 data(mC(1,1):mC(1,2), mC(2,1):mC(2,2), mC(3,1):mC(3,2), m)=buffer(i,j,k,m)
                              END DO
                           END DO
                        END DO
                     END DO
                  END IF
                  DEALLOCATE(buffer)
               END IF
            END DO
            CALL MPI_Buffer_detach(mpi_buffer, buffsize, iErr)
         END DO
         DEALLOCATE(mpi_buffer)
      END DO
   END SUBROUTINE LoadFieldIntoLayoutR



   !> Use the tree structure on a given level to populate the layout
   SUBROUTINE UnLoadFieldFromLayoutR(layout, data, FieldID, lPeriodic, rmbc)
      TYPE(LayoutDef) :: layout
      INTEGER :: FieldID(:)
      INTEGER :: level
      INTEGER :: rmbc
      LOGICAL :: lPeriodic(:)
      REAL(8), DIMENSION(:,:,:,:) :: data
      INTEGER :: mS(3,2), mB(3,2), mT(3,2)
      INTEGER, DIMENSION(:,:,:), POINTER :: mTs, mSs
      REAL(8), DIMENSION(:,:,:,:), ALLOCATABLE :: buffer
      CHARACTER, DIMENSION(:), ALLOCATABLE :: mpi_buffer
      INTEGER :: iErr, nOverlaps
      INTEGER :: status(MPI_STATUS_SIZE)
      TYPE(NodeDefList), POINTER :: nodelist
      TYPE(NodeDef), POINTER :: node
      INTEGER :: buffsize
      INTEGER :: i,j,k,l, recv_proc, send_proc
      INTEGER :: request, nfields
      nfields=size(FieldID,1)
      DO l=1, MPI_NP
         recv_proc=modulo(MPI_ID+l, MPI_NP)
         send_proc=modulo(MPI_ID-l, MPI_NP)
         mB=layout%mB(recv_proc,:,:)
         ALLOCATE(buffer(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2),nfields))
         CALL MPI_IRECV(buffer, size(buffer), MPI_DOUBLE_PRECISION, recv_proc, LAYOUT_TAG, MPI_COMM_WORLD, request, iErr)

         CALL MPI_SEND(data, size(data), MPI_DOUBLE_PRECISION, send_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
         CALL MPI_WAIT(request, status, iErr)

         nodelist=>Nodes(layout%level)%p
         DO WHILE (ASSOCIATED(nodelist))
            node=>nodelist%self
            CALL CalcOverlaps(node%box%mGlobal(:,:), layout%mB(recv_proc,:,:), mTs, mSs, nOverlaps, layout%level, lPeriodic, rmbc)
            IF (nOverlaps > 0) THEN
               DO j=1,nOverlaps
                  mS=mSs(j,:,:)+spread(layout%mB(recv_proc,:,1),2,2)-1
                  mT=mTs(j,:,:)
                  !          mO(1:nDim,1)=max(layout%mB(recv_proc,1:nDim,1), node%box%mGlobal(1:nDim,1)-rmbc)
                  !          mO(1:nDim,2)=min(layout%mB(recv_proc,1:nDim,2), node%box%mGlobal(1:nDim,2)+rmbc)
                  !          IF (ALL(mO(:,2) >= mO(:,1))) THEN
                  !             ip=mO-spread(node%box%mGlobal(:,1),2,2)+1
                  DO i=1,nfields
                     node%info%q(mT(1,1):mT(1,2), mT(2,1):mT(2,2), mT(3,1):mT(3,2), FieldiD(i))= &
                          buffer(mS(1,1):mS(1,2), mS(2,1):mS(2,2), mS(3,1):mS(3,2),i)
                  END DO
               END DO
               DEALLOCATE(mTs, mSs)
            END IF
            nodelist=>nodelist%next
         END DO
         DEALLOCATE(buffer)
      END DO
   END SUBROUTINE UnLoadFieldFromLayoutR


   !> Use the tree structure on a given level to populate the layout
   SUBROUTINE TransferLayoutsR(layout_src, layout_trg, data, newdata_opt)
      TYPE(LayoutDef) :: layout_src, layout_trg
      REAL(8), DIMENSION(:,:,:,:), POINTER :: data, newdata
      REAL(8), DIMENSION(:,:,:,:), POINTER, OPTIONAL :: newdata_opt
      INTEGER :: mO(3,2), mB(3,2)
      REAL(8), DIMENSION(:,:,:,:), ALLOCATABLE :: recvbuffer, sendbuffer
      INTEGER :: iErr
      INTEGER :: status(MPI_STATUS_SIZE)
      INTEGER :: l,send_proc, recv_proc, isrc, itrg
      INTEGER :: request, nfields
      mB=layout_trg%mB(MPI_ID,:,:)
      nfields=size(data,4)
      IF (PRESENT(newdata_opt)) THEN
         newdata=>newdata_opt
      ELSE
         ALLOCATE(newdata(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2),nfields))
      END IF
      DO l=1, MPI_NP
         send_proc=modulo(MPI_ID+l, MPI_NP)
         recv_proc=modulo(MPI_ID-l, MPI_NP)
         mO(:,1)=max(layout_trg%mB(MPI_ID,:,1), layout_src%mB(recv_proc,:,1))
         mO(:,2)=min(layout_trg%mB(MPI_ID,:,2), layout_src%mB(recv_proc,:,2))
         IF (ALL(mO(:,2) >= mO(:,1))) THEN
            ALLOCATE(recvbuffer(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2), nfields))
            CALL MPI_IRECV(recvbuffer, size(recvbuffer), MPI_DOUBLE_PRECISION, recv_proc, LAYOUT_TAG, MPI_COMM_WORLD, request, iErr)
         END IF
         mB(:,1)=max(layout_src%mB(MPI_ID,:,1), layout_trg%mB(send_proc,:,1))
         mB(:,2)=min(layout_src%mB(MPI_ID,:,2), layout_trg%mB(send_proc,:,2))
         IF (ALL(mB(:,2) >= mB(:,1))) THEN
            ALLOCATE(sendbuffer(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2), nfields))
            sendbuffer=data(mB(1,1):mB(1,2), mB(2,1):mB(2,2), mB(3,1):mB(3,2),:)
            CALL MPI_SEND(sendbuffer, size(sendbuffer), MPI_DOUBLE_PRECISION, send_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
            DEALLOCATE(sendbuffer)
         END IF
         IF (ALL(mO(:,2) >= mO(:,1))) THEN
            CALL MPI_WAIT(request, status, iErr)
            newdata(mO(1,1):mO(1,2), mO(2,1):mO(2,2), mO(3,1):mO(3,2),:)=recvbuffer
            DEALLOCATE(recvbuffer)
         END IF
      END DO
      IF (.NOT. PRESENT(newdata_opt)) THEN
         DEALLOCATE(data)
         data=>newdata
      END IF
   END SUBROUTINE TransferLayoutsR


   !Similar to TransferLayouts except that we are transferring a subregion and the layouts may have different bounds
   SUBROUTINE LayoutTransferC(mC, mD, layout_src, layout_trg, data_src, data_trg)
      INTEGER, DIMENSION(3,2) :: mC, mD, mS, mR, mSO, mRO
      INTEGER :: l, offset(3), trg_proc, src_proc, nfields
      TYPE(LayoutDef) :: layout_src, layout_trg
      COMPLEX(8), DIMENSION(:,:,:,:), POINTER :: data_src, data_trg
      COMPLEX(8), DIMENSION(:,:,:,:), ALLOCATABLE :: send_buffer, recv_buffer
      INTEGER :: status(MPI_STATUS_SIZE), request, iErr
      LOGICAL :: lRecv, lSend, lRecvNow, lSendNow


      IF (product(mC(:,2)-mC(:,1)+1) /= product(mD(:,2)-mD(:,1)+1)) THEN
         PRINT*, 'ERROR: Data in layout transfer not the same size'
         write(*,'(6I5)') mC
         write(*,'(6I5)') mD
         STOP
      END IF

      nfields=size(data_src, 4)
      offset=mD(:,1)-mC(:,1) !offset between layouts

      !So mS and mT are distributed across processors
      !Processors can send data right and receive left
      !Doesn't really matter?

      mR(:,1)=max(layout_trg%mB(MPI_ID,:,1), mD(:,1))
      mR(:,2)=min(layout_trg%mB(MPI_ID,:,2), mD(:,2))
      lRecv=ALL(mR(:,2) >= mR(:,1))

      mS(:,1)=max(layout_src%mB(MPI_ID,:,1), mC(:,1))
      mS(:,2)=min(layout_src%mB(MPI_ID,:,2), mC(:,2))
      lSend=ALL(mS(:,2) >= mS(:,1))

      IF (.NOT. lSend .AND. .NOT. lRecv) RETURN !We are done - nothing to transfer

      DO l=1, MPI_NP
         IF (lSend) THEN
            trg_proc=modulo(MPI_ID+l, MPI_NP)
            mSO(:,1)=max(mS(:,1), layout_trg%mB(trg_proc,:,1)-offset)
            mSO(:,2)=min(mS(:,2), layout_trg%mB(trg_proc,:,2)-offset)
            lSendNow=ALL(mSO(:,2) >= mSO(:,1))
         ELSE
            lSendNow=.false.
         END IF
         IF (lRecv) THEN
            src_proc=modulo(MPI_ID-l, MPI_NP)
            mRO(:,1)=max(mR(:,1), layout_src%mB(src_proc,:,1)+offset)
            mRO(:,2)=min(mR(:,2), layout_src%mB(src_proc,:,2)+offset)
            lRecvNow=ALL(mRO(:,2) >= mRO(:,1))
         ELSE
            lRecvNow=.false.
         END IF

         IF (lRecvNow) THEN
            ALLOCATE(recv_buffer(mRO(1,1):mRO(1,2), mRO(2,1):mRO(2,2), mRO(3,1):mRO(3,2), 1:nfields))
            CALL MPI_IRECV(recv_buffer, size(recv_buffer), MPI_DOUBLE_COMPLEX, src_proc, LAYOUT_TAG, MPI_COMM_WORLD, request, iErr)
         END IF

         IF (lSendNow) THEN
            ALLOCATE(send_buffer(mSO(1,1):mSO(1,2), mSO(2,1):mSO(2,2), mSO(3,1):mSO(3,2), 1:nfields))
            send_buffer=data_src(mSO(1,1):mSO(1,2), mSO(2,1):mSO(2,2), mSO(3,1):mSO(3,2), 1:nFields)
            CALL MPI_SEND(send_buffer, size(send_buffer), MPI_DOUBLE_COMPLEX, trg_proc, LAYOUT_TAG, MPI_COMM_WORLD, iErr)
            DEALLOCATE(send_buffer)
         END IF
         IF (lRecvNow) THEN
            CALL MPI_WAIT(request, status, iErr)
            data_trg(mRO(1,1):mRO(1,2), mRO(2,1):mRO(2,2), mRO(3,1):mRO(3,2), 1:nfields)=recv_buffer
            DEALLOCATE(recv_buffer)
         END IF
      END DO

   END SUBROUTINE LayoutTransferC


END MODULE LayoutComms
