communication.f90

        if (LUseO) call mpi_isend(a%s(0,yoffset,1),blocklength,                &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,sendtag,   &
                       MPI_COMM_HORIZ, requests_ew(4), ierr)
        recvtag = 1011
        if (LUseT) call mpi_isend(a%st(1,0,0),1,                &
                       halo_wet(level,m),neighbour_w_rank,sendtag,   &
                       MPI_COMM_HORIZ, requests_ewT(4), ierr)
        ! Wait for East <-> West communication to complete
        if (halo_size == 1) then
          ! Wait for North <-> South communication to complete
          if (LUseO) call mpi_waitall(4,requests_ns, MPI_STATUSES_IGNORE, ierr)
          if (LUseT) call mpi_waitall(4,requests_nsT, MPI_STATUSES_IGNORE, ierr)
        end if
        if (LUseO) call mpi_waitall(4,requests_ew, MPI_STATUSES_IGNORE, ierr)
        if (LUseT) call mpi_waitall(4,requests_ewT, MPI_STATUSES_IGNORE, ierr)
      end if
      if (comm_measuretime) then
        call finish_timer(t_haloswap(level,m))
      end if
    end if

  end subroutine haloswap_mnh
!==================================================================
  subroutine haloswap(level,m, &  ! multigrid- and processor- level
                      a)          ! data field
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    type(scalar3d), intent(inout) :: a
    integer :: a_n  ! horizontal grid size
    integer :: nz   ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, rank, sendtag, recvtag
    integer :: stat(MPI_STATUS_SIZE)
    integer :: halo_size
    integer :: neighbour_n_rank
    integer :: neighbour_s_rank
    integer :: neighbour_e_rank
    integer :: neighbour_w_rank
    integer :: yoffset, blocklength
    integer, dimension(4) :: requests_ns
    integer, dimension(4) :: requests_ew

    halo_size = comm_param%halo_size

    ! Do nothing if we are only using one processor
    if (m > 0) then
      if (comm_measuretime) then
        call start_timer(t_haloswap(level,m))
      end if
      a_n = a%ix_max-a%ix_min+1
      nz = a%grid_param%nz
      stepsize = 2**(pproc-m)

      ! Work out rank, only execute on relevant processes
      call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
      call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

      ! Work out ranks of neighbours
      ! W -> E
      call mpi_cart_shift(MPI_COMM_HORIZ,1, stepsize, &
                          neighbour_w_rank,neighbour_e_rank,ierr)
      ! N -> S
      call mpi_cart_shift(MPI_COMM_HORIZ,0, stepsize, &
                          neighbour_n_rank,neighbour_s_rank,ierr)
      if ( (stepsize == 1) .or.                   &
         (  (mod(p_horiz(1),stepsize) == 0) .and. &
            (mod(p_horiz(2),stepsize) == 0) ) ) then
        if (halo_size == 1) then
          ! Do not include corners in send/recv
          yoffset = 1
          blocklength = a_n*(nz+2)*halo_size
        else
          yoffset = 1-halo_size
          blocklength = (a_n+2*halo_size)*(nz+2)*halo_size
        end if
        ! Receive from north
        recvtag = 2
        call mpi_irecv(a%s(0,0-(halo_size-1),1),1,                  &
                       halo_ns(level,m),neighbour_n_rank,recvtag,   &
                       MPI_COMM_HORIZ, requests_ns(1), ierr)
        ! Receive from south
        recvtag = 3
        call mpi_irecv(a%s(0,a_n+1,1),1,                            &
                       halo_ns(level,m),neighbour_s_rank,recvtag,   &
                       MPI_COMM_HORIZ, requests_ns(2), ierr)
        ! Send to south
        sendtag = 2
        call mpi_isend(a%s(0,a_n-(halo_size-1),1),1,                &
                       halo_ns(level,m),neighbour_s_rank,sendtag,   &
                       MPI_COMM_HORIZ, requests_ns(3), ierr)
        ! Send to north
        sendtag = 3
        call mpi_isend(a%s(0,1,1),1,                                &
                       halo_ns(level,m),neighbour_n_rank,sendtag,   &
                       MPI_COMM_HORIZ, requests_ns(4), ierr)
        if (halo_size > 1) then
          ! Wait for North <-> South communication to complete
          call mpi_waitall(4,requests_ns, MPI_STATUSES_IGNORE, ierr)
        end if
        ! Receive from west
        recvtag = 0
        call mpi_irecv(a%s(0,yoffset,0-(halo_size-1)),blocklength,    &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,recvtag, &
                       MPI_COMM_HORIZ, requests_ew(1), ierr)
        ! Receive from east
        sendtag = 1
        call mpi_irecv(a%s(0,yoffset,a_n+1),blocklength,          &
                       MPI_DOUBLE_PRECISION,neighbour_e_rank,recvtag, &
                       MPI_COMM_HORIZ, requests_ew(2), ierr)
        ! Send to east
        sendtag = 0
        call mpi_isend(a%s(0,yoffset,a_n-(halo_size-1)),blocklength,  &
                       MPI_DOUBLE_PRECISION,neighbour_e_rank,sendtag, &
                       MPI_COMM_HORIZ, requests_ew(3), ierr)
        ! Send to west
        recvtag = 1
        call mpi_isend(a%s(0,yoffset,1),blocklength,                &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,sendtag,   &
                       MPI_COMM_HORIZ, requests_ew(4), ierr)
        ! Wait for East <-> West communication to complete
        if (halo_size == 1) then
          ! Wait for North <-> South communication to complete
          call mpi_waitall(4,requests_ns, MPI_STATUSES_IGNORE, ierr)
        end if
        call mpi_waitall(4,requests_ew, MPI_STATUSES_IGNORE, ierr)
      end if
      if (comm_measuretime) then
        call finish_timer(t_haloswap(level,m))
      end if
    end if
  end subroutine haloswap
!==================================================================
!  Collect from a(level,m) and store on less processors
!  in b(level,m-1)
!
!  Example for pproc-m = 1, i.e. stepsize = 2:
!
!   NW (0,0)  <--  NE (0,2)
!
!     ^      .
!     !        .
!                .
!   SW (2,0)       SE (2,2) [send to 0,0]
!
!==================================================================
  subroutine collect(level,m, &    ! multigrid and processor level
                     a, &          ! IN: data on level (level,m)
                     b)            ! OUT: data on level (level,m-1)
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    type(scalar3d), intent(in) :: a
    type(scalar3d), intent(inout) :: b
    integer :: a_n, b_n   ! horizontal grid sizes
    integer :: nz ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, source_rank, dest_rank, rank, recv_tag, send_tag, iz
    logical :: corner_nw, corner_ne, corner_sw, corner_se
    integer :: recv_request(3)

    call start_timer(t_collect(m))

    stepsize = 2**(pproc-m)

    a_n = a%ix_max-a%ix_min+1
    b_n = b%ix_max-b%ix_min+1
    nz = b%grid_param%nz

    ! Work out rank, only execute on relevant processes
    call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
    ! Store position in process grid in in p_horiz
    ! Note we can NOT use cart_shift as we need diagonal neighburs as well
    call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

    ! Ignore all processes that do not participate at this level
    if ( (stepsize .eq. 1) .or. ((mod(p_horiz(1),stepsize) == 0) .and. (mod(p_horiz(2),stepsize)) == 0)) then
      ! Determine position in local 2x2 block
      if (stepsize .eq. 1) then
        corner_nw = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 0))
        corner_ne = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 1))
        corner_sw = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 0))
        corner_se = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 1))
      else
        corner_nw = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_ne = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 1))
        corner_sw = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_se = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 1))
      end if
      ! NW receives from the other three processes
      if ( corner_nw ) then
        ! Receive from NE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)+stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 0
        call mpi_irecv(b%s(0,1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                           recv_request(1),ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Collect: receive from NE failed in mpi_irecv().")
#endif
         ! Receive from SW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)/), &
                           source_rank, &
                           ierr)
        recv_tag = 1
        call mpi_irecv(b%s(0,b_n/2+1,1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                           recv_request(2),ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Collect: receive from SW failed in mpi_irecv().")
#endif
        ! Receive from SE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)+stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 2
        call mpi_irecv(b%s(0,b_n/2+1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                           recv_request(3),ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Collect: receive from SE failed in mpi_irecv().")
#endif
        ! Copy local data while waiting for data from other processes
        do iz=0,nz+1
          b%s(iz,1:a_n,1:a_n) = a%s(iz,1:a_n,1:a_n)
        end do
        ! Wait for receives to complete before proceeding
        call mpi_waitall(3,recv_request,MPI_STATUSES_IGNORE,ierr)
      end if
      if ( corner_ne ) then
        ! Send to NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)-stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 0
        call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Collect: send from NE failed in mpi_send().")
#endif
      end if
      if ( corner_sw ) then
        ! Send to NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)/), &
                           dest_rank, &
                           ierr)
        send_tag = 1
        call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Collect: send from SW failed in mpi_send().")
#endif
      end if
      if ( corner_se ) then
        ! send to NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)-stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 2
        call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Collect: send from SE failed in mpi_send().")
#endif
      end if

    end if
    call finish_timer(t_collect(m))

  end subroutine collect

!==================================================================
!  Distribute data in a(level,m-1) and store in b(level,m)
!
!  Example for p-m = 1, i.e. stepsize = 2:
!
!   NW (0,0)  -->  NE (2,0)
!
!     !      .
!     v        .
!                .
!   SW (0,2)       SE (2,2) [receive from to 0,0]
!==================================================================
  subroutine distribute(level,m, &  ! multigrid and processor level
                        a,       &  ! IN: Data on level (level,m-1)
                        b)          ! OUT: Data on level (level,m)
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    type(scalar3d), intent(in) :: a
    type(scalar3d), intent(inout) :: b
    integer :: a_n, b_n   ! horizontal grid sizes
    integer :: nz ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, source_rank, dest_rank, send_tag, recv_tag, rank, iz
    integer :: stat(MPI_STATUS_SIZE)
    integer :: send_request(3)
    logical :: corner_nw, corner_ne, corner_sw, corner_se

    call start_timer(t_distribute(m))

    stepsize = 2**(pproc-m)

    a_n = a%ix_max-a%ix_min+1
    b_n = b%ix_max-b%ix_min+1
    nz = a%grid_param%nz

    ! Work out rank, only execute on relevant processes
    call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
    call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

    ! Ignore all processes that do not participate at this level
    if ( (stepsize .eq. 1) .or. ((mod(p_horiz(1),stepsize) == 0) .and. (mod(p_horiz(2),stepsize)) == 0)) then
      ! Work out coordinates in local 2 x 2 block
      if (stepsize .eq. 1) then
        corner_nw = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 0))
        corner_ne = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 1))
        corner_sw = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 0))
        corner_se = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 1))
      else
        corner_nw = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_ne = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 1))
        corner_sw = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_se = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 1))
      end if
      if ( corner_nw ) then
        ! (Asynchronous) send to NE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)+stepsize/), &
                           dest_rank, &
                           ierr)

        send_tag = 0
        call mpi_isend(a%s(0,1,a_n/2+1), 1,sub_interior(level,m-1),dest_rank, send_tag, &
                       MPI_COMM_HORIZ,send_request(1),ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Distribute: send to NE failed in mpi_isend().")
#endif
        ! (Asynchronous) send to SW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)/), &
                           dest_rank, &
                           ierr)
        send_tag = 1
        call mpi_isend(a%s(0,a_n/2+1,1),1,sub_interior(level,m-1), dest_rank, send_tag, &
                       MPI_COMM_HORIZ, send_request(2), ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Distribute: send to SW failed in mpi_isend().")
#endif
        ! (Asynchronous) send to SE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)+stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 2
        call mpi_isend(a%s(0,a_n/2+1,a_n/2+1),1,sub_interior(level,m-1), dest_rank, send_tag, &
                      MPI_COMM_HORIZ, send_request(3), ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Distribute: send to SE failed in mpi_isend().")
#endif
        ! While sending, copy local data
        do iz=0,nz+1
          b%s(iz,1:b_n,1:b_n) = a%s(iz,1:b_n,1:b_n)
        end do
        ! Only proceed when async sends to complete
        call mpi_waitall(3, send_request, MPI_STATUSES_IGNORE, ierr)
      end if
      if ( corner_ne ) then

        ! Receive from NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)-stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 0
        call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Distribute: receive on NE failed in mpi_recv().")
#endif
      end if
      if ( corner_sw ) then
        ! Receive from NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)/), &
                           source_rank, &
                           ierr)
        recv_tag = 1
        call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Distribute: receive on SW failed in mpi_recv().")
#endif
      end if
      if ( corner_se ) then
        ! Receive from NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)-stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 2
        call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
#ifndef NDEBUG
  if (ierr .ne. 0) &
    call fatalerror("Distribute: receive on NW failed in mpi_recv().")
#endif
      end if

    end if
    call finish_timer(t_distribute(m))

  end subroutine distribute

end module communication