!=== COPYRIGHT AND LICENSE STATEMENT ===
!
!  This file is part of the TensorProductMultigrid code.
!
!  (c) The copyright relating to this work is owned jointly by the
!  Crown, Met Office and NERC [2014]. However, it has been created
!  with the help of the GungHo Consortium, whose members are identified
!  at https://puma.nerc.ac.uk/trac/GungHo/wiki .
!
!  Main Developer: Eike Mueller
!
!  TensorProductMultigrid is free software: you can redistribute it and/or
!  modify it under the terms of the GNU Lesser General Public License as
!  published by the Free Software Foundation, either version 3 of the
!  License, or (at your option) any later version.
!
!  TensorProductMultigrid is distributed in the hope that it will be useful,
!  but WITHOUT ANY WARRANTY; without even the implied warranty of
!  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
!  GNU Lesser General Public License for more details.
!
!  You should have received a copy of the GNU Lesser General Public License
!  along with TensorProductMultigrid (see files COPYING and COPYING.LESSER).
!  If not, see <http://www.gnu.org/licenses/>.
!
!=== COPYRIGHT AND LICENSE STATEMENT ===

!==================================================================
!
!  MPI communication routines for multigrid code
!
!  Eike Mueller, University of Bath, Feb 2012
!
!==================================================================
module communication
  use messages
  use datatypes
  use parameters
  use mpi
  use timer

  implicit none

public::comm_preinitialise
public::comm_initialise
public::comm_finalise
public::scalarprod
public::haloswap
public::ihaloswap
public::collect
public::distribute
public::i_am_master_mpi
public::master_rank
public::pproc
public::MPI_COMM_HORIZ
public::comm_parameters
public::comm_measuretime

  ! Number of processors
  ! n_proc = 2^(2*pproc), with integer pproc
  integer :: pproc

  ! Rank of master process
  integer, parameter :: master_rank = 0
  ! Am I the master process?
  logical :: i_am_master_mpi

  integer, parameter :: dim = 3        ! Dimension
  integer, parameter :: dim_horiz = 2  ! Horizontal dimension
  integer :: MPI_COMM_HORIZ            ! Communicator with horizontal partitioning

private

  ! Data types for halo exchange in both x- and y-direction
  integer, dimension(:,:,:), allocatable :: halo_type

  ! MPI vector data types
  ! Halo for data exchange in north-south direction
  integer, allocatable, dimension(:,:) :: halo_ns
  ! Vector data type for interior of field a(level,m)
  integer, allocatable, dimension(:,:) :: interior
  ! Vector data type for one quarter of interior of field
  ! at level a(level,m). This has the same size (and can be
  ! used for communications with) the interior of a(level,m+1)
  integer, allocatable, dimension(:,:) :: sub_interior
  ! Timer for halo swaps
  type(time), allocatable, dimension(:,:) :: t_haloswap
  ! Timer for collect and distribute
  type(time), allocatable, dimension(:) :: t_collect
  type(time), allocatable, dimension(:) :: t_distribute

  ! Parallelisation parameters
  ! Measure communication times?
  logical :: comm_measuretime

  ! Parallel communication parameters
  type comm_parameters
    ! Size of halos
    integer :: halo_size
  end type comm_parameters

  type(comm_parameters) :: comm_param

  ! Data layout
  ! ===========
  !
  !  The number of processes has to be of the form nproc = 2^(2*pproc) to
  !  ensure that data can be distributed between processes.
  !  The processes are arranged in a (2^pproc) x (2^pproc) cartesian grid
  !  in the horizontal plane (i.e. vertical columns are always local to one
  !  process), which is implemented via the communicator MPI_COMM_HORIZ.
  !
  !  MPI_cart_rank() and MPI_cart_shift() can then be used to easily
  !  identify neighbouring processes.
  !  The number of data grid cells in each direction has to be a multiple
  !  of 2**(L-1), where L is the number of levels (including the coarse
  !  and fine level), with the coarse level corresponding to level=1.
  !  Also define L_split as the level where we start to pull together
  !  data. For levels > L_split each position in the cartesian grid is
  !  included in the work; below this only a subset of processes is
  !  used.
  !
  !  Each grid a(level,m) is identified by two numbers:
  !  (1) The multigrid level it belongs to (level)
  !  (2) The number of active processes that operate on it (2^(2*m)).
  !
  !  For level > L_split, m=pproc. For L_split we store a(L_split,pproc) and
  !  a(L_split,pproc-1), and only processes with even coordinates in both
  !  horizontal directions use this grid.
  !  Below that level, store a(L_split-1,pproc-1) and a(L_split-1,pproc-2),
  !  where only processes for which both horizontal coordinates are
  !  multiples of four use the latter. This is continued until only one
  !  process is left.
  !
  !  level
  !    L          a(L,pproc)
  !    L-1        a(L-1,pproc)
  !    ...
  !    L_split    a(L_split,pproc)    a(L_split  ,pproc-1)
  !    L_split-1                      a(L_split-1,pproc-1)  a(L_split-1,pproc-2)
  !
  !    ...                                                  a(3,1)
  !                                                         a(2,1)
  !                                                         a(1,1)
  !
  !  When moving from left to right in the above graph the total number of
  !  grid cells does not change, but the number of data points per process
  !  increases by a factor of 4.
  !
  ! Parallel operations
  ! ===================
  !
  !  (*) Halo exchange. Update halo with data from neighbouring
  !      processors in the cartesian grid on the current (level,m).
  !  (*) Collect data from all processes at (level,m) on those
  !      processes that are still active at (level,m-1).
  !  (*) Distribute data at (level,m-1) and duplicate on all processes
  !      that are active at (level,m).
  !
  !  Note that in the cartesian processor grid the first coordinate
  !  is the North-South (y-) direction, the second coordinate is the
  !  East-West (x-) direction, i.e. the layout is this:
  !
  !  p_0 (0,0)   p_1 (0,1)   p_2  (0,2)   p_3  (0,3)
  !
  !  p_4 (1,0)   p_5 (1,1)   p_6  (1,2)   p_7  (1,3)
  !
  !  p_8 (2,0)   p_9 (2,1)   p_10 (2,2)   p_11 (2,3)
  !
  !    [...]
  !
  !  Normal multigrid restriction and prolongation are used to
  !  move between levels with fixed m.
  !

contains

!==================================================================
! Pre-initialise communication routines
!==================================================================
  subroutine comm_preinitialise()
    implicit none
    integer :: nproc, ierr, rank
    call mpi_comm_size(MPI_COMM_WORLD, nproc, ierr)
    call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
    i_am_master_mpi = (rank == master_rank)
    ! Check that nproc = 2^(2*pproc)
    pproc = floor(log(1.0d0*nproc)/log(4.0d0))
    if ( (nproc - 4**pproc) .ne. 0) then
      call fatalerror("Number of processors has to be 2^(2*pproc) with integer pproc.")
    end if
    if (i_am_master_mpi) then
      write(STDOUT,'("PARALLEL RUN")')
      write(STDOUT,'("Number of processors : 2^(2*pproc) = ",I10," with pproc = ",I6)') &
        nproc, pproc
    end if
  end subroutine comm_preinitialise
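
!==================================================================
! Illustrative sketch (not called anywhere in this module): the test
! used throughout the routines below to decide whether a process is
! active at a given processor level m. The names mirror the local
! variables used in those routines.
!
!   stepsize = 2**(pproc-m)
!   call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
!   call mpi_cart_coords(MPI_COMM_HORIZ, rank, dim_horiz, p_horiz, ierr)
!   active = (stepsize == 1) .or.                     &
!            ( (mod(p_horiz(1),stepsize) == 0) .and.  &
!              (mod(p_horiz(2),stepsize) == 0) )
!
! i.e. for m = pproc every process is active, and each time m is
! reduced by one only every second active process in each horizontal
! direction remains active.
!==================================================================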
!==================================================================
! Initialise communication routines
!==================================================================
  subroutine comm_initialise(n_lev,         & !} multigrid parameters
                             lev_split,     & !}
                             grid_param,    & ! Grid parameters
                             comm_param_in)   ! Parallel communication parameters
    implicit none
    integer, intent(in) :: n_lev
    integer, intent(in) :: lev_split
    type(grid_parameters), intent(inout) :: grid_param
    type(comm_parameters), intent(in) :: comm_param_in
    integer :: n
    integer :: nz
    integer :: rank, nproc, ierr
    integer :: count, blocklength, stride
    integer, dimension(2) :: p_horiz
    integer :: m, level, nlocal
    logical :: reduced_m
    integer :: halo_size
    character(len=32) :: t_label

    n  = grid_param%n
    nz = grid_param%nz

    comm_param = comm_param_in
    halo_size  = comm_param%halo_size

    call mpi_comm_size(MPI_COMM_WORLD, nproc, ierr)

    ! Create cartesian topology
    call mpi_cart_create(MPI_COMM_WORLD,        & ! Old communicator name
                         dim_horiz,             & ! horizontal dimension
                         (/2**pproc,2**pproc/), & ! extent in each horizontal direction
                         (/.false.,.false./),   & ! periodic?
                         .true.,                & ! reorder?
                         MPI_COMM_HORIZ,        & ! Name of new communicator
                         ierr)

    ! Calculate rank and coordinates in the cartesian grid
    call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
    call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

    ! Local size of (horizontal) grid
    nlocal = n/2**pproc

    ! === Set up data types ===
    ! Halo for exchange in north-south direction
    allocate(halo_ns(n_lev,0:pproc))
    ! Interior data types
    allocate(interior(n_lev,0:pproc))
    allocate(sub_interior(n_lev,0:pproc))
    ! Timer
    allocate(t_haloswap(n_lev,0:pproc))
    allocate(t_collect(0:pproc))
    allocate(t_distribute(0:pproc))
    do m=0,pproc
      write(t_label,'("t_collect(",I3,")")') m
      call initialise_timer(t_collect(m),t_label)
      write(t_label,'("t_distribute(",I3,")")') m
      call initialise_timer(t_distribute(m),t_label)
    end do

    m = pproc
    level = n_lev
    reduced_m = .false.
    do while (level > 0)
      ! --- Create halo data types ---
      ! NS- (y-) direction
      count = nlocal
      blocklength = (nz+2)*halo_size
      stride = (nlocal+2*halo_size)*(nz+2)
      call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION, &
                           halo_ns(level,m),ierr)
      call mpi_type_commit(halo_ns(level,m),ierr)
#ifndef NDEBUG
      if (ierr .ne. 0) &
        call fatalerror("Commit halo_ns failed in mpi_type_commit().")
#endif
      ! --- Create interior data types ---
      count = nlocal
      blocklength = nlocal*(nz+2)
      stride = (nz+2)*(nlocal+2*halo_size)
      call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION,interior(level,m),ierr)
      call mpi_type_commit(interior(level,m),ierr)
      count = nlocal/2
      blocklength = nlocal/2*(nz+2)
      stride = (nlocal+2*halo_size)*(nz+2)
      call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION,sub_interior(level,m),ierr)
      call mpi_type_commit(sub_interior(level,m),ierr)

      ! --- Create timers ---
      write(t_label,'("t_haloswap(",I3,",",I3,")")') level,m
      call initialise_timer(t_haloswap(level,m),t_label)

      ! If we are below L_split, split data
      if ( (level .le. lev_split) .and. (m > 0) .and. (.not. reduced_m)) then
        reduced_m = .true.
        m = m-1
        nlocal = 2*nlocal
        cycle
      end if
      reduced_m = .false.
      level = level-1
      nlocal = nlocal/2
    end do

  end subroutine comm_initialise
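
!==================================================================
! Note on the derived data types created above (a sketch of the
! assumed storage, as suggested by the block lengths and strides):
! a field at (level,m) is taken to be stored column-major as
!
!   a%s(0:nz+1, 1-halo_size:nlocal+halo_size, 1-halo_size:nlocal+halo_size)
!
! with the vertical (z) index fastest, then y, then x. The
! north-south halo type, for example, is then equivalent to
!
!   call mpi_type_vector(nlocal,                      & ! one block per x-index
!                        (nz+2)*halo_size,            & ! halo_size y-columns of nz+2 reals
!                        (nlocal+2*halo_size)*(nz+2), & ! distance between consecutive x-indices
!                        MPI_DOUBLE_PRECISION, halo_ns(level,m), ierr)
!
! i.e. it picks out a strip of halo_size grid rows over the full
! depth of each column. The interior and sub_interior types are
! built in the same way with larger block lengths.
!==================================================================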
!==================================================================
! Finalise communication routines
!==================================================================
  subroutine comm_finalise(n_lev,    & ! }
                           lev_split)  ! } Multigrid parameters
    implicit none
    integer, intent(in) :: n_lev
    integer, intent(in) :: lev_split
    logical :: reduced_m
    integer :: level, m
    integer :: ierr
    character(len=80) :: s

    m = pproc
    level = n_lev
    reduced_m = .false.
    if (i_am_master_mpi) then
      write(STDOUT,'(" *** Finalising communications ***")')
    end if
    call print_timerinfo("--- Communication timing results ---")
    do while (level > 0)
      write(s,'("level = ",I3,", m = ",I3)') level, m
      call print_timerinfo(s)
      ! --- Print out timer information ---
      call print_elapsed(t_haloswap(level,m),.True.,1.0_rl)
      ! --- Free halo data types ---
      call mpi_type_free(halo_ns(level,m),ierr)
      ! --- Free interior data types ---
      call mpi_type_free(interior(level,m),ierr)
      call mpi_type_free(sub_interior(level,m),ierr)
      ! If we are below L_split, split data
      if ( (level .le. lev_split) .and. (m > 0) .and. (.not. reduced_m)) then
        reduced_m = .true.
        m = m-1
        cycle
      end if
      reduced_m = .false.
      level = level-1
    end do
    do m=pproc,0,-1
      write(s,'("m = ",I3)') m
      call print_timerinfo(s)
      ! --- Print out timer information ---
      call print_elapsed(t_collect(m),.True.,1.0_rl)
      call print_elapsed(t_distribute(m),.True.,1.0_rl)
    end do

    ! Deallocate arrays
    deallocate(halo_ns)
    deallocate(interior)
    deallocate(sub_interior)
    deallocate(t_haloswap)
    deallocate(t_collect)
    deallocate(t_distribute)
    if (i_am_master_mpi) then
      write(STDOUT,'("")')
    end if

  end subroutine comm_finalise

!==================================================================
! Scalar product of two fields
!==================================================================
  subroutine scalarprod(m, a, b, s)
    implicit none
    integer, intent(in) :: m
    type(scalar3d), intent(in) :: a
    type(scalar3d), intent(in) :: b
    real(kind=rl), intent(out) :: s
    integer :: nprocs, rank, ierr
    integer :: p_horiz(2)
    integer :: stepsize
    integer, parameter :: dim_horiz = 2
    real(kind=rl) :: local_sum, global_sum
    integer :: nlocal, nz, i
    real(kind=rl) :: ddot

    nlocal = a%ix_max-a%ix_min+1
    nz = a%grid_param%nz
    ! Work out coordinates of processor
    call mpi_comm_size(MPI_COMM_HORIZ,nprocs,ierr)
    call mpi_comm_rank(MPI_COMM_HORIZ,rank,ierr)
    stepsize = 2**(pproc-m)
    if (nprocs > 1) then
      ! Only include the local sum if the processor coordinates
      ! are multiples of stepsize
      call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)
      if ( (stepsize == 1) .or.                    &
           ( (stepsize > 1) .and.                  &
             (mod(p_horiz(1),stepsize)==0) .and.   &
             (mod(p_horiz(2),stepsize)==0) ) ) then
        local_sum = 0.0_rl
        do i = 1, nlocal
          local_sum = local_sum &
                    + ddot((nz+2)*nlocal,a%s(0,1,i),1,b%s(0,1,i),1)
        end do
      else
        local_sum = 0.0_rl
      end if
      call mpi_allreduce(local_sum,global_sum,1,MPI_DOUBLE_PRECISION, &
                         MPI_SUM,MPI_COMM_HORIZ,ierr)
    else
      global_sum = 0.0_rl
      do i = 1, nlocal
        global_sum = global_sum &
                   + ddot((nz+2)*nlocal,a%s(0,1,i),1,b%s(0,1,i),1)
      end do
    end if
    s = global_sum
  end subroutine scalarprod
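
!==================================================================
! Illustrative use of scalarprod (a sketch; u is a hypothetical field
! defined at processor level m). The local sums are computed with the
! external function ddot, which is assumed to be the BLAS dot product,
! so a BLAS library has to be linked.
!
!   real(kind=rl) :: s_norm2
!   call scalarprod(m, u, u, s_norm2)   ! s_norm2 = <u,u> over all active processes
!==================================================================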
!==================================================================
!  Initiate asynchronous halo exchange
!
!  For all processes with horizontal indices that are multiples
!  of 2^(pproc-m), update halos with information held by
!  neighbouring processes, e.g. for pproc-m = 1, stepsize=2
!
!                    N  (0,2)
!                    ^
!                    !
!                    v
!
!  W (2,0) <-->    (2,2)    <-->  E (2,4)
!
!                    ^
!                    !
!                    v
!                    S  (4,2)
!
!==================================================================
  subroutine ihaloswap(level,m,       &  ! multigrid- and processor- level
                       a,             &  ! data field
                       send_requests, &  ! send requests (OUT)
                       recv_requests  &  ! recv requests (OUT)
                      )
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    integer, intent(out), dimension(4) :: send_requests
    integer, intent(out), dimension(4) :: recv_requests
    type(scalar3d), intent(inout) :: a
    integer :: a_n  ! horizontal grid size
    integer :: nz   ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, rank, sendtag, recvtag
    integer :: stat(MPI_STATUS_SIZE)
    integer :: halo_size
    integer :: neighbour_n_rank
    integer :: neighbour_s_rank
    integer :: neighbour_e_rank
    integer :: neighbour_w_rank
    integer :: yoffset, blocklength

    halo_size = comm_param%halo_size

    ! Do nothing if we are only using one processor
    if (m > 0) then
      a_n = a%ix_max-a%ix_min+1
      nz = a%grid_param%nz
      stepsize = 2**(pproc-m)

      ! Work out rank, only execute on relevant processes
      call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
      call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

      ! Work out ranks of neighbours
      ! W -> E
      call mpi_cart_shift(MPI_COMM_HORIZ,1, stepsize, &
                          neighbour_w_rank,neighbour_e_rank,ierr)
      ! N -> S
      call mpi_cart_shift(MPI_COMM_HORIZ,0, stepsize, &
                          neighbour_n_rank,neighbour_s_rank,ierr)
      if ( (stepsize == 1) .or.                     &
           ( (mod(p_horiz(1),stepsize) == 0) .and.  &
             (mod(p_horiz(2),stepsize) == 0) ) ) then
        if (halo_size == 1) then
          ! Do not include corners in send/recv
          yoffset = 1
          blocklength = a_n*(nz+2)*halo_size
        else
          yoffset = 1-halo_size
          blocklength = (a_n+2*halo_size)*(nz+2)*halo_size
        end if
        ! Receive from north
        recvtag = 2
        call mpi_irecv(a%s(0,0-(halo_size-1),1),1,                &
                       halo_ns(level,m),neighbour_n_rank,recvtag, &
                       MPI_COMM_HORIZ, recv_requests(1), ierr)
        ! Receive from south
        recvtag = 3
        call mpi_irecv(a%s(0,a_n+1,1),1,                          &
                       halo_ns(level,m),neighbour_s_rank,recvtag, &
                       MPI_COMM_HORIZ, recv_requests(2), ierr)
        ! Send to south
        sendtag = 2
        call mpi_isend(a%s(0,a_n-(halo_size-1),1),1,              &
                       halo_ns(level,m),neighbour_s_rank,sendtag, &
                       MPI_COMM_HORIZ, send_requests(1), ierr)
        ! Send to north
        sendtag = 3
        call mpi_isend(a%s(0,1,1),1,                              &
                       halo_ns(level,m),neighbour_n_rank,sendtag, &
                       MPI_COMM_HORIZ, send_requests(2), ierr)
        ! Receive from west
        recvtag = 0
        call mpi_irecv(a%s(0,yoffset,0-(halo_size-1)),blocklength,    &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,recvtag, &
                       MPI_COMM_HORIZ, recv_requests(3), ierr)
        ! Receive from east
        recvtag = 1
        call mpi_irecv(a%s(0,yoffset,a_n+1),blocklength,              &
                       MPI_DOUBLE_PRECISION,neighbour_e_rank,recvtag, &
                       MPI_COMM_HORIZ, recv_requests(4), ierr)
        ! Send to east
        sendtag = 0
        call mpi_isend(a%s(0,yoffset,a_n-(halo_size-1)),blocklength,  &
                       MPI_DOUBLE_PRECISION,neighbour_e_rank,sendtag, &
                       MPI_COMM_HORIZ, send_requests(3), ierr)
        ! Send to west
        sendtag = 1
        call mpi_isend(a%s(0,yoffset,1),blocklength,                  &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,sendtag, &
                       MPI_COMM_HORIZ, send_requests(4), ierr)
      end if
    end if
  end subroutine ihaloswap
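
!==================================================================
! Illustrative use of ihaloswap (a sketch, assuming the calling
! process is active at (level,m)): the routine only initiates the
! exchange, so the caller has to wait on the requests it returns
! before using the halo data, e.g.
!
!   integer, dimension(4) :: send_requests, recv_requests
!   call ihaloswap(level, m, a, send_requests, recv_requests)
!   ! ... work that does not touch the halos of a ...
!   call mpi_waitall(4, recv_requests, MPI_STATUSES_IGNORE, ierr)
!   call mpi_waitall(4, send_requests, MPI_STATUSES_IGNORE, ierr)
!==================================================================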
!==================================================================
!  Halo exchange
!
!  For all processes with horizontal indices that are multiples
!  of 2^(pproc-m), update halos with information held by
!  neighbouring processes, e.g. for pproc-m = 1, stepsize=2
!
!                    N  (0,2)
!                    ^
!                    !
!                    v
!
!  W (2,0) <-->    (2,2)    <-->  E (2,4)
!
!                    ^
!                    !
!                    v
!                    S  (4,2)
!
!==================================================================
  subroutine haloswap(level,m, &  ! multigrid- and processor- level
                      a)          ! data field
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    type(scalar3d), intent(inout) :: a
    integer :: a_n  ! horizontal grid size
    integer :: nz   ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, rank, sendtag, recvtag
    integer :: stat(MPI_STATUS_SIZE)
    integer :: halo_size
    integer :: neighbour_n_rank
    integer :: neighbour_s_rank
    integer :: neighbour_e_rank
    integer :: neighbour_w_rank
    integer :: yoffset, blocklength
    integer, dimension(4) :: requests_ns
    integer, dimension(4) :: requests_ew

    halo_size = comm_param%halo_size

    ! Do nothing if we are only using one processor
    if (m > 0) then
      if (comm_measuretime) then
        call start_timer(t_haloswap(level,m))
      end if
      a_n = a%ix_max-a%ix_min+1
      nz = a%grid_param%nz
      stepsize = 2**(pproc-m)

      ! Work out rank, only execute on relevant processes
      call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
      call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

      ! Work out ranks of neighbours
      ! W -> E
      call mpi_cart_shift(MPI_COMM_HORIZ,1, stepsize, &
                          neighbour_w_rank,neighbour_e_rank,ierr)
      ! N -> S
      call mpi_cart_shift(MPI_COMM_HORIZ,0, stepsize, &
                          neighbour_n_rank,neighbour_s_rank,ierr)
      if ( (stepsize == 1) .or.                     &
           ( (mod(p_horiz(1),stepsize) == 0) .and.  &
             (mod(p_horiz(2),stepsize) == 0) ) ) then
        if (halo_size == 1) then
          ! Do not include corners in send/recv
          yoffset = 1
          blocklength = a_n*(nz+2)*halo_size
        else
          yoffset = 1-halo_size
          blocklength = (a_n+2*halo_size)*(nz+2)*halo_size
        end if
        ! Receive from north
        recvtag = 2
        call mpi_irecv(a%s(0,0-(halo_size-1),1),1,                &
                       halo_ns(level,m),neighbour_n_rank,recvtag, &
                       MPI_COMM_HORIZ, requests_ns(1), ierr)
        ! Receive from south
        recvtag = 3
        call mpi_irecv(a%s(0,a_n+1,1),1,                          &
                       halo_ns(level,m),neighbour_s_rank,recvtag, &
                       MPI_COMM_HORIZ, requests_ns(2), ierr)
        ! Send to south
        sendtag = 2
        call mpi_isend(a%s(0,a_n-(halo_size-1),1),1,              &
                       halo_ns(level,m),neighbour_s_rank,sendtag, &
                       MPI_COMM_HORIZ, requests_ns(3), ierr)
        ! Send to north
        sendtag = 3
        call mpi_isend(a%s(0,1,1),1,                              &
                       halo_ns(level,m),neighbour_n_rank,sendtag, &
                       MPI_COMM_HORIZ, requests_ns(4), ierr)
        if (halo_size > 1) then
          ! Wait for North <-> South communication to complete
          call mpi_waitall(4,requests_ns, MPI_STATUSES_IGNORE, ierr)
        end if
        ! Receive from west
        recvtag = 0
        call mpi_irecv(a%s(0,yoffset,0-(halo_size-1)),blocklength,    &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,recvtag, &
                       MPI_COMM_HORIZ, requests_ew(1), ierr)
        ! Receive from east
        recvtag = 1
        call mpi_irecv(a%s(0,yoffset,a_n+1),blocklength,              &
                       MPI_DOUBLE_PRECISION,neighbour_e_rank,recvtag, &
                       MPI_COMM_HORIZ, requests_ew(2), ierr)
        ! Send to east
        sendtag = 0
        call mpi_isend(a%s(0,yoffset,a_n-(halo_size-1)),blocklength,  &
                       MPI_DOUBLE_PRECISION,neighbour_e_rank,sendtag, &
                       MPI_COMM_HORIZ, requests_ew(3), ierr)
        ! Send to west
        sendtag = 1
        call mpi_isend(a%s(0,yoffset,1),blocklength,                  &
                       MPI_DOUBLE_PRECISION,neighbour_w_rank,sendtag, &
                       MPI_COMM_HORIZ, requests_ew(4), ierr)
        if (halo_size == 1) then
          ! Wait for North <-> South communication to complete
          call mpi_waitall(4,requests_ns, MPI_STATUSES_IGNORE, ierr)
        end if
        ! Wait for East <-> West communication to complete
        call mpi_waitall(4,requests_ew, MPI_STATUSES_IGNORE, ierr)
      end if
      if (comm_measuretime) then
        call finish_timer(t_haloswap(level,m))
      end if
    end if
  end subroutine haloswap
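
!==================================================================
! Note for the collect and distribute routines below: the active
! processes at level m are grouped into 2x2 blocks, and the position
! of a process inside its block is derived from the parity of its
! coarsened cartesian coordinates, e.g. (a sketch mirroring the code
! that follows)
!
!   stepsize  = 2**(pproc-m)
!   corner_nw = (mod(p_horiz(1)/stepsize,2) == 0) .and. &
!               (mod(p_horiz(2)/stepsize,2) == 0)
!
! and analogously for the NE, SW and SE corners. The NW process of
! each block is the one that remains active at level m-1: collect
! gathers the block's data onto it, and distribute scatters the data
! back to the other three corners.
!==================================================================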
!==================================================================
!  Collect from a(level,m) and store on fewer processes
!  in b(level,m-1)
!
!  Example for pproc-m = 1, i.e. stepsize = 2:
!
!   NW (0,0)  <--  NE (0,2)
!
!      ^      .
!      !    .
!      !  .
!   SW (2,0)       SE (2,2)   [send to (0,0)]
!
!==================================================================
  subroutine collect(level,m, &  ! multigrid and processor level
                     a,       &  ! IN:  data on level (level,m)
                     b)          ! OUT: data on level (level,m-1)
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    type(scalar3d), intent(in) :: a
    type(scalar3d), intent(inout) :: b
    integer :: a_n, b_n   ! horizontal grid sizes
    integer :: nz         ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, source_rank, dest_rank, rank, recv_tag, send_tag, iz
    logical :: corner_nw, corner_ne, corner_sw, corner_se
    integer :: recv_request(3)

    call start_timer(t_collect(m))

    stepsize = 2**(pproc-m)

    a_n = a%ix_max-a%ix_min+1
    b_n = b%ix_max-b%ix_min+1
    nz = b%grid_param%nz

    ! Work out rank, only execute on relevant processes
    call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)

    ! Store position in process grid in p_horiz
    ! Note we can NOT use cart_shift as we need diagonal neighbours as well
    call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

    ! Ignore all processes that do not participate at this level
    if ( (stepsize .eq. 1) .or. &
         ((mod(p_horiz(1),stepsize) == 0) .and. (mod(p_horiz(2),stepsize) == 0)) ) then
      ! Determine position in local 2x2 block
      if (stepsize .eq. 1) then
        corner_nw = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 0))
        corner_ne = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 1))
        corner_sw = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 0))
        corner_se = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 1))
      else
        corner_nw = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_ne = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 1))
        corner_sw = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_se = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 1))
      end if
      ! NW receives from the other three processes
      if ( corner_nw ) then
        ! Receive from NE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)+stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 0
        call mpi_irecv(b%s(0,1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                       recv_request(1),ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Collect: receive from NE failed in mpi_irecv().")
#endif
        ! Receive from SW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)/), &
                           source_rank, &
                           ierr)
        recv_tag = 1
        call mpi_irecv(b%s(0,b_n/2+1,1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                       recv_request(2),ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Collect: receive from SW failed in mpi_irecv().")
#endif
        ! Receive from SE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)+stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 2
        call mpi_irecv(b%s(0,b_n/2+1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                       recv_request(3),ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Collect: receive from SE failed in mpi_irecv().")
#endif
        ! Copy local data while waiting for data from other processes
        do iz=0,nz+1
          b%s(iz,1:a_n,1:a_n) = a%s(iz,1:a_n,1:a_n)
        end do
        ! Wait for receives to complete before proceeding
        call mpi_waitall(3,recv_request,MPI_STATUSES_IGNORE,ierr)
      end if
      if ( corner_ne ) then
        ! Send to NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)-stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 0
        call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Collect: send from NE failed in mpi_send().")
#endif
      end if
      if ( corner_sw ) then
        ! Send to NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)/), &
                           dest_rank, &
                           ierr)
        send_tag = 1
        call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Collect: send from SW failed in mpi_send().")
#endif
      end if
      if ( corner_se ) then
        ! Send to NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)-stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 2
        call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Collect: send from SE failed in mpi_send().")
#endif
      end if
    end if
    call finish_timer(t_collect(m))
  end subroutine collect
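
!==================================================================
! Note: in collect above (and in distribute below) data sent with the
! type interior(level,m) is received with the type
! sub_interior(level,m-1) and vice versa. The two types describe the
! same number of points: when m decreases by one the local grid size
! nlocal doubles, so one quarter of the interior at (level,m-1) has
! the same extent as the full interior at (level,m). For example
! (hypothetical numbers), with a 64 x 64 local grid at (level,m) the
! local grid at (level,m-1) is 128 x 128 and each of its four
! quarters is again 64 x 64.
!==================================================================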
!==================================================================
!  Distribute data in a(level,m-1) and store in b(level,m)
!
!  Example for pproc-m = 1, i.e. stepsize = 2:
!
!   NW (0,0)  -->  NE (0,2)
!
!      !      .
!      v        .
!                 .
!   SW (2,0)       SE (2,2)   [receive from (0,0)]
!
!==================================================================
  subroutine distribute(level,m, &  ! multigrid and processor level
                        a,       &  ! IN:  Data on level (level,m-1)
                        b)          ! OUT: Data on level (level,m)
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    type(scalar3d), intent(in) :: a
    type(scalar3d), intent(inout) :: b
    integer :: a_n, b_n   ! horizontal grid sizes
    integer :: nz         ! vertical grid size
    integer, dimension(2) :: p_horiz
    integer :: stepsize
    integer :: ierr, source_rank, dest_rank, send_tag, recv_tag, rank, iz
    integer :: stat(MPI_STATUS_SIZE)
    integer :: send_request(3)
    logical :: corner_nw, corner_ne, corner_sw, corner_se

    call start_timer(t_distribute(m))

    stepsize = 2**(pproc-m)

    a_n = a%ix_max-a%ix_min+1
    b_n = b%ix_max-b%ix_min+1
    nz = a%grid_param%nz

    ! Work out rank, only execute on relevant processes
    call mpi_comm_rank(MPI_COMM_HORIZ, rank, ierr)
    call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr)

    ! Ignore all processes that do not participate at this level
    if ( (stepsize .eq. 1) .or. &
         ((mod(p_horiz(1),stepsize) == 0) .and. (mod(p_horiz(2),stepsize) == 0)) ) then
      ! Work out coordinates in local 2 x 2 block
      if (stepsize .eq. 1) then
        corner_nw = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 0))
        corner_ne = ((mod(p_horiz(1),2) == 0) .and. (mod(p_horiz(2),2) == 1))
        corner_sw = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 0))
        corner_se = ((mod(p_horiz(1),2) == 1) .and. (mod(p_horiz(2),2) == 1))
      else
        corner_nw = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_ne = ((mod(p_horiz(1)/stepsize,2) == 0) .and. (mod(p_horiz(2)/stepsize,2) == 1))
        corner_sw = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 0))
        corner_se = ((mod(p_horiz(1)/stepsize,2) == 1) .and. (mod(p_horiz(2)/stepsize,2) == 1))
      end if
      if ( corner_nw ) then
        ! (Asynchronous) send to NE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)+stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 0
        call mpi_isend(a%s(0,1,a_n/2+1),1,sub_interior(level,m-1),dest_rank, send_tag, &
                       MPI_COMM_HORIZ,send_request(1),ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Distribute: send to NE failed in mpi_isend().")
#endif
        ! (Asynchronous) send to SW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)/), &
                           dest_rank, &
                           ierr)
        send_tag = 1
        call mpi_isend(a%s(0,a_n/2+1,1),1,sub_interior(level,m-1), dest_rank, send_tag, &
                       MPI_COMM_HORIZ, send_request(2), ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Distribute: send to SW failed in mpi_isend().")
#endif
        ! (Asynchronous) send to SE
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)+stepsize,p_horiz(2)+stepsize/), &
                           dest_rank, &
                           ierr)
        send_tag = 2
        call mpi_isend(a%s(0,a_n/2+1,a_n/2+1),1,sub_interior(level,m-1), dest_rank, send_tag, &
                       MPI_COMM_HORIZ, send_request(3), ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Distribute: send to SE failed in mpi_isend().")
#endif
        ! While sending, copy local data
        do iz=0,nz+1
          b%s(iz,1:b_n,1:b_n) = a%s(iz,1:b_n,1:b_n)
        end do
        ! Only proceed when the asynchronous sends have completed
        call mpi_waitall(3, send_request, MPI_STATUSES_IGNORE, ierr)
      end if
      if ( corner_ne ) then
        ! Receive from NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1),p_horiz(2)-stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 0
        call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Distribute: receive on NE failed in mpi_recv().")
#endif
      end if
      if ( corner_sw ) then
        ! Receive from NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)/), &
                           source_rank, &
                           ierr)
        recv_tag = 1
        call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Distribute: receive on SW failed in mpi_recv().")
#endif
      end if
      if ( corner_se ) then
        ! Receive from NW
        call mpi_cart_rank(MPI_COMM_HORIZ, &
                           (/p_horiz(1)-stepsize,p_horiz(2)-stepsize/), &
                           source_rank, &
                           ierr)
        recv_tag = 2
        call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
#ifndef NDEBUG
        if (ierr .ne. 0) &
          call fatalerror("Distribute: receive on SE failed in mpi_recv().")
#endif
      end if
    end if
    call finish_timer(t_distribute(m))
  end subroutine distribute

end module communication