diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 index 243365e917d19e5db6f708089de2c86396304274..d3fe6b88fa353daa1d4770af1f10af0b077d330e 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 @@ -103,12 +103,14 @@ private type(type_halo_T), allocatable, dimension(:,:) :: tab_halo_et ! Vector data type for interior of field a(level,m) integer, allocatable, dimension(:,:) :: interior - integer, allocatable, dimension(:,:) :: interiorT + integer, allocatable, dimension(:,:) :: interiorT + type(type_halo_T), allocatable, dimension(:,:) :: tab_interiorT_ne,tab_interiorT_sw,tab_interiorT_se ! Vector data type for one quarter of interior of field ! at level a(level,m). This has the same size (and can be ! used for communications with) the interior of a(level,m+1) integer, allocatable, dimension(:,:) :: sub_interior - integer, allocatable, dimension(:,:) :: sub_interiorT + integer, allocatable, dimension(:,:) :: sub_interiorT + type(type_halo_T), allocatable, dimension(:,:) :: tab_sub_interiorT_ne,tab_sub_interiorT_sw,tab_sub_interiorT_se ! Timer for halo swaps type(time), allocatable, dimension(:,:) :: t_haloswap ! Timer for collect and distribute @@ -289,6 +291,12 @@ contains if (LUseO) allocate(sub_interior(n_lev,0:pproc)) if (LUseT) allocate(interiorT(n_lev,0:pproc)) if (LUseT) allocate(sub_interiorT(n_lev,0:pproc)) + if (LUseT) allocate(tab_interiorT_ne(n_lev,0:pproc)) + if (LUseT) allocate(tab_interiorT_sw(n_lev,0:pproc)) + if (LUseT) allocate(tab_interiorT_se(n_lev,0:pproc)) + if (LUseT) allocate(tab_sub_interiorT_ne(n_lev,0:pproc)) + if (LUseT) allocate(tab_sub_interiorT_sw(n_lev,0:pproc)) + if (LUseT) allocate(tab_sub_interiorT_se(n_lev,0:pproc)) ! Timer allocate(t_haloswap(n_lev,0:pproc)) allocate(t_collect(0:pproc)) @@ -368,6 +376,12 @@ contains call MPI_TYPE_CREATE_SUBARRAY(nb_dims,profil_tab,profil_sous_tab,coord_debut,& MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,interiorT(level,m),ierr) call mpi_type_commit(interiorT(level,m),ierr) + call mnh_allocate_mg_halo(tab_interiorT_ne(level,m)%haloTin,nlocal,nlocal,nz+2) + call mnh_allocate_mg_halo(tab_interiorT_ne(level,m)%haloTout,nlocal,nlocal,nz+2) + call mnh_allocate_mg_halo(tab_interiorT_sw(level,m)%haloTin,nlocal,nlocal,nz+2) + call mnh_allocate_mg_halo(tab_interiorT_sw(level,m)%haloTout,nlocal,nlocal,nz+2) + call mnh_allocate_mg_halo(tab_interiorT_se(level,m)%haloTin,nlocal,nlocal,nz+2) + call mnh_allocate_mg_halo(tab_interiorT_se(level,m)%haloTout,nlocal,nlocal,nz+2) end if ! sub_interiorT if ( (nlocal/2) /= 0 ) then @@ -377,6 +391,12 @@ contains call MPI_TYPE_CREATE_SUBARRAY(nb_dims,profil_tab,profil_sous_tab,coord_debut,& MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sub_interiorT(level,m),ierr) call mpi_type_commit(sub_interiorT(level,m),ierr) + call mnh_allocate_mg_halo(tab_sub_interiorT_ne(level,m)%haloTin,nlocal/2,nlocal/2,nz+2) + call mnh_allocate_mg_halo(tab_sub_interiorT_ne(level,m)%haloTout,nlocal/2,nlocal/2,nz+2) + call mnh_allocate_mg_halo(tab_sub_interiorT_sw(level,m)%haloTin,nlocal/2,nlocal/2,nz+2) + call mnh_allocate_mg_halo(tab_sub_interiorT_sw(level,m)%haloTout,nlocal/2,nlocal/2,nz+2) + call mnh_allocate_mg_halo(tab_sub_interiorT_se(level,m)%haloTin,nlocal/2,nlocal/2,nz+2) + call mnh_allocate_mg_halo(tab_sub_interiorT_se(level,m)%haloTout,nlocal/2,nlocal/2,nz+2) end if end if ! --- Create timers --- @@ -1470,11 +1490,19 @@ contains source_rank, & ierr) recv_tag = 1000 - if (LUseO) call mpi_irecv(b%s(0,1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & + if (LUseO) call mpi_irecv(b%s(0,1,b_n/2+1),1,sub_interior(level,m-1),source_rank, recv_tag, MPI_COMM_HORIZ, & recv_request(1),ierr) recv_tag = 1010 - if (LUseT) call mpi_irecv(b%st(b_n/2+1,1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & - recv_requestT(1),ierr) + if (LUseT) then +#ifdef MNH_GPUDIRECT + call mpi_irecv(tab_sub_interiorT_ne(level,m-1)%haloTout,size(tab_sub_interiorT_ne(level,m-1)%haloTout), & + MPI_DOUBLE_PRECISION,source_rank, recv_tag, MPI_COMM_HORIZ, & + recv_requestT(1),ierr) +#else + call mpi_irecv(b%st(b_n/2+1,1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & + recv_requestT(1),ierr) +#endif + end if #ifndef NDEBUG if (ierr .ne. 0) & call fatalerror("Collect: receive from NE failed in mpi_irecv().") @@ -1488,8 +1516,17 @@ contains if (LUseO) call mpi_irecv(b%s(0,b_n/2+1,1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & recv_request(2),ierr) recv_tag = 1011 - if (LUseT) call mpi_irecv(b%st(1,b_n/2+1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & - recv_requestT(2),ierr) + if (LUseT) then +#ifdef MNH_GPUDIRECT + call mpi_irecv(tab_sub_interiorT_sw(level,m-1)%haloTout,size(tab_sub_interiorT_sw(level,m-1)%haloTout), & + MPI_DOUBLE_PRECISION,source_rank, recv_tag, MPI_COMM_HORIZ, & + recv_requestT(2),ierr) +#else + call mpi_irecv(b%st(1,b_n/2+1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & + recv_requestT(2),ierr) +#endif + + endif #ifndef NDEBUG if (ierr .ne. 0) & @@ -1504,8 +1541,16 @@ contains if (LUseO) call mpi_irecv(b%s(0,b_n/2+1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & recv_request(3),ierr) recv_tag = 1012 - if (LUseT) call mpi_irecv(b%st(b_n/2+1,b_n/2+1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & - recv_requestT(3),ierr) + if (LUseT) then +#ifdef MNH_GPUDIRECT + call mpi_irecv(tab_sub_interiorT_se(level,m-1)%haloTout,size(tab_sub_interiorT_se(level,m-1)%haloTout), & + MPI_DOUBLE_PRECISION,source_rank, recv_tag, MPI_COMM_HORIZ, & + recv_requestT(3),ierr) +#else + call mpi_irecv(b%st(b_n/2+1,b_n/2+1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, & + recv_requestT(3),ierr) +#endif + end if #ifndef NDEBUG if (ierr .ne. 0) & call fatalerror("Collect: receive from SE failed in mpi_irecv().") @@ -1516,6 +1561,17 @@ contains ! Wait for receives to complete before proceeding if (LUseO) call mpi_waitall(3,recv_request,MPI_STATUSES_IGNORE,ierr) if (LUseT) call mpi_waitall(3,recv_requestT,MPI_STATUSES_IGNORE,ierr) +#ifdef MNH_GPUDIRECT + if (LUseT) then + ! copy from buffer for GPU DIRECT + ! Receive from NE + b%st(b_n/2+1:b_n,1:b_n/2,0:nz+1) = tab_sub_interiorT_ne(level,m-1)%haloTout(1:b_n/2,1:b_n/2,1:nz+2) + ! Receive from SW + b%st(1:b_n/2,b_n/2+1:b_n,0:nz+1) = tab_sub_interiorT_sw(level,m-1)%haloTout(1:b_n/2,1:b_n/2,1:nz+2) + ! Receive from SE + b%st(b_n/2+1:b_n,b_n/2+1:b_n,0:nz+1) = tab_sub_interiorT_se(level,m-1)%haloTout(1:b_n/2,1:b_n/2,1:nz+2) + end if +#endif end if if ( corner_ne ) then ! Send to NW @@ -1526,7 +1582,15 @@ contains send_tag = 1000 if (LUseO) call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) send_tag = 1010 - if (LUseT) call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) + if (LUseT) then +#ifdef MNH_GPUDIRECT + tab_interiorT_ne(level,m)%haloTin(1:a_n,1:a_n,1:nz+2) = a%st(1:a_n,1:a_n,0:nz+1) + call mpi_send(tab_interiorT_ne(level,m)%haloTin,size(tab_interiorT_ne(level,m)%haloTin), & + MPI_DOUBLE_PRECISION,dest_rank,send_tag,MPI_COMM_HORIZ,ierr) +#else + call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) +#endif + end if #ifndef NDEBUG if (ierr .ne. 0) & call fatalerror("Collect: send from NE failed in mpi_send().") @@ -1541,7 +1605,15 @@ contains send_tag = 1001 if (LUseO) call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) send_tag = 1011 - if (LUseT) call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) + if (LUseT) then +#ifdef MNH_GPUDIRECT + tab_interiorT_sw(level,m)%haloTin(1:a_n,1:a_n,1:nz+2) = a%st(1:a_n,1:a_n,0:nz+1) + call mpi_send(tab_interiorT_sw(level,m)%haloTin,size(tab_interiorT_sw(level,m)%haloTin), & + MPI_DOUBLE_PRECISION,dest_rank,send_tag,MPI_COMM_HORIZ,ierr) +#else + call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) +#endif + end if #ifndef NDEBUG if (ierr .ne. 0) & call fatalerror("Collect: send from SW failed in mpi_send().") @@ -1556,7 +1628,15 @@ contains send_tag = 1002 if (LUseO) call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) send_tag = 1012 - if (LUseT) call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) + if (LUseT) then +#ifdef MNH_GPUDIRECT + tab_interiorT_se(level,m)%haloTin(1:a_n,1:a_n,1:nz+2) = a%st(1:a_n,1:a_n,0:nz+1) + call mpi_send(tab_interiorT_se(level,m)%haloTin,size(tab_interiorT_se(level,m)%haloTin), & + MPI_DOUBLE_PRECISION,dest_rank,send_tag,MPI_COMM_HORIZ,ierr) +#else + call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr) +#endif + end if #ifndef NDEBUG if (ierr .ne. 0) & call fatalerror("Collect: send from SE failed in mpi_send().")