diff --git a/tensorproductmultigrid_Source/communication.f90 b/tensorproductmultigrid_Source/communication.f90
index a8cf438f503f1a8f7d7e7f6acf4cd7bfa69f9507..e549f3a09d21faab81d37bbb1528b71a946dbe67 100644
--- a/tensorproductmultigrid_Source/communication.f90
+++ b/tensorproductmultigrid_Source/communication.f90
@@ -89,10 +89,12 @@ private
   integer, allocatable, dimension(:,:) :: halo_wet
   ! Vector data type for interior of field a(level,m)
   integer, allocatable, dimension(:,:) :: interior
+  integer, allocatable, dimension(:,:) :: interiorT
   ! Vector data type for one quarter of interior of field
   ! at level a(level,m). This has the same size (and can be
   ! used for communications with) the interior of a(level,m+1)
   integer, allocatable, dimension(:,:) :: sub_interior
+  integer, allocatable, dimension(:,:) :: sub_interiorT
   ! Timer for halo swaps
   type(time), allocatable, dimension(:,:) :: t_haloswap
   ! Timer for collect and distribute
@@ -233,6 +235,9 @@ contains
     integer :: halo_size
     character(len=32) :: t_label
 
+    integer,parameter :: nb_dims=3
+    integer,dimension(nb_dims) :: profil_tab,profil_sous_tab,coord_debut
+
     n = grid_param%n
     nz = grid_param%nz
 
@@ -262,8 +267,10 @@ contains
     if (LUseT) allocate(halo_nst(n_lev,0:pproc))
     if (LUseT) allocate(halo_wet(n_lev,0:pproc))
     ! Interior data types
-    allocate(interior(n_lev,0:pproc))
-    allocate(sub_interior(n_lev,0:pproc))
+    if (LUseO) allocate(interior(n_lev,0:pproc))
+    if (LUseO) allocate(sub_interior(n_lev,0:pproc))
+    if (LUseT) allocate(interiorT(n_lev,0:pproc))
+    if (LUseT) allocate(sub_interiorT(n_lev,0:pproc))
     ! Timer
     allocate(t_haloswap(n_lev,0:pproc))
     allocate(t_collect(0:pproc))
@@ -311,17 +318,38 @@ contains
         call fatalerror("Commit halo_ns failed in mpi_type_commit().")
 #endif
       ! --- Create interior data types ---
-      count = nlocal
-      blocklength = nlocal*(nz+2)
-      stride = (nz+2)*(nlocal+2*halo_size)
-      call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION,interior(level,m),ierr)
-      call mpi_type_commit(interior(level,m),ierr)
-      count = nlocal/2
-      blocklength = nlocal/2*(nz+2)
-      stride = (nlocal+2*halo_size)*(nz+2)
-      call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION,sub_interior(level,m),ierr)
-      call mpi_type_commit(sub_interior(level,m),ierr)
-
+      if (LUseO) then
+        count = nlocal
+        blocklength = nlocal*(nz+2)
+        stride = (nz+2)*(nlocal+2*halo_size)
+        call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION,interior(level,m),ierr)
+        call mpi_type_commit(interior(level,m),ierr)
+        count = nlocal/2
+        blocklength = nlocal/2*(nz+2)
+        stride = (nlocal+2*halo_size)*(nz+2)
+        call mpi_type_vector(count,blocklength,stride,MPI_DOUBLE_PRECISION,sub_interior(level,m),ierr)
+        call mpi_type_commit(sub_interior(level,m),ierr)
+      end if
+      if (LUseT) then
+        ! interiorT
+        if ( nlocal /= 0 ) then
+          profil_tab = (/ nlocal+2*halo_size , nlocal+2*halo_size , nz+2 /)
+          profil_sous_tab = (/ nlocal , nlocal , nz+2 /)
+          coord_debut = (/ 0 , 0 , 0 /)
+          call MPI_TYPE_CREATE_SUBARRAY(nb_dims,profil_tab,profil_sous_tab,coord_debut,&
+               MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,interiorT(level,m),ierr)
+          call mpi_type_commit(interiorT(level,m),ierr)
+        end if
+        ! sub_interiorT
+        if ( (nlocal/2) /= 0 ) then
+          profil_tab = (/ nlocal+2*halo_size , nlocal+2*halo_size , nz+2 /)
+          profil_sous_tab = (/ nlocal/2 , nlocal/2 , nz+2 /)
+          coord_debut = (/ 0 , 0 , 0 /)
+          call MPI_TYPE_CREATE_SUBARRAY(nb_dims,profil_tab,profil_sous_tab,coord_debut,&
+               MPI_ORDER_FORTRAN,MPI_DOUBLE_PRECISION,sub_interiorT(level,m),ierr)
+          call mpi_type_commit(sub_interiorT(level,m),ierr)
+        end if
+      end if
       ! --- Create timers ---
       write(t_label,'("t_haloswap(",I3,",",I3,")")') level,m
       call initialise_timer(t_haloswap(level,m),t_label)
@@ -369,8 +397,10 @@ contains
       if (LUseT) call mpi_type_free(halo_nst(level,m),ierr)
       if (LUseT) call mpi_type_free(halo_wet(level,m),ierr)
       ! --- Free interior data types ---
-      call mpi_type_free(interior(level,m),ierr)
-      call mpi_type_free(sub_interior(level,m),ierr)
+      if (LUseO) call mpi_type_free(interior(level,m),ierr)
+      if (LUseO) call mpi_type_free(sub_interior(level,m),ierr)
+      if (LUseT) call mpi_type_free(interiorT(level,m),ierr)
+      if (LUseT) call mpi_type_free(sub_interiorT(level,m),ierr)
       ! If we are below L_split, split data
       if ( (level .le. lev_split) .and. (m > 0) .and. (.not. reduced_m)) then
         reduced_m = .true.
@@ -391,8 +421,11 @@ contains
     ! Deallocate arrays
     if (LUseO) deallocate(halo_ns)
     if (LUseT) deallocate(halo_nst,halo_wet)
-    deallocate(interior)
-    deallocate(sub_interior)
+    if (LUseO) deallocate(interior)
+    if (LUseO) deallocate(sub_interior)
+    if (LUseT) deallocate(interiorT)
+    if (LUseT) deallocate(sub_interiorT)
+
     deallocate(t_haloswap)
     deallocate(t_collect)
     deallocate(t_distribute)
@@ -1160,6 +1193,7 @@ contains
     integer :: ierr, source_rank, dest_rank, rank, recv_tag, send_tag, iz
     logical :: corner_nw, corner_ne, corner_sw, corner_se
     integer :: recv_request(3)
+    integer :: recv_requestT(3)
 
     call start_timer(t_collect(m))
 
@@ -1196,9 +1230,12 @@ contains
                          (/p_horiz(1),p_horiz(2)+stepsize/), &
                          source_rank, &
                          ierr)
-      recv_tag = 0
-      call mpi_irecv(b%s(0,1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
+      recv_tag = 1000
+      if (LUseO) call mpi_irecv(b%s(0,1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                      recv_request(1),ierr)
+      recv_tag = 1010
+      if (LUseT) call mpi_irecv(b%st(b_n/2+1,1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
+                     recv_requestT(1),ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Collect: receive from NE failed in mpi_irecv().")
@@ -1208,9 +1245,13 @@ contains
                          (/p_horiz(1)+stepsize,p_horiz(2)/), &
                          source_rank, &
                          ierr)
-      recv_tag = 1
-      call mpi_irecv(b%s(0,b_n/2+1,1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
+      recv_tag = 1001
+      if (LUseO) call mpi_irecv(b%s(0,b_n/2+1,1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                      recv_request(2),ierr)
+      recv_tag = 1011
+      if (LUseT) call mpi_irecv(b%st(1,b_n/2+1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
+                     recv_requestT(2),ierr)
+
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Collect: receive from SW failed in mpi_irecv().")
@@ -1220,19 +1261,22 @@ contains
                          (/p_horiz(1)+stepsize,p_horiz(2)+stepsize/), &
                          source_rank, &
                          ierr)
-      recv_tag = 2
-      call mpi_irecv(b%s(0,b_n/2+1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
+      recv_tag = 1002
+      if (LUseO) call mpi_irecv(b%s(0,b_n/2+1,b_n/2+1),1,sub_interior(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
                      recv_request(3),ierr)
+      recv_tag = 1012
+      if (LUseT) call mpi_irecv(b%st(b_n/2+1,b_n/2+1,0),1,sub_interiorT(level,m-1), source_rank, recv_tag, MPI_COMM_HORIZ, &
+                     recv_requestT(3),ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Collect: receive from SE failed in mpi_irecv().")
 #endif
       ! Copy local data while waiting for data from other processes
-      do iz=0,nz+1
-        b%s(iz,1:a_n,1:a_n) = a%s(iz,1:a_n,1:a_n)
-      end do
+      if (LUseO) b%s(0:nz+1,1:a_n,1:a_n) = a%s(0:nz+1,1:a_n,1:a_n)
+      if (LUseT) b%st(1:a_n,1:a_n,0:nz+1) = a%st(1:a_n,1:a_n,0:nz+1)
       ! Wait for receives to complete before proceeding
-      call mpi_waitall(3,recv_request,MPI_STATUSES_IGNORE,ierr)
+      if (LUseO) call mpi_waitall(3,recv_request,MPI_STATUSES_IGNORE,ierr)
+      if (LUseT) call mpi_waitall(3,recv_requestT,MPI_STATUSES_IGNORE,ierr)
     end if
     if ( corner_ne ) then
       ! Send to NW
@@ -1240,8 +1284,10 @@ contains
                          (/p_horiz(1),p_horiz(2)-stepsize/), &
                          dest_rank, &
                          ierr)
-      send_tag = 0
-      call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
+      send_tag = 1000
+      if (LUseO) call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
+      send_tag = 1010
+      if (LUseT) call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Collect: send from NE failed in mpi_send().")
@@ -1253,8 +1299,10 @@ contains
                          (/p_horiz(1)-stepsize,p_horiz(2)/), &
                          dest_rank, &
                          ierr)
-      send_tag = 1
-      call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
+      send_tag = 1001
+      if (LUseO) call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
+      send_tag = 1011
+      if (LUseT) call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Collect: send from SW failed in mpi_send().")
@@ -1266,8 +1314,10 @@ contains
                          (/p_horiz(1)-stepsize,p_horiz(2)-stepsize/), &
                          dest_rank, &
                          ierr)
-      send_tag = 2
-      call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
+      send_tag = 1002
+      if (LUseO) call mpi_send(a%s(0,1,1),1,interior(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
+      send_tag = 1012
+      if (LUseT) call mpi_send(a%st(1,1,0),1,interiorT(level,m),dest_rank,send_tag,MPI_COMM_HORIZ,ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Collect: send from SE failed in mpi_send().")
@@ -1306,6 +1356,7 @@ contains
     integer :: ierr, source_rank, dest_rank, send_tag, recv_tag, rank, iz
     integer :: stat(MPI_STATUS_SIZE)
     integer :: send_request(3)
+    integer :: send_requestT(3)
     logical :: corner_nw, corner_ne, corner_sw, corner_se
 
     call start_timer(t_distribute(m))
@@ -1341,9 +1392,12 @@ contains
                          dest_rank, &
                          ierr)
 
-      send_tag = 0
-      call mpi_isend(a%s(0,1,a_n/2+1), 1,sub_interior(level,m-1),dest_rank, send_tag, &
+      send_tag = 1000
+      if (LUseO) call mpi_isend(a%s(0,1,a_n/2+1), 1,sub_interior(level,m-1),dest_rank, send_tag, &
                      MPI_COMM_HORIZ,send_request(1),ierr)
+      send_tag = 1010
+      if (LUseT) call mpi_isend(a%st(a_n/2+1,1,0), 1,sub_interiorT(level,m-1),dest_rank, send_tag, &
+                     MPI_COMM_HORIZ,send_requestT(1),ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Distribute: send to NE failed in mpi_isend().")
@@ -1353,9 +1407,13 @@ contains
                          (/p_horiz(1)+stepsize,p_horiz(2)/), &
                          dest_rank, &
                          ierr)
-      send_tag = 1
-      call mpi_isend(a%s(0,a_n/2+1,1),1,sub_interior(level,m-1), dest_rank, send_tag, &
+      send_tag = 1001
+      if (LUseO) call mpi_isend(a%s(0,a_n/2+1,1),1,sub_interior(level,m-1), dest_rank, send_tag, &
                      MPI_COMM_HORIZ, send_request(2), ierr)
+      send_tag = 1011
+      if (LUseT) call mpi_isend(a%st(1,a_n/2+1,0),1,sub_interiorT(level,m-1), dest_rank, send_tag, &
+                     MPI_COMM_HORIZ, send_requestT(2), ierr)
+
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Distribute: send to SW failed in mpi_isend().")
@@ -1365,19 +1423,22 @@ contains
                          (/p_horiz(1)+stepsize,p_horiz(2)+stepsize/), &
                          dest_rank, &
                          ierr)
-      send_tag = 2
-      call mpi_isend(a%s(0,a_n/2+1,a_n/2+1),1,sub_interior(level,m-1), dest_rank, send_tag, &
+      send_tag = 1002
+      if (LUseO) call mpi_isend(a%s(0,a_n/2+1,a_n/2+1),1,sub_interior(level,m-1), dest_rank, send_tag, &
                      MPI_COMM_HORIZ, send_request(3), ierr)
+      send_tag = 1012
+      if (LUseT) call mpi_isend(a%st(a_n/2+1,a_n/2+1,0),1,sub_interiorT(level,m-1), dest_rank, send_tag, &
+                     MPI_COMM_HORIZ, send_requestT(3), ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Distribute: send to SE failed in mpi_isend().")
 #endif
       ! While sending, copy local data
-      do iz=0,nz+1
-        b%s(iz,1:b_n,1:b_n) = a%s(iz,1:b_n,1:b_n)
-      end do
+      if (LUseO) b%s(0:nz+1,1:b_n,1:b_n) = a%s(0:nz+1,1:b_n,1:b_n)
+      if (LUseT) b%st(1:b_n,1:b_n,0:nz+1) = a%st(1:b_n,1:b_n,0:nz+1)
       ! Only proceed when async sends to complete
-      call mpi_waitall(3, send_request, MPI_STATUSES_IGNORE, ierr)
+      if (LUseO) call mpi_waitall(3, send_request, MPI_STATUSES_IGNORE, ierr)
+      if (LUseT) call mpi_waitall(3, send_requestT, MPI_STATUSES_IGNORE, ierr)
     end if
 
     if ( corner_ne ) then
@@ -1386,8 +1447,10 @@ contains
                          (/p_horiz(1),p_horiz(2)-stepsize/), &
                          source_rank, &
                          ierr)
-      recv_tag = 0
-      call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
+      recv_tag = 1000
+      if (LUseO) call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
+      recv_tag = 1010
+      if (LUseT) call mpi_recv(b%st(1,1,0),1,interiorT(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Distribute: receive on NE failed in mpi_recv().")
@@ -1399,8 +1462,10 @@ contains
                          (/p_horiz(1)-stepsize,p_horiz(2)/), &
                          source_rank, &
                          ierr)
-      recv_tag = 1
-      call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
+      recv_tag = 1001
+      if (LUseO) call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
+      recv_tag = 1011
+      if (LUseT) call mpi_recv(b%st(1,1,0),1,interiorT(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Distribute: receive on SW failed in mpi_recv().")
@@ -1412,8 +1477,10 @@ contains
                          (/p_horiz(1)-stepsize,p_horiz(2)-stepsize/), &
                          source_rank, &
                          ierr)
-      recv_tag = 2
-      call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
+      recv_tag = 1002
+      if (LUseO) call mpi_recv(b%s(0,1,1),1,interior(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
+      recv_tag = 1012
+      if (LUseT) call mpi_recv(b%st(1,1,0),1,interiorT(level,m),source_rank,recv_tag,MPI_COMM_HORIZ,stat,ierr)
 #ifndef NDEBUG
       if (ierr .ne. 0) &
         call fatalerror("Distribute: receive on NW failed in mpi_recv().")
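
For reference only, and not part of the patch: a minimal, self-contained sketch of the MPI_TYPE_CREATE_SUBARRAY pattern that the new interiorT/sub_interiorT types rely on. The values of nlocal, halo_size and nz are made-up examples. As in the patch, the start offsets are zero because the matching send/receive calls pass the first interior element (e.g. a%st(1,1,0)) as the buffer start; passing the array origin with nonzero starts would be an equivalent alternative.

! Standalone sketch of the subarray-type construction used for interiorT above.
program subarray_sketch
  use mpi
  implicit none
  integer, parameter :: nb_dims = 3
  integer, parameter :: nlocal = 8, halo_size = 1, nz = 4   ! example sizes only
  integer, dimension(nb_dims) :: profil_tab, profil_sous_tab, coord_debut
  integer :: interiorT, ierr

  call mpi_init(ierr)

  ! Full (halo-padded) extents of the transposed field st(ix,iy,iz),
  ! the interior extents, and the (zero) offset of the interior relative
  ! to the buffer address handed to MPI.
  profil_tab      = (/ nlocal+2*halo_size, nlocal+2*halo_size, nz+2 /)
  profil_sous_tab = (/ nlocal,             nlocal,             nz+2 /)
  coord_debut     = (/ 0,                  0,                  0    /)

  call mpi_type_create_subarray(nb_dims, profil_tab, profil_sous_tab, coord_debut, &
       MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, interiorT, ierr)
  call mpi_type_commit(interiorT, ierr)

  ! The committed type can now be used with count 1 in mpi_send/mpi_irecv,
  ! exactly as the patch does for the st-ordered interior.

  call mpi_type_free(interiorT, ierr)
  call mpi_finalize(ierr)
end program subarray_sketch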