From 763d448c702c3a82f6fc3c86225b489733f14d03 Mon Sep 17 00:00:00 2001 From: Juan Escobar <juan.escobar@aero.obs-mip.fr> Date: Wed, 18 Aug 2021 15:40:30 +0200 Subject: [PATCH] Juan 18/08/2021: add acc kernels to init routines --- .../datatypes.f90 | 7 +- .../multigrid.f90 | 35 ++-- .../profiles.f90 | 157 ++++++++++++------ 3 files changed, 132 insertions(+), 67 deletions(-) diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90 index 4ff30cd06..d0ddee6da 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90 @@ -142,6 +142,8 @@ private ! direction integer, parameter :: dim_horiz = 2 ! horiz. dimension + real(kind=rl) , dimension(:,:,:), pointer , contiguous :: zphi_st + phi%grid_param = grid_param call mpi_comm_size(comm_horiz, nproc, ierr) nlocal = grid_param%n/sqrt(1.0*nproc) @@ -190,11 +192,12 @@ private phi%s(:,:,:) = 0.0_rl end if if (LUseT) then - allocate(phi%st(1-halo_size:nlocal+halo_size, & + allocate(zphi_st(1-halo_size:nlocal+halo_size, & 1-halo_size:nlocal+halo_size, & 0:grid_param%nz+1)) + phi%st => zphi_st !$acc kernels - phi%st(:,:,:) = 0.0_rl + zphi_st(:,:,:) = 0.0_rl !$acc end kernels end if diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 index e642d1722..bb1a7d3f7 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 @@ -166,7 +166,7 @@ contains integer :: vertbc character(len=32) :: t_label - + real , dimension(:,:,:) , pointer , contiguous :: zxu_mg_st,zxb_mg_st,zxr_mg_st if (i_am_master_mpi) & write(STDOUT,*) '*** Initialising multigrid ***' @@ -272,18 +272,27 @@ contains endif if (LUseT) then - allocate(xu_mg(level,m)%st(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:nz+1)) - allocate(xb_mg(level,m)%st(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:nz+1)) - allocate(xr_mg(level,m)%st(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:nz+1)) - xu_mg(level,m)%st(:,:,:) = 0.0_rl - xb_mg(level,m)%st(:,:,:) = 0.0_rl - xr_mg(level,m)%st(:,:,:) = 0.0_rl + allocate(zxu_mg_st(1-halo_size:nlocal+halo_size, & + 1-halo_size:nlocal+halo_size, & + 0:nz+1)) + xu_mg(level,m)%st => zxu_mg_st + + allocate(zxb_mg_st(1-halo_size:nlocal+halo_size, & + 1-halo_size:nlocal+halo_size, & + 0:nz+1)) + xb_mg(level,m)%st => zxb_mg_st + + allocate(zxr_mg_st(1-halo_size:nlocal+halo_size, & + 1-halo_size:nlocal+halo_size, & + 0:nz+1)) + xr_mg(level,m)%st => zxr_mg_st + + + !$acc kernels + zxu_mg_st(:,:,:) = 0.0_rl + zxb_mg_st(:,:,:) = 0.0_rl + zxr_mg_st(:,:,:) = 0.0_rl + !$acc end kernels endif ! NB: 1st coordinate is in the y-direction of the processor grid, diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90 index 489210645..e20e0d221 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90 @@ -65,23 +65,40 @@ private integer , optional, intent(in) :: KIB,KIE,KIU,KJB,KJE,KJU,KKU real(kind=rl) , optional, intent(in) :: PU(:,:,:) + real , dimension(:,:,:) , pointer , contiguous :: zu_st + ix_min = u%ix_min ix_max = u%ix_max iy_min = u%iy_min iy_max = u%iy_max IF (.NOT. PRESENT(KIB) ) THEN - ! Initialise RHS - do ix=ix_min, ix_max - do iy=iy_min, iy_max - do iz=1,u%grid_param%nz - - if (LUseO) u%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl - if (LUseT) u%st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl - - end do - end do - end do - ELSE + ! Initialise RHS + if (LUseO) then + do ix=ix_min, ix_max + do iy=iy_min, iy_max + do iz=1,u%grid_param%nz + + u%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl + + end do + end do + end do + end if + if (LUseT) then + zu_st => u%st + !$acc kernels loop independent collapse(3) + do iz=1,u%grid_param%nz + do iy=iy_min, iy_max + do ix=ix_min, ix_max + + zu_st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl + + end do + end do + end do + !$acc end kernels + end if + ELSE ! Initialise RHS if (LUseO) then do ix=ix_min, ix_max @@ -93,13 +110,16 @@ private end do end if if (LUseT) then + zu_st => u%st + !$acc kernels loop independent collapse(3) do iz=1,u%grid_param%nz do iy=iy_min, iy_max do ix=ix_min, ix_max - u%st(ix-ix_min+1,iy-iy_min+1,iz) = PU(IX-ix_min+KIB,IY-iy_min+KJB,IZ) + zu_st(ix-ix_min+1,iy-iy_min+1,iz) = PU(IX-ix_min+KIB,IY-iy_min+KJB,IZ) end do end do end do + !$acc end kernels end if END IF end subroutine initialise_u_mnh @@ -160,53 +180,86 @@ private integer , optional, intent(in) :: KIB,KIE,KIU,KJB,KJE,KJU,KKU real(kind=rl) , optional, intent(in) :: PY(:,:,:) + real , dimension(:,:,:) , pointer , contiguous :: zb_st + ix_min = b%ix_min ix_max = b%ix_max iy_min = b%iy_min iy_max = b%iy_max IF (.NOT. PRESENT(KIB) ) THEN ! Initialise RHS - do ix=ix_min, ix_max - do iy=iy_min, iy_max - do iz=1,b%grid_param%nz - x = 1.0_rl*((ix-0.5_rl)/(1.0_rl*b%grid_param%n)) - y = 1.0_rl*((iy-0.5_rl)/(1.0_rl*b%grid_param%n)) - z = 1.0_rl*((iz-0.5_rl)/(1.0_rl*b%grid_param%nz)) - - if (LUseO) b%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl - if (LUseT) b%st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl - - if ( ( x .ge. 0.1_rl ) .and. ( x .le. 0.4_rl ) & - .and. (y .ge. 0.3_rl ) .and. ( y .le. 0.6_rl ) & - .and. (z .ge. 0.2_rl ) .and. ( z .le. 0.7_rl ) ) & - then - if (LUseO) b%s(iz,iy-iy_min+1,ix-ix_min+1) = 1.0_rl - if (LUseT) b%st(ix-ix_min+1,iy-iy_min+1,iz) = 1.0_rl - end if - - end do - end do - end do + if (LUseO) then + do ix=ix_min, ix_max + do iy=iy_min, iy_max + do iz=1,b%grid_param%nz + x = 1.0_rl*((ix-0.5_rl)/(1.0_rl*b%grid_param%n)) + y = 1.0_rl*((iy-0.5_rl)/(1.0_rl*b%grid_param%n)) + z = 1.0_rl*((iz-0.5_rl)/(1.0_rl*b%grid_param%nz)) + + b%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl + + if ( ( x .ge. 0.1_rl ) .and. ( x .le. 0.4_rl ) & + .and. (y .ge. 0.3_rl ) .and. ( y .le. 0.6_rl ) & + .and. (z .ge. 0.2_rl ) .and. ( z .le. 0.7_rl ) ) & + then + + b%s(iz,iy-iy_min+1,ix-ix_min+1) = 1.0_rl + + end if + + end do + end do + end do + endif + if (LUseT) then + zb_st => b%st + !$acc kernels loop independent collapse(3) + do iz=1,b%grid_param%nz + do iy=iy_min, iy_max + do ix=ix_min, ix_max + x = 1.0_rl*((ix-0.5_rl)/(1.0_rl*b%grid_param%n)) + y = 1.0_rl*((iy-0.5_rl)/(1.0_rl*b%grid_param%n)) + z = 1.0_rl*((iz-0.5_rl)/(1.0_rl*b%grid_param%nz)) + + zb_st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl + + if ( ( x .ge. 0.1_rl ) .and. ( x .le. 0.4_rl ) & + .and. (y .ge. 0.3_rl ) .and. ( y .le. 0.6_rl ) & + .and. (z .ge. 0.2_rl ) .and. ( z .le. 0.7_rl ) ) & + then + + zb_st(ix-ix_min+1,iy-iy_min+1,iz) = 1.0_rl + + end if + + end do + end do + end do + !$acc end kernels + end if ELSE ! Initialise RHS - if (LUseO) then - do ix=ix_min, ix_max - do iy=iy_min, iy_max - do iz=1,b%grid_param%nz - b%s(iz,iy-iy_min+1,ix-ix_min+1) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ) - end do - end do - end do - end if - if (LUseT) then - do iz=1,b%grid_param%nz - do iy=iy_min, iy_max - do ix=ix_min, ix_max - b%st(ix-ix_min+1,iy-iy_min+1,iz) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ) - end do - end do - end do - end if + if (LUseO) then + do ix=ix_min, ix_max + do iy=iy_min, iy_max + do iz=1,b%grid_param%nz + b%s(iz,iy-iy_min+1,ix-ix_min+1) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ) + end do + end do + end do + end if + if (LUseT) then + zb_st => b%st + !$acc kernels loop independent collapse(3) + do iz=1,b%grid_param%nz + do iy=iy_min, iy_max + do ix=ix_min, ix_max + zb_st(ix-ix_min+1,iy-iy_min+1,iz) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ) + end do + end do + end do + !$acc end kernels + end if END IF end subroutine initialise_rhs_mnh !================================================================== -- GitLab