From f5613bcccf551bc890241d2b45db0980d46dbd00 Mon Sep 17 00:00:00 2001 From: Juan Escobar <escj@aero.obs-mip.fr> Date: Tue, 16 Nov 2021 12:27:48 +0100 Subject: [PATCH] Juan 16/11/2021:ZSOLVER, add routine zt1d_discretisation_allocate3d , for fast GPU memory allocation in MutiGrid --- .../discretisation.f90 | 212 +++++++++++++----- .../mg_main_mnh.f90 | 4 +- 2 files changed, 162 insertions(+), 54 deletions(-) diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 index 1bd6d51d8..b81b9dc95 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 @@ -135,6 +135,29 @@ private ! Stoarge for vertical coefficients type(vertical_coefficients) :: vert_coeff + type Temp_jacobi + real(kind=rl), pointer , contiguous :: r(:) + real(kind=rl), pointer , contiguous :: c(:), utmp(:) + real(kind=rl), pointer , contiguous :: u0(:,:,:) + real(kind=rl), pointer , contiguous :: ut0(:,:,:) + type(scalar3d) , pointer :: Sr,Sc,Sut0,Sutmp + end type Temp_jacobi + + integer , parameter :: max_lev = 128 + type (Temp_jacobi) , save , dimension(max_lev) :: Tjacobi + + real(kind=rl), pointer , contiguous , dimension(:) :: zt1d_discretisation + INTEGER :: zt1d_discretisation_size = 10000000 + INTEGER , parameter :: zt1d_discretisation_size_factor = 2 * 4 + INTEGER :: nt1d_discretisation_top = 0 , nt1d_discretisation_max = 0 + + INTEGER , ALLOCATABLE, DIMENSION (:) :: nt1d_discretisation_pointer , nt1d_discretisation_pointer_ksize + INTEGER , parameter :: zt1d_discretisation_pointer_size = 128 + INTEGER :: nt1d_discretisation_pointer_top = 0 , nt1d_discretisation_pointer_max = 0 + + logical :: gfirst_call_zt1d_discretisation = .true. + + public::discretisation_initialise_mnh public::discretisation_initialise public::discretisation_finalise @@ -158,12 +181,91 @@ public::DIRECTION_FORWARD public::DIRECTION_BACKWARD public::ORDERING_LEX public::ORDERING_RB +public::zt1d_discretisation_init contains !================================================================== ! Initialise module !================================================================== + + subroutine zt1d_discretisation_init(kiu,kju,kku) + implicit none + + integer , optional , intent(in) :: kiu,kju,kku ! size of 1 3D MNH array + + if (gfirst_call_zt1d_discretisation) then + ! + gfirst_call_zt1d_discretisation = .false. + ! + if (present(kiu)) then + zt1d_discretisation_size = kiu*kju*kku * zt1d_discretisation_size_factor + end if + allocate(zt1d_discretisation(zt1d_discretisation_size)) + zt1d_discretisation = 0.0 + !$acc enter data copyin(zt1d_discretisation) + ! + allocate(nt1d_discretisation_pointer(zt1d_discretisation_pointer_size)) + nt1d_discretisation_pointer = 0.0 + allocate(nt1d_discretisation_pointer_ksize(zt1d_discretisation_pointer_size)) + nt1d_discretisation_pointer_ksize = 0.0 + ! + end if + + end subroutine zt1d_discretisation_init + + function zt1d_discretisation_allocate3d(ptab3d,kib,kie,kjb,kje,kkb,kke) result(kindex) + implicit none + real(kind=rl), pointer , contiguous , dimension(:,:,:) :: ptab3d + !real(kind=rl), pointer , dimension(:,:,:) :: ptab3d + integer , intent(in) :: kib,kie,kjb,kje,kkb,kke + integer :: kindex + + ! local var + + integer :: ksize, kbeg, kend + + if ( nt1d_discretisation_pointer_top == zt1d_discretisation_pointer_size ) then + print*,"ERROR zt1d_discretisation_init:: zt1d_discretisation_pointer_size to small=",zt1d_discretisation_pointer_size + print*,"ERROR zt1d_discretisation_init:: Augment it " + stop + end if + + ksize = (kie-kib+1)*(kje-kjb+1)*(kke-kkb+1) + + kbeg = nt1d_discretisation_top + 1 + kend = nt1d_discretisation_top + ksize + + if ( kend > zt1d_discretisation_size ) then + print*,"ERROR zt1d_discretisation_allocate3d:: zt1d_discretisation_size to small=",zt1d_discretisation_size + print*,"ERROR zt1d_discretisation_allocate3d:: Augment it " + stop + end if + + ptab3d(kib:kie,kjb:kje,kkb:kke) => zt1d_discretisation(kbeg:) + + nt1d_discretisation_pointer_top = nt1d_discretisation_pointer_top + 1 + nt1d_discretisation_top = kend + + nt1d_discretisation_pointer(nt1d_discretisation_pointer_top) = kend + nt1d_discretisation_pointer_ksize(nt1d_discretisation_pointer_top) = ksize + + if ( nt1d_discretisation_pointer_top > nt1d_discretisation_pointer_max ) then + nt1d_discretisation_pointer_max = nt1d_discretisation_pointer_top + print*,"zt1d_discretisation_allocate3d:: nt1d_discretisation_pointer_max=",nt1d_discretisation_pointer_max + call flush(6) + endif + + if ( nt1d_discretisation_top > nt1d_discretisation_max ) then + nt1d_discretisation_max = nt1d_discretisation_top + print*,"zt1d_discretisation_allocate3d:: nt1d_discretisation_max=",nt1d_discretisation_max + call flush(6) + endif + + end function zt1d_discretisation_allocate3d + + + subroutine discretisation_initialise_mnh(grid_param_in, & model_param_in, & smoother_param_in, & @@ -1326,9 +1428,8 @@ end subroutine construct_vertical_coeff integer :: iib,iie,ijb,ije - real , dimension(:,:,:) , pointer , contiguous :: zSut0_st , zu_st + real , dimension(:,:,:) , pointer , contiguous :: zSut0_st , zu_st, zSr_st, zSutmp_st - integer , parameter :: max_lev = 128 logical , save :: Ljacob_first_call = .true. logical , save , dimension(max_lev) :: Ljacob_first_call_level = .true. @@ -1338,15 +1439,9 @@ end subroutine construct_vertical_coeff real(kind=rl), pointer , contiguous :: ut0(:,:,:) type(scalar3d) , pointer :: Sr,Sc,Sut0,Sutmp - type Temp_jacobi - real(kind=rl), pointer , contiguous :: r(:) - real(kind=rl), pointer , contiguous :: c(:), utmp(:) - real(kind=rl), pointer , contiguous :: u0(:,:,:) - real(kind=rl), pointer , contiguous :: ut0(:,:,:) - type(scalar3d) , pointer :: Sr,Sc,Sut0,Sutmp - end type Temp_jacobi - - type (Temp_jacobi) , save , dimension(max_lev) :: Tjacobi + integer :: nib,nie,njb,nje,nkb,nke + integer :: ii,ij,ik + integer :: iindex ! ! init size , param @@ -1354,7 +1449,14 @@ end subroutine construct_vertical_coeff nz = u%grid_param%nz nlocal = u%ix_max -u%ix_min + 1 halo_size = u%halo_size - + + nib = 1-halo_size + nie = nlocal+halo_size + njb = 1-halo_size + nje = nlocal+halo_size + nkb = 0 + nke = nz + 1 + ! Set optimal smoothing parameter on each level !rho = 2.0_rl/(2.0_rl+4.0_rl*model_param%omega2*u%grid_param%n**2/(1.0_rl+4.0_rl*model_param%omega2*u%grid_param%n**2)) rho = smoother_param%rho @@ -1369,42 +1471,62 @@ end subroutine construct_vertical_coeff !$acc enter data create(Tjacobi(level)%u0) end if if (LUseT) then - allocate(Tjacobi(level)%ut0(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:u%grid_param%nz+1) ) - !$acc enter data create(Tjacobi(level)%ut0) + print*,"allocate(ut0),nib,nie,njb,nje,nkb,nke,level",nib,nie,njb,nje,nkb,nke, level +!!$ allocate(Tjacobi(level)%ut0(1-halo_size:nlocal+halo_size, & +!!$ 1-halo_size:nlocal+halo_size, & +!!$ 0:u%grid_param%nz+1) ) +!!$ ut0 => Tjacobi(level)%ut0 + +!!$ allocate(ut0(nib:nie,njb:nje,nkb:nke) ) +!!$ !$acc enter data create(ut0) + iindex = zt1d_discretisation_allocate3d(ut0,nib,nie,njb,nje,nkb,nke) + + Tjacobi(level)%ut0 => ut0 + end if ! Create residual vector allocate(Tjacobi(level)%r(nz)) - !$acc enter data create(Tjacobi(level)%r) + r => Tjacobi(level)%r + !$acc enter data create(r) ! Allocate memory for auxiliary vectors for Thomas algorithm allocate(Tjacobi(level)%c(nz)) - !$acc enter data create(Tjacobi(level)%c) + c => Tjacobi(level)%c + !$acc enter data create(c) allocate(Tjacobi(level)%utmp(nz)) - !$acc enter data create(Tjacobi(level)%utmp) + utmp => Tjacobi(level)%utmp + !$acc enter data create(utmp) if (LUseT) then allocate(Tjacobi(level)%Sr) - allocate(Tjacobi(level)%Sr%st(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:u%grid_param%nz+1) ) - !$acc enter data create(Tjacobi(level)%Sr%st) +!!$ allocate(Tjacobi(level)%Sr%st(1-halo_size:nlocal+halo_size, & +!!$ 1-halo_size:nlocal+halo_size, & +!!$ 0:u%grid_param%nz+1) ) +!!$ zSr_st => Tjacobi(level)%Sr%st +!!$ !$acc enter data create(zSr_st) + iindex = zt1d_discretisation_allocate3d(zSr_st,nib,nie,njb,nje,nkb,nke) + Tjacobi(level)%Sr%st => zSr_st + allocate(Tjacobi(level)%Sut0) - allocate(Tjacobi(level)%Sut0%st(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:u%grid_param%nz+1) ) - !$acc enter data create(Tjacobi(level)%Sut0%st) +!!$ allocate(Tjacobi(level)%Sut0%st(1-halo_size:nlocal+halo_size, & +!!$ 1-halo_size:nlocal+halo_size, & +!!$ 0:u%grid_param%nz+1) ) +!!$ zSut0_st => Tjacobi(level)%Sut0%st +!!$ !$acc enter data create(zSut0_st) + iindex = zt1d_discretisation_allocate3d(zSut0_st,nib,nie,njb,nje,nkb,nke) + Tjacobi(level)%Sut0%st => zSut0_st + allocate(Tjacobi(level)%Sutmp) - allocate(Tjacobi(level)%Sutmp%st(1-halo_size:nlocal+halo_size, & - 1-halo_size:nlocal+halo_size, & - 0:u%grid_param%nz+1) ) - !$acc enter data create(Tjacobi(level)%Sutmp%st) +!!$ allocate(Tjacobi(level)%Sutmp%st(1-halo_size:nlocal+halo_size, & +!!$ 1-halo_size:nlocal+halo_size, & +!!$ 0:u%grid_param%nz+1) ) +!!$ zSutmp_st => Tjacobi(level)%Sutmp%st +!!$ !$acc enter data create(zSutmp_st) + iindex = zt1d_discretisation_allocate3d(zSutmp_st,nib,nie,njb,nje,nkb,nke) + Tjacobi(level)%Sutmp%st => zSutmp_st end if end if call boundary_mnh(u) - - #ifdef OVERLAPCOMMS overlap_comms = (nlocal > 2) #else @@ -1448,40 +1570,27 @@ end subroutine construct_vertical_coeff ! Temporary vector if (LUseO) then -!!$ allocate(u0(0:u%grid_param%nz+1, & -!!$ 1-halo_size:nlocal+halo_size, & -!!$ 1-halo_size:nlocal+halo_size) ) u0 => Tjacobi(level)%u0 end if if (LUseT) then -!!$ allocate(ut0(1-halo_size:nlocal+halo_size, & -!!$ 1-halo_size:nlocal+halo_size, & -!!$ 0:u%grid_param%nz+1) ) ut0 => Tjacobi(level)%ut0 end if if (LUseO) u0(:,:,:) = u%s(:,:,:) if (LUseT) then zu_st => u%st - !$acc kernels - ut0(:,:,:) = zu_st(:,:,:) + !$acc kernels loop independent collapse(3) + do concurrent (ii=nib:nie,ij=njb:nje,ik=nkb:nke) + ut0(ii,ij,ik) = zu_st(ii,ij,ik) + end do !$acc end kernels end if ! Create residual vector -!!$ allocate(r(nz)) r => Tjacobi(level)%r ! Allocate memory for auxiliary vectors for Thomas algorithm -!!$ allocate(c(nz)) c => Tjacobi(level)%c -!!$ allocate(utmp(nz)) utmp => Tjacobi(level)%utmp if (LUseT) then -!!$ allocate(Sr%st(1-halo_size:nlocal+halo_size, & -!!$ 1-halo_size:nlocal+halo_size, & -!!$ 0:u%grid_param%nz+1) ) Sr => Tjacobi(level)%Sr -!!$ allocate(Sut0%st(1-halo_size:nlocal+halo_size, & -!!$ 1-halo_size:nlocal+halo_size, & -!!$ 0:u%grid_param%nz+1) ) Sut0 => Tjacobi(level)%Sut0 zSut0_st => Sut0%st @@ -1491,9 +1600,6 @@ end subroutine construct_vertical_coeff zSut0_st(:,:,:) = zu_st(:,:,:) !$acc end kernels -!!$ allocate(Sutmp%st(1-halo_size:nlocal+halo_size, & -!!$ 1-halo_size:nlocal+halo_size, & -!!$ 0:u%grid_param%nz+1) ) Sutmp => Tjacobi(level)%Sutmp endif diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/mg_main_mnh.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/mg_main_mnh.f90 index 219993168..e20657cfe 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/mg_main_mnh.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/mg_main_mnh.f90 @@ -73,6 +73,7 @@ contains real(kind=rl) , optional , intent (in) :: PL,PH real(kind=rl) , optional , intent (in) :: PA_K(:),PB_K(:),PC_K(:),PD_K(:) + call mg_init_mnh(KN,KNZ,PL,PH,PA_K,PB_K,PC_K,PD_K) ! Initialise ghosts in initial solution, as mg_solve assumes that they @@ -113,6 +114,7 @@ implicit none integer , optional , intent(in) :: KIB,KIE,KIU,KJB,KJE,KJU,KKU real(kind=rl) , optional , intent(in) :: PY(:,:,:) + call zt1d_discretisation_init(KIU,KJU,KKU) call initialise_rhs_mnh(grid_param,model_param,xb_fine,KIB,KIE,KIU,KJB,KJE,KJU,KKU,PY) call haloswap_mnh(mg_param%n_lev,pproc,xb_fine) @@ -179,7 +181,7 @@ end subroutine mg_main_get_u_mnh call mg_main_initialise_u_mnh() if (i_am_master_mpi) then - write(STDOUT,*),'************************ IT=',it,' ***************************' + write(STDOUT,*) '************************ IT=',it,' ***************************' call flush(STDOUT) end if -- GitLab