Skip to content
Snippets Groups Projects
Commit 40fa0e74 authored by WAUTELET Philippe's avatar WAUTELET Philippe
Browse files

Philippe 03/04/2023: contrav: partial merge from ZSOLVER/ version

parent 5d776203
No related branches found
No related tags found
No related merge requests found
......@@ -609,7 +609,7 @@ IJU= SIZE(PDXX,2)
IKU= SIZE(PDXX,3)
!Pin positions in the pools of MNH memory
CALL MNH_MEM_POSITION_PIN( 'CONTRAV_DEVICE 1' )
CALL MNH_MEM_POSITION_PIN( 'CONTRAV_DEVICE' )
CALL MNH_MEM_GET( Z1, IIU, IJU, IKU )
CALL MNH_MEM_GET( Z2, IIU, IJU, IKU )
......@@ -624,6 +624,41 @@ CALL GET_INDICE_ll( IIB,IJB,IIE,IJE)
IKB=1+JPVEXT
IKE=IKU - JPVEXT
CALL CONTRAV_DEVICE_DIM(&
PRUT,PRVT,PRWT,&
PDXX,PDYY,PDZZ,PDZX,PDZY,&
PRUCT,PRVCT,PRWCT,Z1,Z2)
! ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH)
!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN
CALL MNH_MEM_RELEASE( 'CONTRAV_DEVICE' )
IF (MPPDB_INITIALIZED) THEN
!Check all OUT arrays
CALL MPPDB_CHECK(PRUCT,"CONTRAV end:PRUCT")
CALL MPPDB_CHECK(PRVCT,"CONTRAV end:PRVCT")
CALL MPPDB_CHECK(PRWCT,"CONTRAV end:PRWCT")
END IF
CONTAINS
SUBROUTINE CONTRAV_DEVICE_DIM(&
PRUT,PRVT,PRWT,&
PDXX,PDYY,PDZZ,PDZX,PDZY,&
PRUCT,PRVCT,PRWCT,Z1,Z2)
! ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH)
IMPLICIT NONE
REAL, DIMENSION(IIU, IJU, IKU) :: &
PRUT,PRVT,PRWT,&
PDXX,PDYY,PDZZ,PDZX,PDZY,&
PRUCT,PRVCT,PRWCT,Z1,Z2
! REAL :: ZU_EAST(IJU,IKU),ZV_NORTH(IIU,IKU),ZDZX_EAST(IJU,IKU),ZDZY_NORTH(IIU,IKU)
!
! Begin Compute
!
!$acc data present( PRUT, PRVT, PRWT, PDXX, PDYY, PDZZ, PDZX, PDZY, PRUCT, PRVCT, PRWCT, Z1, Z2 )
IF (GDATA_ON_DEVICE) THEN
......@@ -703,46 +738,31 @@ ELSE
! Z1(:,:,:) = 0.
! Z2(:,:,:) = 0.
!
CALL MPPDB_CHECK3DM("contrav_device :: init Z1/Z2",PRECISION,Z1,Z2)
!
IF (KADV_ORDER == 2 ) THEN
#ifdef MNH_OPENACC
call Print_msg( NVERB_WARNING, 'GEN', 'CONTRAV', 'OpenACC: KADV_ORDER=2 and LFLAT=.TRUE. not yet tested' )
#endif
!
!$acc kernels
!$acc loop independent collapse(3)
do jk = ikb, ike + 1
do jj = 1, iju
do ji = iib, iie
Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 &
+ ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25
end do
end do
end do
!$acc loop independent collapse(3)
do jk = ikb, ike + 1
do jj = ijb, ije
do ji = 1, iiu
Z2(:, jj, jk ) = ( PRVCT(:, jj, jk) + PRVCT( :, jj, jk - 1) ) * PDZY(:, jj, jk) * 0.25 &
+ ( PRVCT(:, jj + 1, jk) + PRVCT( :, jj + 1,jk - 1) ) * PDZY(:, jj + 1, jk) * 0.25
end do
end do
end do
!
!$mnh_do_concurrent(ji=iib:iie,jj=1:iju,jk=ikb:ike+1)
Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 &
+ ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25
!$mnh_end_do()
!$mnh_do_concurrent(ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1)
Z2(ji, jj, jk ) = ( PRVCT(ji, jj, jk) + PRVCT( ji, jj, jk - 1) ) * PDZY(ji, jj, jk) * 0.25 &
+ ( PRVCT(ji, jj + 1, jk) + PRVCT( ji, jj + 1,jk - 1) ) * PDZY(ji, jj + 1, jk) * 0.25
!$mnh_end_do()
PRWCT(:,:,:)=0.
!$acc loop independent collapse(3)
do jk = ikb, ike + 1
do jj = ijb, ije
do ji = iib, iie
PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk )
end do
end do
end do
!$acc end kernels
!$mnh_do_concurrent(ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1)
PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk )
!$mnh_end_do()
!
!$acc end kernels
ELSE IF (KADV_ORDER == 4 ) THEN
!$acc kernels present( zu_east, zv_north, zdzx_east, zdzy_north )
!
!!$ IF (NHALO == 1) THEN
IF ( GWEST ) THEN
......@@ -786,109 +806,108 @@ ELSE IF (KADV_ORDER == 4 ) THEN
!
!
!* 3.1 interior of the process subdomain
!$acc kernels
!
!
!PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works
! but ji loop can not be collapsed -> 10x slower on GPU
! *ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance)
!$acc loop independent collapse(3) private(ztmp1, ztmp2)
do jk = IKB, IKE + 1
do jj = 1, iju
do ji = IW, IE
!$mnh_do_concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0
Z1(ji, jj, jk ) = 7.0 * ( ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk-1 ) ) * ztmp1 &
+ ( PRUCT(ji+1, jj, jk ) + PRUCT(ji+1, jj, jk-1 ) ) * ztmp2 ) / 12.0 &
- 0.5 * ( ( PRUCT(ji-1, jj, jk ) + PRUCT(ji-1, jj, jk-1 ) ) * PDZX(ji-1, jj, jk) &
+ ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0
end do
end do
end do
!
!$acc loop independent collapse(3)
do jk = IKB, IKE + 1
do jj = is, in
do ji = 1, iiu
!$mnh_end_do()
!
!$mnh_do_concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0
Z2(ji, jj, jk ) = 7.0 * ( ( PRVCT(ji, jj, jk ) + PRVCT(ji, jj, jk-1 ) ) * ztmp1 &
+ ( PRVCT(ji, jj+1, jk ) + PRVCT(ji, jj+1, jk-1 ) ) * ztmp2 ) / 12.0 &
- 0.5 * ( ( PRVCT(ji, jj-1, jk ) + PRVCT(ji, jj-1, jk-1 ) ) * PDZY(ji, jj-1, jk ) &
+ ( PRVCT(ji, jj+2, jk ) + PRVCT(ji, jj+2, jk-1 ) ) * PDZY(ji, jj+2, jk ) ) / 12.0
end do
end do
end do
!$mnh_end_do()
!$acc end kernels
!
!!$CALL MPPDB_CHECK3DM("contrav_device :: dom Z1/Z2",PRECISION,Z1,Z2)
!
!* 3.2 limits of the process subdomain (inside the whole domain or in cyclic conditions)
!
!!$ IF (NHALO==1) THEN
!$acc loop independent collapse(2)
do jk = IKB, IKE + 1
do jj = 1, iju
!$acc kernels async
!$mnh_do_concurrent(jj=1:iju,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0
Z1(IIE, jj, jk ) = 7.0 * ( ( PRUCT(IIE, jj, jk ) + PRUCT(IIE, jj, jk-1 ) ) * ztmp1 &
+ ( PRUCT(IIE+1, jj, jk ) + PRUCT(IIE+1, jj, jk-1 ) ) * ztmp2 ) / 12.0 &
- 0.5 * ( ( PRUCT(IIE-1, jj, jk ) + PRUCT(IIE-1, jj, jk-1 ) ) * PDZX(IIE-1, jj, jk) &
+ ( ZU_EAST (jj, jk ) + ZU_EAST (jj, jk-1 ) ) * ZDZX_EAST (jj, jk) ) / 12.0
end do
end do
!$mnh_end_do()
!$acc end kernels
!
!$acc loop independent collapse(2)
do jk = IKB, IKE + 1
do ji = 1, iiu
!$acc kernels async
!$mnh_do_concurrent(ji=1:iiu,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0
Z2(ji, IJE, jk ) = 7.0 * ( ( PRVCT (ji, IJE, jk ) + PRVCT (ji, IJE, jk-1 ) ) * ztmp1 &
+ ( PRVCT (ji, IJE+1, jk ) + PRVCT (ji, IJE+1, jk-1 ) ) * ztmp2 ) / 12.0 &
- 0.5 * ( ( PRVCT (ji, IJE-1, jk ) + PRVCT (ji, IJE-1, jk-1 ) ) * PDZY (ji, IJE-1, jk ) &
+ ( ZV_NORTH(ji, jk ) + ZV_NORTH(ji, jk-1 ) ) * ZDZY_NORTH(ji, jk ) ) / 12.0
end do
end do
!$mnh_end_do()
!$acc end kernels
!$acc wait
!!$ END IF
!
!* 3.3 non-CYCLIC CASE IN THE X DIRECTION: 2nd order case
!
IF ( GWEST ) THEN
IF ( GWEST ) THEN
!$acc kernels async
Z1(IIB, :, IKB:IKE+1 ) = ( PRUCT(IIB, :, IKB:IKE+1 ) + PRUCT(IIB, :, IKB-1:IKE ) ) * PDZX(IIB, :, IKB:IKE+1 ) * 0.25 &
+ ( PRUCT(IIB+1, :, IKB:IKE+1 ) + PRUCT(IIB+1, :, IKB-1:IKE ) ) * PDZX(IIB+1, :, IKB:IKE+1 ) * 0.25
!$acc end kernels
END IF
!
IF ( GEAST ) THEN
!$acc kernels async
Z1(IIE, :, IKB:IKE+1 ) = ( PRUCT(IIE, :, IKB:IKE+1 ) + PRUCT(IIE, :, IKB-1:IKE ) ) * PDZX(IIE, :, IKB:IKE+1 ) * 0.25 &
+ ( PRUCT(IIE+1, :, IKB:IKE+1 ) + PRUCT(IIE+1, :, IKB-1:IKE ) ) * PDZX(IIE+1, :, IKB:IKE+1 ) * 0.25
END IF
!$acc end kernels
END IF
!
!* 3.4 non-CYCLIC CASE IN THE Y DIRECTION: 2nd order case
!
IF ( GSOUTH ) THEN
IF ( GSOUTH ) THEN
!$acc kernels async
Z2(:, IJB, IKB:IKE+1 ) = ( PRVCT(:, IJB, IKB:IKE+1 ) + PRVCT(:, IJB, IKB-1:IKE ) ) * PDZY(:, IJB, IKB:IKE+1 ) * 0.25 &
+ ( PRVCT(:, IJB+1, IKB:IKE+1 ) + PRVCT(:, IJB+1, IKB-1:IKE ) ) * PDZY(:, IJB+1, IKB:IKE+1 ) * 0.25
END IF
!$acc end kernels
END IF
!
IF ( GNORTH ) THEN
IF ( GNORTH ) THEN
!$acc kernels async
Z2(:, IJE, IKB:IKE+1 ) = ( PRVCT(:, IJE, IKB:IKE+1 ) + PRVCT(:, IJE, IKB-1:IKE ) ) * PDZY(:, IJE, IKB:IKE+1 ) * 0.25 &
+ ( PRVCT(:, IJE+1, IKB:IKE+1 ) + PRVCT(:, IJE+1, IKB-1:IKE ) ) * PDZY(:, IJE+1, IKB:IKE+1 ) * 0.25
END IF
!$acc end kernels
END IF
!$acc wait
!
!* 3.5 Vertical contyravariant wind
!
!
!$acc kernels
!!$ CALL GET_HALO(Z1)
!!$ CALL GET_HALO(Z2)
!!$
!!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ)
PRWCT(:,:,:)=0.
!$acc loop independent collapse(3)
do jk = ikb, ike + 1
do jj = ijb, ije
do ji = iib, iie
!$mnh_do_concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1)
PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk )
end do
end do
end do
!$mnh_end_do()
!$acc end kernels
!
CALL MPPDB_CHECK3DM("contrav_device :: PRWCT/Z1/Z2",PRECISION,PRWCT,Z1,Z2)
!
END IF
!
......@@ -915,15 +934,11 @@ END IF FLAT
!$acc end data
!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN
CALL MNH_MEM_RELEASE( 'CONTRAV_DEVICE 1' )
!
! End Compute
!
IF (MPPDB_INITIALIZED) THEN
!Check all OUT arrays
CALL MPPDB_CHECK(PRUCT,"CONTRAV end:PRUCT")
CALL MPPDB_CHECK(PRVCT,"CONTRAV end:PRVCT")
CALL MPPDB_CHECK(PRWCT,"CONTRAV end:PRWCT")
END IF
END SUBROUTINE CONTRAV_DEVICE_DIM
!-----------------------------------------------------------------------
END SUBROUTINE CONTRAV_DEVICE
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment