From fc613008cf3cfbfe8ddbf5d25ce3a345e2ac7395 Mon Sep 17 00:00:00 2001 From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr> Date: Wed, 26 Jun 2019 15:08:13 +0200 Subject: [PATCH] Philippe 26/06/2019: OPENACC: optimisation for GPU + improve readability --- src/MNH/contrav.f90 | 256 +++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 132 deletions(-) diff --git a/src/MNH/contrav.f90 b/src/MNH/contrav.f90 index b0c14210a..4a1d6da58 100644 --- a/src/MNH/contrav.f90 +++ b/src/MNH/contrav.f90 @@ -316,7 +316,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN !!$ END IF ! ! - !* 3.1 interior of the processor subdomain + !* 3.1 interior of the process subdomain ! ! Z1(IW:IE,:,IKB:IKE+1)= & @@ -344,7 +344,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN +(PRVCT(:,IS+2:IN+2,IKB:IKE+1)+PRVCT(:,IS+2:IN+2,IKB-1:IKE)) & *PDZY(:,IS+2:IN+2,IKB:IKE+1) *0.5)/12.0 ! -!* 3.2 limits of the processor subdomain (inside the whole domain or in cyclic conditions) +!* 3.2 limits of the process subdomain (inside the whole domain or in cyclic conditions) ! !!$ IF (NHALO==1) THEN @@ -519,6 +519,7 @@ END SUBROUTINE CONTRAV !! Corrections 19/01/11 (by J.P. Pinty) WC 4th order !! Corrections 28/03/11 (by V.Masson) // of WC 4th order !! J.Escobar 21/03/2013: for HALOK comment all NHALO=1 test +! P. Wautelet 26/06/2019: optimisation for GPU + improve readability !---------------------------------------------------------------------------- ! !* 0. DECLARATIONS @@ -562,22 +563,22 @@ REAL, DIMENSION(:,:,:), INTENT(OUT) :: PRWCT ! Contrav comp along z-bar INTEGER, INTENT(IN) :: KADV_ORDER ! Order of the advection scheme REAL, DIMENSION(:,:,:), INTENT(OUT) :: Z1,Z2 ! Work arrays !$acc declare present(Z1,Z2) -LOGICAL, OPTIONAL, INTENT(IN) :: ODATA_ON_DEVICE ! Is some of the data on the accelerator device +LOGICAL, OPTIONAL, INTENT(IN) :: ODATA_ON_DEVICE ! Is some of the data on the accelerator device ! ! !* 0.2 declarations of local variables ! -REAL, DIMENSION(:,:),ALLOCATABLE::ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH -!$acc declare create(ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) -INTEGER :: IIB,IIE,IJB,IJE,IKB,IKE -INTEGER :: IIU, IJU, IKU -INTEGER:: IW,IE,IS,IN ! Coordinate of forth order diffusion area -! -TYPE(LIST_ll), POINTER :: TZFIELD_U, TZFIELD_V, TZFIELD_DZX, TZFIELD_DZY -TYPE(HALO2LIST_ll), POINTER :: TZHALO2_U, TZHALO2_V, TZHALO2_DZX, TZHALO2_DZY -INTEGER :: IINFO_ll -LOGICAL :: GDATA_ON_DEVICE - +integer :: ji, jj, jk +INTEGER :: IIB, IIE, IJB, IJE, IKB, IKE +INTEGER :: IIU, IJU, IKU +INTEGER :: IW, IE, IS, IN ! Coordinate of fourth order diffusion area +INTEGER :: IINFO_ll +LOGICAL :: GDATA_ON_DEVICE +real :: ZTMP1, ZTMP2 ! Intermediate work variables +REAL, DIMENSION(:,:), ALLOCATABLE :: ZU_EAST, ZV_NORTH, ZDZX_EAST, ZDZY_NORTH +!$acc declare create(ZU_EAST, ZV_NORTH, ZDZX_EAST, ZDZY_NORTH) +TYPE(LIST_ll), POINTER :: TZFIELD_U, TZFIELD_V, TZFIELD_DZX, TZFIELD_DZY +TYPE(HALO2LIST_ll), POINTER :: TZHALO2_U, TZHALO2_V, TZHALO2_DZX, TZHALO2_DZY IF ( PRESENT(ODATA_ON_DEVICE) ) THEN GDATA_ON_DEVICE = ODATA_ON_DEVICE @@ -612,7 +613,7 @@ IKE=IKU - JPVEXT ! IF (GDATA_ON_DEVICE) THEN !PW TODO:remplacer (ailleurs aussi...) 1/PDXX... par PINV_PDXX (fait pour la turbulence...) cfr MNH/turb_hor_splt.f90 -!$acc kernels present(PRUCT,PRVCT,PRUT,PRVT,PDXX,PDYY) +!$acc kernels PRUCT(:,:,:) = PRUT(:,:,:) / PDXX(:,:,:) PRVCT(:,:,:) = PRVT(:,:,:) / PDYY(:,:,:) !$acc end kernels @@ -656,7 +657,7 @@ IF (KADV_ORDER == 4 ) THEN ZDZX_EAST(:,:) = TZHALO2_DZX%HALO2%EAST ZV_NORTH(:,:) = TZHALO2_V%HALO2%NORTH ZDZY_NORTH(:,:) = TZHALO2_DZY%HALO2%NORTH - !$acc update device(ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) +!$acc update device(ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) END IF END IF ! @@ -666,7 +667,7 @@ END IF ! IF (LFLAT) THEN IF (GDATA_ON_DEVICE) THEN -!$acc kernels present(PRWCT,PRWT,PDZZ) +!$acc kernels PRWCT(:,:,:) = PRWT(:,:,:) / PDZZ(:,:,:) !$acc end kernels !$acc update self(PRWCT) @@ -679,11 +680,9 @@ END IF !* 3. Compute the vertical contravariant components (general case) ! ------------------------------------ ! -!$acc kernels present(Z1,Z2) present(PRUCT,PRVCT,PRWCT) present(PDXX,PDYY,PDZZ,PDZX,PDZY) & -!$acc & present(ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) -!PW:TODO:initialize Z1,Z2,PRWCT only if necessary (or part of it) -Z1(:,:,:) = 0. -Z2(:,:,:) = 0. +!$acc kernels +! Z1(:,:,:) = 0. +! Z2(:,:,:) = 0. ! IF (KADV_ORDER == 2 ) THEN #ifdef _OPENACC @@ -691,26 +690,36 @@ PRINT *,'OPENACC: contrav::KADV_ORDER=2 and LFLAT=.TRUE. not yet tested' CALL ABORT #endif ! -!PW:TODO: not parallelized with openacc - Z1(IIB:IIE,:,IKB:IKE+1)= & - (PRUCT(IIB:IIE,:,IKB:IKE+1)+PRUCT(IIB:IIE,:,IKB-1:IKE) ) & - *PDZX(IIB:IIE,:,IKB:IKE+1) *0.25 & - +(PRUCT(IIB+1:IIE+1,:,IKB:IKE+1)+PRUCT(IIB+1:IIE+1,:,IKB-1:IKE) ) & - *PDZX(IIB+1:IIE+1,:,IKB:IKE+1) *0.25 - -!PW:TODO: not parallelized with openacc - Z2(:,IJB:IJE,IKB:IKE+1)= & - (PRVCT(:,IJB:IJE,IKB:IKE+1)+PRVCT(:,IJB:IJE,IKB-1:IKE) ) & - *PDZY(:,IJB:IJE,IKB:IKE+1) *0.25 & - +(PRVCT(:,IJB+1:IJE+1,IKB:IKE+1)+PRVCT(:,IJB+1:IJE+1,IKB-1:IKE) ) & - *PDZY(:,IJB+1:IJE+1,IKB:IKE+1) *0.25 +!$acc loop independent collapse(3) + do jk = ikb, ike + 1 + do jj = 1, iju + do ji = iib, iie + Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 & + + ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25 + end do + end do + end do + +!$acc loop independent collapse(3) + do jk = ikb, ike + 1 + do jj = ijb, ije + do ji = 1, iiu + Z2(:, jj, jk ) = ( PRVCT(:, jj, jk) + PRVCT( :, jj, jk - 1) ) * PDZY(:, jj, jk) * 0.25 & + + ( PRVCT(:, jj + 1, jk) + PRVCT( :, jj + 1,jk - 1) ) * PDZY(:, jj + 1, jk) * 0.25 + end do + end do + end do + PRWCT(:,:,:)=0. -!PW:TODO: not parallelized with openacc - PRWCT(IIB:IIE,IJB:IJE,IKB:IKE+1) = & - ( PRWT(IIB:IIE,IJB:IJE,IKB:IKE+1) & - - Z1(IIB:IIE,IJB:IJE,IKB:IKE+1) & - - Z2(IIB:IIE,IJB:IJE,IKB:IKE+1) & - ) / PDZZ(IIB:IIE,IJB:IJE,IKB:IKE+1) + +!$acc loop independent collapse(3) + do jk = ikb, ike + 1 + do jj = ijb, ije + do ji = iib, iie + PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) + end do + end do + end do ! ELSE IF (KADV_ORDER == 4 ) THEN ! @@ -755,109 +764,90 @@ ELSE IF (KADV_ORDER == 4 ) THEN !!$ END IF ! ! - !* 3.1 interior of the processor subdomain -! -! - Z1(IW:IE,:,IKB:IKE+1)= & - 7.0*( (PRUCT(IW:IE,:,IKB:IKE+1)+PRUCT(IW:IE,:,IKB-1:IKE)) & - *( 9.0*PDZX(IW:IE,:,IKB:IKE+1)-(PDZX(IW+1:IE+1,:,IKB:IKE+1) & - +PDZX(IW:IE,:,IKB:IKE+1)+PDZX(IW-1:IE-1,:,IKB:IKE+1))/3.0)/8.0 * 0.5 & - +(PRUCT(IW+1:IE+1,:,IKB:IKE+1)+PRUCT(IW+1:IE+1,:,IKB-1:IKE)) & - *( 9.0*PDZX(IW+1:IE+1,:,IKB:IKE+1)-(PDZX(IW+2:IE+2,:,IKB:IKE+1) & - +PDZX(IW+1:IE+1,:,IKB:IKE+1)+PDZX(IW:IE,:,IKB:IKE+1))/3.0)/8.0 * 0.5 )/12.0 & - -( (PRUCT(IW-1:IE-1,:,IKB:IKE+1)+PRUCT(IW-1:IE-1,:,IKB-1:IKE)) & - *PDZX(IW-1:IE-1,:,IKB:IKE+1) *0.5 & - +(PRUCT(IW+2:IE+2,:,IKB:IKE+1)+PRUCT(IW+2:IE+2,:,IKB-1:IKE)) & - *PDZX(IW+2:IE+2,:,IKB:IKE+1) *0.5)/12.0 - -! - Z2(:,IS:IN,IKB:IKE+1)= & - 7.0*( (PRVCT(:,IS:IN,IKB:IKE+1)+PRVCT(:,IS:IN,IKB-1:IKE)) & - *( 9.0*PDZY(:,IS:IN,IKB:IKE+1)-(PDZY(:,IS+1:IN+1,IKB:IKE+1) & - +PDZY(:,IS:IN,IKB:IKE+1)+PDZY(:,IS-1:IN-1,IKB:IKE+1))/3.0)/8.0 * 0.5 & - +(PRVCT(:,IS+1:IN+1,IKB:IKE+1)+PRVCT(:,IS+1:IN+1,IKB-1:IKE)) & - *( 9.0*PDZY(:,IS+1:IN+1,IKB:IKE+1)-(PDZY(:,IS+2:IN+2,IKB:IKE+1) & - +PDZY(:,IS+1:IN+1,IKB:IKE+1)+PDZY(:,IS:IN,IKB:IKE+1))/3.0)/8.0 * 0.5 )/12.0 & - -( (PRVCT(:,IS-1:IN-1,IKB:IKE+1)+PRVCT(:,IS-1:IN-1,IKB-1:IKE)) & - *PDZY(:,IS-1:IN-1,IKB:IKE+1) *0.5 & - +(PRVCT(:,IS+2:IN+2,IKB:IKE+1)+PRVCT(:,IS+2:IN+2,IKB-1:IKE)) & - *PDZY(:,IS+2:IN+2,IKB:IKE+1) *0.5)/12.0 -! -!* 3.2 limits of the processor subdomain (inside the whole domain or in cyclic conditions) + !* 3.1 interior of the process subdomain +! +! +!PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works +! but ji loop can not be collapsed -> 10x slower on GPU +! *ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance) +!$acc loop independent collapse(3) private(ztmp1, ztmp2) + do jk = IKB, IKE + 1 + do jj = 1, iju + do ji = IW, IE + ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0 + ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0 + Z1(ji, jj, jk ) = 7.0 * ( ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk-1 ) ) * ztmp1 & + + ( PRUCT(ji+1, jj, jk ) + PRUCT(ji+1, jj, jk-1 ) ) * ztmp2 ) / 12.0 & + - 0.5 * ( ( PRUCT(ji-1, jj, jk ) + PRUCT(ji-1, jj, jk-1 ) ) * PDZX(ji-1, jj, jk) & + + ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0 + end do + end do + end do +! +!$acc loop independent collapse(3) + do jk = IKB, IKE + 1 + do jj = is, in + do ji = 1, iiu + ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0 + ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0 + Z2(ji, jj, jk ) = 7.0 * ( ( PRVCT(ji, jj, jk ) + PRVCT(ji, jj, jk-1 ) ) * ztmp1 & + + ( PRVCT(ji, jj+1, jk ) + PRVCT(ji, jj+1, jk-1 ) ) * ztmp2 ) / 12.0 & + - 0.5 * ( ( PRVCT(ji, jj-1, jk ) + PRVCT(ji, jj-1, jk-1 ) ) * PDZY(ji, jj-1, jk ) & + + ( PRVCT(ji, jj+2, jk ) + PRVCT(ji, jj+2, jk-1 ) ) * PDZY(ji, jj+2, jk ) ) / 12.0 + end do + end do + end do +! +!* 3.2 limits of the process subdomain (inside the whole domain or in cyclic conditions) ! !!$ IF (NHALO==1) THEN - - Z1(IIE,:,IKB:IKE+1)= & - 7.0*( (PRUCT(IIE,:,IKB:IKE+1)+PRUCT(IIE,:,IKB-1:IKE)) & - *( 9.0*PDZX(IIE,:,IKB:IKE+1)-(PDZX(IIE+1,:,IKB:IKE+1) & - +PDZX(IIE,:,IKB:IKE+1)+PDZX(IIE-1,:,IKB:IKE+1))/3.0)/8.0 * 0.5 & - +(PRUCT(IIE+1,:,IKB:IKE+1)+PRUCT(IIE+1,:,IKB-1:IKE)) & -! *( 9.0*PDZX(IIE+1,:,IKB:IKE+1)-(TZHALO2_DZX%HALO2%EAST(:,IKB:IKE+1) & - *( 9.0*PDZX(IIE+1,:,IKB:IKE+1)-(ZDZX_EAST(:,IKB:IKE+1) & - +PDZX(IIE+1,:,IKB:IKE+1)+PDZX(IIE,:,IKB:IKE+1))/3.0)/8.0 * 0.5 )/12.0 & - -( (PRUCT(IIE-1,:,IKB:IKE+1)+PRUCT(IIE-1,:,IKB-1:IKE)) & - *PDZX(IIE-1,:,IKB:IKE+1) *0.5 & -! +(TZHALO2_U%HALO2%EAST(:,IKB:IKE+1)+TZHALO2_U%HALO2%EAST(:,IKB-1:IKE)) & - +(ZU_EAST(:,IKB:IKE+1)+ZU_EAST(:,IKB-1:IKE)) & -! *TZHALO2_DZX%HALO2%EAST(:,IKB:IKE+1) *0.5)/12.0 - *ZDZX_EAST(:,IKB:IKE+1) *0.5)/12.0 -! - Z2(:,IJE,IKB:IKE+1)= & - 7.0*( (PRVCT(:,IJE,IKB:IKE+1)+PRVCT(:,IJE,IKB-1:IKE)) & - *( 9.0*PDZY(:,IJE,IKB:IKE+1)-(PDZY(:,IJE+1,IKB:IKE+1) & - +PDZY(:,IJE,IKB:IKE+1)+PDZY(:,IJE-1,IKB:IKE+1))/3.0)/8.0 * 0.5 & - +(PRVCT(:,IJE+1,IKB:IKE+1)+PRVCT(:,IJE+1,IKB-1:IKE)) & -! *( 9.0*PDZY(:,IJE+1,IKB:IKE+1)-(TZHALO2_DZY%HALO2%NORTH(:,IKB:IKE+1) & - *( 9.0*PDZY(:,IJE+1,IKB:IKE+1)-(ZDZY_NORTH(:,IKB:IKE+1) & - +PDZY(:,IJE+1,IKB:IKE+1)+PDZY(:,IJE,IKB:IKE+1))/3.0)/8.0 * 0.5 )/12.0 & - -( (PRVCT(:,IJE-1,IKB:IKE+1)+PRVCT(:,IJE-1,IKB-1:IKE)) & - *PDZY(:,IJE-1,IKB:IKE+1) *0.5 & -! +(TZHALO2_V%HALO2%NORTH(:,IKB:IKE+1)+TZHALO2_V%HALO2%NORTH(:,IKB-1:IKE)) & - +(ZV_NORTH(:,IKB:IKE+1)+ZV_NORTH(:,IKB-1:IKE)) & -! *TZHALO2_DZY%HALO2%NORTH(:,IKB:IKE+1) *0.5)/12.0 - *ZDZY_NORTH(:,IKB:IKE+1) *0.5)/12.0 +!$acc loop independent collapse(2) + do jk = IKB, IKE + 1 + do jj = 1, iju + ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0 + ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0 + Z1(IIE, jj, jk ) = 7.0 * ( ( PRUCT(IIE, jj, jk ) + PRUCT(IIE, jj, jk-1 ) ) * ztmp1 & + + ( PRUCT(IIE+1, jj, jk ) + PRUCT(IIE+1, jj, jk-1 ) ) * ztmp2 ) / 12.0 & + - 0.5 * ( ( PRUCT(IIE-1, jj, jk ) + PRUCT(IIE-1, jj, jk-1 ) ) * PDZX(IIE-1, jj, jk) & + + ( ZU_EAST (jj, jk ) + ZU_EAST (jj, jk-1 ) ) * ZDZX_EAST (jj, jk) ) / 12.0 + end do + end do +! +!$acc loop independent collapse(2) + do jk = IKB, IKE + 1 + do ji = 1, iiu + ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0 + ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0 + Z2(ji, IJE, jk ) = 7.0 * ( ( PRVCT (ji, IJE, jk ) + PRVCT (ji, IJE, jk-1 ) ) * ztmp1 & + + ( PRVCT (ji, IJE+1, jk ) + PRVCT (ji, IJE+1, jk-1 ) ) * ztmp2 ) / 12.0 & + - 0.5 * ( ( PRVCT (ji, IJE-1, jk ) + PRVCT (ji, IJE-1, jk-1 ) ) * PDZY (ji, IJE-1, jk ) & + + ( ZV_NORTH(ji, jk ) + ZV_NORTH(ji, jk-1 ) ) * ZDZY_NORTH(ji, jk ) ) / 12.0 + end do + end do !!$ END IF ! !* 3.3 non-CYCLIC CASE IN THE X DIRECTION: 2nd order case ! IF (HLBCX(1)/='CYCL' .AND. LWEST_ll()) THEN -! - Z1(IIB,:,IKB:IKE+1)= & - (PRUCT(IIB,:,IKB:IKE+1)+PRUCT(IIB,:,IKB-1:IKE) ) & - *PDZX(IIB,:,IKB:IKE+1) *0.25 & - +(PRUCT(IIB+1,:,IKB:IKE+1)+PRUCT(IIB+1,:,IKB-1:IKE) ) & - *PDZX(IIB+1,:,IKB:IKE+1) *0.25 + Z1(IIB, :, IKB:IKE+1 ) = ( PRUCT(IIB, :, IKB:IKE+1 ) + PRUCT(IIB, :, IKB-1:IKE ) ) * PDZX(IIB, :, IKB:IKE+1 ) * 0.25 & + + ( PRUCT(IIB+1, :, IKB:IKE+1 ) + PRUCT(IIB+1, :, IKB-1:IKE ) ) * PDZX(IIB+1, :, IKB:IKE+1 ) * 0.25 END IF ! IF (HLBCX(2)/='CYCL' .AND. LEAST_ll()) THEN -! - Z1(IIE,:,IKB:IKE+1)= & - (PRUCT(IIE,:,IKB:IKE+1)+PRUCT(IIE,:,IKB-1:IKE) ) & - *PDZX(IIE,:,IKB:IKE+1) *0.25 & - +(PRUCT(IIE+1,:,IKB:IKE+1)+PRUCT(IIE+1,:,IKB-1:IKE) ) & - *PDZX(IIE+1,:,IKB:IKE+1) *0.25 + Z1(IIE, :, IKB:IKE+1 ) = ( PRUCT(IIE, :, IKB:IKE+1 ) + PRUCT(IIE, :, IKB-1:IKE ) ) * PDZX(IIE, :, IKB:IKE+1 ) * 0.25 & + + ( PRUCT(IIE+1, :, IKB:IKE+1 ) + PRUCT(IIE+1, :, IKB-1:IKE ) ) * PDZX(IIE+1, :, IKB:IKE+1 ) * 0.25 END IF ! !* 3.4 non-CYCLIC CASE IN THE Y DIRECTION: 2nd order case ! IF (HLBCY(1)/='CYCL' .AND. LSOUTH_ll()) THEN -! - Z2(:,IJB,IKB:IKE+1)= & - (PRVCT(:,IJB,IKB:IKE+1)+PRVCT(:,IJB,IKB-1:IKE) ) & - *PDZY(:,IJB,IKB:IKE+1) *0.25 & - +(PRVCT(:,IJB+1,IKB:IKE+1)+PRVCT(:,IJB+1,IKB-1:IKE) ) & - *PDZY(:,IJB+1,IKB:IKE+1) *0.25 -! + Z2(:, IJB, IKB:IKE+1 ) = ( PRVCT(:, IJB, IKB:IKE+1 ) + PRVCT(:, IJB, IKB-1:IKE ) ) * PDZY(:, IJB, IKB:IKE+1 ) * 0.25 & + + ( PRVCT(:, IJB+1, IKB:IKE+1 ) + PRVCT(:, IJB+1, IKB-1:IKE ) ) * PDZY(:, IJB+1, IKB:IKE+1 ) * 0.25 END IF ! IF (HLBCY(2)/='CYCL' .AND. LNORTH_ll()) THEN -! - Z2(:,IJE,IKB:IKE+1)= & - (PRVCT(:,IJE,IKB:IKE+1)+PRVCT(:,IJE,IKB-1:IKE) ) & - *PDZY(:,IJE,IKB:IKE+1) *0.25 & - +(PRVCT(:,IJE+1,IKB:IKE+1)+PRVCT(:,IJE+1,IKB-1:IKE) ) & - *PDZY(:,IJE+1,IKB:IKE+1) *0.25 -! + Z2(:, IJE, IKB:IKE+1 ) = ( PRVCT(:, IJE, IKB:IKE+1 ) + PRVCT(:, IJE, IKB-1:IKE ) ) * PDZY(:, IJE, IKB:IKE+1 ) * 0.25 & + + ( PRVCT(:, IJE+1, IKB:IKE+1 ) + PRVCT(:, IJE+1, IKB-1:IKE ) ) * PDZY(:, IJE+1, IKB:IKE+1 ) * 0.25 END IF ! !* 3.5 Vertical contyravariant wind @@ -868,12 +858,14 @@ ELSE IF (KADV_ORDER == 4 ) THEN !!$ !!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ) PRWCT(:,:,:)=0. -!PW:TODO: not parallelized with openacc - PRWCT(IIB:IIE,IJB:IJE,IKB:IKE+1) = & - ( PRWT(IIB:IIE,IJB:IJE,IKB:IKE+1) & - - Z1(IIB:IIE,IJB:IJE,IKB:IKE+1) & - - Z2(IIB:IIE,IJB:IJE,IKB:IKE+1) & - ) / PDZZ(IIB:IIE,IJB:IJE,IKB:IKE+1) +!$acc loop independent collapse(3) + do jk = ikb, ike + 1 + do jj = ijb, ije + do ji = iib, iie + PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) + end do + end do + end do ! ! END IF -- GitLab