From 8987f40b445ce4e847058126a38ce63495f2dc2a Mon Sep 17 00:00:00 2001 From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr> Date: Fri, 15 Apr 2022 19:02:35 +0200 Subject: [PATCH] Juan 15/04/2022:MNH/ZSOLVER/turb_hor_dyn_corr.f90, Cray GPU Opt/Bug bypass, add present_cr + acc_nv , where needeed --- src/MNH/turb_hor_dyn_corr.f90 | 222 ++++++++++++++++++++---------- src/ZSOLVER/turb_hor_dyn_corr.f90 | 144 +++++-------------- 2 files changed, 181 insertions(+), 185 deletions(-) diff --git a/src/MNH/turb_hor_dyn_corr.f90 b/src/MNH/turb_hor_dyn_corr.f90 index 902dd306b..8d5717d5c 100644 --- a/src/MNH/turb_hor_dyn_corr.f90 +++ b/src/MNH/turb_hor_dyn_corr.f90 @@ -456,7 +456,10 @@ ELSE END IF ! !$acc kernels async(2) -ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZFLX(JI,JJ,IKE+1) = ZFLX(JI,JJ,IKE) +ENDDO !$acc end kernels ! !* prescription of du/dz and dv/dz with uncentered gradient at the surface @@ -470,12 +473,15 @@ ZDZZ(:,:,:) = MXM(PDZZ(:,:,IKB:IKB+2)) CALL MXM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif !$acc kernels async(3) -ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & - ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) -ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & - ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) -ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & - ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & + ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) + ZCOEFF(JI,JJ,IKB+1)= (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) / & + ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) ) + ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & + ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) +ENDDO !$acc end kernels ! #ifndef MNH_OPENACC @@ -488,10 +494,13 @@ ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2) & ZDZZ(:,:,:) = MYM(PDZZ(:,:,IKB:IKB+2)) #else !$acc kernels async(3) -ZTMP1_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PUM(:,:,IKB+2) & - +ZCOEFF(:,:,IKB+1)*PUM(:,:,IKB+1) & - +ZCOEFF(:,:,IKB )*PUM(:,:,IKB) & - )* 0.5 * ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB)) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZTMP1_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PUM(JI,JJ,IKB+2) & + +ZCOEFF(JI,JJ,IKB+1)*PUM(JI,JJ,IKB+1) & + +ZCOEFF(JI,JJ,IKB)*PUM(JI,JJ,IKB) & + )* 0.5 * ( PDZX(JI,JJ,IKB+1)+PDZX(JI,JJ,IKB)) +ENDDO !$acc end kernels ! !!! wait for the computation of ZCOEFF and ZTMP1_DEVICE @@ -500,18 +509,24 @@ ZTMP1_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PUM(:,:,IKB+2) & CALL MXF_DEVICE(ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) !$acc kernels async(3) -ZDU_DZ_DZS_DX(:,:,1) = ZTMP2_DEVICE(:,:,1) / ZTMP1_DEVICE(:,:,1) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZDU_DZ_DZS_DX(JI,JJ,1) = ZTMP2_DEVICE(JI,JJ,1) / ZTMP1_DEVICE(JI,JJ,1) +ENDDO !$acc end kernels ! CALL MYM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif !$acc kernels async(4) -ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & - ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) -ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & - ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) -ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & - ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & + ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) + ZCOEFF(JI,JJ,IKB+1)= (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) / & + ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) ) + ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & + ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) +ENDDO !$acc end kernels ! #ifndef MNH_OPENACC @@ -522,10 +537,13 @@ ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2) & )/ MYF(PDYY(:,:,IKB:IKB)) #else !$acc kernels async(4) -ZTMP3_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PVM(:,:,IKB+2) & - +ZCOEFF(:,:,IKB+1)*PVM(:,:,IKB+1) & - +ZCOEFF(:,:,IKB)*PVM(:,:,IKB) & - )* 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB)) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZTMP3_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PVM(JI,JJ,IKB+2) & + +ZCOEFF(JI,JJ,IKB+1)*PVM(JI,JJ,IKB+1) & + +ZCOEFF(JI,JJ,IKB)*PVM(JI,JJ,IKB) & + )* 0.5 * ( PDZY(JI,JJ,IKB+1)+PDZY(JI,JJ,IKB)) +ENDDO !$acc end kernels ! !!! wait for the computation of ZCOEFF and ZTMP3_DEVICE @@ -541,7 +559,7 @@ ZDV_DY(:,:,:)= DYF(PVM(:,:,IKB:IKB)) / MYF(PDYY(:,:,IKB:IKB)) & #else CALL MYF_DEVICE(ZTMP3_DEVICE(:,:,1:1), ZTMP4_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB), ZTMP3_DEVICE(:,:,1:1)) -!$acc kernels async(4) +!$acc kernels async(4) present_cr(ZDV_DZ_DZS_DY) ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) !$acc end kernels ! @@ -552,7 +570,10 @@ ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) CALL DXF_DEVICE(PUM(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP2_DEVICE(:,:,1:1)) !$acc kernels async(3) -ZDU_DX(:,:,1)= ZTMP1_DEVICE(:,:,1) / ZTMP2_DEVICE(:,:,1) - ZDU_DZ_DZS_DX(:,:,1) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZDU_DX(JI,JJ,1)= ZTMP1_DEVICE(JI,JJ,1) / ZTMP2_DEVICE(JI,JJ,1) - ZDU_DZ_DZS_DX(JI,JJ,1) +ENDDO !$acc end kernels !!! wait for the computation of ZDV_DZ_DZS_DY @@ -560,8 +581,11 @@ ZDU_DX(:,:,1)= ZTMP1_DEVICE(:,:,1) / ZTMP2_DEVICE(:,:,1) - ZDU_DZ_DZS_DX(:,:,1) ! CALL DYF_DEVICE(PVM(:,:,IKB:IKB),ZTMP3_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) -!$acc kernels! async(4) -ZDV_DY(:,:,1)= ZTMP3_DEVICE(:,:,1) / ZTMP4_DEVICE(:,:,1) - ZDV_DZ_DZS_DY(:,:,1) +!$acc kernels async(4) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZDV_DY(JI,JJ,1)= ZTMP3_DEVICE(JI,JJ,1) / ZTMP4_DEVICE(JI,JJ,1) - ZDV_DZ_DZS_DY(JI,JJ,1) +ENDDO !$acc end kernels ! ! @@ -570,7 +594,10 @@ ZDV_DY(:,:,1)= ZTMP3_DEVICE(:,:,1) / ZTMP4_DEVICE(:,:,1) - ZDV_DZ_DZS_DY(:,:,1) #endif ! !$acc kernels async(4) -ZDW_DZ(:,:,:)=-ZDU_DX(:,:,:)-ZDV_DY(:,:,:) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZDW_DZ(JI,JJ,1)=-ZDU_DX(JI,JJ,1)-ZDV_DY(JI,JJ,1) +ENDDO !$acc end kernels ! !* computation @@ -586,11 +613,13 @@ ZDW_DZ(:,:,:)=-ZDU_DX(:,:,:)-ZDV_DY(:,:,:) ! du coup je ne peux pas faire de update self asynchrone... ! !$acc kernels async(3) -ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & - - XCMFS * PK(:,:,IKB) * 2. * ZDU_DX(:,:,1) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & + - XCMFS * PK(JI,JJ,IKB) * 2. * ZDU_DX(JI,JJ,1) +ENDDO !$acc end kernels - !! & to be tested later !! + XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & !! (-2./3.) * PTP(:,:,IKB:IKB) @@ -601,7 +630,7 @@ ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & !!! wait for the computation of ZDIRSINZW !$acc wait(1) ! -!$acc kernels async(4) +!$acc kernels async(4) present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & @@ -613,9 +642,7 @@ ZFLX(:,:,IKB-1) = & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -633,7 +660,10 @@ END DO ! CONCURRENT !$acc wait(3) async(4) ! !$acc kernels async(4) -ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) - ZFLX(:,:,IKB) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZFLX(JI,JJ,IKB-1) = 2. * ZFLX(JI,JJ,IKB-1) - ZFLX(JI,JJ,IKB) +ENDDO !$acc end kernels ! ! @@ -755,8 +785,11 @@ IF (KSPLT==1) THEN ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! !$acc kernels async(2) - ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDU_DX(:,:,1) + ZWORK(:,:,IKB+1) ) - !$acc end kernels + !$acc_nv loop independent collapse(2) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDU_DX(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) + ENDDO + !$acc end kernels ! !$acc kernels async(2) PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) @@ -777,7 +810,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc wait(2) ! !$acc kernels - ZTMP1_DEVICE = -ZWORK + ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) ! @@ -811,11 +844,14 @@ IF (.NOT. L2D) THEN !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ! ELSE - !$acc kernels async(3) - ZFLX(:,:,:)= (2./3.) * PTKEM & - - XCMFS * PK *(-(2./3.) * ( GX_U_M_PUM & - +GZ_W_M_PWM ) ) - !$acc end kernels + !$acc kernels async(3) + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & + - XCMFS * PK(JI,JJ,JK) *(-(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & + +GZ_W_M_PWM(JI,JJ,JK) ) ) + ENDDO + !$acc end kernels !! & to be tested !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ! @@ -830,8 +866,11 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) ! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10) ! !$acc kernels async(3) -ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & - - XCMFS * PK(:,:,IKB) * 2. * ZDV_DY(:,:,1) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & + - XCMFS * PK(JI,JJ,IKB) * 2. * ZDV_DY(JI,JJ,1) +ENDDO !$acc end kernels !! & to be tested @@ -839,7 +878,7 @@ ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & !! (-2./3.) * PTP(:,:,IKB:IKB) ! ! extrapolates this flux under the ground with the surface flux -!$acc kernels async(3) +!$acc kernels async(3) present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & @@ -851,9 +890,7 @@ ZFLX(:,:,IKB-1) = & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -999,9 +1036,12 @@ IF (KSPLT==1) THEN ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDV_DY(:,:,1) + ZWORK(:,:,IKB+1) ) - !$acc end kernels + !$acc kernels async(2) + !$acc_nv loop independent collapse(2) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDV_DY(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) + ENDDO + !$acc end kernels ! !$acc kernels async(2) PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) @@ -1022,7 +1062,10 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc wait(2) ! !$acc kernels - ZTMP1_DEVICE = -ZWORK + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) ! @@ -1051,11 +1094,14 @@ IF (.NOT. L2D) THEN !! & to be tested !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ELSE - !$acc kernels async(2) - ZFLX(:,:,:)= (2./3.) * PTKEM & - - XCMFS * PK *( (4./3.) * GZ_W_M_PWM & - -(2./3.) * ( GX_U_M_PUM ) ) - !$acc end kernels + !$acc kernels async(2) + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & + - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & + -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) ) ) + ENDDO + !$acc end kernels !! & to be tested !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP END IF @@ -1069,8 +1115,12 @@ ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) ! ! !$acc kernels async(2) -ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & - - XCMFS * PK(:,:,IKB) * 2. * ZDW_DZ(:,:,1) +!$acc_nv loop independent collapse(2) +DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & + - XCMFS * PK(JI,JJ,IKB) * 2. * ZDW_DZ(JI,JJ,1) +ENDDO + !$acc end kernels ! @@ -1078,7 +1128,7 @@ ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & ! - 2.* XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & ! (-2./3.) * PTP(:,:,IKB:IKB) ! extrapolates this flux under the ground with the surface flux -!$acc kernels async(3) +!$acc kernels async(3) present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * ZDIRSINZW(:,:)**2 & @@ -1086,9 +1136,7 @@ ZFLX(:,:,IKB-1) = & +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & @@ -1184,9 +1232,12 @@ IF (KSPLT==1) THEN ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDW_DZ(:,:,1) + ZWORK(:,:,IKB+1) ) - !$acc end kernels + !$acc kernels async(2) + !$acc_nv loop independent collapse(2) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU) + ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDW_DZ(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) + ENDDO + !$acc end kernels ! !$acc kernels async(2) PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) @@ -1225,20 +1276,29 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc wait(2) ! !$acc kernels - ZTMP1_DEVICE = -ZWORK + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_W_SBG_UaW , .TRUE.) ! CALL GZ_M_M_DEVICE(PTHLM,PDZZ,ZTMP1_DEVICE) !$acc kernels - ZTMP2_DEVICE = ZTMP1_DEVICE * ZFLX + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels - ZTMP3_DEVICE = ZFLX*ZTMP2_DEVICE + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE,X_LES_RES_ddz_Thl_SBG_W2) ! @@ -1249,14 +1309,20 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GZ_M_M_DEVICE(PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) !$acc kernels - ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels - ZTMP3_DEVICE = ZFLX*ZTMP2_DEVICE + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Rt_SBG_W2) ! @@ -1268,15 +1334,21 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GZ_M_M_DEVICE(PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) !$acc kernels - ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, & X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) - CALL MZF_DEVICE( ZTMP1_DEVICE,ZTMP2_DEVICE) + CALL MZF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels - ZTMP3_DEVICE = ZFLX*ZTMP2_DEVICE + !$acc_nv loop independent collapse(3) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) + ENDDO !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) ! diff --git a/src/ZSOLVER/turb_hor_dyn_corr.f90 b/src/ZSOLVER/turb_hor_dyn_corr.f90 index 9ce23f59a..850e010d2 100644 --- a/src/ZSOLVER/turb_hor_dyn_corr.f90 +++ b/src/ZSOLVER/turb_hor_dyn_corr.f90 @@ -461,13 +461,10 @@ ELSE END IF ! !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZFLX(JI,JJ,IKE+1) = ZFLX(JI,JJ,IKE) ENDDO - !$acc end kernels ! !* prescription of du/dz and dv/dz with uncentered gradient at the surface @@ -481,9 +478,7 @@ ZDZZ(:,:,:) = MXM(PDZZ(:,:,IKB:IKB+2)) CALL MXM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) @@ -493,7 +488,6 @@ DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) ENDDO !$acc end kernels - ! #ifndef MNH_OPENACC ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2) & @@ -505,9 +499,7 @@ ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2) & ZDZZ(:,:,:) = MYM(PDZZ(:,:,IKB:IKB+2)) #else !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZTMP1_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PUM(JI,JJ,IKB+2) & +ZCOEFF(JI,JJ,IKB+1)*PUM(JI,JJ,IKB+1) & @@ -522,9 +514,7 @@ ENDDO CALL MXF_DEVICE(ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZDU_DZ_DZS_DX(JI,JJ,1) = ZTMP2_DEVICE(JI,JJ,1) / ZTMP1_DEVICE(JI,JJ,1) ENDDO @@ -533,9 +523,7 @@ ENDDO CALL MYM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif !$acc kernels async(4) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) @@ -544,10 +532,8 @@ DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) ENDDO - !$acc end kernels ! - #ifndef MNH_OPENACC ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2) & +ZCOEFF(:,:,IKB+1:IKB+1)*PVM(:,:,IKB+1:IKB+1) & @@ -556,9 +542,7 @@ ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2) & )/ MYF(PDYY(:,:,IKB:IKB)) #else !$acc kernels async(4) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZTMP3_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PVM(JI,JJ,IKB+2) & +ZCOEFF(JI,JJ,IKB+1)*PVM(JI,JJ,IKB+1) & @@ -580,11 +564,7 @@ ZDV_DY(:,:,:)= DYF(PVM(:,:,IKB:IKB)) / MYF(PDYY(:,:,IKB:IKB)) & #else CALL MYF_DEVICE(ZTMP3_DEVICE(:,:,1:1), ZTMP4_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB), ZTMP3_DEVICE(:,:,1:1)) -#ifdef MNH_COMPILER_CCE -!$acc kernels present(ZDV_DZ_DZS_DY) async(4) -#else -!$acc kernels async(4) -#endif +!$acc kernels async(4) present_cr(ZDV_DZ_DZS_DY) ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) !$acc end kernels ! @@ -595,9 +575,7 @@ ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) CALL DXF_DEVICE(PUM(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP2_DEVICE(:,:,1:1)) !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZDU_DX(JI,JJ,1)= ZTMP1_DEVICE(JI,JJ,1) / ZTMP2_DEVICE(JI,JJ,1) - ZDU_DZ_DZS_DX(JI,JJ,1) ENDDO @@ -609,9 +587,7 @@ ENDDO CALL DYF_DEVICE(PVM(:,:,IKB:IKB),ZTMP3_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) !$acc kernels async(4) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZDV_DY(JI,JJ,1)= ZTMP3_DEVICE(JI,JJ,1) / ZTMP4_DEVICE(JI,JJ,1) - ZDV_DZ_DZS_DY(JI,JJ,1) ENDDO @@ -642,9 +618,7 @@ ENDDO ! du coup je ne peux pas faire de update self asynchrone... ! !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - XCMFS * PK(JI,JJ,IKB) * 2. * ZDU_DX(JI,JJ,1) @@ -661,11 +635,7 @@ ENDDO !!! wait for the computation of ZDIRSINZW !$acc wait(1) ! -#ifdef MNH_COMPILER_CCE -!$acc kernels present(ZFLX) async(4) -#else -!$acc kernels async(4) -#endif +!$acc kernels async(4) present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & @@ -677,9 +647,7 @@ ZFLX(:,:,IKB-1) = & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -697,9 +665,7 @@ END DO ! CONCURRENT !$acc wait(3) async(4) ! !$acc kernels async(4) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZFLX(JI,JJ,IKB-1) = 2. * ZFLX(JI,JJ,IKB-1) - ZFLX(JI,JJ,IKB) ENDDO @@ -825,10 +791,8 @@ IF (KSPLT==1) THEN ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif + !$acc kernels async(2) + !$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDU_DX(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) ENDDO @@ -888,9 +852,7 @@ IF (.NOT. L2D) THEN ! ELSE !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *(-(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & @@ -911,9 +873,7 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) ! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10) ! !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - XCMFS * PK(JI,JJ,IKB) * 2. * ZDV_DY(JI,JJ,1) @@ -925,11 +885,7 @@ ENDDO !! (-2./3.) * PTP(:,:,IKB:IKB) ! ! extrapolates this flux under the ground with the surface flux -#ifdef MNH_COMPILER_CCE -!$acc kernels present(ZFLX) async(3) -#else -!$acc kernels async(3) -#endif +!$acc kernels async(3) present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & @@ -941,9 +897,7 @@ ZFLX(:,:,IKB-1) = & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -1092,9 +1046,7 @@ IF (KSPLT==1) THEN ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif + !$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDV_DY(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) ENDDO @@ -1119,9 +1071,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc wait(2) ! !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) ENDDO @@ -1154,9 +1104,7 @@ IF (.NOT. L2D) THEN !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ELSE !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & @@ -1176,9 +1124,7 @@ ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) ! ! !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - XCMFS * PK(JI,JJ,IKB) * 2. * ZDW_DZ(JI,JJ,1) @@ -1191,11 +1137,7 @@ ENDDO ! - 2.* XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & ! (-2./3.) * PTP(:,:,IKB:IKB) ! extrapolates this flux under the ground with the surface flux -#ifdef MNH_COMPILER_CCE -!$acc kernels present(ZFLX) async(3) -#else -!$acc kernels async(3) -#endif +!$acc kernels async(3) present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * ZDIRSINZW(:,:)**2 & @@ -1203,9 +1145,7 @@ ZFLX(:,:,IKB-1) = & +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & @@ -1302,9 +1242,7 @@ IF (KSPLT==1) THEN ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif + !$acc_nv loop independent collapse(2) DO CONCURRENT (JI=1:JIU,JJ=1:JJU) ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDW_DZ(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) ENDDO @@ -1347,9 +1285,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc wait(2) ! !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) ENDDO @@ -1358,9 +1294,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GZ_M_M_DEVICE(PTHLM,PDZZ,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) ENDDO @@ -1370,9 +1304,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL GZ_M_W_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) ENDDO @@ -1386,9 +1318,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GZ_M_M_DEVICE(PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) ENDDO @@ -1398,9 +1328,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL GZ_M_W_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) ENDDO @@ -1415,9 +1343,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GZ_M_M_DEVICE(PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) ENDDO @@ -1426,11 +1352,9 @@ IF (LLES_CALL .AND. KSPLT==1) THEN X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) - CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) + CALL MZF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) ENDDO -- GitLab