From faa1f1adb34c4aefa529c8a9d6ab2e8f69200a39 Mon Sep 17 00:00:00 2001 From: Juan Escobar <juan.escobar@aero.obs-mip.fr> Date: Wed, 19 May 2021 16:33:51 +0200 Subject: [PATCH] Juan 19/05/2021: add loop independent collapse, for all do concurrent & BR_ routines, or the do loop seq !!! --- src/MNH/advection_metsv.f90 | 21 ++++-- src/MNH/emoist.f90 | 2 + src/MNH/gradient_u.f90 | 11 ++- src/MNH/gradient_v.f90 | 5 ++ src/MNH/mode_prandtl.f90 | 26 ++++--- src/MNH/prandtl.f90 | 11 ++- src/MNH/rain_ice.f90 | 29 +++++--- src/MNH/rain_ice_nucleation.f90 | 1 + src/MNH/resolved_cloud.f90 | 1 + src/MNH/shuman_device.f90 | 16 +++- src/MNH/tke_eps_sources.f90 | 13 +++- src/MNH/tridiag_thermo.f90 | 18 ++++- src/MNH/tridiag_tke.f90 | 10 ++- src/MNH/tridiag_w.f90 | 16 +++- src/MNH/tridiag_wind.f90 | 4 +- src/MNH/turb.f90 | 11 ++- src/MNH/turb_hor_dyn_corr.f90 | 24 +++++- src/MNH/turb_hor_thermo_flux.f90 | 122 +++++++++++++++++++------------ src/MNH/turb_hor_tke.f90 | 24 +++++- src/MNH/turb_hor_uv.f90 | 23 +++++- src/MNH/turb_hor_uw.f90 | 11 +++ src/MNH/turb_hor_vw.f90 | 11 +++ src/MNH/turb_ver.f90 | 1 + src/MNH/turb_ver_dyn_flux.f90 | 100 ++++++++++++++++++------- src/MNH/turb_ver_thermo_corr.f90 | 97 +++++++++++++++--------- src/MNH/turb_ver_thermo_flux.f90 | 19 ++++- src/Makefile.MESONH.mk | 2 +- 27 files changed, 470 insertions(+), 159 deletions(-) diff --git a/src/MNH/advection_metsv.f90 b/src/MNH/advection_metsv.f90 index 73c8cd0aa..4a69e687f 100644 --- a/src/MNH/advection_metsv.f90 +++ b/src/MNH/advection_metsv.f90 @@ -527,12 +527,18 @@ IF (.NOT. L1D) THEN END IF #else IF (.NOT.
L2D) THEN - !$acc kernels - ZCFL(:,:,:) = SQRT(BR_P2(ZCFLU(:,:,:))+BR_P2(ZCFLV(:,:,:))+BR_P2(ZCFLW(:,:,:))) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) + ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) + END DO !$acc end kernels ELSE - !$acc kernels - ZCFL(:,:,:) = SQRT(BR_P2(ZCFLU(:,:,:))+BR_P2(ZCFLW(:,:,:))) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) + ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) + END DO !$acc end kernels END IF #endif @@ -543,7 +549,10 @@ ELSE #ifndef MNH_BITREP ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) #else - ZCFL(:,:,:) = SQRT(BR_P2(ZCFLW(:,:,:))) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) + ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK))) + END DO #endif !$acc end kernels END IF @@ -859,12 +868,14 @@ DO JSPL=1,KSPLIT !$acc end kernels END IF !$acc kernels + !$acc loop independent collapse(4) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) & * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) END DO !CONCURRENT !$acc loop seq DO JSV = 1, KSV + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + & PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) diff --git a/src/MNH/emoist.f90 b/src/MNH/emoist.f90 index d08b2042e..7116d12d4 100644 --- a/src/MNH/emoist.f90 +++ b/src/MNH/emoist.f90 @@ -191,6 +191,7 @@ ELSE ! liquid water & ice present DO JRR=5,KRR ZRW(1:JIU,1:JJU,1:JKU) = ZRW(1:JIU,1:JJU,1:JKU) + PRM(1:JIU,1:JJU,1:JKU,JRR) ENDDO + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = 1. + ( & ! 
Compute A (1.+ZDELTA) * (PRM(JI,JJ,JK,1) - PRM(JI,JJ,JK,2) - PRM(JI,JJ,JK,4)) & @@ -218,6 +219,7 @@ ELSE ! liquid water & ice present DO JRR=3,KRR ZRW(1:JIU,1:JJU,1:JKU) = ZRW(1:JIU,1:JJU,1:JKU) + PRM(1:JIU,1:JJU,1:JKU,JRR) ENDDO + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = 1. + ( & ! Compute ZA (1.+ZDELTA) * (PRM(JI,JJ,JK,1) - PRM(JI,JJ,JK,2)) & diff --git a/src/MNH/gradient_u.f90 b/src/MNH/gradient_u.f90 index fa743bbd8..a361dcbed 100644 --- a/src/MNH/gradient_u.f90 +++ b/src/MNH/gradient_u.f90 @@ -244,13 +244,15 @@ iztmp3_device = MNH_ALLOCATE_ZT3D( ztmp3_device,JIU,JJU,JKU ) IF (.NOT. LFLAT) THEN CALL DXF_DEVICE(PA,ZTMP1_DEVICE) CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP2_DEVICE) - !$acc kernels loop independent collapse(3) - DO JK=1,JKU ; DO JJ=1,JJU ; DO JI=1,JIU + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) - END DO ; END DO ; END DO + END DO !CONCURRENT !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !CONCURRENT @@ -434,6 +436,7 @@ IF (.NOT. LFLAT) THEN CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP1_DEVICE) CALL MXM_DEVICE(PDZZ,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)/ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -441,6 +444,7 @@ IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) CALL MXM_DEVICE(PDZY,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -449,6 +453,7 @@ IF (.NOT. 
LFLAT) THEN CALL DYM_DEVICE(PA,ZTMP1_DEVICE) CALL MXM_DEVICE(PDYY,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PGY_U_UV_DEVICE(JI,JJ,JK)= ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP2_DEVICE(JI,JJ,JK) ) / ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/gradient_v.f90 b/src/MNH/gradient_v.f90 index 0f7192cd8..c1a7810be 100644 --- a/src/MNH/gradient_v.f90 +++ b/src/MNH/gradient_v.f90 @@ -247,12 +247,14 @@ IF (.NOT. LFLAT) THEN CALL DYF_DEVICE(PA,ZTMP1_DEVICE) CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO !CONCURRENT @@ -435,6 +437,7 @@ IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PDZZ,ZTMP2_DEVICE) CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -442,6 +445,7 @@ IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) CALL MYM_DEVICE(PDZX,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) *ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -449,6 +453,7 @@ IF (.NOT. 
LFLAT) THEN CALL MZF_DEVICE(KKA,KKU,KL,ZTMP4_DEVICE,ZTMP2_DEVICE) CALL MYM_DEVICE(PDXX,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PGX_V_UV_DEVICE(JI,JJ,JK)= ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP2_DEVICE(JI,JJ,JK) ) / ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/mode_prandtl.f90 b/src/MNH/mode_prandtl.f90 index 5ac5ddc57..1a714d1c5 100644 --- a/src/MNH/mode_prandtl.f90 +++ b/src/MNH/mode_prandtl.f90 @@ -122,27 +122,35 @@ igphi3logic = MNH_ALLOCATE_GT3D( gphi3logic, JIU,JJU,JKU ) !$acc kernels IF (HTURBDIM=='3DIM') THEN !* 3DIM case - IF (OUSERV) THEN - ZW1(:,:,:) = 1. + 1.5* (PREDTH1(:,:,:)+PREDR1(:,:,:)) + & + IF (OUSERV) THEN #ifndef MNH_BITREP + ZW1(:,:,:) = 1. + 1.5* (PREDTH1(:,:,:)+PREDR1(:,:,:)) + & ( 0.5 * (PREDTH1(:,:,:)**2+PREDR1(:,:,:)**2) & -#else - ( 0.5 * (BR_P2(PREDTH1(:,:,:))+BR_P2(PREDR1(:,:,:))) & -#endif + PREDTH1(:,:,:) * PREDR1(:,:,:) & - ) + ) +#else + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZW1(JI,JJ,JK) = 1. + 1.5* (PREDTH1(JI,JJ,JK)+PREDR1(JI,JJ,JK)) + & + ( 0.5 * (BR_P2(PREDTH1(JI,JJ,JK))+BR_P2(PREDR1(JI,JJ,JK))) & + + PREDTH1(JI,JJ,JK) * PREDR1(JI,JJ,JK) & + ) + END DO +#endif ZW2(:,:,:) = 0.5 * (PRED2TH3(:,:,:)-PRED2R3(:,:,:)) PPHI3(:,:,:)= 1. - & ( ( (1.+PREDR1(:,:,:)) * & (PRED2THR3(:,:,:) + PRED2TH3(:,:,:)) / PREDTH1(:,:,:) & ) + ZW2(:,:,:) & ) / ZW1(:,:,:) - ELSE + ELSE +#ifndef MNH_BITREP ZW1(:,:,:) = 1. + 1.5* PREDTH1(:,:,:) + & -#ifndef MNH_BITREP 0.5* PREDTH1(:,:,:)**2 #else - 0.5* BR_P2(PREDTH1(:,:,:)) + DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZW1(JI,JJ,JK) = 1. + 1.5* PREDTH1(JI,JJ,JK) + & + 0.5* BR_P2(PREDTH1(JI,JJ,JK)) + END DO #endif ZW2(:,:,:) = 0.5* PRED2TH3(:,:,:) PPHI3(:,:,:)= 1. 
- & diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90 index 0b0815ba5..927314266 100644 --- a/src/MNH/prandtl.f90 +++ b/src/MNH/prandtl.f90 @@ -437,7 +437,7 @@ END WHERE WHERE (PREDTH1(:,:,:) < -ZMINVAL) ZW2(:,:,:) = (-ZMINVAL) / (PREDTH1(:,:,:)) END WHERE - +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZW2(JI,JJ,JK) = MIN( ZW1(JI,JJ,JK),ZW2(JI,JJ,JK) ) END DO @@ -448,6 +448,7 @@ WHERE (PREDR1(:,:,:)<-ZMINVAL) END WHERE !!$ZW1(:,:,:) = MIN(ZW2(:,:,:),ZW1(:,:,:)) +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZW1(JI,JJ,JK) = MIN( ZW2(JI,JJ,JK),ZW1(JI,JJ,JK) ) END DO @@ -550,6 +551,7 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) END DO !CONCURRENT @@ -587,6 +589,7 @@ END DO !CONCURRENT #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 #else + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) END DO !CONCURRENT @@ -623,6 +626,7 @@ END DO !CONCURRENT CALL GX_M_M_DEVICE(KKA,KKU,KKL,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GX_M_M_DEVICE(KKA,KKU,KKL,PTHLM ,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -635,6 +639,7 @@ END DO * PEMOIST(:,:,:) * PETHETA(:,:,:) & * ZTMP2_DEVICE(:,:,:) #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2THR3(JI,JJ,JK)= PREDR1(JI,JJ,JK) * PREDTH1(JI,JJ,JK) + BR_P2(XCTV)*BR_P2(PBLL_O_E(JI,JJ,JK)) * & PEMOIST(JI,JJ,JK) * PETHETA(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) @@ -719,6 +724,7 @@ ELSE ! 
3D case in a 3D model #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO @@ -759,6 +765,7 @@ END DO #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO @@ -801,6 +808,7 @@ END DO CALL GY_M_M_DEVICE(KKA,KKU,KKL,PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP3_DEVICE) CALL GY_M_M_DEVICE(KKA,KKU,KKL,PTHLM ,PDYY,PDZZ,PDZY,ZTMP4_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)+ & ZTMP3_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) @@ -844,6 +852,7 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.F. and KRR=0 not #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO diff --git a/src/MNH/rain_ice.f90 b/src/MNH/rain_ice.f90 index d0de2f3ff..6a7c12bdd 100644 --- a/src/MNH/rain_ice.f90 +++ b/src/MNH/rain_ice.f90 @@ -747,7 +747,7 @@ IZRHODJ = MNH_ALLOCATE_ZT1DP(ZRHODJ,0) ! !$acc kernels -!acc loop independent +!$acc loop independent DO CONCURRENT ( JL=1:IMICRO ) ZRVT(JL) = PRVT(I1(JL),I2(JL),I3(JL)) ZRCT(JL) = PRCT(I1(JL),I2(JL),I3(JL)) @@ -790,20 +790,26 @@ IZRHODJ = MNH_ALLOCATE_ZT1DP(ZRHODJ,0) ! ZSIGMA_RC(JL) = MAX(PSIGS(I1(JL),I2(JL),I3(JL)) * 2., 1.E-12) END DO END IF -! 
- ZZW(:) = ZEXNREF(:)*( XCPD+XCPV*ZRVT(:)+XCL*(ZRCT(:)+ZRRT(:)) & - +XCI*(ZRIT(:)+ZRST(:)+ZRGT(:)) ) - ZLSFACT(:) = (XLSTT+(XCPV-XCI)*(ZZT(:)-XTT))/ZZW(:) ! L_s/(Pi_ref*C_ph) - ZLVFACT(:) = (XLVTT+(XCPV-XCL)*(ZZT(:)-XTT))/ZZW(:) ! L_v/(Pi_ref*C_ph) - + ! + !$acc loop independent + DO CONCURRENT ( JL=1:IMICRO ) + ZZW(JL) = ZEXNREF(JL)*( XCPD+XCPV*ZRVT(JL)+XCL*(ZRCT(JL)+ZRRT(JL)) & + +XCI*(ZRIT(JL)+ZRST(JL)+ZRGT(JL)) ) + ZLSFACT(JL) = (XLSTT+(XCPV-XCI)*(ZZT(JL)-XTT))/ZZW(JL) ! L_s/(Pi_ref*C_ph) + ZLVFACT(JL) = (XLVTT+(XCPV-XCL)*(ZZT(JL)-XTT))/ZZW(JL) ! L_v/(Pi_ref*C_ph) + END DO + #ifndef MNH_BITREP ZZW(:) = EXP( XALPI - XBETAI/ZZT(:) - XGAMI*ALOG(ZZT(:) ) ) + ZSSI(:) = ZRVT(:)*( ZPRES(:)-ZZW(:) ) / ( (XMV/XMD) * ZZW(:) ) - 1.0 #else + !$acc loop independent DO CONCURRENT ( JL=1:IMICRO ) ZZW(JL) = BR_EXP( XALPI - XBETAI/ZZT(JL) - XGAMI*BR_LOG(ZZT(JL) ) ) + ZSSI(JL) = ZRVT(JL)*( ZPRES(JL)-ZZW(JL) ) / ( (XMV/XMD) * ZZW(JL) ) - 1.0 END DO #endif - ZSSI(:) = ZRVT(:)*( ZPRES(:)-ZZW(:) ) / ( (XMV/XMD) * ZZW(:) ) - 1.0 + ! Supersaturation over ice ! IF (LBU_ENABLE .OR. LLES_CALL) THEN @@ -815,7 +821,10 @@ IZRHODJ = MNH_ALLOCATE_ZT1DP(ZRHODJ,0) ! !Cloud water split between high and low content part is done here !according to autoconversion option - ZRCRAUTC(:) = XCRIAUTC/ZRHODREF(:) ! Autoconversion rc threshold + !$acc loop independent + DO CONCURRENT ( JL=1:IMICRO ) + ZRCRAUTC(JL) = XCRIAUTC/ZRHODREF(JL) ! Autoconversion rc threshold + END DO !$acc end kernels #ifdef MNH_OPENACC IF (LBU_ENABLE .OR. LLES_CALL) THEN @@ -1054,6 +1063,7 @@ IZRHODJ = MNH_ALLOCATE_ZT1DP(ZRHODJ,0) ZLBDAR(:) = 0. END WHERE #else + !$acc loop independent DO CONCURRENT ( JL=1:IMICRO ) IF ( ZRRT(JL)>0.0 ) THEN ZLBDAR(JL) = XLBR * BR_POW( ZRHODREF(JL) * MAX( ZRRT(JL), XRTMIN(3) ), XLBEXR ) @@ -1070,6 +1080,7 @@ END DO ! CONCURRENT ZLBDAR_RF(:) = 0. END WHERE #else + !$acc loop independent DO CONCURRENT ( JL=1:IMICRO ) IF ( ZRRT(JL)>0.0 .AND. 
ZRF(JL)>0.0 ) THEN ZLBDAR_RF(JL) = XLBR * BR_POW( ZRHODREF(JL) * MAX( ZRRT(JL)/ZRF(JL), XRTMIN(3) ), XLBEXR ) diff --git a/src/MNH/rain_ice_nucleation.f90 b/src/MNH/rain_ice_nucleation.f90 index 509e7ddab..88858b403 100644 --- a/src/MNH/rain_ice_nucleation.f90 +++ b/src/MNH/rain_ice_nucleation.f90 @@ -203,6 +203,7 @@ IF( INEGT >= 1 ) THEN ( ZSSI(1:INEGT)/ZUSW(1:INEGT) )**XALPHA1 ) END WHERE #else + !$acc loop independent DO CONCURRENT ( JL=1:INEGT ) IF ( (ZZT(JL)<=XTT-2.0) .AND. (ZZT(JL)>=XTT-5.0) .AND. (ZSSI(JL)>0.0) ) THEN ZZW(JL) = MAX( XNU20 * BR_EXP( -XBETA2 ),XNU10 * BR_EXP( -XBETA1*(ZZT(JL)-XTT) ) * & diff --git a/src/MNH/resolved_cloud.f90 b/src/MNH/resolved_cloud.f90 index 5e19a9116..01e131b70 100644 --- a/src/MNH/resolved_cloud.f90 +++ b/src/MNH/resolved_cloud.f90 @@ -722,6 +722,7 @@ ENDIF ! microphysical routines would save ! computing time ! +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) #ifndef MNH_BITREP ZEXN(JI,JJ,JK) = (PPABST(JI,JJ,JK)/XP00) ** (XRD/XCPD) diff --git a/src/MNH/shuman_device.f90 b/src/MNH/shuman_device.f90 index 380868f8c..0ca7755d4 100644 --- a/src/MNH/shuman_device.f90 +++ b/src/MNH/shuman_device.f90 @@ -167,6 +167,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PMXF,PA) +!$acc loop independent collapse(3) DO JK = 1, IKU DO JJ = 1, IJU DO JI = 1 + 1, IIU @@ -292,6 +293,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PMXM) +!$acc loop independent collapse(3) DO JK = 1, IKU DO JJ = 1, IJU DO JI = 1 + 1, IIU @@ -300,6 +302,7 @@ DO JK = 1, IKU ENDDO ENDDO ! +!$acc loop independent collapse(2) DO JK = 1, IKU DO JJ=1,IJU PMXM(1,JJ,JK) = PMXM(IIU-2*JPHEXT+1,JJ,JK) !TODO: voir si ce n'est pas plutot JPHEXT+1 @@ -418,6 +421,7 @@ IKU = SIZE(PA,3) ! !$acc kernels present(PA,PMYF) #ifndef _OPT_LINEARIZED_LOOPS +!$acc loop independent collapse(3) DO JK=1,IKU DO JJ=1,IJU-1 DO JI=1,IIU !TODO: remplacer le 1 par JPHEXT ? 
@@ -534,6 +538,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PMYM) +!$acc loop independent collapse(3) DO JK=1,IKU DO JJ=2,IJU !TODO: remplacer le 1+1 par 1+JPHEXT ? DO JI=1,IIU @@ -870,6 +875,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PDXF) +!$acc loop independent collapse(3) DO JK=1,IKU DO JJ=1,IJU DO JI=1+1,IIU @@ -878,6 +884,7 @@ DO JK=1,IKU END DO END DO ! +!$acc loop independent collapse(2) DO JK=1,IKU DO JJ=1,IJU PDXF(IIU,JJ,JK) = PDXF(2*JPHEXT,JJ,JK) @@ -994,6 +1001,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PDXM) +!$acc loop independent collapse(3) DO JK=1,IKU DO JJ=1,IJU DO JI=1+1,IIU !TODO: remplacer le 1 par JPHEXT ? @@ -1002,6 +1010,7 @@ DO JK=1,IKU END DO END DO ! +!$acc loop independent collapse(2) DO JK=1,IKU DO JJ=1,IJU PDXM(1,JJ,JK) = PDXM(IIU-2*JPHEXT+1,JJ,JK) !TODO: remplacer -2*JPHEXT+1 par -JPHEXT ? @@ -1119,6 +1128,7 @@ IKU = SIZE(PA,3) ! !$acc kernels present(PA,PDYF) #ifndef _OPT_LINEARIZED_LOOPS +!$acc loop independent collapse(3) DO JK=1,IKU DO JJ=1,IJU-1 !TODO: remplacer le 1 par JPHEXT ? DO JI=1,IIU @@ -1232,6 +1242,7 @@ IKU=SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PDYM) +!$acc loop independent collapse(3) DO JK=1,IKU DO JJ=2,IJU !TODO: remplacer le 2 par JPHEXT+1 ? DO JI=1,IIU @@ -1240,9 +1251,6 @@ DO JK=1,IKU END DO END DO ! -DO JJ=1,JPHEXT - PDYM(:,JJ,:) = PDYM(:,IJU-2*JPHEXT+JJ,:) ! for reprod JPHEXT <> 1 -END DO #else JIJKOR = 1 + IIU JIJKEND = IIU*IJU*IKU @@ -1345,6 +1353,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PDZF) +!$acc loop independent collapse(3) DO JK=1,IKU-1 !TODO: remplacer le 1 par JPHEXT ? DO JJ=1,IJU DO JI=1,IIU @@ -1459,6 +1468,7 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present(PA,PDZM) +!$acc loop independent collapse(3) DO JK=2,IKU !TODO: remplacer le 1+1 par 1+JPHEXT ? 
DO JJ=1,IJU DO JI=1,IIU diff --git a/src/MNH/tke_eps_sources.f90 b/src/MNH/tke_eps_sources.f90 index ae9c4b339..17218c59f 100644 --- a/src/MNH/tke_eps_sources.f90 +++ b/src/MNH/tke_eps_sources.f90 @@ -350,7 +350,10 @@ IKE=KKU-JPVEXT_TURB*KKL #ifndef MNH_BITREP ZKEFF(:,:,:) = PLM(:,:,:) * SQRT(PTKEM(:,:,:)) #else -ZKEFF(:,:,:) = PLM(:,:,:) * BR_POW(PTKEM(:,:,:),0.5) +!$acc loop independent collapse(3) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZKEFF(JI,JJ,JK) = PLM(JI,JJ,JK) * BR_POW(PTKEM(JI,JJ,JK),0.5) +END DO #endif ! !---------------------------------------------------------------------------- @@ -383,8 +386,12 @@ PDP(:,:,IKB) = PDP(:,:,IKB) * (1. + PDZZ(:,:,IKB+KKL)/PDZZ(:,:,IKB)) #ifndef MNH_BITREP ZFLX(:,:,:) = XCED * SQRT(PTKEM(:,:,:)) / PLEPS(:,:,:) #else -ZFLX(:,:,:) = XCED * BR_POW(PTKEM(:,:,:),0.5) / PLEPS(:,:,:) +!$acc loop independent collapse(3) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK) = XCED * BR_POW(PTKEM(JI,JJ,JK),0.5) / PLEPS(JI,JJ,JK) +END DO #endif +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSOURCE(JI,JJ,JK) = PRTKES(JI,JJ,JK) / PRHODJ(JI,JJ,JK) + PRTKESM(JI,JJ,JK) / PRHODJ(JI,JJ,JK) & - PTKEM(JI,JJ,JK) / PTSTEP & @@ -412,6 +419,7 @@ CALL MZM_DEVICE(PRHODJ,ZTMP2_DEVICE) !Warning: re-used later #ifndef MNH_BITREP ZA(:,:,:) = - PTSTEP * XCET * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) / PDZZ(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - PTSTEP * XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / BR_P2(PDZZ(JI,JJ,JK)) END DO !CONCURRENT @@ -427,6 +435,7 @@ CALL TRIDIAG_TKE(KKA,KKU,KKL,PTKEM,ZA,PTSTEP,PEXPL,PIMPL,PRHODJ,& CALL GET_HALO(ZRES) #else !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PTSTEP*ZFLX(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/tridiag_thermo.f90 b/src/MNH/tridiag_thermo.f90 index eefe1b37a..95e86abe0 100644 --- 
a/src/MNH/tridiag_thermo.f90 +++ b/src/MNH/tridiag_thermo.f90 @@ -266,6 +266,7 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ) #ifndef MNH_BITREP ZRHODJ_DFDDTDZ_O_DZ2 = ZMZM_RHODJ*PDFDDTDZ/PDZZ**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK)) END DO !CONCURRENT @@ -285,6 +286,7 @@ ZY=0. ! --------------------------- ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL) & @@ -295,6 +297,7 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL) & @@ -307,6 +310,7 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) & @@ -327,6 +331,7 @@ IF ( PIMPL > 1.E-10 ) THEN ! -------------- ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL @@ -334,12 +339,14 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! 
async +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZA(JI,JJ,JK) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL ZB(JI,JJ,JK) = PRHODJ(JI,JJ,JK)/PTSTEP & @@ -350,6 +357,7 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL ZB(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)/PTSTEP & @@ -364,6 +372,7 @@ END DO !CONCURRENT ! -------- ! !$acc kernels +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) @@ -371,7 +380,8 @@ END DO !CONCURRENT ! !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL -!$acc loop gang, vector collapse(2) + !$acc loop independent collapse(2) + ! acc loop gang, vector collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) !DO JJ=1,JJU ! DO JI=1,JIU @@ -386,6 +396,7 @@ DO JK = IKB+KKL,IKE-KKL,KKL END DO !CONCURRENT END DO ! special treatment for the last level +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -400,7 +411,8 @@ END DO !CONCURRENT ! !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL - !$acc loop gang, vector collapse(2) + !$acc loop independent collapse(2) + ! acc loop gang, vector collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) END DO !CONCURRENT @@ -410,6 +422,7 @@ END DO ELSE ! !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK) END DO !CONCURRENT @@ -422,6 +435,7 @@ END IF ! ---------------------------------------- ! 
!$acc kernels +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) diff --git a/src/MNH/tridiag_tke.f90 b/src/MNH/tridiag_tke.f90 index 135d4c319..a81715fb4 100644 --- a/src/MNH/tridiag_tke.f90 +++ b/src/MNH/tridiag_tke.f90 @@ -1,3 +1,4 @@ + !MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt @@ -220,12 +221,14 @@ IKE=KKU-JPVEXT_TURB*KKL ! ! +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - & PEXPL / PRHODJ(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB)) END DO !CONCURRENT ! DO JK=IKTB+1,IKTE-1 + !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - & PEXPL / PRHODJ(JI,JJ,JK) * & @@ -236,6 +239,7 @@ DO JK=IKTB+1,IKTE-1 END DO !CONCURRENT END DO ! +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + & PEXPL / PRHODJ(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL)) @@ -250,6 +254,7 @@ IF ( PIMPL > 1.E-10 ) THEN ! ! going up ! + !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = 1. + PIMPL * (PDIAG(JI,JJ,IKB)-PA(JI,JJ,IKB+KKL) / PRHODJ(JI,JJ,IKB)) ! bet = b(ikb) @@ -276,6 +281,7 @@ IF ( PIMPL > 1.E-10 ) THEN END DO END DO ! special treatment for the last level + !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJ(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -300,7 +306,8 @@ IF ( PIMPL > 1.E-10 ) THEN END DO ! ELSE - ! + ! 
+ !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,IKTB:IKTE) = ZY(JI,JJ,IKTB:IKTE) END DO !CONCURRENT @@ -311,6 +318,7 @@ END IF !* 3. FILL THE UPPER AND LOWER EXTERNAL VALUES ! ---------------------------------------- ! +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) diff --git a/src/MNH/tridiag_w.f90 b/src/MNH/tridiag_w.f90 index db89be613..b0815bf07 100644 --- a/src/MNH/tridiag_w.f90 +++ b/src/MNH/tridiag_w.f90 @@ -283,6 +283,7 @@ ZY=0. !!#endif ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - PRHODJ(JI,JJ,IKB ) * PF(JI,JJ,IKB )/PMZF_DZZ(JI,JJ,IKB ) & @@ -293,6 +294,7 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) ZY(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - PRHODJ(JI,JJ,JK ) * PF(JI,JJ,JK )/PMZF_DZZ(JI,JJ,JK ) & @@ -305,6 +307,7 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - PRHODJ(JI,JJ,IKE ) * PF(JI,JJ,IKE )/PMZF_DZZ(JI,JJ,IKE ) & @@ -329,18 +332,21 @@ END DO !CONCURRENT !! c(k) = + PRHODJ(k) * PDFDDWDZ(k)/PMZF_DZZ(k)**2 ! !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) END DO !CONCURRENT !$acc end kernels !$acc kernels ! 
async +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) ZA(JI,JJ,JK) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1) ZB(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)/PTSTEP & @@ -351,11 +357,13 @@ END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1) END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE ) & @@ -370,12 +378,15 @@ END DO !CONCURRENT ! -------- ! !$acc kernels +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) END DO !CONCURRENT - ! +! +!$acc loop seq DO JK = IKB+1,IKE-1 + !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -386,6 +397,7 @@ DO JK = IKB+1,IKE-1 END DO !CONCURRENT END DO ! special treatment for the last level +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -398,7 +410,9 @@ END DO !CONCURRENT !* 3.3 going down ! ---------- ! +!$acc loop seq DO JK = IKE-1,IKB,-1 + !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1) END DO !CONCURRENT diff --git a/src/MNH/tridiag_wind.f90 b/src/MNH/tridiag_wind.f90 index 0152538c2..8ab02c872 100644 --- a/src/MNH/tridiag_wind.f90 +++ b/src/MNH/tridiag_wind.f90 @@ -263,6 +263,7 @@ IF ( PIMPL > 1.E-10 ) THEN ! going up ! !$acc kernels + !$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) & + PCOEFS(JI,JJ) * PTSTEP ) ! 
bet = b(ikb) @@ -271,7 +272,7 @@ IF ( PIMPL > 1.E-10 ) THEN ! !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL - !$acc loop gang, vector collapse(2) + !$acc loop independent gang, vector collapse(2) DO CONCURRENT ( JJ=1:JJU , JI=1:JIU ) ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -286,6 +287,7 @@ IF ( PIMPL > 1.E-10 ) THEN END DO ! CONCURRENT END DO ! special treatment for the last level + !$acc loop independent gang, vector collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90 index 81531d768..7b4498dd8 100644 --- a/src/MNH/turb.f90 +++ b/src/MNH/turb.f90 @@ -998,8 +998,11 @@ END IF ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) / & (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) ) #else - ZCDUEFF(:,:) =-SQRT ( (BR_P2(PSFU(:,:)) + BR_P2(PSFV(:,:))) / & - (XMNH_TINY + BR_P2(ZUSLOPE(:,:)) + BR_P2(ZVSLOPE(:,:)) ) ) + !$acc loop independent collapse(2) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) / & + (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) ) + END DO #endif !$acc end kernels ! @@ -1728,7 +1731,9 @@ izdrvsatdt = MNH_ALLOCATE_ZT3D( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), siz #ifndef MNH_BITREP ZRVSAT(:,:,:) = EXP( PALP - PBETA/PT(:,:,:) - PGAM*ALOG( PT(:,:,:) ) ) #else - ZRVSAT(:,:,:) = BR_EXP( PALP - PBETA/PT(:,:,:) - PGAM*BR_LOG( PT(:,:,:) ) ) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZRVSAT(JI,JJ,JK) = BR_EXP( PALP - PBETA/PT(JI,JJ,JK) - PGAM*BR_LOG( PT(JI,JJ,JK) ) ) + END DO #endif ! 
!* 1.3 saturation mixing ratio at t diff --git a/src/MNH/turb_hor_dyn_corr.f90 b/src/MNH/turb_hor_dyn_corr.f90 index 01ee3ad89..1373e2597 100644 --- a/src/MNH/turb_hor_dyn_corr.f90 +++ b/src/MNH/turb_hor_dyn_corr.f90 @@ -403,7 +403,10 @@ IKU = SIZE(PUM,3) #ifndef MNH_BITREP ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else -ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) ) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) +END DO #endif !$acc end kernels ! @@ -437,6 +440,7 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' ) ! Computes the U variance IF (.NOT. L2D) THEN !$acc kernels async(2) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK) & @@ -613,6 +617,7 @@ ZFLX(:,:,IKB-1) = & PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -685,6 +690,7 @@ END IF #else CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE) !$acc kernels async(10) +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -697,18 +703,21 @@ CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) IF (.NOT. 
LFLAT) THEN CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -729,6 +738,7 @@ END IF IF (KSPLT==1) THEN ! Contribution to the dynamic production of TKE: !$acc kernels async(2) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK) END DO !CONCURRENT @@ -779,6 +789,7 @@ END IF ! Computes the V variance IF (.NOT. L2D) THEN !$acc kernels async(3) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK) & @@ -829,6 +840,7 @@ ZFLX(:,:,IKB-1) = & PUSLOPEM(:,:) * PSINSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -896,6 +908,7 @@ IF (.NOT. L2D) THEN #else CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE) !$acc kernels async(10) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -908,18 +921,21 @@ IF (.NOT. L2D) THEN IF (.NOT. 
LFLAT) THEN CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -932,6 +948,7 @@ IF (.NOT. L2D) THEN !$acc end kernels ELSE !$acc kernels async(1) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -940,6 +957,7 @@ IF (.NOT. L2D) THEN ! Contribution to the dynamic production of TKE: IF (KSPLT==1) THEN !$acc kernels async(2) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK) END DO !CONCURRENT @@ -995,6 +1013,7 @@ END IF ! Computes the W variance IF (.NOT. L2D) THEN !$acc kernels async(2) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & @@ -1039,6 +1058,7 @@ ZFLX(:,:,IKB-1) = & + PTAU33M(:,:) * PDIRCOSZW(:,:)**2 & +2. 
* PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & @@ -1112,6 +1132,7 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ) CALL GZ_W_M_DEVICE(1,IKU,1,ZWP,PDZZ,GZ_W_M_ZWP) #endif !$acc kernels async(2) +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK)) @@ -1121,6 +1142,7 @@ END DO !CONCURRENT IF (KSPLT==1) THEN !Contribution to the dynamic production of TKE: !$acc kernels async(2) + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_hor_thermo_flux.f90 b/src/MNH/turb_hor_thermo_flux.f90 index f589aec72..14cba9ab4 100644 --- a/src/MNH/turb_hor_thermo_flux.f90 +++ b/src/MNH/turb_hor_thermo_flux.f90 @@ -342,6 +342,7 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PTHLM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -383,7 +384,10 @@ ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( SPREAD( PSFTHM(:,:)* PDIRCOSXW(:,:), 3,1) ) - ZFLX(:,:,IKB:IKB) #else !$acc kernels - ZTMP1_DEVICE(:,:,1) = PSFTHM(:,:)* PDIRCOSXW(:,:) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZTMP1_DEVICE(JI,JJ,1) = PSFTHM(JI,JJ)* PDIRCOSXW(JI,JJ) +END DO !$acc end kernels CALL MXM_DEVICE( ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1) ) !$acc kernels @@ -404,25 +408,29 @@ END IF #else IF (.NOT. 
LFLAT) THEN CALL MXM_DEVICE(PRHODJ, ZTMP1_DEVICE) -!$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) -END DO -!$acc end kernels + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) + END DO + !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) -!$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) -END DO -!$acc end kernels + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) + END DO + !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) -!$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) -END DO + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) + END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE, ZTMP4_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO @@ -768,6 +776,7 @@ END IF CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -806,33 +815,38 @@ END DO IF (.NOT. 
LFLAT) THEN CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) + END DO !$acc end kernels CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) + END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) + END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) + END DO !$acc end kernels CALL DZF_DEVICE(1,IKU,1, ZTMP2_DEVICE, ZTMP4_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK) + END DO !$acc end kernels ELSE CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) @@ -1140,9 +1154,10 @@ END IF CALL 
MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) + END DO ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) !$acc end kernels ELSE @@ -1186,33 +1201,38 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) + END DO !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) + END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) + END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * 
PINV_PDZZ(JI,JJ,JK) + END DO !$acc end kernels CALL DZF_DEVICE(1,IKU,1, ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) -END DO + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) + END DO !$acc end kernels ELSE CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) @@ -1501,6 +1521,7 @@ IF (KRR/=0) THEN CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY, ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -1546,6 +1567,7 @@ IF (KRR/=0) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO @@ -1553,18 +1575,21 @@ IF (KRR/=0) THEN CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) ! !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO @@ -1572,6 +1597,7 @@ IF (KRR/=0) THEN CALL DZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE ) ! 
!$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) END DO diff --git a/src/MNH/turb_hor_tke.f90 b/src/MNH/turb_hor_tke.f90 index 748a65bc3..6a6174eaf 100644 --- a/src/MNH/turb_hor_tke.f90 +++ b/src/MNH/turb_hor_tke.f90 @@ -226,6 +226,7 @@ ZFLX = -XCET * MXM(PK) * GX_M_U(1,IKU,1,PTKEM,PDXX,PDZZ,PDZX) ! < u'e > CALL MXM_DEVICE(PK,ZTMP1_DEVICE) CALL GX_M_U_DEVICE(1,IKU,1,PTKEM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) ! < u'e > END DO !CONCURRENT @@ -233,9 +234,12 @@ END DO !CONCURRENT ! ! special case near the ground ( uncentred gradient ) ! -ZFLX(:,:,IKB) = ZCOEFF(:,:,IKB+2)*PTKEM(:,:,IKB+2) & - + ZCOEFF(:,:,IKB+1)*PTKEM(:,:,IKB+1) & - + ZCOEFF(:,:,IKB )*PTKEM(:,:,IKB ) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZFLX(JI,JJ,IKB) = ZCOEFF(JI,JJ,IKB+2)*PTKEM(JI,JJ,IKB+2) & + + ZCOEFF(JI,JJ,IKB+1)*PTKEM(JI,JJ,IKB+1) & + + ZCOEFF(JI,JJ,IKB )*PTKEM(JI,JJ,IKB ) +END DO !$acc end kernels ! #ifndef MNH_OPENACC @@ -285,30 +289,35 @@ END IF IF (.NOT. 
LFLAT) THEN CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PTRH(JI,JJ,JK) =-( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) /PRHODJ(JI,JJ,JK) END DO !CONCURRENT @@ -352,6 +361,7 @@ IF (.NOT. L2D) THEN CALL MYM_DEVICE(PK,ZTMP1_DEVICE) CALL GY_M_V_DEVICE(1,IKU,1,PTKEM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) =-XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) ! < v'e > END DO !CONCURRENT @@ -415,32 +425,38 @@ IF (.NOT. L2D) THEN IF (.NOT. 
LFLAT) THEN CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PTRH(JI,JJ,JK) = PTRH(JI,JJ,JK) - ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) /PRHODJ(JI,JJ,JK) + PTRH(JI,JJ,JK) = PTRH(JI,JJ,JK) - ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) & + /PRHODJ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels ELSE diff --git a/src/MNH/turb_hor_uv.f90 b/src/MNH/turb_hor_uv.f90 index 9866f9adf..d1da623a0 100644 --- a/src/MNH/turb_hor_uv.f90 +++ b/src/MNH/turb_hor_uv.f90 @@ -323,7 +323,10 @@ IKU = SIZE(PUM,3) #ifndef MNH_BITREP ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else -ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) ) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) +END DO #endif !$acc end kernels ! 
@@ -355,6 +358,7 @@ CALL MXM_DEVICE(PK,ZTMP1_DEVICE) CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) IF (.NOT. L2D) THEN !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK)) END DO !CONCURRENT @@ -450,6 +454,7 @@ ZFLX(:,:,IKB-1) = & PDIRCOSZW(:,:) * ZDIRSINZW(:,:) & +PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) ) #else +!$acc loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -507,12 +512,14 @@ END IF #else CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -522,30 +529,35 @@ IF (.NOT. 
LFLAT) THEN CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) CALL MZM_DEVICE(PDYY,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE(1,IKU,1,ZTMP5_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -570,12 +582,14 @@ END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -585,30 +599,35 @@ IF (.NOT. 
LFLAT) THEN CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) CALL MZM_DEVICE(PDXX,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE(1,IKU,1,ZTMP5_DEVICE,ZTMP3_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -635,6 +654,7 @@ IF (KSPLT==1) THEN #else IF (.NOT. 
L2D) THEN !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK)) END DO !CONCURRENT @@ -647,6 +667,7 @@ IF (KSPLT==1) THEN CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_hor_uw.f90 b/src/MNH/turb_hor_uw.f90 index ff27caa32..6b0501fdd 100644 --- a/src/MNH/turb_hor_uw.f90 +++ b/src/MNH/turb_hor_uw.f90 @@ -291,6 +291,7 @@ ZFLX(:,:,:) = & CALL MZM_DEVICE(PK,ZTMP1_DEVICE) CALL MXM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * GX_W_UW_PWM(JI,JJ,JK) END DO !CONCURRENT @@ -331,12 +332,14 @@ PRUS(:,:,:) = PRUS(:,:,:) - DZF( ZFLX* MXM( PMZM_PRHODJ ) / MXM( PDZZ ) ) CALL MXM_DEVICE( PMZM_PRHODJ, ZTMP1_DEVICE ) CALL MXM_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)* ZTMP1_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE(1,IKU,1, ZTMP3_DEVICE, ZTMP1_DEVICE ) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -355,12 +358,14 @@ END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) 
ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT @@ -368,12 +373,14 @@ END IF CALL DXF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE) IF (.NOT. LFLAT) THEN !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PDZX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZF_DEVICE(1,IKU,1, ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK)*PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT @@ -381,12 +388,14 @@ IF (.NOT. LFLAT) THEN CALL MXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) CALL MZF_DEVICE(1,IKU,1,PDZZ, ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZM_DEVICE(1,IKU,1, ZTMP4_DEVICE, ZTMP2_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK) & - ZTMP1_DEVICE(JI,JJ,JK) & @@ -410,6 +419,7 @@ IF (KSPLT==1) THEN #else CALL GZ_U_UW_DEVICE(1,IKU,1,PUM,PDZZ,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *( ZTMP1_DEVICE(JI,JJ,JK) + GX_W_UW_PWM(JI,JJ,JK) ) END DO !CONCURRENT @@ -417,6 +427,7 @@ IF (KSPLT==1) THEN CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE ) CALL MZF_DEVICE(1,IKU,1, ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = -ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_hor_vw.f90 b/src/MNH/turb_hor_vw.f90 index 6ea12270b..53d816171 100644 --- a/src/MNH/turb_hor_vw.f90 +++ b/src/MNH/turb_hor_vw.f90 @@ -299,6 +299,7 @@ IF (.NOT. 
L2D) THEN CALL MZM_DEVICE(PK,ZTMP1_DEVICE) CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * GY_W_VW_PWM(JI,JJ,JK) END DO !CONCURRENT @@ -350,12 +351,14 @@ IF (.NOT. L2D) THEN CALL MYM_DEVICE( PMZM_PRHODJ, ZTMP1_DEVICE ) CALL MYM_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)* ZTMP1_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE(1,IKU,1, ZTMP3_DEVICE, ZTMP1_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -379,24 +382,28 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *PDZY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP3_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT @@ -404,12 +411,14 @@ IF (.NOT. 
L2D) THEN CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) CALL MZF_DEVICE(1,IKU,1,PDZZ,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZM_DEVICE(1,IKU,1,ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK) & - ZTMP1_DEVICE(JI,JJ,JK) & @@ -443,6 +452,7 @@ IF (KSPLT==1) THEN #else CALL GZ_V_VW_DEVICE(1,IKU,1,PVM,PDZZ,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *( ZTMP1_DEVICE(JI,JJ,JK) + GY_W_VW_PWM(JI,JJ,JK) ) END DO !CONCURRENT @@ -450,6 +460,7 @@ IF (KSPLT==1) THEN CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = -ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_ver.f90 b/src/MNH/turb_ver.f90 index 673c4c304..7f2dc537c 100644 --- a/src/MNH/turb_ver.f90 +++ b/src/MNH/turb_ver.f90 @@ -666,6 +666,7 @@ ENDIF ! Denominator factor in 3rd order terms ! !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZD(JI,JJ,JK) = (1.+ZREDTH1(JI,JJ,JK)+ZREDR1(JI,JJ,JK)) * (1.+0.5*(ZREDTH1(JI,JJ,JK)+ZREDR1(JI,JJ,JK))) END DO diff --git a/src/MNH/turb_ver_dyn_flux.f90 b/src/MNH/turb_ver_dyn_flux.f90 index 834aa2d85..badf3088d 100644 --- a/src/MNH/turb_ver_dyn_flux.f90 +++ b/src/MNH/turb_ver_dyn_flux.f90 @@ -525,7 +525,10 @@ ZSOURCE(:,:,:) = 0. 
#ifndef MNH_BITREP ZDIRSINZW(:,:) = SQRT(1.-PDIRCOSZW(:,:)**2) #else -ZDIRSINZW(:,:) = SQRT(1.-BR_P2(PDIRCOSZW(:,:))) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZDIRSINZW(JI,JJ) = SQRT(1.-BR_P2(PDIRCOSZW(JI,JJ))) +END DO #endif ! compute the coefficients for the uncentred gradient computation near the ! ground @@ -570,6 +573,7 @@ CALL MXM_DEVICE( PDZZ, ZTMP4_DEVICE ) #ifndef MNH_BITREP ZA(:,:,:) = -PTSTEP * XCMFS * ZTMP1_DEVICE(:,:,:) * ZTMP3_DEVICE(:,:,:) / ZTMP4_DEVICE(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = -PTSTEP * XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / BR_P2(ZTMP4_DEVICE(JI,JJ,JK)) END DO @@ -585,21 +589,31 @@ END DO !$acc kernels #ifndef MNH_BITREP ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (PDIRCOSZW(:,:)**2 - ZDIRSINZW(:,:)**2) & + * PCOSSLOPE(:,:) #else -ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (BR_P2(PDIRCOSZW(:,:)) - BR_P2(ZDIRSINZW(:,:))) & +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZCOEFFLXU(JI,JJ,1) = PCDUEFF(JI,JJ) * (BR_P2(PDIRCOSZW(JI,JJ)) - BR_P2(ZDIRSINZW(JI,JJ))) & + * PCOSSLOPE(JI,JJ) +END DO #endif - * PCOSSLOPE(:,:) -ZCOEFFLXV(:,:,1) = PCDUEFF(:,:) * PDIRCOSZW(:,:) * PSINSLOPE(:,:) - -! prepare the implicit scheme coefficients for the surface flux -ZCOEFS(:,:,1)= ZCOEFFLXU(:,:,1) * PCOSSLOPE(:,:) * PDIRCOSZW(:,:) & - +ZCOEFFLXV(:,:,1) * PSINSLOPE(:,:) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZCOEFFLXV(JI,JJ,1) = PCDUEFF(JI,JJ) * PDIRCOSZW(JI,JJ) * PSINSLOPE(JI,JJ) + + ! prepare the implicit scheme coefficients for the surface flux + ZCOEFS(JI,JJ,1)= ZCOEFFLXU(JI,JJ,1) * PCOSSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & + +ZCOEFFLXV(JI,JJ,1) * PSINSLOPE(JI,JJ) +END DO ! ! 
average this flux to be located at the U,W vorticity point #ifndef MNH_OPENACC ZCOEFS(:,:,1:1)=MXM(ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) ) #else -ZTMP1_DEVICE(:,:,1) = ZCOEFS(:,:,1) / PDZZ(:,:,IKB) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZTMP1_DEVICE(JI,JJ,1) = ZCOEFS(JI,JJ,1) / PDZZ(JI,JJ,IKB) +END DO !$acc end kernels CALL MXM_DEVICE(ZTMP1_DEVICE(:,:,1:1),ZCOEFS(:,:,1:1)) #endif @@ -673,6 +687,7 @@ ZFLXZ(:,:,:) = -XCMFS * MXM(ZKEFF) * & DZM (PIMPL*ZRES + PEXPL*PUM) / MXM(PDZZ) #else !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PUM(JI,JJ,JK) END DO @@ -681,6 +696,7 @@ CALL MXM_DEVICE(ZKEFF,ZTMP1_DEVICE) CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE,ZTMP3_DEVICE) CALL MXM_DEVICE(PDZZ,ZTMP4_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = -XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -738,6 +754,7 @@ PDP(:,:,:) = - MZF( MXF ( ZFLXZ * GZ_U_UW(PUM,PDZZ) ) ) #else CALL GZ_U_UW_DEVICE(KKA,KKU,KKL,PUM,PDZZ,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -824,13 +841,15 @@ IF(HTURBDIM=='3DIM') THEN END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) /PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE ) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO @@ -839,18 +858,21 @@ IF(HTURBDIM=='3DIM') THEN IF (.NOT. 
LFLAT) THEN CALL MZF_DEVICE(KKA,KKU,KKL,PDZZ,ZTMP2_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK)*PDZX(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP3_DEVICE,ZTMP4_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE,ZTMP4_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -872,14 +894,16 @@ IF(HTURBDIM=='3DIM') THEN ZA(:,:,:)=-MZF( MXF ( ZFLXZ * GX_W_UW( PWM,PDXX,PDZZ,PDZX) ) ) #else CALL GX_W_UW_DEVICE(KKA,KKU,KKL, PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP1_DEVICE,ZTMP2_DEVICE ) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK)=-ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1013,6 +1037,7 @@ CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) #ifndef MNH_BITREP ZA(:,:,:) = - PTSTEP * XCMFS * ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) / ZTMP2_DEVICE(:,:,:)**2 #else +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - PTSTEP * XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) / BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO @@ -1026,21 +1051,31 @@ END DO ! 
wind following the slope #ifndef MNH_BITREP ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (PDIRCOSZW(:,:)**2 - ZDIRSINZW(:,:)**2) & + * PSINSLOPE(:,:) #else -ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (BR_P2(PDIRCOSZW(:,:)) - BR_P2(ZDIRSINZW(:,:))) & +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) +ZCOEFFLXU(JI,JJ,1) = PCDUEFF(JI,JJ) * (BR_P2(PDIRCOSZW(JI,JJ)) - BR_P2(ZDIRSINZW(JI,JJ))) & + * PSINSLOPE(JI,JJ) +END DO #endif - * PSINSLOPE(:,:) ZCOEFFLXV(:,:,1) = PCDUEFF(:,:) * PDIRCOSZW(:,:) * PCOSSLOPE(:,:) ! prepare the implicit scheme coefficients for the surface flux -ZCOEFS(:,:,1)= ZCOEFFLXU(:,:,1) * PSINSLOPE(:,:) * PDIRCOSZW(:,:) & - +ZCOEFFLXV(:,:,1) * PCOSSLOPE(:,:) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) +ZCOEFS(JI,JJ,1)= ZCOEFFLXU(JI,JJ,1) * PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & + +ZCOEFFLXV(JI,JJ,1) * PCOSSLOPE(JI,JJ) +END DO ! ! average this flux to be located at the V,W vorticity point #ifndef MNH_OPENACC ZCOEFS(:,:,1:1)=MYM(ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) ) #else -ZTMP1_DEVICE(:,:,1:1) = ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZTMP1_DEVICE(JI,JJ,1) = ZCOEFS(JI,JJ,1) / PDZZ(JI,JJ,IKB) +END DO !$acc end kernels CALL MYM_DEVICE(ZTMP1_DEVICE(:,:,1:1),ZCOEFS(:,:,1:1) ) #endif @@ -1119,14 +1154,16 @@ ZFLXZ(:,:,IKB:IKB) = MYM(PDZZ(:,:,IKB:IKB)) * & ) / 0.5 / ( 1. 
+ MYM(PRHODJ(:,:,KKA:KKA)) / MYM(PRHODJ(:,:,IKB:IKB)) ) #else !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) -ZTMP1_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PVM(JI,JJ,JK) + ZTMP1_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PVM(JI,JJ,JK) END DO !$acc end kernels CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE) CALL MYM_DEVICE(PDZZ,ZTMP3_DEVICE) CALL MYM_DEVICE(ZKEFF,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = -XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / ZTMP3_DEVICE(JI,JJ,JK) END DO @@ -1175,6 +1212,7 @@ ZA(:,:,:) = - MZF( MYF ( ZFLXZ * GZ_V_VW(PVM,PDZZ) ) ) #else CALL GZ_V_VW_DEVICE(KKA,KKU,KKL,PVM,PDZZ,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1182,6 +1220,7 @@ END DO CALL MYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP3_DEVICE, ZTMP1_DEVICE ) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1261,33 +1300,38 @@ IF(HTURBDIM=='3DIM') THEN #else IF (.NOT. L2D) THEN CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) /PDYY(JI,JJ,JK) END DO -!$acc end kernels + !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO -!$acc end kernels + !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE ) IF (.NOT. 
LFLAT) THEN CALL MZF_DEVICE(KKA,KKU,KKL,PDZZ,ZTMP2_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK)*PDZY(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -1310,18 +1354,20 @@ IF(HTURBDIM=='3DIM') THEN ZA(:,:,:) = - MZF( MYF ( ZFLXZ(:,:,:) * GY_W_VW( PWM,PDYY,PDZZ,PDZY) ) ) #else CALL GY_W_VW_DEVICE(KKA,KKU,KKL, PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO -!$acc end kernels + !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE ) -!$acc kernels + !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - ZTMP2_DEVICE(JI,JJ,JK) END DO -!$acc end kernels + !$acc end kernels #endif ! ! 
evaluate the dynamic production at w(IKB+KKL) in PDP(IKB) diff --git a/src/MNH/turb_ver_thermo_corr.f90 b/src/MNH/turb_ver_thermo_corr.f90 index c045f7a7d..9227a238d 100644 --- a/src/MNH/turb_ver_thermo_corr.f90 +++ b/src/MNH/turb_ver_thermo_corr.f90 @@ -572,11 +572,15 @@ END IF #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = PPHI3(:,:,:)*PDTH_DZ(:,:,:)**2 #else - ZTMP1_DEVICE(:,:,:) = PPHI3(:,:,:)*BR_P2(PDTH_DZ(:,:,:)) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = PPHI3(JI,JJ,JK)*BR_P2(PDTH_DZ(JI,JJ,JK)) + END DO #endif !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -718,12 +722,14 @@ END IF !$acc end kernels CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP3_DEVICE(:,:,:),ZTMP4_DEVICE(:,:,:) ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) @@ -744,17 +750,20 @@ END IF +ZCOEFF(:,:,IKB )*PTHLP(:,:,IKB ) )**2 & ) #else - ZFLXZ(:,:,IKB) = XCTV * PPHI3(:,:,IKB+KKL) * PLM(:,:,IKB) & - * PLEPS(:,:,IKB) & + !$acc loop independent collapse(2) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZFLXZ(JI,JJ,IKB) = XCTV * PPHI3(JI,JJ,IKB+KKL) * PLM(JI,JJ,IKB) & + * PLEPS(JI,JJ,IKB) & *( PEXPL * & - BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PTHLM(:,:,IKB+2*KKL) & - +ZCOEFF(:,:,IKB+KKL )*PTHLM(:,:,IKB+KKL ) & - +ZCOEFF(:,:,IKB )*PTHLM(:,:,IKB ) ) & + BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLM(JI,JJ,IKB+2*KKL) & + +ZCOEFF(JI,JJ,IKB+KKL )*PTHLM(JI,JJ,IKB+KKL ) & + 
+ZCOEFF(JI,JJ,IKB )*PTHLM(JI,JJ,IKB ) ) & +PIMPL * & - BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PTHLP(:,:,IKB+2*KKL) & - +ZCOEFF(:,:,IKB+KKL )*PTHLP(:,:,IKB+KKL ) & - +ZCOEFF(:,:,IKB )*PTHLP(:,:,IKB ) ) & - ) + BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLP(JI,JJ,IKB+2*KKL) & + +ZCOEFF(JI,JJ,IKB+KKL )*PTHLP(JI,JJ,IKB+KKL ) & + +ZCOEFF(JI,JJ,IKB )*PTHLP(JI,JJ,IKB ) ) & + ) + END DO #endif ! ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) @@ -843,6 +852,7 @@ END IF !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:)) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1031,6 +1041,7 @@ END DO CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:)) CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP4_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) @@ -1053,6 +1064,7 @@ END DO CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP8_DEVICE(:,:,:),ZTMP1_DEVICE(:,:,:)) !!! 
!$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP7_DEVICE(JI,JJ,JK) = ( ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK)) * PDR_DZ(JI,JJ,JK) & * ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) & @@ -1068,6 +1080,7 @@ END DO CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:)) CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP4_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) /PDZZ(JI,JJ,JK) @@ -1078,6 +1091,7 @@ END DO CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:), ZTMP5_DEVICE(:,:,:) ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*0.5 * ZTMP3_DEVICE(JI,JJ,JK) & @@ -1087,23 +1101,26 @@ END DO #endif ! ! 
special case near the ground ( uncentred gradient ) - ZFLXZ(:,:,IKB) = & - (XCHT1 * PPHI3(:,:,IKB+KKL) + XCHT2 * PPSI3(:,:,IKB+KKL)) & + !$acc loop independent collapse(2) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZFLXZ(JI,JJ,IKB) = & + (XCHT1 * PPHI3(JI,JJ,IKB+KKL) + XCHT2 * PPSI3(JI,JJ,IKB+KKL)) & *( PEXPL * & - ( ZCOEFF(:,:,IKB+2*KKL)*PTHLM(:,:,IKB+2*KKL) & - +ZCOEFF(:,:,IKB+KKL )*PTHLM(:,:,IKB+KKL ) & - +ZCOEFF(:,:,IKB )*PTHLM(:,:,IKB )) & - *( ZCOEFF(:,:,IKB+2*KKL)*PRM(:,:,IKB+2*KKL,1) & - +ZCOEFF(:,:,IKB+KKL )*PRM(:,:,IKB+KKL,1 ) & - +ZCOEFF(:,:,IKB )*PRM(:,:,IKB ,1 )) & + ( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLM(JI,JJ,IKB+2*KKL) & + +ZCOEFF(JI,JJ,IKB+KKL )*PTHLM(JI,JJ,IKB+KKL ) & + +ZCOEFF(JI,JJ,IKB )*PTHLM(JI,JJ,IKB )) & + *( ZCOEFF(JI,JJ,IKB+2*KKL)*PRM(JI,JJ,IKB+2*KKL,1) & + +ZCOEFF(JI,JJ,IKB+KKL )*PRM(JI,JJ,IKB+KKL,1 ) & + +ZCOEFF(JI,JJ,IKB )*PRM(JI,JJ,IKB ,1 )) & +PIMPL * & - ( ZCOEFF(:,:,IKB+2*KKL)*PTHLP(:,:,IKB+2*KKL) & - +ZCOEFF(:,:,IKB+KKL )*PTHLP(:,:,IKB+KKL ) & - +ZCOEFF(:,:,IKB )*PTHLP(:,:,IKB )) & - *( ZCOEFF(:,:,IKB+2*KKL)*PRP(:,:,IKB+2*KKL ) & - +ZCOEFF(:,:,IKB+KKL )*PRP(:,:,IKB+KKL ) & - +ZCOEFF(:,:,IKB )*PRP(:,:,IKB )) & - ) + ( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLP(JI,JJ,IKB+2*KKL) & + +ZCOEFF(JI,JJ,IKB+KKL )*PTHLP(JI,JJ,IKB+KKL ) & + +ZCOEFF(JI,JJ,IKB )*PTHLP(JI,JJ,IKB )) & + *( ZCOEFF(JI,JJ,IKB+2*KKL)*PRP(JI,JJ,IKB+2*KKL ) & + +ZCOEFF(JI,JJ,IKB+KKL )*PRP(JI,JJ,IKB+KKL ) & + +ZCOEFF(JI,JJ,IKB )*PRP(JI,JJ,IKB )) & + ) + END DO ! ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) ! 
@@ -1199,11 +1216,15 @@ END DO #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = PPSI3(:,:,:)*PDR_DZ(:,:,:)**2 #else - ZTMP1_DEVICE(:,:,:) = PPSI3(:,:,:)*BR_P2(PDR_DZ(:,:,:)) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(JI,JJ,JK) = PPSI3(JI,JJ,JK)*BR_P2(PDR_DZ(JI,JJ,JK)) + END DO #endif !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1349,18 +1370,21 @@ END DO !$acc end kernels CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP1_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:)) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) & @@ -1382,17 +1406,20 @@ END DO +ZCOEFF(:,:,IKB )*PRP(:,:,IKB ))**2 & ) #else - ZFLXZ(:,:,IKB) = XCHV * PPSI3(:,:,IKB+KKL) * PLM(:,:,IKB) & - * PLEPS(:,:,IKB) & + !$acc loop independent collapse(2) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZFLXZ(JI,JJ,IKB) = XCHV * PPSI3(JI,JJ,IKB+KKL) * PLM(JI,JJ,IKB) & + * PLEPS(JI,JJ,IKB) & *( PEXPL * & - BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PRM(:,:,IKB+2*KKL,1) & - +ZCOEFF(:,:,IKB+KKL )*PRM(:,:,IKB+KKL,1 ) & - +ZCOEFF(:,:,IKB )*PRM(:,:,IKB ,1 )) & + BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PRM(JI,JJ,IKB+2*KKL,1) & + 
+ZCOEFF(JI,JJ,IKB+KKL )*PRM(JI,JJ,IKB+KKL,1 ) & + +ZCOEFF(JI,JJ,IKB )*PRM(JI,JJ,IKB ,1 )) & +PIMPL * & - BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PRP(:,:,IKB+2*KKL) & - +ZCOEFF(:,:,IKB+KKL )*PRP(:,:,IKB+KKL ) & - +ZCOEFF(:,:,IKB )*PRP(:,:,IKB )) & - ) + BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PRP(JI,JJ,IKB+2*KKL) & + +ZCOEFF(JI,JJ,IKB+KKL )*PRP(JI,JJ,IKB+KKL ) & + +ZCOEFF(JI,JJ,IKB )*PRP(JI,JJ,IKB )) & + ) + END DO #endif ! ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) diff --git a/src/MNH/turb_ver_thermo_flux.f90 b/src/MNH/turb_ver_thermo_flux.f90 index a7f75d118..9d7775365 100644 --- a/src/MNH/turb_ver_thermo_flux.f90 +++ b/src/MNH/turb_ver_thermo_flux.f90 @@ -634,6 +634,7 @@ ZDFDDTDZ(:,:,:) = -XCSHF*ZKEFF(:,:,:)*D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PR #else CALL DZM_DEVICE(KKA,KKU,KKL,PTHLM,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = -XCSHF*PPHI3(JI,JJ,JK)*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO @@ -641,6 +642,7 @@ END DO ! 
CALL D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PRED2TH3,PRED2THR3,HTURBDIM,GUSERV,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZDFDDTDZ(JI,JJ,JK) = -XCSHF*ZKEFF(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -815,6 +817,7 @@ ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:) !$acc end kernels CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO @@ -862,6 +865,7 @@ END IF IF (KRR /= 0) THEN CALL MZM_DEVICE(PETHETA,ZTMP1_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO @@ -916,6 +920,7 @@ IF ( KRRL >= 1 ) THEN !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE ) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZFLXZ(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO @@ -931,6 +936,7 @@ END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE ) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZFLXZ(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO @@ -1007,8 +1013,12 @@ IF (LLES_CALL) THEN END IF !* diagnostic of mixing coefficient for heat CALL DZM_DEVICE(KKA,KKU,KKL,PTHLP,ZA) -!$acc kernels - WHERE (ZA(:,:,:)==0.) ZA(:,:,:)=1.E-6 + !$acc kernels + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + IF (ZA(JI,JJ,JK)==0.) 
THEN + ZA(JI,JJ,JK)=1.E-6 + END IF + END DO ZA(:,:,:) = - ZFLXZ(:,:,:) / ZA(:,:,:) * PDZZ(:,:,:) ZA(:,:,IKB) = XCSHF*PPHI3(:,:,IKB)*ZKEFF(:,:,IKB) !$acc end kernels @@ -1047,6 +1057,7 @@ IF (KRR /= 0) THEN #else CALL DZM_DEVICE(KKA,KKU,KKL,PRM(:,:,:,1),ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = -XCSHF*PPSI3(JI,JJ,JK)*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO @@ -1054,6 +1065,7 @@ IF (KRR /= 0) THEN CALL D_PSI3DRDZ_O_DDRDZ(PPSI3,PREDR1,PREDTH1,PRED2R3,PRED2THR3,HTURBDIM,GUSERV,ZTMP1_DEVICE) !CALL D_PHI3DRDZ_O_DDRDZ_DEVICE(PPSI3,PREDR1,PREDTH1,PRED2R3,PRED2THR3,HTURBDIM,GUSERV,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZDFDDRDZ(JI,JJ,JK) = -XCSHF*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1227,6 +1239,7 @@ IF (KRR /= 0) THEN !$acc end kernels CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +!$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) + PIMPL * ZDFDDRDZ(JI,JJ,JK) *ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO @@ -1268,12 +1281,14 @@ END DO #else CALL MZM_DEVICE(PEMOIST,ZTMP1_DEVICE) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels + !$acc loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = PBETA(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO diff --git a/src/Makefile.MESONH.mk b/src/Makefile.MESONH.mk index f7b5994ea..c5ca2e753 100644 --- a/src/Makefile.MESONH.mk +++ b/src/Makefile.MESONH.mk @@ -490,7 +490,7 @@ ifeq "$(VER_CDF)" "CDFAUTO" DIR_CDFC?=${SRC_MESONH}/src/LIB/netcdf-${VERSION_CDFC} DIR_CDFCXX?=${SRC_MESONH}/src/LIB/netcdf-cxx-${VERSION_CDFCXX} 
DIR_CDFF?=${SRC_MESONH}/src/LIB/netcdf-fortran-${VERSION_CDFF} -CDF_PATH?=${SRC_MESONH}/src/LIB/netcdf-${ARCH}-R${MNH_REAL}I${MNH_INT} +CDF_PATH?=${OBJDIR_MASTER}/NETCDF-${VERSION_CDFF} CDF_MOD?=${CDF_PATH}/include/netcdf.mod # INC_NETCDF ?= -I${CDF_PATH}/include -- GitLab