From c297b792b949046e77754592bddfabf98eb2f9a6 Mon Sep 17 00:00:00 2001 From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr> Date: Mon, 17 Jan 2022 10:40:59 +0100 Subject: [PATCH] Philippe 17/01/2022: OpenACC: Cray compiler does not like collapse directive before do concurrent --- src/MNH/advection_metsv.f90 | 10 ++++ src/MNH/emoist.f90 | 6 ++- src/MNH/gradient_u.f90 | 10 ++++ src/MNH/gradient_v.f90 | 10 ++++ src/MNH/ice4_sedimentation_split.f90 | 4 +- src/MNH/prandtl.f90 | 30 ++++++++++++ src/MNH/rain_ice_nucleation.f90 | 4 +- src/MNH/rain_ice_sedimentation_split.f90 | 4 +- src/MNH/tke_eps_sources.f90 | 10 ++++ src/MNH/tridiag_thermo.f90 | 30 +++++++++++- src/MNH/tridiag_tke.f90 | 18 ++++++- src/MNH/tridiag_w.f90 | 26 +++++++++- src/MNH/tridiag_wind.f90 | 10 +++- src/MNH/turb.f90 | 2 + src/MNH/turb_hor_dyn_corr.f90 | 40 ++++++++++++++++ src/MNH/turb_hor_thermo_flux.f90 | 48 +++++++++++++++++++ src/MNH/turb_hor_tke.f90 | 26 ++++++++++ src/MNH/turb_hor_uv.f90 | 38 +++++++++++++++ src/MNH/turb_hor_uw.f90 | 22 +++++++++ src/MNH/turb_hor_vw.f90 | 22 +++++++++ src/MNH/turb_ver.f90 | 4 +- src/MNH/turb_ver_dyn_flux.f90 | 60 ++++++++++++++++++++++++ src/MNH/turb_ver_thermo_corr.f90 | 34 ++++++++++++++ src/MNH/turb_ver_thermo_flux.f90 | 18 +++++++ src/ZSOLVER/advection_metsv.f90 | 12 ++++- src/ZSOLVER/contrav.f90 | 18 ++++++- src/ZSOLVER/pressurez.f90 | 10 +++- src/ZSOLVER/tridiag_thermo.f90 | 30 +++++++++++- src/ZSOLVER/turb.f90 | 2 + src/ZSOLVER/turb_hor_dyn_corr.f90 | 40 ++++++++++++++++ src/ZSOLVER/turb_hor_thermo_flux.f90 | 46 ++++++++++++++++++ 31 files changed, 631 insertions(+), 13 deletions(-) diff --git a/src/MNH/advection_metsv.f90 b/src/MNH/advection_metsv.f90 index 395d44553..766bd277c 100644 --- a/src/MNH/advection_metsv.f90 +++ b/src/MNH/advection_metsv.f90 @@ -584,14 +584,18 @@ IF (.NOT. L1D) THEN #else IF (.NOT. L2D) THEN !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) END DO !$acc end kernels ELSE !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) END DO @@ -605,7 +609,9 @@ ELSE #ifndef MNH_BITREP ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK))) END DO @@ -929,14 +935,18 @@ DO JSPL=1,KSPLIT !$acc end kernels END IF !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(4) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) & * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) END DO !CONCURRENT !$acc loop seq DO JSV = 1, KSV +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + & PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) diff --git a/src/MNH/emoist.f90 b/src/MNH/emoist.f90 index 142586b90..50d6d0779 100644 --- a/src/MNH/emoist.f90 +++ b/src/MNH/emoist.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1995-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1995-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -204,7 +204,9 @@ ELSE DO JRR=5,KRR ZRW(1:JIU,1:JJU,1:JKU) = ZRW(1:JIU,1:JJU,1:JKU) + PRM(1:JIU,1:JJU,1:JKU,JRR) ENDDO +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = 1. + ( & ! Compute A (1.+ZDELTA) * (PRM(JI,JJ,JK,1) - PRM(JI,JJ,JK,2) - PRM(JI,JJ,JK,4)) & @@ -232,7 +234,9 @@ ELSE DO JRR=3,KRR ZRW(1:JIU,1:JJU,1:JKU) = ZRW(1:JIU,1:JJU,1:JKU) + PRM(1:JIU,1:JJU,1:JKU,JRR) ENDDO +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = 1. + ( & ! Compute ZA (1.+ZDELTA) * (PRM(JI,JJ,JK,1) - PRM(JI,JJ,JK,2)) & diff --git a/src/MNH/gradient_u.f90 b/src/MNH/gradient_u.f90 index c959eac3a..68c007167 100644 --- a/src/MNH/gradient_u.f90 +++ b/src/MNH/gradient_u.f90 @@ -237,14 +237,18 @@ IF (.NOT. LFLAT) THEN CALL DXF_DEVICE(PA,ZTMP1_DEVICE) CALL DZM_DEVICE( PA, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !CONCURRENT @@ -426,7 +430,9 @@ IF (.NOT. LFLAT) THEN CALL DZM_DEVICE( PA, ZTMP1_DEVICE ) CALL MXM_DEVICE(PDZZ,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)/ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -434,7 +440,9 @@ IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) CALL MXM_DEVICE(PDZY,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -443,7 +451,9 @@ IF (.NOT. LFLAT) THEN CALL DYM_DEVICE(PA,ZTMP1_DEVICE) CALL MXM_DEVICE(PDYY,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PGY_U_UV_DEVICE(JI,JJ,JK)= ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP2_DEVICE(JI,JJ,JK) ) / ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/gradient_v.f90 b/src/MNH/gradient_v.f90 index 5f44dbffb..6bda4c104 100644 --- a/src/MNH/gradient_v.f90 +++ b/src/MNH/gradient_v.f90 @@ -239,14 +239,18 @@ IF (.NOT. LFLAT) THEN CALL DYF_DEVICE(PA,ZTMP1_DEVICE) CALL DZM_DEVICE( PA, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO !CONCURRENT @@ -427,7 +431,9 @@ IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PDZZ,ZTMP2_DEVICE) CALL DZM_DEVICE( PA, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -435,7 +441,9 @@ IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) CALL MYM_DEVICE(PDZX,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) *ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -443,7 +451,9 @@ IF (.NOT. LFLAT) THEN CALL MZF_DEVICE( ZTMP4_DEVICE, ZTMP2_DEVICE ) CALL MYM_DEVICE(PDXX,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PGX_V_UV_DEVICE(JI,JJ,JK)= ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP2_DEVICE(JI,JJ,JK) ) / ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/ice4_sedimentation_split.f90 b/src/MNH/ice4_sedimentation_split.f90 index 63921e9d3..b6ff2f792 100644 --- a/src/MNH/ice4_sedimentation_split.f90 +++ b/src/MNH/ice4_sedimentation_split.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -680,7 +680,9 @@ DO WHILE (ANY(ZREMAINT>0.)) !$acc end kernels !$acc kernels DO JK = KKTB , KKTE +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT( JI = 1 : KIT, JJ = 1 : KJT ) ZMRCHANGE(JI,JJ) = ZMAX_TSTEP(JI,JJ) * POORHODZ(JI,JJ,JK)*(ZWSED(JI,JJ,JK+KKL)-ZWSED(JI,JJ,JK)) PRXT(JI,JJ,JK) = PRXT(JI,JJ,JK) + ZMRCHANGE(JI,JJ) + PPRXS(JI,JJ,JK) * ZMAX_TSTEP(JI,JJ) diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90 index 4f07ff0d0..d87bca878 100644 --- a/src/MNH/prandtl.f90 +++ b/src/MNH/prandtl.f90 @@ -452,7 +452,9 @@ END WHERE WHERE (PREDTH1(:,:,:) < -ZMINVAL) ZW2(:,:,:) = (-ZMINVAL) / (PREDTH1(:,:,:)) END WHERE +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZW2(JI,JJ,JK) = MIN( ZW1(JI,JJ,JK),ZW2(JI,JJ,JK) ) END DO @@ -463,7 +465,9 @@ WHERE (PREDR1(:,:,:)<-ZMINVAL) END WHERE !!$ZW1(:,:,:) = MIN(ZW2(:,:,:),ZW1(:,:,:)) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZW1(JI,JJ,JK) = MIN( ZW2(JI,JJ,JK),ZW1(JI,JJ,JK) ) END DO @@ -566,12 +570,16 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) !$acc kernels #ifndef MNH_BITREP +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 END DO !CONCURRENT #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) END DO !CONCURRENT @@ -607,12 +615,16 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) !$acc kernels #ifndef MNH_BITREP +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 END DO !CONCURRENT #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) END DO !CONCURRENT @@ -649,7 +661,9 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GX_M_M_DEVICE(PTHLM ,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -662,7 +676,9 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model * PEMOIST(:,:,:) * PETHETA(:,:,:) & * ZTMP2_DEVICE(:,:,:) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2THR3(JI,JJ,JK)= PREDR1(JI,JJ,JK) * PREDTH1(JI,JJ,JK) + BR_P2(XCTV)*BR_P2(PBLL_O_E(JI,JJ,JK)) * & PEMOIST(JI,JJ,JK) * PETHETA(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) @@ -747,12 +763,16 @@ ELSE ! 3D case in a 3D model CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels #ifndef MNH_BITREP +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 + ZTMP2_DEVICE(JI,JJ,JK)**2 END DO #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO @@ -791,12 +811,16 @@ ELSE ! 3D case in a 3D model CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels #ifndef MNH_BITREP +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 + ZTMP2_DEVICE(JI,JJ,JK)**2 END DO #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO @@ -839,7 +863,9 @@ ELSE ! 3D case in a 3D model CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP3_DEVICE) CALL GY_M_M_DEVICE(PTHLM ,PDYY,PDZZ,PDZY,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)+ & ZTMP3_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) @@ -881,12 +907,16 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.F. and KRR=0 not CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels #ifndef MNH_BITREP +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 + ZTMP2_DEVICE(JI,JJ,JK)**2 END DO #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO diff --git a/src/MNH/rain_ice_nucleation.f90 b/src/MNH/rain_ice_nucleation.f90 index f96677049..4fcdd4718 100644 --- a/src/MNH/rain_ice_nucleation.f90 +++ b/src/MNH/rain_ice_nucleation.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1995-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1995-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -176,7 +176,9 @@ if ( lbudget_ri ) call Budget_store_init( tbudgets(NBUDGET_RI), 'HENU', pris(:, #ifndef MNH_BITREP PT(:,:,:) = PTHT(:,:,:) * ( PPABST(:,:,:) / XP00 ) ** ( XRD / XCPD ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PT(JI,JJ,JK) = PTHT(JI,JJ,JK) * BR_POW( PPABST(JI,JJ,JK) / XP00, XRD / XCPD ) END DO diff --git a/src/MNH/rain_ice_sedimentation_split.f90 b/src/MNH/rain_ice_sedimentation_split.f90 index 8fdf9a95d..38f7f55e3 100644 --- a/src/MNH/rain_ice_sedimentation_split.f90 +++ b/src/MNH/rain_ice_sedimentation_split.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1995-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1995-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -453,7 +453,9 @@ DO JN = 1 , KSPLITR IF ( KRR == 7 ) PRHS(:,:,:) = PRHS(:,:,:) + ZPRHS(:,:,:) * ZTSPLITR END IF ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=KIB:KIE,JJ=KJB:KJE,JK=KKTB:KKTE ) IF ( OSEDIC ) GSEDIMC(JI,JJ,JK) = & PRCS(JI,JJ,JK) > ZRTMIN(2) diff --git a/src/MNH/tke_eps_sources.f90 b/src/MNH/tke_eps_sources.f90 index c37a2d81a..777745fce 100644 --- a/src/MNH/tke_eps_sources.f90 +++ b/src/MNH/tke_eps_sources.f90 @@ -347,7 +347,9 @@ IKE=KKU-JPVEXT_TURB*KKL #ifndef MNH_BITREP ZKEFF(:,:,:) = PLM(:,:,:) * SQRT(PTKEM(:,:,:)) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZKEFF(JI,JJ,JK) = PLM(JI,JJ,JK) * BR_POW(PTKEM(JI,JJ,JK),0.5) END DO @@ -386,12 +388,16 @@ PDP(:,:,IKB) = PDP(:,:,IKB) * (1. + PDZZ(:,:,IKB+KKL)/PDZZ(:,:,IKB)) #ifndef MNH_BITREP ZFLX(:,:,:) = XCED * SQRT(PTKEM(:,:,:)) / PLEPS(:,:,:) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = XCED * BR_POW(PTKEM(JI,JJ,JK),0.5) / PLEPS(JI,JJ,JK) END DO #endif +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSOURCE(JI,JJ,JK) = ( PRTKES(JI,JJ,JK) + PRTKESM(JI,JJ,JK) ) / PRHODJ(JI,JJ,JK) & - PTKEM(JI,JJ,JK) / PTSTEP & @@ -419,7 +425,9 @@ CALL MZM_DEVICE(PRHODJ,ZTMP2_DEVICE) !Warning: re-used later #ifndef MNH_BITREP ZA(:,:,:) = - PTSTEP * XCET * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) / PDZZ(:,:,:)**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - PTSTEP * XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / BR_P2(PDZZ(JI,JJ,JK)) END DO !CONCURRENT @@ -435,7 +443,9 @@ CALL TRIDIAG_TKE(KKA,KKU,KKL,PTKEM,ZA,PTSTEP,PEXPL,PIMPL,PRHODJ,& CALL GET_HALO(ZRES) #else !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PTSTEP*ZFLX(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/tridiag_thermo.f90 b/src/MNH/tridiag_thermo.f90 index d3d3c1b00..07b817501 100644 --- a/src/MNH/tridiag_thermo.f90 +++ b/src/MNH/tridiag_thermo.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 2003-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 2003-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -270,7 +270,9 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ) #ifndef MNH_BITREP ZRHODJ_DFDDTDZ_O_DZ2 = ZMZM_RHODJ*PDFDDTDZ/PDZZ**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK)) END DO !CONCURRENT @@ -290,7 +292,9 @@ ZY=0. ! --------------------------- ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL) & @@ -301,7 +305,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL) & @@ -314,7 +320,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) & @@ -335,7 +343,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! -------------- ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL @@ -343,14 +353,18 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZA(JI,JJ,JK) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL ZB(JI,JJ,JK) = PRHODJ(JI,JJ,JK)/PTSTEP & @@ -361,7 +375,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL ZB(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)/PTSTEP & @@ -376,7 +392,9 @@ END DO !CONCURRENT ! -------- ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) @@ -384,7 +402,9 @@ END DO !CONCURRENT ! !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif ! acc loop gang, vector collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) !DO JJ=1,JJU @@ -400,7 +420,9 @@ DO JK = IKB+KKL,IKE-KKL,KKL END DO !CONCURRENT END DO ! special treatment for the last level +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -415,7 +437,9 @@ END DO !CONCURRENT ! !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif ! acc loop gang, vector collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) @@ -426,7 +450,9 @@ END DO ELSE ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK) END DO !CONCURRENT @@ -439,7 +465,9 @@ END IF ! ---------------------------------------- ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) diff --git a/src/MNH/tridiag_tke.f90 b/src/MNH/tridiag_tke.f90 index 516196038..fcee8400d 100644 --- a/src/MNH/tridiag_tke.f90 +++ b/src/MNH/tridiag_tke.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -222,14 +222,18 @@ IKE=KKU-JPVEXT_TURB*KKL ! ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - & PEXPL / PRHODJ(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB)) END DO !CONCURRENT ! DO JK=IKTB+1,IKTE-1 +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - & PEXPL / PRHODJ(JI,JJ,JK) * & @@ -240,7 +244,9 @@ DO JK=IKTB+1,IKTE-1 END DO !CONCURRENT END DO ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + & PEXPL / PRHODJ(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL)) @@ -255,7 +261,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! ! going up ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = 1. + PIMPL * (PDIAG(JI,JJ,IKB)-PA(JI,JJ,IKB+KKL) / PRHODJ(JI,JJ,IKB)) ! bet = b(ikb) @@ -282,7 +290,9 @@ IF ( PIMPL > 1.E-10 ) THEN END DO END DO ! special treatment for the last level +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJ(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -300,7 +310,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL +#ifdef MNH_COMPILER_NVHPC !$acc loop gang, vector collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) END DO !CONCURRENT @@ -308,7 +320,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! ELSE ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,IKTB:IKTE) = ZY(JI,JJ,IKTB:IKTE) END DO !CONCURRENT @@ -319,7 +333,9 @@ END IF !* 3. FILL THE UPPER AND LOWER EXTERNAL VALUES ! ---------------------------------------- ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) diff --git a/src/MNH/tridiag_w.f90 b/src/MNH/tridiag_w.f90 index 5a6215036..67a4895fd 100644 --- a/src/MNH/tridiag_w.f90 +++ b/src/MNH/tridiag_w.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 2011-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 2011-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -285,7 +285,9 @@ ZY=0. !!#endif ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - PRHODJ(JI,JJ,IKB ) * PF(JI,JJ,IKB )/PMZF_DZZ(JI,JJ,IKB ) & @@ -296,7 +298,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) ZY(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - PRHODJ(JI,JJ,JK ) * PF(JI,JJ,JK )/PMZF_DZZ(JI,JJ,JK ) & @@ -309,7 +313,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - PRHODJ(JI,JJ,IKE ) * PF(JI,JJ,IKE )/PMZF_DZZ(JI,JJ,IKE ) & @@ -334,21 +340,27 @@ END DO !CONCURRENT !! c(k) = + PRHODJ(k) * PDFDDWDZ(k)/PMZF_DZZ(k)**2 ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) ZA(JI,JJ,JK) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1) ZB(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)/PTSTEP & @@ -359,13 +371,17 @@ END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1) END DO !CONCURRENT !$acc end kernels !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE ) & @@ -380,7 +396,9 @@ END DO !CONCURRENT ! -------- ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) @@ -388,7 +406,9 @@ END DO !CONCURRENT ! !$acc loop seq DO JK = IKB+1,IKE-1 +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -399,7 +419,9 @@ DO JK = IKB+1,IKE-1 END DO !CONCURRENT END DO ! special treatment for the last level +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -414,7 +436,9 @@ END DO !CONCURRENT ! !$acc loop seq DO JK = IKE-1,IKB,-1 +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1) END DO !CONCURRENT diff --git a/src/MNH/tridiag_wind.f90 b/src/MNH/tridiag_wind.f90 index e0f6a1e41..2a1298642 100644 --- a/src/MNH/tridiag_wind.f90 +++ b/src/MNH/tridiag_wind.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -265,7 +265,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! going up ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) & + PCOEFS(JI,JJ) * PTSTEP ) ! bet = b(ikb) @@ -274,7 +276,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL +#ifdef MNH_COMPILER_NVHPC !$acc loop independent gang, vector collapse(2) +#endif DO CONCURRENT ( JJ=1:JJU , JI=1:JIU ) ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -289,7 +293,9 @@ IF ( PIMPL > 1.E-10 ) THEN END DO ! CONCURRENT END DO ! special treatment for the last level +#ifdef MNH_COMPILER_NVHPC !$acc loop independent gang, vector collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -306,7 +312,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL +#ifdef MNH_COMPILER_NVHPC !$acc loop gang, vector collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) END DO !CONCURRENT diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90 index 752d81803..0c23b9dda 100644 --- a/src/MNH/turb.f90 +++ b/src/MNH/turb.f90 @@ -1058,7 +1058,9 @@ ENDIF ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) / & (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) / & (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) ) diff --git a/src/MNH/turb_hor_dyn_corr.f90 b/src/MNH/turb_hor_dyn_corr.f90 index 192702b70..4eace2008 100644 --- a/src/MNH/turb_hor_dyn_corr.f90 +++ b/src/MNH/turb_hor_dyn_corr.f90 @@ -407,7 +407,9 @@ IKU = SIZE(PUM,3) #ifndef MNH_BITREP ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) END DO @@ -444,7 +446,9 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' ) ! Computes the U variance IF (.NOT. L2D) THEN !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK) & @@ -622,7 +626,9 @@ ZFLX(:,:,IKB-1) = & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) +#ifdef MNH_COMPILER_NVHPC !acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -695,7 +701,9 @@ END IF #else CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE) !$acc kernels async(10) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -708,21 +716,27 @@ CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -743,7 +757,9 @@ END IF IF (KSPLT==1) THEN ! Contribution to the dynamic production of TKE: !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK) END DO !CONCURRENT @@ -794,7 +810,9 @@ END IF ! Computes the V variance IF (.NOT. L2D) THEN !$acc kernels async(3) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK) & @@ -846,7 +864,9 @@ ZFLX(:,:,IKB-1) = & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) +#ifdef MNH_COMPILER_NVHPC !acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -914,7 +934,9 @@ IF (.NOT. L2D) THEN #else CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE) !$acc kernels async(10) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -927,21 +949,27 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -954,7 +982,9 @@ IF (.NOT. L2D) THEN !$acc end kernels ELSE !$acc kernels async(1) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -963,7 +993,9 @@ IF (.NOT. L2D) THEN ! Contribution to the dynamic production of TKE: IF (KSPLT==1) THEN !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK) END DO !CONCURRENT @@ -1019,7 +1051,9 @@ END IF ! Computes the W variance IF (.NOT. L2D) THEN !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & @@ -1065,7 +1099,9 @@ ZFLX(:,:,IKB-1) = & +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) +#ifdef MNH_COMPILER_NVHPC !acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & @@ -1139,7 +1175,9 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ) CALL GZ_W_M_DEVICE(ZWP,PDZZ,GZ_W_M_ZWP) #endif !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK)) @@ -1149,7 +1187,9 @@ END DO !CONCURRENT IF (KSPLT==1) THEN !Contribution to the dynamic production of TKE: !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_hor_thermo_flux.f90 b/src/MNH/turb_hor_thermo_flux.f90 index 7a959f5b9..f9e1c3202 100644 --- a/src/MNH/turb_hor_thermo_flux.f90 +++ b/src/MNH/turb_hor_thermo_flux.f90 @@ -340,7 +340,9 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PTHLM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -383,7 +385,9 @@ ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( SPREAD( PSFTHM(:,:)* PDIRCOSXW(:,:), 3,1) ) #else !$acc kernels !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) +#ifdef MNH_COMPILER_NVHPC !acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZTMP1_DEVICE(JI,JJ,1) = PSFTHM(JI,JJ)* PDIRCOSXW(JI,JJ) END DO @@ -408,28 +412,36 @@ END IF IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE, ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO @@ -775,7 +787,9 @@ END IF CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -814,35 +828,45 @@ END DO IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -1153,7 +1177,9 @@ END IF CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1200,35 +1226,45 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL DZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1520,7 +1556,9 @@ IF (KRR/=0) THEN CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY, ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -1566,7 +1604,9 @@ IF (KRR/=0) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO @@ -1574,21 +1614,27 @@ IF (KRR/=0) THEN CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO @@ -1596,7 +1642,9 @@ IF (KRR/=0) THEN CALL DZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) END DO diff --git a/src/MNH/turb_hor_tke.f90 b/src/MNH/turb_hor_tke.f90 index 2319dbaa8..c4cc5f5f1 100644 --- a/src/MNH/turb_hor_tke.f90 +++ b/src/MNH/turb_hor_tke.f90 @@ -228,7 +228,9 @@ ZFLX = -XCET * MXM(PK) * GX_M_U(1,IKU,1,PTKEM,PDXX,PDZZ,PDZX) ! < u'e > CALL MXM_DEVICE(PK,ZTMP1_DEVICE) CALL GX_M_U_DEVICE(1,IKU,1,PTKEM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) ! < u'e > END DO !CONCURRENT @@ -236,7 +238,9 @@ END DO !CONCURRENT ! ! special case near the ground ( uncentred gradient ) ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB) = ZCOEFF(JI,JJ,IKB+2)*PTKEM(JI,JJ,IKB+2) & + ZCOEFF(JI,JJ,IKB+1)*PTKEM(JI,JJ,IKB+1) & @@ -291,35 +295,45 @@ END IF IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PTRH(JI,JJ,JK) =-( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) /PRHODJ(JI,JJ,JK) END DO !CONCURRENT @@ -363,7 +377,9 @@ IF (.NOT. L2D) THEN CALL MYM_DEVICE(PK,ZTMP1_DEVICE) CALL GY_M_V_DEVICE(1,IKU,1,PTKEM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) =-XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) ! < v'e > END DO !CONCURRENT @@ -427,35 +443,45 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PTRH(JI,JJ,JK) = PTRH(JI,JJ,JK) - ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) & /PRHODJ(JI,JJ,JK) diff --git a/src/MNH/turb_hor_uv.f90 b/src/MNH/turb_hor_uv.f90 index a86107779..a4f3c565d 100644 --- a/src/MNH/turb_hor_uv.f90 +++ b/src/MNH/turb_hor_uv.f90 @@ -320,7 +320,9 @@ IKB = 1+JPVEXT IKE = SIZE(PUM,3)-JPVEXT ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) #ifndef MNH_BITREP ZDIRSINZW(JI,JJ) = SQRT( 1. - PDIRCOSZW(JI,JJ)**2 ) @@ -358,7 +360,9 @@ CALL MXM_DEVICE(PK,ZTMP1_DEVICE) CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) IF (.NOT. L2D) THEN !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK)) END DO !CONCURRENT @@ -455,7 +459,9 @@ ZFLX(:,:,IKB-1) = & +PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) +#ifdef MNH_COMPILER_NVHPC !acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -513,14 +519,18 @@ END IF #else CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -530,35 +540,45 @@ IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) CALL MZM_DEVICE(PDYY,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -583,14 +603,18 @@ END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -600,35 +624,45 @@ IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) CALL MZM_DEVICE(PDXX,ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -655,7 +689,9 @@ IF (KSPLT==1) THEN #else IF (.NOT. L2D) THEN !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK)) END DO !CONCURRENT @@ -668,7 +704,9 @@ IF (KSPLT==1) THEN CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_hor_uw.f90 b/src/MNH/turb_hor_uw.f90 index b9474968b..bb57c98c4 100644 --- a/src/MNH/turb_hor_uw.f90 +++ b/src/MNH/turb_hor_uw.f90 @@ -289,7 +289,9 @@ ZFLX(:,:,:) = & CALL MZM_DEVICE(PK,ZTMP1_DEVICE) CALL MXM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * GX_W_UW_PWM(JI,JJ,JK) END DO !CONCURRENT @@ -330,14 +332,18 @@ PRUS(:,:,:) = PRUS(:,:,:) - DZF( ZFLX* MXM( PMZM_PRHODJ ) / MXM( PDZZ ) ) CALL MXM_DEVICE( PMZM_PRHODJ, ZTMP1_DEVICE ) CALL MXM_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)* ZTMP1_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP3_DEVICE, ZTMP1_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -356,14 +362,18 @@ END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT @@ -371,14 +381,18 @@ END IF CALL DXF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE) IF (.NOT. LFLAT) THEN !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PDZX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK)*PINV_PDXX(JI,JJ,JK) END DO !CONCURRENT @@ -386,14 +400,18 @@ IF (.NOT. LFLAT) THEN CALL MXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) CALL MZF_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZM_DEVICE( ZTMP4_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK) & - ZTMP1_DEVICE(JI,JJ,JK) & @@ -417,7 +435,9 @@ IF (KSPLT==1) THEN #else CALL GZ_U_UW_DEVICE(PUM,PDZZ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *( ZTMP1_DEVICE(JI,JJ,JK) + GX_W_UW_PWM(JI,JJ,JK) ) END DO !CONCURRENT @@ -425,7 +445,9 @@ IF (KSPLT==1) THEN CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE ) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = -ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_hor_vw.f90 b/src/MNH/turb_hor_vw.f90 index 13741e3a7..2e03a3586 100644 --- a/src/MNH/turb_hor_vw.f90 +++ b/src/MNH/turb_hor_vw.f90 @@ -289,7 +289,9 @@ IF (.NOT. L2D) THEN CALL MZM_DEVICE(PK,ZTMP1_DEVICE) CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * GY_W_VW_PWM(JI,JJ,JK) END DO !CONCURRENT @@ -337,14 +339,18 @@ IF (.NOT. L2D) THEN CALL MYM_DEVICE( PMZM_PRHODJ, ZTMP1_DEVICE ) CALL MYM_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)* ZTMP1_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP3_DEVICE, ZTMP1_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -368,28 +374,36 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *PDZY(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !CONCURRENT @@ -397,14 +411,18 @@ IF (.NOT. L2D) THEN CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) CALL MZF_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL DZM_DEVICE( ZTMP4_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK) & - ZTMP1_DEVICE(JI,JJ,JK) & @@ -438,7 +456,9 @@ IF (KSPLT==1) THEN #else CALL GZ_V_VW_DEVICE(PVM,PDZZ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *( ZTMP1_DEVICE(JI,JJ,JK) + GY_W_VW_PWM(JI,JJ,JK) ) END DO !CONCURRENT @@ -446,7 +466,9 @@ IF (KSPLT==1) THEN CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = -ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/MNH/turb_ver.f90 b/src/MNH/turb_ver.f90 index 2f32b303c..ebe93d1f5 100644 --- a/src/MNH/turb_ver.f90 +++ b/src/MNH/turb_ver.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -679,7 +679,9 @@ ENDIF ! Denominator factor in 3rd order terms ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZD(JI,JJ,JK) = (1.+ZREDTH1(JI,JJ,JK)+ZREDR1(JI,JJ,JK)) * (1.+0.5*(ZREDTH1(JI,JJ,JK)+ZREDR1(JI,JJ,JK))) END DO diff --git a/src/MNH/turb_ver_dyn_flux.f90 b/src/MNH/turb_ver_dyn_flux.f90 index 471f78489..55355f15d 100644 --- a/src/MNH/turb_ver_dyn_flux.f90 +++ b/src/MNH/turb_ver_dyn_flux.f90 @@ -534,7 +534,9 @@ ZSOURCE(:,:,:) = 0. #ifndef MNH_BITREP ZDIRSINZW(:,:) = SQRT(1.-PDIRCOSZW(:,:)**2) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZDIRSINZW(JI,JJ) = SQRT(1.-BR_P2(PDIRCOSZW(JI,JJ))) END DO @@ -582,7 +584,9 @@ CALL MXM_DEVICE( PDZZ, ZTMP4_DEVICE ) #ifndef MNH_BITREP ZA(:,:,:) = -PTSTEP * XCMFS * ZTMP1_DEVICE(:,:,:) * ZTMP3_DEVICE(:,:,:) / ZTMP4_DEVICE(:,:,:)**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = -PTSTEP * XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / BR_P2(ZTMP4_DEVICE(JI,JJ,JK)) END DO @@ -600,13 +604,17 @@ END DO ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (PDIRCOSZW(:,:)**2 - ZDIRSINZW(:,:)**2) & * PCOSSLOPE(:,:) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCOEFFLXU(JI,JJ,1) = PCDUEFF(JI,JJ) * (BR_P2(PDIRCOSZW(JI,JJ)) - BR_P2(ZDIRSINZW(JI,JJ))) & * PCOSSLOPE(JI,JJ) END DO #endif +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCOEFFLXV(JI,JJ,1) = PCDUEFF(JI,JJ) * PDIRCOSZW(JI,JJ) * PSINSLOPE(JI,JJ) @@ -619,7 +627,9 @@ END DO #ifndef MNH_OPENACC ZCOEFS(:,:,1:1)=MXM(ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZTMP1_DEVICE(JI,JJ,1) = ZCOEFS(JI,JJ,1) / PDZZ(JI,JJ,IKB) END DO @@ -727,7 +737,9 @@ ZFLXZ(:,:,:) = -XCMFS * MXM(ZKEFF) * & DZM (PIMPL*ZRES + PEXPL*PUM) / MXM(PDZZ) #else !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PUM(JI,JJ,JK) END DO @@ -736,7 +748,9 @@ CALL MXM_DEVICE(ZKEFF,ZTMP1_DEVICE) CALL DZM_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) CALL MXM_DEVICE(PDZZ,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = -XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -800,7 +814,9 @@ PDP(:,:,:) = - MZF( MXF ( ZFLXZ * GZ_U_UW(PUM,PDZZ) ) ) #else CALL GZ_U_UW_DEVICE(PUM,PDZZ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -899,14 +915,18 @@ IF(HTURBDIM=='3DIM') THEN #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) /PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO @@ -915,21 +935,27 @@ IF(HTURBDIM=='3DIM') THEN IF (.NOT. LFLAT) THEN CALL MZF_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK)*PDZX(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE,ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -952,7 +978,9 @@ IF(HTURBDIM=='3DIM') THEN #else CALL GX_W_UW_DEVICE( PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -960,7 +988,9 @@ IF(HTURBDIM=='3DIM') THEN CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK)=-ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1111,7 +1141,9 @@ CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) #ifndef MNH_BITREP ZA(:,:,:) = - PTSTEP * XCMFS * ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) / ZTMP2_DEVICE(:,:,:)**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - PTSTEP * XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) / BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) END DO @@ -1127,7 +1159,9 @@ END DO ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (PDIRCOSZW(:,:)**2 - ZDIRSINZW(:,:)**2) & * PSINSLOPE(:,:) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCOEFFLXU(JI,JJ,1) = PCDUEFF(JI,JJ) * (BR_P2(PDIRCOSZW(JI,JJ)) - BR_P2(ZDIRSINZW(JI,JJ))) & * PSINSLOPE(JI,JJ) @@ -1136,7 +1170,9 @@ END DO ZCOEFFLXV(:,:,1) = PCDUEFF(:,:) * PDIRCOSZW(:,:) * PCOSSLOPE(:,:) ! prepare the implicit scheme coefficients for the surface flux +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCOEFS(JI,JJ,1)= ZCOEFFLXU(JI,JJ,1) * PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & +ZCOEFFLXV(JI,JJ,1) * PCOSSLOPE(JI,JJ) @@ -1146,7 +1182,9 @@ END DO #ifndef MNH_OPENACC ZCOEFS(:,:,1:1)=MYM(ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZTMP1_DEVICE(JI,JJ,1) = ZCOEFS(JI,JJ,1) / PDZZ(JI,JJ,IKB) END DO @@ -1252,7 +1290,9 @@ ZFLXZ(:,:,IKB:IKB) = MYM(PDZZ(:,:,IKB:IKB)) * & ) / 0.5 / ( 1. + MYM(PRHODJ(:,:,KKA:KKA)) / MYM(PRHODJ(:,:,IKB:IKB)) ) #else !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PVM(JI,JJ,JK) END DO @@ -1261,7 +1301,9 @@ CALL DZM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) CALL MYM_DEVICE(PDZZ,ZTMP3_DEVICE) CALL MYM_DEVICE(ZKEFF,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = -XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / ZTMP3_DEVICE(JI,JJ,JK) END DO @@ -1317,7 +1359,9 @@ ZA(:,:,:) = - MZF( MYF ( ZFLXZ * GZ_V_VW(PVM,PDZZ) ) ) #else CALL GZ_V_VW_DEVICE(PVM,PDZZ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1325,7 +1369,9 @@ END DO CALL MYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) CALL MZF_DEVICE( ZTMP3_DEVICE, ZTMP1_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1419,14 +1465,18 @@ IF(HTURBDIM=='3DIM') THEN IF (.NOT. L2D) THEN CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) /PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO @@ -1435,21 +1485,27 @@ IF(HTURBDIM=='3DIM') THEN IF (.NOT. LFLAT) THEN CALL MZF_DEVICE( PDZZ, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK)*PDZY(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -1473,7 +1529,9 @@ IF(HTURBDIM=='3DIM') THEN #else CALL GY_W_VW_DEVICE( PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1481,7 +1539,9 @@ IF(HTURBDIM=='3DIM') THEN CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = - ZTMP2_DEVICE(JI,JJ,JK) END DO diff --git a/src/MNH/turb_ver_thermo_corr.f90 b/src/MNH/turb_ver_thermo_corr.f90 index aabdbebf4..e94a0cbb2 100644 --- a/src/MNH/turb_ver_thermo_corr.f90 +++ b/src/MNH/turb_ver_thermo_corr.f90 @@ -572,7 +572,9 @@ END IF #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = PPHI3(:,:,:)*PDTH_DZ(:,:,:)**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PPHI3(JI,JJ,JK)*BR_P2(PDTH_DZ(JI,JJ,JK)) END DO @@ -580,7 +582,9 @@ END IF !$acc end kernels CALL MZF_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP2_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -721,14 +725,18 @@ END IF !$acc end kernels CALL DZM_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP2_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE( ZTMP3_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) @@ -749,7 +757,9 @@ END IF +ZCOEFF(:,:,IKB )*PTHLP(:,:,IKB ) )**2 & ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLXZ(JI,JJ,IKB) = XCTV * PPHI3(JI,JJ,IKB+KKL) * PLM(JI,JJ,IKB) & * PLEPS(JI,JJ,IKB) & @@ -851,7 +861,9 @@ END IF !$acc end kernels CALL MZF_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP2_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1040,7 +1052,9 @@ END IF CALL DZM_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) CALL DZM_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) @@ -1063,7 +1077,9 @@ END IF CALL DZM_DEVICE( ZTMP8_DEVICE(:,:,:), ZTMP1_DEVICE(:,:,:) ) !!! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP7_DEVICE(JI,JJ,JK) = ( ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK)) * PDR_DZ(JI,JJ,JK) & * ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) & @@ -1079,7 +1095,9 @@ END IF CALL DZM_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) CALL DZM_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) /PDZZ(JI,JJ,JK) @@ -1090,7 +1108,9 @@ END IF CALL MZF_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) CALL MZF_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP5_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*0.5 * ZTMP3_DEVICE(JI,JJ,JK) & @@ -1100,7 +1120,9 @@ END IF #endif ! ! special case near the ground ( uncentred gradient ) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLXZ(JI,JJ,IKB) = & (XCHT1 * PPHI3(JI,JJ,IKB+KKL) + XCHT2 * PPSI3(JI,JJ,IKB+KKL)) & @@ -1215,7 +1237,9 @@ END IF #ifndef MNH_BITREP ZTMP1_DEVICE(:,:,:) = PPSI3(:,:,:)*PDR_DZ(:,:,:)**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PPSI3(JI,JJ,JK)*BR_P2(PDR_DZ(JI,JJ,JK)) END DO @@ -1223,7 +1247,9 @@ END IF !$acc end kernels CALL MZF_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP2_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1369,21 +1395,27 @@ END IF !$acc end kernels CALL DZM_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP1_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) & @@ -1405,7 +1437,9 @@ END IF +ZCOEFF(:,:,IKB )*PRP(:,:,IKB ))**2 & ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLXZ(JI,JJ,IKB) = XCHV * PPSI3(JI,JJ,IKB+KKL) * PLM(JI,JJ,IKB) & * PLEPS(JI,JJ,IKB) & diff --git a/src/MNH/turb_ver_thermo_flux.f90 b/src/MNH/turb_ver_thermo_flux.f90 index 2103b8e2f..e3545d44f 100644 --- a/src/MNH/turb_ver_thermo_flux.f90 +++ b/src/MNH/turb_ver_thermo_flux.f90 @@ -742,7 +742,9 @@ ZDFDDTDZ(:,:,:) = -XCSHF*ZKEFF(:,:,:)*D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PR #else CALL DZM_DEVICE(PTHLM, ZTMP1_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = -XCSHF*PPHI3(JI,JJ,JK)*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO @@ -750,7 +752,9 @@ END DO ! CALL D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PRED2TH3,PRED2THR3,HTURBDIM,GUSERV,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZDFDDTDZ(JI,JJ,JK) = -XCSHF*ZKEFF(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -967,7 +971,9 @@ ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:) !$acc end kernels CALL DZM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO @@ -1039,7 +1045,9 @@ ELSE IF (KRR /= 0) THEN CALL MZM_DEVICE(PETHETA,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO @@ -1228,7 +1236,9 @@ IF (KRR /= 0) THEN #else CALL DZM_DEVICE( PRM(:,:,:,1), ZTMP1_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = -XCSHF*PPSI3(JI,JJ,JK)*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK) END DO @@ -1236,7 +1246,9 @@ IF (KRR /= 0) THEN CALL D_PSI3DRDZ_O_DDRDZ(PPSI3,PREDR1,PREDTH1,PRED2R3,PRED2THR3,HTURBDIM,GUSERV,ZTMP1_DEVICE) !CALL D_PHI3DRDZ_O_DDRDZ_DEVICE(PPSI3,PREDR1,PREDTH1,PRED2R3,PRED2THR3,HTURBDIM,GUSERV,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZDFDDRDZ(JI,JJ,JK) = -XCSHF*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK) END DO @@ -1458,7 +1470,9 @@ IF (KRR /= 0) THEN !$acc end kernels CALL DZM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) + PIMPL * ZDFDDRDZ(JI,JJ,JK) *ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) END DO @@ -1519,14 +1533,18 @@ END DO #else CALL MZM_DEVICE(PEMOIST,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK) END DO !$acc end kernels CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(JI,JJ,JK) = PBETA(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO diff --git a/src/ZSOLVER/advection_metsv.f90 b/src/ZSOLVER/advection_metsv.f90 index 88fa40632..b27652e2a 100644 --- a/src/ZSOLVER/advection_metsv.f90 +++ b/src/ZSOLVER/advection_metsv.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -584,14 +584,18 @@ IF (.NOT. L1D) THEN #else IF (.NOT. L2D) THEN !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) END DO !$acc end kernels ELSE !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) END DO @@ -605,7 +609,9 @@ ELSE #ifndef MNH_BITREP ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK))) END DO @@ -929,14 +935,18 @@ DO JSPL=1,KSPLIT !$acc end kernels END IF !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(4) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) & * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) END DO !CONCURRENT !$acc loop seq DO JSV = 1, KSV +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + & PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) diff --git a/src/ZSOLVER/contrav.f90 b/src/ZSOLVER/contrav.f90 index b7b8e9873..8329a9eb2 100644 --- a/src/ZSOLVER/contrav.f90 +++ b/src/ZSOLVER/contrav.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -699,12 +699,16 @@ IF (KADV_ORDER == 2 ) THEN #endif !$acc kernels ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif do concurrent (ji=iib:iie,jj=1:iju,jk=ikb:ike+1) Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 & + ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25 end do +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif do concurrent (ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1) Z2(ji, jj, jk ) = ( PRVCT(ji, jj, jk) + PRVCT( ji, jj, jk - 1) ) * PDZY(ji, jj, jk) * 0.25 & + ( PRVCT(ji, jj + 1, jk) + PRVCT( ji, jj + 1,jk - 1) ) * PDZY(ji, jj + 1, jk) * 0.25 @@ -712,7 +716,9 @@ IF (KADV_ORDER == 2 ) THEN PRWCT(:,:,:)=0. +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) end do @@ -768,7 +774,9 @@ ELSE IF (KADV_ORDER == 4 ) THEN !PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works ! but ji loop can not be collapsed -> 10x slower on GPU ! *ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) private(ztmp1, ztmp2) +#endif do concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0 @@ -778,7 +786,9 @@ ELSE IF (KADV_ORDER == 4 ) THEN + ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0 end do ! +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif do concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0 @@ -792,7 +802,9 @@ ELSE IF (KADV_ORDER == 4 ) THEN !* 3.2 limits of the process subdomain (inside the whole domain or in cyclic conditions) ! !!$ IF (NHALO==1) THEN +#ifdef MNH_COMPILER_NVHPC !$acc parallel loop independent collapse(2) async +#endif do concurrent(jj=1:iju,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0 @@ -802,7 +814,9 @@ ELSE IF (KADV_ORDER == 4 ) THEN + ( ZU_EAST (jj, jk ) + ZU_EAST (jj, jk-1 ) ) * ZDZX_EAST (jj, jk) ) / 12.0 end do ! +#ifdef MNH_COMPILER_NVHPC !$acc parallel loop independent collapse(2) async +#endif do concurrent(ji=1:iiu,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0 @@ -856,7 +870,9 @@ ELSE IF (KADV_ORDER == 4 ) THEN !!$ !!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ) PRWCT(:,:,:)=0. +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) end do diff --git a/src/ZSOLVER/pressurez.f90 b/src/ZSOLVER/pressurez.f90 index 32ec521e8..4be2ba2ad 100644 --- a/src/ZSOLVER/pressurez.f90 +++ b/src/ZSOLVER/pressurez.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -798,7 +798,9 @@ CALL GX_M_U_DEVICE(1,IKU,1,ZPHIT,PDXX,PDZZ,PDZX,ZDV_SOURCE) IF ( GWEST ) THEN !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! +#ifdef MNH_COMPILER_NVHPC !$acc kernels loop independent collapse(2) async +#endif DO CONCURRENT (JJ=1:IJU , JK=2:IKU-1) ZDV_SOURCE(IIB,JJ,JK)= & (ZPHIT(IIB,JJ,JK) - ZPHIT(IIB-1,JJ,JK) - 0.5 * ( & @@ -811,7 +813,9 @@ IF ( GWEST ) THEN ENDIF ! IF( GEAST ) THEN +#ifdef MNH_COMPILER_NVHPC !$acc kernels loop independent collapse(2) async +#endif DO CONCURRENT (JJ=1:IJU , JK=2:IKU-1) ZDV_SOURCE(IIE+1,JJ,JK)= & (ZPHIT(IIE+1,JJ,JK) - ZPHIT(IIE+1-1,JJ,JK) - 0.5 * ( & @@ -859,7 +863,9 @@ IF(.NOT. L2D) THEN IF ( GSOUTH ) THEN !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! +#ifdef MNH_COMPILER_NVHPC !$acc kernels loop independent collapse(2) async +#endif DO CONCURRENT (JI=1:IIU , JK=2:IKU-1) ZDV_SOURCE(JI,IJB,JK)= & (ZPHIT(JI,IJB,JK) - ZPHIT(JI,IJB-1,JK) - 0.5 * ( & @@ -872,7 +878,9 @@ IF(.NOT. L2D) THEN END IF ! IF ( GNORTH ) THEN +#ifdef MNH_COMPILER_NVHPC !$acc kernels loop independent collapse(2) async +#endif DO CONCURRENT (JI=1:IIU , JK=2:IKU-1) ZDV_SOURCE(JI,IJE+1,JK)= & (ZPHIT(JI,IJE+1,JK) - ZPHIT(JI,IJE+1-1,JK) - 0.5 * ( & diff --git a/src/ZSOLVER/tridiag_thermo.f90 b/src/ZSOLVER/tridiag_thermo.f90 index 8eea37276..0e6f21de9 100644 --- a/src/ZSOLVER/tridiag_thermo.f90 +++ b/src/ZSOLVER/tridiag_thermo.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 2003-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 2003-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -270,7 +270,9 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ) #ifndef MNH_BITREP ZRHODJ_DFDDTDZ_O_DZ2 = ZMZM_RHODJ*PDFDDTDZ/PDZZ**2 #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK)) END DO !CONCURRENT @@ -290,7 +292,9 @@ ZY=0. ! --------------------------- ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL) & @@ -301,7 +305,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL) & @@ -314,7 +320,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) & @@ -335,7 +343,9 @@ IF ( PIMPL > 1.E-10 ) THEN ! -------------- ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL @@ -343,14 +353,18 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZA(JI,JJ,JK) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL ZB(JI,JJ,JK) = PRHODJ(JI,JJ,JK)/PTSTEP & @@ -361,7 +375,9 @@ END DO !CONCURRENT !$acc end kernels ! !$acc kernels ! async +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL ZB(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)/PTSTEP & @@ -376,7 +392,9 @@ END DO !CONCURRENT ! -------- ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) @@ -385,7 +403,9 @@ END DO !CONCURRENT !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL ! gang+vector needed or parallisation vector only +#ifdef MNH_COMPILER_NVHPC !$acc loop independent gang, vector collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -396,7 +416,9 @@ DO JK = IKB+KKL,IKE-KKL,KKL END DO !CONCURRENT END DO ! special treatment for the last level +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -412,7 +434,9 @@ END DO !CONCURRENT !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL ! gang+vector needed or parallisation vector only +#ifdef MNH_COMPILER_NVHPC !$acc loop independent gang, vector collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) END DO !CONCURRENT @@ -422,7 +446,9 @@ END DO ELSE ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK) END DO !CONCURRENT @@ -435,7 +461,9 @@ END IF ! ---------------------------------------- ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) diff --git a/src/ZSOLVER/turb.f90 b/src/ZSOLVER/turb.f90 index 80ad2311a..7915e7860 100644 --- a/src/ZSOLVER/turb.f90 +++ b/src/ZSOLVER/turb.f90 @@ -1058,7 +1058,9 @@ ENDIF ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) / & (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) / & (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) ) diff --git a/src/ZSOLVER/turb_hor_dyn_corr.f90 b/src/ZSOLVER/turb_hor_dyn_corr.f90 index 4f00eb1b2..55c14bce6 100644 --- a/src/ZSOLVER/turb_hor_dyn_corr.f90 +++ b/src/ZSOLVER/turb_hor_dyn_corr.f90 @@ -411,7 +411,9 @@ IKU = SIZE(PUM,3) #ifndef MNH_BITREP ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) END DO @@ -449,7 +451,9 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' ) ! Computes the U variance IF (.NOT. L2D) THEN !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK) & @@ -626,7 +630,9 @@ ZFLX(:,:,IKB-1) = & PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -701,7 +707,9 @@ END IF #else CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE) !$acc kernels async(10) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -714,21 +722,27 @@ CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -749,7 +763,9 @@ END IF IF (KSPLT==1) THEN ! Contribution to the dynamic production of TKE: !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK) END DO !CONCURRENT @@ -800,7 +816,9 @@ END IF ! Computes the V variance IF (.NOT. L2D) THEN !$acc kernels async(3) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK) & @@ -851,7 +869,9 @@ ZFLX(:,:,IKB-1) = & PUSLOPEM(:,:) * PSINSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & @@ -921,7 +941,9 @@ IF (.NOT. L2D) THEN #else CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE) !$acc kernels async(10) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -934,21 +956,27 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -961,7 +989,9 @@ IF (.NOT. L2D) THEN !$acc end kernels ELSE !$acc kernels async(1) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -970,7 +1000,9 @@ IF (.NOT. L2D) THEN ! Contribution to the dynamic production of TKE: IF (KSPLT==1) THEN !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK) END DO !CONCURRENT @@ -1026,7 +1058,9 @@ END IF ! Computes the W variance IF (.NOT. L2D) THEN !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & @@ -1071,7 +1105,9 @@ ZFLX(:,:,IKB-1) = & + PTAU33M(:,:) * PDIRCOSZW(:,:)**2 & +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZFLX(JI,JJ,IKB-1) = & PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & @@ -1145,7 +1181,9 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ) CALL GZ_W_M_DEVICE(ZWP,PDZZ,GZ_W_M_ZWP) #endif !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) & - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK)) @@ -1155,7 +1193,9 @@ END DO !CONCURRENT IF (KSPLT==1) THEN !Contribution to the dynamic production of TKE: !$acc kernels async(2) +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK) END DO !CONCURRENT diff --git a/src/ZSOLVER/turb_hor_thermo_flux.f90 b/src/ZSOLVER/turb_hor_thermo_flux.f90 index 1908006f3..3c3376098 100644 --- a/src/ZSOLVER/turb_hor_thermo_flux.f90 +++ b/src/ZSOLVER/turb_hor_thermo_flux.f90 @@ -340,7 +340,9 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PTHLM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -406,28 +408,36 @@ END IF IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE, ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO @@ -773,7 +783,9 @@ END IF CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -812,35 +824,45 @@ END DO IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK) END DO @@ -1148,7 +1170,9 @@ END IF CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1195,35 +1219,45 @@ IF (.NOT. L2D) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL DZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) END DO @@ -1515,7 +1549,9 @@ IF (KRR/=0) THEN CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY, ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) END DO !CONCURRENT @@ -1561,7 +1597,9 @@ IF (KRR/=0) THEN IF (.NOT. LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO @@ -1569,21 +1607,27 @@ IF (KRR/=0) THEN CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO @@ -1591,7 +1635,9 @@ IF (KRR/=0) THEN CALL DZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) ! !$acc kernels +#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) +#endif DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK) END DO -- GitLab