From ff91204dee2d1b8a4e3ed819bd94f1cdcc51c84a Mon Sep 17 00:00:00 2001 From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr> Date: Fri, 26 Aug 2022 15:40:02 +0000 Subject: [PATCH] Juan 26/08/2022:MNH/*.f90 , BitRepro SOurce for Cray/AMD + mnh_expand+LOOP+OPENACC macro --- src/MNH/advec_ppm_algo.f90 | 8 + src/MNH/advection_metsv.f90 | 15 +- src/MNH/ground_paramn.f90 | 6 +- src/MNH/ice4_sedimentation_split.f90 | 8 +- src/MNH/modeln.f90 | 7 + src/MNH/modi_bitrepj.f90 | 66 +++ src/MNH/ppm.f90 | 240 +++++----- src/MNH/prandtl.f90 | 269 +++++------ src/MNH/rain_ice.f90 | 2 +- src/MNH/rain_ice_fast_rg.f90 | 60 ++- src/MNH/rain_ice_fast_ri.f90 | 44 +- src/MNH/rain_ice_fast_rs.f90 | 30 +- src/MNH/rain_ice_sedimentation_split.f90 | 49 +- src/MNH/rain_ice_warm.f90 | 21 +- src/MNH/rotate_wind.f90 | 4 +- src/MNH/slow_terms.f90 | 16 +- src/MNH/tridiag_w.f90 | 102 ++--- src/MNH/turb.f90 | 10 + src/MNH/turb_hor_dyn_corr.f90 | 550 ++++++++++------------- src/MNH/turb_hor_sv_corr.f90 | 8 +- 20 files changed, 748 insertions(+), 767 deletions(-) create mode 100644 src/MNH/modi_bitrepj.f90 diff --git a/src/MNH/advec_ppm_algo.f90 b/src/MNH/advec_ppm_algo.f90 index b594d954a..c26b3e092 100644 --- a/src/MNH/advec_ppm_algo.f90 +++ b/src/MNH/advec_ppm_algo.f90 @@ -88,6 +88,10 @@ use mode_msg USE MODI_SHUMAN USE MODI_PPM ! +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif +! IMPLICIT NONE ! !* 0.1 Declarations of dummy arguments : @@ -456,6 +460,10 @@ END SELECT PSRC = (PSRC - PFIELDT)*PRHODJ/PTSTEP_PPM !$acc end kernels ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all OUT arrays CALL MPPDB_CHECK(PSRC,"ADVEC_PPM_ALGO end:PSRC") diff --git a/src/MNH/advection_metsv.f90 b/src/MNH/advection_metsv.f90 index 6b5041b15..d54409639 100644 --- a/src/MNH/advection_metsv.f90 +++ b/src/MNH/advection_metsv.f90 @@ -187,9 +187,14 @@ use mode_sum_ll, only: MAX_ll use mode_tools_ll, only: GET_INDICE_ll, lnorth_ll, lsouth_ll, least_ll, lwest_ll ! 
USE MODI_ADV_BOUNDARIES -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif + USE MODI_CONTRAV USE MODI_GET_HALO USE MODI_PPM_RHODJ @@ -532,7 +537,8 @@ IF (.NOT. L1D) THEN !$acc end kernels IF (LIBM) THEN !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=IIB:IIE,JJ=IJB:IJE,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZCFLU(IIB:IIE,IJB:IJE,:) = ZCFLU(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,2)/& (XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:))**(1./3.))**2.)) ZCFLV(IIB:IIE,IJB:IJE,:) = ZCFLV(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,3)/& @@ -547,12 +553,13 @@ IF (.NOT. L1D) THEN ZCFLW(IIB:IIE,IJB:IJE,:) = ZCFLW(IIB:IIE,IJB:IJE,:)*(1.-Br_exp(-Br_pow(XIBM_LS(IIB:IIE,IJB:IJE,:,4)/& Br_pow(XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:),1./3.),2.))) #endif +!$mnh_end_expand_array() WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,2).GT.(-ZIBM_EPSI)) ZCFLU(IIB:IIE,IJB:IJE,:)=0. WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,3).GT.(-ZIBM_EPSI)) ZCFLV(IIB:IIE,IJB:IJE,:)=0. WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,4).GT.(-ZIBM_EPSI)) ZCFLW(IIB:IIE,IJB:IJE,:)=0. !$acc end kernels ENDIF -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) IF (.NOT. L2D) THEN !$acc kernels present_cr(ZCFL) ZCFL(:,:,:) = SQRT(ZCFLU(:,:,:)**2+ZCFLV(:,:,:)**2+ZCFLW(:,:,:)**2) @@ -583,7 +590,7 @@ ELSE !$acc kernels ZCFLU(:,:,:) = 0.0 ; ZCFLV(:,:,:) = 0.0 ; ZCFLW(:,:,:) = 0.0 ZCFLW(IIB:IIE,IJB:IJE,:) = ABS(ZRWCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) #else !$acc_nv loop independent collapse(3) diff --git a/src/MNH/ground_paramn.f90 b/src/MNH/ground_paramn.f90 index 34b79393c..292f3ee77 100644 --- a/src/MNH/ground_paramn.f90 +++ b/src/MNH/ground_paramn.f90 @@ -187,7 +187,7 @@ USE MODD_TIME_n USE MODD_TIME ! 
USE MODD_PARAM_LIMA, ONLY : MSEDC=>LSEDC -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif ! @@ -489,7 +489,7 @@ CALL ROTATE_WIND(XUT,XVT,XWT, & ! 1.4 zonal and meridian components of the wind parallel to the slope ! --------------------------------------------------------------- ! -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZWIND(:,:) = SQRT( ZUA**2 + ZVA**2 ) #else ZWIND(:,:) = SQRT( BR_P2(ZUA) + BR_P2(ZVA) ) @@ -723,7 +723,7 @@ PSFU(:,:) = 0. PSFV(:,:) = 0. ! WHERE (ZSFU(:,:)/=XUNDEF .AND. ZWIND(:,:)>0.) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PSFU(:,:) = - SQRT(ZSFU**2+ZSFV**2) * ZUA(:,:) / ZWIND(:,:) / XRHODREF(:,:,IKB) PSFV(:,:) = - SQRT(ZSFU**2+ZSFV**2) * ZVA(:,:) / ZWIND(:,:) / XRHODREF(:,:,IKB) #else diff --git a/src/MNH/ice4_sedimentation_split.f90 b/src/MNH/ice4_sedimentation_split.f90 index 1b19e954f..df9b72594 100644 --- a/src/MNH/ice4_sedimentation_split.f90 +++ b/src/MNH/ice4_sedimentation_split.f90 @@ -496,7 +496,7 @@ USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEAS use mode_mppdb use mode_msg -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif ! @@ -652,7 +652,7 @@ DO WHILE (ANY(ZREMAINT>0.)) IF(PRXT(JI,JJ,JK)>XRTMIN(KSPE)) THEN ZZWLBDC = PLBC(JI,JJ,JK) * PCONC3D(JI,JJ,JK) / & (PRHODREF(JI,JJ,JK) * PRXT(JI,JJ,JK)) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZWLBDC = ZZWLBDC**XLBEXC ZRAY = PRAY(JI,JJ,JK) / ZZWLBDC ZZT = PTHT(JI,JJ,JK) * (PPABST(JI,JJ,JK)/XP00)**(XRD/XCPD) @@ -682,7 +682,7 @@ DO WHILE (ANY(ZREMAINT>0.)) JJ=I2(JL) JK=I3(JL) IF(PRXT(JI, JJ, JK) .GT. MAX(XRTMIN(4), 1.0E-7)) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZWSED(JI, JJ, JK) = XFSEDI * PRXT(JI, JJ, JK) * & & PRHODREF(JI,JJ,JK)**(1.-XCEXVT) * & ! 
McF&H & MAX( 0.05E6,-0.15319E6-0.021454E6* & @@ -733,7 +733,7 @@ DO WHILE (ANY(ZREMAINT>0.)) JJ=I2(JL) JK=I3(JL) IF(PRXT(JI,JJ,JK)>XRTMIN(KSPE)) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZWSED(JI, JJ, JK) = ZFSED * PRXT(JI, JJ, JK)**ZEXSED & * PRHODREF(JI, JJ, JK)**(ZEXSED-XCEXVT) #else diff --git a/src/MNH/modeln.f90 b/src/MNH/modeln.f90 index d20227895..321e097e1 100644 --- a/src/MNH/modeln.f90 +++ b/src/MNH/modeln.f90 @@ -459,6 +459,9 @@ USE MODI_WRITE_LFIFM_n USE MODI_WRITE_SERIES_n USE MODI_WRITE_STATION_n USE MODI_WRITE_SURF_ATM_N +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif ! IMPLICIT NONE ! @@ -1635,6 +1638,10 @@ XTIME_LES_BU_PROCESS = 0. !$acc data copyin (XTKET, XRSVS_CLD) & !$acc & copy (XRTKES, XRSVS) & !$acc & copyout(XRTKEMS) +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(XRRS_CLD) +CALL SBR_FZ(XRT) +#endif CALL ADVECTION_METSV ( TZBAKFILE, CUVW_ADV_SCHEME, & CMET_ADV_SCHEME, CSV_ADV_SCHEME, CCLOUD, NSPLIT, & LSPLIT_CFL, XSPLIT_CFL, LCFL_WRIT, & diff --git a/src/MNH/modi_bitrepj.f90 b/src/MNH/modi_bitrepj.f90 new file mode 100644 index 000000000..8ee254ae3 --- /dev/null +++ b/src/MNH/modi_bitrepj.f90 @@ -0,0 +1,66 @@ +MODULE MODI_BITREPZ + + IMPLICIT NONE + + REAL, PARAMETER :: XMIN = 2.0_8**(-1020/4) + + INTERFACE SBR_FZ + MODULE PROCEDURE SBR_FZ3D,SBR_FZ4D + END INTERFACE SBR_FZ + +CONTAINS + + SUBROUTINE SBR_FZ3D(PR) + REAL,DIMENSION(:,:,:) :: PR + + INTEGER :: JI,JJ,JK,IIU,IJU,IKU + + IIU = SIZE(PR,1) + IJU = SIZE(PR,2) + IKU = SIZE(PR,3) + + !$acc kernels + !$mnh_expand_where(ji=1:iiu,jj=1:iju,jk=1:iku) + WHERE (ABS(PR(:,:,:)) < XMIN ) + PR(:,:,:)=sign(0.0,PR(:,:,:)) + END WHERE + !$mnh_end_expand_where() + !$acc end kernels + + END SUBROUTINE SBR_FZ3D + + SUBROUTINE SBR_FZ4D(PR) + REAL,DIMENSION(:,:,:,:) :: PR + + INTEGER :: JI,JJ,JK,JR,IIU,IJU,IKU,IRU + + IIU = SIZE(PR,1) + IJU = SIZE(PR,2) + IKU = SIZE(PR,3) + IRU = SIZE(PR,4) + + !$acc kernels + !$mnh_expand_where(ji=1:iiu,jj=1:iju,jk=1:iku,jr=1:iru) + WHERE 
(ABS(PR(:,:,:,:)) < XMIN ) + PR(:,:,:,:)=sign(0.0,PR(:,:,:,:)) + END WHERE + !$mnh_end_expand_where() + !$acc end kernels + + END SUBROUTINE SBR_FZ4D + + ELEMENTAL FUNCTION BR_FZ(PVAL) + !$acc routine seq + ! Flush-to-zero helper: returns PVAL unchanged, or a zero with the sign of PVAL (via SIGN) when ABS(PVAL) is below the threshold. + REAL, INTENT(IN) :: PVAL + REAL :: BR_FZ + ! Threshold is TINY(PVAL), the smallest normal number of the kind of PVAL; the former literal 2.225*(10.0**(-308)) relied on a subnormal intermediate and underflows to 0 for 4-byte REAL. + IF ( ABS(PVAL) < TINY(PVAL) ) THEN + BR_FZ=sign(0.0,PVAL) + ELSE + BR_FZ=PVAL + ENDIF + ! + END FUNCTION BR_FZ + +END MODULE MODI_BITREPZ diff --git a/src/MNH/ppm.f90 b/src/MNH/ppm.f90 index 6dcd0ae4d..5e170e440 100644 --- a/src/MNH/ppm.f90 +++ b/src/MNH/ppm.f90 @@ -10,8 +10,7 @@ ! P. Wautelet 18/07/2019: OpenACC: remove use of macros for dif2x/y/z !----------------------------------------------------------------- #ifdef MNH_OPENACC -! -! inline shuman with macro +!! inline shuman with macro ! !#define dxf(PDXF,PA) PDXF(1:IIU-1,:,:) = PA(2:IIU,:,:) - PA(1:IIU-1,:,:) ; PDXF(IIU,:,:) = PDXF(2*JPHEXT,:,:) ! DXF(PDXF,PA) !#define dyf(PDYF,PA) PDYF(:,1:IJU-1,:) = PA(:,2:IJU,:) - PA(:,1:IJU-1,:); PDYF(:,IJU,:) = PDYF(:,2*JPHEXT,:) ! DYF(PDYF,PA) @@ -352,9 +351,12 @@ use mode_mppdb use mode_msg #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif USE MODI_GET_HALO #ifndef MNH_OPENACC USE MODI_SHUMAN @@ -416,7 +418,11 @@ REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZFPOS, ZFNEG INTEGER :: IJS,IJN LOGICAL :: GWEST , GEAST !------------------------------------------------------------------------------- - +! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all IN arrays CALL MPPDB_CHECK(PCR, "PPM_01_X beg:PCR") @@ -480,9 +486,8 @@ ZFNEG(:,:,:) = PSRC(:,:,:) #else CALL GET_HALO_D(PSRC,HDIR="01_X", HNAME='UPDATE_HALO_ll::GET_HALO::PSRC') ! 
-!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) +!$acc kernels present_cr(ZFPOS,ZFNEG,PSRC,PR,ZQL,zqr,zdq,ZQ6,ZDMQ,ZQL0,ZQR0,ZQ60) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) PR (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQL (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR (ji, jj, jk ) = PSRC(ji, jj, jk ) @@ -492,7 +497,7 @@ DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) ZQL0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQ60 (ji, jj, jk ) = PSRC(ji, jj, jk ) - END DO + !$mnh_end_do() ! ZFPOS(:,:,:) = PSRC(:,:,:) ZFNEG(:,:,:) = PSRC(:,:,:) @@ -584,7 +589,7 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCX(1) == HLBCX(2) ZQL(:,IJS:IJN,:) = PSRC(:,IJS:IJN,:) ZQR(:,IJS:IJN,:) = PSRC(:,IJS:IJN,:) ZQ6(:,IJS:IJN,:) = 0.0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) < -(ZDQ(:,IJS:IJN,:))**2 ) #else ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) < -BR_P2(ZDQ(:,IJS:IJN,:)) ) @@ -592,7 +597,7 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCX(1) == HLBCX(2) ZQ6(:,IJS:IJN,:) = 3.0*(ZQL0(:,IJS:IJN,:) - PSRC(:,IJS:IJN,:)) ZQR(:,IJS:IJN,:) = ZQL0(:,IJS:IJN,:) - ZQ6(:,IJS:IJN,:) ZQL(:,IJS:IJN,:) = ZQL0(:,IJS:IJN,:) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) > (ZDQ(:,IJS:IJN,:))**2 ) #else ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) > BR_P2(ZDQ(:,IJS:IJN,:)) ) @@ -669,9 +674,8 @@ CASE('OPEN') ! ! ZDMQ(i) = Fct[ ZDMQ(i),PSRC(i),PSRC(i-1),PSRC(i+1) ] ! 
-!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT (ji = iib:iie , jj = ijs:ijn , jk = 1:iku ) +!$acc kernels present_cr(psrc,zdmq) + !$mnh_do_concurrent(ji = iib:iie , jj = ijs:ijn , jk = 1:iku ) ZDMQ(ji, jj, jk ) = SIGN( & MIN( ABS(ZDMQ(ji, jj, jk )), & 2.0 * ( PSRC(ji, jj, jk ) & @@ -679,7 +683,7 @@ CASE('OPEN') 2.0 * (-PSRC(ji, jj, jk ) & + MAX(PSRC(ji - 1, jj, jk ),PSRC(ji, jj, jk ),PSRC(ji + 1, jj, jk )) ) ), & ZDMQ(ji, jj, jk ) ) - END DO + !$mnh_end_do() ! ! WEST BOUND ! @@ -706,16 +710,15 @@ CASE('OPEN') !$acc end kernels CALL GET_HALO_D(ZDMQ, HDIR="01_X", HNAME='UPDATE_HALO_ll::GET_HALO::ZDMQ') #endif -!$acc kernels +!$acc kernels present_cr(zdmq,zql0) ! ! calculate qL and qR ! ! ZQL0(i) = Fct[ PSRC(i),PSRC(i-1),ZDMQ(i),ZDMQ(i-1) ] ! -!$acc_nv loop independent collapse(3) - DO CONCURRENT (ji = iib:iie + 1 , jj = ijs:ijn , jk = 1:iku ) + !$mnh_do_concurrent(ji = iib:iie + 1 , jj = ijs:ijn , jk = 1:iku ) ZQL0(ji, jj, jk ) = 0.5 * ( PSRC(ji, jj, jk ) + PSRC(ji-1, jj, jk ) ) - ( ZDMQ(ji, jj, jk ) - ZDMQ(ji-1, jj, jk ) ) / 6.0 - END DO + !$mnh_end_do() ! #ifndef MNH_OPENACC CALL GET_HALO(ZQL0, HNAME='ZQL0') @@ -782,9 +785,8 @@ CASE('OPEN') ! ZDQ(:,IJS:IJN,:) = ZQR(:,IJS:IJN,:) - ZQL(:,IJS:IJN,:) #else -!$acc kernels present( ZQL0, ZQR0 ) -!$acc_nv loop independent collapse(3) -DO CONCURRENT (I=1:IIU , J = IJS:IJN , K=1:IKU ) +!$acc kernels present(ZQL0,ZQR0) present_cr(psrc,zq60,ZDQ,ZQL,ZQR,ZQ6) +!$mnh_do_concurrent(I=1:IIU , J = IJS:IJN , K=1:IKU ) ! ! determine initial coefficients of the parabolae ! @@ -816,7 +818,7 @@ DO CONCURRENT (I=1:IIU , J = IJS:IJN , K=1:IKU ) ! recalculate coefficients of the parabolae ! ZDQ(I,J,K) = ZQR(I,J,K) - ZQL(I,J,K) -END DO +!$mnh_end_do() !$acc end kernels #endif ! 
@@ -828,13 +830,12 @@ END DO !!$ ZFPOS(IIB+1:IIE+1,:,:) = ZQR(IIB:IIE,:,:) - 0.5*PCR(IIB+1:IIE+1,:,:) * & !!$ (ZDQ(IIB:IIE,:,:) - (1.0 - 2.0*PCR(IIB+1:IIE+1,:,:)/3.0) & !!$ * ZQ6(IIB:IIE,:,:)) -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = iib:iie + 1 , jj = ijs:ijn , jk = 1:iku ) +!$acc kernels present_cr(zq6,zfpos) +!$mnh_do_concurrent( ji = iib:iie + 1 , jj = ijs:ijn , jk = 1:iku ) ZFPOS(ji, jj, jk ) = ZQR(ji - 1, jj, jk ) - 0.5 * PCR(ji, jj, jk ) & * ( ZDQ(ji - 1, jj, jk) - (1.0 - 2.0 * PCR(ji, jj, jk ) / 3.0 ) & * ZQ6(ji - 1, jj, jk) ) -END DO +!$mnh_end_do() ! ! #ifndef MNH_OPENACC @@ -862,12 +863,11 @@ END DO !!$ ZFNEG(IIB-1:IIE,:,:) = ZQL(IIB-1:IIE,:,:) - 0.5*PCR(IIB-1:IIE,:,:) * & !!$ (ZDQ(IIB-1:IIE,:,:) + (1.0 + 2.0*PCR(IIB-1:IIE,:,:)/3.0) & !!$ * ZQ6(IIB-1:IIE,:,:)) -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = ijs:ijn , jk = 1:iku ) +!$acc kernels present_cr(zq6,zfneg) +!$mnh_do_concurrent( ji = 1:iiu , jj = ijs:ijn , jk = 1:iku ) ZFNEG(ji, jj, jk ) = ZQL(ji, jj, jk ) - 0.5*PCR(ji, jj, jk ) * & ( ZDQ(ji, jj, jk ) + (1.0 + 2.0*PCR(ji, jj, jk )/3.0) * ZQ6(ji, jj, jk ) ) -END DO +!$mnh_end_do() ! #ifndef MNH_OPENACC CALL GET_HALO(ZFNEG, HNAME='ZFNEG') @@ -909,6 +909,10 @@ END DO ! END SELECT ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PR(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PSRC,"PPM_01_X end:PSRC") @@ -1030,9 +1034,12 @@ use mode_mppdb use mode_msg #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif USE MODI_GET_HALO #ifndef MNH_OPENACC USE MODI_SHUMAN @@ -1102,7 +1109,11 @@ INTEGER :: IJN,IJS #endif integer :: ji, jj, jk !------------------------------------------------------------------------------- - +! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! 
IF (MPPDB_INITIALIZED) THEN !Check all IN arrays CALL MPPDB_CHECK(PCR, "PPM_01_Y beg:PCR") @@ -1166,9 +1177,8 @@ CALL GET_HALO_D(PSRC, HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::PSRC') !------------------------------------------------------------------------------- ! ! -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) +!$acc kernels present_cr(ZFPOS,ZFNEG,PSRC,PR,ZQL,ZQR,ZDQ,ZQ6,ZDMQ,ZQL0,ZQR0,ZQ60) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) PR (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQL (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR (ji, jj, jk ) = PSRC(ji, jj, jk ) @@ -1178,7 +1188,7 @@ DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) ZQL0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQ60 (ji, jj, jk ) = PSRC(ji, jj, jk ) -END DO +!$mnh_end_do() ZFPOS(:,:,:) = PSRC(:,:,:) ZFNEG(:,:,:) = PSRC(:,:,:) !$acc end kernels @@ -1198,7 +1208,7 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCY(1) == HLBCY(2) ! ! ZDMQ(j) = Fct[ ZDMQ(j),PSRC(j),PSRC(j-1),PSRC(j+1) ] ! -!$acc kernels +!$acc kernels present_cr(ZDMQ,PSRC) ZDMQ(IIW:IIA,IJB:IJE,:) = & SIGN( (MIN( ABS(ZDMQ(IIW:IIA,IJB:IJE,:)),2.0*(PSRC(IIW:IIA,IJB:IJE,:) - & MIN(PSRC(IIW:IIA,IJB-1:IJE-1,:),PSRC(IIW:IIA,IJB:IJE,:),PSRC(IIW:IIA,IJB+1:IJE+1,:))), & @@ -1228,7 +1238,7 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCY(1) == HLBCY(2) #else !$acc end kernels CALL GET_HALO_D(ZDMQ,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZDMQ') -!$acc kernels +!$acc kernels present_cr(ZQL0,PSRC,ZDMQ) #endif ! ! calculate qL and qR with the modified dmq @@ -1236,14 +1246,13 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCY(1) == HLBCY(2) ! ZQL0(IIW:IIA,IJB:IJE+1,:) = 0.5*(PSRC(IIW:IIA,IJB:IJE+1,:) + PSRC(IIW:IIA,IJB-1:IJE,:)) - & ! 
(ZDMQ(IIW:IIA,IJB:IJE+1,:) - ZDMQ(IIW:IIA,IJB-1:IJE,:))/6.0 #if 1 -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = iiw:iia , jj = ijb:ije + 1 , jk = 1:iku ) + !$mnh_do_concurrent( ji = iiw:iia , jj = ijb:ije + 1 , jk = 1:iku ) ZQL0(ji, jj, jk ) = 0.5 * ( PSRC(ji, jj, jk ) + PSRC(ji, jj-1, jk )) - ( ZDMQ(ji, jj, jk ) - ZDMQ(ji, jj-1, jk ) ) / 6.0 - END DO + !$mnh_end_do() #else - DO CONCURRENT ( ji = iiw : iia, jj = ijb : ije + 1, jk = 1 : iku ) + !$mnh_do_concurrent( ji = iiw : iia, jj = ijb : ije + 1, jk = 1 : iku ) ZQL0(ji, jj, jk ) = 0.5 * ( PSRC(ji, jj, jk ) + PSRC(ji, jj-1, jk )) - ( ZDMQ(ji, jj, jk ) - ZDMQ(ji, jj-1, jk ) ) / 6.0 - END DO + !$mnh_end_do() #endif ! #ifndef MNH_OPENACC @@ -1271,7 +1280,7 @@ CALL MPPDB_CHECK(ZQL0,"PPM_01_Y: ZQL0") #else !$acc end kernels CALL GET_HALO_D(ZQR0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQR0') -!$acc kernels +!$acc kernels present_cr(ZDQ,ZQR0,ZQL0,ZQ60,ZQL,ZQR,ZQ6) #endif ! ! NORTH BOUND @@ -1310,8 +1319,7 @@ CALL MPPDB_CHECK(ZQL0,"PPM_01_Y: ZQL0") ! ZDQ(IIW:IIA,:,:) = ZQR(IIW:IIA,:,:) - ZQL(IIW:IIA,:,:) #else -!$acc_nv loop independent collapse(3) - DO CONCURRENT( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) + !$mnh_do_concurrent( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) ! ! determine initial coefficients of the parabolae ! @@ -1344,7 +1352,7 @@ CALL MPPDB_CHECK(ZQL0,"PPM_01_Y: ZQL0") ! ZDQ(I,J,K) = ZQR(I,J,K) - ZQL(I,J,K) ! - END DO + !$mnh_end_do() !$acc end kernels !$acc kernels #endif @@ -1388,15 +1396,14 @@ CALL MPPDB_CHECK(ZQL0,"PPM_01_Y: ZQL0") ZFNEG*(0.5-SIGN(0.5,PCR)) ) ) #else CALL MYM_DEVICE(PRHO,ZQL) -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) +!$acc kernels present_cr(ZQR,PCR,ZQL,ZFPOS,ZFNEG) + !$mnh_do_concurrent( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) if ( PCR(I,J,K) > 0. 
) then ZQR(I,J,K) = PCR(I,J,K)* ZQL(I,J,K) * ZFPOS(I,J,K) else ZQR(I,J,K) = PCR(I,J,K)* ZQL(I,J,K) * ZFNEG(I,J,K) end if - END DO + !$mnh_end_do() !$acc end kernels CALL DYF_DEVICE(ZQR,PR) #endif @@ -1433,15 +1440,14 @@ CASE('OPEN') ENDIF ! ! monotonize the difference followinq eq. 5 in Lin94 -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = iiw:iia , jj = ijb:ije , jk = 1:iku ) +!$acc kernels present_cr(ZDMQ,PSRC) + !$mnh_do_concurrent( ji = iiw:iia , jj = ijb:ije , jk = 1:iku ) ZDMQ(ji, jj, jk ) = SIGN( & MIN( ABS( ZDMQ(ji, jj, jk ) ), & 2.0 * ( PSRC(ji, jj, jk ) - MIN( PSRC(ji, jj-1, jk ), PSRC(ji, jj, jk ), PSRC(ji, jj+1, jk ) ) ), & 2.0 * ( - PSRC(ji, jj, jk ) + MAX( PSRC(ji, jj-1, jk ), PSRC(ji, jj, jk ), PSRC(ji, jj+1, jk ) ) ) ), & ZDMQ(ji, jj, jk ) ) - END DO + !$mnh_end_do() !!$ ZDMQ(:,IJB-1,:) = & !!$ SIGN( (MIN( ABS(ZDMQ(:,IJB-1,:)), 2.0*(PSRC(:,IJB-1,:) - & !!$ MIN(PSRC(:,IJE-1,:),PSRC(:,IJB-1,:),PSRC(:,IJB,:))), & @@ -1460,15 +1466,14 @@ CASE('OPEN') #else !$acc end kernels CALL GET_HALO_D(ZDMQ,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZDMQ') -!$acc kernels +!$acc kernels present_cr(ZQL0,PSRC,ZDMQ) #endif ! ! calculate qL and qR with the modified dmq ! -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = iiw:iia , jj = ijb:ije + 1 , jk = 1:iku ) + !$mnh_do_concurrent( ji = iiw:iia , jj = ijb:ije + 1 , jk = 1:iku ) ZQL0(ji, jj, jk ) = 0.5 * ( PSRC(ji, jj, jk ) + PSRC(ji, jj-1, jk )) - ( ZDMQ(ji, jj, jk ) - ZDMQ(ji, jj-1, jk ) ) / 6.0 - END DO + !$mnh_end_do() ! #ifndef MNH_OPENACC CALL GET_HALO(ZQL0, HNAME='ZQL0') @@ -1485,11 +1490,10 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') !$acc end kernels ENDIF ! 
-!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = iiw:iia , jj = ijb - 1:ije , jk = 1:iku ) +!$acc kernels present_cr(ZQR0,ZQL0) + !$mnh_do_concurrent( ji = iiw:iia , jj = ijb - 1:ije , jk = 1:iku ) ZQR0(ji, jj, jk ) = ZQL0(ji, jj+1, jk ) - END DO + !$mnh_end_do() !$acc end kernels ! ! NORTH BOUND @@ -1533,9 +1537,8 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') ZDQ(IIW:IIA,:,:) = ZQR(IIW:IIA,:,:) - ZQL(IIW:IIA,:,:) ! #else -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) +!$acc kernels present_cr(ZDQ,ZQR0,ZQL0,ZQ60,PSRC,ZQL,ZQR,ZQ6) + !$mnh_do_concurrent( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) ! ! determine initial coefficients of the parabolae ! @@ -1568,7 +1571,7 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') ! ZDQ(I,J,K) = ZQR(I,J,K) - ZQL(I,J,K) ! - END DO + !$mnh_end_do() !$acc end kernels #endif ! @@ -1577,12 +1580,11 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') !!$ ZFPOS(:,IJB+1:IJE+1,:) = ZQR(:,IJB:IJE,:) - 0.5*PCR(:,IJB+1:IJE+1,:) * & !!$ (ZDQ(:,IJB:IJE,:) - (1.0 - 2.0*PCR(:,IJB+1:IJE+1,:)/3.0) & !!$ * ZQ6(:,IJB:IJE,:)) -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = iiw:iia , jj = ijb:ije + 1 , jk = 1:iku ) +!$acc kernels present_cr(ZFPOS,ZQR,PCR,ZDQ,ZQ6) + !$mnh_do_concurrent( ji = iiw:iia , jj = ijb:ije + 1 , jk = 1:iku ) ZFPOS(ji, jj, jk ) = ZQR(ji, jj-1, jk ) - 0.5 * PCR(ji, jj, jk ) & * ( ZDQ(ji, jj-1, jk ) - ( 1.0 - 2.0 * PCR(ji, jj, jk ) / 3.0 ) * ZQ6(ji, jj-1, jk ) ) - END DO + !$mnh_end_do() ! 
#ifndef MNH_OPENACC CALL GET_HALO(ZFPOS, HNAME='ZFPOS') @@ -1610,12 +1612,11 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') !!$ ZFNEG(:,IJB-1:IJE,:) = ZQL(:,IJB-1:IJE,:) - 0.5*PCR(:,IJB-1:IJE,:) * & !!$ ( ZDQ(:,IJB-1:IJE,:) + (1.0 + 2.0*PCR(:,IJB-1:IJE,:)/3.0) * & !!$ ZQ6(:,IJB-1:IJE,:) ) -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = iiw:iia , jj = 1:iju , jk = 1:iku ) +!$acc kernels present_cr(ZFNEG,ZQL,PCR,ZDQ,ZQ6) + !$mnh_do_concurrent( ji = iiw:iia , jj = 1:iju , jk = 1:iku ) ZFNEG(ji, jj, jk ) = ZQL(ji, jj, jk ) - 0.5 * PCR(ji, jj, jk ) & * ( ZDQ(ji, jj, jk ) + ( 1.0 + 2.0 * PCR(ji, jj, jk ) / 3.0 ) * ZQ6(ji, jj, jk ) ) - END DO + !$mnh_end_do() ! #ifndef MNH_OPENACC CALL GET_HALO(ZFNEG, HNAME='ZFNEG') @@ -1643,15 +1644,14 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') ! #else CALL MYM_DEVICE(PRHO,ZQL) -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) +!$acc kernels present_cr(ZQR,PCR,ZQL,ZFPOS,ZFNEG) + !$mnh_do_concurrent( I=1:IIU , J=IJS:IJN , K=IKB:IKE ) if ( PCR(I,J,K) > 0. ) then ZQR(I,J,K) = PCR(I,J,K)* ZQL(I,J,K) * ZFPOS(I,J,K) else ZQR(I,J,K) = PCR(I,J,K)* ZQL(I,J,K) * ZFNEG(I,J,K) end if - END DO + !$mnh_end_do() !$acc end kernels CALL DYF_DEVICE(ZQR,PR) #endif @@ -1665,6 +1665,10 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='UPDATE_HALO_ll::GET_HALO::ZQL0') ! END SELECT ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PR(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PSRC,"PPM_01_Y end:PSRC") @@ -1784,9 +1788,13 @@ USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif USE MODE_MPPDB -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif + USE MODI_GET_HALO #ifndef MNH_OPENACC USE MODI_SHUMAN @@ -1851,7 +1859,11 @@ INTEGER :: I,J,K integer :: ji, jj, jk ! 
!------------------------------------------------------------------------------- - +! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all IN arrays CALL MPPDB_CHECK(PCR, "PPM_01_Z beg:PCR") @@ -1890,9 +1902,8 @@ CALL MNH_MEM_GET( ZFNEG, IIU, IJU, IKU ) IKB = 1 + JPVEXT IKE = SIZE(PSRC,3) - JPVEXT -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) +!$acc kernels present_cr(ZFPOS,ZFNEG,PSRC,PR,ZQL,ZQR,ZDQ,ZQ6,ZDMQ,ZQL0,ZQR0,ZQ60) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) PR (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQL (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR (ji, jj, jk ) = PSRC(ji, jj, jk ) @@ -1902,7 +1913,7 @@ DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) ZQL0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQ60 (ji, jj, jk ) = PSRC(ji, jj, jk ) -END DO +!$mnh_end_do() #ifndef MNH_OPENACC ZFPOS=PSRC ZFNEG=PSRC @@ -1926,50 +1937,47 @@ ZFNEG(:,:,:) = PSRC(:,:,:) ! ! calculate dmq call DIF2Z( PSRC, ZDMQ ) -!$acc kernels +!$acc kernels present_cr(ZDMQ,PSRC) ! ! monotonize the difference followinq eq. 5 in Lin94 ! use the periodic BC here, it doesn't matter for vertical (hopefully) ! 
-!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = ikb:ike ) + !$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = ikb:ike ) ZDMQ(ji, jj, jk ) = SIGN( & MIN( ABS( ZDMQ(ji, jj, jk ) ), & 2.0 * ( PSRC(ji, jj, jk ) - MIN( PSRC(ji, jj, jk-1 ), PSRC(ji, jj, jk ), PSRC(ji, jj, jk+1 ) ) ) , & 2.0 * ( - PSRC(ji, jj, jk ) + MAX( PSRC(ji, jj, jk-1 ), PSRC(ji, jj, jk ), PSRC(ji, jj, jk+1 ) ) ) ), & ZDMQ(ji, jj, jk ) ) - END DO + !$mnh_end_do() !$acc end kernels -!$acc kernels +!$acc kernels present_cr(ZDMQ,PSRC) ZDMQ(:,:,IKB-1) = & SIGN( (MIN( ABS(ZDMQ(:,:,IKB-1)), 2.0*(PSRC(:,:,IKB-1) - & MIN(PSRC(:,:,IKE-1),PSRC(:,:,IKB-1),PSRC(:,:,IKB))), & 2.0*(MAX(PSRC(:,:,IKE-1),PSRC(:,:,IKB-1),PSRC(:,:,IKB)) - & PSRC(:,:,IKB-1)) )), ZDMQ(:,:,IKB-1) ) !$acc end kernels -!$acc kernels +!$acc kernels present_cr(ZDMQ,PSRC) ZDMQ(:,:,IKE+1) = & SIGN( (MIN( ABS(ZDMQ(:,:,IKE+1)), 2.0*(PSRC(:,:,IKE+1) - & MIN(PSRC(:,:,IKE),PSRC(:,:,IKE+1),PSRC(:,:,IKB+1))), & 2.0*(MAX(PSRC(:,:,IKE),PSRC(:,:,IKE+1),PSRC(:,:,IKB+1)) - & PSRC(:,:,IKE+1)) )), ZDMQ(:,:,IKE+1) ) !$acc end kernels -!$acc kernels +!$acc kernels present_cr(ZQL0,PSRC,ZDMQ) ! ! calculate qL and qR with the modified dmq ! -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = ikb:ike + 1 ) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = ikb:ike + 1 ) ZQL0(ji, jj, jk ) = 0.5 * ( PSRC(ji, jj, jk ) + PSRC(ji, jj, jk-1 ) ) - ( ZDMQ(ji, jj, jk ) - ZDMQ(ji, jj, jk-1 ) ) / 6.0 -END DO +!$mnh_end_do() ZQL0(:,:,IKB-1) = ZQL0(:,:,IKE) ! !$acc end kernels -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = ikb-1:ike ) +!$acc kernels present_cr(ZQR0,ZQL0,ZDQ,ZQ60,PSRC,ZQL,ZQR,ZQ6) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = ikb-1:ike ) ZQR0(ji, jj, jk ) = ZQL0(ji, jj, jk+1 ) -END DO +!$mnh_end_do() ZQR0(:,:,IKE+1) = ZQR0(:,:,IKB) #ifndef MNH_OPENACC ! 
@@ -1999,7 +2007,7 @@ WHERE ( ZDMQ == 0.0 ) ZQL = PSRC ZQR = PSRC ZQ6 = 0.0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60*ZDQ < -(ZDQ)**2 ) #else ELSEWHERE ( ZQ60*ZDQ < -BR_P2(ZDQ) ) @@ -2007,7 +2015,7 @@ ELSEWHERE ( ZQ60*ZDQ < -BR_P2(ZDQ) ) ZQ6 = 3.0*(ZQL0 - PSRC) ZQR = ZQL0 - ZQ6 ZQL = ZQL0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60*ZDQ > (ZDQ)**2 ) #else ELSEWHERE ( ZQ60*ZDQ > BR_P2(ZDQ) ) @@ -2021,8 +2029,7 @@ END WHERE ! ZDQ = ZQR - ZQL #else -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( I=1:IIU , J=1:IJU , K=1:IKU ) +!$mnh_do_concurrent( I=1:IIU , J=1:IJU , K=1:IKU ) ! ! determine initial coefficients of the parabolae ! @@ -2055,18 +2062,17 @@ DO CONCURRENT ( I=1:IIU , J=1:IJU , K=1:IKU ) ! ZDQ(I,J,K) = ZQR(I,J,K) - ZQL(I,J,K) ! -END DO +!$mnh_end_do() #endif !$acc end kernels ! ! and finally calculate fluxes for the advection ! -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = ikb + 1: ike + 1 ) +!$acc kernels present_cr(ZFPOS,ZQR,PCR,ZDQ,ZQ6) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = ikb + 1: ike + 1 ) ZFPOS(ji, jj, jk ) = ZQR(ji, jj, jk-1 ) - 0.5 * PCR(ji, jj, jk ) & * ( ZDQ(ji, jj, jk-1 ) - ( 1.0 - 2.0 * PCR(ji, jj, jk ) / 3.0) * ZQ6(ji, jj, jk-1 ) ) -END DO +!$mnh_end_do() !$acc end kernels ! !$acc kernels present_cr(ZFPOS,ZQR) @@ -2079,12 +2085,11 @@ ZFPOS(:,:,IKB) = (PSRC(:,:,IKB-1) - ZQR(:,:,IKB-1))*PCR(:,:,IKB) + & ZFPOS(:,:,IKB-1) = 0.0 ! !$acc end kernels -!$acc kernels -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = ikb - 1: ike ) +!$acc kernels present_cr(ZFNEG,ZQL,PCR,ZDQ,ZQ6) +!$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = ikb - 1: ike ) ZFNEG(ji, jj, jk ) = ZQL(ji, jj, jk ) - 0.5 * PCR(ji, jj, jk ) & * ( ZDQ(ji, jj, jk ) + ( 1.0 + 2.0 * PCR(ji, jj, jk ) / 3.0) * ZQ6(ji, jj, jk ) ) -END DO +!$mnh_end_do() ! 
!$acc end kernels !$acc kernels present_cr(ZFNEG,ZQR) @@ -2100,15 +2105,14 @@ PR = DZF( PCR*MZM(PRHO)*( ZFPOS*(0.5+SIGN(0.5,PCR)) + & #else !$acc end kernels CALL MZM_DEVICE(PRHO,ZQL) -!$acc kernels -!$acc_nv loop independent collapse(3) - DO CONCURRENT ( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) +!$acc kernels present_cr(ZQR,PCR,ZQL,ZFPOS,ZFNEG) + !$mnh_do_concurrent( ji = 1:iiu , jj = 1:iju , jk = 1:iku ) if ( PCR(ji, jj, jk ) > 0. ) then ZQR(ji, jj, jk ) = PCR(ji, jj, jk ) * ZQL(ji, jj, jk ) * ZFPOS(ji, jj, jk ) else ZQR(ji, jj, jk ) = PCR(ji, jj, jk ) * ZQL(ji, jj, jk ) * ZFNEG(ji, jj, jk ) end if - END DO + !$mnh_end_do() !dzf(PR,ZQR) !$acc end kernels CALL DZF_DEVICE( ZQR, PR ) @@ -2120,6 +2124,10 @@ PR = DZF( PCR*MZM(PRHO)*( ZFPOS*(0.5+SIGN(0.5,PCR)) + & !Unnecessary CALL GET_HALO_D(PR) #endif ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PR(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PSRC,"PPM_01_Z end:PSRC") diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90 index a0bf5e1cc..9c1fe3627 100644 --- a/src/MNH/prandtl.f90 +++ b/src/MNH/prandtl.f90 @@ -206,9 +206,13 @@ use mode_mppdb use mode_msg #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif USE MODI_EMOIST USE MODI_ETHETA USE MODI_GRADIENT_M @@ -434,11 +438,11 @@ END IF ! ZMINVAL = (1.-1./XPHI_LIM) ! -!$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZW1(JI,JJ,JK) = 1. - ZW2(JI,JJ,JK) = 1. -END DO +!$acc kernels present_cr(zw1,zw2) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZW1(:,:,:) = 1. + ZW2(:,:,:) = 1. 
+!$mnh_end_expand_array() WHERE (PREDTH1(:,:,:)+PREDR1(:,:,:) < -ZMINVAL) ZW1(:,:,:) = (-ZMINVAL) / (PREDTH1(:,:,:)+PREDR1(:,:,:)) @@ -447,25 +451,16 @@ END WHERE WHERE (PREDTH1(:,:,:) < -ZMINVAL) ZW2(:,:,:) = (-ZMINVAL) / (PREDTH1(:,:,:)) END WHERE -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZW2(JI,JJ,JK) = MIN( ZW1(JI,JJ,JK),ZW2(JI,JJ,JK) ) -END DO - +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZW2(:,:,:) = MIN( ZW1(:,:,:),ZW2(:,:,:) ) +!$mnh_end_expand_array() ZW1(:,:,:) = 1. WHERE (PREDR1(:,:,:)<-ZMINVAL) ZW1(:,:,:) = (-ZMINVAL) / (PREDR1(:,:,:)) END WHERE - -!!$ZW1(:,:,:) = MIN(ZW2(:,:,:),ZW1(:,:,:)) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZW1(JI,JJ,JK) = MIN( ZW2(JI,JJ,JK),ZW1(JI,JJ,JK) ) -END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZW1(:,:,:) = MIN(ZW2(:,:,:),ZW1(:,:,:)) +!$mnh_end_expand_array() ! !$acc end kernels ! @@ -474,11 +469,11 @@ END DO ! !$acc kernels ! -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PBLL_O_E(JI,JJ,JK) = PBLL_O_E(JI,JJ,JK) * ZW1(JI,JJ,JK) - PREDTH1 (JI,JJ,JK) = PREDTH1 (JI,JJ,JK) * ZW1(JI,JJ,JK) - PREDR1 (JI,JJ,JK) = PREDR1 (JI,JJ,JK) * ZW1(JI,JJ,JK) -END DO !CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PBLL_O_E(:,:,:) = PBLL_O_E(:,:,:) * ZW1(:,:,:) + PREDTH1 (:,:,:) = PREDTH1 (:,:,:) * ZW1(:,:,:) + PREDR1 (:,:,:) = PREDR1 (:,:,:) * ZW1(:,:,:) +!$mnh_end_expand_array() ! ! 4. Threshold for very small (in absolute value) Redelperger numbers ! ---------------------------------------------------------------- @@ -529,18 +524,22 @@ END DO IF(HTURBDIM=='1DIM') THEN ! 1D case ! !$acc kernels ! async -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 #else +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2TH3(:,:,:) = BR_P2(PREDTH1(:,:,:)) +!$mnh_end_expand_array() #endif !$acc end kernels ! 
!$acc kernels ! async -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2R3(:,:,:) = PREDR1(:,:,:) **2 #else +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2R3(:,:,:) = BR_P2(PREDR1(:,:,:)) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -553,7 +552,7 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model ! IF (KRR /= 0) THEN ! moist 3D case #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:)= PREDTH1(:,:,:)**2+(XCTV*PBLL_O_E(:,:,:)*PETHETA(:,:,:) )**2 * & MZM( GX_M_M(PTHLM,PDXX,PDZZ,PDZX)**2 ) #else @@ -564,39 +563,33 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model #else CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) !$acc kernels -#ifndef MNH_BITREP -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 - END DO !CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) - END DO !CONCURRENT + ZTMP1_DEVICE(:,:,:) = BR_P2(ZTMP1_DEVICE(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE ) !$acc kernels ! async -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 & + (XCTV*PBLL_O_E(:,:,:)*PETHETA(:,:,:) )**2 & * ZTMP2_DEVICE(:,:,:) #else +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2TH3(:,:,:) = BR_P2(PREDTH1(:,:,:)) & + BR_P2(XCTV*PBLL_O_E(:,:,:)*PETHETA(:,:,:) ) & * ZTMP2_DEVICE(:,:,:) +!$mnh_end_expand_array() #endif PRED2TH3(:,:,IKB)=PRED2TH3(:,:,IKB+KKL) !$acc end kernels #endif ! 
#ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2R3(:,:,:) = PREDR1(:,:,:)**2 & + (XCTV*PBLL_O_E(:,:,:)*PEMOIST(:,:,:))**2 & * MZM( GX_M_M(PRM(:,:,:,1),PDXX,PDZZ,PDZX)**2 ) @@ -609,43 +602,37 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model #else CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) !$acc kernels -#ifndef MNH_BITREP -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 - END DO !CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 #else -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) - END DO !CONCURRENT + ZTMP1_DEVICE(:,:,:) = BR_P2(ZTMP1_DEVICE(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE ) !$acc kernels ! async -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2R3(:,:,:) = PREDR1(:,:,:)**2 & + (XCTV*PBLL_O_E(:,:,:)*PEMOIST(:,:,:))**2 & * ZTMP2_DEVICE(:,:,:) #else +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2R3(:,:,:) = BR_P2(PREDR1(:,:,:)) & + BR_P2(XCTV*PBLL_O_E(:,:,:)*PEMOIST(:,:,:)) & * ZTMP2_DEVICE(:,:,:) +!$mnh_end_expand_array() #endif PRED2R3(:,:,IKB)=PRED2R3(:,:,IKB+KKL) !$acc end kernels #endif ! #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2THR3(:,:,:) = PREDR1(:,:,:) * PREDTH1(:,:,:) & + XCTV**2*PBLL_O_E(:,:,:)**2 * & #else - PRED2THR3(:,:,:) = PREDR1(:,:,:) * PREDTH1(:,:,:) & + PRED2THR3(:,:,:) = PREDR1(:,:,:) * PREDTH1(:,:,:) & + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) * & #endif PEMOIST(:,:,:) * PETHETA(:,:,:) * & @@ -655,29 +642,24 @@ ELSE IF (L2D) THEN ! 
3D case in a 2D model #else CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GX_M_M_DEVICE(PTHLM ,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) -!$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) - END DO +!$acc kernels present_cr(ZTMP1_DEVICE,ZTMP2_DEVICE) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels ! async -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2THR3(:,:,:)= PREDR1(:,:,:) * PREDTH1(:,:,:) & + XCTV**2*PBLL_O_E(:,:,:)**2 & * PEMOIST(:,:,:) * PETHETA(:,:,:) & * ZTMP2_DEVICE(:,:,:) #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PRED2THR3(JI,JJ,JK)= PREDR1(JI,JJ,JK) * PREDTH1(JI,JJ,JK) + BR_P2(XCTV)*BR_P2(PBLL_O_E(JI,JJ,JK)) * & - PEMOIST(JI,JJ,JK) * PETHETA(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) - END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PRED2THR3(:,:,:)= PREDR1(:,:,:) * PREDTH1(:,:,:) & + + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) & + * PEMOIST(:,:,:) * PETHETA(:,:,:) * ZTMP2_DEVICE(:,:,:) +!$mnh_end_expand_array() #endif PRED2THR3(:,:,IKB)=PRED2THR3(:,:,IKB+KKL) !$acc end kernels @@ -689,7 +671,7 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.T. and KRR=0 not yet tested' ) #endif #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 + XCTV**2*PBLL_O_E(:,:,:)**2 * & MZM( GX_M_M(PTHLM,PDXX,PDZZ,PDZX)**2 ) #else @@ -700,24 +682,24 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.T. 
and KRR=0 not #else CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) !$acc kernels -#ifndef MNH_BITREP - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 - END DO !CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 #else - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) - END DO !CONCURRENT + ZTMP1_DEVICE(:,:,:) = BR_P2(ZTMP1_DEVICE(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 + XCTV**2*PBLL_O_E(:,:,:)**2 * & ZTMP2_DEVICE(:,:,:) #else +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRED2TH3(:,:,:) = BR_P2(PREDTH1(:,:,:)) + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) * & ZTMP2_DEVICE(:,:,:) +!$mnh_end_expand_array() #endif !PW: merge kernels + remove async to prevent compiler crash...(bug PGI 19.10) ! !$acc end kernels @@ -741,7 +723,7 @@ ELSE ! 3D case in a 3D model ! IF (KRR /= 0) THEN ! moist 3D case #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 & + ( XCTV*PBLL_O_E(:,:,:)*PETHETA(:,:,:) )**2 & * MZM( GX_M_M(PTHLM,PDXX,PDZZ,PDZX)**2 & @@ -757,25 +739,18 @@ ELSE ! 
3D case in a 3D model CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 + ZTMP2_DEVICE(JI,JJ,JK)**2 - END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) - END DO + ZTMP1_DEVICE(:,:,:) = BR_P2(ZTMP1_DEVICE(:,:,:)) + BR_P2(ZTMP2_DEVICE(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 & + ( XCTV*PBLL_O_E(:,:,:)*PETHETA(:,:,:) )**2 & * ZTMP2_DEVICE(:,:,:) @@ -785,11 +760,12 @@ ELSE ! 3D case in a 3D model * ZTMP2_DEVICE(:,:,:) #endif +!$mnh_end_expand_array() PRED2TH3(:,:,IKB)=PRED2TH3(:,:,IKB+KKL) !$acc end kernels #endif ! #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2R3(:,:,:) = PREDR1(:,:,:)**2 & + (XCTV*PBLL_O_E(:,:,:)*PEMOIST(:,:,:))**2 & * MZM( GX_M_M(PRM(:,:,:,1),PDXX,PDZZ,PDZX)**2 & @@ -805,25 +781,18 @@ ELSE ! 
3D case in a 3D model CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 + ZTMP2_DEVICE(JI,JJ,JK)**2 - END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) - END DO + ZTMP1_DEVICE(:,:,:) = BR_P2(ZTMP1_DEVICE(:,:,:)) + BR_P2(ZTMP2_DEVICE(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2R3(:,:,:) = PREDR1(:,:,:)**2 & + (XCTV*PBLL_O_E(:,:,:)*PEMOIST(:,:,:))**2 & * ZTMP2_DEVICE(:,:,:) @@ -832,6 +801,7 @@ ELSE ! 3D case in a 3D model + BR_P2(XCTV*PBLL_O_E(:,:,:)*PEMOIST(:,:,:)) & * ZTMP2_DEVICE(:,:,:) #endif +!$mnh_end_expand_array() !$acc end kernels !$acc kernels ! async PRED2R3(:,:,IKB)=PRED2R3(:,:,IKB+KKL) @@ -839,11 +809,11 @@ ELSE ! 3D case in a 3D model #endif ! #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2THR3(:,:,:) = PREDR1(:,:,:) * PREDTH1(:,:,:) & + XCTV**2*PBLL_O_E(:,:,:)**2 * & #else - PRED2THR3(:,:,:) = PREDR1(:,:,:) * PREDTH1(:,:,:) & + PRED2THR3(:,:,:) = PREDR1(:,:,:) * PREDTH1(:,:,:) & + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) * & #endif PEMOIST(:,:,:) * PETHETA(:,:,:) * & @@ -857,18 +827,16 @@ ELSE ! 
3D case in a 3D model CALL GX_M_M_DEVICE(PTHLM ,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP3_DEVICE) CALL GY_M_M_DEVICE(PTHLM ,PDYY,PDZZ,PDZY,ZTMP4_DEVICE) -!$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)+ & - ZTMP3_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) - END DO +!$acc kernels present_cr(ZTMP1_DEVICE,ZTMP2_DEVICE,ZTMP3_DEVICE,ZTMP4_DEVICE) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:)+ & + ZTMP3_DEVICE(:,:,:)*ZTMP4_DEVICE(:,:,:) +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2THR3(:,:,:)= PREDR1(:,:,:) * PREDTH1(:,:,:) & + XCTV**2*PBLL_O_E(:,:,:)**2 * & #else @@ -876,6 +844,7 @@ ELSE ! 3D case in a 3D model + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) * & #endif PEMOIST(:,:,:) * PETHETA(:,:,:) * ZTMP2_DEVICE(:,:,:) +!$mnh_end_expand_array() !$acc end kernels !$acc kernels ! async PRED2THR3(:,:,IKB)=PRED2THR3(:,:,IKB+KKL) @@ -888,7 +857,7 @@ ELSE ! 3D case in a 3D model call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.F. and KRR=0 not yet tested' ) #endif #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 + XCTV**2*PBLL_O_E(:,:,:)**2 * & MZM( GX_M_M(PTHLM,PDXX,PDZZ,PDZX)**2 & + GY_M_M(PTHLM,PDYY,PDZZ,PDZY)**2 ) @@ -901,31 +870,25 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.F. 
and KRR=0 not CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)**2 + ZTMP2_DEVICE(JI,JJ,JK)**2 - END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK)) - END DO + ZTMP1_DEVICE(:,:,:) = BR_P2(ZTMP1_DEVICE(:,:,:)) + BR_P2(ZTMP2_DEVICE(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PRED2TH3(:,:,:) = PREDTH1(:,:,:)**2 + XCTV**2*PBLL_O_E(:,:,:)**2 & * ZTMP2_DEVICE(:,:,:) #else PRED2TH3(:,:,:) = BR_P2(PREDTH1(:,:,:)) + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) & * ZTMP2_DEVICE(:,:,:) #endif +!$mnh_end_expand_array() !$acc end kernels #endif !$acc kernels ! async @@ -972,7 +935,7 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model ! #ifndef MNH_OPENACC IF (LOCEAN) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) IF (KRR /= 0) THEN ZW1 = MZM((XG *XALPHAOC * PLM * PLEPS / PTKEM)**2 ) *PETHETA ELSE @@ -987,7 +950,7 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model #endif ELSE DO JSV=1,ISV -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) IF (KRR /= 0) THEN ZW1 = MZM( (XG / PTHVREF * PLM * PLEPS / PTKEM)**2 ) *PETHETA ELSE @@ -1003,11 +966,13 @@ ELSE IF (L2D) THEN ! 
3D case in a 2D model #else IF (LOCEAN) THEN !$acc kernels -#ifndef MNH_BITREP - ZTMP1_DEVICE(:,:,:) = (XG * XALPHAOC *PLM * PLEPS / PTKEM)**2 +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU ) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + ZTMP1_DEVICE(:,:,:) = (XG * XALPHAOC *PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:))**2 #else - ZTMP1_DEVICE(:,:,:) = BR_P2(XG * XALPHAOC *PLM * PLEPS / PTKEM) + ZTMP1_DEVICE(:,:,:) = BR_P2(XG * XALPHAOC *PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZW1) IF (KRR /= 0) THEN @@ -1018,11 +983,13 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model ELSE DO JSV=1,ISV !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) - ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF * PLM * PLEPS / PTKEM)**2 + ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:))**2 #else - ZTMP1_DEVICE(:,:,:) = BR_P2(XG / PTHVREF * PLM * PLEPS / PTKEM) + ZTMP1_DEVICE(:,:,:) = BR_P2(XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZW1) IF (KRR /= 0) THEN @@ -1081,7 +1048,7 @@ ELSE ! 3D case in a 3D model ! #ifndef MNH_OPENACC DO JSV=1,ISV -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) IF (KRR /= 0) THEN ZW1 = MZM( (XG / PTHVREF * PLM * PLEPS / PTKEM)**2 ) *PETHETA ELSE @@ -1097,11 +1064,13 @@ ELSE ! 
3D case in a 3D model #else DO JSV=1,ISV !$acc kernels -#ifndef MNH_BITREP +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) - ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF * PLM * PLEPS / PTKEM)**2 + ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:))**2 #else - ZTMP1_DEVICE(:,:,:) = BR_P2(XG / PTHVREF * PLM * PLEPS / PTKEM) + ZTMP1_DEVICE(:,:,:) = BR_P2(XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:)) #endif +!$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZW1) IF (KRR /= 0) THEN diff --git a/src/MNH/rain_ice.f90 b/src/MNH/rain_ice.f90 index dcb3637c4..ab373a19e 100644 --- a/src/MNH/rain_ice.f90 +++ b/src/MNH/rain_ice.f90 @@ -1057,7 +1057,7 @@ IF( IMICRO >= 0 ) THEN END DO ! CONCURRENT #endif !ZLBDAR_RF will be used when we consider rain concentrated in its fraction -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( ZRRT(JL)>0.0 .AND. ZRF(JL)>0.0 ) THEN diff --git a/src/MNH/rain_ice_fast_rg.f90 b/src/MNH/rain_ice_fast_rg.f90 index cf0279cfa..9d3fd8ae8 100644 --- a/src/MNH/rain_ice_fast_rg.f90 +++ b/src/MNH/rain_ice_fast_rg.f90 @@ -54,9 +54,13 @@ use mode_tools, only: Countjv use mode_tools, only: Countjv_device #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif IMPLICIT NONE ! @@ -198,8 +202,8 @@ CALL MNH_MEM_GET( ZZW1, SIZE(PRHODREF), 7 ) !$acc kernels ZZW1(:,:) = 0.0 GWORK(:) = PRIT(:)>XRTMIN(4) .AND. PRRT(:)>XRTMIN(3) .AND. PRIS(:)>0.0 .AND. PRRS(:)>0.0 -#ifndef MNH_BITREP - !$acc_nv loop independent +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW1(JL,3) = MIN( PRIS(JL),XICFRR * PRIT(JL) & ! RICFRRG @@ -215,7 +219,7 @@ CALL MNH_MEM_GET( ZZW1, SIZE(PRHODREF), 7 ) END IF END DO ! 
CONCURRENT #else - !$acc_nv loop independent + !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW1(JL,3) = MIN( PRIS(JL),XICFRR * PRIT(JL) & ! RICFRRG @@ -268,17 +272,20 @@ END IF !$acc kernels ZZW1(:,:) = 0.0 GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRCT(:)>XRTMIN(2) .AND. PRCS(:)>0.0 + !$mnh_expand_where(JL=1:JLU) WHERE( GWORK(:) ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(:) = PLBDAG(:)**(XCXG-XDG-2.0) * PRHODREF(:)**(-XCEXVT) #else ZZW(:) = BR_POW(PLBDAG(:),XCXG-XDG-2.0) * BR_POW(PRHODREF(:),-XCEXVT) #endif ZZW1(:,1) = MIN( PRCS(:),XFCDRYG * PRCT(:) * ZZW(:) ) ! RCDRYG END WHERE + !$mnh_end_expand_where() GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRIT(:)>XRTMIN(4) .AND. PRIS(:)>0.0 + !$mnh_expand_where(JL=1:JLU) WHERE( GWORK(:) ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(:) = PLBDAG(:)**(XCXG-XDG-2.0) * PRHODREF(:)**(-XCEXVT) ZZW1(:,2) = MIN( PRIS(:),XFIDRYG * EXP( XCOLEXIG*(PZT(:)-XTT) ) & * PRIT(:) * ZZW(:) ) ! RIDRYG @@ -288,6 +295,7 @@ END IF * PRIT(:) * ZZW(:) ) ! RIDRYG #endif END WHERE + !$mnh_end_expand_where() !$acc end kernels IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(ZZW1,"RAIN_ICE_FAST_RG 6.2:ZZW1") @@ -341,8 +349,9 @@ END IF ! in the geometrical set of (Lbda_g,Lbda_s) couplet use to ! tabulate the SDRYG-kernel ! + !$mnh_expand_where(JL=1:IGDRY) ZVEC1(1:IGDRY) = MAX( 1.00001, MIN( REAL(NDRYLBDAG)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XDRYINTP1G * LOG( ZVECLBDAG(1:IGDRY) ) + XDRYINTP2G ) ) #else XDRYINTP1G * BR_LOG( ZVECLBDAG(1:IGDRY) ) + XDRYINTP2G ) ) @@ -351,18 +360,19 @@ END IF ZVEC1(1:IGDRY) = ZVEC1(1:IGDRY) - REAL( IVEC1(1:IGDRY) ) ! 
ZVEC2(1:IGDRY) = MAX( 1.00001, MIN( REAL(NDRYLBDAS)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XDRYINTP1S * LOG( ZVECLBDAS(1:IGDRY) ) + XDRYINTP2S ) ) #else XDRYINTP1S * BR_LOG( ZVECLBDAS(1:IGDRY) ) + XDRYINTP2S ) ) #endif IVEC2(1:IGDRY) = INT( ZVEC2(1:IGDRY) ) ZVEC2(1:IGDRY) = ZVEC2(1:IGDRY) - REAL( IVEC2(1:IGDRY) ) -! + !$mnh_end_expand_where() + ! !* 6.2.5 perform the bilinear interpolation of the normalized ! SDRYG-kernel ! - !$acc_nv loop independent + !$acc loop independent DO CONCURRENT ( JJ = 1:IGDRY ) ZVEC3(JJ) = ( XKER_SDRYG(IVEC1(JJ)+1,IVEC2(JJ)+1)* ZVEC2(JJ) & - XKER_SDRYG(IVEC1(JJ)+1,IVEC2(JJ) )*(ZVEC2(JJ) - 1.0) ) & @@ -372,10 +382,10 @@ END IF * (ZVEC1(JJ) - 1.0) END DO ! CONCURRENT ! - !$acc_nv loop independent , private (JL) + !$acc loop independent , private (JL) DO CONCURRENT (JJ=1:IGDRY) JL = I1(JJ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW1(JL,3) = MIN( PRSS(JL),XFSDRYG*ZVEC3(JJ) & ! RSDRYG * EXP( XCOLEXSG*(PZT(JL)-XTT) ) & * ZVECLBDAS(JJ)**(XCXS-XBS) * ZVECLBDAG(JJ)**XCXG & @@ -461,8 +471,9 @@ END IF ! in the geometrical set of (Lbda_g,Lbda_r) couplet use to ! tabulate the RDRYG-kernel ! + !$mnh_expand_where(JL=1:IGDRY) ZVEC1(1:IGDRY) = MAX( 1.00001, MIN( REAL(NDRYLBDAG)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XDRYINTP1G * LOG( ZVECLBDAG(1:IGDRY) ) + XDRYINTP2G ) ) #else XDRYINTP1G * BR_LOG( ZVECLBDAG(1:IGDRY) ) + XDRYINTP2G ) ) @@ -471,18 +482,19 @@ END IF ZVEC1(1:IGDRY) = ZVEC1(1:IGDRY) - REAL( IVEC1(1:IGDRY) ) ! ZVEC2(1:IGDRY) = MAX( 1.00001, MIN( REAL(NDRYLBDAR)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XDRYINTP1R * LOG( ZVECLBDAR(1:IGDRY) ) + XDRYINTP2R ) ) #else XDRYINTP1R * BR_LOG( ZVECLBDAR(1:IGDRY) ) + XDRYINTP2R ) ) #endif IVEC2(1:IGDRY) = INT( ZVEC2(1:IGDRY) ) ZVEC2(1:IGDRY) = ZVEC2(1:IGDRY) - REAL( IVEC2(1:IGDRY) ) + !$mnh_end_expand_where() ! 
!* 6.2.10 perform the bilinear interpolation of the normalized ! RDRYG-kernel ! - !$acc_nv loop independent + !$acc loop independent DO CONCURRENT (JJ = 1:IGDRY ) ZVEC3(JJ) = ( XKER_RDRYG(IVEC1(JJ)+1,IVEC2(JJ)+1)* ZVEC2(JJ) & - XKER_RDRYG(IVEC1(JJ)+1,IVEC2(JJ) )*(ZVEC2(JJ) - 1.0) ) & @@ -492,10 +504,10 @@ END IF * (ZVEC1(JJ) - 1.0) END DO ! CONCURRENT ! - !$acc_nv loop independent , private (JL) + !$acc loop independent , private (JL) DO CONCURRENT (JJ=1:IGDRY) JL = I1(JJ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW1(JL,4) = MIN( PRRS(JL),XFRDRYG*ZVEC3(JJ) & ! RRDRYG * ZVECLBDAR(JJ)**(-4) * ZVECLBDAG(JJ)**XCXG & * PRHODREF(JL)**(-XCEXVT-1.) & @@ -538,8 +550,8 @@ END IF ! PRWETG(:) = 0.0 GWORK(:) = PRGT(:)>XRTMIN(6) -#ifndef MNH_BITREP -!$acc_nv loop independent +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW1(JL,5) = MIN( PRIS(JL), & @@ -563,7 +575,7 @@ END IF END IF END DO ! CONCURRENT #else -!$acc_nv loop independent +!$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW1(JL,5) = MIN( PRIS(JL), & @@ -591,7 +603,7 @@ END IF !* 6.4 Select Wet or Dry case ! IF ( KRR == 7 ) THEN -!$acc_nv loop independent +!$acc loop independent DO CONCURRENT (JL=1:JLU) GWORK(JL) = PRGT(JL)>XRTMIN(6) .AND. PZT(JL)<XTT .and. PRDRYG(JL)>=PRWETG(JL) .AND. PRWETG(JL)>0.0 ! Wet case IF( GWORK(JL) )THEN @@ -709,8 +721,8 @@ END IF ! !$acc kernels GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRGS(:)>0.0 .AND. PZT(:)>XTT -#ifndef MNH_BITREP - !$acc_nv loop independent +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW(JL) = PRVT(JL)*PPRES(JL)/((XMV/XMD)+PRVT(JL)) ! Vapor pressure @@ -732,7 +744,7 @@ END IF END IF END DO ! 
CONCURRENT #else - !$acc_nv loop independent + !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW(JL) = PRVT(JL)*PPRES(JL)/((XMV/XMD)+PRVT(JL)) ! Vapor pressure diff --git a/src/MNH/rain_ice_fast_ri.f90 b/src/MNH/rain_ice_fast_ri.f90 index f27f0edee..bf08fef05 100644 --- a/src/MNH/rain_ice_fast_ri.f90 +++ b/src/MNH/rain_ice_fast_ri.f90 @@ -37,7 +37,7 @@ USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif use mode_mppdb -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif @@ -157,7 +157,7 @@ CALL MNH_MEM_GET( ZLBEXI, SIZE(PRHODREF) ) !$acc kernels zzw(:) = 0. GWORK(:) = PRCS(:)>0.0 .AND. PSSI(:)>0.0 .AND. PRIT(:)>XRTMIN(4) .AND. PCIT(:)>0.0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) WHERE( GWORK(:) ) ZZW(:) = MIN(1.E8,XLBI*( PRHODREF(:)*PRIT(:)/PCIT(:) )**XLBEXI) ! Lbda_i ZZW(:) = MIN( PRCS(:),( PSSI(:) / (PRHODREF(:)*PAI(:)) ) * PCIT(:) * & @@ -169,27 +169,27 @@ CALL MNH_MEM_GET( ZLBEXI, SIZE(PRHODREF) ) #else -!!$ Le DO concurrent n'est pas bit-reproductible BUG NVHPC 20.7 +!!$ DO CONCURRENT was not bit-reproducible with NVHPC 20.7 (compiler bug) - NOTE(review): it is re-enabled below, confirm bit-reproducibility on NVHPC -!!$ DO CONCURRENT ( JL=1:JLU ) -!!$ ZLBEXI(JL) = XLBEXI -!!$ IF ( GWORK(JL) ) THEN -!!$ ZZW(JL) = MIN(1.E8,XLBI*BR_POW( PRHODREF(JL)*PRIT(JL)/PCIT(JL), ZLBEXI(JL) ) ) ! Lbda_i -!!$ ZZW(JL) = MIN( PRCS(JL),( PSSI(JL) / (PRHODREF(JL)*PAI(JL)) ) * PCIT(JL) * & -!!$ ( X0DEPI/ZZW(JL) + X2DEPI*PCJ(JL)*PCJ(JL)/BR_POW(ZZW(JL),XDI+2.0) ) ) -!!$ PRCS(JL) = PRCS(JL) - ZZW(JL) -!!$ PRIS(JL) = PRIS(JL) + ZZW(JL) -!!$ PTHS(JL) = PTHS(JL) + ZZW(JL)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*(RCBERI)) -!!$ END IF -!!$ END DO ! CONCURRENT + DO CONCURRENT ( JL=1:JLU ) + ZLBEXI(JL) = XLBEXI + IF ( GWORK(JL) ) THEN + ZZW(JL) = MIN(1.E8,XLBI*BR_POW( PRHODREF(JL)*PRIT(JL)/PCIT(JL), ZLBEXI(JL) ) ) ! 
Lbda_i + ZZW(JL) = MIN( PRCS(JL),( PSSI(JL) / (PRHODREF(JL)*PAI(JL)) ) * PCIT(JL) * & + ( X0DEPI/ZZW(JL) + X2DEPI*PCJ(JL)*PCJ(JL)/BR_POW(ZZW(JL),XDI+2.0) ) ) + PRCS(JL) = PRCS(JL) - ZZW(JL) + PRIS(JL) = PRIS(JL) + ZZW(JL) + PTHS(JL) = PTHS(JL) + ZZW(JL)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*(RCBERI)) + END IF + END DO ! CONCURRENT - WHERE( GWORK(:) ) - ZLBEXI(:) = XLBEXI - ZZW(:) = MIN(1.E8,XLBI*BR_POW( PRHODREF(:)*PRIT(:)/PCIT(:), ZLBEXI(:) ) ) ! Lbda_i - ZZW(:) = MIN( PRCS(:),( PSSI(:) / (PRHODREF(:)*PAI(:)) ) * PCIT(:) * & - ( X0DEPI/ZZW(:) + X2DEPI*PCJ(:)*PCJ(:)/BR_POW(ZZW(:),XDI+2.0) ) ) - PRCS(:) = PRCS(:) - ZZW(:) - PRIS(:) = PRIS(:) + ZZW(:) - PTHS(:) = PTHS(:) + ZZW(:)*(PLSFACT(:)-PLVFACT(:)) ! f(L_f*(RCBERI)) - END WHERE +!!! WHERE( GWORK(:) ) + !!!! ZLBEXI(:) = XLBEXI + !!!!ZZW(:) = MIN(1.E8,XLBI*BR_POW( PRHODREF(:)*PRIT(:)/PCIT(:), ZLBEXI(:) ) ) ! Lbda_i + !!!!ZZW(:) = MIN( PRCS(:),( PSSI(:) / (PRHODREF(:)*PAI(:)) ) * PCIT(:) * & + !!! ( X0DEPI/ZZW(:) + X2DEPI*PCJ(:)*PCJ(:)/BR_POW(ZZW(:),XDI+2.0) ) ) + !!!PRCS(:) = PRCS(:) - ZZW(:) + !!!!PRIS(:) = PRIS(:) + ZZW(:) + !!!!PTHS(:) = PTHS(:) + ZZW(:)*(PLSFACT(:)-PLVFACT(:)) ! f(L_f*(RCBERI)) + !!!!END WHERE #endif !$acc end kernels diff --git a/src/MNH/rain_ice_fast_rs.f90 b/src/MNH/rain_ice_fast_rs.f90 index fae5552bd..88462be86 100644 --- a/src/MNH/rain_ice_fast_rs.f90 +++ b/src/MNH/rain_ice_fast_rs.f90 @@ -53,7 +53,7 @@ use mode_tools, only: Countjv use mode_tools, only: Countjv_device #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif @@ -224,7 +224,12 @@ CALL COUNTJV_DEVICE( GWORK(:), I1(:), IGRIM ) ! ! 5.1.1 select the PLBDAS ! +#ifdef MNH_COMPILER_CCE +! Loops with indirect addressing do not parallelize with the Cray compiler -> keep DO CONCURRENT +!$mnh_undef(LOOP) +#endif !$acc kernels +!$mnh_expand_array(JJ = 1:IGRIM) ZVECLBDAS(1:IGRIM) = PLBDAS(I1(1:IGRIM)) ! ! 
5.1.2 find the next lower indice for the PLBDAS in the geometrical @@ -232,13 +237,14 @@ CALL COUNTJV_DEVICE( GWORK(:), I1(:), IGRIM ) ! gamma function ! ZVEC2(1:IGRIM) = MAX( 1.00001, MIN( REAL(NGAMINC)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XRIMINTP1 * LOG( ZVECLBDAS(1:IGRIM) ) + XRIMINTP2 ) ) #else XRIMINTP1 * BR_LOG( ZVECLBDAS(1:IGRIM) ) + XRIMINTP2 ) ) #endif IVEC2(1:IGRIM) = INT( ZVEC2(1:IGRIM) ) ZVEC2(1:IGRIM) = ZVEC2(1:IGRIM) - REAL( IVEC2(1:IGRIM) ) +!$mnh_end_expand_array() ! ! 5.1.3 perform the linear interpolation of the normalized ! "2+XDS"-moment of the incomplete gamma function @@ -254,7 +260,7 @@ CALL COUNTJV_DEVICE( GWORK(:), I1(:), IGRIM ) !$acc loop independent , private (JL) DO CONCURRENT ( JJ = 1:IGRIM ) JL = I1(JJ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW1(JJ) = MIN( PRCS(JL), & XCRIMSS * ZVEC1(JJ) * PRCT(JL) & ! RCRIMSS * ZVECLBDAS(JJ)**XEXCRIMSS & @@ -295,7 +301,7 @@ END IF DO CONCURRENT (JJ = 1:IGRIM ) JL = I1(JJ) IF ( PRSS(JL) > 0.0 ) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW2(JJ) = MIN( PRCS(JL), & XCRIMSG * PRCT(JL) & ! RCRIMSG * ZVECLBDAS(JJ)**XEXCRIMSG & @@ -413,6 +419,7 @@ END IF ! 5.2.1 select the (PLBDAS,PLBDAR) couplet ! !$acc kernels +!$mnh_expand_array(JJ = 1:IGACC) ZVECLBDAS(1:IGACC) = PLBDAS(I1(1:IGACC)) ZVECLBDAR(1:IGACC) = PLBDAR(I1(1:IGACC)) ! @@ -421,7 +428,7 @@ END IF ! tabulate the RACCSS-kernel ! ZVEC1(1:IGACC) = MAX( 1.00001, MIN( REAL(NACCLBDAS)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XACCINTP1S * LOG( ZVECLBDAS(1:IGACC) ) + XACCINTP2S ) ) #else XACCINTP1S * BR_LOG( ZVECLBDAS(1:IGACC) ) + XACCINTP2S ) ) @@ -430,13 +437,14 @@ END IF ZVEC1(1:IGACC) = ZVEC1(1:IGACC) - REAL( IVEC1(1:IGACC) ) ! 
ZVEC2(1:IGACC) = MAX( 1.00001, MIN( REAL(NACCLBDAR)-0.00001, & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XACCINTP1R * LOG( ZVECLBDAR(1:IGACC) ) + XACCINTP2R ) ) #else XACCINTP1R * BR_LOG( ZVECLBDAR(1:IGACC) ) + XACCINTP2R ) ) #endif IVEC2(1:IGACC) = INT( ZVEC2(1:IGACC) ) ZVEC2(1:IGACC) = ZVEC2(1:IGACC) - REAL( IVEC2(1:IGACC) ) +!$mnh_end_expand_array() ! ! 5.2.3 perform the bilinear interpolation of the normalized ! RACCSS-kernel @@ -456,7 +464,7 @@ END IF !$acc loop independent , private (JL) DO CONCURRENT ( JJ = 1:IGACC ) JL = I1(JJ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW2(JJ) = & !! coef of RRACCS XFRACCSS * ZVECLBDAS(JJ)**XCXS * PRHODREF(JL)**(-XCEXVT-1.) & *( XLBRACCS1 / ZVECLBDAS(JJ)**2 & @@ -498,10 +506,8 @@ END IF - ( XKER_RACCS(IVEC2(JJ) ,IVEC1(JJ)+1)* ZVEC1(JJ) & - XKER_RACCS(IVEC2(JJ) ,IVEC1(JJ) )*(ZVEC1(JJ) - 1.0) ) & * (ZVEC2(JJ) - 1.0) - END DO ! CONCURRENT - DO JJ = 1, IGACC ZZW2(JJ) = ZZW2(JJ) * ZVEC3(JJ) - END DO + END DO ! CONCURRENT !! RRACCS! ! 5.2.5 perform the bilinear interpolation of the normalized ! SACCRG-kernel @@ -525,7 +531,7 @@ END IF IF ( PRSS(JL) > 0.0 ) THEN ZZW2(JJ) = MAX( MIN( PRRS(JL),ZZW2(JJ)-ZZW4(JJ) ),0.0 ) ! RRACCSG IF ( ZZW2(JJ) > 0.0 ) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW3(JJ) = MIN( PRSS(JL),XFSACCRG*ZVEC3(JJ)* & ! RSACCRG ZVECLBDAS(JJ)**(XCXS-XBS) * PRHODREF(JL)**(-XCEXVT-1.) & * ( XLBSACCR1 / ZVECLBDAR(JJ)**2 & @@ -593,7 +599,7 @@ END IF ! ! compute RSMLT ! 
-#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JJ) = MIN( PRSS(JJ), XFSCVMG*MAX( 0.0,( -ZZW(JJ) * & ( X0DEPS* PLBDAS(JJ)**XEX0DEPS + & X1DEPS*PCJ(JJ)*PLBDAS(JJ)**XEX1DEPS ) ) / & diff --git a/src/MNH/rain_ice_sedimentation_split.f90 b/src/MNH/rain_ice_sedimentation_split.f90 index 82688b8ea..ab5091edf 100644 --- a/src/MNH/rain_ice_sedimentation_split.f90 +++ b/src/MNH/rain_ice_sedimentation_split.f90 @@ -306,7 +306,8 @@ if ( lbudget_rh ) call Budget_store_init( tbudgets(NBUDGET_RH), 'SE ! ! O. Initialization of for sedimentation ! -!$acc kernels +!$acc kernels present_cr(ZOMPSEA,ZTMP1_2D,zconc_tmp,ztmp3_2d,ztmp2_2d,ztmp4_2d,ZLBC,ZFSEDC) & +!$acc present_cr(zconc3d,zray,zprrs,zprss) ZINVTSTEP=1./PTSTEP ZTSPLITR= PTSTEP / REAL(KSPLITR) ! @@ -326,22 +327,20 @@ IF ( OSEDIC ) THEN ZTMP2 = 0.5 * GAMMA( XNUC2 + 1.0 / XALPHAC2 ) / ( GAMMA( XNUC2 ) ) IF ( GPRESENT_PSEA ) THEN - !$acc_nv loop independent collapse(2) - DO CONCURRENT ( JI=1:SIZE( PRCS, 1 ) , JJ = 1: SIZE( PRCS, 2 ) ) + !$mnh_do_concurrent( JI=1:IIU , JJ=1:IJU ) ZOMPSEA (JI,JJ) = 1.-PSEA(JI,JJ) ZTMP1_2D (JI,JJ) = PSEA(JI,JJ)*XLBC(2) +ZOMPSEA(JI,JJ)*XLBC(1) ZTMP2_2D (JI,JJ) = PSEA(JI,JJ)*XFSEDC(2)+ZOMPSEA(JI,JJ)*XFSEDC(1) ZCONC_TMP(JI,JJ) = PSEA(JI,JJ)*XCONC_SEA+ZOMPSEA(JI,JJ)*XCONC_LAND ZTMP3_2D (JI,JJ) = (1.-PTOWN(JI,JJ))*ZCONC_TMP(JI,JJ)+PTOWN(JI,JJ)*XCONC_URBAN ZTMP4_2D (JI,JJ) = MAX( 1. 
, ZOMPSEA(JI,JJ)*ZTMP1 + PSEA(JI,JJ)*ZTMP2 ) - END DO - !$acc_nv loop independent collapse(3) - DO CONCURRENT ( JI=1:SIZE( PRCS, 1 ) , JJ = 1:SIZE( PRCS, 2 ) , JK=KKTB:KKTE ) + !$mnh_end_do() + !$mnh_do_concurrent( JI=1:IIU , JJ=1:IJU , JK=KKTB:KKTE ) ZLBC (JI,JJ,JK) = ZTMP1_2D(JI,JJ) ZFSEDC (JI,JJ,JK) = ZTMP2_2D(JI,JJ) ZCONC3D(JI,JJ,JK) = ZTMP3_2D(JI,JJ) ZRAY (JI,JJ,JK) = ZTMP4_2D(JI,JJ) - END DO + !$mnh_end_do() ELSE ZLBC (:,:,:) = XLBC(1) ZFSEDC (:,:,:) = XFSEDC(1) @@ -383,21 +382,19 @@ PRSS(:,:,:) = PRST(:,:,:)* ZINVTSTEP PRGS(:,:,:) = PRGT(:,:,:)* ZINVTSTEP IF ( KRR == 7 ) PRHS(:,:,:) = PRHT(:,:,:)* ZINVTSTEP #else -!$acc_nv loop collapse(3) independent -DO CONCURRENT ( JI=1:SIZE( PRRS, 1 ) , JJ=1:SIZE( PRRS, 2 ) , JK=1:SIZE( PRRS, 3 ) ) +!$mnh_do_concurrent( JI=1:IIU , JJ=1:IJU , JK=1:IKU ) ZPRRS(JI,JJ,JK) = PRRS(JI,JJ,JK) - PRRT(JI,JJ,JK) * ZINVTSTEP ZPRSS(JI,JJ,JK) = PRSS(JI,JJ,JK) - PRST(JI,JJ,JK) * ZINVTSTEP ZPRGS(JI,JJ,JK) = PRGS(JI,JJ,JK) - PRGT(JI,JJ,JK) * ZINVTSTEP PRRS (JI,JJ,JK) = PRRT(JI,JJ,JK) * ZINVTSTEP PRSS (JI,JJ,JK) = PRST(JI,JJ,JK) * ZINVTSTEP PRGS (JI,JJ,JK) = PRGT(JI,JJ,JK) * ZINVTSTEP -END DO +!$mnh_end_do() IF ( KRR == 7 ) THEN -!$acc_nv loop collapse(3) independent -DO CONCURRENT ( JI=1:SIZE( PRRS, 1 ) , JJ=1:SIZE( PRRS, 2 ) , JK=1:SIZE( PRRS, 3 ) ) +!$mnh_do_concurrent( JI=1:IIU , JJ=1:IJU , JK=1:IKU ) ZPRHS(JI,JJ,JK) = PRHS(JI,JJ,JK) - PRHT(JI,JJ,JK) * ZINVTSTEP PRHS (JI,JJ,JK) = PRHT(JI,JJ,JK) * ZINVTSTEP -END DO +!$mnh_end_do() END IF #endif !$acc end kernels @@ -406,7 +403,7 @@ END IF ! step ! DO JN = 1 , KSPLITR -!$acc kernels +!$acc kernels present_cr(gsedimc,gsedimr,gsedimi,gsedims,gsedimg,gsedimh) IF( JN == 1 ) THEN IF ( OSEDIC ) PRCS(:,:,:) = PRCS(:,:,:) + ZPRCS(:,:,:) / KSPLITR PRRS(:,:,:) = PRRS(:,:,:) + ZPRRS(:,:,:) / KSPLITR @@ -424,8 +421,7 @@ DO JN = 1 , KSPLITR IF ( KRR == 7 ) PRHS(:,:,:) = PRHS(:,:,:) + ZPRHS(:,:,:) * ZTSPLITR END IF ! 
- !$acc_nv loop independent collapse(3) - DO CONCURRENT ( JI=KIB:KIE,JJ=KJB:KJE,JK=KKTB:KKTE ) + !$mnh_do_concurrent( JI=KIB:KIE,JJ=KJB:KJE,JK=KKTB:KKTE ) IF ( OSEDIC ) GSEDIMC(JI,JJ,JK) = & PRCS(JI,JJ,JK) > ZRTMIN(2) GSEDIMR(JI,JJ,JK) = & @@ -438,7 +434,7 @@ DO JN = 1 , KSPLITR PRGS(JI,JJ,JK) > ZRTMIN(6) IF ( KRR == 7 ) GSEDIMH(JI,JJ,JK) = & PRHS(JI,JJ,JK) > ZRTMIN(7) - END DO ! CONCURRENT + !$mnh_end_do() ! CONCURRENT !$acc end kernels ! #ifndef MNH_OPENACC @@ -463,7 +459,7 @@ DO JN = 1 , KSPLITR IF ( OSEDIC ) THEN ZWSED(:,:,:) = 0. IF( JN==1 ) PRCS(:,:,:) = PRCS(:,:,:) * PTSTEP -!$acc_nv loop independent + !$acc loop independent DO CONCURRENT (JL=1:ISEDIMC) ZRSLOC = PRCS(IC1(JL),IC2(JL),IC3(JL)) ZRTLOC = PRCT(IC1(JL),IC2(JL),IC3(JL)) @@ -493,13 +489,14 @@ DO JN = 1 , KSPLITR #else ZZT = ZZT * BR_POW(ZPRES/XP00,XRD/XCPD) #endif - ZWLBDA = 6.6E-8*(101325./ZPRES)*(ZZT/293.15) +!!$ ZWLBDA = 6.6E-8*(101325./ZPRES)*(ZZT/293.15) + ZWLBDA = 2281.238e-8*(ZZT/ZPRES) ZCC = XCC*(1.+1.26*ZWLBDA/ZRAY1D) !! XCC modified for cloud #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) - ZWSED (IC1(JL),IC2(JL),IC3(JL)) = ZRHODREFLOC**(-XCEXVT +1 ) & + ZWSED (IC1(JL),IC2(JL),IC3(JL)) = ZRHODREFLOC**(-XCEXVT +1.0 ) & * ZWLBDC**(-XDC)*ZCC*ZFSEDC1D * ZRSLOC #else - ZWSED (IC1(JL),IC2(JL),IC3(JL)) = BR_POW(ZRHODREFLOC,-XCEXVT +1 ) & + ZWSED (IC1(JL),IC2(JL),IC3(JL)) = BR_POW(ZRHODREFLOC,-XCEXVT +1.0 ) & * BR_POW(ZWLBDC,-XDC)*ZCC*ZFSEDC1D * ZRSLOC #endif END IF @@ -522,7 +519,7 @@ DO JN = 1 , KSPLITR ! IF( JN==1 ) PRRS(:,:,:) = PRRS(:,:,:) * PTSTEP ZWSED(:,:,:) = 0. -!$acc_nv loop independent +!$acc loop independent DO CONCURRENT (JL=1:ISEDIMR) ZRSLOC = PRRS(IR1(JL),IR2(JL),IR3(JL)) IF( ZRSLOC > ZRTMIN(3) ) THEN @@ -555,7 +552,7 @@ DO JN = 1 , KSPLITR ! IF( JN==1 ) PRIS(:,:,:) = PRIS(:,:,:) * PTSTEP ZWSED(:,:,:) = 0. -!$acc_nv loop independent + !$acc loop independent DO CONCURRENT (JL=1:ISEDIMI) ZRSLOC = PRIS(II1(JL),II2(JL),II3(JL)) IF( ZRSLOC > MAX(ZRTMIN(4),1.0E-7 )) THEN ! 
limitation of the McF&H formula @@ -589,7 +586,7 @@ DO JN = 1 , KSPLITR ! IF( JN==1 ) PRSS(:,:,:) = PRSS(:,:,:) * PTSTEP ZWSED(:,:,:) = 0. -!$acc_nv loop independent + !$acc loop independent DO CONCURRENT (JL=1:ISEDIMS) ZRSLOC = PRSS(IS1(JL),IS2(JL),IS3(JL)) IF( ZRSLOC > ZRTMIN(5) ) THEN @@ -620,7 +617,7 @@ DO JN = 1 , KSPLITR ! ZWSED(:,:,:) = 0. IF( JN==1 ) PRGS(:,:,:) = PRGS(:,:,:) * PTSTEP -!$acc_nv loop independent + !$acc loop independent DO CONCURRENT (JL=1:ISEDIMG) ZRSLOC = PRGS(IG1(JL),IG2(JL),IG3(JL)) IF( ZRSLOC > ZRTMIN(6) ) THEN @@ -652,7 +649,7 @@ DO JN = 1 , KSPLITR IF ( KRR == 7 ) THEN IF( JN==1 ) PRHS(:,:,:) = PRHS(:,:,:) * PTSTEP ZWSED(:,:,:) = 0. -!$acc_nv loop independent + !$acc loop independent DO CONCURRENT (JL=1:ISEDIMH) ZRSLOC = PRHS(IH1(JL),IH2(JL),IH3(JL)) IF( ZRSLOC > ZRTMIN(7) ) THEN diff --git a/src/MNH/rain_ice_warm.f90 b/src/MNH/rain_ice_warm.f90 index 205138b73..a222b8231 100644 --- a/src/MNH/rain_ice_warm.f90 +++ b/src/MNH/rain_ice_warm.f90 @@ -40,8 +40,7 @@ USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEAS #endif use mode_mppdb use MODE_MSG - -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif @@ -207,7 +206,7 @@ ENDDO DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW(JL) = MIN( PRCS(JL), XFCACCR * PRCT(JL) & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) * PLBDAR(JL)**XEXCACCR & * PRHODREF(JL)**(-XCEXVT) ) #else @@ -239,7 +238,7 @@ ENDDO IF ( GWORK(JL) ) THEN !Accretion due to rain falling in high cloud content ZZW(JL) = XFCACCR * ( PHLC_HRC(JL)/PHLC_HCF(JL) ) & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) * PLBDAR_RF(JL)**XEXCACCR & * PRHODREF(JL)**(-XCEXVT) & #else @@ -254,7 +253,7 @@ ENDDO DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN !We add acrretion due to rain falling in low cloud content -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JL) = ZZW(JL) + XFCACCR * ( 
PHLC_LRC(JL)/PHLC_LCF(JL) ) & * PLBDAR_RF(JL)**XEXCACCR & * PRHODREF(JL)**(-XCEXVT) & @@ -291,7 +290,7 @@ ENDDO !$acc kernels !Evaporation only when there's no cloud (RC must be 0) GWORK(:) = PRRT(:)>XRTMIN(3) .AND. PRCT(:)<=XRTMIN(2) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) WHERE( GWORK(:) ) ZZW(:) = EXP( XALPW - XBETAW/PZT(:) - XGAMW*ALOG(PZT(:) ) ) ! es_w PUSW(:) = 1.0 - PRVT(:)*( PPRES(:)-ZZW(:) ) / ( (XMV/XMD) * ZZW(:) ) @@ -308,21 +307,21 @@ ENDDO !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JL) = EXP( XALPW - XBETAW/PZT(JL) - XGAMW*LOG(PZT(JL) ) ) ! es_w #else ZZW(JL) = BR_EXP( XALPW - XBETAW/PZT(JL) - XGAMW*BR_LOG(PZT(JL) ) ) ! es_w #endif PUSW(JL) = 1.0 - PRVT(JL)*( PPRES(JL)-ZZW(JL) ) / ( (XMV/XMD) * ZZW(JL) ) ! Undersaturation over water -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JL) = ( XLVTT+(XCPV-XCL)*(PZT(JL)-XTT) )**2 / ( PKA(JL)*XRV*PZT(JL)**2 ) & #else ZZW(JL) = BR_P2( XLVTT+(XCPV-XCL)*(PZT(JL)-XTT) ) / ( PKA(JL)*XRV*BR_P2(PZT(JL)) ) & #endif + ( XRV*PZT(JL) ) / ( PDV(JL)*ZZW(JL) ) ZZW(JL) = MIN( PRRS(JL),( MAX( 0.0,PUSW(JL) )/(PRHODREF(JL)*ZZW(JL)) ) * & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ( X0EVAR*PLBDAR(JL)**XEX0EVAR+X1EVAR*PCJ(JL)*PLBDAR(JL)**XEX1EVAR ) ) #else ( X0EVAR*BR_POW(PLBDAR(JL),XEX0EVAR)+X1EVAR*PCJ(JL)*BR_POW(PLBDAR(JL),XEX1EVAR) ) ) @@ -371,7 +370,7 @@ IF (CSUBG_RR_EVAP=='CLFR') GCSUBG_RR_EVAP=.true. ZZW2(JL) = PTHLT(JL) * PZT(JL) / PTHT(JL) ! ! es_w with new T^u -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JL) = EXP( XALPW - XBETAW/ZZW2(JL) - XGAMW*ALOG(ZZW2(JL) ) ) #else ZZW(JL) = BR_EXP( XALPW - XBETAW/ZZW2(JL) - XGAMW*BR_LOG(ZZW2(JL) ) ) @@ -380,7 +379,7 @@ IF (CSUBG_RR_EVAP=='CLFR') GCSUBG_RR_EVAP=.true. ! 
S, Undersaturation over water (with new theta^u) PUSW(JL) = 1.0 - PRVT(JL)*( PPRES(JL)-ZZW(JL) ) / ( (XMV/XMD) * ZZW(JL) ) ! -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JL) = ( XLVTT+(XCPV-XCL)*(ZZW2(JL)-XTT) )**2 / ( PKA(JL)*XRV*ZZW2(JL)**2 ) & + ( XRV*ZZW2(JL) ) / ( PDV(JL)*ZZW(JL) ) ! diff --git a/src/MNH/rotate_wind.f90 b/src/MNH/rotate_wind.f90 index bb9cd4d83..c08f6ffa4 100644 --- a/src/MNH/rotate_wind.f90 +++ b/src/MNH/rotate_wind.f90 @@ -113,7 +113,7 @@ USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif use mode_mppdb -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif ! @@ -279,7 +279,7 @@ END DO DO CONCURRENT(JI=IIB:IIE,JJ=IJB:IJE) PUSLOPE(JI,JJ) = PCOSSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) * ZUFIN(JI,JJ) + & PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) * ZVFIN(JI,JJ) + & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) SQRT(1.-PDIRCOSZW(JI,JJ)**2) * ZWFIN(JI,JJ) #else SQRT(1.-BR_P2(PDIRCOSZW(JI,JJ))) * ZWFIN(JI,JJ) diff --git a/src/MNH/slow_terms.f90 b/src/MNH/slow_terms.f90 index f447b85bf..c962c7112 100644 --- a/src/MNH/slow_terms.f90 +++ b/src/MNH/slow_terms.f90 @@ -166,7 +166,7 @@ USE MODD_PARAMETERS, only: JPVEXT use mode_budget, only: Budget_store_init, Budget_store_end use mode_mppdb -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) use modi_bitrep #endif @@ -307,7 +307,7 @@ end do #endif ! WHERE (G3D(:,:,:)) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZW3(:,:,:) = PRHODREF(:,:,:) ** (XCEXRS-XCEXVT) #else ZW3(:,:,:) = BR_POW( PRHODREF(:,:,:), XCEXRS - XCEXVT ) @@ -315,7 +315,7 @@ WHERE (G3D(:,:,:)) END WHERE ! WHERE (ZW1(:,:,IKE+1)>0.) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZW2(:,:,IKE+1) = XCRS & * ZW1(:,:,IKE+1) ** XCEXRS & * PRHODREF(:,:,IKE+1) ** (XCEXRS-XCEXVT) @@ -346,7 +346,7 @@ DO JN=1,KSPLITR ! --------------------- ! ! 
-#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZW2(JI,JJ,JK) = XCRS * ( ZW1(JI,JJ,JK) ** XCEXRS ) * ZW3(JI,JJ,JK) #else ZW2(JI,JJ,JK) = XCRS * BR_POW( ZW1(JI,JJ,JK), XCEXRS ) * ZW3(JI,JJ,JK) @@ -397,7 +397,7 @@ if ( lbudget_rr ) call Budget_store_init( tbudgets(NBUDGET_RR), 'ACCR', prrs(:, !$acc kernels G3D(:,:,:) = PRCT(:,:,:)>0.0 .AND. PRRT(:,:,:)>0.0 .AND. PRCS(:,:,:)>0.0 WHERE ( G3D(:,:,:) ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZW(:,:,:) = XCRA * PRCT(:,:,:) & * PRRT(:,:,:) ** XCEXRA & * PRHODREF(:,:,:) ** (XCEXRA - XCEXVT) @@ -471,7 +471,7 @@ WHERE ( G3D(:,:,:) ) ! !* 5.1 compute the Exner function ! -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZEXNT(:,:,:) = (PPABST(:,:,:)/XP00)**(XRD/XCPD) #else ZEXNT(:,:,:) = BR_POW( PPABST(:,:,:)/XP00, XRD/XCPD ) @@ -483,7 +483,7 @@ WHERE ( G3D(:,:,:) ) ! !* 5.3 compute the saturation vapor pressure ! -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZW1(:,:,:) = EXP( XALPW - XBETAW/ZT(:,:,:) - XGAMW*ALOG(ZT(:,:,:) ) ) #else ZW1(:,:,:) = BR_EXP( XALPW - XBETAW/ZT(:,:,:) - XGAMW*BR_LOG(ZT(:,:,:) ) ) @@ -500,7 +500,7 @@ WHERE ( G3D(:,:,:) ) ! !* 5.6 compute the source ! -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZW(:,:,:) = MAX( 1. - PRVT(:,:,:)/ZW2(:,:,:) , 0.0 ) * & ( XC1RE * SQRT( PRRT(:,:,:)/PRHODREF(:,:,:) ) & +XC2RE * PRRT(:,:,:)**XCEXRE & diff --git a/src/MNH/tridiag_w.f90 b/src/MNH/tridiag_w.f90 index 56796decf..a0647140e 100644 --- a/src/MNH/tridiag_w.f90 +++ b/src/MNH/tridiag_w.f90 @@ -157,9 +157,13 @@ USE MODI_SHUMAN #else USE MODI_SHUMAN_DEVICE #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif ! 
#ifdef MNH_OPENACC USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE @@ -247,12 +251,12 @@ ZMZM_RHODJ = MZM(PRHODJ) CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ) #endif !$acc kernels ! async -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZRHODJ_DFDDWDZ_O_DZ2 = PRHODJ*PDFDDWDZ/PMZF_DZZ**2 #else -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PDFDDWDZ(JI,JJ,JK)/BR_P2(PMZF_DZZ(JI,JJ,JK)) -END DO !CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZRHODJ_DFDDWDZ_O_DZ2(:,:,:) = PRHODJ(:,:,:)*PDFDDWDZ(:,:,:)/BR_P2(PMZF_DZZ(:,:,:)) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -285,23 +289,17 @@ ZY=0. !!#endif ! !$acc kernels ! async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - PRHODJ(JI,JJ,IKB ) * PF(JI,JJ,IKB )/PMZF_DZZ(JI,JJ,IKB ) & + PRHODJ(JI,JJ,IKB-1) * PF(JI,JJ,IKB-1)/PMZF_DZZ(JI,JJ,IKB-1) & + ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) * PVARM(JI,JJ,IKB+1)& - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) * PVARM(JI,JJ,IKB ) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels ! !$acc kernels ! async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) ZY(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - PRHODJ(JI,JJ,JK ) * PF(JI,JJ,JK )/PMZF_DZZ(JI,JJ,JK ) & + PRHODJ(JI,JJ,JK-1) * PF(JI,JJ,JK-1)/PMZF_DZZ(JI,JJ,JK-1) & @@ -309,21 +307,18 @@ DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK ) * PVARM(JI,JJ,JK ) & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1) * PVARM(JI,JJ,JK ) & + ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1) * PVARM(JI,JJ,JK-1) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels ! !$acc kernels ! 
async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - PRHODJ(JI,JJ,IKE ) * PF(JI,JJ,IKE )/PMZF_DZZ(JI,JJ,IKE ) & + PRHODJ(JI,JJ,IKE-1) * PF(JI,JJ,IKE-1)/PMZF_DZZ(JI,JJ,IKE-1) & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE ) * PVARM(JI,JJ,IKE ) & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1) * PVARM(JI,JJ,IKE ) & + ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1) * PVARM(JI,JJ,IKE-1) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels ! !* 3. INVERSION OF THE TRIDIAGONAL SYSTEM @@ -340,53 +335,38 @@ END DO !CONCURRENT !! c(k) = + PRHODJ(k) * PDFDDWDZ(k)/PMZF_DZZ(k)**2 ! !$acc kernels ! async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels !$acc kernels ! async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels !$acc kernels ! async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1) ZA(JI,JJ,JK) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1) ZB(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK ) & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1) ZC(JI,JJ,JK) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK ) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels !$acc kernels ! 
async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels !$acc kernels ! async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZB(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)/PTSTEP & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE ) & - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1) -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels ! ! @@ -396,62 +376,50 @@ END DO !CONCURRENT ! -------- ! !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) -END DO !CONCURRENT +!$mnh_end_do() ! !$acc loop seq DO JK = IKB+1,IKE-1 -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = ZB(JI,JJ,JK) - ZA(JI,JJ,JK) * ZGAM(JI,JJ,JK) ! bet = b(k) - a(k)* gam(k) PVARP(JI,JJ,JK)= ( ZY(JI,JJ,JK) - ZA(JI,JJ,JK) * PVARP(JI,JJ,JK-1) ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet - END DO !CONCURRENT + !$mnh_end_do() END DO ! special treatment for the last level -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = ZB(JI,JJ,IKE) - ZA(JI,JJ,IKE) * ZGAM(JI,JJ,IKE) ! bet = b(k) - a(k)* gam(k) PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-1) ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet -END DO !CONCURRENT +!$mnh_end_do() ! !* 3.3 going down ! ---------- ! 
!$acc loop seq DO JK = IKE-1,IKB,-1 -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1) - END DO !CONCURRENT + !$mnh_end_do() END DO ! ! !* 4. FILL THE UPPER AND LOWER EXTERNAL VALUES ! ---------------------------------------- ! -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,IKB-1)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,IKE+1)=0. -END DO !CONCURRENT +!$mnh_end_do() !$acc end kernels if ( mppdb_initialized ) then diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90 index 95de8719c..7ff9e29c6 100644 --- a/src/MNH/turb.f90 +++ b/src/MNH/turb.f90 @@ -14,6 +14,10 @@ module mode_turb #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) use modi_bitrep #endif +#ifdef MNH_BITREP_OMP +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif implicit none @@ -1007,11 +1011,13 @@ IF (ORMC01) THEN #ifdef MNH_OPENACC call Print_msg( NVERB_FATAL, 'GEN', 'TURB', 'OpenACC: ORMC01 not yet implemented' ) #endif +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZUSTAR(:,:) = (PSFU(:,:)**2+PSFV(:,:)**2)**(0.25) #else ZUSTAR(:,:) = BR_POW( BR_P2( PSFU(:,:) ) + BR_P2( PSFV(:,:) ), 0.25 ) #endif +!$mnh_end_expand_array() IF (KRR>0) THEN ZLMO(:,:) = LMO( ZUSTAR(:,:), ZTHLM(:,:,IKB), ZRM(:,:,IKB,1), PSFTH(:,:), PSFRV(:,:) ) ELSE @@ -2311,6 +2317,7 @@ IF (.NOT. ORMC01) THEN DO JJ=1,JJU DO JI=1,JIU IF (GOCEAN) THEN + !$acc loop seq DO JK=KKTE,KKTB,-1 ZD=ZALPHA*(PZZ(JI,JJ,KKTE+1)-PZZ(JI,JJ,JK)) IF ( PLM(JI,JJ,JK)>ZD) THEN @@ -2320,6 +2327,7 @@ IF (.NOT. ORMC01) THEN ENDIF END DO ELSE + !$acc loop seq DO JK=KKTB,KKTE ZD = ZALPHA * ( 0.5 * ( PZZ(JI, JJ, JK) + PZZ(JI, JJ, JK+KKL) ) - PZZ(JI, JJ, KKB) ) * PDIRCOSZW(JI, JJ) IF ( PLM(JI,JJ,JK) > ZD ) THEN @@ -2666,6 +2674,7 @@ IF (.NOT. 
ORMC01) THEN DO JJ=1,SIZE(PLM,2) DO JI=1,SIZE(PLM,1) IF (GOCEAN) THEN + !$acc loop seq DO JK=KKTE,KKTB,-1 ZD=ZALPHA*(PZZ(JI,JJ,KKTE+1)-PZZ(JI,JJ,JK)) IF ( PLM(JI,JJ,JK)>ZD) THEN @@ -2675,6 +2684,7 @@ IF (.NOT. ORMC01) THEN ENDIF END DO ELSE + !$acc loop seq DO JK=KKTB,KKTE ZD=ZALPHA*(0.5*(PZZ(JI,JJ,JK)+PZZ(JI,JJ,JK+KKL))-PZZ(JI,JJ,KKB)) & *PDIRCOSZW(JI,JJ) diff --git a/src/MNH/turb_hor_dyn_corr.f90 b/src/MNH/turb_hor_dyn_corr.f90 index 8d5717d5c..3472c9da3 100644 --- a/src/MNH/turb_hor_dyn_corr.f90 +++ b/src/MNH/turb_hor_dyn_corr.f90 @@ -172,9 +172,13 @@ USE MODI_TRIDIAG_W ! USE MODI_SECOND_MNH USE MODE_MPPDB -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif ! IMPLICIT NONE ! @@ -391,15 +395,12 @@ IKU = SIZE(PUM,3) ! ! !$acc kernels async(1) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) - ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) -END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) + ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) ) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -432,16 +433,13 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' ) ! ! Computes the U variance IF (.NOT. L2D) THEN - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK) & - -(2./3.) * ( GY_V_M_PVM(JI,JJ,JK) & - +GZ_W_M_PWM(JI,JJ,JK) ) ) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(zflx,gz_w_m_pwm) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GX_U_M_PUM(:,:,:) & + -(2./3.) 
* ( GY_V_M_PVM(:,:,:) & + +GZ_W_M_PWM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested later !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP @@ -456,10 +454,9 @@ ELSE END IF ! !$acc kernels async(2) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKE+1) = ZFLX(JI,JJ,IKE) -ENDDO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) +!$mnh_end_expand_array() !$acc end kernels ! !* prescription of du/dz and dv/dz with uncentered gradient at the surface @@ -472,16 +469,15 @@ ZDZZ(:,:,:) = MXM(PDZZ(:,:,IKB:IKB+2)) #else CALL MXM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB+1)= (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) / & - ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) -ENDDO +!$acc kernels async(3) present_cr(zdzz,zcoeff) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & + ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$mnh_end_expand_array() !$acc end kernels ! #ifndef MNH_OPENACC @@ -493,14 +489,13 @@ ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2) & ! 
ZDZZ(:,:,:) = MYM(PDZZ(:,:,IKB:IKB+2)) #else -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZTMP1_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PUM(JI,JJ,IKB+2) & - +ZCOEFF(JI,JJ,IKB+1)*PUM(JI,JJ,IKB+1) & - +ZCOEFF(JI,JJ,IKB)*PUM(JI,JJ,IKB) & - )* 0.5 * ( PDZX(JI,JJ,IKB+1)+PDZX(JI,JJ,IKB)) -ENDDO +!$acc kernels async(3) present_cr(pum,ztmp1_device) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZTMP1_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PUM(:,:,IKB+2) & + +ZCOEFF(:,:,IKB+1)*PUM(:,:,IKB+1) & + +ZCOEFF(:,:,IKB)*PUM(:,:,IKB) & + )* 0.5 * ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB)) +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZCOEFF and ZTMP1_DEVICE @@ -508,25 +503,23 @@ ENDDO ! CALL MXF_DEVICE(ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDU_DZ_DZS_DX(JI,JJ,1) = ZTMP2_DEVICE(JI,JJ,1) / ZTMP1_DEVICE(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(ztmp1_device,zdu_dz_dzs_dx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDU_DZ_DZS_DX(:,:,1) = ZTMP2_DEVICE(:,:,1) / ZTMP1_DEVICE(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! 
CALL MYM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB+1)= (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) / & - ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) -ENDDO +!$acc kernels async(4) present_cr(zdzz,zcoeff) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & + ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$mnh_end_expand_array() !$acc end kernels ! #ifndef MNH_OPENACC @@ -536,14 +529,13 @@ ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2) & )* 0.5 * ( PDZY(:,:,IKB+1:IKB+1)+PDZY(:,:,IKB:IKB)) & )/ MYF(PDYY(:,:,IKB:IKB)) #else -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZTMP3_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PVM(JI,JJ,IKB+2) & - +ZCOEFF(JI,JJ,IKB+1)*PVM(JI,JJ,IKB+1) & - +ZCOEFF(JI,JJ,IKB)*PVM(JI,JJ,IKB) & - )* 0.5 * ( PDZY(JI,JJ,IKB+1)+PDZY(JI,JJ,IKB)) -ENDDO +!$acc kernels async(4) present_cr(pvm,ztmp3_device) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZTMP3_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PVM(:,:,IKB+2) & + +ZCOEFF(:,:,IKB+1)*PVM(:,:,IKB+1) & + +ZCOEFF(:,:,IKB)*PVM(:,:,IKB) & + )* 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB)) +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZCOEFF and ZTMP3_DEVICE @@ -569,11 +561,10 @@ ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) ! 
CALL DXF_DEVICE(PUM(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP2_DEVICE(:,:,1:1)) -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDU_DX(JI,JJ,1)= ZTMP1_DEVICE(JI,JJ,1) / ZTMP2_DEVICE(JI,JJ,1) - ZDU_DZ_DZS_DX(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(zdu_dz_dzs_dx,zdu_dx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDU_DX(:,:,1)= ZTMP1_DEVICE(:,:,1) / ZTMP2_DEVICE(:,:,1) - ZDU_DZ_DZS_DX(:,:,1) +!$mnh_end_expand_array() !$acc end kernels !!! wait for the computation of ZDV_DZ_DZS_DY @@ -581,11 +572,10 @@ ENDDO ! CALL DYF_DEVICE(PVM(:,:,IKB:IKB),ZTMP3_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDV_DY(JI,JJ,1)= ZTMP3_DEVICE(JI,JJ,1) / ZTMP4_DEVICE(JI,JJ,1) - ZDV_DZ_DZS_DY(JI,JJ,1) -ENDDO +!$acc kernels async(4) present_cr(zdv_dz_dzs_dy,zdv_dy) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDV_DY(:,:,1)= ZTMP3_DEVICE(:,:,1) / ZTMP4_DEVICE(:,:,1) - ZDV_DZ_DZS_DY(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! ! @@ -593,11 +583,10 @@ ENDDO !$acc wait(3) async(4) #endif ! -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDW_DZ(JI,JJ,1)=-ZDU_DX(JI,JJ,1)-ZDV_DY(JI,JJ,1) -ENDDO +!$acc kernels async(4) present_cr(zdv_dy,zdw_dz) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDW_DZ(:,:,1)=-ZDU_DX(:,:,1)-ZDV_DY(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! !* computation @@ -612,12 +601,11 @@ ENDDO !attention !!!!! je ne comprends pas pourquoi mais ce update plante à l'execution... ! du coup je ne peux pas faire de update self asynchrone... ! -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - - XCMFS * PK(JI,JJ,IKB) * 2. 
* ZDU_DX(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(zdu_dx,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDU_DX(:,:,1) +!$mnh_end_expand_array() !$acc end kernels !! & to be tested later @@ -630,8 +618,8 @@ ENDDO !!! wait for the computation of ZDIRSINZW !$acc wait(1) ! -!$acc kernels async(4) present_cr(ZFLX) -#ifndef MNH_BITREP +!$acc kernels async(4) present_cr(ZFLX,ZDIRSINZW) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & @@ -642,17 +630,16 @@ ZFLX(:,:,IKB-1) = & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & - -2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & - + PTAU22M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) & - + PTAU33M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ)) & - +2. * PCDUEFF(JI,JJ) * ( & - PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ) & - - PUSLOPEM(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) ) -END DO ! CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:)) & + -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & + + PTAU22M(:,:) * BR_P2(PSINSLOPE(:,:)) & + + PTAU33M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(ZDIRSINZW(:,:)) & + +2. * PCDUEFF(:,:) * ( & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) & + - PUSLOPEM(:,:) * BR_P2(PCOSSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) +!$mnh_end_expand_array() #endif !$acc end kernels ! 
@@ -660,10 +647,9 @@ END DO ! CONCURRENT !$acc wait(3) async(4) ! !$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB-1) = 2. * ZFLX(JI,JJ,IKB-1) - ZFLX(JI,JJ,IKB) -ENDDO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) - ZFLX(:,:,IKB) +!$mnh_end_expand_array() !$acc end kernels ! ! @@ -717,13 +703,10 @@ ELSE END IF #else CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE) -!$acc kernels async(10) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) -END DO !CONCURRENT +!$acc kernels async(10) present_cr(ztmp1_device,ztmp2_device) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) / ZTMP1_DEVICE(:,:,:) +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX @@ -732,31 +715,22 @@ END DO !CONCURRENT CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) IF (.NOT. 
LFLAT) THEN CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:) * PINV_PDZZ(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PDZX(:,:,:) / ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE ) !$acc kernels async(1) @@ -773,22 +747,18 @@ END IF ! IF (KSPLT==1) THEN ! 
Contribution to the dynamic production of TKE: - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gx_u_m_pum,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(:,:,:) = - ZFLX(:,:,:) * GX_U_M_PUM(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDU_DX(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) - ENDDO + !$acc kernels async(2) present_cr(zdu_dx,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDU_DX(:,:,1) + ZWORK(:,:,IKB+1) ) + !$mnh_end_expand_array() !$acc end kernels ! !$acc kernels async(2) @@ -829,28 +799,24 @@ END IF ! ! Computes the V variance IF (.NOT. L2D) THEN - !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK) & - -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & - +GZ_W_M_PWM(JI,JJ,JK) ) ) - END DO !CONCURRENT + !$acc kernels async(3) present_cr(gz_w_m_pwm,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GY_V_M_PVM(:,:,:) & + -(2./3.) * ( GX_U_M_PUM(:,:,:) & + +GZ_W_M_PWM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ! ELSE - !$acc kernels async(3) - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *(-(2./3.) 
* ( GX_U_M_PUM(JI,JJ,JK) & - +GZ_W_M_PWM(JI,JJ,JK) ) ) - ENDDO + !$acc kernels async(3) present_cr(gz_w_m_pwm,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *(-(2./3.) * ( GX_U_M_PUM(:,:,:) & + +GZ_W_M_PWM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP @@ -865,12 +831,11 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) ! ! !$acc wait(3) ! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10) ! -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - - XCMFS * PK(JI,JJ,IKB) * 2. * ZDV_DY(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(zdv_dy,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDV_DY(:,:,1) +!$mnh_end_expand_array() !$acc end kernels !! & to be tested @@ -879,7 +844,7 @@ ENDDO ! ! extrapolates this flux under the ground with the surface flux !$acc kernels async(3) present_cr(ZFLX) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & @@ -890,17 +855,16 @@ ZFLX(:,:,IKB-1) = & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & - +2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & - + PTAU22M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) & - + PTAU33M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ)) & - -2. 
* PCDUEFF(JI,JJ)* ( & - PUSLOPEM(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) & - + PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ) ) -END DO ! CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * BR_P2(PSINSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:)) & + +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & + + PTAU22M(:,:) * BR_P2(PCOSSLOPE(:,:)) & + + PTAU33M(:,:) * BR_P2(PSINSLOPE(:,:)) * BR_P2(ZDIRSINZW(:,:)) & + -2. * PCDUEFF(:,:)* ( & + PUSLOPEM(:,:) * BR_P2(PSINSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) & + + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -957,13 +921,10 @@ IF (.NOT. L2D) THEN IF (KSPLT==1) ZWORK(:,:,:) = - ZFLX(:,:,:) * GY_V_M_PVM #else CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE) - !$acc kernels async(10) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(10) present_cr(ztmp1_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) / ZTMP1_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX @@ -972,31 +933,22 @@ IF (.NOT. L2D) THEN CALL DYM_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE ) IF (.NOT. 
LFLAT) THEN CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:) * PINV_PDZZ(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE ) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PDZY(:,:,:) / ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) !$acc kernels async(1) @@ -1006,23 +958,17 @@ IF (.NOT. L2D) THEN !$acc end kernels ELSE !$acc kernels async(1) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PRVS(:,:,:)=PRVS(:,:,:) - ZTMP3_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels END IF ! 
Contribution to the dynamic production of TKE: IF (KSPLT==1) THEN - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gy_v_m_pvm,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(:,:,:) = - ZFLX(:,:,:) * GY_V_M_PVM(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ENDIF #endif @@ -1036,11 +982,10 @@ IF (KSPLT==1) THEN ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDV_DY(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) - ENDDO + !$acc kernels async(2) present_cr(zdv_dy,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDV_DY(:,:,1) + ZWORK(:,:,IKB+1) ) + !$mnh_end_expand_array() !$acc end kernels ! !$acc kernels async(2) @@ -1061,11 +1006,10 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !!! wait for the computation of ZWORK and PDP !$acc wait(2) ! - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zwork,ztmp1_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) ! @@ -1080,27 +1024,23 @@ END IF ! ! Computes the W variance IF (.NOT. L2D) THEN - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & - -(2./3.) 
* ( GX_U_M_PUM(JI,JJ,JK) & - +GY_V_M_PVM(JI,JJ,JK) ) ) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gy_v_m_pvm,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:) = (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GZ_W_M_PWM(:,:,:) & + -(2./3.) * ( GX_U_M_PUM(:,:,:) & + +GY_V_M_PVM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ELSE - !$acc kernels async(2) - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & - -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) ) ) - ENDDO + !$acc kernels async(2) present_cr(gx_u_m_pum,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GZ_W_M_PWM(:,:,:) & + -(2./3.) * ( GX_U_M_PUM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP @@ -1114,12 +1054,11 @@ ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) !$acc wait(2) ! ! -!$acc kernels async(2) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - - XCMFS * PK(JI,JJ,IKB) * 2. * ZDW_DZ(JI,JJ,1) -ENDDO +!$acc kernels async(2) present_cr(zdw_dz,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDW_DZ(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! @@ -1129,20 +1068,19 @@ ENDDO ! (-2./3.) * PTP(:,:,IKB:IKB) ! extrapolates this flux under the ground with the surface flux !$acc kernels async(3) present_cr(ZFLX) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * ZDIRSINZW(:,:)**2 & + PTAU33M(:,:) * PDIRCOSZW(:,:)**2 & +2. 
* PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & - + PTAU33M(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ)) & - +2. * PCDUEFF(JI,JJ)* PUSLOPEM(JI,JJ) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) -END DO ! CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * BR_P2(ZDIRSINZW(:,:)) & + + PTAU33M(:,:) * BR_P2(PDIRCOSZW(:,:)) & + +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -1209,34 +1147,27 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ) #else CALL GZ_W_M_DEVICE(ZWP,PDZZ,GZ_W_M_ZWP) #endif -!$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) - ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK)) -END DO !CONCURRENT +!$acc kernels async(2) present_cr(gz_w_m_pwm,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) + ZFLX(:,:,:)=ZFLX(:,:,:) & + - XCMFS * PK(:,:,:) * (4./3.) * (GZ_W_M_ZWP(:,:,:) - GZ_W_M_PWM(:,:,:)) +!$mnh_end_expand_array() !$acc end kernels ! IF (KSPLT==1) THEN !Contribution to the dynamic production of TKE: - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gz_w_m_zwp,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(:,:,:) = - ZFLX(:,:,:) * GZ_W_M_ZWP(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! 
- !$acc kernels async(2) - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDW_DZ(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) - ENDDO + !$acc kernels async(2) present_cr(zdw_dz,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDW_DZ(:,:,1) + ZWORK(:,:,IKB+1) ) + !$mnh_end_expand_array() !$acc end kernels ! !$acc kernels async(2) @@ -1275,30 +1206,27 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !!! wait for the computation of ZFLX, ZDP and ZWORK !$acc wait(2) ! - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zwork,ztmp1_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_W_SBG_UaW , .TRUE.) ! CALL GZ_M_M_DEVICE(PTHLM,PDZZ,ZTMP1_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(ztmp2_device,ztmp3_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE,X_LES_RES_ddz_Thl_SBG_W2) ! 
@@ -1308,21 +1236,19 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc data copy(X_LES_RES_ddxa_Rt_SBG_UaW,X_LES_RES_ddz_Rt_SBG_W2) ! CALL GZ_M_M_DEVICE(PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(ztmp2_device,ztmp3_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Rt_SBG_W2) ! @@ -1333,22 +1259,20 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! ! CALL GZ_M_M_DEVICE(PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, & X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.) ! 
CALL GZ_M_W_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(ztmp2_device,ztmp3_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) ! diff --git a/src/MNH/turb_hor_sv_corr.f90 b/src/MNH/turb_hor_sv_corr.f90 index ec6902e32..cdfafdab8 100644 --- a/src/MNH/turb_hor_sv_corr.f90 +++ b/src/MNH/turb_hor_sv_corr.f90 @@ -110,7 +110,7 @@ USE MODI_EMOIST USE MODI_ETHETA ! USE MODI_SECOND_MNH -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif ! @@ -216,7 +216,7 @@ DO JSV=1,NSV ! IF (LLES_CALL) THEN IF (.NOT. L2D) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZFLX(:,:,:) = ZCSV / ZCSVD * PLM(:,:,:) * PLEPS(:,:,:) * & ( GX_M_M(PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX)**2 & + GY_M_M(PSVM(:,:,:,JSV),PDYY,PDZZ,PDZY)**2 ) @@ -226,12 +226,12 @@ DO JSV=1,NSV + BR_P2(GY_M_M(PSVM(:,:,:,JSV),PDYY,PDZZ,PDZY)) ) #endif ELSE -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZFLX(:,:,:) = ZCSV / ZCSVD * PLM(:,:,:) * PLEPS(:,:,:) * & GX_M_M(PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX)**2 #else ZFLX(:,:,:) = ZCSV / ZCSVD * PLM(:,:,:) * PLEPS(:,:,:) * & - BR_P2(GX_M_M(PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX)) + BR_P2(GX_M_M(PSVM(:,:,:,JSV),PDXX(:,:,:),PDZZ(:,:,:),PDZX(:,:,:))) #endif END IF CALL LES_MEAN_SUBGRID( -2.*ZCSVD*SQRT(PTKEM)*ZFLX/PLEPS, & -- GitLab