From 7e23473755bab7b89bdbad5de2bc79b1d476175d Mon Sep 17 00:00:00 2001
From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr>
Date: Wed, 20 Apr 2022 17:13:15 +0200
Subject: [PATCH] Juan 20/04/2022:Juan&Naima:MNH/turb.f90/tke_eps_sources.f90,
 Bit Reproductible for CRAY -> MNH_BITREP_OMP

---
 src/MNH/tke_eps_sources.f90 |  45 +++++++++-------
 src/MNH/turb.f90            | 103 +++++++++++++++++++-----------------
 2 files changed, 79 insertions(+), 69 deletions(-)

diff --git a/src/MNH/tke_eps_sources.f90 b/src/MNH/tke_eps_sources.f90
index cb75d7da4..dd01a9a6d 100644
--- a/src/MNH/tke_eps_sources.f90
+++ b/src/MNH/tke_eps_sources.f90
@@ -191,7 +191,7 @@ USE MODE_MNH_ZWORK,      ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEAS
 #endif
 use mode_mppdb
 !
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 USE MODI_BITREP
 #endif
 USE MODI_GET_HALO
@@ -271,8 +271,11 @@ REAL, DIMENSION(:,:,:), pointer , contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE, ZTMP
 !
 INTEGER  :: JIU,JJU,JKU
 INTEGER  :: JI,JJ,JK
+LOGICAL  :: GTURBDIM_3DIM
 !----------------------------------------------------------------------------
-
+!
+GTURBDIM_3DIM = HTURBDIM=='3DIM'
+!
 !$acc data present( PTKEM, PLM, PLEPS, PDP, PTRH,              &
 !$acc &             PRHODJ, PDZZ, PDXX, PDYY, PDZX, PDZY, PZZ, &
 !$acc &             PTP, PRTKES, PRTKESM,  PRTHLS, PCOEF_DISS, &
@@ -341,7 +344,7 @@ IKB=KKA+JPVEXT_TURB*KKL
 IKE=KKU-JPVEXT_TURB*KKL
 !
 ! compute the effective diffusion coefficient at the mass point
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZKEFF(:,:,:) = PLM(:,:,:) * SQRT(PTKEM(:,:,:))
 #else
 #ifdef MNH_COMPILER_NVHPC
@@ -365,7 +368,7 @@ if (lbudget_th)  call Budget_store_init( tbudgets(NBUDGET_TH),  'DISSH', prthls(
 !
 ! Complete the sources of TKE with the horizontal turbulent explicit transport
 !
-IF (HTURBDIM=='3DIM') THEN
+IF (GTURBDIM_3DIM) THEN
   PTR(:,:,:) = PTRH(:,:,:)
 ELSE
   PTR(:,:,:) = 0.
@@ -382,7 +385,7 @@ PDP(:,:,IKB) = PDP(:,:,IKB) * (1. + PDZZ(:,:,IKB+KKL)/PDZZ(:,:,IKB))
 !
 ! Compute the source terms for TKE: ( ADVECtion + NUMerical DIFFusion + ..)
 ! + (Dynamical Production) + (Thermal Production) - (dissipation) 
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZFLX(:,:,:) = XCED * SQRT(PTKEM(:,:,:)) / PLEPS(:,:,:)
 #else
 #ifdef MNH_COMPILER_NVHPC
@@ -410,7 +413,7 @@ END DO !CONCURRENT
 !
 #ifndef MNH_OPENACC
 ZA(:,:,:)     = - PTSTEP * XCET * &
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
                 MZM(ZKEFF) * MZM(PRHODJ) / PDZZ(:,:,:)**2
 #else
                 MZM(ZKEFF) * MZM(PRHODJ) / BR_P2(PDZZ(:,:,:))
@@ -418,13 +421,11 @@ ZA(:,:,:)     = - PTSTEP * XCET * &
 #else
 CALL MZM_DEVICE(ZKEFF, ZTMP1_DEVICE) !Warning: re-used later
 CALL MZM_DEVICE(PRHODJ,ZTMP2_DEVICE) !Warning: re-used later
-!$acc kernels present(ZA)
-#ifndef MNH_BITREP
+!$acc kernels ! present(ZA)
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZA(:,:,:)     = - PTSTEP * XCET * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) / PDZZ(:,:,:)**2
 #else
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
+!$acc_nv loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZA(JI,JJ,JK)     = - PTSTEP * XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / BR_P2(PDZZ(JI,JJ,JK))
 END DO !CONCURRENT   
@@ -544,7 +545,7 @@ end if
 
 if (lbudget_tke) then
   !Store the previous source terms in prtkes before initializing the next one
-   !$acc kernels
+   !$acc kernels present(ZRES)
    PRTKES(:,:,:) = PRTKES(:,:,:) + PRHODJ(:,:,:) *                                                           &
                   ( PDP(:,:,:) + PTP(:,:,:)                                                                 &
                     - XCED * SQRT(PTKEM(:,:,:)) / PLEPS(:,:,:) * ( PEXPL*PTKEM(:,:,:) + PIMPL*ZRES(:,:,:) ) )
@@ -553,8 +554,9 @@ if (lbudget_tke) then
 end if
 
 !$acc kernels
-!dir$ concurrent
-PRTKES(:,:,:) = ZRES(:,:,:) * PRHODJ(:,:,:) / PTSTEP -  PRTKESM(:,:,:)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+   PRTKES(JI,JJ,JK) = ZRES(JI,JJ,JK) * PRHODJ(JI,JJ,JK) / PTSTEP -  PRTKESM(JI,JJ,JK)
+ENDDO
 !$acc end kernels
 !
 ! stores the whole turbulent transport
@@ -567,13 +569,16 @@ if (lbudget_tke) call Budget_store_end( tbudgets(NBUDGET_TKE), 'TR', prtkes(:, :
 !             -------------------------------
 !
 !$acc kernels
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 !dir$ concurrent
 PRTHLS(:,:,:) = PRTHLS(:,:,:) + XCED * SQRT(PTKEM(:,:,:)) / PLEPS(:,:,:) * &
+        (PEXPL*PTKEM(:,:,:) + PIMPL*ZRES(:,:,:)) * PRHODJ(:,:,:) * PCOEF_DISS(:,:,:)
 #else
-PRTHLS(:,:,:) = PRTHLS(:,:,:) + XCED * BR_POW(PTKEM(:,:,:),0.5) / PLEPS(:,:,:) * &
+DO CONCURRENT (JI=1:JIU, JJ=1:JJU ,JK=1:JKU)
+   PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) + XCED * BR_POW(PTKEM(JI,JJ,JK),0.5) / PLEPS(JI,JJ,JK) * &
+        (PEXPL*PTKEM(JI,JJ,JK) + PIMPL*ZRES(JI,JJ,JK)) * PRHODJ(JI,JJ,JK) * PCOEF_DISS(JI,JJ,JK)
+ENDDO
 #endif
-                (PEXPL*PTKEM(:,:,:) + PIMPL*ZRES(:,:,:)) * PRHODJ(:,:,:) * PCOEF_DISS(:,:,:)
 !$acc end kernels
 
 if (lbudget_th) call Budget_store_end( tbudgets(NBUDGET_TH), 'DISSH', prthls(:, :, :) )
@@ -584,10 +589,12 @@ if (lbudget_th) call Budget_store_end( tbudgets(NBUDGET_TH), 'DISSH', prthls(:,
 !             -----------------------
 !
 !$acc kernels
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 PDISS(:,:,:) =  -XCED * (PTKEM(:,:,:)**1.5) / PLEPS(:,:,:)
 #else
-PDISS(:,:,:) =  -XCED * BR_POW(PTKEM(:,:,:),1.5) / PLEPS(:,:,:)
+DO CONCURRENT (JI=1:JIU, JJ=1:JJU, JK=1:JKU)
+   PDISS(JI,JJ,JK) =  -XCED * BR_POW(PTKEM(JI,JJ,JK),1.5) / PLEPS(JI,JJ,JK)
+ENDDO
 #endif
 !$acc end kernels
 !
diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90
index 5fd090780..6a1d591af 100644
--- a/src/MNH/turb.f90
+++ b/src/MNH/turb.f90
@@ -11,8 +11,7 @@ module mode_turb
   use mode_msg
   USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE
 #endif
-
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 use modi_bitrep
 #endif
 
@@ -749,10 +748,12 @@ ELSE
 !PW: "BUG" PGI : results different on CPU and GPU due to the power function
 !See http://www.pgroup.com/userforum/viewtopic.php?t=5364&sid=ec7b762b17fb9bb3332a47f0db57af55
 !Use of own functions allows bit-reproducible results
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   ZEXN(:,:,:) = (PPABST(:,:,:)/XP00) ** (XRD/XCPD)
 #else
-  ZEXN(:,:,:) = BR_POW(PPABST(:,:,:)/XP00,XRD/XCPD)
+DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  ZEXN(JI,JJ,JK) = BR_POW(PPABST(JI,JJ,JK)/XP00,XRD/XCPD)
+END DO
 #endif
 END IF
 !
@@ -1006,7 +1007,7 @@ IF (ORMC01) THEN
 #ifdef MNH_OPENACC
   call Print_msg( NVERB_FATAL, 'GEN', 'TURB', 'OpenACC: ORMC01 not yet implemented' )
 #endif
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   ZUSTAR(:,:) = (PSFU(:,:)**2+PSFV(:,:)**2)**(0.25)
 #else
   ZUSTAR(:,:) = BR_POW( BR_P2( PSFU(:,:) ) + BR_P2( PSFV(:,:) ), 0.25 )
@@ -1066,17 +1067,15 @@ ENDIF
 !
 !*      4.2 compute the proportionality coefficient between wind and stress
 !
-!$acc kernels present_cr(ZCDUEFF,ZTAU22M,ZTAU33M)
-#ifndef MNH_BITREP
+!$acc kernels ! present_cr(ZCDUEFF,ZUSLOPE,ZVSLOPE,PSFU,PSFV)
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) /                  &
                         (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) )
 #else
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(2)
-#endif
+  !$acc_nv loop independent collapse(2)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
      ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) /                  &
-                    (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) )
+                    (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) )     
   END DO
 #endif
 !$acc end kernels
@@ -1848,7 +1847,7 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) )
 !
 !*       1.1 Lv/Cph at  t
 !
-!$acc kernels present(PLOCPEXN,ZDRVSATDT)
+!$acc kernels ! present(ZRVSAT,ZDRVSATDT) ! present(PLOCPEXN) ! present ZDRVSATDT)
   PLOCPEXN(:,:,:) = ( PLTT + (XCPV-PC) *  (PT(:,:,:)-XTT) ) / PCP(:,:,:)
 !
 !*      1.2 Saturation vapor pressure at t
@@ -1856,13 +1855,15 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) )
 !PW: "BUG" PGI : results different on CPU and GPU due to the EXP and LOG functions
 !See http://www.pgroup.com/userforum/viewtopic.php?t=5364&sid=ec7b762b17fb9bb3332a47f0db57af55
 !Use of own functions allows bit-reproducible results
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   ZRVSAT(:,:,:) =  EXP( PALP - PBETA/PT(:,:,:) - PGAM*ALOG( PT(:,:,:) ) )
 #else
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZRVSAT(JI,JJ,JK) =  BR_EXP( PALP - PBETA/PT(JI,JJ,JK) - PGAM*BR_LOG( PT(JI,JJ,JK) ) )
   END DO
 #endif
+!$acc end kernels  
+!$acc kernels present(ZRVSAT,ZDRVSATDT)  
 !
 !*      1.3 saturation  mixing ratio at t
 !
@@ -1877,24 +1878,36 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) )
 !
   PAMOIST(:,:,:)=  0.5 / ( 1.0 + ZDRVSATDT(:,:,:) * PLOCPEXN(:,:,:) )
 !
+!$acc end kernels  
+!$acc kernels  
 !*      1.6 compute Atheta
-!
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   PATHETA(:,:,:)= PAMOIST(:,:,:) * PEXN(:,:,:) *                             &
         ( ( ZRVSAT(:,:,:) - PRT(:,:,:,1) ) * PLOCPEXN(:,:,:) /               &
           ( 1. + ZDRVSATDT(:,:,:) * PLOCPEXN(:,:,:) )        *               &
           (                                                                  &
            ZRVSAT(:,:,:) * (1. + ZRVSAT(:,:,:)/ZEPS)                         &
-#ifndef MNH_BITREP
                         * ( -2.*PBETA/PT(:,:,:) + PGAM ) / PT(:,:,:)**2      &
-#else
-                        * ( -2.*PBETA/PT(:,:,:) + PGAM ) / BR_P2(PT(:,:,:))  &
-#endif
           +ZDRVSATDT(:,:,:) * (1. + 2. * ZRVSAT(:,:,:)/ZEPS)                 &
                         * ( PBETA/PT(:,:,:) - PGAM ) / PT(:,:,:)             &
           )                                                                  &
          - ZDRVSATDT(:,:,:)                                                  &
         )
-!
+#else
+DO CONCURRENT( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+ PATHETA(JI,JJ,JK)= PAMOIST(JI,JJ,JK) * PEXN(JI,JJ,JK) *                             &
+        ( ( ZRVSAT(JI,JJ,JK) - PRT(JI,JJ,JK,1) ) * PLOCPEXN(JI,JJ,JK) /              &
+          ( 1. + ZDRVSATDT(JI,JJ,JK) * PLOCPEXN(JI,JJ,JK) )        *                 &
+          (                                                                          &
+           ZRVSAT(JI,JJ,JK) * (1. + ZRVSAT(JI,JJ,JK)/ZEPS)                           &
+                        * ( -2.*PBETA/PT(JI,JJ,JK) + PGAM ) / BR_P2(PT(JI,JJ,JK))    &
+          +ZDRVSATDT(JI,JJ,JK) * (1. + 2. * ZRVSAT(JI,JJ,JK)/ZEPS)                   &
+                        * ( PBETA/PT(JI,JJ,JK) - PGAM ) / PT(JI,JJ,JK)               &
+          )                                                                          &
+         - ZDRVSATDT(JI,JJ,JK)                                                       &
+        )
+ENDDO
+#endif
 !*      1.7 Lv/Cph/Exner at t-1
 !
   PLOCPEXN(:,:,:) = PLOCPEXN(:,:,:) / PEXN(:,:,:)
@@ -2168,6 +2181,7 @@ LOGICAL,                INTENT(IN)    :: ODZ
 !
 
 integer :: ji, jj, jk
+integer :: JIU,JJU,JKU
 LOGICAL :: GOCEAN !Intermediate variable used to work around a Cray compiler bug (CCE 13.0.0)
 REAL                :: ZALPHA       ! proportionnality constant between Dz/2 and
 !                                   ! BL89 mixing length near the surface
@@ -2176,12 +2190,10 @@ REAL                :: ZD           ! distance to the surface
 real, dimension(:,:,:), pointer , contiguous :: ztmp1_device, ztmp2_device
 #endif
 !
-integer :: jiu, jju, jku
 !-------------------------------------------------------------------------------
-
-jiu =  size( pdxx, 1 )
-jju =  size( pdxx, 2 )
-jku =  size( pdxx, 3 )
+JIU=SIZE(pdxx,1)
+JJU=SIZE(pdxx,2)
+JKU=SIZE(pdxx,3)
 
 if ( mppdb_initialized ) then
   !Check all in arrays
@@ -2224,7 +2236,7 @@ IF (ODZ) THEN
 #endif
     ELSE
 #ifndef MNH_OPENACC
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
       PLM(:,:,:) = (PLM(:,:,:)*MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)) ) ** (1./3.)
 #else
       PLM(:,:,:) = BR_POW( PLM(:, :, : ) * MXF( PDXX(:, :, : ) ) * MYF( PDYY(:, :, : ) ), 1. / 3. )
@@ -2233,10 +2245,12 @@ IF (ODZ) THEN
       CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE )
       CALL MYF_DEVICE( PDYY, ZTMP2_DEVICE )
 !$acc kernels
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
       PLM(:,:,:) = ( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) ) ** (1./3.)
 #else
-      PLM(:,:,:) = BR_POW( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:), 1./3. )
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK), 1./3. )
+ENDDO
 #endif
 !$acc end kernels
 #endif
@@ -2256,7 +2270,7 @@ ELSE
 #endif
     ELSE
 #ifndef MNH_OPENACC
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
       PLM(:,:,:) = (MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)) ) ** (1./2.)
 #else
       PLM(:,:,:) = BR_POW(MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)), 1. / 2. )
@@ -2265,10 +2279,12 @@ ELSE
       CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE )
       CALL MYF_DEVICE( PDYY, ZTMP2_DEVICE )
 !$acc kernels
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
       PLM(:,:,:) = ( ZTMP1_DEVICE * ZTMP2_DEVICE ) ** (1./2.)
 #else
-      PLM(:,:,:) = BR_POW( ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:),  1. / 2. )
+   DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK=1:JKU )
+      PLM(JI,JJ,JK) = BR_POW( ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK),  1. / 2. )
+   END DO
 #endif
 !$acc end kernels
 #endif
@@ -2280,9 +2296,8 @@ END IF
 !  mixing length limited by the distance normal to the surface
 !  (with the same factor as for BL89)
 !
-
 IF (.NOT. ORMC01) THEN
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   ZALPHA = 0.5**(-1.5)
 #else
   ZALPHA = BR_POW( 0.5, -1.5 )
@@ -2506,7 +2521,7 @@ IF ( HTURBDIM /= '1DIM' ) THEN  ! 3D turbulence scheme
 !PW: "BUG" PGI : results different on CPU and GPU due to the power function
 !See http://www.pgroup.com/userforum/viewtopic.php?t=5364&sid=ec7b762b17fb9bb3332a47f0db57af55
 !Use of own functions allows bit-reproducible results
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 !
 #ifndef MNH_OPENACC
     PLM(:,:,:) = (PLM(:,:,:)*MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)) ) ** (1./3.)
@@ -2536,7 +2551,9 @@ IF ( HTURBDIM /= '1DIM' ) THEN  ! 3D turbulence scheme
        call Mppdb_check( plm, "Dear mid1:plm" )
     end if
 !$acc kernels
-    PLM(:,:,:) = BR_POW( PLM(:,:,:)*ZTMP1_DEVICE    *ZTMP2_DEVICE     , 1./3. )
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+    PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)    *ZTMP2_DEVICE(JI,JJ,JK)     , 1./3. )
+ENDDO
 !$acc end kernels
     if ( mppdb_initialized ) then
        call Mppdb_check( plm, "Dear mid2:plm" )
@@ -2558,12 +2575,7 @@ CALL EMOIST(KRR,KRRI,PTHLT,PRT,PLOCPEXNM,PAMOIST,PSRCT,ZEMOIST)
 !$acc kernels present(ZWORK2D,PLM)
 IF (KRR>0) THEN
    !$acc_nv loop independent collapse(3) private(ZVAR)
-   !dir$ concurrent 
-   DO JK = KKTB+1,KKTE-1
-      !dir$ concurrent
-      DO JJ=1,SIZE(PLM,2)
-         !dir$ concurrent
-         DO JI=1,SIZE(PLM,1)          
+   DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK = KKTB+1:KKTE-1)          
             ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK    ))/PDZZ(JI,JJ,JK+KKL)+ &
                  (PTHLT(JI,JJ,JK    )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK    ))
             ZDRTDZ(JI,JJ,JK) = 0.5*((PRT(JI,JJ,JK+KKL,1)-PRT(JI,JJ,JK    ,1))/PDZZ(JI,JJ,JK+KKL)+ &
@@ -2579,17 +2591,10 @@ IF (KRR>0) THEN
                PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), &
                     0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR)))
             END IF
-         END DO
-      END DO
    END DO
 ELSE! For dry atmos or unsalted ocean runs
    !$acc_nv loop independent collapse(3) private(ZVAR)
-   !dir$ concurrent   
-   DO JK = KKTB+1,KKTE-1
-      !dir$ concurrent  
-      DO JJ=1,SIZE(PLM,2)
-         !dir$ concurrent
-         DO JI=1,SIZE(PLM,1)
+   DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK = KKTB+1:KKTE-1)
             ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK    ))/PDZZ(JI,JJ,JK+KKL)+ &
                  (PTHLT(JI,JJ,JK    )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK    ))
             IF (GOCEAN) THEN
@@ -2602,8 +2607,6 @@ ELSE! For dry atmos or unsalted ocean runs
                PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), &
                     0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR)))
             END IF
-         END DO
-      END DO
    END DO
 END IF
 !  special case near the surface
-- 
GitLab