diff --git a/src/ZSOLVER/advection_metsv.f90 b/src/ZSOLVER/advection_metsv.f90
index c9cbce0aeec1af60169da227853743a30d66a052..034505c02822ae387020f56a68dd01e87c82c9d3 100644
--- a/src/ZSOLVER/advection_metsv.f90
+++ b/src/ZSOLVER/advection_metsv.f90
@@ -188,9 +188,14 @@ use mode_sum_ll,         only: MAX_ll
 use mode_tools_ll,       only: GET_INDICE_ll, lnorth_ll, lsouth_ll, least_ll, lwest_ll
 !
 USE MODI_ADV_BOUNDARIES
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 USE MODI_BITREP
 #endif
+#ifdef MNH_BITREP_OMP
+!$mnh_undef(LOOP)
+!$mnh_undef(OPENACC)
+#endif
+
 USE MODI_CONTRAV
 USE MODI_GET_HALO
 USE MODI_PPM_RHODJ
@@ -534,7 +539,8 @@ IF (.NOT. L1D) THEN
   !$acc end kernels
   IF (LIBM) THEN
     !$acc kernels
-#ifndef MNH_BITREP
+!$mnh_expand_array(JI=IIB:IIE,JJ=IJB:IJE,JK=1:JKU)
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
     ZCFLU(IIB:IIE,IJB:IJE,:) = ZCFLU(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,2)/&
                                                         (XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:))**(1./3.))**2.))
     ZCFLV(IIB:IIE,IJB:IJE,:) = ZCFLV(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,3)/&
@@ -549,12 +555,13 @@ IF (.NOT. L1D) THEN
     ZCFLW(IIB:IIE,IJB:IJE,:) = ZCFLW(IIB:IIE,IJB:IJE,:)*(1.-Br_exp(-Br_pow(XIBM_LS(IIB:IIE,IJB:IJE,:,4)/&
                                                         Br_pow(XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:),1./3.),2.)))
 #endif
+!$mnh_end_expand_array()
     WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,2).GT.(-ZIBM_EPSI)) ZCFLU(IIB:IIE,IJB:IJE,:)=0.
     WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,3).GT.(-ZIBM_EPSI)) ZCFLV(IIB:IIE,IJB:IJE,:)=0.
     WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,4).GT.(-ZIBM_EPSI)) ZCFLW(IIB:IIE,IJB:IJE,:)=0.
     !$acc end kernels
   ENDIF
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
   IF (.NOT. L2D) THEN
      !$acc kernels present_cr(ZCFL) 
      ZCFL(:,:,:)  = SQRT(ZCFLU(:,:,:)**2+ZCFLV(:,:,:)**2+ZCFLW(:,:,:)**2)
@@ -585,7 +592,7 @@ ELSE
    !$acc kernels
    ZCFLU(:,:,:) = 0.0 ; ZCFLV(:,:,:) = 0.0 ;  ZCFLW(:,:,:) = 0.0
    ZCFLW(IIB:IIE,IJB:IJE,:) = ABS(ZRWCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP)
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
    ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2)
 #else
    !$acc_nv loop independent collapse(3)
diff --git a/src/ZSOLVER/ppm.f90 b/src/ZSOLVER/ppm.f90
index 83f0849e7424d7c723d17c9f989e793359f142eb..608a9b2ae8cabd2515bc1241ae786f4910cd2a04 100644
--- a/src/ZSOLVER/ppm.f90
+++ b/src/ZSOLVER/ppm.f90
@@ -397,9 +397,12 @@ use mode_mppdb
 use mode_msg
 #endif
 
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 USE MODI_BITREP
 #endif
+#ifdef MNH_BITREP_OMP
+USE MODI_BITREPZ
+#endif
 USE MODI_GET_HALO
 #ifndef MNH_OPENACC
 USE MODI_SHUMAN
@@ -470,7 +473,11 @@ INTEGER                :: IJS,IJN
 #endif
 LOGICAL                :: GWEST , GEAST
 !-------------------------------------------------------------------------------
-
+!
+#ifdef MNH_BITREP_OMP
+CALL SBR_FZ(PSRC(:,:,:))
+#endif
+!
 !$acc data present( PSRC, PCR, PRHO, PR , &
 !$acc &             ZQL, ZQR, ZDQ, ZQ6, ZDMQ, ZQL0, ZQR0, ZQ60, ZFPOS, ZFNEG )
 
@@ -632,7 +639,7 @@ CASE ('CYCL','WALL')          ! In that case one must have HLBCX(1) == HLBCX(2)
       ZQL(:,IJS:IJN,:) = PSRC(:,IJS:IJN,:)
       ZQR(:,IJS:IJN,:) = PSRC(:,IJS:IJN,:)
       ZQ6(:,IJS:IJN,:) = 0.0
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
    ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) < -(ZDQ(:,IJS:IJN,:))**2 )
 #else
    ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) < -BR_P2(ZDQ(:,IJS:IJN,:)) )
@@ -640,7 +647,7 @@ CASE ('CYCL','WALL')          ! In that case one must have HLBCX(1) == HLBCX(2)
       ZQ6(:,IJS:IJN,:) = 3.0*(ZQL0(:,IJS:IJN,:) - PSRC(:,IJS:IJN,:))
       ZQR(:,IJS:IJN,:) = ZQL0(:,IJS:IJN,:) - ZQ6(:,IJS:IJN,:)
       ZQL(:,IJS:IJN,:) = ZQL0(:,IJS:IJN,:)
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
    ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) > (ZDQ(:,IJS:IJN,:))**2 )
 #else
    ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) > BR_P2(ZDQ(:,IJS:IJN,:)) )
@@ -970,6 +977,10 @@ ENDDO ; ENDDO ; ENDDO
 !
 END SELECT
 !
+#ifdef MNH_BITREP_OMP
+CALL SBR_FZ(PR(:,:,:))
+#endif
+!
 IF (MPPDB_INITIALIZED) THEN
   !Check all INOUT arrays
   CALL MPPDB_CHECK(PSRC,"PPM_01_X end:PSRC")
@@ -1197,9 +1208,12 @@ use mode_msg
 #endif
 use mode_mppdb
 
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 USE MODI_BITREP
 #endif
+#ifdef MNH_BITREP_OMP
+USE MODI_BITREPZ
+#endif
 USE MODI_GET_HALO
 #ifndef MNH_OPENACC
 USE MODI_SHUMAN
@@ -1272,7 +1286,11 @@ INTEGER                          :: IJN,IJS
 #endif
 integer :: ji, jj, jk
 !-------------------------------------------------------------------------------
-
+!
+#ifdef MNH_BITREP_OMP
+CALL SBR_FZ(PSRC(:,:,:))
+#endif
+!
 !$acc data present( PSRC, PCR, PRHO, PR, &
 !$acc &             ZQL, ZQR, ZDQ, ZQ6, ZDMQ, ZQL0, ZQR0, ZQ60, ZFPOS, ZFNEG )
 
@@ -1850,6 +1868,10 @@ CALL  GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='ZQL0')
 !
 END SELECT
 !
+#ifdef MNH_BITREP_OMP
+CALL SBR_FZ(PR(:,:,:))
+#endif
+!
 IF (MPPDB_INITIALIZED) THEN
   !Check all INOUT arrays
   CALL MPPDB_CHECK(PSRC,"PPM_01_Y end:PSRC")
@@ -2076,9 +2098,12 @@ USE MODI_SHUMAN
 USE MODI_SHUMAN_DEVICE
 #endif
 USE MODI_GET_HALO
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 USE MODI_BITREP
 #endif
+#ifdef MNH_BITREP_OMP
+USE MODI_BITREPZ
+#endif
 !
 USE MODD_CONF
 USE MODD_PARAMETERS
@@ -2147,7 +2172,11 @@ INTEGER                          :: I,J,K
 integer                          :: ji, jj, jk
 !
 !-------------------------------------------------------------------------------
-
+!
+#ifdef MNH_BITREP_OMP
+CALL SBR_FZ(PSRC(:,:,:))
+#endif
+!
 !$acc data present( PSRC, PCR, PRHO, PR, &
 !$acc &             ZQL, ZQR, ZDQ, ZQ6, ZDMQ, ZQL0, ZQR0, ZQ60, ZFPOS, ZFNEG )
 IF (MPPDB_INITIALIZED) THEN
@@ -2290,7 +2319,7 @@ WHERE ( ZDMQ == 0.0 )
    ZQL = PSRC
    ZQR = PSRC
    ZQ6 = 0.0
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ELSEWHERE ( ZQ60*ZDQ < -(ZDQ)**2 )
 #else
 ELSEWHERE ( ZQ60*ZDQ < -BR_P2(ZDQ) )
@@ -2298,7 +2327,7 @@ ELSEWHERE ( ZQ60*ZDQ < -BR_P2(ZDQ) )
    ZQ6 = 3.0*(ZQL0 - PSRC)
    ZQR = ZQL0 - ZQ6
    ZQL = ZQL0
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ELSEWHERE ( ZQ60*ZDQ > (ZDQ)**2 ) 
 #else
 ELSEWHERE ( ZQ60*ZDQ > BR_P2(ZDQ) ) 
@@ -2419,6 +2448,10 @@ end do
 !Unnecessary CALL GET_HALO_D(PR)
 #endif
 !
+#ifdef MNH_BITREP_OMP
+CALL SBR_FZ(PR(:,:,:))    
+#endif
+!
 IF (MPPDB_INITIALIZED) THEN
   !Check all INOUT arrays
   CALL MPPDB_CHECK(PSRC,"PPM_01_Z end:PSRC")
diff --git a/src/ZSOLVER/turb_hor_dyn_corr.f90 b/src/ZSOLVER/turb_hor_dyn_corr.f90
index 850e010d2323a0b7673c250f90808939bfe408fd..b07a15f38506bdd88d9451cef10fd89635332227 100644
--- a/src/ZSOLVER/turb_hor_dyn_corr.f90
+++ b/src/ZSOLVER/turb_hor_dyn_corr.f90
@@ -174,9 +174,13 @@ USE MODI_SHUMAN_DEVICE
 #endif
 USE MODI_TRIDIAG_W
 !
-#ifdef MNH_BITREP
+#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
 USE MODI_BITREP
 #endif
+#ifdef MNH_BITREP_OMP
+!$mnh_undef(LOOP)
+!$mnh_undef(OPENACC)
+#endif
 !
 IMPLICIT NONE
 !
@@ -395,15 +399,12 @@ IKU = SIZE(PUM,3)
 !
 !
 !$acc kernels async(1)
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 )
 #else
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(2)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
-   ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) )
-END DO
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU )
+   ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) )
+!$mnh_end_expand_array()
 #endif
 !$acc end kernels
 !
@@ -437,16 +438,13 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' )
 !
 ! Computes the U variance
 IF (.NOT. L2D) THEN
-   !$acc kernels async(2)
-#ifdef MNH_COMPILER_NVHPC
-   !$acc loop independent collapse(3)
-#endif
-   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK)                            &
-           - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK)        &
-           -(2./3.) * ( GY_V_M_PVM(JI,JJ,JK)                     &
-           +GZ_W_M_PWM(JI,JJ,JK)                ) )
-   END DO !CONCURRENT
+   !$acc kernels async(2) present_cr(zflx,gz_w_m_pwm)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:)                            &
+           - XCMFS * PK(:,:,:) *( (4./3.) * GX_U_M_PUM(:,:,:)        &
+           -(2./3.) * ( GY_V_M_PVM(:,:,:)                     &
+           +GZ_W_M_PWM(:,:,:)                ) )
+   !$mnh_end_expand_array()
    !$acc end kernels
    !!  &   to be tested later
   !!  + XCMFB *  PLM / SQRT(PTKEM) * (-2./3.) * PTP 
@@ -461,10 +459,9 @@ ELSE
 END IF
 !
 !$acc kernels async(2)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZFLX(JI,JJ,IKE+1) = ZFLX(JI,JJ,IKE) 
-ENDDO
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) 
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !* prescription of du/dz and dv/dz with uncentered gradient at the surface
@@ -477,16 +474,15 @@ ZDZZ(:,:,:) = MXM(PDZZ(:,:,IKB:IKB+2))
 #else
 CALL MXM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:))
 #endif
-!$acc kernels async(3)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) /      &
-        ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) )
-   ZCOEFF(JI,JJ,IKB+1)=   (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) /      &
-        ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) )
-   ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) /      &
-        ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) )
-ENDDO
+!$acc kernels async(3) present_cr(zdzz,zcoeff)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) /      &
+        ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) )
+   ZCOEFF(:,:,IKB+1)=   (ZDZZ(:,:,3)+ZDZZ(:,:,2)) /      &
+        ( ZDZZ(:,:,2) * ZDZZ(:,:,3) )
+   ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) /      &
+        ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) )
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 #ifndef MNH_OPENACC
@@ -498,14 +494,13 @@ ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2)       &
 !
 ZDZZ(:,:,:) = MYM(PDZZ(:,:,IKB:IKB+2))
 #else
-!$acc kernels async(3)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZTMP1_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PUM(JI,JJ,IKB+2)       &
-                          +ZCOEFF(JI,JJ,IKB+1)*PUM(JI,JJ,IKB+1)       &
-                          +ZCOEFF(JI,JJ,IKB)*PUM(JI,JJ,IKB)       &
-                          )* 0.5 * ( PDZX(JI,JJ,IKB+1)+PDZX(JI,JJ,IKB))
-ENDDO
+!$acc kernels async(3) present_cr(pum,ztmp1_device)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZTMP1_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PUM(:,:,IKB+2)       &
+                          +ZCOEFF(:,:,IKB+1)*PUM(:,:,IKB+1)       &
+                          +ZCOEFF(:,:,IKB)*PUM(:,:,IKB)       &
+                          )* 0.5 * ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB))
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !!! wait for the computation of ZCOEFF and ZTMP1_DEVICE
@@ -513,25 +508,23 @@ ENDDO
 !
 CALL MXF_DEVICE(ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1))
 CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1))
-!$acc kernels async(3)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZDU_DZ_DZS_DX(JI,JJ,1) = ZTMP2_DEVICE(JI,JJ,1) / ZTMP1_DEVICE(JI,JJ,1)
-ENDDO
+!$acc kernels async(3) present_cr(ztmp1_device,zdu_dz_dzs_dx)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZDU_DZ_DZS_DX(:,:,1) = ZTMP2_DEVICE(:,:,1) / ZTMP1_DEVICE(:,:,1)
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 CALL MYM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:))
 #endif
-!$acc kernels async(4)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) /      &
-        ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) )
-   ZCOEFF(JI,JJ,IKB+1)=   (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) /      &
-        ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) )
-   ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) /      &
-        ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) )
-ENDDO
+!$acc kernels async(4) present_cr(zdzz,zcoeff)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) /      &
+        ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) )
+   ZCOEFF(:,:,IKB+1)=   (ZDZZ(:,:,3)+ZDZZ(:,:,2)) /      &
+        ( ZDZZ(:,:,2) * ZDZZ(:,:,3) )
+   ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) /      &
+        ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) )
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 #ifndef MNH_OPENACC
@@ -541,14 +534,13 @@ ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2)       &
                           )* 0.5 * ( PDZY(:,:,IKB+1:IKB+1)+PDZY(:,:,IKB:IKB)) &
                          )/ MYF(PDYY(:,:,IKB:IKB))
 #else
-!$acc kernels async(4)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZTMP3_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PVM(JI,JJ,IKB+2)       &
-                          +ZCOEFF(JI,JJ,IKB+1)*PVM(JI,JJ,IKB+1)       &
-                          +ZCOEFF(JI,JJ,IKB)*PVM(JI,JJ,IKB)       &
-                          )* 0.5 * ( PDZY(JI,JJ,IKB+1)+PDZY(JI,JJ,IKB))
-ENDDO
+!$acc kernels async(4) present_cr(pvm,ztmp3_device)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZTMP3_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PVM(:,:,IKB+2)       &
+                          +ZCOEFF(:,:,IKB+1)*PVM(:,:,IKB+1)       &
+                          +ZCOEFF(:,:,IKB)*PVM(:,:,IKB)       &
+                          )* 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB))
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !!! wait for the computation of ZCOEFF and ZTMP3_DEVICE
@@ -574,11 +566,10 @@ ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1)
 !
 CALL DXF_DEVICE(PUM(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1))
 CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP2_DEVICE(:,:,1:1))
-!$acc kernels async(3)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZDU_DX(JI,JJ,1)=  ZTMP1_DEVICE(JI,JJ,1) / ZTMP2_DEVICE(JI,JJ,1) - ZDU_DZ_DZS_DX(JI,JJ,1)
-ENDDO
+!$acc kernels async(3) present_cr(zdu_dz_dzs_dx,zdu_dx)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZDU_DX(:,:,1)=  ZTMP1_DEVICE(:,:,1) / ZTMP2_DEVICE(:,:,1) - ZDU_DZ_DZS_DX(:,:,1)
+!$mnh_end_expand_array()
 !$acc end kernels
 
 !!! wait for the computation of ZDV_DZ_DZS_DY
@@ -586,11 +577,10 @@ ENDDO
 !
 CALL DYF_DEVICE(PVM(:,:,IKB:IKB),ZTMP3_DEVICE(:,:,1:1))
 CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1))
-!$acc kernels async(4)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZDV_DY(JI,JJ,1)=  ZTMP3_DEVICE(JI,JJ,1) / ZTMP4_DEVICE(JI,JJ,1) - ZDV_DZ_DZS_DY(JI,JJ,1)
-ENDDO
+!$acc kernels async(4) present_cr(zdv_dz_dzs_dy,zdv_dy)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZDV_DY(:,:,1)=  ZTMP3_DEVICE(:,:,1) / ZTMP4_DEVICE(:,:,1) - ZDV_DZ_DZS_DY(:,:,1)
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !
@@ -598,11 +588,10 @@ ENDDO
 !$acc wait(3) async(4)
 #endif
 !
-!$acc kernels async(4)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZDW_DZ(JI,JJ,1)=-ZDU_DX(JI,JJ,1)-ZDV_DY(JI,JJ,1)
-ENDDO
+!$acc kernels async(4) present_cr(zdv_dy,zdw_dz)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZDW_DZ(:,:,1)=-ZDU_DX(:,:,1)-ZDV_DY(:,:,1)
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !* computation 
@@ -617,12 +606,11 @@ ENDDO
 !attention !!!!! je ne comprends pas pourquoi mais ce update plante à l'execution...
 ! du coup je ne peux pas faire de update self asynchrone...
 !
-!$acc kernels async(3)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZFLX(JI,JJ,IKB)   = (2./3.) * PTKEM(JI,JJ,IKB)                           &
-        - XCMFS * PK(JI,JJ,IKB) * 2. * ZDU_DX(JI,JJ,1)
-ENDDO
+!$acc kernels async(3) present_cr(zdu_dx,zflx)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZFLX(:,:,IKB)   = (2./3.) * PTKEM(:,:,IKB)                           &
+        - XCMFS * PK(:,:,IKB) * 2. * ZDU_DX(:,:,1)
+!$mnh_end_expand_array()
 !$acc end kernels
 
 !!  &  to be tested later
@@ -635,8 +623,8 @@ ENDDO
 !!! wait for the computation of ZDIRSINZW
 !$acc wait(1)
 !
-!$acc kernels async(4) present_cr(ZFLX)
-#ifndef MNH_BITREP
+!$acc kernels async(4) present_cr(ZFLX,ZDIRSINZW)   
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZFLX(:,:,IKB-1) =                                                            &
         PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2                 &
   -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:)       &
@@ -647,17 +635,16 @@ ZFLX(:,:,IKB-1) =                                                            &
     - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)    )
 #else
 !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1)
-!$acc_nv loop independent collapse(2)
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
-ZFLX(JI,JJ,IKB-1) =                                                             &
-        PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ))          &
-  -2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ)        &
-  +     PTAU22M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ))                                  &
-  +     PTAU33M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ))          &
-  +2. * PCDUEFF(JI,JJ) *      (                                                 &
-      PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ)    * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ)     &
-      - PUSLOPEM(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) )
-END DO ! CONCURRENT
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+ZFLX(:,:,IKB-1) =                                                             &
+        PTAU11M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:))          &
+  -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:)        & 
+  +     PTAU22M(:,:) * BR_P2(PSINSLOPE(:,:))                                  &
+  +     PTAU33M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(ZDIRSINZW(:,:))          &
+  +2. * PCDUEFF(:,:) *      (                                                 &
+      PVSLOPEM(:,:) * PCOSSLOPE(:,:)    * PSINSLOPE(:,:) * ZDIRSINZW(:,:)     &
+      - PUSLOPEM(:,:) * BR_P2(PCOSSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) )
+!$mnh_end_expand_array()
 #endif
 !$acc end kernels
 ! 
@@ -665,10 +652,9 @@ END DO ! CONCURRENT
 !$acc wait(3) async(4)
 !
 !$acc kernels async(4)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZFLX(JI,JJ,IKB-1) = 2. * ZFLX(JI,JJ,IKB-1) -  ZFLX(JI,JJ,IKB)
-ENDDO
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) -  ZFLX(:,:,IKB)
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !
@@ -724,13 +710,10 @@ ELSE
 END IF
 #else
 CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE)
-!$acc kernels async(10)
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK)
-END DO !CONCURRENT
+!$acc kernels async(10) present_cr(ztmp1_device,ztmp2_device)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+   ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) / ZTMP1_DEVICE(:,:,:)
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX
@@ -739,31 +722,22 @@ END DO !CONCURRENT
 CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE)
 IF (.NOT. LFLAT) THEN
   CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE)
-  !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK)
-  END DO !CONCURRENT
+  !$acc kernels present_cr(zflx,ztmp2_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:)
+  !$mnh_end_expand_array()
   !$acc end kernels
   CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
-  !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$acc kernels present_cr(ztmp4_device,ztmp2_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:) * PINV_PDZZ(:,:,:)
+  !$mnh_end_expand_array()   
   !$acc end kernels
   CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE )
-  !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$acc kernels present_cr(ztmp4_device,ztmp2_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(:,:,:) = PDZX(:,:,:) / ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:)
+  !$mnh_end_expand_array()   
   !$acc end kernels
   CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE )
   !$acc kernels async(1)
@@ -780,22 +754,18 @@ END IF
 !
 IF (KSPLT==1) THEN
   ! Contribution to the dynamic production of TKE:
-   !$acc kernels async(2)
-#ifdef MNH_COMPILER_NVHPC
-   !$acc loop independent collapse(3)
-#endif
-   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZWORK(JI,JJ,JK)     = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK)
-   END DO !CONCURRENT
+   !$acc kernels async(2) present_cr(gx_u_m_pum,zwork)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZWORK(:,:,:)     = - ZFLX(:,:,:) * GX_U_M_PUM(:,:,:)
+   !$mnh_end_expand_array()
   !$acc end kernels
   !
   ! evaluate the dynamic production at w(IKB+1) in PDP(IKB)
   !
-  !$acc kernels async(2)
-   !$acc_nv loop independent collapse(2) 
-   DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-      ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDU_DX(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) )
-   ENDDO
+  !$acc kernels async(2) present_cr(zdu_dx,zwork)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+      ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDU_DX(:,:,1) + ZWORK(:,:,IKB+1) )
+   !$mnh_end_expand_array()
    !$acc end kernels
   !
   !$acc kernels async(2)
@@ -836,28 +806,24 @@ END IF
 !
 ! Computes the V variance
 IF (.NOT. L2D) THEN
-   !$acc kernels async(3)
-#ifdef MNH_COMPILER_NVHPC
-   !$acc loop independent collapse(3)
-#endif
-   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK)                                  &
-           - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK)                        &
-           -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK)                      &
-           +GZ_W_M_PWM(JI,JJ,JK)                ) )
-   END DO !CONCURRENT
+   !$acc kernels async(3) present_cr(gz_w_m_pwm,zflx)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:)                                  &
+           - XCMFS * PK(:,:,:) *( (4./3.) * GY_V_M_PVM(:,:,:)                        &
+           -(2./3.) * ( GX_U_M_PUM(:,:,:)                      &
+           +GZ_W_M_PWM(:,:,:)                ) )
+   !$mnh_end_expand_array()
    !$acc end kernels
   !! &  to be tested
   !!  + XCMFB *  PLM / SQRT(PTKEM) * (-2./3.) * PTP 
   !
 ELSE
-   !$acc kernels async(3)
-   !$acc_nv loop independent collapse(3)
-   DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK)                           &
-           - XCMFS * PK(JI,JJ,JK) *(-(2./3.) * ( GX_U_M_PUM(JI,JJ,JK)        &
-                                      +GZ_W_M_PWM(JI,JJ,JK)     ) )  
-   ENDDO
+   !$acc kernels async(3) present_cr(gz_w_m_pwm,zflx)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:)                           &
+           - XCMFS * PK(:,:,:) *(-(2./3.) * ( GX_U_M_PUM(:,:,:)        &
+                                      +GZ_W_M_PWM(:,:,:)     ) )  
+   !$mnh_end_expand_array()
    !$acc end kernels
   !! &  to be tested
   !!  + XCMFB *  PLM / SQRT(PTKEM) * (-2./3.) * PTP 
@@ -872,12 +838,11 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE)
 ! ! !$acc wait(3)
 ! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10)
 !
-!$acc kernels async(3)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZFLX(JI,JJ,IKB)   = (2./3.) * PTKEM(JI,JJ,IKB)                           &
-        - XCMFS * PK(JI,JJ,IKB) * 2. * ZDV_DY(JI,JJ,1)
-ENDDO
+!$acc kernels async(3) present_cr(zdv_dy,zflx)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZFLX(:,:,IKB)   = (2./3.) * PTKEM(:,:,IKB)                           &
+        - XCMFS * PK(:,:,IKB) * 2. * ZDV_DY(:,:,1)
+!$mnh_end_expand_array()
 !$acc end kernels
 
 !!           & to be tested
@@ -886,7 +851,7 @@ ENDDO
 !
 ! extrapolates this flux under the ground with the surface flux
 !$acc kernels async(3) present_cr(ZFLX) 
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZFLX(:,:,IKB-1) =                                                            &
         PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2                 &         
   +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:)       &
@@ -897,17 +862,16 @@ ZFLX(:,:,IKB-1) =                                                            &
     + PVSLOPEM(:,:) * PCOSSLOPE(:,:)    * PSINSLOPE(:,:) * ZDIRSINZW(:,:)    )
 #else
 !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1)
-!$acc_nv loop independent collapse(2)
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
-ZFLX(JI,JJ,IKB-1) =                                                             &
-        PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ))          &
-  +2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ)        &
-  +     PTAU22M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ))                                  &
-  +     PTAU33M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ))          &
-  -2. * PCDUEFF(JI,JJ)*       (                                                 &
-      PUSLOPEM(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) &
-      + PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ)    * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ)     )
-END DO ! CONCURRENT
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU )
+ZFLX(:,:,IKB-1) =                                                             &
+        PTAU11M(:,:) * BR_P2(PSINSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:))          &
+  +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:)        &
+  +     PTAU22M(:,:) * BR_P2(PCOSSLOPE(:,:))                                  &
+  +     PTAU33M(:,:) * BR_P2(PSINSLOPE(:,:)) * BR_P2(ZDIRSINZW(:,:))          &
+  -2. * PCDUEFF(:,:)*       (                                                 &
+      PUSLOPEM(:,:) * BR_P2(PSINSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) &
+      + PVSLOPEM(:,:) * PCOSSLOPE(:,:)    * PSINSLOPE(:,:) * ZDIRSINZW(:,:)     )
+!$mnh_end_expand_array()
 #endif
 !$acc end kernels
 ! 
@@ -966,13 +930,10 @@ IF (.NOT. L2D) THEN
   IF (KSPLT==1) ZWORK(:,:,:)     = - ZFLX(:,:,:) * GY_V_M_PVM
 #else
   CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE)
-  !$acc kernels async(10)
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$acc kernels async(10) present_cr(ztmp1_device,ztmp2_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) / ZTMP1_DEVICE(:,:,:)
+  !$mnh_end_expand_array()   
   !$acc end kernels
   !
   !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX
@@ -981,31 +942,22 @@ IF (.NOT. L2D) THEN
   CALL DYM_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE )
   IF (.NOT. LFLAT) THEN
     CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE)
-    !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-    !$acc loop independent collapse(3)
-#endif
-    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK)
-    END DO !CONCURRENT   
+    !$acc kernels present_cr(zflx,ztmp2_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:)
+    !$mnh_end_expand_array()   
     !$acc end kernels
     CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
-    !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-    !$acc loop independent collapse(3)
-#endif
-    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
-    END DO !CONCURRENT   
+    !$acc kernels present_cr(ztmp4_device,ztmp2_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:) * PINV_PDZZ(:,:,:)
+    !$mnh_end_expand_array()   
     !$acc end kernels
     CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE )
-    !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-    !$acc loop independent collapse(3)
-#endif
-    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
-    END DO !CONCURRENT   
+    !$acc kernels present_cr(ztmp4_device,ztmp2_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(:,:,:) = PDZY(:,:,:) / ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:)
+    !$mnh_end_expand_array()   
     !$acc end kernels
     CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE )
     !$acc kernels async(1)
@@ -1015,23 +967,17 @@ IF (.NOT. L2D) THEN
     !$acc end kernels
   ELSE
      !$acc kernels async(1)
-#ifdef MNH_COMPILER_NVHPC
-     !$acc loop independent collapse(3)
-#endif
-     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-        PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK)
-     END DO !CONCURRENT    
+     !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+        PRVS(:,:,:)=PRVS(:,:,:) - ZTMP3_DEVICE(:,:,:)
+     !$mnh_end_expand_array()    
     !$acc end kernels
   END IF
 ! Contribution to the dynamic production of TKE:
   IF (KSPLT==1) THEN
-     !$acc kernels async(2)
-#ifdef MNH_COMPILER_NVHPC
-     !$acc loop independent collapse(3)
-#endif
-     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-        ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK)
-     END DO !CONCURRENT   
+     !$acc kernels async(2) present_cr(gy_v_m_pvm,zwork)
+     !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+        ZWORK(:,:,:) = - ZFLX(:,:,:) * GY_V_M_PVM(:,:,:)
+     !$mnh_end_expand_array()   
     !$acc end kernels
   ENDIF
 #endif
@@ -1045,11 +991,10 @@ IF (KSPLT==1) THEN
   !
   ! evaluate the dynamic production at w(IKB+1) in PDP(IKB)
   !
-   !$acc kernels async(2)
-   !$acc_nv loop independent collapse(2)
-   DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-      ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDV_DY(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) )
-   ENDDO
+   !$acc kernels async(2) present_cr(zdv_dy,zwork)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+      ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDV_DY(:,:,1) + ZWORK(:,:,IKB+1) )
+   !$mnh_end_expand_array()
    !$acc end kernels
   !
   !$acc kernels async(2)
@@ -1070,11 +1015,10 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
   !!! wait for the computation of ZWORK and PDP
   !$acc wait(2)
   !
-  !$acc kernels
-  !$acc_nv loop independent collapse(3) 
-  DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK)
-  ENDDO
+  !$acc kernels present_cr(zwork,ztmp1_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:)
+  !$mnh_end_expand_array()
   !$acc end kernels
   CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.)
   !
@@ -1089,27 +1033,23 @@ END IF
 !
 ! Computes the W variance
 IF (.NOT. L2D) THEN
-   !$acc kernels async(2)
-#ifdef MNH_COMPILER_NVHPC
-   !$acc loop independent collapse(3)
-#endif
-   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK)                                  &
-           - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK)                        &
-           -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK)                      &
-           +GY_V_M_PVM(JI,JJ,JK)                ) )
-   END DO !CONCURRENT
+   !$acc kernels async(2) present_cr(gy_v_m_pvm,zflx)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZFLX(:,:,:) = (2./3.) * PTKEM(:,:,:)                                  &
+           - XCMFS * PK(:,:,:) *( (4./3.) * GZ_W_M_PWM(:,:,:)                        &
+           -(2./3.) * ( GX_U_M_PUM(:,:,:)                      &
+           +GY_V_M_PVM(:,:,:)                ) )
+   !$mnh_end_expand_array()
   !$acc end kernels
   !!  &  to be tested
   !!    -2.* XCMFB *  PLM / SQRT(PTKEM) * (-2./3.) * PTP 
 ELSE
-   !$acc kernels async(2)
-   !$acc_nv loop independent collapse(3) 
-   DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK)                           &
-           - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK)          &
-           -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK)           ) ) 
-   ENDDO
+   !$acc kernels async(2) present_cr(gx_u_m_pum,zflx)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:)                           &
+           - XCMFS * PK(:,:,:) *( (4./3.) * GZ_W_M_PWM(:,:,:)          &
+           -(2./3.) * ( GX_U_M_PUM(:,:,:)           ) ) 
+   !$mnh_end_expand_array()
    !$acc end kernels
   !!  &  to be tested
   !!    -2.* XCMFB *  PLM / SQRT(PTKEM) * (-2./3.) * PTP 
@@ -1123,12 +1063,11 @@ ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE)
 !$acc wait(2)
 !
 !
-!$acc kernels async(2)
-!$acc_nv loop independent collapse(2) 
-DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-   ZFLX(JI,JJ,IKB)   = (2./3.) * PTKEM(JI,JJ,IKB)                           &
-        - XCMFS * PK(JI,JJ,IKB) * 2. * ZDW_DZ(JI,JJ,1)
-ENDDO
+!$acc kernels async(2) present_cr(zdw_dz,zflx)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+   ZFLX(:,:,IKB)   = (2./3.) * PTKEM(:,:,IKB)                           &
+        - XCMFS * PK(:,:,IKB) * 2. * ZDW_DZ(:,:,1)
+!$mnh_end_expand_array()
 
 !$acc end kernels
 !
@@ -1138,20 +1077,19 @@ ENDDO
 !  (-2./3.) * PTP(:,:,IKB:IKB)
 ! extrapolates this flux under the ground with the surface flux
 !$acc kernels async(3) present_cr(ZFLX) 
-#ifndef MNH_BITREP
+#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 ZFLX(:,:,IKB-1) = &    
         PTAU11M(:,:) * ZDIRSINZW(:,:)**2                                &
   +     PTAU33M(:,:) * PDIRCOSZW(:,:)**2                                &
   +2. * PCDUEFF(:,:)* PUSLOPEM(:,:)  * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)
 #else
 !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1)
-!$acc_nv loop independent collapse(2)
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )        
-ZFLX(JI,JJ,IKB-1) = &
-        PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ))                                &
-  +     PTAU33M(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ))                                &
-  +2. * PCDUEFF(JI,JJ)* PUSLOPEM(JI,JJ)  * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ)
-END DO ! CONCURRENT        
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU )        
+ZFLX(:,:,IKB-1) = &
+        PTAU11M(:,:) * BR_P2(ZDIRSINZW(:,:))                                &
+  +     PTAU33M(:,:) * BR_P2(PDIRCOSZW(:,:))                                &
+  +2. * PCDUEFF(:,:)* PUSLOPEM(:,:)  * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)
+!$mnh_end_expand_array()        
 #endif
 !$acc end kernels
   ! 
@@ -1218,34 +1156,27 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ)
 #else
 CALL GZ_W_M_DEVICE(ZWP,PDZZ,GZ_W_M_ZWP)
 #endif
-!$acc kernels async(2)
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU)
-   ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) &
-        - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK))
-END DO !CONCURRENT
+!$acc kernels async(2) present_cr(gz_w_m_pwm,zflx)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU)
+   ZFLX(:,:,IKB+1:JKU)=ZFLX(:,:,IKB+1:JKU) &
+        - XCMFS * PK(:,:,IKB+1:JKU) * (4./3.) * (GZ_W_M_ZWP(:,:,IKB+1:JKU) - GZ_W_M_PWM(:,:,IKB+1:JKU))
+!$mnh_end_expand_array()
 !$acc end kernels
 !
 IF (KSPLT==1) THEN
    !Contribution to the dynamic production of TKE:
-   !$acc kernels async(2)
-#ifdef MNH_COMPILER_NVHPC
-   !$acc loop independent collapse(3)
-#endif
-   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-      ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK)
-   END DO !CONCURRENT   
+   !$acc kernels async(2) present_cr(gz_w_m_zwp,zwork)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+      ZWORK(:,:,:) = - ZFLX(:,:,:) * GZ_W_M_ZWP(:,:,:)
+   !$mnh_end_expand_array()   
    !$acc end kernels
   !
   ! evaluate the dynamic production at w(IKB+1) in PDP(IKB)
   !
-   !$acc kernels async(2)
-   !$acc_nv loop independent collapse(2)
-   DO CONCURRENT (JI=1:JIU,JJ=1:JJU)
-      ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDW_DZ(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) )
-   ENDDO
+   !$acc kernels async(2) present_cr(zdw_dz,zwork)
+   !$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
+      ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDW_DZ(:,:,1) + ZWORK(:,:,IKB+1) )
+   !$mnh_end_expand_array()
    !$acc end kernels
   !
   !$acc kernels async(2)
@@ -1284,30 +1215,27 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
   !!! wait for the computation of ZFLX, ZDP and ZWORK
   !$acc wait(2)
   !
-  !$acc kernels
-  !$acc_nv loop independent collapse(3) 
-  DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK)
-  ENDDO
+  !$acc kernels present_cr(zwork,ztmp1_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:)
+  !$mnh_end_expand_array()
   !$acc end kernels
   CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_W_SBG_UaW , .TRUE.)
   !
   CALL GZ_M_M_DEVICE(PTHLM,PDZZ,ZTMP1_DEVICE)
-  !$acc kernels
-  !$acc_nv loop independent collapse(3) 
-  DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK)
-  ENDDO
+  !$acc kernels present_cr(zflx,ztmp2_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:)
+  !$mnh_end_expand_array()
   !$acc end kernels
   CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.)
   !
   CALL GZ_M_W_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE)
   CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE )
-  !$acc kernels
-  !$acc_nv loop independent collapse(3) 
-  DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-     ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
-  ENDDO
+  !$acc kernels present_cr(ztmp2_device,ztmp3_device)
+  !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:)
+  !$mnh_end_expand_array()
   !$acc end kernels
   CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE,X_LES_RES_ddz_Thl_SBG_W2)
   !
@@ -1317,21 +1245,19 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
 !$acc data copy(X_LES_RES_ddxa_Rt_SBG_UaW,X_LES_RES_ddz_Rt_SBG_W2)
     !
     CALL GZ_M_M_DEVICE(PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE)
-    !$acc kernels
-    !$acc_nv loop independent collapse(3) 
-    DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK)
-    ENDDO
+    !$acc kernels present_cr(zflx,ztmp2_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:)
+    !$mnh_end_expand_array()
     !$acc end kernels
     CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.)
     !
     CALL GZ_M_W_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE)
     CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE )
-    !$acc kernels
-    !$acc_nv loop independent collapse(3) 
-    DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
-    ENDDO
+    !$acc kernels present_cr(ztmp2_device,ztmp3_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:)
+    !$mnh_end_expand_array()
     !$acc end kernels
     CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Rt_SBG_W2)
     !
@@ -1342,22 +1268,20 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
     !
     !
     CALL GZ_M_M_DEVICE(PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE)
-    !$acc kernels
-    !$acc_nv loop independent collapse(3)
-    DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK)
-    ENDDO
+    !$acc kernels present_cr(zflx,ztmp2_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:)
+    !$mnh_end_expand_array()
     !$acc end kernels
     CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, &
                            X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.)
     !
     CALL GZ_M_W_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE)
     CALL MZF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
-    !$acc kernels
-    !$acc_nv loop independent collapse(3)
-    DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
-    ENDDO
+    !$acc kernels present_cr(ztmp2_device,ztmp3_device)
+    !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:)
+    !$mnh_end_expand_array()
     !$acc end kernels
     CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV))
     !