diff --git a/src/MNH/turb_hor_uv.f90 b/src/MNH/turb_hor_uv.f90
index f8aff35b3f6bedd7cf6309c2ebab845e306ce294..b9d578a5941164de1f73be6e467f3197fe570942 100644
--- a/src/MNH/turb_hor_uv.f90
+++ b/src/MNH/turb_hor_uv.f90
@@ -316,16 +316,13 @@ IKB = 1+JPVEXT
 IKE = SIZE(PUM,3)-JPVEXT    
 !
 !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(2)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU )
 #ifndef MNH_BITREP
   ZDIRSINZW(JI,JJ) = SQRT( 1. - PDIRCOSZW(JI,JJ)**2 )
 #else
   ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) )
 #endif
-END DO
+!$mnh_end_do()
 !$acc end kernels
 !
 #ifndef MNH_OPENACC
@@ -356,12 +353,9 @@ CALL MXM_DEVICE(PK,ZTMP1_DEVICE)
 CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
 IF (.NOT. L2D) THEN
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
     ZFLX(JI,JJ,JK)= - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK))
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
 ELSE
   !$acc kernels present_cr(ZFLX)
@@ -465,7 +459,7 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE
 #endif
 ! 
 ! extrapolates this flux under the ground with the surface flux
-!$acc parallel present_cr(ZFLX)
+!$acc kernels present_cr(ZFLX)
 #ifndef MNH_BITREP
 !$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
  ZFLX(:,:,IKB-1) =                                                           &
@@ -480,7 +474,10 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE
     +PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) )
 !$mnh_end_expand_array()
 #else
+#ifdef MNH_COMPILER_NVHPC
 !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1)
+!$mnh_undef(OPENACC)
+#endif
 !$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
  ZFLX(:,:,IKB-1) =                                                           &
    PTAU11M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * BR_P2(PDIRCOSZW(:,:))     &
@@ -493,8 +490,11 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE
           PDIRCOSZW(:,:) * ZDIRSINZW(:,:)                                     &
           +PVSLOPEM(:,:) * (BR_P2(PCOSSLOPE(:,:)) - BR_P2(PSINSLOPE(:,:))) * ZDIRSINZW(:,:) )
 !$mnh_end_expand_array()
+#ifdef MNH_COMPILER_NVHPC
+!$mnh_define(OPENACC) 
+#endif
 #endif
-!$acc end parallel
+!$acc end kernels
 !  
 #ifndef MNH_OPENACC
 ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( MYM( ZFLX(:,:,IKB-1:IKB-1) ) )  &
@@ -542,69 +542,48 @@ END IF
 #else
 CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE)
 !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
-END DO !CONCURRENT   
+!$mnh_end_do() !CONCURRENT   
 !$acc end kernels
 CALL MXM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) 
 !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
-END DO !CONCURRENT   
+!$mnh_end_do() !CONCURRENT   
 !$acc end kernels
 CALL DYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
 IF (.NOT. LFLAT) THEN
   CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE)
   CALL MZM_DEVICE(PDYY,ZTMP3_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL MYF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL MXM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT    
+  !$mnh_end_do() !CONCURRENT    
   !$acc end kernels
   CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE )
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
 ELSE
 !$acc kernels present_cr(PRUS)
@@ -626,69 +605,48 @@ END IF
 #else
 CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
 !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
-END DO !CONCURRENT   
+!$mnh_end_do() !CONCURRENT   
 !$acc end kernels
 CALL MYM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) 
 !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-!$acc loop independent collapse(3)
-#endif
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
-END DO !CONCURRENT   
+!$mnh_end_do() !CONCURRENT   
 !$acc end kernels
 CALL DXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
 IF (.NOT. LFLAT) THEN
   CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE)
   CALL MZM_DEVICE(PDXX,ZTMP3_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL MYM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL MXF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
   CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE )
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
 ELSE
 !$acc kernels
@@ -712,12 +670,9 @@ IF (KSPLT==1) THEN
 #else
   IF (.NOT. L2D) THEN
      !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-     !$acc loop independent collapse(3)
-#endif
-     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
         ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK))
-     END DO !CONCURRENT   
+     !$mnh_end_do() !CONCURRENT   
 !$acc end kernels
   ELSE
 !$acc kernels present_cr(ZTMP1_DEVICE)
@@ -727,12 +682,9 @@ IF (KSPLT==1) THEN
   CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
   CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent collapse(3)
-#endif
-  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+  !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZWORK(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK)
-  END DO !CONCURRENT   
+  !$mnh_end_do() !CONCURRENT   
   !$acc end kernels
 #endif
   !