From faa1f1adb34c4aefa529c8a9d6ab2e8f69200a39 Mon Sep 17 00:00:00 2001
From: Juan Escobar <juan.escobar@aero.obs-mip.fr>
Date: Wed, 19 May 2021 16:33:51 +0200
Subject: [PATCH] Juan 19/05/2021: add loop independent collapse for all do
 concurrent & BR_ routine loops, or loop seq for the sequential do loops

---
 src/MNH/advection_metsv.f90      |  21 ++++--
 src/MNH/emoist.f90               |   2 +
 src/MNH/gradient_u.f90           |  11 ++-
 src/MNH/gradient_v.f90           |   5 ++
 src/MNH/mode_prandtl.f90         |  26 ++++---
 src/MNH/prandtl.f90              |  11 ++-
 src/MNH/rain_ice.f90             |  29 +++++---
 src/MNH/rain_ice_nucleation.f90  |   1 +
 src/MNH/resolved_cloud.f90       |   1 +
 src/MNH/shuman_device.f90        |  16 +++-
 src/MNH/tke_eps_sources.f90      |  13 +++-
 src/MNH/tridiag_thermo.f90       |  18 ++++-
 src/MNH/tridiag_tke.f90          |  10 ++-
 src/MNH/tridiag_w.f90            |  16 +++-
 src/MNH/tridiag_wind.f90         |   4 +-
 src/MNH/turb.f90                 |  11 ++-
 src/MNH/turb_hor_dyn_corr.f90    |  24 +++++-
 src/MNH/turb_hor_thermo_flux.f90 | 122 +++++++++++++++++++------------
 src/MNH/turb_hor_tke.f90         |  24 +++++-
 src/MNH/turb_hor_uv.f90          |  23 +++++-
 src/MNH/turb_hor_uw.f90          |  11 +++
 src/MNH/turb_hor_vw.f90          |  11 +++
 src/MNH/turb_ver.f90             |   1 +
 src/MNH/turb_ver_dyn_flux.f90    | 100 ++++++++++++++++++-------
 src/MNH/turb_ver_thermo_corr.f90 |  97 +++++++++++++++---------
 src/MNH/turb_ver_thermo_flux.f90 |  19 ++++-
 src/Makefile.MESONH.mk           |   2 +-
 27 files changed, 470 insertions(+), 159 deletions(-)

diff --git a/src/MNH/advection_metsv.f90 b/src/MNH/advection_metsv.f90
index 73c8cd0aa..4a69e687f 100644
--- a/src/MNH/advection_metsv.f90
+++ b/src/MNH/advection_metsv.f90
@@ -527,12 +527,18 @@ IF (.NOT. L1D) THEN
   END IF
 #else
   IF (.NOT. L2D) THEN
-     !$acc kernels 
-     ZCFL(:,:,:)  = SQRT(BR_P2(ZCFLU(:,:,:))+BR_P2(ZCFLV(:,:,:))+BR_P2(ZCFLW(:,:,:)))
+     !$acc kernels
+     !$acc loop independent collapse(3)
+     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
+        ZCFL(JI,JJ,JK)  = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK)))
+     END DO
      !$acc end kernels
   ELSE
-     !$acc kernels 
-     ZCFL(:,:,:)  = SQRT(BR_P2(ZCFLU(:,:,:))+BR_P2(ZCFLW(:,:,:)))
+     !$acc kernels
+     !$acc loop independent collapse(3)
+     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
+        ZCFL(JI,JJ,JK)  = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK)))
+     END DO
      !$acc end kernels
   END IF
 #endif 
@@ -543,7 +549,10 @@ ELSE
 #ifndef MNH_BITREP
    ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2)
 #else
-   ZCFL(:,:,:) = SQRT(BR_P2(ZCFLW(:,:,:)))
+   !$acc loop independent collapse(3)
+   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
+      ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK)))
+   END DO
 #endif
    !$acc end kernels
 END IF
@@ -859,12 +868,14 @@ DO JSPL=1,KSPLIT
       !$acc end kernels
    END IF
    !$acc kernels
+   !$acc loop independent collapse(4)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR )
       ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) &
            * ZTSTEP_PPM / PRHODJ(JI,JJ,JK)
    END DO !CONCURRENT 
    !$acc loop seq
    DO JSV = 1, KSV
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) +  &
               PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK)
diff --git a/src/MNH/emoist.f90 b/src/MNH/emoist.f90
index d08b2042e..7116d12d4 100644
--- a/src/MNH/emoist.f90
+++ b/src/MNH/emoist.f90
@@ -191,6 +191,7 @@ ELSE                                                ! liquid water & ice present
    DO JRR=5,KRR
       ZRW(1:JIU,1:JJU,1:JKU) = ZRW(1:JIU,1:JJU,1:JKU) + PRM(1:JIU,1:JJU,1:JKU,JRR)
    ENDDO
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZA(JI,JJ,JK) = 1. + (                                    &  ! Compute A
            (1.+ZDELTA) * (PRM(JI,JJ,JK,1) - PRM(JI,JJ,JK,2) - PRM(JI,JJ,JK,4)) &
@@ -218,6 +219,7 @@ ELSE                                                ! liquid water & ice present
    DO JRR=3,KRR
       ZRW(1:JIU,1:JJU,1:JKU) = ZRW(1:JIU,1:JJU,1:JKU) + PRM(1:JIU,1:JJU,1:JKU,JRR)
    ENDDO
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZA(JI,JJ,JK) = 1. + (                                    &  ! Compute ZA
            (1.+ZDELTA) * (PRM(JI,JJ,JK,1) - PRM(JI,JJ,JK,2)) &
diff --git a/src/MNH/gradient_u.f90 b/src/MNH/gradient_u.f90
index fa743bbd8..a361dcbed 100644
--- a/src/MNH/gradient_u.f90
+++ b/src/MNH/gradient_u.f90
@@ -244,13 +244,15 @@ iztmp3_device = MNH_ALLOCATE_ZT3D( ztmp3_device,JIU,JJU,JKU )
 IF (.NOT. LFLAT) THEN
   CALL DXF_DEVICE(PA,ZTMP1_DEVICE)
   CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP2_DEVICE)
-  !$acc kernels loop independent collapse(3)
-  DO JK=1,JKU ; DO JJ=1,JJU ; DO JI=1,JIU
+  !$acc kernels
+  !$acc loop independent collapse(3)
+   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
-  END DO ; END DO ; END DO
+  END DO !CONCURRENT
   !$acc end kernels
   CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
   END DO !CONCURRENT
@@ -434,6 +436,7 @@ IF (.NOT. LFLAT) THEN
   CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP1_DEVICE)
   CALL MXM_DEVICE(PDZZ,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)/ZTMP2_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT    
@@ -441,6 +444,7 @@ IF (.NOT. LFLAT) THEN
   CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE)
   CALL MXM_DEVICE(PDZY,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -449,6 +453,7 @@ IF (.NOT. LFLAT) THEN
   CALL DYM_DEVICE(PA,ZTMP1_DEVICE)
   CALL MXM_DEVICE(PDYY,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PGY_U_UV_DEVICE(JI,JJ,JK)=  ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP2_DEVICE(JI,JJ,JK) ) / ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
diff --git a/src/MNH/gradient_v.f90 b/src/MNH/gradient_v.f90
index 0f7192cd8..c1a7810be 100644
--- a/src/MNH/gradient_v.f90
+++ b/src/MNH/gradient_v.f90
@@ -247,12 +247,14 @@ IF (.NOT. LFLAT) THEN
   CALL DYF_DEVICE(PA,ZTMP1_DEVICE)
   CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT
   !$acc end kernels
   CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK)
   END DO !CONCURRENT
@@ -435,6 +437,7 @@ IF (.NOT. LFLAT) THEN
   CALL MYM_DEVICE(PDZZ,ZTMP2_DEVICE)
   CALL DZM_DEVICE(KKA,KKU,KL,PA,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -442,6 +445,7 @@ IF (.NOT. LFLAT) THEN
   CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
   CALL MYM_DEVICE(PDZX,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) *ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -449,6 +453,7 @@ IF (.NOT. LFLAT) THEN
   CALL MZF_DEVICE(KKA,KKU,KL,ZTMP4_DEVICE,ZTMP2_DEVICE)
   CALL MYM_DEVICE(PDXX,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PGX_V_UV_DEVICE(JI,JJ,JK)= ( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP2_DEVICE(JI,JJ,JK) ) / ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
diff --git a/src/MNH/mode_prandtl.f90 b/src/MNH/mode_prandtl.f90
index 5ac5ddc57..1a714d1c5 100644
--- a/src/MNH/mode_prandtl.f90
+++ b/src/MNH/mode_prandtl.f90
@@ -122,27 +122,37 @@ igphi3logic = MNH_ALLOCATE_GT3D( gphi3logic, JIU,JJU,JKU )
 !$acc kernels
 IF (HTURBDIM=='3DIM') THEN
         !* 3DIM case
   IF (OUSERV) THEN
-    ZW1(:,:,:) = 1. + 1.5* (PREDTH1(:,:,:)+PREDR1(:,:,:)) +      &
 #ifndef MNH_BITREP
+    ZW1(:,:,:) = 1. + 1.5* (PREDTH1(:,:,:)+PREDR1(:,:,:)) +      &
                    ( 0.5 * (PREDTH1(:,:,:)**2+PREDR1(:,:,:)**2)  &
-#else
-                   ( 0.5 * (BR_P2(PREDTH1(:,:,:))+BR_P2(PREDR1(:,:,:)))  &
-#endif
                          + PREDTH1(:,:,:) * PREDR1(:,:,:)        &
                    )
+#else
+    !$acc loop independent collapse(3)
+    DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZW1(JI,JJ,JK) = 1. + 1.5* (PREDTH1(JI,JJ,JK)+PREDR1(JI,JJ,JK)) +      &
+                   ( 0.5 * (BR_P2(PREDTH1(JI,JJ,JK))+BR_P2(PREDR1(JI,JJ,JK)))  &
+                         + PREDTH1(JI,JJ,JK) * PREDR1(JI,JJ,JK)        &
+                         )
+    END DO
+#endif
     ZW2(:,:,:) = 0.5 * (PRED2TH3(:,:,:)-PRED2R3(:,:,:))
     PPHI3(:,:,:)= 1. -                                          &
     ( ( (1.+PREDR1(:,:,:)) *                                   &
         (PRED2THR3(:,:,:) + PRED2TH3(:,:,:)) / PREDTH1(:,:,:)  &
       ) + ZW2(:,:,:)                                           &
     ) / ZW1(:,:,:)
   ELSE
+#ifndef MNH_BITREP
     ZW1(:,:,:) = 1. + 1.5* PREDTH1(:,:,:) + &
-#ifndef MNH_BITREP
                  0.5* PREDTH1(:,:,:)**2
 #else
-                 0.5* BR_P2(PREDTH1(:,:,:))
+    !$acc loop independent collapse(3)
+    DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZW1(JI,JJ,JK) = 1. + 1.5* PREDTH1(JI,JJ,JK) + &
+                    0.5* BR_P2(PREDTH1(JI,JJ,JK))
+    END DO
 #endif
     ZW2(:,:,:) = 0.5* PRED2TH3(:,:,:)
     PPHI3(:,:,:)= 1. -                                       &
diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90
index 0b0815ba5..927314266 100644
--- a/src/MNH/prandtl.f90
+++ b/src/MNH/prandtl.f90
@@ -437,7 +437,7 @@ END WHERE
 WHERE (PREDTH1(:,:,:) < -ZMINVAL)
   ZW2(:,:,:) = (-ZMINVAL) / (PREDTH1(:,:,:))
 END WHERE
-
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZW2(JI,JJ,JK) = MIN( ZW1(JI,JJ,JK),ZW2(JI,JJ,JK) )
 END DO
@@ -448,6 +448,7 @@ WHERE (PREDR1(:,:,:)<-ZMINVAL)
 END WHERE
 
 !!$ZW1(:,:,:) = MIN(ZW2(:,:,:),ZW1(:,:,:))
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZW1(JI,JJ,JK) = MIN( ZW2(JI,JJ,JK),ZW1(JI,JJ,JK) )
 END DO
@@ -550,6 +551,7 @@ ELSE IF (L2D) THEN                      ! 3D case in a 2D model
 #ifndef MNH_BITREP
     ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2
 #else
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
    ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK))
 END DO !CONCURRENT   
@@ -587,6 +589,7 @@ END DO !CONCURRENT
 #ifndef MNH_BITREP
     ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2
 #else
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK))
     END DO !CONCURRENT   
@@ -623,6 +626,7 @@ END DO !CONCURRENT
     CALL GX_M_M_DEVICE(KKA,KKU,KKL,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE)
     CALL GX_M_M_DEVICE(KKA,KKU,KKL,PTHLM       ,PDXX,PDZZ,PDZX,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
    ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
 END DO
@@ -635,6 +639,7 @@ END DO
                                 * PEMOIST(:,:,:) * PETHETA(:,:,:) &
                                 * ZTMP2_DEVICE(:,:,:)
 #else
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)           
     PRED2THR3(JI,JJ,JK)= PREDR1(JI,JJ,JK) * PREDTH1(JI,JJ,JK) +  BR_P2(XCTV)*BR_P2(PBLL_O_E(JI,JJ,JK)) *   &
          PEMOIST(JI,JJ,JK) * PETHETA(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
@@ -719,6 +724,7 @@ ELSE                                 ! 3D case in a 3D model
 #ifndef MNH_BITREP
     ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2
 #else
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
    ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK))
 END DO
@@ -759,6 +765,7 @@ END DO
 #ifndef MNH_BITREP
     ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2
 #else
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
    ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK))
 END DO   
@@ -801,6 +808,7 @@ END DO
     CALL GY_M_M_DEVICE(KKA,KKU,KKL,PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP3_DEVICE)
     CALL GY_M_M_DEVICE(KKA,KKU,KKL,PTHLM       ,PDYY,PDZZ,PDZY,ZTMP4_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
     ZTMP1_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)+ &
                                       ZTMP3_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
@@ -844,6 +852,7 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.F. and KRR=0 not
 #ifndef MNH_BITREP
     ZTMP1_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2
 #else
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)  
    ZTMP1_DEVICE(JI,JJ,JK) = BR_P2(ZTMP1_DEVICE(JI,JJ,JK)) + BR_P2(ZTMP2_DEVICE(JI,JJ,JK))
 END DO   
diff --git a/src/MNH/rain_ice.f90 b/src/MNH/rain_ice.f90
index d0de2f3ff..6a7c12bdd 100644
--- a/src/MNH/rain_ice.f90
+++ b/src/MNH/rain_ice.f90
@@ -747,7 +747,7 @@ IZRHODJ    = MNH_ALLOCATE_ZT1DP(ZRHODJ,0)
 
 !
 !$acc kernels
-!acc loop independent
+!$acc loop independent
   DO CONCURRENT ( JL=1:IMICRO )
     ZRVT(JL) = PRVT(I1(JL),I2(JL),I3(JL))
     ZRCT(JL) = PRCT(I1(JL),I2(JL),I3(JL))
@@ -790,20 +790,26 @@ IZRHODJ    = MNH_ALLOCATE_ZT1DP(ZRHODJ,0)
 !     ZSIGMA_RC(JL) = MAX(PSIGS(I1(JL),I2(JL),I3(JL)) * 2., 1.E-12)
     END DO
   END IF
-!
-  ZZW(:)  = ZEXNREF(:)*( XCPD+XCPV*ZRVT(:)+XCL*(ZRCT(:)+ZRRT(:)) &
-                                    +XCI*(ZRIT(:)+ZRST(:)+ZRGT(:)) )
-  ZLSFACT(:) = (XLSTT+(XCPV-XCI)*(ZZT(:)-XTT))/ZZW(:) ! L_s/(Pi_ref*C_ph)
-  ZLVFACT(:) = (XLVTT+(XCPV-XCL)*(ZZT(:)-XTT))/ZZW(:) ! L_v/(Pi_ref*C_ph)
-
+  !
+  !$acc loop independent
+  DO CONCURRENT ( JL=1:IMICRO )
+     ZZW(JL)  = ZEXNREF(JL)*( XCPD+XCPV*ZRVT(JL)+XCL*(ZRCT(JL)+ZRRT(JL)) &
+          +XCI*(ZRIT(JL)+ZRST(JL)+ZRGT(JL)) )
+     ZLSFACT(JL) = (XLSTT+(XCPV-XCI)*(ZZT(JL)-XTT))/ZZW(JL) ! L_s/(Pi_ref*C_ph)
+     ZLVFACT(JL) = (XLVTT+(XCPV-XCL)*(ZZT(JL)-XTT))/ZZW(JL) ! L_v/(Pi_ref*C_ph)
+  END DO
+  
 #ifndef MNH_BITREP
   ZZW(:) = EXP( XALPI - XBETAI/ZZT(:) - XGAMI*ALOG(ZZT(:) ) )
+  ZSSI(:) = ZRVT(:)*( ZPRES(:)-ZZW(:) ) / ( (XMV/XMD) * ZZW(:) ) - 1.0
 #else
+  !$acc loop independent
   DO CONCURRENT ( JL=1:IMICRO )
      ZZW(JL) = BR_EXP( XALPI - XBETAI/ZZT(JL) - XGAMI*BR_LOG(ZZT(JL) ) )
+     ZSSI(JL) = ZRVT(JL)*( ZPRES(JL)-ZZW(JL) ) / ( (XMV/XMD) * ZZW(JL) ) - 1.0
   END DO
 #endif
-  ZSSI(:) = ZRVT(:)*( ZPRES(:)-ZZW(:) ) / ( (XMV/XMD) * ZZW(:) ) - 1.0
+  
                                                     ! Supersaturation over ice
   !
   IF (LBU_ENABLE .OR. LLES_CALL) THEN
@@ -815,7 +821,10 @@ IZRHODJ    = MNH_ALLOCATE_ZT1DP(ZRHODJ,0)
 !
   !Cloud water split between high and low content part is done here
   !according to autoconversion option
-  ZRCRAUTC(:)   = XCRIAUTC/ZRHODREF(:) ! Autoconversion rc threshold
+  !$acc loop independent
+  DO CONCURRENT ( JL=1:IMICRO )
+     ZRCRAUTC(JL)   = XCRIAUTC/ZRHODREF(JL) ! Autoconversion rc threshold
+  END DO
 !$acc end kernels
 #ifdef MNH_OPENACC
   IF (LBU_ENABLE .OR. LLES_CALL) THEN
@@ -1054,6 +1063,7 @@ IZRHODJ    = MNH_ALLOCATE_ZT1DP(ZRHODJ,0)
     ZLBDAR(:)  = 0.
  END WHERE
 #else
+ !$acc loop independent
  DO CONCURRENT ( JL=1:IMICRO )
   IF ( ZRRT(JL)>0.0 ) THEN
     ZLBDAR(JL)  = XLBR * BR_POW( ZRHODREF(JL) * MAX( ZRRT(JL), XRTMIN(3) ), XLBEXR )
@@ -1070,6 +1080,7 @@ END DO ! CONCURRENT
     ZLBDAR_RF(:)  = 0.
  END WHERE
 #else
+ !$acc loop independent
  DO CONCURRENT ( JL=1:IMICRO )
     IF ( ZRRT(JL)>0.0 .AND. ZRF(JL)>0.0 ) THEN
        ZLBDAR_RF(JL)  = XLBR * BR_POW( ZRHODREF(JL) * MAX( ZRRT(JL)/ZRF(JL), XRTMIN(3) ), XLBEXR )
diff --git a/src/MNH/rain_ice_nucleation.f90 b/src/MNH/rain_ice_nucleation.f90
index 509e7ddab..88858b403 100644
--- a/src/MNH/rain_ice_nucleation.f90
+++ b/src/MNH/rain_ice_nucleation.f90
@@ -203,6 +203,7 @@ IF( INEGT >= 1 ) THEN
          ( ZSSI(1:INEGT)/ZUSW(1:INEGT) )**XALPHA1 )
  END WHERE    
 #else
+ !$acc loop independent
  DO CONCURRENT ( JL=1:INEGT )
     IF ( (ZZT(JL)<=XTT-2.0) .AND. (ZZT(JL)>=XTT-5.0) .AND. (ZSSI(JL)>0.0) ) THEN
        ZZW(JL) = MAX( XNU20 * BR_EXP( -XBETA2 ),XNU10 * BR_EXP( -XBETA1*(ZZT(JL)-XTT) ) * &
diff --git a/src/MNH/resolved_cloud.f90 b/src/MNH/resolved_cloud.f90
index 5e19a9116..01e131b70 100644
--- a/src/MNH/resolved_cloud.f90
+++ b/src/MNH/resolved_cloud.f90
@@ -722,6 +722,7 @@ ENDIF
 !                    microphysical routines would save
 !                    computing time
 !
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
 #ifndef MNH_BITREP
 ZEXN(JI,JJ,JK) = (PPABST(JI,JJ,JK)/XP00) ** (XRD/XCPD)
diff --git a/src/MNH/shuman_device.f90 b/src/MNH/shuman_device.f90
index 380868f8c..0ca7755d4 100644
--- a/src/MNH/shuman_device.f90
+++ b/src/MNH/shuman_device.f90
@@ -167,6 +167,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PMXF,PA)
+!$acc loop independent collapse(3)
 DO JK = 1, IKU
   DO JJ = 1, IJU
     DO JI = 1 + 1, IIU
@@ -292,6 +293,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PMXM)
+!$acc loop independent collapse(3)
 DO JK = 1, IKU
   DO JJ = 1, IJU
     DO JI = 1 + 1, IIU
@@ -300,6 +302,7 @@ DO JK = 1, IKU
   ENDDO
 ENDDO
 !
+!$acc loop independent collapse(2)
 DO JK = 1, IKU
   DO JJ=1,IJU
     PMXM(1,JJ,JK)    = PMXM(IIU-2*JPHEXT+1,JJ,JK)  	!TODO: voir si ce n'est pas plutot JPHEXT+1
@@ -418,6 +421,7 @@ IKU = SIZE(PA,3)
 !
 !$acc kernels present(PA,PMYF)
 #ifndef _OPT_LINEARIZED_LOOPS
+!$acc loop independent collapse(3)
 DO JK=1,IKU
   DO JJ=1,IJU-1
     DO JI=1,IIU !TODO: remplacer le 1 par JPHEXT ?
@@ -534,6 +538,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PMYM)
+!$acc loop independent collapse(3)
 DO JK=1,IKU
   DO JJ=2,IJU !TODO: remplacer le 1+1 par 1+JPHEXT ?
     DO JI=1,IIU
@@ -870,6 +875,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PDXF)
+!$acc loop independent collapse(3)
 DO JK=1,IKU
   DO JJ=1,IJU
     DO JI=1+1,IIU
@@ -878,6 +884,7 @@ DO JK=1,IKU
   END DO
 END DO
 !
+!$acc loop independent collapse(2)
 DO JK=1,IKU
   DO JJ=1,IJU
     PDXF(IIU,JJ,JK)    = PDXF(2*JPHEXT,JJ,JK) 
@@ -994,6 +1001,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PDXM)
+!$acc loop independent collapse(3)
 DO JK=1,IKU
   DO JJ=1,IJU
     DO JI=1+1,IIU !TODO: remplacer le 1 par JPHEXT ?
@@ -1002,6 +1010,7 @@ DO JK=1,IKU
   END DO
 END DO
 !
+!$acc loop independent collapse(2)
 DO JK=1,IKU
   DO JJ=1,IJU
     PDXM(1,JJ,JK)    = PDXM(IIU-2*JPHEXT+1,JJ,JK)   !TODO: remplacer -2*JPHEXT+1 par -JPHEXT ?
@@ -1119,6 +1128,7 @@ IKU = SIZE(PA,3)
 !
 !$acc kernels present(PA,PDYF)
 #ifndef _OPT_LINEARIZED_LOOPS
+!$acc loop independent collapse(3)
 DO JK=1,IKU
   DO JJ=1,IJU-1 !TODO: remplacer le 1 par JPHEXT ?
     DO JI=1,IIU
@@ -1232,6 +1242,7 @@ IKU=SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PDYM)
+!$acc loop independent collapse(3)
 DO JK=1,IKU
   DO JJ=2,IJU !TODO: remplacer le 2 par JPHEXT+1 ?
     DO JI=1,IIU
@@ -1240,9 +1251,6 @@ DO JK=1,IKU
   END DO
 END DO
 !
-DO JJ=1,JPHEXT
-   PDYM(:,JJ,:) = PDYM(:,IJU-2*JPHEXT+JJ,:) ! for reprod JPHEXT <> 1
-END DO
 #else
 JIJKOR  = 1 + IIU
 JIJKEND = IIU*IJU*IKU
@@ -1345,6 +1353,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PDZF)
+!$acc loop independent collapse(3)
 DO JK=1,IKU-1 !TODO: remplacer le 1 par JPHEXT ?
   DO JJ=1,IJU
     DO JI=1,IIU
@@ -1459,6 +1468,7 @@ IKU = SIZE(PA,3)
 !
 #ifndef _OPT_LINEARIZED_LOOPS
 !$acc kernels present(PA,PDZM)
+!$acc loop independent collapse(3)
 DO JK=2,IKU !TODO: remplacer le 1+1 par 1+JPHEXT ?
   DO JJ=1,IJU
     DO JI=1,IIU
diff --git a/src/MNH/tke_eps_sources.f90 b/src/MNH/tke_eps_sources.f90
index ae9c4b339..17218c59f 100644
--- a/src/MNH/tke_eps_sources.f90
+++ b/src/MNH/tke_eps_sources.f90
@@ -350,7 +350,10 @@ IKE=KKU-JPVEXT_TURB*KKL
 #ifndef MNH_BITREP
 ZKEFF(:,:,:) = PLM(:,:,:) * SQRT(PTKEM(:,:,:))
 #else
-ZKEFF(:,:,:) = PLM(:,:,:) * BR_POW(PTKEM(:,:,:),0.5)
+!$acc loop independent collapse(3)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+   ZKEFF(JI,JJ,JK) = PLM(JI,JJ,JK) * BR_POW(PTKEM(JI,JJ,JK),0.5)
+END DO
 #endif
 !
 !----------------------------------------------------------------------------
@@ -383,8 +386,12 @@ PDP(:,:,IKB) = PDP(:,:,IKB) * (1. + PDZZ(:,:,IKB+KKL)/PDZZ(:,:,IKB))
 #ifndef MNH_BITREP
 ZFLX(:,:,:) = XCED * SQRT(PTKEM(:,:,:)) / PLEPS(:,:,:)
 #else
-ZFLX(:,:,:) = XCED * BR_POW(PTKEM(:,:,:),0.5) / PLEPS(:,:,:)
+!$acc loop independent collapse(3)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+   ZFLX(JI,JJ,JK) = XCED * BR_POW(PTKEM(JI,JJ,JK),0.5) / PLEPS(JI,JJ,JK)
+END DO
 #endif
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZSOURCE(JI,JJ,JK) = PRTKES(JI,JJ,JK) / PRHODJ(JI,JJ,JK)  +  PRTKESM(JI,JJ,JK) / PRHODJ(JI,JJ,JK) &
         - PTKEM(JI,JJ,JK) / PTSTEP &
@@ -412,6 +419,7 @@ CALL MZM_DEVICE(PRHODJ,ZTMP2_DEVICE) !Warning: re-used later
 #ifndef MNH_BITREP
 ZA(:,:,:)     = - PTSTEP * XCET * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) / PDZZ(:,:,:)**2
 #else
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZA(JI,JJ,JK)     = - PTSTEP * XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / BR_P2(PDZZ(JI,JJ,JK))
 END DO !CONCURRENT   
@@ -427,6 +435,7 @@ CALL TRIDIAG_TKE(KKA,KKU,KKL,PTKEM,ZA,PTSTEP,PEXPL,PIMPL,PRHODJ,&
 CALL GET_HALO(ZRES)
 #else
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP3_DEVICE(JI,JJ,JK) = PTSTEP*ZFLX(JI,JJ,JK)
 END DO !CONCURRENT   
diff --git a/src/MNH/tridiag_thermo.f90 b/src/MNH/tridiag_thermo.f90
index eefe1b37a..95e86abe0 100644
--- a/src/MNH/tridiag_thermo.f90
+++ b/src/MNH/tridiag_thermo.f90
@@ -266,6 +266,7 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ)
 #ifndef MNH_BITREP
 ZRHODJ_DFDDTDZ_O_DZ2 = ZMZM_RHODJ*PDFDDTDZ/PDZZ**2
 #else
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) 
    ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK))
 END DO !CONCURRENT   
@@ -285,6 +286,7 @@ ZY=0.
 !           ---------------------------
 !
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
 ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP                  &
     - ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL)    &
@@ -295,6 +297,7 @@ END DO !CONCURRENT
 !$acc end kernels
 !
 !$acc kernels ! async
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1)
   ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP                 &
     - ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL)     &
@@ -307,6 +310,7 @@ END DO !CONCURRENT
 !$acc end kernels
 ! 
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
 ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP               &
     - ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) &
@@ -327,6 +331,7 @@ IF ( PIMPL > 1.E-10 ) THEN
 !            --------------
 !
 !$acc kernels ! async
+!$acc loop independent collapse(2)   
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)   
   ZB(JI,JJ,IKB) =   PRHODJ(JI,JJ,IKB)/PTSTEP                   &
                 - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL
@@ -334,12 +339,14 @@ END DO !CONCURRENT
 !$acc end kernels
 !
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
   ZC(JI,JJ,IKB) =   ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL
 END DO !CONCURRENT
 !$acc end kernels
 !
 !$acc kernels ! async
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1)
   ZA(JI,JJ,JK) =   ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL
   ZB(JI,JJ,JK) =   PRHODJ(JI,JJ,JK)/PTSTEP                        &
@@ -350,6 +357,7 @@ END DO !CONCURRENT
 !$acc end kernels
 !
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
   ZA(JI,JJ,IKE) =   ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE  ) * PIMPL
   ZB(JI,JJ,IKE) =   PRHODJ(JI,JJ,IKE)/PTSTEP                   &
@@ -364,6 +372,7 @@ END DO !CONCURRENT
 !            --------
 !
 !$acc kernels
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
   ZBET(JI,JJ) = ZB(JI,JJ,IKB)  ! bet = b(ikb)
   PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
@@ -371,7 +380,8 @@ END DO !CONCURRENT
 !
 !$acc loop seq
 DO JK = IKB+KKL,IKE-KKL,KKL
-!$acc loop gang, vector collapse(2)
+   !$acc loop independent collapse(2)
+   ! acc loop gang, vector collapse(2)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    !DO JJ=1,JJU
    !   DO JI=1,JIU     
@@ -386,6 +396,7 @@ DO JK = IKB+KKL,IKE-KKL,KKL
    END DO !CONCURRENT 
 END DO
 ! special treatment for the last level
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
       ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) 
       ! gam(k) = c(k-1) / bet
@@ -400,7 +411,8 @@ END DO !CONCURRENT
 !
 !$acc loop seq
 DO JK = IKE-KKL,IKB,-1*KKL
-   !$acc loop gang, vector collapse(2)
+   !$acc loop independent collapse(2)
+   ! acc loop gang, vector collapse(2)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
       PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL)
    END DO !CONCURRENT
@@ -410,6 +422,7 @@ END DO
 ELSE
 ! 
 !$acc kernels
+!$acc loop independent collapse(3)   
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE)   
    PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK)
 END DO !CONCURRENT
@@ -422,6 +435,7 @@ END IF
 !            ----------------------------------------
 !
 !$acc kernels
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB)
    PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE)
diff --git a/src/MNH/tridiag_tke.f90 b/src/MNH/tridiag_tke.f90
index 135d4c319..a81715fb4 100644
--- a/src/MNH/tridiag_tke.f90
+++ b/src/MNH/tridiag_tke.f90
@@ -220,12 +220,14 @@ IKE=KKU-JPVEXT_TURB*KKL
 
 !
 !
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB)  + PTSTEP*PSOURCE(JI,JJ,IKB) -   &
         PEXPL / PRHODJ(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB))
 END DO !CONCURRENT
 !
 DO JK=IKTB+1,IKTE-1
+   !$acc loop independent collapse(2)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
       ZY(JI,JJ,JK)= PVARM(JI,JJ,JK)  + PTSTEP*PSOURCE(JI,JJ,JK) -               &
            PEXPL / PRHODJ(JI,JJ,JK) *                                           &
@@ -236,6 +238,7 @@ DO JK=IKTB+1,IKTE-1
    END DO !CONCURRENT
 END DO
 !
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) +               &
         PEXPL / PRHODJ(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL))
@@ -250,6 +253,7 @@ IF ( PIMPL > 1.E-10 ) THEN
   !
   !  going up
    !
+  !$acc loop independent collapse(2)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) 
      ZBET(JI,JJ) = 1. + PIMPL * (PDIAG(JI,JJ,IKB)-PA(JI,JJ,IKB+KKL) / PRHODJ(JI,JJ,IKB))
      ! bet = b(ikb)
@@ -276,6 +280,7 @@ IF ( PIMPL > 1.E-10 ) THEN
     END DO
   END DO 
   ! special treatment for the last level
+  !$acc loop independent collapse(2)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
      ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJ(JI,JJ,IKE-KKL) / ZBET(JI,JJ) 
      ! gam(k) = c(k-1) / bet
@@ -300,7 +305,8 @@ IF ( PIMPL > 1.E-10 ) THEN
   END DO
 !
 ELSE
-   !
+  !
+  !$acc loop independent collapse(2)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) 
      PVARP(JI,JJ,IKTB:IKTE) = ZY(JI,JJ,IKTB:IKTE)
   END DO !CONCURRENT   
@@ -311,6 +317,7 @@ END IF
 !*       3.  FILL THE UPPER AND LOWER EXTERNAL VALUES
 !            ----------------------------------------
 !
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB)
    PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE)
diff --git a/src/MNH/tridiag_w.f90 b/src/MNH/tridiag_w.f90
index db89be613..b0815bf07 100644
--- a/src/MNH/tridiag_w.f90
+++ b/src/MNH/tridiag_w.f90
@@ -283,6 +283,7 @@ ZY=0.
 !!#endif
 !
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZY(JI,JJ,IKB) = ZMZM_RHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP              &
         - PRHODJ(JI,JJ,IKB  ) * PF(JI,JJ,IKB  )/PMZF_DZZ(JI,JJ,IKB  )           &
@@ -293,6 +294,7 @@ END DO !CONCURRENT
 !$acc end kernels
 !
 !$acc kernels ! async
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1)
   ZY(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP               &
        - PRHODJ(JI,JJ,JK  ) * PF(JI,JJ,JK  )/PMZF_DZZ(JI,JJ,JK  )          &
@@ -305,6 +307,7 @@ END DO !CONCURRENT
 !$acc end kernels
 ! 
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZY(JI,JJ,IKE) = ZMZM_RHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP              &
         - PRHODJ(JI,JJ,IKE  ) * PF(JI,JJ,IKE  )/PMZF_DZZ(JI,JJ,IKE  )           &
@@ -329,18 +332,21 @@ END DO !CONCURRENT
 !! c(k) = + PRHODJ(k)   * PDFDDWDZ(k)/PMZF_DZZ(k)**2
 !
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
   ZB(JI,JJ,IKB) =   ZMZM_RHODJ(JI,JJ,IKB)/PTSTEP      &
        - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB)
 END DO !CONCURRENT  
 !$acc end kernels
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZC(JI,JJ,IKB) =   ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKB)
 END DO !CONCURRENT   
 !$acc end kernels
 
 !$acc kernels ! async
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:IKE-1)
     ZA(JI,JJ,JK) =   ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,JK-1)
     ZB(JI,JJ,JK) =   ZMZM_RHODJ(JI,JJ,JK)/PTSTEP      &
@@ -351,11 +357,13 @@ END DO !CONCURRENT
 !$acc end kernels
 
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZA(JI,JJ,IKE) =   ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE-1)
 END DO !CONCURRENT   
 !$acc end kernels
 !$acc kernels ! async
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
   ZB(JI,JJ,IKE) =   ZMZM_RHODJ(JI,JJ,IKE)/PTSTEP      &
                 - ZRHODJ_DFDDWDZ_O_DZ2(JI,JJ,IKE  ) &
@@ -370,12 +378,15 @@ END DO !CONCURRENT
 !            --------
 !
 !$acc kernels
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
   ZBET(JI,JJ) = ZB(JI,JJ,IKB)  ! bet = b(ikb)
   PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
 END DO !CONCURRENT
-  !
+!
+!$acc loop seq
 DO JK = IKB+1,IKE-1
+   !$acc loop independent collapse(2)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
       ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ)  
       ! gam(k) = c(k-1) / bet
@@ -386,6 +397,7 @@ DO JK = IKB+1,IKE-1
    END DO !CONCURRENT
 END DO
 ! special treatment for the last level
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
    ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ) 
    ! gam(k) = c(k-1) / bet
@@ -398,7 +410,9 @@ END DO !CONCURRENT
 !*       3.3 going down
 !            ----------
 !
+!$acc loop seq
 DO JK = IKE-1,IKB,-1
+   !$acc loop independent collapse(2)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
       PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1)
    END DO !CONCURRENT   
diff --git a/src/MNH/tridiag_wind.f90 b/src/MNH/tridiag_wind.f90
index 0152538c2..8ab02c872 100644
--- a/src/MNH/tridiag_wind.f90
+++ b/src/MNH/tridiag_wind.f90
@@ -263,6 +263,7 @@ IF ( PIMPL > 1.E-10 ) THEN
   !  going up
   !
    !$acc kernels
+   !$acc loop independent collapse(2)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
       ZBET(JI,JJ) = 1. - PIMPL * (  PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) &  
            + PCOEFS(JI,JJ) *  PTSTEP        )   ! bet = b(ikb)
@@ -271,7 +272,7 @@ IF ( PIMPL > 1.E-10 ) THEN
   !
   !$acc loop  seq
   DO JK = IKB+KKL,IKE-KKL,KKL
-     !$acc loop gang, vector collapse(2)
+     !$acc loop independent gang, vector collapse(2)
      DO  CONCURRENT ( JJ=1:JJU , JI=1:JIU )  
         ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ)  
         ! gam(k) = c(k-1) / bet
@@ -286,6 +287,7 @@ IF ( PIMPL > 1.E-10 ) THEN
      END DO ! CONCURRENT
   END DO
   ! special treatment for the last level
+  !$acc loop independent gang, vector collapse(2)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
      ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ) 
      ! gam(k) = c(k-1) / bet
diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90
index 81531d768..7b4498dd8 100644
--- a/src/MNH/turb.f90
+++ b/src/MNH/turb.f90
@@ -998,8 +998,11 @@ END IF
   ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) /                  &
                         (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) )
 #else
-  ZCDUEFF(:,:) =-SQRT ( (BR_P2(PSFU(:,:)) + BR_P2(PSFV(:,:))) /                  &
-                        (XMNH_TINY + BR_P2(ZUSLOPE(:,:)) + BR_P2(ZVSLOPE(:,:)) ) )
+  !$acc loop independent collapse(2)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+     ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) /                  &
+                    (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) )
+  END DO
 #endif
 !$acc end kernels
 !
@@ -1728,7 +1731,11 @@ izdrvsatdt = MNH_ALLOCATE_ZT3D( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), siz
 #ifndef MNH_BITREP
   ZRVSAT(:,:,:) =  EXP( PALP - PBETA/PT(:,:,:) - PGAM*ALOG( PT(:,:,:) ) )
 #else
-  ZRVSAT(:,:,:) =  BR_EXP( PALP - PBETA/PT(:,:,:) - PGAM*BR_LOG( PT(:,:,:) ) )
+  ! Without an explicit 'loop independent' the compiler may run this loop (BR_ calls) sequentially
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZRVSAT(JI,JJ,JK) =  BR_EXP( PALP - PBETA/PT(JI,JJ,JK) - PGAM*BR_LOG( PT(JI,JJ,JK) ) )
+  END DO
 #endif
 !
 !*      1.3 saturation  mixing ratio at t
diff --git a/src/MNH/turb_hor_dyn_corr.f90 b/src/MNH/turb_hor_dyn_corr.f90
index 01ee3ad89..1373e2597 100644
--- a/src/MNH/turb_hor_dyn_corr.f90
+++ b/src/MNH/turb_hor_dyn_corr.f90
@@ -403,7 +403,10 @@ IKU = SIZE(PUM,3)
 #ifndef MNH_BITREP
 ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 )
 #else
-ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) )
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) )
+END DO
 #endif
 !$acc end kernels
 !
@@ -437,6 +440,7 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' )
 ! Computes the U variance
 IF (.NOT. L2D) THEN
    !$acc kernels async(2)
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK)                            &
            - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK)        &
@@ -613,6 +617,7 @@ ZFLX(:,:,IKB-1) =                                                            &
       PVSLOPEM(:,:) * PCOSSLOPE(:,:)    * PSINSLOPE(:,:) * ZDIRSINZW(:,:)    &
     - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)    )
 #else
+!$acc loop independent collapse(2)   
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
 ZFLX(JI,JJ,IKB-1) =                                                             &
         PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ))          &
@@ -685,6 +690,7 @@ END IF
 #else
 CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE)
 !$acc kernels async(10)
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK)
 END DO !CONCURRENT
@@ -697,18 +703,21 @@ CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE)
 IF (.NOT. LFLAT) THEN
   CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK)
   END DO !CONCURRENT
   !$acc end kernels
   CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE )
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -729,6 +738,7 @@ END IF
 IF (KSPLT==1) THEN
   ! Contribution to the dynamic production of TKE:
    !$acc kernels async(2)
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZWORK(JI,JJ,JK)     = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK)
    END DO !CONCURRENT
@@ -779,6 +789,7 @@ END IF
 ! Computes the V variance
 IF (.NOT. L2D) THEN
    !$acc kernels async(3)
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK)                                  &
            - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK)                        &
@@ -829,6 +840,7 @@ ZFLX(:,:,IKB-1) =                                                            &
       PUSLOPEM(:,:) * PSINSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)    &
     + PVSLOPEM(:,:) * PCOSSLOPE(:,:)    * PSINSLOPE(:,:) * ZDIRSINZW(:,:)    )
 #else
+!$acc loop independent collapse(2)   
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
 ZFLX(JI,JJ,IKB-1) =                                                             &
         PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ))          &
@@ -896,6 +908,7 @@ IF (.NOT. L2D) THEN
 #else
   CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE)
   !$acc kernels async(10)
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -908,18 +921,21 @@ IF (.NOT. L2D) THEN
   IF (.NOT. LFLAT) THEN
     CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels
     CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels
     CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
     END DO !CONCURRENT   
@@ -932,6 +948,7 @@ IF (.NOT. L2D) THEN
     !$acc end kernels
   ELSE
      !$acc kernels async(1)
+     !$acc loop independent collapse(3)
      DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
         PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK)
      END DO !CONCURRENT    
@@ -940,6 +957,7 @@ IF (.NOT. L2D) THEN
 ! Contribution to the dynamic production of TKE:
   IF (KSPLT==1) THEN
      !$acc kernels async(2)
+     !$acc loop independent collapse(3)
      DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
         ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK)
      END DO !CONCURRENT   
@@ -995,6 +1013,7 @@ END IF
 ! Computes the W variance
 IF (.NOT. L2D) THEN
    !$acc kernels async(2)
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK)                                  &
            - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK)                        &
@@ -1039,6 +1058,7 @@ ZFLX(:,:,IKB-1) = &
   +     PTAU33M(:,:) * PDIRCOSZW(:,:)**2                                &
   +2. * PCDUEFF(:,:)* PUSLOPEM(:,:)  * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)
 #else
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )        
 ZFLX(JI,JJ,IKB-1) = &
         PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ))                                &
@@ -1112,6 +1132,7 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ)
 CALL GZ_W_M_DEVICE(1,IKU,1,ZWP,PDZZ,GZ_W_M_ZWP)
 #endif
 !$acc kernels async(2)
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU)
    ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) &
         - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK))
@@ -1121,6 +1142,7 @@ END DO !CONCURRENT
 IF (KSPLT==1) THEN
    !Contribution to the dynamic production of TKE:
    !$acc kernels async(2)
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK)
    END DO !CONCURRENT   
diff --git a/src/MNH/turb_hor_thermo_flux.f90 b/src/MNH/turb_hor_thermo_flux.f90
index f589aec72..14cba9ab4 100644
--- a/src/MNH/turb_hor_thermo_flux.f90
+++ b/src/MNH/turb_hor_thermo_flux.f90
@@ -342,6 +342,7 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE)
 CALL MXM_DEVICE( PK, ZTMP1_DEVICE )
 CALL GX_M_U_DEVICE(1,IKU,1,PTHLM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLX(JI,JJ,JK)     = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
 END DO
@@ -383,7 +384,10 @@ ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM(  SPREAD( PSFTHM(:,:)* PDIRCOSXW(:,:), 3,1) )
                        - ZFLX(:,:,IKB:IKB)
 #else
 !$acc kernels
- ZTMP1_DEVICE(:,:,1) = PSFTHM(:,:)* PDIRCOSXW(:,:)
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZTMP1_DEVICE(JI,JJ,1) = PSFTHM(JI,JJ)* PDIRCOSXW(JI,JJ)
+END DO
 !$acc end kernels
   CALL MXM_DEVICE( ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1) )
 !$acc kernels
@@ -404,25 +408,29 @@ END IF
 #else
 IF (.NOT. LFLAT) THEN
   CALL MXM_DEVICE(PRHODJ, ZTMP1_DEVICE)
-!$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
-END DO
-!$acc end kernels
+  !$acc kernels
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
+  END DO
+  !$acc end kernels
   CALL DXF_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE)
-!$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
-END DO
-!$acc end kernels
+  !$acc kernels
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
+  END DO
+  !$acc end kernels
   CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
-!$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)  
-   ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
-END DO
+  !$acc kernels
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)  
+     ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
+  END DO
 !$acc end kernels
   CALL MXF_DEVICE(ZTMP2_DEVICE, ZTMP4_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)  
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
 END DO
@@ -768,6 +776,7 @@ END IF
   CALL MXM_DEVICE( PK, ZTMP1_DEVICE )
   CALL GX_M_U_DEVICE(1,IKU,1,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLX(JI,JJ,JK)     = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
 END DO
@@ -806,33 +815,38 @@ END DO
   IF (.NOT. LFLAT) THEN
     CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE )
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL DZF_DEVICE(1,IKU,1, ZTMP2_DEVICE, ZTMP4_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
-   PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
+       PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK)
+    END DO
     !$acc end kernels
   ELSE
     CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
@@ -1140,9 +1154,10 @@ END IF
   CALL MYM_DEVICE( PK, ZTMP1_DEVICE )
   CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE)
   !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-  ZFLX(JI,JJ,JK)     = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
-END DO
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZFLX(JI,JJ,JK)     = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
+  END DO
   ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) 
   !$acc end kernels
 ELSE
@@ -1186,33 +1201,38 @@ IF (.NOT. L2D) THEN
   IF (.NOT. LFLAT) THEN
     CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE )
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL MZM_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL MYF_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE)
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
-END DO
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
+    END DO
     !$acc end kernels
     CALL DZF_DEVICE(1,IKU,1, ZTMP1_DEVICE, ZTMP2_DEVICE )
     !$acc kernels
-DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-   PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK)
-END DO   
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK)
+    END DO
     !$acc end kernels
   ELSE
     CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE)
@@ -1501,6 +1521,7 @@ IF (KRR/=0) THEN
     CALL MYM_DEVICE( PK, ZTMP1_DEVICE )
     CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY, ZTMP2_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZFLX(JI,JJ,JK)     = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK)
     END DO !CONCURRENT
@@ -1546,6 +1567,7 @@ IF (KRR/=0) THEN
     IF (.NOT. LFLAT) THEN
       CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE)
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
       END DO
@@ -1553,18 +1575,21 @@ IF (KRR/=0) THEN
       CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE )
       !
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
       END DO
       !$acc end kernels
       CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
       END DO
       !$acc end kernels
       CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
       END DO
@@ -1572,6 +1597,7 @@ IF (KRR/=0) THEN
       CALL DZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE )
       !
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) - ZTMP3_DEVICE(JI,JJ,JK) + ZTMP2_DEVICE(JI,JJ,JK)
       END DO
diff --git a/src/MNH/turb_hor_tke.f90 b/src/MNH/turb_hor_tke.f90
index 748a65bc3..6a6174eaf 100644
--- a/src/MNH/turb_hor_tke.f90
+++ b/src/MNH/turb_hor_tke.f90
@@ -226,6 +226,7 @@ ZFLX = -XCET * MXM(PK) * GX_M_U(1,IKU,1,PTKEM,PDXX,PDZZ,PDZX) ! < u'e >
 CALL MXM_DEVICE(PK,ZTMP1_DEVICE)
 CALL GX_M_U_DEVICE(1,IKU,1,PTKEM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLX(JI,JJ,JK) = -XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) ! < u'e >
 END DO !CONCURRENT    
@@ -233,9 +234,12 @@ END DO !CONCURRENT
 !
 ! special case near the ground ( uncentred gradient )
 !
-ZFLX(:,:,IKB) =  ZCOEFF(:,:,IKB+2)*PTKEM(:,:,IKB+2)                     &
-               + ZCOEFF(:,:,IKB+1)*PTKEM(:,:,IKB+1)                     &
-               + ZCOEFF(:,:,IKB  )*PTKEM(:,:,IKB  )     
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZFLX(JI,JJ,IKB) =  ZCOEFF(JI,JJ,IKB+2)*PTKEM(JI,JJ,IKB+2)                     &
+                 + ZCOEFF(JI,JJ,IKB+1)*PTKEM(JI,JJ,IKB+1)                     &
+                 + ZCOEFF(JI,JJ,IKB  )*PTKEM(JI,JJ,IKB  )
+END DO
 !$acc end kernels 
 !
 #ifndef MNH_OPENACC
@@ -285,30 +289,35 @@ END IF
 IF (.NOT. LFLAT) THEN
   CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels 
   CALL DXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PINV_PDXX(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels 
   CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels 
   CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels 
   CALL DZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PTRH(JI,JJ,JK) =-( ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) /PRHODJ(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -352,6 +361,7 @@ IF (.NOT. L2D) THEN
   CALL MYM_DEVICE(PK,ZTMP1_DEVICE)
   CALL GY_M_V_DEVICE(1,IKU,1,PTKEM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZFLX(JI,JJ,JK) =-XCET * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) ! < v'e >
    END DO !CONCURRENT   
@@ -415,32 +425,38 @@ IF (.NOT. L2D) THEN
   IF (.NOT. LFLAT) THEN
     CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels 
     CALL DYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PINV_PDYY(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels 
     CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels 
     CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels 
     CALL DZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP3_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-       PTRH(JI,JJ,JK) = PTRH(JI,JJ,JK) - (  ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) /PRHODJ(JI,JJ,JK)
+       PTRH(JI,JJ,JK) = PTRH(JI,JJ,JK) - (  ZTMP1_DEVICE(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) ) &
+            /PRHODJ(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels 
   ELSE
diff --git a/src/MNH/turb_hor_uv.f90 b/src/MNH/turb_hor_uv.f90
index 9866f9adf..d1da623a0 100644
--- a/src/MNH/turb_hor_uv.f90
+++ b/src/MNH/turb_hor_uv.f90
@@ -323,7 +323,10 @@ IKU = SIZE(PUM,3)
 #ifndef MNH_BITREP
 ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 )
 #else
-ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) )
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) )
+END DO
 #endif
 !$acc end kernels
 !
@@ -355,6 +358,7 @@ CALL MXM_DEVICE(PK,ZTMP1_DEVICE)
 CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
 IF (.NOT. L2D) THEN
    !$acc kernels
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZFLX(JI,JJ,JK)= - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK))
    END DO !CONCURRENT   
@@ -450,6 +454,7 @@ ZFLX(:,:,IKB-1) =                                                           &
           PDIRCOSZW(:,:) * ZDIRSINZW(:,:)                                     &
     +PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) )
 #else
+!$acc loop independent collapse(2)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
 ZFLX(JI,JJ,IKB-1) =                                                           &
    PTAU11M(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ))     &
@@ -507,12 +512,14 @@ END IF
 #else
 CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
 END DO !CONCURRENT   
 !$acc end kernels
 CALL MXM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) 
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
 END DO !CONCURRENT   
@@ -522,30 +529,35 @@ IF (.NOT. LFLAT) THEN
   CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE)
   CALL MZM_DEVICE(PDYY,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MYF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MXM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT    
   !$acc end kernels
   CALL DZF_DEVICE(1,IKU,1,ZTMP5_DEVICE,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -570,12 +582,14 @@ END IF
 #else
 CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
 END DO !CONCURRENT   
 !$acc end kernels
 CALL MYM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) 
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
 END DO !CONCURRENT   
@@ -585,30 +599,35 @@ IF (.NOT. LFLAT) THEN
   CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE)
   CALL MZM_DEVICE(PDXX,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MYM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MXF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL DZF_DEVICE(1,IKU,1,ZTMP5_DEVICE,ZTMP3_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -635,6 +654,7 @@ IF (KSPLT==1) THEN
 #else
   IF (.NOT. L2D) THEN
      !$acc kernels
+     !$acc loop independent collapse(3)
      DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
         ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK))
      END DO !CONCURRENT   
@@ -647,6 +667,7 @@ IF (KSPLT==1) THEN
   CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
   CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZWORK(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
diff --git a/src/MNH/turb_hor_uw.f90 b/src/MNH/turb_hor_uw.f90
index ff27caa32..6b0501fdd 100644
--- a/src/MNH/turb_hor_uw.f90
+++ b/src/MNH/turb_hor_uw.f90
@@ -291,6 +291,7 @@ ZFLX(:,:,:) =                                                      &
 CALL MZM_DEVICE(PK,ZTMP1_DEVICE)
 CALL MXM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLX(JI,JJ,JK) = - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * GX_W_UW_PWM(JI,JJ,JK)
 END DO !CONCURRENT   
@@ -331,12 +332,14 @@ PRUS(:,:,:) = PRUS(:,:,:) - DZF( ZFLX* MXM( PMZM_PRHODJ ) / MXM( PDZZ ) )
 CALL MXM_DEVICE( PMZM_PRHODJ, ZTMP1_DEVICE )
 CALL MXM_DEVICE( PDZZ, ZTMP2_DEVICE )
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)* ZTMP1_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK)
 END DO !CONCURRENT   
 !$acc end kernels
 CALL DZF_DEVICE(1,IKU,1, ZTMP3_DEVICE, ZTMP1_DEVICE )
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK)
 END DO !CONCURRENT   
@@ -355,12 +358,14 @@ END IF
 #else
   CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -368,12 +373,14 @@ END IF
   CALL DXF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE)
 IF (.NOT. LFLAT) THEN
    !$acc kernels
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*PDZX(JI,JJ,JK)
    END DO !CONCURRENT   
    !$acc end kernels
    CALL MZF_DEVICE(1,IKU,1, ZTMP2_DEVICE, ZTMP3_DEVICE )
    !$acc kernels
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK)*PINV_PDXX(JI,JJ,JK)
    END DO !CONCURRENT  
@@ -381,12 +388,14 @@ IF (.NOT. LFLAT) THEN
    CALL MXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE )
    CALL MZF_DEVICE(1,IKU,1,PDZZ, ZTMP2_DEVICE)
    !$acc kernels
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       ZTMP4_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK)
    END DO !CONCURRENT   
    !$acc end kernels
    CALL DZM_DEVICE(1,IKU,1, ZTMP4_DEVICE, ZTMP2_DEVICE )
    !$acc kernels
+   !$acc loop independent collapse(3)
    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
       PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK)         &
            - ZTMP1_DEVICE(JI,JJ,JK)           &
@@ -410,6 +419,7 @@ IF (KSPLT==1) THEN
 #else
   CALL GZ_U_UW_DEVICE(1,IKU,1,PUM,PDZZ,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *( ZTMP1_DEVICE(JI,JJ,JK) + GX_W_UW_PWM(JI,JJ,JK) )
   END DO !CONCURRENT   
@@ -417,6 +427,7 @@ IF (KSPLT==1) THEN
   CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE )
   CALL MZF_DEVICE(1,IKU,1, ZTMP1_DEVICE, ZTMP2_DEVICE )
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZWORK(JI,JJ,JK) = -ZTMP2_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
diff --git a/src/MNH/turb_hor_vw.f90 b/src/MNH/turb_hor_vw.f90
index 6ea12270b..53d816171 100644
--- a/src/MNH/turb_hor_vw.f90
+++ b/src/MNH/turb_hor_vw.f90
@@ -299,6 +299,7 @@ IF (.NOT. L2D) THEN
   CALL MZM_DEVICE(PK,ZTMP1_DEVICE)
   CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZFLX(JI,JJ,JK) = - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * GY_W_VW_PWM(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -350,12 +351,14 @@ IF (.NOT. L2D) THEN
   CALL MYM_DEVICE( PMZM_PRHODJ, ZTMP1_DEVICE )
   CALL MYM_DEVICE( PDZZ, ZTMP2_DEVICE )
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)* ZTMP1_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
   !$acc end kernels
   CALL DZF_DEVICE(1,IKU,1, ZTMP3_DEVICE, ZTMP1_DEVICE )
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK)
   END DO !CONCURRENT   
@@ -379,24 +382,28 @@ IF (.NOT. L2D) THEN
   IF (.NOT. LFLAT) THEN
     CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
     END DO !CONCURRENT  
     !$acc end kernels
     CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels
     CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK)  = ZFLX(JI,JJ,JK) *PDZY(JI,JJ,JK) 
     END DO !CONCURRENT   
     !$acc end kernels
     CALL MZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP3_DEVICE )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
     END DO !CONCURRENT  
@@ -404,12 +411,14 @@ IF (.NOT. L2D) THEN
     CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE)
     CALL MZF_DEVICE(1,IKU,1,PDZZ,ZTMP2_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP4_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK)
     END DO !CONCURRENT   
     !$acc end kernels
     CALL DZM_DEVICE(1,IKU,1,ZTMP4_DEVICE,ZTMP2_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK)  &
             - ZTMP1_DEVICE(JI,JJ,JK)          &
@@ -443,6 +452,7 @@ IF (KSPLT==1) THEN
 #else
     CALL GZ_V_VW_DEVICE(1,IKU,1,PVM,PDZZ,ZTMP1_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) *( ZTMP1_DEVICE(JI,JJ,JK) + GY_W_VW_PWM(JI,JJ,JK) )
     END DO !CONCURRENT   
@@ -450,6 +460,7 @@ IF (KSPLT==1) THEN
     CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
     CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE)
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZWORK(JI,JJ,JK) = -ZTMP2_DEVICE(JI,JJ,JK)
     END DO !CONCURRENT   
diff --git a/src/MNH/turb_ver.f90 b/src/MNH/turb_ver.f90
index 673c4c304..7f2dc537c 100644
--- a/src/MNH/turb_ver.f90
+++ b/src/MNH/turb_ver.f90
@@ -666,6 +666,7 @@ ENDIF
 ! Denominator factor in 3rd order terms
 !
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZD(JI,JJ,JK) = (1.+ZREDTH1(JI,JJ,JK)+ZREDR1(JI,JJ,JK)) * (1.+0.5*(ZREDTH1(JI,JJ,JK)+ZREDR1(JI,JJ,JK)))
 END DO   
diff --git a/src/MNH/turb_ver_dyn_flux.f90 b/src/MNH/turb_ver_dyn_flux.f90
index 834aa2d85..badf3088d 100644
--- a/src/MNH/turb_ver_dyn_flux.f90
+++ b/src/MNH/turb_ver_dyn_flux.f90
@@ -525,7 +525,10 @@ ZSOURCE(:,:,:) = 0.
 #ifndef MNH_BITREP
 ZDIRSINZW(:,:) = SQRT(1.-PDIRCOSZW(:,:)**2)
 #else
-ZDIRSINZW(:,:) = SQRT(1.-BR_P2(PDIRCOSZW(:,:)))
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZDIRSINZW(JI,JJ) = SQRT(1.-BR_P2(PDIRCOSZW(JI,JJ)))
+END DO
 #endif
 !  compute the coefficients for the uncentred gradient computation near the 
 !  ground
@@ -570,6 +573,7 @@ CALL MXM_DEVICE( PDZZ, ZTMP4_DEVICE )
 #ifndef MNH_BITREP
 ZA(:,:,:) = -PTSTEP * XCMFS * ZTMP1_DEVICE(:,:,:) * ZTMP3_DEVICE(:,:,:) / ZTMP4_DEVICE(:,:,:)**2
 #else
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZA(JI,JJ,JK) = -PTSTEP * XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / BR_P2(ZTMP4_DEVICE(JI,JJ,JK))
 END DO
@@ -585,21 +589,31 @@ END DO
 !$acc kernels
 #ifndef MNH_BITREP
 ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (PDIRCOSZW(:,:)**2 - ZDIRSINZW(:,:)**2) &
+     * PCOSSLOPE(:,:)
 #else
-ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (BR_P2(PDIRCOSZW(:,:)) - BR_P2(ZDIRSINZW(:,:))) &
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )     
+   ZCOEFFLXU(JI,JJ,1) = PCDUEFF(JI,JJ) * (BR_P2(PDIRCOSZW(JI,JJ)) - BR_P2(ZDIRSINZW(JI,JJ))) &
+        * PCOSSLOPE(JI,JJ)
+END DO
 #endif
-                                   * PCOSSLOPE(:,:)
-ZCOEFFLXV(:,:,1) = PCDUEFF(:,:) * PDIRCOSZW(:,:) * PSINSLOPE(:,:)
-
-! prepare the implicit scheme coefficients for the surface flux
-ZCOEFS(:,:,1)=  ZCOEFFLXU(:,:,1) * PCOSSLOPE(:,:) * PDIRCOSZW(:,:)  &
-                 +ZCOEFFLXV(:,:,1) * PSINSLOPE(:,:)
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZCOEFFLXV(JI,JJ,1) = PCDUEFF(JI,JJ) * PDIRCOSZW(JI,JJ) * PSINSLOPE(JI,JJ)
+
+   ! prepare the implicit scheme coefficients for the surface flux
+   ZCOEFS(JI,JJ,1) = ZCOEFFLXU(JI,JJ,1) * PCOSSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ)  &
+                   + ZCOEFFLXV(JI,JJ,1) * PSINSLOPE(JI,JJ)
+END DO
 !
 ! average this flux to be located at the U,W vorticity point
 #ifndef MNH_OPENACC
 ZCOEFS(:,:,1:1)=MXM(ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) )
 #else
-ZTMP1_DEVICE(:,:,1) = ZCOEFS(:,:,1) / PDZZ(:,:,IKB)
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZTMP1_DEVICE(JI,JJ,1) = ZCOEFS(JI,JJ,1) / PDZZ(JI,JJ,IKB)
+END DO
 !$acc end kernels
 CALL MXM_DEVICE(ZTMP1_DEVICE(:,:,1:1),ZCOEFS(:,:,1:1))
 #endif
@@ -673,6 +687,7 @@ ZFLXZ(:,:,:)     = -XCMFS * MXM(ZKEFF) * &
                   DZM (PIMPL*ZRES + PEXPL*PUM) / MXM(PDZZ)
 #else
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PUM(JI,JJ,JK)
 END DO
@@ -681,6 +696,7 @@ CALL MXM_DEVICE(ZKEFF,ZTMP1_DEVICE)
 CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE,ZTMP3_DEVICE)
 CALL MXM_DEVICE(PDZZ,ZTMP4_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLXZ(JI,JJ,JK)     = -XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / ZTMP4_DEVICE(JI,JJ,JK)
 END DO
@@ -738,6 +754,7 @@ PDP(:,:,:) = - MZF( MXF ( ZFLXZ * GZ_U_UW(PUM,PDZZ) )  )
 #else
 CALL GZ_U_UW_DEVICE(KKA,KKU,KKL,PUM,PDZZ,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
    ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
 END DO
@@ -824,13 +841,15 @@ IF(HTURBDIM=='3DIM') THEN
   END IF
 #else
   CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
-!$acc kernels
+  !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) /PDXX(JI,JJ,JK)
   END DO
 !$acc end kernels
   CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE )
-!$acc kernels
+  !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) 
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK)
   END DO
@@ -839,18 +858,21 @@ IF(HTURBDIM=='3DIM') THEN
   IF (.NOT. LFLAT) THEN
     CALL MZF_DEVICE(KKA,KKU,KKL,PDZZ,ZTMP2_DEVICE )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP3_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK)*PDZX(JI,JJ,JK)
     END DO
     !$acc end kernels
     CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP3_DEVICE,ZTMP4_DEVICE )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP3_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDXX(JI,JJ,JK)
     END DO   
     !$acc end kernels
     CALL MXF_DEVICE( ZTMP3_DEVICE,ZTMP4_DEVICE )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP3_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
     END DO
@@ -872,14 +894,16 @@ IF(HTURBDIM=='3DIM') THEN
   ZA(:,:,:)=-MZF( MXF ( ZFLXZ * GX_W_UW( PWM,PDXX,PDZZ,PDZX) )  )
 #else
   CALL GX_W_UW_DEVICE(KKA,KKU,KKL, PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE)
-!$acc kernels
+  !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
   END DO
 !$acc end kernels
   CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
   CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP1_DEVICE,ZTMP2_DEVICE )
-!$acc kernels
+  !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZA(JI,JJ,JK)=-ZTMP2_DEVICE(JI,JJ,JK)
   END DO
@@ -1013,6 +1037,7 @@ CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
 #ifndef MNH_BITREP
 ZA(:,:,:)    = - PTSTEP * XCMFS * ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) / ZTMP2_DEVICE(:,:,:)**2
 #else
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZA(JI,JJ,JK)    = - PTSTEP * XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) / BR_P2(ZTMP2_DEVICE(JI,JJ,JK))
 END DO
@@ -1026,21 +1051,31 @@ END DO
 ! wind following the slope
 #ifndef MNH_BITREP
 ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (PDIRCOSZW(:,:)**2 - ZDIRSINZW(:,:)**2) &
+                                   * PSINSLOPE(:,:)
 #else
-ZCOEFFLXU(:,:,1) = PCDUEFF(:,:) * (BR_P2(PDIRCOSZW(:,:)) - BR_P2(ZDIRSINZW(:,:))) &
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZCOEFFLXU(JI,JJ,1) = PCDUEFF(JI,JJ) * (BR_P2(PDIRCOSZW(JI,JJ)) - BR_P2(ZDIRSINZW(JI,JJ))) &
+        * PSINSLOPE(JI,JJ)
+END DO
 #endif
-                                   * PSINSLOPE(:,:)
 ZCOEFFLXV(:,:,1) = PCDUEFF(:,:) * PDIRCOSZW(:,:) * PCOSSLOPE(:,:)
 
 ! prepare the implicit scheme coefficients for the surface flux
-ZCOEFS(:,:,1)=  ZCOEFFLXU(:,:,1) * PSINSLOPE(:,:) * PDIRCOSZW(:,:)  &
-               +ZCOEFFLXV(:,:,1) * PCOSSLOPE(:,:)
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZCOEFS(JI,JJ,1) = ZCOEFFLXU(JI,JJ,1) * PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ)  &
+                   + ZCOEFFLXV(JI,JJ,1) * PCOSSLOPE(JI,JJ)
+END DO
 !
 ! average this flux to be located at the V,W vorticity point
 #ifndef MNH_OPENACC
 ZCOEFS(:,:,1:1)=MYM(ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB) )
 #else
-ZTMP1_DEVICE(:,:,1:1) = ZCOEFS(:,:,1:1) / PDZZ(:,:,IKB:IKB)
+!$acc loop independent collapse(2)
+DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+   ZTMP1_DEVICE(JI,JJ,1) = ZCOEFS(JI,JJ,1) / PDZZ(JI,JJ,IKB)
+END DO
 !$acc end kernels
 CALL MYM_DEVICE(ZTMP1_DEVICE(:,:,1:1),ZCOEFS(:,:,1:1) )
 #endif
@@ -1119,14 +1154,16 @@ ZFLXZ(:,:,IKB:IKB)   =   MYM(PDZZ(:,:,IKB:IKB))  *                       &
   ) / 0.5 / ( 1. + MYM(PRHODJ(:,:,KKA:KKA)) / MYM(PRHODJ(:,:,IKB:IKB)) )
 #else
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
-ZTMP1_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PVM(JI,JJ,JK)
+   ZTMP1_DEVICE(JI,JJ,JK) = PIMPL*ZRES(JI,JJ,JK) + PEXPL*PVM(JI,JJ,JK)
 END DO
 !$acc end kernels
 CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE)
 CALL MYM_DEVICE(PDZZ,ZTMP3_DEVICE)
 CALL MYM_DEVICE(ZKEFF,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLXZ(JI,JJ,JK)   = -XCMFS * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / ZTMP3_DEVICE(JI,JJ,JK)
 END DO
@@ -1175,6 +1212,7 @@ ZA(:,:,:) = - MZF( MYF ( ZFLXZ * GZ_V_VW(PVM,PDZZ) ) )
 #else
 CALL GZ_V_VW_DEVICE(KKA,KKU,KKL,PVM,PDZZ,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
 END DO
@@ -1182,6 +1220,7 @@ END DO
 CALL MYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE )
 CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP3_DEVICE, ZTMP1_DEVICE )
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZA(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK)
 END DO   
@@ -1261,33 +1300,38 @@ IF(HTURBDIM=='3DIM') THEN
 #else
   IF (.NOT. L2D) THEN 
     CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE)
-!$acc kernels
+    !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) /PDYY(JI,JJ,JK)
     END DO
-!$acc end kernels
+    !$acc end kernels
     CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
-!$acc kernels
+    !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK)
     END DO
-!$acc end kernels
+    !$acc end kernels
     CALL DYF_DEVICE( ZTMP2_DEVICE,ZTMP1_DEVICE )
     IF (.NOT. LFLAT) THEN
       CALL MZF_DEVICE(KKA,KKU,KKL,PDZZ,ZTMP2_DEVICE )
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP3_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK)*PDZY(JI,JJ,JK)
       END DO
       !$acc end kernels
       CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP3_DEVICE,ZTMP4_DEVICE)
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP3_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDYY(JI,JJ,JK)
       END DO
       !$acc end kernels
       CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
       !$acc kernels
+      !$acc loop independent collapse(3)
       DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
          ZTMP3_DEVICE(JI,JJ,JK) =  PRHODJ(JI,JJ,JK) / ZTMP2_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
       END DO
@@ -1310,18 +1354,20 @@ IF(HTURBDIM=='3DIM') THEN
     ZA(:,:,:) = - MZF( MYF ( ZFLXZ(:,:,:) * GY_W_VW( PWM,PDYY,PDZZ,PDZY) )  )
 #else
     CALL GY_W_VW_DEVICE(KKA,KKU,KKL, PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE)
-!$acc kernels
+    !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZFLXZ(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
     END DO
-!$acc end kernels
+    !$acc end kernels
     CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE  )
-!$acc kernels
+    !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZA(JI,JJ,JK) = - ZTMP2_DEVICE(JI,JJ,JK)
     END DO
-!$acc end kernels
+    !$acc end kernels
 #endif
   !
   ! evaluate the dynamic production at w(IKB+KKL) in PDP(IKB)
diff --git a/src/MNH/turb_ver_thermo_corr.f90 b/src/MNH/turb_ver_thermo_corr.f90
index c045f7a7d..9227a238d 100644
--- a/src/MNH/turb_ver_thermo_corr.f90
+++ b/src/MNH/turb_ver_thermo_corr.f90
@@ -572,11 +572,15 @@ END IF
 #ifndef MNH_BITREP
   ZTMP1_DEVICE(:,:,:) = PPHI3(:,:,:)*PDTH_DZ(:,:,:)**2
 #else
-  ZTMP1_DEVICE(:,:,:) = PPHI3(:,:,:)*BR_P2(PDTH_DZ(:,:,:))
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     ZTMP1_DEVICE(JI,JJ,JK) = PPHI3(JI,JJ,JK)*BR_P2(PDTH_DZ(JI,JJ,JK))
+  END DO
 #endif
   !$acc end kernels
   CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:))
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZF      (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) 
   END DO
@@ -718,12 +722,14 @@ END IF
   !$acc end kernels
   CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:))
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP3_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
   END DO
   !$acc end kernels
   CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP3_DEVICE(:,:,:),ZTMP4_DEVICE(:,:,:) )
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZFLXZ(JI,JJ,JK)   = ZF(JI,JJ,JK)                                                              &
           + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK)
@@ -744,17 +750,20 @@ END IF
       +ZCOEFF(:,:,IKB      )*PTHLP(:,:,IKB  )   )**2          &
    ) 
 #else
-  ZFLXZ(:,:,IKB) = XCTV * PPHI3(:,:,IKB+KKL) * PLM(:,:,IKB)   &
-     * PLEPS(:,:,IKB)                                         &
+  !$acc loop independent collapse(2)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+  ZFLXZ(JI,JJ,IKB) = XCTV * PPHI3(JI,JJ,IKB+KKL) * PLM(JI,JJ,IKB)   &
+     * PLEPS(JI,JJ,IKB)                                         &
   *( PEXPL *                                                  &
-     BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PTHLM(:,:,IKB+2*KKL)        &
-      +ZCOEFF(:,:,IKB+KKL  )*PTHLM(:,:,IKB+KKL  )             & 
-      +ZCOEFF(:,:,IKB      )*PTHLM(:,:,IKB  )   )             &
+     BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLM(JI,JJ,IKB+2*KKL)        &
+      +ZCOEFF(JI,JJ,IKB+KKL  )*PTHLM(JI,JJ,IKB+KKL  )             & 
+      +ZCOEFF(JI,JJ,IKB      )*PTHLM(JI,JJ,IKB  )   )             &
     +PIMPL *                                                  &
-     BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PTHLP(:,:,IKB+2*KKL)        &
-      +ZCOEFF(:,:,IKB+KKL  )*PTHLP(:,:,IKB+KKL  )             &
-      +ZCOEFF(:,:,IKB      )*PTHLP(:,:,IKB  )   )             &
-   ) 
+     BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLP(JI,JJ,IKB+2*KKL)        &
+      +ZCOEFF(JI,JJ,IKB+KKL  )*PTHLP(JI,JJ,IKB+KKL  )             &
+      +ZCOEFF(JI,JJ,IKB      )*PTHLP(JI,JJ,IKB  )   )             &
+      )
+   END DO
 #endif
   !
   ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) 
@@ -843,6 +852,7 @@ END IF
 !$acc end kernels
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:))
 !$acc kernels
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
     ZF      (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
 END DO
@@ -1031,6 +1041,7 @@ END DO
     CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:))
     CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP4_DEVICE(:,:,:))
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP1_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)      
@@ -1053,6 +1064,7 @@ END DO
     CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP8_DEVICE(:,:,:),ZTMP1_DEVICE(:,:,:))
     !!!
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP7_DEVICE(JI,JJ,JK) =   ( ZTMP3_DEVICE(JI,JJ,JK) + ZTMP4_DEVICE(JI,JJ,JK)) * PDR_DZ(JI,JJ,JK)  &
                               * ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK) &
@@ -1068,6 +1080,7 @@ END DO
     CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:))
     CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP4_DEVICE(:,:,:))
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP1_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) /PDZZ(JI,JJ,JK)
@@ -1078,6 +1091,7 @@ END DO
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) )
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:), ZTMP5_DEVICE(:,:,:) )
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZFLXZ(JI,JJ,JK)   = ZF(JI,JJ,JK)                           &
             + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*0.5 * ZTMP3_DEVICE(JI,JJ,JK)  &
@@ -1087,23 +1101,26 @@ END DO
 #endif
     !
     ! special case near the ground ( uncentred gradient )
-    ZFLXZ(:,:,IKB) =                                            & 
-    (XCHT1 * PPHI3(:,:,IKB+KKL) + XCHT2 * PPSI3(:,:,IKB+KKL))   &
+    !$acc loop independent collapse(2)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+    ZFLXZ(JI,JJ,IKB) =                                            & 
+    (XCHT1 * PPHI3(JI,JJ,IKB+KKL) + XCHT2 * PPSI3(JI,JJ,IKB+KKL))   &
     *( PEXPL *                                                  &
-       ( ZCOEFF(:,:,IKB+2*KKL)*PTHLM(:,:,IKB+2*KKL)             &
-        +ZCOEFF(:,:,IKB+KKL  )*PTHLM(:,:,IKB+KKL  )             & 
-        +ZCOEFF(:,:,IKB      )*PTHLM(:,:,IKB      ))            &
-      *( ZCOEFF(:,:,IKB+2*KKL)*PRM(:,:,IKB+2*KKL,1)             &
-        +ZCOEFF(:,:,IKB+KKL  )*PRM(:,:,IKB+KKL,1  )             & 
-        +ZCOEFF(:,:,IKB      )*PRM(:,:,IKB  ,1    ))            &
+       ( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLM(JI,JJ,IKB+2*KKL)             &
+        +ZCOEFF(JI,JJ,IKB+KKL  )*PTHLM(JI,JJ,IKB+KKL  )             & 
+        +ZCOEFF(JI,JJ,IKB      )*PTHLM(JI,JJ,IKB      ))            &
+      *( ZCOEFF(JI,JJ,IKB+2*KKL)*PRM(JI,JJ,IKB+2*KKL,1)             &
+        +ZCOEFF(JI,JJ,IKB+KKL  )*PRM(JI,JJ,IKB+KKL,1  )             & 
+        +ZCOEFF(JI,JJ,IKB      )*PRM(JI,JJ,IKB  ,1    ))            &
       +PIMPL *                                                  &
-       ( ZCOEFF(:,:,IKB+2*KKL)*PTHLP(:,:,IKB+2*KKL)             &
-        +ZCOEFF(:,:,IKB+KKL  )*PTHLP(:,:,IKB+KKL  )             &
-        +ZCOEFF(:,:,IKB      )*PTHLP(:,:,IKB      ))            &
-      *( ZCOEFF(:,:,IKB+2*KKL)*PRP(:,:,IKB+2*KKL  )             &
-        +ZCOEFF(:,:,IKB+KKL  )*PRP(:,:,IKB+KKL    )             & 
-        +ZCOEFF(:,:,IKB      )*PRP(:,:,IKB        ))            &
-     ) 
+       ( ZCOEFF(JI,JJ,IKB+2*KKL)*PTHLP(JI,JJ,IKB+2*KKL)             &
+        +ZCOEFF(JI,JJ,IKB+KKL  )*PTHLP(JI,JJ,IKB+KKL  )             &
+        +ZCOEFF(JI,JJ,IKB      )*PTHLP(JI,JJ,IKB      ))            &
+      *( ZCOEFF(JI,JJ,IKB+2*KKL)*PRP(JI,JJ,IKB+2*KKL  )             &
+        +ZCOEFF(JI,JJ,IKB+KKL  )*PRP(JI,JJ,IKB+KKL    )             & 
+        +ZCOEFF(JI,JJ,IKB      )*PRP(JI,JJ,IKB        ))            &
+        )
+    END DO
     !    
     ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) 
     !
@@ -1199,11 +1216,15 @@ END DO
 #ifndef MNH_BITREP
     ZTMP1_DEVICE(:,:,:) = PPSI3(:,:,:)*PDR_DZ(:,:,:)**2
 #else
-    ZTMP1_DEVICE(:,:,:) = PPSI3(:,:,:)*BR_P2(PDR_DZ(:,:,:))
+    !$acc loop independent collapse(3)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+       ZTMP1_DEVICE(JI,JJ,JK) = PPSI3(JI,JJ,JK)*BR_P2(PDR_DZ(JI,JJ,JK))
+    END DO
 #endif
     !$acc end kernels
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE(:,:,:),ZTMP2_DEVICE(:,:,:))
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZF      (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
     END DO
@@ -1349,18 +1370,21 @@ END DO
     !$acc end kernels
     CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:))
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
     END DO
     !$acc end kernels
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP1_DEVICE(:,:,:))
     !$acc kernels
+    !$acc loop independent collapse(3)    
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZTMP2_DEVICE(JI,JJ,JK) = ZTMP3_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
     END DO
     !$acc end kernels
     CALL MZF_DEVICE(KKA,KKU,KKL,ZTMP2_DEVICE(:,:,:),ZTMP3_DEVICE(:,:,:))
     !$acc kernels
+    !$acc loop independent collapse(3)
     DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
        ZFLXZ(JI,JJ,JK)   = ZF(JI,JJ,JK)                        &
             + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) &
@@ -1382,17 +1406,20 @@ END DO
         +ZCOEFF(:,:,IKB      )*PRP(:,:,IKB      ))**2           &
      ) 
 #else
-    ZFLXZ(:,:,IKB) = XCHV * PPSI3(:,:,IKB+KKL) * PLM(:,:,IKB)   &
-        * PLEPS(:,:,IKB)                                        &
+    !$acc loop independent collapse(2)
+    DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
+    ZFLXZ(JI,JJ,IKB) = XCHV * PPSI3(JI,JJ,IKB+KKL) * PLM(JI,JJ,IKB)   &
+        * PLEPS(JI,JJ,IKB)                                        &
     *( PEXPL *                                                  &
-       BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PRM(:,:,IKB+2*KKL,1)        &
-        +ZCOEFF(:,:,IKB+KKL  )*PRM(:,:,IKB+KKL,1  )             & 
-        +ZCOEFF(:,:,IKB      )*PRM(:,:,IKB  ,1    ))            &
+       BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PRM(JI,JJ,IKB+2*KKL,1)        &
+        +ZCOEFF(JI,JJ,IKB+KKL  )*PRM(JI,JJ,IKB+KKL,1  )             & 
+        +ZCOEFF(JI,JJ,IKB      )*PRM(JI,JJ,IKB  ,1    ))            &
       +PIMPL *                                                  &
-       BR_P2( ZCOEFF(:,:,IKB+2*KKL)*PRP(:,:,IKB+2*KKL)          &
-        +ZCOEFF(:,:,IKB+KKL  )*PRP(:,:,IKB+KKL  )               &
-        +ZCOEFF(:,:,IKB      )*PRP(:,:,IKB      ))              &
-     ) 
+       BR_P2( ZCOEFF(JI,JJ,IKB+2*KKL)*PRP(JI,JJ,IKB+2*KKL)          &
+        +ZCOEFF(JI,JJ,IKB+KKL  )*PRP(JI,JJ,IKB+KKL  )               &
+        +ZCOEFF(JI,JJ,IKB      )*PRP(JI,JJ,IKB      ))              &
+        )
+    END DO
 #endif
     !
     ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) 
diff --git a/src/MNH/turb_ver_thermo_flux.f90 b/src/MNH/turb_ver_thermo_flux.f90
index a7f75d118..9d7775365 100644
--- a/src/MNH/turb_ver_thermo_flux.f90
+++ b/src/MNH/turb_ver_thermo_flux.f90
@@ -634,6 +634,7 @@ ZDFDDTDZ(:,:,:) = -XCSHF*ZKEFF(:,:,:)*D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PR
 #else
 CALL DZM_DEVICE(KKA,KKU,KKL,PTHLM,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZF      (JI,JJ,JK) = -XCSHF*PPHI3(JI,JJ,JK)*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK)
 END DO    
@@ -641,6 +642,7 @@ END DO
 !
 CALL D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PRED2TH3,PRED2THR3,HTURBDIM,GUSERV,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZDFDDTDZ(JI,JJ,JK) = -XCSHF*ZKEFF(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK)
 END DO   
@@ -815,6 +817,7 @@ ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:)
 !$acc end kernels
 CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
    ZFLXZ(JI,JJ,JK)   =  ZF(JI,JJ,JK) + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
 END DO
@@ -862,6 +865,7 @@ END IF
 IF (KRR /= 0) THEN
   CALL MZM_DEVICE(PETHETA,ZTMP1_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)  
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)  
   ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK)
 END DO
@@ -916,6 +920,7 @@ IF ( KRRL >= 1 ) THEN
 !$acc end kernels
     CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE )
 !$acc kernels
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)    
     ZTMP1_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZFLXZ(JI,JJ,JK)/PDZZ(JI,JJ,JK)
 END DO
@@ -931,6 +936,7 @@ END DO
 !$acc end kernels
     CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE )
 !$acc kernels
+!$acc loop independent collapse(3)    
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
     ZTMP1_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZFLXZ(JI,JJ,JK)/PDZZ(JI,JJ,JK)
 END DO
@@ -1007,8 +1013,12 @@ IF (LLES_CALL) THEN
   END IF
   !* diagnostic of mixing coefficient for heat
   CALL DZM_DEVICE(KKA,KKU,KKL,PTHLP,ZA)
-!$acc kernels
-  WHERE (ZA(:,:,:)==0.) ZA(:,:,:)=1.E-6
+  !$acc kernels
+  ! Replace exact zeros in ZA by a small value: ZA is used as a divisor just below.
+  !$acc loop independent collapse(3)
+  DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
+     IF (ZA(JI,JJ,JK)==0.) ZA(JI,JJ,JK)=1.E-6
+  END DO
   ZA(:,:,:) = - ZFLXZ(:,:,:) / ZA(:,:,:) * PDZZ(:,:,:)
   ZA(:,:,IKB) = XCSHF*PPHI3(:,:,IKB)*ZKEFF(:,:,IKB)
 !$acc end kernels
@@ -1047,6 +1057,7 @@ IF (KRR /= 0) THEN
 #else
   CALL DZM_DEVICE(KKA,KKU,KKL,PRM(:,:,:,1),ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZF      (JI,JJ,JK) = -XCSHF*PPSI3(JI,JJ,JK)*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)/PDZZ(JI,JJ,JK)
   END DO
@@ -1054,6 +1065,7 @@ IF (KRR /= 0) THEN
   CALL D_PSI3DRDZ_O_DDRDZ(PPSI3,PREDR1,PREDTH1,PRED2R3,PRED2THR3,HTURBDIM,GUSERV,ZTMP1_DEVICE)
 !CALL D_PHI3DRDZ_O_DDRDZ_DEVICE(PPSI3,PREDR1,PREDTH1,PRED2R3,PRED2THR3,HTURBDIM,GUSERV,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZDFDDRDZ(JI,JJ,JK) = -XCSHF*ZKEFF(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK)
   END DO
@@ -1227,6 +1239,7 @@ IF (KRR /= 0) THEN
 !$acc end kernels
   CALL DZM_DEVICE(KKA,KKU,KKL,ZTMP1_DEVICE,ZTMP2_DEVICE)
 !$acc kernels
+!$acc loop independent collapse(3)  
 DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)  
    ZFLXZ(JI,JJ,JK)   = ZF(JI,JJ,JK) + PIMPL * ZDFDDRDZ(JI,JJ,JK) *ZTMP2_DEVICE(JI,JJ,JK) / PDZZ(JI,JJ,JK)
 END DO
@@ -1268,12 +1281,14 @@ END DO
 #else
   CALL MZM_DEVICE(PEMOIST,ZTMP1_DEVICE)
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLXZ(JI,JJ,JK)
   END DO
   !$acc end kernels
   CALL MZF_DEVICE(KKA,KKU,KKL, ZTMP2_DEVICE, ZTMP3_DEVICE )
   !$acc kernels
+  !$acc loop independent collapse(3)
   DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
      ZA(JI,JJ,JK)   =  PBETA(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK)
   END DO
diff --git a/src/Makefile.MESONH.mk b/src/Makefile.MESONH.mk
index f7b5994ea..c5ca2e753 100644
--- a/src/Makefile.MESONH.mk
+++ b/src/Makefile.MESONH.mk
@@ -490,7 +490,7 @@ ifeq "$(VER_CDF)" "CDFAUTO"
 DIR_CDFC?=${SRC_MESONH}/src/LIB/netcdf-${VERSION_CDFC}
 DIR_CDFCXX?=${SRC_MESONH}/src/LIB/netcdf-cxx-${VERSION_CDFCXX}
 DIR_CDFF?=${SRC_MESONH}/src/LIB/netcdf-fortran-${VERSION_CDFF}
-CDF_PATH?=${SRC_MESONH}/src/LIB/netcdf-${ARCH}-R${MNH_REAL}I${MNH_INT}
+CDF_PATH?=${OBJDIR_MASTER}/NETCDF-${VERSION_CDFF}
 CDF_MOD?=${CDF_PATH}/include/netcdf.mod
 #
 INC_NETCDF     ?= -I${CDF_PATH}/include
-- 
GitLab