diff --git a/src/MNH/adv_boundaries.f90 b/src/MNH/adv_boundaries.f90
index 6461d2736e2338487183620c35237f2d5dd06de6..4444e683f63864928ced635ff1e72a2079f5b96a 100644
--- a/src/MNH/adv_boundaries.f90
+++ b/src/MNH/adv_boundaries.f90
@@ -274,6 +274,7 @@ INTEGER             :: IKB       ! indice K Beginning in z direction
 INTEGER             :: IKE       ! indice K End       in z direction
 INTEGER             :: IIU, IJU  ! Index End in X and Y directions
 INTEGER             :: IIB,IIE,IJB,IJE ! interior domaine bound
+INTEGER             :: IFLAG     ! Variable to workaround a performance problem with PGI compiler (at least 16.4)
 !
 !-------------------------------------------------------------------------------
 !
@@ -287,6 +288,17 @@ IJU=SIZE(PFIELD,2)
 !
 IF (SIZE(PFIELD)==0) RETURN
 !
+SELECT CASE (HFIELD)
+  CASE ('U')
+    IFLAG = 1
+  CASE ('V')
+    IFLAG = 2
+  CASE ('W')
+    IFLAG = 3
+  CASE DEFAULT
+    IFLAG = 0
+END SELECT
+!
 !$acc kernels
 !
 !-------------------------------------------------------------------------------
@@ -297,8 +309,9 @@ IF (SIZE(PFIELD)==0) RETURN
 !*       2.1    COMPUTE THE FIELD EXTRAPOLATIONS AT THE GROUND
 !
 !
-   IF (HFIELD=='W') &
-   PFIELD  (:,:,IKB  )   = PFIELDI (:,:,IKB)
+   IF (IFLAG==3) THEN !HFIELD=='W'
+     PFIELD  (:,:,IKB  )   = PFIELDI (:,:,IKB)
+   END IF
 !
    PFIELD  (:,:,IKB-1)   = PFIELD  (:,:,IKB)
 
@@ -313,7 +326,7 @@ IF (SIZE(PFIELD)==0) RETURN
 !
   IF (HLBCX(1)=='OPEN' .AND. LWEST_ll()) THEN
      PFIELD(:IIB-1,:,:) = PFIELDI(:IIB-1,:,:)
-     IF (HFIELD=='U') &
+     IF (IFLAG==1) & !HFIELD=='U'
      PFIELD(:IIB,:,:) = PFIELDI(:IIB,:,:)
   END IF
   IF (HLBCX(2)=='OPEN' .AND. LEAST_ll()) THEN
@@ -321,7 +334,7 @@ IF (SIZE(PFIELD)==0) RETURN
   END IF
   IF (HLBCY(1)=='OPEN' .AND. LSOUTH_ll()) THEN
      PFIELD(:,:IJB-1,:) = PFIELDI(:,:IJB-1,:)
-     IF (HFIELD=='V') &
+     IF (IFLAG==2) & !HFIELD=='V'
      PFIELD(:,:IJB,:) = PFIELDI(:,:IJB,:)
   END IF
   IF (HLBCY(2)=='OPEN' .AND. LNORTH_ll()) THEN
@@ -330,7 +343,7 @@ IF (SIZE(PFIELD)==0) RETURN
 !
 !*       4. TOP BC for W
 !
-  IF (HFIELD=='W') PFIELD(:,:,IKE+1) = 0.
+  IF (IFLAG==3) PFIELD(:,:,IKE+1) = 0. !IF HFIELD=='W'
 !$acc end kernels
 !
 #if 0
diff --git a/src/MNH/fast_terms.f90 b/src/MNH/fast_terms.f90
index f7d1578d8b3e3337046db1590474e3e06ba28b4c..44783ef22e768a3848df989f844efbae3a87b033 100644
--- a/src/MNH/fast_terms.f90
+++ b/src/MNH/fast_terms.f90
@@ -233,6 +233,7 @@ INTEGER             :: ILENCH     ! Length of comment string in LFIFM file
 INTEGER             :: JK         ! Var for vertical DO loops
 INTEGER             :: JITER,ITERMAX  ! iterative loop for first order adjustment
 INTEGER             :: ILUOUT     ! Logical unit of output listing 
+LOGICAL,DIMENSION(SIZE(PRCS,1),SIZE(PRCS,2),SIZE(PRCS,3))::GWORK
 !-------------------------------------------------------------------------------
 !
 !*       1.     PRELIMINARIES
@@ -268,7 +269,8 @@ IF( NVERB>5) THEN
 END IF
 !
 !$acc kernels
-WHERE ( PRCS(:,:,:)+PRVS(:,:,:) < 0.)
+GWORK= PRCS(:,:,:)+PRVS(:,:,:) < 0.
+WHERE ( GWORK)
   PRVS(:,:,:) = -  PRCS(:,:,:)
 END WHERE
 !
diff --git a/src/MNH/rain_ice.f90 b/src/MNH/rain_ice.f90
index b30c13cc500b24933470b561c0d88332b8bd34d8..ace041532958db20731d1084be8cb780f3c862bf 100644
--- a/src/MNH/rain_ice.f90
+++ b/src/MNH/rain_ice.f90
@@ -2692,6 +2692,7 @@ REAL,DIMENSION(SIZE(ZZW1,1)) :: ZTMP
     ZZW(:) = UNPACK( VECTOR=ZVEC1(:),MASK=GRIM,FIELD=0.0 )
 #else
     ZZW(:) = 0.0
+!$acc loop independent
     DO JL=1,IGRIM
       ZZW(I1(JL)) = ZVEC1(JL)
     END DO
@@ -2725,6 +2726,7 @@ REAL,DIMENSION(SIZE(ZZW1,1)) :: ZTMP
     ZZW(:) = UNPACK( VECTOR=ZVEC1(:),MASK=GRIM,FIELD=0.0 )
 #else
     ZZW(:) = 0.0
+!$acc loop independent
     DO JL=1,IGRIM
       ZZW(I1(JL)) = ZVEC1(JL)
     END DO
@@ -2850,6 +2852,7 @@ REAL,DIMENSION(SIZE(ZZW1,1)) :: ZTMP
 !        5.2.3  perform the bilinear interpolation of the normalized
 !               RACCSS-kernel
 !
+!$acc loop independent
     DO JJ = 1,IGACC
       ZVEC3(JJ) =  (  XKER_RACCSS(IVEC1(JJ)+1,IVEC2(JJ)+1)* ZVEC2(JJ)          &
                     - XKER_RACCSS(IVEC1(JJ)+1,IVEC2(JJ)  )*(ZVEC2(JJ) - 1.0) ) &
@@ -2862,6 +2865,7 @@ REAL,DIMENSION(SIZE(ZZW1,1)) :: ZTMP
     ZZW(:) = UNPACK( VECTOR=ZVEC3(:),MASK=GACC,FIELD=0.0 )
 #else
     ZZW(:) = 0.0
+!$acc loop independent
     DO JL=1,IGACC
       ZZW(I1(JL)) = ZVEC3(JL)
     END DO
@@ -2915,9 +2919,10 @@ REAL,DIMENSION(SIZE(ZZW1,1)) :: ZTMP
     !OK on GPU:
     ZTMP(:) = ZZW1(:,2)
     ZZW1(:,2) = 0.0
+!$acc loop independent
     DO JL=1,IGACC
-    ZZW1(I1(JL),2) = ZTMP(I1(JL))*ZVEC3(JL)
-END DO
+      ZZW1(I1(JL),2) = ZTMP(I1(JL))*ZVEC3(JL)
+    END DO
 #endif
                                                                        !! RRACCS!
 !        5.2.5  perform the bilinear interpolation of the normalized
@@ -2935,6 +2940,7 @@ END DO
     ZZW(:) = UNPACK( VECTOR=ZVEC3(:),MASK=GACC,FIELD=0.0 )
 #else
     ZZW(:) = 0.0
+!$acc loop independent
     DO JL=1,IGACC
       ZZW(I1(JL)) = ZVEC3(JL)
     END DO
@@ -3196,6 +3202,7 @@ INTEGER,DIMENSION(:),ALLOCATABLE :: I1
 !*       6.2.5  perform the bilinear interpolation of the normalized
 !               SDRYG-kernel
 !
+!$acc loop independent
     DO JJ = 1,IGDRY
       ZVEC3(JJ) =  (  XKER_SDRYG(IVEC1(JJ)+1,IVEC2(JJ)+1)* ZVEC2(JJ)          &
                     - XKER_SDRYG(IVEC1(JJ)+1,IVEC2(JJ)  )*(ZVEC2(JJ) - 1.0) ) &
@@ -3208,6 +3215,7 @@ INTEGER,DIMENSION(:),ALLOCATABLE :: I1
     ZZW(:) = UNPACK( VECTOR=ZVEC3(:),MASK=GDRY,FIELD=0.0 )
 #else
     ZZW(:) = 0.0
+!$acc loop independent
     DO JL=1,IGDRY
       ZZW(I1(JL)) = ZVEC3(JL)
     END DO
@@ -3302,6 +3310,7 @@ INTEGER,DIMENSION(:),ALLOCATABLE :: I1
 !*       6.2.10 perform the bilinear interpolation of the normalized
 !               RDRYG-kernel
 !
+!$acc loop independent
     DO JJ = 1,IGDRY
       ZVEC3(JJ) =  (  XKER_RDRYG(IVEC1(JJ)+1,IVEC2(JJ)+1)* ZVEC2(JJ)          &
                     - XKER_RDRYG(IVEC1(JJ)+1,IVEC2(JJ)  )*(ZVEC2(JJ) - 1.0) ) &
@@ -3314,6 +3323,7 @@ INTEGER,DIMENSION(:),ALLOCATABLE :: I1
     ZZW(:) = UNPACK( VECTOR=ZVEC3(:),MASK=GDRY,FIELD=0.0 )
 #else
     ZZW(:) = 0.0
+!$acc loop independent
     DO JL=1,IGDRY
       ZZW(I1(JL)) = ZVEC3(JL)
     END DO
diff --git a/src/MNH/tm06_h.f90 b/src/MNH/tm06_h.f90
index 6426b82689057464de798b2d22131e1aef32db84..a768e3f5f2bdfce6377fc2b6e1922fc4b2b439a6 100644
--- a/src/MNH/tm06_h.f90
+++ b/src/MNH/tm06_h.f90
@@ -102,7 +102,8 @@ INTEGER                                  :: JK     ! loop counter
 REAL, DIMENSION(SIZE(PZZ,1),SIZE(PZZ,2)) :: ZFLXZMIN ! minimum of temperature flux 
 REAL, DIMENSION(SIZE(PZZ,1),SIZE(PZZ,2)) :: ZBL_DEPTH! BL depth at previous time-step
 REAL                                     :: ZGROWTH  ! maximum BL growth rate
-!$acc declare create(ZFLXZMIN,ZBL_DEPTH)
+LOGICAL, DIMENSION(SIZE(PZZ,1),SIZE(PZZ,2)) :: GWORK
+!$acc declare create(ZFLXZMIN,ZBL_DEPTH,GWORK)
 !----------------------------------------------------------------------------
 !
 #ifdef _OPENACC
@@ -124,7 +125,8 @@ ZFLXZMIN (:,:) = PFLXZ(:,:,KKB)
 !TODO BUG: paralleliser la boucle aevc OpenACC. Avec pgi14.9/16.07, le DO...WHERE..ENDWHERE...ENDDO ne compile pas
 DO JK=KKTB,KKTE
 !$acc kernels
-  WHERE (PFLXZ(:,:,KKB)>0. .AND. PFLXZ(:,:,JK)<ZFLXZMIN(:,:))
+  GWORK(:,:) = PFLXZ(:,:,KKB)>0. .AND. PFLXZ(:,:,JK)<ZFLXZMIN(:,:)
+  WHERE (GWORK(:,:))
     PBL_DEPTH(:,:) = PZZ  (:,:,JK) - PZZ(:,:,KKB)
     ZFLXZMIN (:,:) = PFLXZ(:,:,JK)
   END WHERE
diff --git a/src/MNH/tridiag_thermo.f90 b/src/MNH/tridiag_thermo.f90
index 2019c088a111c452e0ae903965a7d31def938906..6f7cc20793cf0c2f89961783b1b210b5bb38f9bc 100644
--- a/src/MNH/tridiag_thermo.f90
+++ b/src/MNH/tridiag_thermo.f90
@@ -190,7 +190,7 @@ REAL, DIMENSION(SIZE(PVARM,1),SIZE(PVARM,2),SIZE(PVARM,3))  :: ZY ,ZGAM
                                          ! RHS of the equation, 3D work array
 REAL, DIMENSION(SIZE(PVARM,1),SIZE(PVARM,2))                :: ZBET
                                          ! 2D work array
-INTEGER             :: JK            ! loop counter
+INTEGER             :: JI,JJ,JK      ! loop counter
 INTEGER             :: IKB,IKE       ! inner vertical limits
 INTEGER             :: IKT          ! array size in k direction
 INTEGER             :: IKTB,IKTE    ! start, end of k loops in physical domain 
@@ -308,20 +308,29 @@ IF ( PIMPL > 1.E-10 ) THEN
 
   !
   DO JK = IKB+KKL,IKE-KKL,KKL
-    ZGAM(:,:,JK) = ZC(:,:,JK-KKL) / ZBET(:,:)  
+!$acc loop collapse(2) independent
+    DO JJ=1,SIZE(ZGAM,2)
+      DO JI=1,SIZE(ZGAM,1)
+        ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-KKL) / ZBET(JI,JJ)  
                                                     ! gam(k) = c(k-1) / bet
-    ZBET(:,:)    = ZB(:,:,JK) - ZA(:,:,JK) * ZGAM(:,:,JK)
+        ZBET(JI,JJ)    = ZB(JI,JJ,JK) - ZA(JI,JJ,JK) * ZGAM(JI,JJ,JK)
                                                     ! bet = b(k) - a(k)* gam(k)  
-    PVARP(:,:,JK)= ( ZY(:,:,JK) - ZA(:,:,JK) * PVARP(:,:,JK-KKL) ) / ZBET(:,:)
+        PVARP(JI,JJ,JK)= ( ZY(JI,JJ,JK) - ZA(JI,JJ,JK) * PVARP(JI,JJ,JK-KKL) ) / ZBET(JI,JJ)
                                         ! res(k) = (y(k) -a(k)*res(k-1))/ bet 
+      END DO
+    END DO
   END DO 
   ! special treatment for the last level
-  ZGAM(:,:,IKE) = ZC(:,:,IKE-KKL) / ZBET(:,:) 
+  DO JJ=1,SIZE(ZGAM,2)
+    DO JI=1,SIZE(ZGAM,1)
+      ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) 
                                                     ! gam(k) = c(k-1) / bet
-  ZBET(:,:)     = ZB(:,:,IKE) - ZA(:,:,IKE) * ZGAM(:,:,IKE)
+      ZBET(JI,JJ)     = ZB(JI,JJ,IKE) - ZA(JI,JJ,IKE) * ZGAM(JI,JJ,IKE)
                                                     ! bet = b(k) - a(k)* gam(k)  
-  PVARP(:,:,IKE)= ( ZY(:,:,IKE) - ZA(:,:,IKE) * PVARP(:,:,IKE-KKL) ) / ZBET(:,:)
+      PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-KKL) ) / ZBET(JI,JJ)
                                        ! res(k) = (y(k) -a(k)*res(k-1))/ bet 
+    END DO
+  END DO
 !
 !*       3.3 going down
 !            ----------
diff --git a/src/MNH/tridiag_tke.f90 b/src/MNH/tridiag_tke.f90
index c2cb749a103d7fa8e2f57bd5a50e8f0343edcf3f..389a1a7a7517810122f5de4a16816a0c88510fdd 100644
--- a/src/MNH/tridiag_tke.f90
+++ b/src/MNH/tridiag_tke.f90
@@ -176,7 +176,7 @@ REAL, DIMENSION(SIZE(PVARM,1),SIZE(PVARM,2),SIZE(PVARM,3))  :: ZY ,ZGAM
 REAL, DIMENSION(SIZE(PVARM,1),SIZE(PVARM,2))                :: ZBET
                                          ! 2D work array
 !$acc declare create(ZY,ZGAM,ZBET)
-INTEGER             :: JK            ! loop counter
+INTEGER             :: JI,JJ,JK      ! loop counters
 INTEGER             :: IKB,IKE       ! inner vertical limits
 INTEGER             :: IKT          ! array size in k direction
 INTEGER             :: IKTB,IKTE    ! start, end of k loops in physical domain
@@ -224,17 +224,22 @@ IF ( PIMPL > 1.E-10 ) THEN
   PVARP(:,:,IKB) = ZY(:,:,IKB) / ZBET(:,:)                
   !
   DO JK = IKB+KKL,IKE-KKL,KKL
-      ZGAM(:,:,JK) = PIMPL * PA(:,:,JK) / PRHODJ(:,:,JK-KKL) / ZBET(:,:)  
+!$acc loop collapse(2) independent
+    DO JJ=1,SIZE(ZGAM,2)
+      DO JI=1,SIZE(ZGAM,1)
+        ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJ(JI,JJ,JK-KKL) / ZBET(JI,JJ)  
                                                     ! gam(k) = c(k-1) / bet
-    ZBET(:,:)    = 1. + PIMPL * ( PDIAG(:,:,JK) -                     &
-                                 (  PA(:,:,JK) * (1. + ZGAM(:,:,JK))  &
-                                  + PA(:,:,JK+KKL)                      &
-                                 ) / PRHODJ(:,:,JK)                   &
+        ZBET(JI,JJ)    = 1. + PIMPL * ( PDIAG(JI,JJ,JK) -                 &
+                                 (  PA(JI,JJ,JK) * (1. + ZGAM(JI,JJ,JK))  &
+                                  + PA(JI,JJ,JK+KKL)                      &
+                                 ) / PRHODJ(JI,JJ,JK)                     &
                                 )                   ! bet = b(k) - a(k)* gam(k)  
-    PVARP(:,:,JK)= ( ZY(:,:,JK) - PIMPL * PA(:,:,JK) / PRHODJ(:,:,JK) &
-                    * PVARP(:,:,JK-KKL)                                 &
-                   ) / ZBET(:,:)
+        PVARP(JI,JJ,JK)= ( ZY(JI,JJ,JK) - PIMPL * PA(JI,JJ,JK) / PRHODJ(JI,JJ,JK) &
+                    * PVARP(JI,JJ,JK-KKL)                                         &
+                   ) / ZBET(JI,JJ)
                                         ! res(k) = (y(k) -a(k)*res(k-1))/ bet 
+      END DO
+    END DO
   END DO 
   ! special treatment for the last level
   ZGAM(:,:,IKE) = PIMPL * PA(:,:,IKE) / PRHODJ(:,:,IKE-KKL) / ZBET(:,:) 
diff --git a/src/MNH/tridiag_wind.f90 b/src/MNH/tridiag_wind.f90
index 4550c6904e148b1de246df76cf34e01d8511a1e3..4a4a3092b49d260835a48cdf9d5ce51ff1302afe 100644
--- a/src/MNH/tridiag_wind.f90
+++ b/src/MNH/tridiag_wind.f90
@@ -181,7 +181,7 @@ REAL, DIMENSION(SIZE(PVARM,1),SIZE(PVARM,2),SIZE(PVARM,3))  :: ZY ,ZGAM
                                          ! RHS of the equation, 3D work array
 REAL, DIMENSION(SIZE(PVARM,1),SIZE(PVARM,2))                :: ZBET
                                          ! 2D work array
-INTEGER             :: JK            ! loop counter
+INTEGER             :: JI,JJ,JK      ! loop counters
 INTEGER             :: IKB,IKE       ! inner vertical limits
 INTEGER             :: IKT          ! array size in k direction
 INTEGER             :: IKTB,IKTE    ! start, end of k loops in physical domain 
@@ -251,16 +251,21 @@ IF ( PIMPL > 1.E-10 ) THEN
   PVARP(:,:,IKB) = ZY(:,:,IKB) / ZBET(:,:)                
   !
   DO JK = IKB+KKL,IKE-KKL,KKL
-    ZGAM(:,:,JK) = PIMPL * PA(:,:,JK) / PRHODJA(:,:,JK-KKL) / ZBET(:,:)  
+!$acc loop collapse(2) independent
+    DO JJ=1,SIZE(ZGAM,2)
+      DO JI=1,SIZE(ZGAM,1)
+        ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ)  
                                                     ! gam(k) = c(k-1) / bet
-    ZBET(:,:)    = 1. - PIMPL * (  PA(:,:,JK) * (1. + ZGAM(:,:,JK))  &
-                                 + PA(:,:,JK+KKL)                      &
-                                ) / PRHODJA(:,:,JK)  
+        ZBET(JI,JJ)    = 1. - PIMPL * (  PA(JI,JJ,JK) * (1. + ZGAM(JI,JJ,JK))  &
+                                 + PA(JI,JJ,JK+KKL)                      &
+                                ) / PRHODJA(JI,JJ,JK)  
                                                     ! bet = b(k) - a(k)* gam(k)  
-    PVARP(:,:,JK)= ( ZY(:,:,JK) - PIMPL * PA(:,:,JK) / PRHODJA(:,:,JK) &
-                    * PVARP(:,:,JK-KKL)                                  &
-                   ) / ZBET(:,:)
+        PVARP(JI,JJ,JK)= ( ZY(JI,JJ,JK) - PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK) &
+                    * PVARP(JI,JJ,JK-KKL)                                  &
+                   ) / ZBET(JI,JJ)
                                         ! res(k) = (y(k) -a(k)*res(k-1))/ bet 
+      END DO
+    END DO
   END DO
   ! special treatment for the last level
   ZGAM(:,:,IKE) = PIMPL * PA(:,:,IKE) / PRHODJA(:,:,IKE-KKL) / ZBET(:,:) 
diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90
index 41093060adac3b18facc2cc0f07ad14ebb7c7025..b7973ae35387fe506912cbf4d30415e330d8c403 100644
--- a/src/MNH/turb.f90
+++ b/src/MNH/turb.f90
@@ -1685,6 +1685,7 @@ REAL, DIMENSION(:,:,:), INTENT(OUT)   :: PLM
 !*       0.2   Declarations of local variables
 !
 REAL                :: ZD           ! distance to the surface
+REAL                :: ZVAR         ! Intermediary variable
 REAL, DIMENSION(SIZE(PUT,1),SIZE(PUT,2)) ::   ZWORK2D
 !
 REAL, DIMENSION(SIZE(PTHLT,1),SIZE(PTHLT,2),SIZE(PTHLT,3)) ::     &
@@ -1703,9 +1704,10 @@ REAL, DIMENSION(SIZE(PTHLT,1),SIZE(PTHLT,2),SIZE(PTHLT,3)) :: ZTMP1_DEVICE,ZTMP2
 !
 !   initialize the mixing length with the mesh grid
 !$acc kernels
-DO JK = IKTB,IKTE ! 1D turbulence scheme
-  PLM(:,:,JK) = PZZ(:,:,JK+KKL) - PZZ(:,:,JK)
-END DO
+PLM(:,:,IKTB:IKTE) = PZZ(:,:,IKTB+KKL:IKTE+KKL) - PZZ(:,:,IKTB:IKTE)
+!DO JK = IKTB,IKTE ! 1D turbulence scheme
+!  PLM(:,:,JK) = PZZ(:,:,JK+KKL) - PZZ(:,:,JK)
+!END DO
 PLM(:,:,KKU) = PLM(:,:,IKE)
 PLM(:,:,KKA) = PZZ(:,:,IKB) - PZZ(:,:,KKA)
 !$acc end kernels
@@ -1760,24 +1762,43 @@ CALL EMOIST(KRR,KRRI,PTHLT,PRT,ZLOCPEXNM,ZAMOIST,PSRCT,ZEMOIST)
 #endif
 !
 !$acc kernels present(PDIRCOSZW,PDZZ,PRT,PTHLT,PTHVREF,PTHLT,PZZ,PTKET)
-DO JK = IKTB+1,IKTE-1
-      ZDTHLDZ(:,:,JK)= 0.5*((PTHLT(:,:,JK+KKL)-PTHLT(:,:,JK))/PDZZ(:,:,JK+KKL)+      &
-                         (PTHLT(:,:,JK)-PTHLT(:,:,JK-KKL))/PDZZ(:,:,JK))
 ! For dry simulations
-IF (KRR>0) THEN    
-        ZDRTDZ(:,:,JK)= 0.5*((PRT(:,:,JK+KKL,1)-PRT(:,:,JK,1))/PDZZ(:,:,JK+KKL)+      &
-                         (PRT(:,:,JK,1)-PRT(:,:,JK-KKL,1))/PDZZ(:,:,JK))
+IF (KRR>0) THEN
+!$acc loop independent collapse(3)
+  DO JK = IKTB+1,IKTE-1
+    DO JJ=1,SIZE(PUT,2)
+      DO JI=1,SIZE(PUT,1)
+        ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK    ))/PDZZ(JI,JJ,JK+KKL)+ &
+                                (PTHLT(JI,JJ,JK    )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK    ))
+        ZDRTDZ(JI,JJ,JK) = 0.5*((PRT(JI,JJ,JK+KKL,1)-PRT(JI,JJ,JK    ,1))/PDZZ(JI,JJ,JK+KKL)+ &
+                                (PRT(JI,JJ,JK    ,1)-PRT(JI,JJ,JK-KKL,1))/PDZZ(JI,JJ,JK    ))
+        ZVAR=XG/PTHVREF(JI,JJ,JK)*                                                  &
+             (ZETHETA(JI,JJ,JK)*ZDTHLDZ(JI,JJ,JK)+ZEMOIST(JI,JJ,JK)*ZDRTDZ(JI,JJ,JK))
+        !
+        IF (ZVAR>0.) THEN
+          PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), &
+                        0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR)))
+        END IF
+      END DO
+    END DO
+  END DO
 ELSE
-       ZDRTDZ(:,:,JK)=0
-ENDIF
-       ZWORK2D(:,:)=XG/PTHVREF(:,:,JK)*                                           &
-            (ZETHETA(:,:,JK)*ZDTHLDZ(:,:,JK)+ZEMOIST(:,:,JK)*ZDRTDZ(:,:,JK))
-  !
-  WHERE(ZWORK2D(:,:)>0.)
-    PLM(:,:,JK)=MAX(XMNH_EPSILON,MIN(PLM(:,:,JK),                &
-                    0.76* SQRT(PTKET(:,:,JK)/ZWORK2D(:,:))))
-  END WHERE
-END DO
+!$acc loop independent collapse(3)
+  DO JK = IKTB+1,IKTE-1
+    DO JJ=1,SIZE(PUT,2)
+      DO JI=1,SIZE(PUT,1)
+        ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK    ))/PDZZ(JI,JJ,JK+KKL)+ &
+                                (PTHLT(JI,JJ,JK    )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK    ))
+        ZVAR=XG/PTHVREF(JI,JJ,JK)*ZETHETA(JI,JJ,JK)*ZDTHLDZ(JI,JJ,JK)
+        !
+        IF (ZVAR>0.) THEN
+          PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), &
+                        0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR)))
+        END IF
+      END DO
+    END DO
+  END DO
+END IF
 !  special case near the surface 
 ZDTHLDZ(:,:,IKB)=(PTHLT(:,:,IKB+KKL)-PTHLT(:,:,IKB))/PDZZ(:,:,IKB+KKL)
 ! For dry simulations