diff --git a/src/ZSOLVER/turb_hor_dyn_corr.f90 b/src/ZSOLVER/turb_hor_dyn_corr.f90
index e8016fc6cdc97f1a82d0bf2300896ece94472b3c..17291f66ca1fa3eb874b5fd563e96b0521b7a80e 100644
--- a/src/ZSOLVER/turb_hor_dyn_corr.f90
+++ b/src/ZSOLVER/turb_hor_dyn_corr.f90
@@ -625,6 +625,7 @@ CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1))
 !$acc wait(1)
 !
 !$acc kernels async(4) present_cr(ZFLX,ZDIRSINZW)
+!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
 !if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP)
 #if !defined(MNH_BITREP)
 ZFLX(:,:,IKB-1) =                                                            &
@@ -637,7 +638,6 @@ ZFLX(:,:,IKB-1) =                                                            &
     - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:)    )
 #else
 !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1)
-!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
 ZFLX(:,:,IKB-1) =                                                             &
         PTAU11M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:))          &
   -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:)        & 
@@ -646,8 +646,8 @@ ZFLX(:,:,IKB-1) =                                                             &
   +2. * PCDUEFF(:,:) *      (                                                 &
       PVSLOPEM(:,:) * PCOSSLOPE(:,:)    * PSINSLOPE(:,:) * ZDIRSINZW(:,:)     &
       - PUSLOPEM(:,:) * BR_P2(PCOSSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) )
-!$mnh_end_expand_array()
 #endif
+!$mnh_end_expand_array()
 !$acc end kernels
 ! 
 !!! wait for the computation of ZFLX(:,:,IKB) and ZFLX(:,:,IKB-1)