From 6d46c9de8e637db533e3ab211d33962cf9ad1be2 Mon Sep 17 00:00:00 2001
From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr>
Date: Tue, 18 Jan 2022 13:28:54 +0100
Subject: [PATCH] Philippe 18/01/2022: OpenACC: Cray compiler does not support
 combined 'kernels loop' directives (split into separate kernels and loop)

---
 src/ZSOLVER/pressurez.f90                     | 20 +++++++++---
 .../communication.f90                         |  4 ++-
 .../datatypes.f90                             |  3 +-
 .../discretisation.f90                        |  7 ++++-
 .../multigrid.f90                             |  6 ++--
 .../profiles.f90                              | 15 ++++++---
 src/ZSOLVER/zsolver_inv.f90                   | 31 ++++++++++---------
 7 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/src/ZSOLVER/pressurez.f90 b/src/ZSOLVER/pressurez.f90
index 4be2ba2ad..2d9b879ea 100644
--- a/src/ZSOLVER/pressurez.f90
+++ b/src/ZSOLVER/pressurez.f90
@@ -798,8 +798,11 @@ CALL GX_M_U_DEVICE(1,IKU,1,ZPHIT,PDXX,PDZZ,PDZX,ZDV_SOURCE)
 IF ( GWEST ) THEN
 !!!!!!!!!!!!!!!!  FUJI  compiler directive !!!!!!!!!!
 !!!!!!!!!!!!!!!!  FUJI  compiler directive !!!!!!!!!!
+   !$acc kernels async
 #ifdef MNH_COMPILER_NVHPC
-   !$acc kernels loop independent collapse(2) async
+   !$acc loop independent collapse(2)
+#else
+   !$acc loop independent
 #endif
    DO CONCURRENT (JJ=1:IJU , JK=2:IKU-1)
       ZDV_SOURCE(IIB,JJ,JK)=                                                    &
@@ -813,8 +816,11 @@ IF ( GWEST ) THEN
 ENDIF
   !
 IF( GEAST ) THEN
+   !$acc kernels async
 #ifdef MNH_COMPILER_NVHPC
-   !$acc kernels loop independent collapse(2) async
+   !$acc loop independent collapse(2)
+#else
+   !$acc loop independent
 #endif
    DO CONCURRENT (JJ=1:IJU , JK=2:IKU-1)
       ZDV_SOURCE(IIE+1,JJ,JK)=                                                   &
@@ -863,8 +869,11 @@ IF(.NOT. L2D) THEN
    IF ( GSOUTH ) THEN
 !!!!!!!!!!!!!!!!  FUJI  compiler directive !!!!!!!!!!
 !!!!!!!!!!!!!!!!  FUJI  compiler directive !!!!!!!!!!
+      !$acc kernels async
 #ifdef MNH_COMPILER_NVHPC
-      !$acc kernels loop independent collapse(2) async
+      !$acc loop independent collapse(2)
+#else
+      !$acc loop independent
 #endif
       DO CONCURRENT (JI=1:IIU , JK=2:IKU-1)
          ZDV_SOURCE(JI,IJB,JK)=                                                  &
@@ -878,8 +887,11 @@ IF(.NOT. L2D) THEN
    END IF
    !
    IF ( GNORTH ) THEN
+      !$acc kernels async
 #ifdef MNH_COMPILER_NVHPC
-      !$acc kernels loop independent collapse(2) async
+      !$acc loop independent collapse(2)
+#else
+      !$acc loop independent
 #endif
       DO CONCURRENT (JI=1:IIU , JK=2:IKU-1) 
          ZDV_SOURCE(JI,IJE+1,JK)=                                                &
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90
index d95cde8d4..6b86c4adf 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90
@@ -549,7 +549,8 @@ contains
            local_sumt = 0.0_rl
            za_st => a%st
            zb_st => b%st
-           !$acc kernels loop collapse(3)
+           !$acc kernels
+           !$acc loop collapse(3)
            do iz=0,nz+1
               do iy=a%icompy_min,a%icompy_max
                  do ix=a%icompx_min,a%icompx_max
@@ -558,6 +559,7 @@ contains
                  end do
               end do
            end do
+           !$acc end kernels
         end if
       else
         if (LUseO) local_sum = 0.0_rl
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90
index e4c2a3703..f99505180 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90
@@ -360,7 +360,8 @@ private
        
        if (LUseT) then
           zphi_st => phi%st
-          !$acc kernels loop collapse(3)
+          !$acc kernels
+          !$acc loop collapse(3)
           do iz=1,nz
              do iy=1,nlocaly
                 do ix=1,nlocalx
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90
index b81b9dc95..fa4092f54 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90
@@ -1578,7 +1578,10 @@ end subroutine construct_vertical_coeff
     if (LUseO) u0(:,:,:) = u%s(:,:,:)
     if (LUseT) then
        zu_st =>  u%st
-       !$acc kernels loop independent collapse(3)
+       !$acc kernels
+#ifdef MNH_COMPILER_NVHPC
+       !$acc loop independent collapse(3)
+#endif
        do concurrent (ii=nib:nie,ij=njb:nje,ik=nkb:nke)
           ut0(ii,ij,ik) = zu_st(ii,ij,ik)
        end do
@@ -1674,7 +1677,9 @@ end subroutine construct_vertical_coeff
                   Sut0, &
                   Sutmp,level )
        !$acc kernels
+#ifdef MNH_COMPILER_NVHPC
        !$acc loop independent collapse(3)
+#endif
        do concurrent ( ix=iib:iie,iy=ijb:ije,iz=1:nz )
           zu_st(ix,iy,iz) = & 
                rho*zSutmp_st(ix,iy,iz) & 
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90
index b43fa68dd..6b55f0c13 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90
@@ -503,7 +503,8 @@ contains
       if (LUseT) then 
          zphifine_st => phifine%st
          zphicoarse_st => phicoarse%st
-         !$acc kernels loop independent  collapse(3)
+         !$acc kernels
+         !$acc loop independent  collapse(3)
          do iz=1,nz
             do iy=iy_min,iy_max
                do ix=ix_min,ix_max
@@ -835,7 +836,8 @@ contains
          zphifine_st => phifine%st
          zphicoarse_st => phicoarse%st
 
-         !$acc kernels loop independent  collapse(5)
+         !$acc kernels
+         !$acc loop independent  collapse(5)
          do iz=1,nz 
             do diy = -1,0
                do dix = -1,0
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90
index 70e7508d5..3fb50577b 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90
@@ -88,7 +88,8 @@ private
         end if
         if (LUseT) then
            zu_st => u%st
-           !$acc kernels loop independent collapse(3)
+           !$acc kernels
+           !$acc loop independent collapse(3)
            do iz=1,nz
               do iy=iy_min, iy_max
                  do ix=ix_min, ix_max
@@ -113,7 +114,8 @@ private
        end if
        if (LUseT) then
           zu_st => u%st
-          !$acc kernels loop independent collapse(3)
+          !$acc kernels
+          !$acc loop independent collapse(3)
           do iz=1,nz
              do iy=iy_min, iy_max
                 do ix=ix_min, ix_max
@@ -162,7 +164,8 @@ private
           end do
        else
           zu_st => u%st
-          !$acc kernels loop independent collapse(3)
+          !$acc kernels
+          !$acc loop independent collapse(3)
           do iz=1,nz
              do iy=iy_min, iy_max
                 do ix=ix_min, ix_max
@@ -225,7 +228,8 @@ private
        endif
        if (LUseT) then
           zb_st => b%st
-          !$acc kernels loop independent collapse(3)
+          !$acc kernels
+          !$acc loop independent collapse(3)
           do iz=1,nz
              do iy=iy_min, iy_max
                 do ix=ix_min, ix_max
@@ -262,7 +266,8 @@ private
        end if
        if (LUseT) then
           zb_st => b%st
-          !$acc kernels loop independent collapse(3)
+          !$acc kernels
+          !$acc loop independent collapse(3)
           do iz=1,nz 
              do iy=iy_min, iy_max
                 do ix=ix_min, ix_max
diff --git a/src/ZSOLVER/zsolver_inv.f90 b/src/ZSOLVER/zsolver_inv.f90
index d15dcc1ca..ceaa670be 100644
--- a/src/ZSOLVER/zsolver_inv.f90
+++ b/src/ZSOLVER/zsolver_inv.f90
@@ -1,13 +1,8 @@
-!MNH_LIC Copyright 1994-2014 CNRS, Meteo-France and Universite Paul Sabatier
+!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier
 !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence
 !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt  
 !MNH_LIC for details. version 1.
 !-----------------------------------------------------------------
-!--------------- special set of characters for RCS information
-!-----------------------------------------------------------------
-! $Source$ $Revision$
-! MASDEV4_7 solver 2006/05/18 13:07:25
-!-----------------------------------------------------------------
 !     ####################
       MODULE MODI_ZSOLVER_INV
 !     ####################
@@ -350,7 +345,8 @@ INTEGER :: ZDXM2, ZDYM2
      ZDXM2 = PDXHATM*PDXHATM
      !
      IF (LWEST_ll(HSPLITTING='B')) THEN
-        !$acc kernels loop independent collapse(2) async
+        !$acc kernels async
+        !$acc loop independent collapse(2)
         DO JK=IKB,IKE
            DO JJ = IJB,IJE
               PF_1_Y(IIB-1,JJ,JK) = PF_1_Y(IIB,JJ,JK) - PY(IIB-1,JJ,JK)*ZDXM2/PRHOM(JK)
@@ -360,7 +356,8 @@ INTEGER :: ZDXM2, ZDYM2
      END IF
      !
      IF (LEAST_ll(HSPLITTING='B')) THEN
-        !$acc kernels loop independent collapse(2) async
+        !$acc kernels async
+        !$acc loop independent collapse(2)
         DO JK=IKB,IKE
            DO JJ = IJB,IJE
               PF_1_Y(IIE+1,JJ,JK) = PF_1_Y(IIE,JJ,JK) + PY(IIE+1,JJ,JK)*ZDXM2/PRHOM(JK)
@@ -373,7 +370,8 @@ INTEGER :: ZDXM2, ZDYM2
      !    we set the solution at the corner point by the condition:
      !    dxm ( P ) = 0
      IF (LWEST_ll(HSPLITTING='B')) THEN
-        !$acc kernels loop independent async
+        !$acc kernels async
+        !$acc loop independent
         DO JJ = IJB,IJE
            PF_1_Y(IIB-1,JJ,IKB-1) = PF_1_Y(IIB,JJ,IKB-1) 
            PF_1_Y(IIB-1,JJ,IKE+1) = PF_1_Y(IIB,JJ,IKE+1)
@@ -381,7 +379,8 @@ INTEGER :: ZDXM2, ZDYM2
         !$acc end kernels
      END IF
      IF (LEAST_ll(HSPLITTING='B')) THEN
-        !$acc kernels loop independent async
+        !$acc kernels async
+        !$acc loop independent
         DO JJ = IJB,IJE
            PF_1_Y(IIE+1,JJ,IKB-1) = PF_1_Y(IIE,JJ,IKB-1)
            PF_1_Y(IIE+1,JJ,IKE+1) = PF_1_Y(IIE,JJ,IKE+1)
@@ -407,7 +406,8 @@ INTEGER :: ZDXM2, ZDYM2
         ZDYM2 = PDYHATM*PDYHATM 
         !
         IF (LSOUTH_ll(HSPLITTING='B')) THEN
-           !$acc kernels loop independent collapse(2) async
+           !$acc kernels async
+           !$acc loop independent collapse(2)
            DO JK=IKB,IKE
               DO JI = IIB,IIE
                  PF_1_Y(JI,IJB-1,JK) = PF_1_Y(JI,IJB,JK) - PY(JI,IJB-1,JK)*ZDYM2/PRHOM(JK)
@@ -417,7 +417,8 @@ INTEGER :: ZDXM2, ZDYM2
         END IF
         !
         IF (LNORTH_ll(HSPLITTING='B')) THEN
-           !$acc kernels loop independent collapse(2) async
+           !$acc kernels async
+           !$acc loop independent collapse(2)
            DO JK=IKB,IKE
               DO JI = IIB,IIE
                  PF_1_Y(JI,IJE+1,JK) = PF_1_Y(JI,IJE,JK) + PY(JI,IJE+1,JK)*ZDYM2/PRHOM(JK)
@@ -430,7 +431,8 @@ INTEGER :: ZDXM2, ZDYM2
         !    dym ( P )  = 0
         !
         IF (LSOUTH_ll(HSPLITTING='B')) THEN
-           !$acc kernels loop independent async
+           !$acc kernels async
+           !$acc loop independent
            DO JI = IIB,IIE
               PF_1_Y(JI,IJB-1,IKB-1) = PF_1_Y(JI,IJB,IKB-1)
               PF_1_Y(JI,IJB-1,IKE+1) = PF_1_Y(JI,IJB,IKE+1)
@@ -439,7 +441,8 @@ INTEGER :: ZDXM2, ZDYM2
         END IF
         !
         IF (LNORTH_ll(HSPLITTING='B')) THEN
-           !$acc kernels loop independent async
+           !$acc kernels async
+           !$acc loop independent
            DO JI = IIB,IIE
               PF_1_Y(JI,IJE+1,IKB-1) = PF_1_Y(JI,IJE,IKB-1)
               PF_1_Y(JI,IJE+1,IKE+1) = PF_1_Y(JI,IJE,IKE+1)
-- 
GitLab