From 495c3aa35d1b8f4805500f424c9782f4af533367 Mon Sep 17 00:00:00 2001
From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr>
Date: Wed, 16 Nov 2022 16:46:22 +0100
Subject: [PATCH] Juan 16/11/2022: MNH/tridiag_wind.f90, Cray/14.X optimization:
 replace DO CONCURRENT with !$mnh_do_concurrent and put the sequential Z loop
 ("!$acc loop seq") inside the parallel 2D_XY loop

---
 src/MNH/tridiag_wind.f90 | 62 +++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 39 deletions(-)

diff --git a/src/MNH/tridiag_wind.f90 b/src/MNH/tridiag_wind.f90
index c26ecad00..58dd63f4d 100644
--- a/src/MNH/tridiag_wind.f90
+++ b/src/MNH/tridiag_wind.f90
@@ -227,28 +227,28 @@ IKE=KKU-JPVEXT_TURB*KKL
 !
 ! 
 !$acc kernels ! async
-DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
+!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
 ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB)  + PTSTEP*PSOURCE(JI,JJ,IKB) -   &
      PEXPL / PRHODJA(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB))
-END DO !CONCURRENT
+!$mnh_end_do() !CONCURRENT
 !$acc end kernels
 !
 !$acc kernels ! async
-DO CONCURRENT (JK=IKTB+1:IKTE-1,JJ=1:JJU,JI=1:JIU)
+!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1)
   ZY(JI,JJ,JK)= PVARM(JI,JJ,JK)  + PTSTEP*PSOURCE(JI,JJ,JK) -               &
       PEXPL / PRHODJA(JI,JJ,JK) *                                          &
                              ( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK)                &
                               -PVARM(JI,JJ,JK)*(PA(JI,JJ,JK)+PA(JI,JJ,JK+KKL))   &
                               +PVARM(JI,JJ,JK+KKL)*PA(JI,JJ,JK+KKL)              &
                               )
-END DO !CONCURRENT  
+!$mnh_end_do() !CONCURRENT  
 !$acc end kernels
 ! 
 !$acc kernels ! async
-DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
+!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
 ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) +               &
      PEXPL / PRHODJA(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL))
-END DO !CONCURRENT
+!$mnh_end_do() !CONCURRENT
 !$acc end kernels
 !
 ! acc wait
@@ -263,25 +263,17 @@ IF ( PIMPL > 1.E-10 ) THEN
   !  going up
   !
   !$acc kernels
-#ifdef MNH_COMPILER_NVHPC
-   !$acc loop independent collapse(2)
-#endif
-   DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
+  !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
       ZBET(JI,JJ) = 1. - PIMPL * (  PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) &  
            + PCOEFS(JI,JJ) *  PTSTEP        )   ! bet = b(ikb)
       PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
-  END DO !CONCURRENT
+  !$mnh_end_do() !CONCURRENT
   !$acc end kernels
   !
   !$acc parallel
-  !$acc loop  seq
-  DO JK = IKB+KKL,IKE-KKL,KKL
-#ifdef MNH_COMPILER_NVHPC
-     !$acc loop independent gang, vector collapse(2)
-#else
-     !$acc loop independent
-#endif
-     DO  CONCURRENT ( JJ=1:JJU , JI=1:JIU )  
+  !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)  
+     !$acc loop  seq
+     DO JK = IKB+KKL,IKE-KKL,KKL
         ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ)  
         ! gam(k) = c(k-1) / bet
         ZBET(JI,JJ)    = 1. - PIMPL * (  PA(JI,JJ,JK) * (1. + ZGAM(JI,JJ,JK))  &
@@ -292,15 +284,12 @@ IF ( PIMPL > 1.E-10 ) THEN
              * PVARP(JI,JJ,JK-KKL)                                  &
              ) / ZBET(JI,JJ)
         ! res(k) = (y(k) -a(k)*res(k-1))/ bet 
-     END DO ! CONCURRENT
-  END DO
+     END DO
+  !$mnh_end_do() ! CONCURRENT
   !$acc end parallel  
   !$acc kernels
   ! special treatment for the last level
-#ifdef MNH_COMPILER_NVHPC
-  !$acc loop independent gang, vector collapse(2)
-#endif
-  DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
+  !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)  
      ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ) 
      ! gam(k) = c(k-1) / bet
      ZBET(JI,JJ)    = 1. - PIMPL * (  PA(JI,JJ,IKE) * (1. + ZGAM(JI,JJ,IKE))  &
@@ -310,31 +299,26 @@ IF ( PIMPL > 1.E-10 ) THEN
           * PVARP(JI,JJ,IKE-KKL)                      &
           ) / ZBET(JI,JJ)
      ! res(k) = (y(k) -a(k)*res(k-1))/ bet
-  END DO !CONCURRENT
+  !$mnh_end_do() !CONCURRENT
   !$acc end kernels
   !
   !  going down 
   !
   !$acc parallel
+  !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
   !$acc loop seq
-  DO JK = IKE-KKL,IKB,-1*KKL
-#ifdef MNH_COMPILER_NVHPC
-     !$acc loop gang, vector collapse(2)
-#else
-     !$acc loop independent
-#endif
-      DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
+      DO JK = IKE-KKL,IKB,-1*KKL
          PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL)
-      END DO !CONCURRENT   
-  END DO
+      END DO    
+  !$mnh_end_do() !CONCURRENT
   !$acc end parallel
 !
 ELSE
 ! 
    !$acc kernels
-   DO CONCURRENT (JK=IKTB:IKTE,JJ=1:JJU,JI=1:JIU)
+   !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE)
       PVARP(JI,JJ,JK) = ZY(JI,JJ,JK)
-   END DO !CONCURRENT   
+   !$mnh_end_do() !CONCURRENT   
 !$acc end kernels
 !
 END IF 
@@ -344,10 +328,10 @@ END IF
 !            ----------------------------------------
 !
 !$acc kernels
-DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
+!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)  
    PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB)
    PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE)
-END DO !CONCURRENT
+!$mnh_end_do() !CONCURRENT
 !$acc end kernels
 
 if ( mppdb_initialized ) then
-- 
GitLab