From 9eb0e1bf2327f29afebfbfde5bc1ec9264dfde12 Mon Sep 17 00:00:00 2001
From: ESCOBAR Juan <escj@nuwa>
Date: Tue, 1 Mar 2022 14:44:59 +0100
Subject: [PATCH] Juan 01/02/2022:ZSOLVER/advecuvw_rk.f90, Memory Leak with
 nvhpc22.2 with WHERE , even when GIBM FALSE /!\ -> replace DO CONCURRENT

---
 src/ZSOLVER/advecuvw_rk.f90 | 109 ++++++++++++++++++++++++------------
 1 file changed, 74 insertions(+), 35 deletions(-)

diff --git a/src/ZSOLVER/advecuvw_rk.f90 b/src/ZSOLVER/advecuvw_rk.f90
index 4e2f367d6..d7209636e 100644
--- a/src/ZSOLVER/advecuvw_rk.f90
+++ b/src/ZSOLVER/advecuvw_rk.f90
@@ -113,7 +113,8 @@ END MODULE MODI_ADVECUVW_RK
 !
 USE MODD_ARGSLIST_ll, ONLY: LIST_ll, HALO2LIST_ll
 USE MODD_CONF,        ONLY: NHALO
-USE MODD_IBM_PARAM_n, ONLY: LIBM, CIBM_ADV, XIBM_LS, XIBM_EPSI
+USE MODD_IBM_PARAM_n, ONLY: LIBM, XIBM_LS, XIBM_EPSI
+USE MODD_IBM_PARAM_n, ONLY: MODD_CIBM_ADV => CIBM_ADV
 USE MODD_PARAMETERS,  ONLY: JPVEXT
 USE MODD_SUB_MODEL_n, ONLY: XT_IBM_FORC
 !
@@ -217,8 +218,12 @@ TYPE(LIST_ll), POINTER      :: TZFIELDS4_ll ! list of fields to exchange
 LOGICAL :: GIBM !Intermediate variable used to work around a Cray compiler bug (CCE 13.0.0)
 REAL    :: ZIBM_EPSI !Intermediate variable used to work around a Cray compiler bug (CCE 13.0.0)
 REAL          :: ZTIME1,ZTIME2
+CHARACTER(LEN=6) :: CIBM_ADV
+LOGICAL :: GIBM_FREEZE,GIBM_LOWORD,GIBM_FORCIN
+INTEGER :: JII,JJI,JKI
 !-------------------------------------------------------------------------------
 
+
 IF (MPPDB_INITIALIZED) THEN
   !Check all IN arrays
   CALL MPPDB_CHECK(PU,"ADVECUVW_RK beg:PU")
@@ -240,9 +245,14 @@ END IF
 
 GIBM = LIBM
 ZIBM_EPSI = XIBM_EPSI
+CIBM_ADV = MODD_CIBM_ADV 
+
+GIBM_FREEZE = ( GIBM .AND. CIBM_ADV=='FREEZE' )
+GIBM_LOWORD = ( GIBM .AND. CIBM_ADV=='LOWORD' )
+GIBM_FORCIN = ( GIBM .AND. CIBM_ADV=='FORCIN' )
 
 #ifdef MNH_OPENACC
-if ( LIBM ) call Print_msg( NVERB_FATAL, 'GEN', 'ADVECUVW_RK', 'OpenACC: LIBM=T not yet implemented' )
+if ( GIBM ) call Print_msg( NVERB_FATAL, 'GEN', 'ADVECUVW_RK', 'OpenACC: LIBM=T not yet implemented' )
 #endif
 !
 !*       0.     INITIALIZATION
@@ -318,7 +328,7 @@ CALL INIT_ON_HOST_AND_DEVICE(ZWT,6e99,'ADVECUVW_RK::ZWT')
 !$acc data present( PU, PV, PW, PUT, PVT, PWT, PMXM_RHODJ, PMYM_RHODJ, PMZM_RHODJ, &
 !$acc &             PRUCT, PRVCT, PRWCT, PRUS_ADV, PRVS_ADV, PRWS_ADV,             &
 !$acc &             PRUS_OTHER, PRVS_OTHER, PRWS_OTHER, ZUT, ZVT, ZWT,             &
-!$acc &             ZRUS, ZRVS, ZRWS, ZIBM, ZBUT, ZBUTS )
+!$acc &             ZRUS, ZRVS, ZRWS, ZBUT, ZBUTS )
 
 SELECT CASE (HTEMP_SCHEME)
   CASE('RK11')
@@ -406,13 +416,13 @@ END SELECT
 !$acc update device(ZBUTS,ZBUT)
 !
 IF ( GIBM ) THEN
-!$acc kernels
+!$acc kernels present(ZIBM)
   ZIBM(:,:,:,:) = 1.
 !$acc end kernels
 END IF
 !
-IF (GIBM .AND. CIBM_ADV=='FREEZE') THEN
-!$acc kernels
+IF (GIBM_FREEZE) THEN
+!$acc kernels present(ZIBM)
   WHERE (XIBM_LS(:,:,:,2).GT.-ZIBM_EPSI) ZIBM(:,:,:,1) = 0.
   WHERE (XIBM_LS(:,:,:,3).GT.-ZIBM_EPSI) ZIBM(:,:,:,2) = 0.
   WHERE (XIBM_LS(:,:,:,4).GT.-ZIBM_EPSI) ZIBM(:,:,:,3) = 0.
@@ -420,18 +430,18 @@ IF (GIBM .AND. CIBM_ADV=='FREEZE') THEN
 ENDIF
 !
 !$acc kernels present(PRUS_ADV,PRVS_ADV,PRWS_ADV) present(ZUT,ZVT,ZWT) present(PU,PV,PW)
-PRUS_ADV = 0.
-PRVS_ADV = 0.
-PRWS_ADV = 0.
+PRUS_ADV(:,:,:) = 0.
+PRVS_ADV(:,:,:) = 0.
+PRWS_ADV(:,:,:) = 0.
 !
 !-------------------------------------------------------------------------------
 !
 !*       2.     Wind guess before RK loop
 !        --------------------------------
 !
-ZUT = PU
-ZVT = PV
-ZWT = PW
+ZUT(:,:,:) = PU(:,:,:)
+ZVT(:,:,:) = PV(:,:,:)
+ZWT(:,:,:) = PW(:,:,:)
 !$acc end kernels
 !
 #ifndef MNH_OPENACC
@@ -494,13 +504,13 @@ RKLOOP: DO JS = 1, ISPL
 !*       4.     Advection with WENO
 !        --------------------------
 !
-  IF (GIBM .AND. CIBM_ADV=='LOWORD') THEN
-!$acc kernels
+IF (GIBM_LOWORD) THEN
+!$acc kernels present(ZIBM)
     ZIBM(:,:,:,1)=ZRUS(:,:,:,JS)
     ZIBM(:,:,:,2)=ZRVS(:,:,:,JS)
     ZIBM(:,:,:,3)=ZRWS(:,:,:,JS)
 !$acc end kernels
-  ENDIF
+ENDIF
 !
 !!$TZHALO2_UT => TZHALO2MT_ll                   ! 1rst add3dfield in model_n
 !!$TZHALO2_VT => TZHALO2MT_ll%NEXT              ! 2nd  add3dfield in model_n
@@ -522,7 +532,7 @@ RKLOOP: DO JS = 1, ISPL
                        TZHALO2MT_ll )
   ENDIF
 !
-  IF (GIBM .AND. CIBM_ADV=='LOWORD') THEN
+  IF (GIBM_LOWORD) THEN
     IF (HUVW_ADV_SCHEME=='WENO_K') THEN
       CALL ADVECUVW_WENO_K (HLBCX, HLBCY,           3, ZUT, ZVT, ZWT,    &
                           PRUCT, PRVCT, PRWCT,                           &
@@ -537,10 +547,19 @@ RKLOOP: DO JS = 1, ISPL
        CALL ADVECUVW_2ND (ZUT, ZVT, ZWT, PRUCT, PRVCT, PRWCT,            &
                           ZIBM(:,:,:,1),  ZIBM(:,:,:,2),  ZIBM(:,:,:,3))
     ENDIF
-    WHERE(XIBM_LS(:,:,:,2).GT.-ZIBM_EPSI) ZRUS(:,:,:,JS)=ZIBM(:,:,:,1)
-    WHERE(XIBM_LS(:,:,:,3).GT.-ZIBM_EPSI) ZRVS(:,:,:,JS)=ZIBM(:,:,:,2)
-    WHERE(XIBM_LS(:,:,:,4).GT.-ZIBM_EPSI) ZRWS(:,:,:,JS)=ZIBM(:,:,:,3)
-    ZIBM(:,:,:,:)=1.
+! JE: Memory Leak with nvhpc22.2 with WHERE , even when GIBM FALSE /!\ -> replace DO CONCURRENT    
+  DO CONCURRENT ( JII=1:IIU , JJI=1:IJU , JKI=1:IKU )
+     IF (XIBM_LS(JII,JJI,JKI,2).GT.-ZIBM_EPSI) THEN
+        ZRUS(JII,JJI,JKI,JS)=ZIBM(JII,JJI,JKI,1)
+     END IF
+     IF (XIBM_LS(JII,JJI,JKI,3).GT.-ZIBM_EPSI) THEN
+        ZRVS(JII,JJI,JKI,JS)=ZIBM(JII,JJI,JKI,2)
+     END IF
+     IF (XIBM_LS(JII,JJI,JKI,4).GT.-ZIBM_EPSI) THEN
+        ZRWS(JII,JJI,JKI,JS)=ZIBM(JII,JJI,JKI,3)
+     END IF
+  END DO
+  ZIBM(:,:,:,:)=1.
   ENDIF
 !
   write ( ynum, '( I3 )' ) js
@@ -558,13 +577,22 @@ RKLOOP: DO JS = 1, ISPL
 ! acc update device(ZRUS(:,:,:,JS),ZRVS(:,:,:,JS),ZRWS(:,:,:,JS))
 #endif
 !
-  IF (GIBM .AND. CIBM_ADV=='FREEZE') THEN
-    WHERE(XIBM_LS(:,:,:,2).GT.-ZIBM_EPSI) ZRUS(:,:,:,JS)=ZUT(:,:,:)*PMXM_RHODJ(:,:,:)/PTSTEP
-    WHERE(XIBM_LS(:,:,:,3).GT.-ZIBM_EPSI) ZRVS(:,:,:,JS)=ZVT(:,:,:)*PMYM_RHODJ(:,:,:)/PTSTEP
-    WHERE(XIBM_LS(:,:,:,4).GT.-ZIBM_EPSI) ZRWS(:,:,:,JS)=ZWT(:,:,:)*PMZM_RHODJ(:,:,:)/PTSTEP
+  IF (GIBM_FREEZE) THEN
+  ! JE: Memory Leak with nvhpc22.2 with WHERE , even when GIBM FALSE /!\ -> replace DO CONCURRENT
+  DO CONCURRENT ( JII=1:IIU , JJI=1:IJU , JKI=1:IKU )
+     IF (XIBM_LS(JII,JJI,JKI,2).GT.-ZIBM_EPSI) THEN
+        ZRUS(JII,JJI,JKI,JS)=ZUT(JII,JJI,JKI)*PMXM_RHODJ(JII,JJI,JKI)/PTSTEP
+     END IF 
+     IF (XIBM_LS(JII,JJI,JKI,3).GT.-ZIBM_EPSI) THEN
+        ZRVS(JII,JJI,JKI,JS)=ZVT(JII,JJI,JKI)*PMYM_RHODJ(JII,JJI,JKI)/PTSTEP
+     END IF 
+     IF (XIBM_LS(JII,JJI,JKI,4).GT.-ZIBM_EPSI) THEN
+        ZRWS(JII,JJI,JKI,JS)=ZWT(JII,JJI,JKI)*PMZM_RHODJ(JII,JJI,JKI)/PTSTEP
+     END IF 
+  END DO
   ENDIF
 
-  IF (GIBM .AND. CIBM_ADV=='FORCIN') THEN
+  IF (GIBM_FORCIN) THEN
     CALL SECOND_MNH(ZTIME1)
     CALL IBM_FORCING_ADV(ZRUS(:,:,:,JS),ZRVS(:,:,:,JS),ZRWS(:,:,:,JS))
     CALL SECOND_MNH(ZTIME2)
@@ -573,49 +601,60 @@ RKLOOP: DO JS = 1, ISPL
 !
 ! Guesses at the end of the RK loop
 !
-!$acc kernels present(PRUS_ADV,PRVS_ADV,PRWS_ADV,ZBUTS) present(ZRUS,ZRVS,ZRWS,ZIBM)
+
   IF ( .NOT. GIBM ) THEN
+!$acc kernels present(PRUS_ADV,PRVS_ADV,PRWS_ADV,ZBUTS) present(ZRUS,ZRVS,ZRWS)     
     PRUS_ADV(:,:,:) = PRUS_ADV(:,:,:) + ZBUTS(JS) * ZRUS(:,:,:,JS)
     PRVS_ADV(:,:,:) = PRVS_ADV(:,:,:) + ZBUTS(JS) * ZRVS(:,:,:,JS)
     PRWS_ADV(:,:,:) = PRWS_ADV(:,:,:) + ZBUTS(JS) * ZRWS(:,:,:,JS)
-  ELSE
-    PRUS_ADV(:,:,:) = PRUS_ADV(:,:,:) + ZBUTS(JI) * ZRUS(:,:,:,JI) * ZIBM(:,:,:,1)
-    PRVS_ADV(:,:,:) = PRVS_ADV(:,:,:) + ZBUTS(JI) * ZRVS(:,:,:,JI) * ZIBM(:,:,:,2)
-    PRWS_ADV(:,:,:) = PRWS_ADV(:,:,:) + ZBUTS(JI) * ZRWS(:,:,:,JI) * ZIBM(:,:,:,3)
+!$acc end kernels    
+ ELSE
+!$acc kernels present(PRUS_ADV,PRVS_ADV,PRWS_ADV,ZBUTS) present(ZRUS,ZRVS,ZRWS,ZIBM)    
+    PRUS_ADV(:,:,:) = PRUS_ADV(:,:,:) + ZBUTS(JS) * ZRUS(:,:,:,JS) * ZIBM(:,:,:,1)
+    PRVS_ADV(:,:,:) = PRVS_ADV(:,:,:) + ZBUTS(JS) * ZRVS(:,:,:,JS) * ZIBM(:,:,:,2)
+    PRWS_ADV(:,:,:) = PRWS_ADV(:,:,:) + ZBUTS(JS) * ZRWS(:,:,:,JS) * ZIBM(:,:,:,3)
+!$acc end kernels    
   END IF
-!$acc end kernels
+
 !
   IF ( JS < ISPL ) THEN
-!$acc kernels present(ZUT,ZVT,ZWT) present(ZBUT) present(PU,PV,PW)        &
-!$acc & present(ZRUS,ZRVS,ZRWS) present(PRUS_OTHER,PRVS_OTHER,PRWS_OTHER) &
-!$acc & present(PMXM_RHODJ,PMYM_RHODJ,PMZM_RHODJ, ZIBM )
+!PW: note: 20211025: kernels split because performance problems if in 1 block with NVHPC 21.9
+! !$acc kernels present(ZUT,ZVT,ZWT) present(ZBUT) present(PU,PV,PW)             &
+! !$acc & present(ZRUS,ZRVS,ZRWS,ZIBM) present(PRUS_OTHER,PRVS_OTHER,PRWS_OTHER) &
+! !$acc & present(PMXM_RHODJ,PMYM_RHODJ,PMZM_RHODJ)
 !
+!$acc kernels present( ZUT, ZVT, ZWT )
     ZUT(:,:,:) = PU(:,:,:)
     ZVT(:,:,:) = PV(:,:,:)
     ZWT(:,:,:) = PW(:,:,:)
+!$acc end kernels
 !
     DO JI = 1, JS
 !
 ! Intermediate guesses inside the RK loop
 !
       IF ( .NOT. GIBM ) THEN
+!$acc kernels present( ZUT, ZVT, ZWT, ZRUS, ZRVS, ZRWS )
         ZUT(:,:,:) = ZUT(:,:,:) + ZBUT(JS,JI) *  PTSTEP *  &
           ( ZRUS(:,:,:,JI) + PRUS_OTHER(:,:,:) ) / PMXM_RHODJ(:,:,:)
         ZVT(:,:,:) = ZVT(:,:,:) + ZBUT(JS,JI) *  PTSTEP *  &
           ( ZRVS(:,:,:,JI) + PRVS_OTHER(:,:,:) ) / PMYM_RHODJ(:,:,:)
         ZWT(:,:,:) = ZWT(:,:,:) + ZBUT(JS,JI) *  PTSTEP *  &
           ( ZRWS(:,:,:,JI) + PRWS_OTHER(:,:,:) ) / PMZM_RHODJ(:,:,:)
+!$acc end kernels
       ELSE
+!$acc kernels present( ZUT, ZVT, ZWT, ZRUS, ZRVS, ZRWS, ZIBM )
         ZUT(:,:,:) = ZUT(:,:,:) + ZBUT(JS,JI) *  PTSTEP *  &
           ( ZRUS(:,:,:,JI) + PRUS_OTHER(:,:,:) ) / PMXM_RHODJ(:,:,:) * ZIBM(:,:,:,1)
         ZVT(:,:,:) = ZVT(:,:,:) + ZBUT(JS,JI) *  PTSTEP *  &
           ( ZRVS(:,:,:,JI) + PRVS_OTHER(:,:,:) ) / PMYM_RHODJ(:,:,:) * ZIBM(:,:,:,2)
         ZWT(:,:,:) = ZWT(:,:,:) + ZBUT(JS,JI) *  PTSTEP *  &
           ( ZRWS(:,:,:,JI) + PRWS_OTHER(:,:,:) ) / PMZM_RHODJ(:,:,:) * ZIBM(:,:,:,3)
+!$acc end kernels
       END IF
 !
     END DO
-!$acc end kernels
+! !$acc end kernels
 !$acc update self(ZUT,ZVT,ZWT)
   END IF
 !
-- 
GitLab