From e566a3b138303cf0a8b09e67f736668ec6d0e67a Mon Sep 17 00:00:00 2001
From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr>
Date: Tue, 24 Jan 2023 11:12:18 +0100
Subject: [PATCH] Philippe 24/01/2023: OpenACC: put XRU/V/WS_PRES always on GPU
 + reduce data transfers

---
 src/MNH/ini_modeln.f90 |  3 ++-
 src/MNH/modeln.f90     | 57 ++++++++++++++++++++++++++----------------
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/src/MNH/ini_modeln.f90 b/src/MNH/ini_modeln.f90
index 626593b0c..962c404f2 100644
--- a/src/MNH/ini_modeln.f90
+++ b/src/MNH/ini_modeln.f90
@@ -856,6 +856,7 @@ ALLOCATE(XRWS(IIU,IJU,IKU))     ; XRWS = 0.0
 ALLOCATE(XRUS_PRES(IIU,IJU,IKU)); XRUS_PRES = 0.0
 ALLOCATE(XRVS_PRES(IIU,IJU,IKU)); XRVS_PRES = 0.0
 ALLOCATE(XRWS_PRES(IIU,IJU,IKU)); XRWS_PRES = 0.0
+!$acc enter data copyin( XRUS_PRES, XRVS_PRES, XRWS_PRES )
 ALLOCATE(XRTHS(IIU,IJU,IKU))    ; XRTHS = 0.0
 !$acc enter data copyin(XRTHS)
 ALLOCATE(XRTHS_CLD(IIU,IJU,IKU)); XRTHS_CLD = 0.0
@@ -1941,7 +1942,7 @@ CALL READ_FIELD(KMI,TPINIFILE,IIU,IJU,IKU,                                    &
                 ZIBM_LS,XIBM_XMUT,XUMEANW,XVMEANW,XWMEANW,XUMEANN,XVMEANN,    &
                 XWMEANN,XUMEANE,XVMEANE,XWMEANE,XUMEANS,XVMEANS,XWMEANS       )
 
-!$acc update device( XPABST, XRT )
+!$acc update device( XPABST, XRT, XRUS_PRES, XRVS_PRES, XRWS_PRES )
 !
 !-------------------------------------------------------------------------------
 !
diff --git a/src/MNH/modeln.f90 b/src/MNH/modeln.f90
index a791fc8cb..d79846939 100644
--- a/src/MNH/modeln.f90
+++ b/src/MNH/modeln.f90
@@ -548,10 +548,17 @@ LOGICAL :: KACTIT
 LOGICAL :: KSEDI
 LOGICAL :: KHHONI
 !
+#ifndef MNH_OPENACC
 REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZRUS,ZRVS
+REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZRWS
+REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZPABST !To give pressure at t
+                                                     ! (and not t+1) to resolved_cloud
+#else
+REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZRUS,ZRVS
 REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZRWS
 REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZPABST !To give pressure at t
                                                      ! (and not t+1) to resolved_cloud
+#endif
 REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZJ
 !
 TYPE(LIST_ll), POINTER :: TZFIELDC_ll   ! list of fields to exchange
@@ -576,15 +583,16 @@ VSIGQSAT = VSIGQSAT_MODD
 TZBAKFILE=> NULL()
 TZOUTFILE=> NULL()
 
+#ifndef MNH_OPENACC
 allocate( ZRUS  (SIZE(XTHT,1),SIZE(XTHT,2),SIZE(XTHT,3)) )
 allocate( ZRVS  (SIZE(XTHT,1),SIZE(XTHT,2),SIZE(XTHT,3)) )
-#ifndef MNH_OPENACC
 allocate( ZRWS  (SIZE(XTHT,1),SIZE(XTHT,2),SIZE(XTHT,3)) )
 allocate( ZPABST(SIZE(XTHT,1),SIZE(XTHT,2),SIZE(XTHT,3)) )
 #else
 !Pin positions in the pools of MNH memory
 CALL MNH_MEM_POSITION_PIN()
-
+CALL MNH_MEM_GET( ZRUS,   SIZE( XTHT, 1 ), SIZE( XTHT, 2 ), SIZE( XTHT, 3 ) )
+CALL MNH_MEM_GET( ZRVS,   SIZE( XTHT, 1 ), SIZE( XTHT, 2 ), SIZE( XTHT, 3 ) )
 CALL MNH_MEM_GET( ZRWS,   SIZE( XTHT, 1 ), SIZE( XTHT, 2 ), SIZE( XTHT, 3 ) )
 CALL MNH_MEM_GET( ZPABST, SIZE( XTHT, 1 ), SIZE( XTHT, 2 ), SIZE( XTHT, 3 ) )
 #endif
@@ -1624,17 +1632,17 @@ ZTIME1 = ZTIME2
 XTIME_BU_PROCESS = 0.
 XTIME_LES_BU_PROCESS = 0.
 !
-!$acc update device(XRTHS)
 !
 !XRWS_PRES copy and not copyout (hidden in UPDATE_HALO)
-!$acc data create ( XUT, XVT, XWT )                                  &
-!$acc &    present( XTHT, XRT, XPABST, XTHVREF, XRHODJ )             &
-!$acc &    present( XDXX, XDYY, XDZZ, XDZX, XDZY, XRUS, XRVS, XRWS ) &
-!$acc &    copyin ( XSVT, XRTHS_CLD, XRRS_CLD )                      &
-!$acc &    copy   ( XRRS, XRWS_PRES )
+!$acc data create ( XUT, XVT, XWT )                                             &
+!$acc &    present( XTHT, XRT, XPABST, XTHVREF, XRHODJ )                        &
+!$acc &    present( XDXX, XDYY, XDZZ, XDZX, XDZY, XRUS, XRVS, XRWS, XRWS_PRES ) &
+!$acc &    copyin ( XSVT, XRTHS_CLD, XRRS_CLD )                                 &
+!$acc &    copy   ( XRRS )
 !
-!$acc update device(XUT, XVT, XWT, XTHT, XRT, XRHODJ)
-!$acc update device(XRUS, XRVS, XRWS)
+!$acc update device( XRUS, XRVS, XRWS )
+!$acc update device( XRUS_PRES, XRVS_PRES, XRWS_PRES )
+!$acc update device( XUT, XVT, XWT, XTHT, XRT, XRHODJ, XRTHS )
 !
 !$acc data copyin (XTKET, XRSVS_CLD) &
 !$acc &    copy   (XRTKES, XRSVS)    &
@@ -1672,8 +1680,8 @@ CALL GRAVITY_IMPL ( CLBCX, CLBCY, NRR, NRRL, NRRI,XTSTEP,            &
                  XTHT, XRT, XTHVREF, XRHODJ, XRWS, XRTHS, XRRS,      &
                  XRTHS_CLD, XRRS_CLD                                 )
 !
-! At the initial instant the difference with the ref state creates a 
-! vertical velocity production that must not be advected as it is 
+! At the initial instant the difference with the ref state creates a
+! vertical velocity production that must not be advected as it is
 ! compensated by the pressure gradient
 !
 IF (KTCOUNT == 1 .AND. CCONF=='START') THEN
@@ -1733,7 +1741,6 @@ IF ((CUVW_ADV_SCHEME(1:3)=='CEN') .AND. (CTEMP_SCHEME == 'LEFR')) THEN
   END IF
 ELSE
 
-!$acc data copyin(XRUS_PRES, XRVS_PRES)
   CALL ADVECTION_UVW(CUVW_ADV_SCHEME, CTEMP_SCHEME,                  &
                  NWENO_ORDER, LSPLIT_WENO,                           &
                  CLBCX, CLBCY, XTSTEP,                               &
@@ -1741,7 +1748,6 @@ ELSE
                  XRHODJ, XDXX, XDYY, XDZZ, XDZX, XDZY,               &
                  XRUS, XRVS, XRWS,                                   &
                  XRUS_PRES, XRVS_PRES, XRWS_PRES                     )
-!$acc end data
 END IF
 !
 !$acc end data
@@ -1766,10 +1772,11 @@ END IF
 !
 ZTIME1 = ZTIME2
 !
-!$acc update self( XRUS, XRVS, XRWS )
-ZRUS=XRUS
-ZRVS=XRVS
-ZRWS=XRWS
+!$acc kernels present(ZRUS,ZRVS,ZRWS)
+ZRUS(:,:,:)=XRUS(:,:,:)
+ZRVS(:,:,:)=XRVS(:,:,:)
+ZRWS(:,:,:)=XRWS(:,:,:)
+!$acc end kernels
 !
 if ( .not. l1d ) then
   if ( lbudget_u ) call Budget_store_init( tbudgets(NBUDGET_U), 'PRES', xrus(:, :, :) )
@@ -1783,6 +1790,7 @@ CALL MPPDB_CHECKLB(XLBYVM,"modeln XLBYVM",PRECISION,'LBYV',NRIMY)
 CALL MPPDB_CHECKLB(XLBXUS,"modeln XLBXUS",PRECISION,'LBXU',NRIMX)
 CALL MPPDB_CHECKLB(XLBYVS,"modeln XLBYVS",PRECISION,'LBYV',NRIMY)
 !
+!$acc update self( XRUS, XRVS, XRWS )
 CALL RAD_BOUND (CLBCX,CLBCY,CTURB,XCARPKMAX,             &
                 XTSTEP,                                  &
                 XDXHAT, XDYHAT, XZHAT,                   &
@@ -1792,9 +1800,11 @@ CALL RAD_BOUND (CLBCX,CLBCY,CTURB,XCARPKMAX,             &
                 XCPHASE, XCPHASE_PBL, XRHODJ,            &
                 XTKET,XRUS, XRVS, XRWS                   )
 !$acc update device( XRUS, XRVS, XRWS )
-ZRUS=XRUS-ZRUS
-ZRVS=XRVS-ZRVS
-ZRWS=XRWS-ZRWS
+!$acc kernels present(ZRUS,ZRVS,ZRWS)
+ZRUS(:,:,:)=XRUS(:,:,:)-ZRUS(:,:,:)
+ZRVS(:,:,:)=XRVS(:,:,:)-ZRVS(:,:,:)
+ZRWS(:,:,:)=XRWS(:,:,:)-ZRWS(:,:,:)
+!$acc end kernels
 !
 CALL SECOND_MNH2(ZTIME2)
 !
@@ -1834,9 +1844,12 @@ IF(.NOT. L1D) THEN
 !$acc end data
 !$acc update self( XRUS, XRVS, XRWS, XPABST )
 !
+!$acc kernels
   XRUS_PRES = XRUS - XRUS_PRES + ZRUS
   XRVS_PRES = XRVS - XRVS_PRES + ZRVS
   XRWS_PRES = XRWS - XRWS_PRES + ZRWS
+!$acc end kernels
+!$acc update self( XRUS_PRES, XRVS_PRES, XRWS_PRES )
   CALL MPPDB_CHECK3DM("after pressurez:XRU/V/WS",PRECISION,XRUS,XRVS,XRWS)
 !
 END IF
@@ -2472,7 +2485,7 @@ IF (OEXIT) THEN
 END IF
 
 #ifndef MNH_OPENACC
-DEALLOCATE( ZRWS )
+DEALLOCATE( ZRUS,ZRVS,ZRWS )
 DEALLOCATE( ZPABST )
 #else
 CALL MNH_MEM_RELEASE()
-- 
GitLab