From 93db0e88e79a3915b728971217fe897660f1a118 Mon Sep 17 00:00:00 2001
From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr>
Date: Wed, 18 Dec 2019 15:07:28 +0100
Subject: [PATCH] Philippe 18/12/2019: OpenACC: workaround PGI bugs

---
 src/MNH/advecuvw_rk.f90              | 15 ++++++++-------
 src/MNH/ice4_fast_rg.f90             |  4 ++--
 src/MNH/ice4_sedimentation_split.f90 |  8 +++++++-
 src/MNH/prandtl.f90                  | 15 ++++++++-------
 src/MNH/turb_hor_vw.f90              | 24 +++++++++++++++---------
 5 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/src/MNH/advecuvw_rk.f90 b/src/MNH/advecuvw_rk.f90
index 34153fbe7..62b012caa 100644
--- a/src/MNH/advecuvw_rk.f90
+++ b/src/MNH/advecuvw_rk.f90
@@ -256,9 +256,6 @@ allocate(ZWT(SIZE(PUT,1),SIZE(PUT,2),SIZE(PUT,3)))
 #endif
 
 #ifdef MNH_OPENACC
-!Data zone necessary to work around a bug seen with PGI at least up to 18.10
-!If not, update on a section of ZRUS will update a section of the total size of ZRUS
-!$acc data present(ZRUS,ZRVS,ZRWS)
 CALL INIT_ON_HOST_AND_DEVICE(ZUT,4e99,'ADVECUVW_RK::ZUT')
 CALL INIT_ON_HOST_AND_DEVICE(ZVT,5e99,'ADVECUVW_RK::ZVT')
 CALL INIT_ON_HOST_AND_DEVICE(ZWT,6e99,'ADVECUVW_RK::ZWT')
@@ -426,10 +423,15 @@ INBVAR = 3
 CALL INIT_HALO2_ll(TZHALO2MT_ll,INBVAR,SIZE(PUT,1),SIZE(PUT,2),SIZE(PWT,3))
 !
 !$acc kernels
-ZRUS = 0.
-ZRVS = 0.
-ZRWS = 0.
+ZRUS(:, :, :, : ) = 0.
+ZRVS(:, :, :, : ) = 0.
+ZRWS(:, :, :, : ) = 0.
 !$acc end kernels
+
+!Necessary to work around a PGI bug (19.10)
+!because following update ZRUS(:,:,:,JS) are done on the WHOLE array
+!$acc update self(ZRUS,ZRVS,ZRWS)
+
 !-------------------------------------------------------------------------------
 !
 !*       3.     BEGINNING of Runge-Kutta loop
@@ -528,7 +530,6 @@ CALL CLEANLIST_ll(TZFIELDMT_ll)
 CALL  DEL_HALO2_ll(TZHALO2MT_ll)
 !$acc update self(PRUS_ADV,PRVS_ADV,PRWS_ADV)
 !-------------------------------------------------------------------------------
-!$acc end data
 !
 IF (MPPDB_INITIALIZED) THEN
   !Check all OUT arrays
diff --git a/src/MNH/ice4_fast_rg.f90 b/src/MNH/ice4_fast_rg.f90
index c319ceb3e..7d46ac697 100644
--- a/src/MNH/ice4_fast_rg.f90
+++ b/src/MNH/ice4_fast_rg.f90
@@ -367,10 +367,10 @@ DO JJ = 1, SIZE(GDRY)
   END IF
 END DO
 !$acc end kernels
-!PW: BUG: this is necessary to get correct results (PGI 18.10)
+!PW: BUG: this is necessary to get correct results (PGI 18.10, 19.10)
 ! !$acc update self(GDRY,IGDRY)
 ! !$acc update self(IGDRY)
-! IF(JJ==-999) print *,'PW: IGDRY=',IGDRY,COUNT(GDRY)
+IF(JJ==-999) print *,jj
 ! !$acc kernels
 IF(GDSOFT) THEN
 !$acc kernels
diff --git a/src/MNH/ice4_sedimentation_split.f90 b/src/MNH/ice4_sedimentation_split.f90
index 89b442ffc..9f81ca088 100644
--- a/src/MNH/ice4_sedimentation_split.f90
+++ b/src/MNH/ice4_sedimentation_split.f90
@@ -658,13 +658,19 @@ if (JK==-9999) print *,'PW: ISEDIM=',ISEDIM
     ENDIF
   ENDDO
   ZREMAINT(:,:) = ZREMAINT(:,:) - ZMAX_TSTEP(:,:)
-!$acc loop independent private(ZMRCHANGE)
+!PW:BUG PGI: do not set independent (wrong results with PGI 19.10)
+!acc loop independent private(ZMRCHANGE)
   DO JK = KKTB , KKTE
     ZMRCHANGE(:,:) = ZMAX_TSTEP(:,:) * POORHODZ(:,:,JK)*(ZWSED(:,:,JK+KKL)-ZWSED(:,:,JK))
     PRXT(:,:,JK) = PRXT(:,:,JK) + ZMRCHANGE(:,:) + PPRXS(:,:,JK) * ZMAX_TSTEP(:,:)
     PRXS(:,:,JK) = PRXS(:,:,JK) + ZMRCHANGE(:,:) * ZINVTSTEP
   ENDDO
+!$acc end kernels
+!$acc update self(ZWSED,ZMAX_TSTEP,PINPRX)
+!PW: BUG PGI: done on CPU to work around a PGI 19.10 bug
   PINPRX(:,:) = PINPRX(:,:) + ZWSED(:,:,KKB) / XRHOLW * (ZMAX_TSTEP(:,:) * ZINVTSTEP)
+!$acc update device(PINPRX)
+!$acc kernels
   IF (GPRESENT_PFPR) THEN
 !$acc loop independent
     DO JK = KKTB , KKTE
diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90
index b5d267575..8ec0361d6 100644
--- a/src/MNH/prandtl.f90
+++ b/src/MNH/prandtl.f90
@@ -611,20 +611,21 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.T. and KRR=0 not
 #else
     PRED2TH3(:,:,:) = BR_P2(PREDTH1(:,:,:)) +  BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) * ZTMP2_DEVICE
 #endif
-!$acc end kernels
-!$acc kernels async
+!PW: merge kernels + remove async to prevent compiler crash...(bug PGI 19.10)
+! !$acc end kernels
+! !$acc kernels async
     PRED2TH3(:,:,IKB)=PRED2TH3(:,:,IKB+KKL)
-!$acc end kernels
+! !$acc end kernels
 #endif
 !
-!$acc kernels async
+! !$acc kernels async
     PRED2R3(:,:,:) = 0.
-!$acc end kernels
+! !$acc end kernels
 !
-!$acc kernels async
+! !$acc kernels async
     PRED2THR3(:,:,:) = 0.
 !$acc end kernels
-!$acc wait
+! !$acc wait
 !
   END IF
 !
diff --git a/src/MNH/turb_hor_vw.f90 b/src/MNH/turb_hor_vw.f90
index 8f92eb598..4dbcb63e6 100644
--- a/src/MNH/turb_hor_vw.f90
+++ b/src/MNH/turb_hor_vw.f90
@@ -467,7 +467,7 @@ END IF
 IF (LLES_CALL .AND. KSPLT==1) THEN
   CALL SECOND_MNH(ZTIME1)
 #ifndef MNH_OPENACC
-  CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(ZFLX)), X_LES_SUBGRID_WV , .TRUE. ) 
+  CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(ZFLX)), X_LES_SUBGRID_WV , .TRUE. )
   CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(GZ_V_VW(1,IKU,1,PVM,PDZZ)*ZFLX)), X_LES_RES_ddxa_V_SBG_UaV , .TRUE.)
   CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(GY_W_VW(1,IKU,1,PWM,PDYY,PDZZ,PDZY)*ZFLX)), X_LES_RES_ddxa_W_SBG_UaW , .TRUE.)
   CALL LES_MEAN_SUBGRID( MXF(GY_M_V(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY)*MZF(1,IKU,1,ZFLX)), &
@@ -486,7 +486,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
   !
   CALL MYF_DEVICE(ZFLX,ZTMP1_DEVICE)
   CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE)
-  CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WV , .TRUE. ) 
+  CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WV , .TRUE. )
   !
   CALL GZ_V_VW_DEVICE(1,IKU,1,PVM,PDZZ,ZTMP1_DEVICE)
   !$acc kernels
@@ -498,7 +498,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
   !
   CALL GY_W_VW_DEVICE(1,IKU,1,PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE)
   !$acc kernels
-  ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX
+  ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:)
   !$acc end kernels
   CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
   CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE)
@@ -506,9 +506,12 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
   !
   CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE)
   CALL MZF_DEVICE(1,IKU,1,ZFLX,ZTMP2_DEVICE)
-  !$acc kernels
-  ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE
-  !$acc end kernels
+!PW: kernel removed to work around a PGI 19.10 bug
+!$acc update self(ZTMP1_DEVICE,ZTMP2_DEVICE)
+!   !$acc kernels
+  ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:)
+!   !$acc end kernels
+!$acc update device(ZTMP3_DEVICE)
   CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE)
   CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.)
   !
@@ -518,9 +521,12 @@ IF (LLES_CALL .AND. KSPLT==1) THEN
     !
     CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP1_DEVICE)
     CALL MZF_DEVICE(1,IKU,1,ZFLX,ZTMP2_DEVICE)
-    !$acc kernels
-    ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE
-    !$acc end kernels
+!PW: kernel removed to work around a PGI 19.10 bug
+!$acc update self(ZTMP1_DEVICE,ZTMP2_DEVICE)
+!     !$acc kernels
+    ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:)
+!     !$acc end kernels
+!$acc update device(ZTMP3_DEVICE)
     CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE)
     CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.)
     !
-- 
GitLab