From 93db0e88e79a3915b728971217fe897660f1a118 Mon Sep 17 00:00:00 2001 From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr> Date: Wed, 18 Dec 2019 15:07:28 +0100 Subject: [PATCH] Philippe 18/12/2019: OpenACC: workaround PGI bugs --- src/MNH/advecuvw_rk.f90 | 15 ++++++++------- src/MNH/ice4_fast_rg.f90 | 4 ++-- src/MNH/ice4_sedimentation_split.f90 | 8 +++++++- src/MNH/prandtl.f90 | 15 ++++++++------- src/MNH/turb_hor_vw.f90 | 24 +++++++++++++++--------- 5 files changed, 40 insertions(+), 26 deletions(-) diff --git a/src/MNH/advecuvw_rk.f90 b/src/MNH/advecuvw_rk.f90 index 34153fbe7..62b012caa 100644 --- a/src/MNH/advecuvw_rk.f90 +++ b/src/MNH/advecuvw_rk.f90 @@ -256,9 +256,6 @@ allocate(ZWT(SIZE(PUT,1),SIZE(PUT,2),SIZE(PUT,3))) #endif #ifdef MNH_OPENACC -!Data zone necessary to work around a bug seen with PGI at least up to 18.10 -!If not, update on a section of ZRUS will update a section of the total size of ZRUS -!$acc data present(ZRUS,ZRVS,ZRWS) CALL INIT_ON_HOST_AND_DEVICE(ZUT,4e99,'ADVECUVW_RK::ZUT') CALL INIT_ON_HOST_AND_DEVICE(ZVT,5e99,'ADVECUVW_RK::ZVT') CALL INIT_ON_HOST_AND_DEVICE(ZWT,6e99,'ADVECUVW_RK::ZWT') @@ -426,10 +423,15 @@ INBVAR = 3 CALL INIT_HALO2_ll(TZHALO2MT_ll,INBVAR,SIZE(PUT,1),SIZE(PUT,2),SIZE(PWT,3)) ! !$acc kernels -ZRUS = 0. -ZRVS = 0. -ZRWS = 0. +ZRUS(:, :, :, : ) = 0. +ZRVS(:, :, :, : ) = 0. +ZRWS(:, :, :, : ) = 0. !$acc end kernels + +!Necessary to work around a PGI bug (19.10) +!because following update ZRUS(:,:,:,JS) are done on the WHOLE array +!$acc update self(ZRUS,ZRVS,ZRWS) + !------------------------------------------------------------------------------- ! !* 3. BEGINNING of Runge-Kutta loop @@ -528,7 +530,6 @@ CALL CLEANLIST_ll(TZFIELDMT_ll) CALL DEL_HALO2_ll(TZHALO2MT_ll) !$acc update self(PRUS_ADV,PRVS_ADV,PRWS_ADV) !------------------------------------------------------------------------------- -!$acc end data ! IF (MPPDB_INITIALIZED) THEN !Check all OUT arrays diff --git a/src/MNH/ice4_fast_rg.f90 b/src/MNH/ice4_fast_rg.f90 index c319ceb3e..7d46ac697 100644 --- a/src/MNH/ice4_fast_rg.f90 +++ b/src/MNH/ice4_fast_rg.f90 @@ -367,10 +367,10 @@ DO JJ = 1, SIZE(GDRY) END IF END DO !$acc end kernels -!PW: BUG: this is necessary to get correct results (PGI 18.10) +!PW: BUG: this is necessary to get correct results (PGI 18.10, 19.10) ! !$acc update self(GDRY,IGDRY) ! !$acc update self(IGDRY) -! IF(JJ==-999) print *,'PW: IGDRY=',IGDRY,COUNT(GDRY) +IF(JJ==-999) print *,jj ! !$acc kernels IF(GDSOFT) THEN !$acc kernels diff --git a/src/MNH/ice4_sedimentation_split.f90 b/src/MNH/ice4_sedimentation_split.f90 index 89b442ffc..9f81ca088 100644 --- a/src/MNH/ice4_sedimentation_split.f90 +++ b/src/MNH/ice4_sedimentation_split.f90 @@ -658,13 +658,19 @@ if (JK==-9999) print *,'PW: ISEDIM=',ISEDIM ENDIF ENDDO ZREMAINT(:,:) = ZREMAINT(:,:) - ZMAX_TSTEP(:,:) -!$acc loop independent private(ZMRCHANGE) +!PW:BUG PGI: do not set independent (wrong results with PGI 19.10) +!acc loop independent private(ZMRCHANGE) DO JK = KKTB , KKTE ZMRCHANGE(:,:) = ZMAX_TSTEP(:,:) * POORHODZ(:,:,JK)*(ZWSED(:,:,JK+KKL)-ZWSED(:,:,JK)) PRXT(:,:,JK) = PRXT(:,:,JK) + ZMRCHANGE(:,:) + PPRXS(:,:,JK) * ZMAX_TSTEP(:,:) PRXS(:,:,JK) = PRXS(:,:,JK) + ZMRCHANGE(:,:) * ZINVTSTEP ENDDO +!$acc end kernels +!$acc update self(ZWSED,ZMAX_TSTEP,PINPRX) +!PW: BUG PGI: done on CPU to work around a PGI 19.10 bug PINPRX(:,:) = PINPRX(:,:) + ZWSED(:,:,KKB) / XRHOLW * (ZMAX_TSTEP(:,:) * ZINVTSTEP) +!$acc update device(PINPRX) +!$acc kernels IF (GPRESENT_PFPR) THEN !$acc loop independent DO JK = KKTB , KKTE diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90 index b5d267575..8ec0361d6 100644 --- a/src/MNH/prandtl.f90 +++ b/src/MNH/prandtl.f90 @@ -611,20 +611,21 @@ call Print_msg( NVERB_WARNING, 'GEN', 'PRANDTL', 'OpenACC: L2D=.T. and KRR=0 not #else PRED2TH3(:,:,:) = BR_P2(PREDTH1(:,:,:)) + BR_P2(XCTV)*BR_P2(PBLL_O_E(:,:,:)) * ZTMP2_DEVICE #endif -!$acc end kernels -!$acc kernels async +!PW: merge kernels + remove async to prevent compiler crash...(bug PGI 19.10) +! !$acc end kernels +! !$acc kernels async PRED2TH3(:,:,IKB)=PRED2TH3(:,:,IKB+KKL) -!$acc end kernels +! !$acc end kernels #endif ! -!$acc kernels async +! !$acc kernels async PRED2R3(:,:,:) = 0. -!$acc end kernels +! !$acc end kernels ! -!$acc kernels async +! !$acc kernels async PRED2THR3(:,:,:) = 0. !$acc end kernels -!$acc wait +! !$acc wait ! END IF ! diff --git a/src/MNH/turb_hor_vw.f90 b/src/MNH/turb_hor_vw.f90 index 8f92eb598..4dbcb63e6 100644 --- a/src/MNH/turb_hor_vw.f90 +++ b/src/MNH/turb_hor_vw.f90 @@ -467,7 +467,7 @@ END IF IF (LLES_CALL .AND. KSPLT==1) THEN CALL SECOND_MNH(ZTIME1) #ifndef MNH_OPENACC - CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(ZFLX)), X_LES_SUBGRID_WV , .TRUE. ) + CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(ZFLX)), X_LES_SUBGRID_WV , .TRUE. ) CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(GZ_V_VW(1,IKU,1,PVM,PDZZ)*ZFLX)), X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) CALL LES_MEAN_SUBGRID( MZF(1,IKU,1,MYF(GY_W_VW(1,IKU,1,PWM,PDYY,PDZZ,PDZY)*ZFLX)), X_LES_RES_ddxa_W_SBG_UaW , .TRUE.) CALL LES_MEAN_SUBGRID( MXF(GY_M_V(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY)*MZF(1,IKU,1,ZFLX)), & @@ -486,7 +486,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL MYF_DEVICE(ZFLX,ZTMP1_DEVICE) CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE) - CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WV , .TRUE. ) + CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WV , .TRUE. ) ! CALL GZ_V_VW_DEVICE(1,IKU,1,PVM,PDZZ,ZTMP1_DEVICE) !$acc kernels @@ -498,7 +498,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GY_W_VW_DEVICE(1,IKU,1,PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) !$acc kernels - ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE) @@ -506,9 +506,12 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZF_DEVICE(1,IKU,1,ZFLX,ZTMP2_DEVICE) - !$acc kernels - ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE - !$acc end kernels +!PW: kernel removed to work around a PGI 19.10 bug +!$acc update self(ZTMP1_DEVICE,ZTMP2_DEVICE) +! !$acc kernels + ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) +! !$acc end kernels +!$acc update device(ZTMP3_DEVICE) CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.) ! @@ -518,9 +521,12 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZF_DEVICE(1,IKU,1,ZFLX,ZTMP2_DEVICE) - !$acc kernels - ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE - !$acc end kernels +!PW: kernel removed to work around a PGI 19.10 bug +!$acc update self(ZTMP1_DEVICE,ZTMP2_DEVICE) +! !$acc kernels + ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) +! !$acc end kernels +!$acc update device(ZTMP3_DEVICE) CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.) ! -- GitLab