Skip to content
Snippets Groups Projects
Commit b9a808ee authored by ESCOBAR MUNOZ Juan
Browse files

Juan 20/03/2023:ZSOLVER/turb_hor_uv.f90, Bypass Nvhpc BUG <-> zflx partially...

Juan 20/03/2023: ZSOLVER/turb_hor_uv.f90, bypass NVHPC bug ("zflx partially present" / "error 700: Illegal address during kernel execution"): remove the "acc loop ..." directives and replace DO CONCURRENT with mnh_do_concurrent, which also gives better performance
parent 60c071be
No related branches found
No related tags found
No related merge requests found
......@@ -316,16 +316,13 @@ IKB = 1+JPVEXT
IKE = SIZE(PUM,3)-JPVEXT
!
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU )
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU )
#ifndef MNH_BITREP
ZDIRSINZW(JI,JJ) = SQRT( 1. - PDIRCOSZW(JI,JJ)**2 )
#else
ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) )
#endif
END DO
!$mnh_end_do()
!$acc end kernels
!
#ifndef MNH_OPENACC
......@@ -356,12 +353,9 @@ CALL MXM_DEVICE(PK,ZTMP1_DEVICE)
CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
IF (.NOT. L2D) THEN
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZFLX(JI,JJ,JK)= - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK))
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
ELSE
!$acc kernels present_cr(ZFLX)
......@@ -465,7 +459,7 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE
#endif
!
! extrapolates this flux under the ground with the surface flux
!$acc parallel present_cr(ZFLX)
!$acc kernels present_cr(ZFLX)
#ifndef MNH_BITREP
!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
ZFLX(:,:,IKB-1) = &
......@@ -480,7 +474,10 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE
+PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) )
!$mnh_end_expand_array()
#else
#ifdef MNH_COMPILER_NVHPC
!PW: BUG: commented out 'acc loop independent collapse(2)' to work around a compiler bug (NVHPC 21.1)
!$mnh_undef(OPENACC)
#endif
!$mnh_expand_array(JI=1:JIU,JJ=1:JJU)
ZFLX(:,:,IKB-1) = &
PTAU11M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * BR_P2(PDIRCOSZW(:,:)) &
......@@ -493,8 +490,11 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE
PDIRCOSZW(:,:) * ZDIRSINZW(:,:) &
+PVSLOPEM(:,:) * (BR_P2(PCOSSLOPE(:,:)) - BR_P2(PSINSLOPE(:,:))) * ZDIRSINZW(:,:) )
!$mnh_end_expand_array()
#ifdef MNH_COMPILER_NVHPC
!$mnh_define(OPENACC)
#endif
#endif
!$acc end parallel
!$acc end kernels
!
#ifndef MNH_OPENACC
ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( MYM( ZFLX(:,:,IKB-1:IKB-1) ) ) &
......@@ -542,69 +542,48 @@ END IF
#else
CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MXM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL DYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
IF (.NOT. LFLAT) THEN
CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE)
CALL MZM_DEVICE(PDYY,ZTMP3_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP4_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MYF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MXM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE )
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
ELSE
!$acc kernels present_cr(PRUS)
......@@ -626,69 +605,48 @@ END IF
#else
CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MYM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL DXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
IF (.NOT. LFLAT) THEN
CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE)
CALL MZM_DEVICE(PDXX,ZTMP3_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP4_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MYM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MXF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE )
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
ELSE
!$acc kernels
......@@ -712,12 +670,9 @@ IF (KSPLT==1) THEN
#else
IF (.NOT. L2D) THEN
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK))
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
ELSE
!$acc kernels present_cr(ZTMP1_DEVICE)
......@@ -727,12 +682,9 @@ IF (KSPLT==1) THEN
CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE)
CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE)
!$acc kernels
#ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZWORK(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK)
END DO !CONCURRENT
!$mnh_end_do() !CONCURRENT
!$acc end kernels
#endif
!
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment