diff --git a/src/MNH/turb_hor_uv.f90 b/src/MNH/turb_hor_uv.f90 index f8aff35b3f6bedd7cf6309c2ebab845e306ce294..b9d578a5941164de1f73be6e467f3197fe570942 100644 --- a/src/MNH/turb_hor_uv.f90 +++ b/src/MNH/turb_hor_uv.f90 @@ -316,16 +316,13 @@ IKB = 1+JPVEXT IKE = SIZE(PUM,3)-JPVEXT ! !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) +!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU ) #ifndef MNH_BITREP ZDIRSINZW(JI,JJ) = SQRT( 1. - PDIRCOSZW(JI,JJ)**2 ) #else ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) #endif -END DO +!$mnh_end_do() !$acc end kernels ! #ifndef MNH_OPENACC @@ -356,12 +353,9 @@ CALL MXM_DEVICE(PK,ZTMP1_DEVICE) CALL MYM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) IF (.NOT. L2D) THEN !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK)= - XCMFS * ZTMP2_DEVICE(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK)) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels ELSE !$acc kernels present_cr(ZFLX) @@ -465,7 +459,7 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE #endif ! ! extrapolates this flux under the ground with the surface flux -!$acc parallel present_cr(ZFLX) +!$acc kernels present_cr(ZFLX) #ifndef MNH_BITREP !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) ZFLX(:,:,IKB-1) = & @@ -480,7 +474,10 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE +PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) ) !$mnh_end_expand_array() #else +#ifdef MNH_COMPILER_NVHPC !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) +!$mnh_undef(OPENACC) +#endif !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * BR_P2(PDIRCOSZW(:,:)) & @@ -493,8 +490,11 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE PDIRCOSZW(:,:) * ZDIRSINZW(:,:) & +PVSLOPEM(:,:) * (BR_P2(PCOSSLOPE(:,:)) - BR_P2(PSINSLOPE(:,:))) * ZDIRSINZW(:,:) ) !$mnh_end_expand_array() +#ifdef MNH_COMPILER_NVHPC +!$mnh_define(OPENACC) +#endif #endif -!$acc end parallel +!$acc end kernels ! #ifndef MNH_OPENACC ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( MYM( ZFLX(:,:,IKB-1:IKB-1) ) ) & @@ -542,69 +542,48 @@ END IF #else CALL MYM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels CALL DYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) CALL MZM_DEVICE(PDYY,ZTMP3_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL MYF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL MXM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE ) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels ELSE !$acc kernels present_cr(PRUS) @@ -626,69 +605,48 @@ END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) CALL MZM_DEVICE(PDXX,ZTMP3_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)/ZTMP3_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP4_DEVICE,ZTMP5_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP4_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP5_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL MXF_DEVICE(ZTMP4_DEVICE,ZTMP2_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP3_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL MYM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP5_DEVICE(JI,JJ,JK) = ZTMP2_DEVICE(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels CALL DZF_DEVICE( ZTMP5_DEVICE, ZTMP3_DEVICE ) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZTMP1_DEVICE(JI,JJ,JK) + ZTMP3_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels ELSE !$acc kernels @@ -712,12 +670,9 @@ IF (KSPLT==1) THEN #else IF (.NOT. L2D) THEN !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * (GY_U_UV_PUM(JI,JJ,JK) + GX_V_UV_PVM(JI,JJ,JK)) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels ELSE !$acc kernels present_cr(ZTMP1_DEVICE) @@ -727,12 +682,9 @@ IF (KSPLT==1) THEN CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(JI,JJ,JK) = - ZTMP1_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels #endif !