diff --git a/src/MNH_OPENACC_NV_CR.CPP b/src/MNH_OPENACC_NV_CR.CPP new file mode 100644 index 0000000000000000000000000000000000000000..5ef2fefc2357abb4f25a945b778162a62dcef414 --- /dev/null +++ b/src/MNH_OPENACC_NV_CR.CPP @@ -0,0 +1,17 @@ +# +# Some macros to bypass OpenACC incompatibilities between the Nvidia & Cray compilers +# + +# Activate $acc directive only for Nvidia Compiler +#ifdef MNH_COMPILER_NVHPC +#define acc_nv acc +#else +#define acc_nv !/!\ NOT ACTIVATE acc +#endif + +# Activate present directive only for Cray Compiler +#ifdef MNH_COMPILER_CCE +#define present_cr present +#else +#define present_cr !/!\ NOT ACTIVATE present +#endif diff --git a/src/Rules.LXcray.mk b/src/Rules.LXcray.mk index 1a1633ee6e76aaed4aa477030463fbae72a545fe..13e13b9559208f1f47d664a5b123c85ae5dc8f46 100644 --- a/src/Rules.LXcray.mk +++ b/src/Rules.LXcray.mk @@ -138,6 +138,7 @@ CPPFLAGS_SURCOUCHE += -traditional -DDEV_NULL -DMNH_COMPILER_CCE CPPFLAGS_RAD = -traditional CPPFLAGS_NEWLFI = -traditional -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} CPPFLAGS_MNH = -traditional -DMNH -DSFX_MNH -DMNH_NO_MPI_LOGICAL48 -DMNH_COMPILER_CCE +CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP ifdef VER_GA CPPFLAGS_SURCOUCHE += -DMNH_GA INC += -I${GA_ROOT}/include diff --git a/src/ZSOLVER/advection_metsv.f90 b/src/ZSOLVER/advection_metsv.f90 index 896a0f5d1c1db242225c7ba4b218e0a7c52317ce..466f611a63fce63e741eb4ab6426d610bc40a094 100644 --- a/src/ZSOLVER/advection_metsv.f90 +++ b/src/ZSOLVER/advection_metsv.f90 @@ -526,11 +526,7 @@ END IF !PW: not necessary: data already on device due to contrav_device !$acc update device(ZRUCPPM,ZRVCPPM,ZRWCPPM) ! acc kernels IF (.NOT. 
L1D) THEN -#ifdef MNH_COMPILER_CCE - !$acc kernels present(ZCFLU,ZCFLV,ZCFLW) -#else - !$acc kernels -#endif + !$acc kernels present_cr(ZCFLU,ZCFLV,ZCFLW) ZCFLU(:,:,:) = 0.0 ; ZCFLV(:,:,:) = 0.0 ; ZCFLW(:,:,:) = 0.0 ZCFLU(IIB:IIE,IJB:IJE,:) = ABS(ZRUCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP) ZCFLV(IIB:IIE,IJB:IJE,:) = ABS(ZRVCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP) @@ -560,11 +556,7 @@ IF (.NOT. L1D) THEN ENDIF #ifndef MNH_BITREP IF (.NOT. L2D) THEN -#ifdef MNH_COMPILER_CCE - !$acc kernels present(ZCFL) -#else - !$acc kernels -#endif + !$acc kernels present_cr(ZCFL) ZCFL(:,:,:) = SQRT(ZCFLU(:,:,:)**2+ZCFLV(:,:,:)**2+ZCFLW(:,:,:)**2) !$acc end kernels ELSE @@ -575,18 +567,14 @@ IF (.NOT. L1D) THEN #else IF (.NOT. L2D) THEN !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) END DO !$acc end kernels ELSE !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) END DO @@ -600,9 +588,7 @@ ELSE #ifndef MNH_BITREP ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) #else -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK))) END DO @@ -934,18 +920,14 @@ DO JSPL=1,KSPLIT !$acc end kernels END IF !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(4) -#endif + !$acc_nv loop independent collapse(4) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) & * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) END DO !CONCURRENT !$acc loop seq DO JSV = 1, KSV 
-#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif + !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + & PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) diff --git a/src/ZSOLVER/contrav.f90 b/src/ZSOLVER/contrav.f90 index 4b7299be16db2f9a9de4fc2af39649b6d5f12082..3979014d1a0f6b3efa78546a89f90aa24ee7a0d3 100644 --- a/src/ZSOLVER/contrav.f90 +++ b/src/ZSOLVER/contrav.f90 @@ -708,16 +708,12 @@ IF (KADV_ORDER == 2 ) THEN #endif !$acc kernels ! -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif +!$acc_nv loop independent collapse(3) do concurrent (ji=iib:iie,jj=1:iju,jk=ikb:ike+1) Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 & + ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25 end do -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif +!$acc_nv loop independent collapse(3) do concurrent (ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1) Z2(ji, jj, jk ) = ( PRVCT(ji, jj, jk) + PRVCT( ji, jj, jk - 1) ) * PDZY(ji, jj, jk) * 0.25 & + ( PRVCT(ji, jj + 1, jk) + PRVCT( ji, jj + 1,jk - 1) ) * PDZY(ji, jj + 1, jk) * 0.25 @@ -725,9 +721,7 @@ IF (KADV_ORDER == 2 ) THEN PRWCT(:,:,:)=0. -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif +!$acc_nv loop independent collapse(3) do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) end do @@ -783,9 +777,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN !PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works ! but ji loop can not be collapsed -> 10x slower on GPU ! 
*ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif +!$acc_nv loop independent collapse(3) do concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0 @@ -795,9 +787,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN + ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0 end do ! -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif +!$acc_nv loop independent collapse(3) do concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0 @@ -814,9 +804,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN ! !!$ IF (NHALO==1) THEN !$acc kernels async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) do concurrent(jj=1:iju,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0 @@ -828,9 +816,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN !$acc end kernels ! 
!$acc kernels async -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif +!$acc_nv loop independent collapse(2) do concurrent(ji=1:iiu,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0 @@ -885,9 +871,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN !!$ !!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ) PRWCT(:,:,:)=0. -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif +!$acc_nv loop independent collapse(3) do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) end do