Skip to content
Snippets Groups Projects
Commit 20d202a7 authored by ESCOBAR MUNOZ Juan's avatar ESCOBAR MUNOZ Juan
Browse files

Juan 31/03/2022:Rules.LXcray.mk+ZSOLVER/, use acc_nv/present_cr macro defined...

Juan 31/03/2022:Rules.LXcray.mk+ZSOLVER/, use acc_nv/present_cr macros defined in MNH_OPENACC_NV_CR.CPP, to bypass OpenACC incompatibility between Nvidia/Cray compilers
parent f5b3fb81
No related branches found
No related tags found
No related merge requests found
#
# Some macros to bypass OpenACC incompatibility between Nvidia & Cray compilers
#
# Activate $acc directive only for Nvidia Compiler
#ifdef MNH_COMPILER_NVHPC
#define acc_nv acc
#else
#define acc_nv !/!\ NOT ACTIVATE acc
#endif
# Activate present clause only for Cray Compiler
#ifdef MNH_COMPILER_CCE
#define present_cr present
#else
#define present_cr !/!\ NOT ACTIVATE present
#endif
...@@ -138,6 +138,7 @@ CPPFLAGS_SURCOUCHE += -traditional -DDEV_NULL -DMNH_COMPILER_CCE ...@@ -138,6 +138,7 @@ CPPFLAGS_SURCOUCHE += -traditional -DDEV_NULL -DMNH_COMPILER_CCE
CPPFLAGS_RAD = -traditional CPPFLAGS_RAD = -traditional
CPPFLAGS_NEWLFI = -traditional -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} CPPFLAGS_NEWLFI = -traditional -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT}
CPPFLAGS_MNH = -traditional -DMNH -DSFX_MNH -DMNH_NO_MPI_LOGICAL48 -DMNH_COMPILER_CCE CPPFLAGS_MNH = -traditional -DMNH -DSFX_MNH -DMNH_NO_MPI_LOGICAL48 -DMNH_COMPILER_CCE
CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP
ifdef VER_GA ifdef VER_GA
CPPFLAGS_SURCOUCHE += -DMNH_GA CPPFLAGS_SURCOUCHE += -DMNH_GA
INC += -I${GA_ROOT}/include INC += -I${GA_ROOT}/include
......
...@@ -526,11 +526,7 @@ END IF ...@@ -526,11 +526,7 @@ END IF
!PW: not necessary: data already on device due to contrav_device !$acc update device(ZRUCPPM,ZRVCPPM,ZRWCPPM) !PW: not necessary: data already on device due to contrav_device !$acc update device(ZRUCPPM,ZRVCPPM,ZRWCPPM)
! acc kernels ! acc kernels
IF (.NOT. L1D) THEN IF (.NOT. L1D) THEN
#ifdef MNH_COMPILER_CCE !$acc kernels present_cr(ZCFLU,ZCFLV,ZCFLW)
!$acc kernels present(ZCFLU,ZCFLV,ZCFLW)
#else
!$acc kernels
#endif
ZCFLU(:,:,:) = 0.0 ; ZCFLV(:,:,:) = 0.0 ; ZCFLW(:,:,:) = 0.0 ZCFLU(:,:,:) = 0.0 ; ZCFLV(:,:,:) = 0.0 ; ZCFLW(:,:,:) = 0.0
ZCFLU(IIB:IIE,IJB:IJE,:) = ABS(ZRUCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP) ZCFLU(IIB:IIE,IJB:IJE,:) = ABS(ZRUCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP)
ZCFLV(IIB:IIE,IJB:IJE,:) = ABS(ZRVCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP) ZCFLV(IIB:IIE,IJB:IJE,:) = ABS(ZRVCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP)
...@@ -560,11 +556,7 @@ IF (.NOT. L1D) THEN ...@@ -560,11 +556,7 @@ IF (.NOT. L1D) THEN
ENDIF ENDIF
#ifndef MNH_BITREP #ifndef MNH_BITREP
IF (.NOT. L2D) THEN IF (.NOT. L2D) THEN
#ifdef MNH_COMPILER_CCE !$acc kernels present_cr(ZCFL)
!$acc kernels present(ZCFL)
#else
!$acc kernels
#endif
ZCFL(:,:,:) = SQRT(ZCFLU(:,:,:)**2+ZCFLV(:,:,:)**2+ZCFLW(:,:,:)**2) ZCFL(:,:,:) = SQRT(ZCFLU(:,:,:)**2+ZCFLV(:,:,:)**2+ZCFLW(:,:,:)**2)
!$acc end kernels !$acc end kernels
ELSE ELSE
...@@ -575,18 +567,14 @@ IF (.NOT. L1D) THEN ...@@ -575,18 +567,14 @@ IF (.NOT. L1D) THEN
#else #else
IF (.NOT. L2D) THEN IF (.NOT. L2D) THEN
!$acc kernels !$acc kernels
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK)))
END DO END DO
!$acc end kernels !$acc end kernels
ELSE ELSE
!$acc kernels !$acc kernels
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK)))
END DO END DO
...@@ -600,9 +588,7 @@ ELSE ...@@ -600,9 +588,7 @@ ELSE
#ifndef MNH_BITREP #ifndef MNH_BITREP
ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2)
#else #else
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU )
ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK))) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLW(JI,JJ,JK)))
END DO END DO
...@@ -934,18 +920,14 @@ DO JSPL=1,KSPLIT ...@@ -934,18 +920,14 @@ DO JSPL=1,KSPLIT
!$acc end kernels !$acc end kernels
END IF END IF
!$acc kernels !$acc kernels
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(4)
!$acc loop independent collapse(4)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR )
ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) & ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) &
* ZTSTEP_PPM / PRHODJ(JI,JJ,JK) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK)
END DO !CONCURRENT END DO !CONCURRENT
!$acc loop seq !$acc loop seq
DO JSV = 1, KSV DO JSV = 1, KSV
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU)
ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + & ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + &
PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK)
......
...@@ -708,16 +708,12 @@ IF (KADV_ORDER == 2 ) THEN ...@@ -708,16 +708,12 @@ IF (KADV_ORDER == 2 ) THEN
#endif #endif
!$acc kernels !$acc kernels
! !
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
do concurrent (ji=iib:iie,jj=1:iju,jk=ikb:ike+1) do concurrent (ji=iib:iie,jj=1:iju,jk=ikb:ike+1)
Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 & Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 &
+ ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25 + ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25
end do end do
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
do concurrent (ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1) do concurrent (ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1)
Z2(ji, jj, jk ) = ( PRVCT(ji, jj, jk) + PRVCT( ji, jj, jk - 1) ) * PDZY(ji, jj, jk) * 0.25 & Z2(ji, jj, jk ) = ( PRVCT(ji, jj, jk) + PRVCT( ji, jj, jk - 1) ) * PDZY(ji, jj, jk) * 0.25 &
+ ( PRVCT(ji, jj + 1, jk) + PRVCT( ji, jj + 1,jk - 1) ) * PDZY(ji, jj + 1, jk) * 0.25 + ( PRVCT(ji, jj + 1, jk) + PRVCT( ji, jj + 1,jk - 1) ) * PDZY(ji, jj + 1, jk) * 0.25
...@@ -725,9 +721,7 @@ IF (KADV_ORDER == 2 ) THEN ...@@ -725,9 +721,7 @@ IF (KADV_ORDER == 2 ) THEN
PRWCT(:,:,:)=0. PRWCT(:,:,:)=0.
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1)
PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk )
end do end do
...@@ -783,9 +777,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN ...@@ -783,9 +777,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN
!PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works !PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works
! but ji loop can not be collapsed -> 10x slower on GPU ! but ji loop can not be collapsed -> 10x slower on GPU
! *ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance) ! *ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance)
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
do concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1) do concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0
...@@ -795,9 +787,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN ...@@ -795,9 +787,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN
+ ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0 + ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0
end do end do
! !
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
do concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1) do concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0 ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0
...@@ -814,9 +804,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN ...@@ -814,9 +804,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN
! !
!!$ IF (NHALO==1) THEN !!$ IF (NHALO==1) THEN
!$acc kernels async !$acc kernels async
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(2)
!$acc loop independent collapse(2)
#endif
do concurrent(jj=1:iju,jk=IKB:IKE+1) do concurrent(jj=1:iju,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0
...@@ -828,9 +816,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN ...@@ -828,9 +816,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN
!$acc end kernels !$acc end kernels
! !
!$acc kernels async !$acc kernels async
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(2)
!$acc loop independent collapse(2)
#endif
do concurrent(ji=1:iiu,jk=IKB:IKE+1) do concurrent(ji=1:iiu,jk=IKB:IKE+1)
ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0 ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0
ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0
...@@ -885,9 +871,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN ...@@ -885,9 +871,7 @@ ELSE IF (KADV_ORDER == 4 ) THEN
!!$ !!$
!!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ) !!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ)
PRWCT(:,:,:)=0. PRWCT(:,:,:)=0.
#ifdef MNH_COMPILER_NVHPC !$acc_nv loop independent collapse(3)
!$acc loop independent collapse(3)
#endif
do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1)
PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk )
end do end do
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment