Skip to content
Snippets Groups Projects
Commit 2080de7c authored by ESCOBAR MUNOZ Juan's avatar ESCOBAR MUNOZ Juan
Browse files

Juan 04/11/2022:MNH/tridiag*.f90, CCE Optimization, use "!$acc parallel" to...

Juan 04/11/2022:MNH/tridiag*.f90, CCE Optimization, use "!$acc parallel" to avoid multiple kernel launches inside seq loop
parent eb08016c
No related branches found
No related tags found
No related merge requests found
...@@ -263,7 +263,7 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ) ...@@ -263,7 +263,7 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ)
ZRHODJ_DFDDTDZ_O_DZ2(:,:,:) = ZMZM_RHODJ(:,:,:)*PDFDDTDZ(:,:,:)/PDZZ(:,:,:)**2 ZRHODJ_DFDDTDZ_O_DZ2(:,:,:) = ZMZM_RHODJ(:,:,:)*PDFDDTDZ(:,:,:)/PDZZ(:,:,:)**2
#else #else
!$acc_nv loop independent collapse(3) !$acc_nv loop independent collapse(3)
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) DO CONCURRENT (JK=1:JKU,JJ=1:JJU,JI=1:JIU)
ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK)) ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK))
END DO !CONCURRENT END DO !CONCURRENT
#endif #endif
...@@ -285,7 +285,7 @@ ZY=0. ...@@ -285,7 +285,7 @@ ZY=0.
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP &
- ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL) & - ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL) &
+ ZMZM_RHODJ(JI,JJ,IKB ) * PF(JI,JJ,IKB )/PDZZ(JI,JJ,IKB ) & + ZMZM_RHODJ(JI,JJ,IKB ) * PF(JI,JJ,IKB )/PDZZ(JI,JJ,IKB ) &
...@@ -298,7 +298,7 @@ END DO !CONCURRENT ...@@ -298,7 +298,7 @@ END DO !CONCURRENT
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3) !$acc loop independent collapse(3)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) DO CONCURRENT (JK=IKTB+1:IKTE-1,JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP &
- ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL) & - ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL) &
+ ZMZM_RHODJ(JI,JJ,JK ) * PF(JI,JJ,JK )/PDZZ(JI,JJ,JK ) & + ZMZM_RHODJ(JI,JJ,JK ) * PF(JI,JJ,JK )/PDZZ(JI,JJ,JK ) &
...@@ -313,7 +313,7 @@ END DO !CONCURRENT ...@@ -313,7 +313,7 @@ END DO !CONCURRENT
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP &
- ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) & - ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) &
+ ZMZM_RHODJ(JI,JJ,IKE ) * PF(JI,JJ,IKE )/PDZZ(JI,JJ,IKE ) & + ZMZM_RHODJ(JI,JJ,IKE ) * PF(JI,JJ,IKE )/PDZZ(JI,JJ,IKE ) &
...@@ -336,7 +336,7 @@ IF ( PIMPL > 1.E-10 ) THEN ...@@ -336,7 +336,7 @@ IF ( PIMPL > 1.E-10 ) THEN
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZB(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)/PTSTEP & ZB(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)/PTSTEP &
- ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL
END DO !CONCURRENT END DO !CONCURRENT
...@@ -346,7 +346,7 @@ END DO !CONCURRENT ...@@ -346,7 +346,7 @@ END DO !CONCURRENT
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZC(JI,JJ,IKB) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL ZC(JI,JJ,IKB) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels !$acc end kernels
...@@ -355,7 +355,7 @@ END DO !CONCURRENT ...@@ -355,7 +355,7 @@ END DO !CONCURRENT
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3) !$acc loop independent collapse(3)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) DO CONCURRENT (JK=IKTB+1:IKTE-1,JJ=1:JJU,JI=1:JIU)
ZA(JI,JJ,JK) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL ZA(JI,JJ,JK) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL
ZB(JI,JJ,JK) = PRHODJ(JI,JJ,JK)/PTSTEP & ZB(JI,JJ,JK) = PRHODJ(JI,JJ,JK)/PTSTEP &
- ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK+KKL) * PIMPL & - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK+KKL) * PIMPL &
...@@ -368,7 +368,7 @@ END DO !CONCURRENT ...@@ -368,7 +368,7 @@ END DO !CONCURRENT
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZA(JI,JJ,IKE) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL ZA(JI,JJ,IKE) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL
ZB(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)/PTSTEP & ZB(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)/PTSTEP &
- ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL
...@@ -385,16 +385,22 @@ END DO !CONCURRENT ...@@ -385,16 +385,22 @@ END DO !CONCURRENT
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb)
PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKB+KKL,IKE-KKL,KKL DO JK = IKB+KKL,IKE-KKL,KKL
#ifdef MNH_COMPILER_NVHPC
! gang+vector needed or parallelisation vector only ! gang+vector needed or parallelisation vector only
!$acc_nv loop independent gang, vector collapse(2) !$acc loop independent gang, vector collapse(2)
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) #else
!$acc loop independent
#endif
DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-KKL) / ZBET(JI,JJ) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-KKL) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
ZBET(JI,JJ) = ZB(JI,JJ,JK) - ZA(JI,JJ,JK) * ZGAM(JI,JJ,JK) ZBET(JI,JJ) = ZB(JI,JJ,JK) - ZA(JI,JJ,JK) * ZGAM(JI,JJ,JK)
...@@ -403,11 +409,13 @@ DO JK = IKB+KKL,IKE-KKL,KKL ...@@ -403,11 +409,13 @@ DO JK = IKB+KKL,IKE-KKL,KKL
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
END DO !CONCURRENT END DO !CONCURRENT
END DO END DO
!$acc end parallel
!$acc kernels
! special treatment for the last level ! special treatment for the last level
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
ZBET(JI,JJ) = ZB(JI,JJ,IKE) - ZA(JI,JJ,IKE) * ZGAM(JI,JJ,IKE) ZBET(JI,JJ) = ZB(JI,JJ,IKE) - ZA(JI,JJ,IKE) * ZGAM(JI,JJ,IKE)
...@@ -415,19 +423,25 @@ DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) ...@@ -415,19 +423,25 @@ DO CONCURRENT ( JI=1:JIU,JJ=1:JJU)
PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-KKL) ) / ZBET(JI,JJ) PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-KKL) ) / ZBET(JI,JJ)
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
!* 3.3 going down !* 3.3 going down
! ---------- ! ----------
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKE-KKL,IKB,-1*KKL DO JK = IKE-KKL,IKB,-1*KKL
#ifdef MNH_COMPILER_NVHPC
! gang+vector needed or parallelisation vector only ! gang+vector needed or parallelisation vector only
!$acc_nv loop independent gang, vector collapse(2) !$acc loop independent gang, vector collapse(2)
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) #else
!$acc loop independent
#endif
DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL)
END DO !CONCURRENT END DO !CONCURRENT
END DO END DO
!$acc end kernels !$acc end parallel
! !
ELSE ELSE
! !
...@@ -435,7 +449,7 @@ ELSE ...@@ -435,7 +449,7 @@ ELSE
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(3) !$acc loop independent collapse(3)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) DO CONCURRENT (JK=IKTB:IKTE,JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK)
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels !$acc end kernels
...@@ -450,7 +464,7 @@ END IF ...@@ -450,7 +464,7 @@ END IF
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB)
PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE)
END DO !CONCURRENT END DO !CONCURRENT
......
...@@ -211,28 +211,34 @@ CALL MNH_MEM_GET( zbet, JIU, JJU ) ...@@ -211,28 +211,34 @@ CALL MNH_MEM_GET( zbet, JIU, JJU )
!* 1. COMPUTE THE RIGHT HAND SIDE !* 1. COMPUTE THE RIGHT HAND SIDE
! --------------------------- ! ---------------------------
! !
!$acc kernels
IKT=SIZE(PVARM,3) IKT=SIZE(PVARM,3)
IKTB=1+JPVEXT_TURB IKTB=1+JPVEXT_TURB
IKTE=IKT-JPVEXT_TURB IKTE=IKT-JPVEXT_TURB
IKB=KKA+JPVEXT_TURB*KKL IKB=KKA+JPVEXT_TURB*KKL
IKE=KKU-JPVEXT_TURB*KKL IKE=KKU-JPVEXT_TURB*KKL
!$acc kernels
! !
! !
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - & ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - &
PEXPL / PRHODJ(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB)) PEXPL / PRHODJ(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB))
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
!$acc parallel
!$acc loop seq
DO JK=IKTB+1,IKTE-1 DO JK=IKTB+1,IKTE-1
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#else
!$acc loop independent
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - & ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - &
PEXPL / PRHODJ(JI,JJ,JK) * & PEXPL / PRHODJ(JI,JJ,JK) * &
( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK) & ( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK) &
...@@ -241,42 +247,46 @@ DO JK=IKTB+1,IKTE-1 ...@@ -241,42 +247,46 @@ DO JK=IKTB+1,IKTE-1
) )
END DO !CONCURRENT END DO !CONCURRENT
END DO END DO
!$acc end parallel
! !
!$acc kernels
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + & ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + &
PEXPL / PRHODJ(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL)) PEXPL / PRHODJ(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL))
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
! !
!* 2. INVERSION OF THE TRIDIAGONAL SYSTEM !* 2. INVERSION OF THE TRIDIAGONAL SYSTEM
! ----------------------------------- ! -----------------------------------
! !
IF ( PIMPL > 1.E-10 ) THEN IF ( PIMPL > 1.E-10 ) THEN
! !$acc kernels
! !
! going up ! going up
! !
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZBET(JI,JJ) = 1. + PIMPL * (PDIAG(JI,JJ,IKB)-PA(JI,JJ,IKB+KKL) / PRHODJ(JI,JJ,IKB)) ZBET(JI,JJ) = 1. + PIMPL * (PDIAG(JI,JJ,IKB)-PA(JI,JJ,IKB+KKL) / PRHODJ(JI,JJ,IKB))
! bet = b(ikb) ! bet = b(ikb)
PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
END DO !CONCURRENT END DO !CONCURRENT
! !
!$acc end kernels
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKB+KKL,IKE-KKL,KKL DO JK = IKB+KKL,IKE-KKL,KKL
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop gang, vector collapse(2) independent !$acc loop gang, vector collapse(2) independent
#else
!$acc loop independent
#endif #endif
!dir$ concurrent ! collapse(JJ,JI) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
DO JJ=1,JJU
!dir$ concurrent
DO JI=1,JIU
ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJ(JI,JJ,JK-KKL) / ZBET(JI,JJ) ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJ(JI,JJ,JK-KKL) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
ZBET(JI,JJ) = 1. + PIMPL * ( PDIAG(JI,JJ,JK) - & ZBET(JI,JJ) = 1. + PIMPL * ( PDIAG(JI,JJ,JK) - &
...@@ -288,14 +298,15 @@ IF ( PIMPL > 1.E-10 ) THEN ...@@ -288,14 +298,15 @@ IF ( PIMPL > 1.E-10 ) THEN
* PVARP(JI,JJ,JK-KKL) & * PVARP(JI,JJ,JK-KKL) &
) / ZBET(JI,JJ) ) / ZBET(JI,JJ)
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
END DO END DO
END DO
END DO END DO
!$acc end parallel
!$acc kernels
! special treatment for the last level ! special treatment for the last level
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJ(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJ(JI,JJ,IKE-KKL) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
ZBET(JI,JJ) = 1. + PIMPL * ( PDIAG(JI,JJ,IKE) - & ZBET(JI,JJ) = 1. + PIMPL * ( PDIAG(JI,JJ,IKE) - &
...@@ -307,38 +318,46 @@ IF ( PIMPL > 1.E-10 ) THEN ...@@ -307,38 +318,46 @@ IF ( PIMPL > 1.E-10 ) THEN
) / ZBET(JI,JJ) ) / ZBET(JI,JJ)
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
! going down ! going down
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKE-KKL,IKB,-1*KKL DO JK = IKE-KKL,IKB,-1*KKL
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop gang, vector collapse(2) !$acc loop gang, vector collapse(2)
#else
!$acc loop independent
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL)
END DO !CONCURRENT END DO !CONCURRENT
END DO END DO
! !$acc end parallel
!
ELSE ELSE
! !
!$acc kernels
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,IKTB:IKTE) = ZY(JI,JJ,IKTB:IKTE) PVARP(JI,JJ,IKTB:IKTE) = ZY(JI,JJ,IKTB:IKTE)
END DO !CONCURRENT END DO !CONCURRENT
! !
END IF !$acc end kernels
END IF
! !
! !
!* 3. FILL THE UPPER AND LOWER EXTERNAL VALUES !* 3. FILL THE UPPER AND LOWER EXTERNAL VALUES
! ---------------------------------------- ! ----------------------------------------
! !
!$acc kernels
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB)
PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE)
END DO !CONCURRENT END DO !CONCURRENT
......
...@@ -160,7 +160,7 @@ USE MODI_SHUMAN_DEVICE ...@@ -160,7 +160,7 @@ USE MODI_SHUMAN_DEVICE
#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP)
USE MODI_BITREP USE MODI_BITREP
#endif #endif
#ifdef MNH_BITREP_OMP #ifdef MNH_COMPILER_CCE
!$mnh_undef(LOOP) !$mnh_undef(LOOP)
!$mnh_undef(OPENACC) !$mnh_undef(OPENACC)
#endif #endif
...@@ -380,9 +380,14 @@ ZY=0. ...@@ -380,9 +380,14 @@ ZY=0.
ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb)
PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
!$mnh_end_do() !$mnh_end_do()
!$acc end kernels
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKB+1,IKE-1 DO JK = IKB+1,IKE-1
#ifdef MNH_COMPILER_CCE
!$acc loop independent
#endif
!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
...@@ -392,7 +397,9 @@ DO JK = IKB+1,IKE-1 ...@@ -392,7 +397,9 @@ DO JK = IKB+1,IKE-1
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
!$mnh_end_do() !$mnh_end_do()
END DO END DO
!$acc end parallel
! special treatment for the last level ! special treatment for the last level
!$acc kernels
!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
...@@ -401,21 +408,28 @@ END DO ...@@ -401,21 +408,28 @@ END DO
PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-1) ) / ZBET(JI,JJ) PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-1) ) / ZBET(JI,JJ)
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
!$mnh_end_do() !$mnh_end_do()
!$acc end kernels
! !
!* 3.3 going down !* 3.3 going down
! ---------- ! ----------
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKE-1,IKB,-1 DO JK = IKE-1,IKB,-1
#ifdef MNH_COMPILER_CCE
!$acc loop independent
#endif
!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1)
!$mnh_end_do() !$mnh_end_do()
END DO END DO
!$acc end parallel
! !
! !
!* 4. FILL THE UPPER AND LOWER EXTERNAL VALUES !* 4. FILL THE UPPER AND LOWER EXTERNAL VALUES
! ---------------------------------------- ! ----------------------------------------
! !
!$acc kernels
!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU)
PVARP(JI,JJ,IKB-1)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,IKB-1)=PVARP(JI,JJ,IKB)
PVARP(JI,JJ,IKE+1)=0. PVARP(JI,JJ,IKE+1)=0.
......
...@@ -227,14 +227,14 @@ IKE=KKU-JPVEXT_TURB*KKL ...@@ -227,14 +227,14 @@ IKE=KKU-JPVEXT_TURB*KKL
! !
! !
!$acc kernels ! async !$acc kernels ! async
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - & ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - &
PEXPL / PRHODJA(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB)) PEXPL / PRHODJA(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB))
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels !$acc end kernels
! !
!$acc kernels ! async !$acc kernels ! async
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) DO CONCURRENT (JK=IKTB+1:IKTE-1,JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - & ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - &
PEXPL / PRHODJA(JI,JJ,JK) * & PEXPL / PRHODJA(JI,JJ,JK) * &
( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK) & ( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK) &
...@@ -245,7 +245,7 @@ END DO !CONCURRENT ...@@ -245,7 +245,7 @@ END DO !CONCURRENT
!$acc end kernels !$acc end kernels
! !
!$acc kernels ! async !$acc kernels ! async
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + & ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + &
PEXPL / PRHODJA(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL)) PEXPL / PRHODJA(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL))
END DO !CONCURRENT END DO !CONCURRENT
...@@ -262,20 +262,24 @@ IF ( PIMPL > 1.E-10 ) THEN ...@@ -262,20 +262,24 @@ IF ( PIMPL > 1.E-10 ) THEN
! !
! going up ! going up
! !
!$acc kernels !$acc kernels
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent collapse(2) !$acc loop independent collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) & ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) &
+ PCOEFS(JI,JJ) * PTSTEP ) ! bet = b(ikb) + PCOEFS(JI,JJ) * PTSTEP ) ! bet = b(ikb)
PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ)
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKB+KKL,IKE-KKL,KKL DO JK = IKB+KKL,IKE-KKL,KKL
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent gang, vector collapse(2) !$acc loop independent gang, vector collapse(2)
#else
!$acc loop independent
#endif #endif
DO CONCURRENT ( JJ=1:JJU , JI=1:JIU ) DO CONCURRENT ( JJ=1:JJU , JI=1:JIU )
ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ) ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ)
...@@ -290,11 +294,13 @@ IF ( PIMPL > 1.E-10 ) THEN ...@@ -290,11 +294,13 @@ IF ( PIMPL > 1.E-10 ) THEN
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
END DO ! CONCURRENT END DO ! CONCURRENT
END DO END DO
!$acc end parallel
!$acc kernels
! special treatment for the last level ! special treatment for the last level
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop independent gang, vector collapse(2) !$acc loop independent gang, vector collapse(2)
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ)
! gam(k) = c(k-1) / bet ! gam(k) = c(k-1) / bet
ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKE) * (1. + ZGAM(JI,JJ,IKE)) & ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKE) * (1. + ZGAM(JI,JJ,IKE)) &
...@@ -305,24 +311,28 @@ IF ( PIMPL > 1.E-10 ) THEN ...@@ -305,24 +311,28 @@ IF ( PIMPL > 1.E-10 ) THEN
) / ZBET(JI,JJ) ) / ZBET(JI,JJ)
! res(k) = (y(k) -a(k)*res(k-1))/ bet ! res(k) = (y(k) -a(k)*res(k-1))/ bet
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels
! !
! going down ! going down
! !
!$acc parallel
!$acc loop seq !$acc loop seq
DO JK = IKE-KKL,IKB,-1*KKL DO JK = IKE-KKL,IKB,-1*KKL
#ifdef MNH_COMPILER_NVHPC #ifdef MNH_COMPILER_NVHPC
!$acc loop gang, vector collapse(2) !$acc loop gang, vector collapse(2)
#else
!$acc loop independent
#endif #endif
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL)
END DO !CONCURRENT END DO !CONCURRENT
END DO END DO
!$acc end kernels !$acc end parallel
! !
ELSE ELSE
! !
!$acc kernels !$acc kernels
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) DO CONCURRENT (JK=IKTB:IKTE,JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK)
END DO !CONCURRENT END DO !CONCURRENT
!$acc end kernels !$acc end kernels
...@@ -334,7 +344,7 @@ END IF ...@@ -334,7 +344,7 @@ END IF
! ---------------------------------------- ! ----------------------------------------
! !
!$acc kernels !$acc kernels
DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) DO CONCURRENT (JJ=1:JJU,JI=1:JIU)
PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB)
PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE)
END DO !CONCURRENT END DO !CONCURRENT
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment