diff --git a/src/ZSOLVER/turb.f90 b/src/ZSOLVER/turb.f90 index 739fc275276a26b67c211ecf60a75f1365458a99..b43950cf30b810cf4970fb83886cdc71256f95d4 100644 --- a/src/ZSOLVER/turb.f90 +++ b/src/ZSOLVER/turb.f90 @@ -11,8 +11,7 @@ module mode_turb use mode_msg USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif - -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) use modi_bitrep #endif @@ -469,6 +468,7 @@ LOGICAL :: GOCEAN !Intermediate variable used to work around a Cray compiler bug ! REAL, DIMENSION(:,:,:), pointer , contiguous :: ZTEMP_BUD ! +LOGICAL :: GTURBLEN_BL89_TURBLEN_RM17_TURBLEN_ADAP_ORMC01 !------------------------------------------------------------------------------------------ ! ! IN variables @@ -706,13 +706,17 @@ ZRVORD= XRV / XRD ! GOCEAN = LOCEAN ! +GTURBLEN_BL89_TURBLEN_RM17_TURBLEN_ADAP_ORMC01 = & + HTURBLEN=='BL89' .OR. HTURBLEN=='RM17' .OR. HTURBLEN == 'ADAP' .OR. ORMC01 ! !$acc update device(PTHLT,PRT) -!$acc kernels present_cr(ZCOEF_DISS) +!$acc kernels present_cr(ZCOEF_DISS,ZTHLM,ZRM) !Copy data into ZTHLM and ZRM only if needed -IF (HTURBLEN=='BL89' .OR. HTURBLEN=='RM17' .OR. HTURBLEN == 'ADAP' .OR. ORMC01) THEN - ZTHLM(:,:,:) = PTHLT(:,:,:) - ZRM(:,:,:,:) = PRT(:,:,:,:) +IF (GTURBLEN_BL89_TURBLEN_RM17_TURBLEN_ADAP_ORMC01) THEN + DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTHLM(JI,JJ,JK) = PTHLT(JI,JJ,JK) + ZRM(JI,JJ,JK,:) = PRT(JI,JJ,JK,:) + END DO END IF ! ZTRH(:, :, : ) = XUNDEF @@ -746,10 +750,12 @@ ELSE !PW: "BUG" PGI : results different on CPU and GPU due to the power function !See http://www.pgroup.com/userforum/viewtopic.php?t=5364&sid=ec7b762b17fb9bb3332a47f0db57af55 !Use of own functions allows bit-reproducible results -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZEXN(:,:,:) = (PPABST(:,:,:)/XP00) ** (XRD/XCPD) #else - ZEXN(:,:,:) = BR_POW(PPABST(:,:,:)/XP00,XRD/XCPD) +DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZEXN(JI,JJ,JK) = BR_POW(PPABST(JI,JJ,JK)/XP00,XRD/XCPD) +END DO #endif END IF ! @@ -1003,7 +1009,7 @@ IF (ORMC01) THEN #ifdef MNH_OPENACC call Print_msg( NVERB_FATAL, 'GEN', 'TURB', 'OpenACC: ORMC01 not yet implemented' ) #endif -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZUSTAR(:,:) = (PSFU(:,:)**2+PSFV(:,:)**2)**(0.25) #else ZUSTAR(:,:) = BR_POW( BR_P2( PSFU(:,:) ) + BR_P2( PSFV(:,:) ), 0.25 ) @@ -1063,17 +1069,15 @@ ENDIF ! !* 4.2 compute the proportionality coefficient between wind and stress ! -!$acc kernels present_cr(ZCDUEFF,ZTAU22M,ZTAU33M) -#ifndef MNH_BITREP +!$acc kernels ! present_cr(ZCDUEFF,ZUSLOPE,ZVSLOPE,PSFU,PSFV) +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) / & (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) ) #else -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif + !$acc_nv loop independent collapse(2) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) / & - (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) ) + (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) ) END DO #endif !$acc end kernels @@ -1855,7 +1859,7 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) ! !* 1.1 Lv/Cph at t ! -!$acc kernels present(PLOCPEXN,ZDRVSATDT) +!$acc kernels ! present(ZRVSAT,ZDRVSATDT) ! present(PLOCPEXN) ! present ZDRVSATDT) PLOCPEXN(:,:,:) = ( PLTT + (XCPV-PC) * (PT(:,:,:)-XTT) ) / PCP(:,:,:) ! !* 1.2 Saturation vapor pressure at t @@ -1863,13 +1867,15 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) !PW: "BUG" PGI : results different on CPU and GPU due to the EXP and LOG functions !See http://www.pgroup.com/userforum/viewtopic.php?t=5364&sid=ec7b762b17fb9bb3332a47f0db57af55 !Use of own functions allows bit-reproducible results -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZRVSAT(:,:,:) = EXP( PALP - PBETA/PT(:,:,:) - PGAM*ALOG( PT(:,:,:) ) ) #else DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZRVSAT(JI,JJ,JK) = BR_EXP( PALP - PBETA/PT(JI,JJ,JK) - PGAM*BR_LOG( PT(JI,JJ,JK) ) ) END DO #endif +!$acc end kernels +!$acc kernels present(ZRVSAT,ZDRVSATDT) ! !* 1.3 saturation mixing ratio at t ! @@ -1884,24 +1890,36 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) ! PAMOIST(:,:,:)= 0.5 / ( 1.0 + ZDRVSATDT(:,:,:) * PLOCPEXN(:,:,:) ) ! +!$acc end kernels +!$acc kernels !* 1.6 compute Atheta -! +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PATHETA(:,:,:)= PAMOIST(:,:,:) * PEXN(:,:,:) * & ( ( ZRVSAT(:,:,:) - PRT(:,:,:,1) ) * PLOCPEXN(:,:,:) / & ( 1. + ZDRVSATDT(:,:,:) * PLOCPEXN(:,:,:) ) * & ( & ZRVSAT(:,:,:) * (1. + ZRVSAT(:,:,:)/ZEPS) & -#ifndef MNH_BITREP * ( -2.*PBETA/PT(:,:,:) + PGAM ) / PT(:,:,:)**2 & -#else - * ( -2.*PBETA/PT(:,:,:) + PGAM ) / BR_P2(PT(:,:,:)) & -#endif +ZDRVSATDT(:,:,:) * (1. + 2. * ZRVSAT(:,:,:)/ZEPS) & * ( PBETA/PT(:,:,:) - PGAM ) / PT(:,:,:) & ) & - ZDRVSATDT(:,:,:) & ) -! +#else +DO CONCURRENT( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PATHETA(JI,JJ,JK)= PAMOIST(JI,JJ,JK) * PEXN(JI,JJ,JK) * & + ( ( ZRVSAT(JI,JJ,JK) - PRT(JI,JJ,JK,1) ) * PLOCPEXN(JI,JJ,JK) / & + ( 1. + ZDRVSATDT(JI,JJ,JK) * PLOCPEXN(JI,JJ,JK) ) * & + ( & + ZRVSAT(JI,JJ,JK) * (1. + ZRVSAT(JI,JJ,JK)/ZEPS) & + * ( -2.*PBETA/PT(JI,JJ,JK) + PGAM ) / BR_P2(PT(JI,JJ,JK)) & + +ZDRVSATDT(JI,JJ,JK) * (1. + 2. * ZRVSAT(JI,JJ,JK)/ZEPS) & + * ( PBETA/PT(JI,JJ,JK) - PGAM ) / PT(JI,JJ,JK) & + ) & + - ZDRVSATDT(JI,JJ,JK) & + ) +ENDDO +#endif !* 1.7 Lv/Cph/Exner at t-1 ! PLOCPEXN(:,:,:) = PLOCPEXN(:,:,:) / PEXN(:,:,:) @@ -2114,6 +2132,8 @@ END SUBROUTINE CLOUD_MODIF_LM ! END SUBROUTINE TURB + + !################### SUBROUTINE DELT(KKA,KKU,KKL,KKB, KKE,KKTB, KKTE,ORMC01,HTURBDIM,PDXX, PDYY,PZZ,PDIRCOSZW,PLM, ODZ) !################### @@ -2173,6 +2193,7 @@ LOGICAL, INTENT(IN) :: ODZ ! integer :: ji, jj, jk +integer :: JIU,JJU,JKU LOGICAL :: GOCEAN !Intermediate variable used to work around a Cray compiler bug (CCE 13.0.0) REAL :: ZALPHA ! proportionnality constant between Dz/2 and ! ! BL89 mixing length near the surface @@ -2181,12 +2202,10 @@ REAL :: ZD ! distance to the surface real, dimension(:,:,:), pointer , contiguous :: ztmp1_device, ztmp2_device #endif ! -integer :: jiu, jju, jku !------------------------------------------------------------------------------- - -jiu = size( pdxx, 1 ) -jju = size( pdxx, 2 ) -jku = size( pdxx, 3 ) +JIU=SIZE(pdxx,1) +JJU=SIZE(pdxx,2) +JKU=SIZE(pdxx,3) if ( mppdb_initialized ) then !Check all in arrays @@ -2197,8 +2216,6 @@ if ( mppdb_initialized ) then end if #ifdef MNH_OPENACC -!!$allocate( ztmp1_device( JIU,JJU,JKU ) ) -!!$allocate( ztmp2_device( JIU,JJU,JKU ) ) !Pin positions in the pools of MNH memory CALL MNH_MEM_POSITION_PIN() CALL MNH_MEM_GET( ztmp1_device, JIU,JJU,JKU ) @@ -2231,7 +2248,7 @@ IF (ODZ) THEN #endif ELSE #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PLM(:,:,:) = (PLM(:,:,:)*MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)) ) ** (1./3.) #else PLM(:,:,:) = BR_POW( PLM(:, :, : ) * MXF( PDXX(:, :, : ) ) * MYF( PDYY(:, :, : ) ), 1. / 3. ) @@ -2240,10 +2257,12 @@ IF (ODZ) THEN CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE ) CALL MYF_DEVICE( PDYY, ZTMP2_DEVICE ) !$acc kernels -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PLM(:,:,:) = ( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) ) ** (1./3.) #else - PLM(:,:,:) = BR_POW( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:), 1./3. ) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK), 1./3. ) +ENDDO #endif !$acc end kernels #endif @@ -2263,7 +2282,7 @@ ELSE #endif ELSE #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PLM(:,:,:) = (MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)) ) ** (1./2.) #else PLM(:,:,:) = BR_POW(MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)), 1. / 2. ) @@ -2272,10 +2291,12 @@ ELSE CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE ) CALL MYF_DEVICE( PDYY, ZTMP2_DEVICE ) !$acc kernels -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PLM(:,:,:) = ( ZTMP1_DEVICE * ZTMP2_DEVICE ) ** (1./2.) #else - PLM(:,:,:) = BR_POW( ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:), 1. / 2. ) + DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK=1:JKU ) + PLM(JI,JJ,JK) = BR_POW( ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK), 1. / 2. ) + END DO #endif !$acc end kernels #endif @@ -2287,9 +2308,8 @@ END IF ! mixing length limited by the distance normal to the surface ! (with the same factor as for BL89) ! - IF (.NOT. ORMC01) THEN -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZALPHA = 0.5**(-1.5) #else ZALPHA = BR_POW( 0.5, -1.5 ) @@ -2339,7 +2359,8 @@ if ( mppdb_initialized ) then end if #ifdef MNH_OPENACC -CALL MNH_MEM_RELEASE() + !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN + CALL MNH_MEM_RELEASE() #endif END SUBROUTINE DELT @@ -2512,7 +2533,7 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme !PW: "BUG" PGI : results different on CPU and GPU due to the power function !See http://www.pgroup.com/userforum/viewtopic.php?t=5364&sid=ec7b762b17fb9bb3332a47f0db57af55 !Use of own functions allows bit-reproducible results -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ! #ifndef MNH_OPENACC PLM(:,:,:) = (PLM(:,:,:)*MXF(PDXX(:,:,:))*MYF(PDYY(:,:,:)) ) ** (1./3.) @@ -2542,7 +2563,9 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme call Mppdb_check( plm, "Dear mid1:plm" ) end if !$acc kernels - PLM(:,:,:) = BR_POW( PLM(:,:,:)*ZTMP1_DEVICE *ZTMP2_DEVICE , 1./3. ) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK) *ZTMP2_DEVICE(JI,JJ,JK) , 1./3. ) +ENDDO !$acc end kernels if ( mppdb_initialized ) then call Mppdb_check( plm, "Dear mid2:plm" ) @@ -2564,12 +2587,7 @@ CALL EMOIST(KRR,KRRI,PTHLT,PRT,PLOCPEXNM,PAMOIST,PSRCT,ZEMOIST) !$acc kernels present(ZWORK2D,PLM) IF (KRR>0) THEN !$acc_nv loop independent collapse(3) private(ZVAR) - !dir$ concurrent - DO JK = KKTB+1,KKTE-1 - !dir$ concurrent - DO JJ=1,SIZE(PLM,2) - !dir$ concurrent - DO JI=1,SIZE(PLM,1) + DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK = KKTB+1:KKTE-1) ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK ))/PDZZ(JI,JJ,JK+KKL)+ & (PTHLT(JI,JJ,JK )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK )) ZDRTDZ(JI,JJ,JK) = 0.5*((PRT(JI,JJ,JK+KKL,1)-PRT(JI,JJ,JK ,1))/PDZZ(JI,JJ,JK+KKL)+ & @@ -2585,17 +2603,10 @@ IF (KRR>0) THEN PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), & 0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR))) END IF - END DO - END DO END DO ELSE! For dry atmos or unsalted ocean runs !$acc_nv loop independent collapse(3) private(ZVAR) - !dir$ concurrent - DO JK = KKTB+1,KKTE-1 - !dir$ concurrent - DO JJ=1,SIZE(PLM,2) - !dir$ concurrent - DO JI=1,SIZE(PLM,1) + DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK = KKTB+1:KKTE-1) ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK ))/PDZZ(JI,JJ,JK+KKL)+ & (PTHLT(JI,JJ,JK )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK )) IF (GOCEAN) THEN @@ -2608,8 +2619,6 @@ ELSE! For dry atmos or unsalted ocean runs PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), & 0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR))) END IF - END DO - END DO END DO END IF ! special case near the surface