From 915e7cb287cd53c34655b244f3ec9e8d44880acb Mon Sep 17 00:00:00 2001 From: Juan Escobar <juan.escobar@aero.obs-mip.fr> Date: Tue, 14 Sep 2021 11:45:41 +0200 Subject: [PATCH] Juan 14/09/2021:add turb_hor_dyn_corr.f90 on ZSOLVER for GPU/MANAGED optimisation --- src/ZSOLVER/turb_hor_dyn_corr.f90 | 1297 +++++++++++++++++++++++++++++ 1 file changed, 1297 insertions(+) create mode 100644 src/ZSOLVER/turb_hor_dyn_corr.f90 diff --git a/src/ZSOLVER/turb_hor_dyn_corr.f90 b/src/ZSOLVER/turb_hor_dyn_corr.f90 new file mode 100644 index 000000000..1373e2597 --- /dev/null +++ b/src/ZSOLVER/turb_hor_dyn_corr.f90 @@ -0,0 +1,1297 @@ +!MNH_LIC Copyright 1994-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence +!MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +!MNH_LIC for details. version 1. +!----------------------------------------------------------------- +MODULE MODI_TURB_HOR_DYN_CORR +! +INTERFACE +! + SUBROUTINE TURB_HOR_DYN_CORR(KSPLT, PTSTEP, & + OCLOSE_OUT,OTURB_FLX,KRR, & + TPFILE, & + PK,PINV_PDZZ, & + PDXX,PDYY,PDZZ,PDZX,PDZY,PZZ, & + PDIRCOSZW, & + PCOSSLOPE,PSINSLOPE, & + PRHODJ, & + PCDUEFF,PTAU11M,PTAU12M,PTAU22M,PTAU33M, & + PUM,PVM,PWM,PUSLOPEM,PVSLOPEM, & + PTHLM,PRM,PSVM, & + PTKEM,PLM, & + PDP,PTP, & + PRUS,PRVS,PRWS ) +! +USE MODD_IO, ONLY: TFILEDATA +! +INTEGER, INTENT(IN) :: KSPLT ! split process index +REAL, INTENT(IN) :: PTSTEP ! timestep +LOGICAL, INTENT(IN) :: OCLOSE_OUT ! switch for syncronous + ! file opening +LOGICAL, INTENT(IN) :: OTURB_FLX ! switch to write the + ! turbulent fluxes in the syncronous FM-file +INTEGER, INTENT(IN) :: KRR ! number of moist var. +TYPE(TFILEDATA), INTENT(IN) :: TPFILE ! Output file +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PK ! Turbulent diffusion doef. + ! PK = PLM * SQRT(PTKEM) +REAL, DIMENSION(:,:,:), INTENT(IN) :: PINV_PDZZ ! 1./PDZZ +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX, PDYY, PDZZ, PDZX, PDZY + ! Metric coefficients +REAL, DIMENSION(:,:,:), INTENT(IN) :: PZZ ! vertical grid +REAL, DIMENSION(:,:), INTENT(IN) :: PDIRCOSZW +! Director Cosinus along z directions at surface w-point +REAL, DIMENSION(:,:), INTENT(IN) :: PCOSSLOPE ! cosinus of the angle + ! between i and the slope vector +REAL, DIMENSION(:,:), INTENT(IN) :: PSINSLOPE ! sinus of the angle + ! between i and the slope vector +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRHODJ ! density * grid volume +! +REAL, DIMENSION(:,:), INTENT(IN) :: PCDUEFF ! Cd * || u || at time t +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU11M ! <uu> in the axes linked + ! to the maximum slope direction and the surface normal and the binormal + ! at time t - dt +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU12M ! <uv> in the same axes +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU22M ! <vv> in the same axes +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU33M ! <ww> in the same axes +! +! Variables at t-1 +REAL, DIMENSION(:,:,:), INTENT(IN) :: PUM,PVM,PWM,PTHLM +REAL, DIMENSION(:,:,:,:), INTENT(IN) :: PRM +REAL, DIMENSION(:,:,:,:), INTENT(IN) :: PSVM +REAL, DIMENSION(:,:), INTENT(IN) :: PUSLOPEM ! wind component along the + ! maximum slope direction +REAL, DIMENSION(:,:), INTENT(IN) :: PVSLOPEM ! wind component along the + ! direction normal to the maximum slope one +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PTKEM ! TKE at time t- dt +REAL, DIMENSION(:,:,:), INTENT(IN) :: PLM ! Turb. mixing length +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS, PRVS, PRWS +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PDP,PTP ! TKE production terms +! +END SUBROUTINE TURB_HOR_DYN_CORR +! +END INTERFACE +! +END MODULE MODI_TURB_HOR_DYN_CORR +! ################################################################ + SUBROUTINE TURB_HOR_DYN_CORR(KSPLT, PTSTEP, & + OCLOSE_OUT,OTURB_FLX,KRR, & + TPFILE, & + PK,PINV_PDZZ, & + PDXX,PDYY,PDZZ,PDZX,PDZY,PZZ, & + PDIRCOSZW, & + PCOSSLOPE,PSINSLOPE, & + PRHODJ, & + PCDUEFF,PTAU11M,PTAU12M,PTAU22M,PTAU33M, & + PUM,PVM,PWM,PUSLOPEM,PVSLOPEM, & + PTHLM,PRM,PSVM, & + PTKEM,PLM, & + PDP,PTP, & + PRUS,PRVS,PRWS ) +! ################################################################ +! +!!**** *TURB_HOR* -routine to compute the source terms in the meso-NH +!! model equations due to the non-vertical turbulent fluxes. +!! +!! PURPOSE +!! ------- +!! +!! see TURB_HOR +!! +!!** METHOD +!! ------ +!! +!! EXTERNAL +!! -------- +!! +!! IMPLICIT ARGUMENTS +!! ------------------ +!! +!! REFERENCE +!! --------- +!! +!! AUTHOR +!! ------ +!! +!! Joan Cuxart * INM and Meteo-France * +!! +!! MODIFICATIONS +!! ------------- +!! Aug , 1997 (V. Saravane) spliting of TURB_HOR +!! Nov 27, 1997 (V. Masson) clearing of the routine +!! Oct 18, 2000 (V. Masson) LES computations + LFLAT switch +!! Feb 15, 2001 (J. Stein) remove the use of w=0 at the +!! ground +!! Mar 12, 2001 (V. Masson and J. Stein) major bugs +!! + change of discretization at the surface +!! Nov 06, 2002 (V. Masson) LES budgets +!! October 2009 (G. Tanguy) add ILENCH=LEN(YCOMMENT) after +!! change of YCOMMENT +!! July 2012 (V.Masson) Implicitness of W +!! March 2014 (V.Masson) tridiag_w : bug between +!! mass and flux position +!! J.Escobar : 15/09/2015 : WENO5 & JPHEXT <> 1 +!! M.Moge 04/2016 Use openACC directives to port the TURB part of Meso-NH on GPU +!! Philippe Wautelet: 05/2016-04/2018: new data structures and calls for I/O +! P. Wautelet 20/05/2019: add name argument to ADDnFIELD_ll + new ADD4DFIELD_ll subroutine +! J.Escobar 13/08/2020: PGI/NVHPC BUG , extend DO CONCURRENT to 3D indexes +!! -------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +USE MODD_ARGSLIST_ll, ONLY: LIST_ll +USE MODD_CST +USE MODD_CONF +USE MODD_CTURB +USE MODD_IO, ONLY: TFILEDATA +USE MODD_PARAMETERS +USE MODD_LES +USE MODD_NSV +! +USE MODE_ll +USE MODE_FIELD, ONLY: TFIELDDATA, TYPEREAL +USE MODE_IO_FIELD_WRITE, only: IO_Field_write +use mode_mppdb +! +USE MODI_GRADIENT_M +USE MODI_GRADIENT_U +USE MODI_GRADIENT_V +USE MODI_GRADIENT_W +#ifndef MNH_OPENACC +USE MODI_SHUMAN +#else +USE MODI_SHUMAN_DEVICE +#endif +USE MODI_COEFJ +USE MODI_LES_MEAN_SUBGRID +USE MODI_TRIDIAG_W +! +USE MODI_SECOND_MNH +USE MODE_MPPDB +#ifdef MNH_BITREP +USE MODI_BITREP +#endif +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK , ONLY : MNH_ALLOCATE_ZT3D , MNH_REL_ZT3D, MNH_ALLOCATE_ZT3DP , MNH_ALLOCATE_ZT2D +#endif +! +IMPLICIT NONE +! +! +!* 0.1 declaration of arguments +! +! +! +INTEGER, INTENT(IN) :: KSPLT ! split process index +REAL, INTENT(IN) :: PTSTEP ! timestep +LOGICAL, INTENT(IN) :: OCLOSE_OUT ! switch for syncronous + ! file opening +LOGICAL, INTENT(IN) :: OTURB_FLX ! switch to write the + ! turbulent fluxes in the syncronous FM-file +INTEGER, INTENT(IN) :: KRR ! number of moist var. +TYPE(TFILEDATA), INTENT(IN) :: TPFILE ! Output file +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PK ! Turbulent diffusion doef. + ! PK = PLM * SQRT(PTKEM) +REAL, DIMENSION(:,:,:), INTENT(IN) :: PINV_PDZZ ! 1./PDZZ +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX, PDYY, PDZZ, PDZX, PDZY + ! Metric coefficients +REAL, DIMENSION(:,:,:), INTENT(IN) :: PZZ ! vertical grid +REAL, DIMENSION(:,:), INTENT(IN) :: PDIRCOSZW +! Director Cosinus along z directions at surface w-point +REAL, DIMENSION(:,:), INTENT(IN) :: PCOSSLOPE ! cosinus of the angle + ! between i and the slope vector +REAL, DIMENSION(:,:), INTENT(IN) :: PSINSLOPE ! sinus of the angle + ! between i and the slope vector +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRHODJ ! density * grid volume +! +REAL, DIMENSION(:,:), INTENT(IN) :: PCDUEFF ! Cd * || u || at time t +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU11M ! <uu> in the axes linked + ! to the maximum slope direction and the surface normal and the binormal + ! at time t - dt +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU12M ! <uv> in the same axes +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU22M ! <vv> in the same axes +REAL, DIMENSION(:,:), INTENT(IN) :: PTAU33M ! <ww> in the same axes +! +! Variables at t-1 +REAL, DIMENSION(:,:,:), INTENT(IN) :: PUM,PVM,PWM,PTHLM +REAL, DIMENSION(:,:,:,:), INTENT(IN) :: PRM +REAL, DIMENSION(:,:,:,:), INTENT(IN) :: PSVM +REAL, DIMENSION(:,:), INTENT(IN) :: PUSLOPEM ! wind component along the + ! maximum slope direction +REAL, DIMENSION(:,:), INTENT(IN) :: PVSLOPEM ! wind component along the + ! direction normal to the maximum slope one +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PTKEM ! TKE at time t- dt +REAL, DIMENSION(:,:,:), INTENT(IN) :: PLM ! Turb. mixing length +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS, PRVS, PRWS +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PDP,PTP ! TKE production terms +! +!* 0.2 declaration of local variables +! +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZFLX,ZWORK ! work arrays, PK is the turb. mixing coef. +! +REAL, DIMENSION(:,:), pointer , contiguous :: ZDIRSINZW +INTEGER :: IZFLX,IZWORK,IZDIRSINZW + ! sinus of the angle between the vertical and the normal to the orography +INTEGER :: IKB,IKE + ! Index values for the Beginning and End + ! mass points of the domain +INTEGER :: IKU +INTEGER :: JSV ! scalar loop counter +! +REAL, DIMENSION(:,:,:), pointer , contiguous :: GX_U_M_PUM +REAL, DIMENSION(:,:,:), pointer , contiguous :: GY_V_M_PVM +REAL, DIMENSION(:,:,:), pointer , contiguous :: GZ_W_M_PWM +REAL, DIMENSION(:,:,:), pointer , contiguous :: GZ_W_M_ZWP +INTEGER :: IGX_U_M_PUM,IGY_V_M_PVM,IGZ_W_M_PWM,IGZ_W_M_ZWP +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZMZF_DZZ ! MZF(PDZZ) +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDFDDWDZ ! formal derivative of the +! ! flux (variable: dW/dz) +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZWP ! W at future time-step +! +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDU_DZ_DZS_DX ! du/dz*dzs/dx surf +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDV_DZ_DZS_DY ! dv/dz*dzs/dy surf +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDU_DX ! du/dx surf +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDV_DY ! dv/dy surf +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDW_DZ ! dw/dz surf +INTEGER :: IZMZF_DZZ,IZDFDDWDZ,IZWP,IZDU_DZ_DZS_DX,IZDV_DZ_DZS_DY & + ,IZDU_DX,IZDV_DY,IZDW_DZ +! +INTEGER :: IINFO_ll ! return code of parallel routine +TYPE(LIST_ll), POINTER :: TZFIELDS_ll ! list of fields to exchange + +REAL :: ZTIME1, ZTIME2 + + +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZCOEFF , ZDZZ + ! coefficients for the uncentred gradient + ! computation near the ground +INTEGER :: IZCOEFF , IZDZZ +! +#ifdef MNH_OPENACC +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE, ZTMP3_DEVICE, ZTMP4_DEVICE +INTEGER :: IZTMP1_DEVICE, IZTMP2_DEVICE, IZTMP3_DEVICE, IZTMP4_DEVICE +#endif +TYPE(TFIELDDATA) :: TZFIELD +! +INTEGER :: JIU,JJU,JKU +INTEGER :: JI,JJ,JK +! -------------------------------------------------------------------------- + +!$acc data present( PK, PINV_PDZZ, PDXX, PDYY, PDZZ, PDZX, PDZY, PZZ, PDIRCOSZW, & +!$acc & PCOSSLOPE, PSINSLOPE, PRHODJ, PCDUEFF, & +!$acc & PTAU11M, PTAU12M, PTAU22M, PTAU33M, & +!$acc & PUM, PVM, PWM, PTHLM, PRM, PSVM, PUSLOPEM, PVSLOPEM, PTKEM, PLM, & +!$acc & PRUS, PRVS, PRWS, PDP, PTP ) + +if ( mppdb_initialized ) then + !Check all in arrays + call Mppdb_check( pk, "Turb_hor_dyn_corr beg:pk" ) + call Mppdb_check( pinv_pdzz, "Turb_hor_dyn_corr beg:pinv_pdzz" ) + call Mppdb_check( pdxx, "Turb_hor_dyn_corr beg:pdxx" ) + call Mppdb_check( pdyy, "Turb_hor_dyn_corr beg:pdyy" ) + call Mppdb_check( pdzz, "Turb_hor_dyn_corr beg:pdzz" ) + call Mppdb_check( pdzx, "Turb_hor_dyn_corr beg:pdzx" ) + call Mppdb_check( pdzy, "Turb_hor_dyn_corr beg:pdzy" ) + call Mppdb_check( pzz, "Turb_hor_dyn_corr beg:pzz" ) + call Mppdb_check( pdircoszw, "Turb_hor_dyn_corr beg:pdircoszw" ) + call Mppdb_check( pcosslope, "Turb_hor_dyn_corr beg:pcosslope" ) + call Mppdb_check( psinslope, "Turb_hor_dyn_corr beg:psinslope" ) + call Mppdb_check( prhodj, "Turb_hor_dyn_corr beg:prhodj" ) + call Mppdb_check( pcdueff, "Turb_hor_dyn_corr beg:pcdueff" ) + call Mppdb_check( ptau11m, "Turb_hor_dyn_corr beg:ptau11m" ) + call Mppdb_check( ptau12m, "Turb_hor_dyn_corr beg:ptau12m" ) + call Mppdb_check( ptau22m, "Turb_hor_dyn_corr beg:ptau22m" ) + call Mppdb_check( ptau33m, "Turb_hor_dyn_corr beg:ptau33m" ) + call Mppdb_check( pum, "Turb_hor_dyn_corr beg:pum" ) + call Mppdb_check( pvm, "Turb_hor_dyn_corr beg:pvm" ) + call Mppdb_check( pwm, "Turb_hor_dyn_corr beg:pwm" ) + call Mppdb_check( pthlm, "Turb_hor_dyn_corr beg:pthlm" ) + call Mppdb_check( prm, "Turb_hor_dyn_corr beg:prm" ) + call Mppdb_check( psvm, "Turb_hor_dyn_corr beg:psvm" ) + call Mppdb_check( puslopem, "Turb_hor_dyn_corr beg:puslopem" ) + call Mppdb_check( pvslopem, "Turb_hor_dyn_corr beg:pvslopem" ) + call Mppdb_check( ptkem, "Turb_hor_dyn_corr beg:ptkem" ) + call Mppdb_check( plm, "Turb_hor_dyn_corr beg:plm" ) + !Check all inout arrays + call Mppdb_check( prus, "Turb_hor_dyn_corr beg:prus" ) + call Mppdb_check( prvs, "Turb_hor_dyn_corr beg:prvs" ) + call Mppdb_check( prws, "Turb_hor_dyn_corr beg:prws" ) + call Mppdb_check( pdp, "Turb_hor_dyn_corr beg:pdp" ) + call Mppdb_check( ptp, "Turb_hor_dyn_corr beg:ptp" ) +end if + +JIU = size(pum, 1 ) +JJU = size(pum, 2 ) +JKU = size(pum, 3 ) + +#ifndef MNH_OPENACC +allocate( zflx (JIU,JJU,JKU ) ) +allocate( zwork(JIU,JJU,JKU ) ) + +allocate( zdirsinzw(JIU,JJU ) ) + +allocate( gx_u_m_pum(JIU,JJU,JKU ) ) +allocate( gy_v_m_pvm(JIU,JJU,JKU ) ) +allocate( gz_w_m_pwm(JIU,JJU,JKU ) ) +allocate( gz_w_m_zwp(JIU,JJU,JKU ) ) +allocate( zmzf_dzz (JIU,JJU,JKU ) ) +allocate( zdfddwdz (JIU,JJU,JKU ) ) +allocate( zwp (JIU,JJU,JKU ) ) + +allocate( zdu_dz_dzs_dx(JIU,JJU, 1 ) ) +allocate( zdv_dz_dzs_dy(JIU,JJU, 1 ) ) +allocate( zdu_dx (JIU,JJU, 1 ) ) +allocate( zdv_dy (JIU,JJU, 1 ) ) +allocate( zdw_dz (JIU,JJU, 1 ) ) + +allocate( zcoeff(JIU,JJU, 1 + jpvext : 3 + jpvext ) ) +allocate( zdzz (JIU,JJU, 1 + jpvext : 3 + jpvext ) ) +#else +izflx = MNH_ALLOCATE_ZT3D( zflx ,JIU,JJU,JKU ) +izwork = MNH_ALLOCATE_ZT3D( zwork,JIU,JJU,JKU ) + +izdirsinzw = MNH_ALLOCATE_ZT2D( zdirsinzw,JIU,JJU ) + +igx_u_m_pum = MNH_ALLOCATE_ZT3D( gx_u_m_pum,JIU,JJU,JKU ) +igy_v_m_pvm = MNH_ALLOCATE_ZT3D( gy_v_m_pvm,JIU,JJU,JKU ) +igz_w_m_pwm = MNH_ALLOCATE_ZT3D( gz_w_m_pwm,JIU,JJU,JKU ) +igz_w_m_zwp = MNH_ALLOCATE_ZT3D( gz_w_m_zwp,JIU,JJU,JKU ) +izmzf_dzz = MNH_ALLOCATE_ZT3D( zmzf_dzz ,JIU,JJU,JKU ) +izdfddwdz = MNH_ALLOCATE_ZT3D( zdfddwdz ,JIU,JJU,JKU ) +izwp = MNH_ALLOCATE_ZT3D( zwp ,JIU,JJU,JKU ) + +izdu_dz_dzs_dx = MNH_ALLOCATE_ZT3DP( zdu_dz_dzs_dx,JIU,JJU, 1 , 1 ) +izdv_dz_dzs_dy = MNH_ALLOCATE_ZT3DP( zdv_dz_dzs_dy,JIU,JJU, 1 , 1 ) +izdu_dx = MNH_ALLOCATE_ZT3DP( zdu_dx ,JIU,JJU, 1 , 1 ) +izdv_dy = MNH_ALLOCATE_ZT3DP( zdv_dy ,JIU,JJU, 1 , 1 ) +izdw_dz = MNH_ALLOCATE_ZT3DP( zdw_dz ,JIU,JJU, 1 , 1 ) + +izcoeff = MNH_ALLOCATE_ZT3DP( zcoeff,JIU,JJU, 1 + jpvext , 3 + jpvext ) +izdzz = MNH_ALLOCATE_ZT3DP( zdzz ,JIU,JJU, 1 + jpvext , 3 + jpvext ) +#endif + +#ifdef MNH_OPENACC +iztmp1_device = MNH_ALLOCATE_ZT3D( ztmp1_device,JIU,JJU,JKU ) +iztmp2_device = MNH_ALLOCATE_ZT3D( ztmp2_device,JIU,JJU,JKU ) +iztmp3_device = MNH_ALLOCATE_ZT3D( ztmp3_device,JIU,JJU,JKU ) +iztmp4_device = MNH_ALLOCATE_ZT3D( ztmp4_device,JIU,JJU,JKU ) +#endif + +!$acc data present(ZFLX, ZWORK, ZDIRSINZW, ZCOEFF, ZDZZ, & +!$acc & GX_U_M_PUM, GY_V_M_PVM, GZ_W_M_PWM, GZ_W_M_ZWP, & +!$acc & ZMZF_DZZ, ZDFDDWDZ, ZWP, & +!$acc & ZDU_DZ_DZS_DX, ZDV_DZ_DZS_DY, ZDU_DX, ZDV_DY, ZDW_DZ, & +!$acc & ZTMP1_DEVICE, ZTMP2_DEVICE, ZTMP3_DEVICE, ZTMP4_DEVICE ) + +! +!* 1. PRELIMINARY COMPUTATIONS +! ------------------------ +NULLIFY(TZFIELDS_ll) +! +IKB = 1+JPVEXT +IKE = SIZE(PUM,3)-JPVEXT +IKU = SIZE(PUM,3) +! +! +!$acc kernels async(1) +#ifndef MNH_BITREP +ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) +#else +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) +END DO +#endif +!$acc end kernels +! +#ifndef MNH_OPENACC +GX_U_M_PUM = GX_U_M(PUM,PDXX,PDZZ,PDZX) +IF (.NOT. L2D) THEN + GY_V_M_PVM = GY_V_M(PVM,PDYY,PDZZ,PDZY) +END IF +GZ_W_M_PWM = GZ_W_M(PWM,PDZZ) +! +ZMZF_DZZ = MZF(PDZZ) +#else +CALL GX_U_M_DEVICE(1,IKU,1,PUM,PDXX,PDZZ,PDZX,GX_U_M_PUM) +IF (.NOT. L2D) THEN + CALL GY_V_M_DEVICE(1,IKU,1,PVM,PDYY,PDZZ,PDZY,GY_V_M_PVM) +END IF +CALL GZ_W_M_DEVICE(1,IKU,1,PWM,PDZZ,GZ_W_M_PWM) +! +CALL MZF_DEVICE(1,IKU,1,PDZZ,ZMZF_DZZ) +#endif +! +CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' ) + + +! compute the coefficients for the uncentred gradient computation near the +! ground +! +!* 9. < U'U'> +! ------- +! +! Computes the U variance +IF (.NOT. L2D) THEN + !$acc kernels async(2) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & + - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK) & + -(2./3.) * ( GY_V_M_PVM(JI,JJ,JK) & + +GZ_W_M_PWM(JI,JJ,JK) ) ) + END DO !CONCURRENT + !$acc end kernels + !! & to be tested later + !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP +ELSE + !$acc kernels async(2) + ZFLX(:,:,:)= (2./3.) * PTKEM & + - XCMFS * PK *( (4./3.) * GX_U_M_PUM & + -(2./3.) * ( GZ_W_M_PWM ) ) + !$acc end kernels + !! & to be tested later + !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP +END IF +! +!$acc kernels async(2) +ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) +!$acc end kernels +! +!* prescription of du/dz and dv/dz with uncentered gradient at the surface +! prescription of dw/dz at Dz/2 above ground using the continuity equation +! using a Boussinesq hypothesis to remove the z dependance of rhod_ref +! (div u = 0) +! +#ifndef MNH_OPENACC +ZDZZ(:,:,:) = MXM(PDZZ(:,:,IKB:IKB+2)) +#else +CALL MXM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) +#endif +!$acc kernels async(3) +ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) +ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & + ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) +ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$acc end kernels +! +#ifndef MNH_OPENACC +ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2) & + +ZCOEFF(:,:,IKB+1:IKB+1)*PUM(:,:,IKB+1:IKB+1) & + +ZCOEFF(:,:,IKB :IKB )*PUM(:,:,IKB :IKB ) & + )* 0.5 * ( PDZX(:,:,IKB+1:IKB+1)+PDZX(:,:,IKB:IKB)) & + )/ MXF(PDXX(:,:,IKB:IKB)) +! +ZDZZ(:,:,:) = MYM(PDZZ(:,:,IKB:IKB+2)) +#else +!$acc kernels async(3) +ZTMP1_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PUM(:,:,IKB+2) & + +ZCOEFF(:,:,IKB+1)*PUM(:,:,IKB+1) & + +ZCOEFF(:,:,IKB )*PUM(:,:,IKB) & + )* 0.5 * ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB)) +!$acc end kernels +! +!!! wait for the computation of ZCOEFF and ZTMP1_DEVICE +!$acc wait(3) +! +CALL MXF_DEVICE(ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1)) +CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) +!$acc kernels async(3) +ZDU_DZ_DZS_DX(:,:,1) = ZTMP2_DEVICE(:,:,1) / ZTMP1_DEVICE(:,:,1) +!$acc end kernels +! +CALL MYM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) +#endif +!$acc kernels async(4) +ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) +ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & + ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) +ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$acc end kernels +! +#ifndef MNH_OPENACC +ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2) & + +ZCOEFF(:,:,IKB+1:IKB+1)*PVM(:,:,IKB+1:IKB+1) & + +ZCOEFF(:,:,IKB :IKB )*PVM(:,:,IKB :IKB ) & + )* 0.5 * ( PDZY(:,:,IKB+1:IKB+1)+PDZY(:,:,IKB:IKB)) & + )/ MYF(PDYY(:,:,IKB:IKB)) +#else +!$acc kernels async(4) +ZTMP3_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PVM(:,:,IKB+2) & + +ZCOEFF(:,:,IKB+1)*PVM(:,:,IKB+1) & + +ZCOEFF(:,:,IKB)*PVM(:,:,IKB) & + )* 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB)) +!$acc end kernels +! +!!! wait for the computation of ZCOEFF and ZTMP3_DEVICE +!$acc wait(4) +#endif +! +#ifndef MNH_OPENACC +ZDU_DX(:,:,:)= DXF(PUM(:,:,IKB:IKB)) / MXF(PDXX(:,:,IKB:IKB)) & + - ZDU_DZ_DZS_DX(:,:,:) + +ZDV_DY(:,:,:)= DYF(PVM(:,:,IKB:IKB)) / MYF(PDYY(:,:,IKB:IKB)) & + - ZDV_DZ_DZS_DY(:,:,:) +#else +CALL MYF_DEVICE(ZTMP3_DEVICE(:,:,1:1), ZTMP4_DEVICE(:,:,1:1)) +CALL MYF_DEVICE(PDYY(:,:,IKB:IKB), ZTMP3_DEVICE(:,:,1:1)) +!$acc kernels async(4) +ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) +!$acc end kernels +! +! +!!! wait for the computation of ZDU_DZ_DZS_DX +!$acc wait(3) +! +CALL DXF_DEVICE(PUM(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1)) +CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP2_DEVICE(:,:,1:1)) +!$acc kernels async(3) +ZDU_DX(:,:,1)= ZTMP1_DEVICE(:,:,1) / ZTMP2_DEVICE(:,:,1) - ZDU_DZ_DZS_DX(:,:,1) +!$acc end kernels + +!!! wait for the computation of ZDV_DZ_DZS_DY +!$acc wait(4) +! +CALL DYF_DEVICE(PVM(:,:,IKB:IKB),ZTMP3_DEVICE(:,:,1:1)) +CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) +!$acc kernels! async(4) +ZDV_DY(:,:,1)= ZTMP3_DEVICE(:,:,1) / ZTMP4_DEVICE(:,:,1) - ZDV_DZ_DZS_DY(:,:,1) +!$acc end kernels +! +! +!!! wait for the computation of ZDU_DX and ZDV_DY +!$acc wait(3) async(4) +#endif +! +!$acc kernels async(4) +ZDW_DZ(:,:,:)=-ZDU_DX(:,:,:)-ZDV_DY(:,:,:) +!$acc end kernels +! +!* computation +! +!!! wait for the computation of ZFLX +!$acc wait(2) async(4) +!!! wait for the computation of ZDW_DZ +!$acc wait(4) +! +! ! !!! we can launch the update of ZFLX on the part that has already been computed +! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10) +!attention !!!!! je ne comprends pas pourquoi mais ce update plante à l'execution... +! du coup je ne peux pas faire de update self asynchrone... +! +!$acc kernels async(3) +ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDU_DX(:,:,1) +!$acc end kernels + + +!! & to be tested later +!! + XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & +!! (-2./3.) * PTP(:,:,IKB:IKB) +! +! extrapolates this flux under the ground with the surface flux +! +! +!!! wait for the computation of ZDIRSINZW +!$acc wait(1) +! +!$acc kernels async(4) +#ifndef MNH_BITREP +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & + -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & + + PTAU22M(:,:) * PSINSLOPE(:,:)**2 & + + PTAU33M(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:)**2 & + +2. * PCDUEFF(:,:) * ( & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) & + - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) +#else +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) +ZFLX(JI,JJ,IKB-1) = & + PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & + -2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & + + PTAU22M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) & + + PTAU33M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ)) & + +2. * PCDUEFF(JI,JJ) * ( & + PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ) & + - PUSLOPEM(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) ) +END DO ! CONCURRENT +#endif +!$acc end kernels +! +!!! wait for the computation of ZFLX(:,:,IKB) and ZFLX(:,:,IKB-1) +!$acc wait(3) async(4) +! +!$acc kernels async(4) +ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) - ZFLX(:,:,IKB) +!$acc end kernels +! +! +!!! wait for the computation of ZFLX(:,:,IKB-1) +!$acc wait(4) +! + + +! ! !!! we can launch the update of ZFLX on the rest +! ! !$acc update self(ZFLX(:,:,1:IKB)) async(11) +! ! ! +! ! !!! and wait for the update self(ZFLX(...)) to complete +! ! !$acc wait(10) +! ! !$acc wait(11) +!attention !!!!! je ne comprends pas pourquoi mais le update self(ZFLX(:,:,IKB+1:)) plante à l'execution... +! du coup je ne peux pas faire de update self asynchrone... + + +! +!!! at this point there are no more async operations running +!!! to be absolutely sure, we do a wait +!$acc wait +! +!$acc update self(ZFLX) +CALL UPDATE_HALO_ll(TZFIELDS_ll, IINFO_ll) +!$acc update device(ZFLX) async(10) +! +IF ( OCLOSE_OUT .AND. OTURB_FLX ) THEN + ! stores <U U> + TZFIELD%CMNHNAME = 'U_VAR' + TZFIELD%CSTDNAME = '' + TZFIELD%CLONGNAME = 'U_VAR' + TZFIELD%CUNITS = 'm2 s-2' + TZFIELD%CDIR = 'XY' + TZFIELD%CCOMMENT = 'X_Y_Z_U_VAR' + TZFIELD%NGRID = 1 + TZFIELD%NTYPE = TYPEREAL + TZFIELD%NDIMS = 3 + TZFIELD%LTIMEDEP = .TRUE. + CALL IO_Field_write(TPFILE,TZFIELD,ZFLX) +END IF +! +! Complete the U tendency +#ifndef MNH_OPENACC +IF (.NOT. LFLAT) THEN + PRUS(:,:,:)=PRUS & + -DXM(PRHODJ * ZFLX / MXF(PDXX) ) & + +DZF( PDZX / MZM(PDXX) * MXM( MZM(PRHODJ*ZFLX) * PINV_PDZZ ) ) +ELSE + PRUS(:,:,:)=PRUS -DXM(PRHODJ * ZFLX / MXF(PDXX) ) +END IF +#else +CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE) +!$acc kernels async(10) +!$acc loop independent collapse(3) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) +END DO !CONCURRENT +!$acc end kernels +! +!!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX +!$acc wait(10) +! +CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) +IF (.NOT. LFLAT) THEN + CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + CALL DZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP1_DEVICE) + !$acc kernels async(1) + PRUS(:,:,:)=PRUS(:,:,:) & + -ZTMP3_DEVICE(:,:,:) & + +ZTMP1_DEVICE(:,:,:) + !$acc end kernels +ELSE + !$acc kernels async(1) + PRUS(:,:,:)=PRUS(:,:,:) - ZTMP3_DEVICE(:,:,:) + !$acc end kernels +END IF +#endif +! +IF (KSPLT==1) THEN + ! Contribution to the dynamic production of TKE: + !$acc kernels async(2) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + ! + ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) + ! + !$acc kernels async(2) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDU_DX(:,:,1) + ZWORK(:,:,IKB+1) ) + !$acc end kernels + ! + !$acc kernels async(2) + PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) + !$acc end kernels +END IF +! +! Storage in the LES configuration +! +IF (LLES_CALL .AND. KSPLT==1) THEN + CALL SECOND_MNH(ZTIME1) +!$acc data copy(X_LES_SUBGRID_U2,X_LES_RES_ddxa_U_SBG_UaU) + CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_U2 ) +#ifndef MNH_OPENACC + CALL LES_MEAN_SUBGRID( -ZWORK, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) +#else + ! + !!! wait for the computation of ZWORK and PDP + !$acc wait(2) + ! + !$acc kernels + ZTMP1_DEVICE = -ZWORK + !$acc end kernels + CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) + ! +#endif +!$acc end data + CALL SECOND_MNH(ZTIME2) + XTIME_LES = XTIME_LES + ZTIME2 - ZTIME1 +END IF + +! +!* 10. < V'V'> +! ------- +! +!!! wait for the computation of ZWORK and PDP (that uses ZFLX) +!$acc wait(2) +! +! Computes the V variance +IF (.NOT. L2D) THEN + !$acc kernels async(3) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & + - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK) & + -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & + +GZ_W_M_PWM(JI,JJ,JK) ) ) + END DO !CONCURRENT + !$acc end kernels + !! & to be tested + !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP + ! +ELSE + !$acc kernels async(3) + ZFLX(:,:,:)= (2./3.) * PTKEM & + - XCMFS * PK *(-(2./3.) * ( GX_U_M_PUM & + +GZ_W_M_PWM ) ) + !$acc end kernels + !! & to be tested + !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP + ! +END IF +! +!$acc kernels async(3) +ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) +!$acc end kernels +! +! ! !!! wait for the computation of ZFLX to begin the update +! ! !$acc wait(3) +! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10) +! +!$acc kernels async(3) +ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDV_DY(:,:,1) +!$acc end kernels + +!! & to be tested +!! + XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & +!! (-2./3.) * PTP(:,:,IKB:IKB) +! +! extrapolates this flux under the ground with the surface flux +!$acc kernels async(3) +#ifndef MNH_BITREP +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & + +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & + + PTAU22M(:,:) * PCOSSLOPE(:,:)**2 & + + PTAU33M(:,:) * PSINSLOPE(:,:)**2 * ZDIRSINZW(:,:)**2 & + -2. * PCDUEFF(:,:)* ( & + PUSLOPEM(:,:) * PSINSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) & + + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) +#else +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) +ZFLX(JI,JJ,IKB-1) = & + PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & + +2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & + + PTAU22M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) & + + PTAU33M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ)) & + -2. * PCDUEFF(JI,JJ)* ( & + PUSLOPEM(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) & + + PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ) ) +END DO ! CONCURRENT +#endif +!$acc end kernels +! +!$acc kernels async(3) +ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) - ZFLX(:,:,IKB) +!$acc end kernels +! +! +! ! !!! wait for the computation of ZFLX(:,:,1:IKB) to begin the update +! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(3) +! ! ! +! ! !!! and wait for the update self(ZFLX(...)) to complete +! ! !$acc wait(10) +! ! !$acc wait(3) +! +!$acc wait(3) +!$acc update self(ZFLX) +CALL UPDATE_HALO_ll(TZFIELDS_ll, IINFO_ll) +!$acc update device(ZFLX) async(10) +! +IF ( OCLOSE_OUT .AND. OTURB_FLX ) THEN + ! stores <V V> + TZFIELD%CMNHNAME = 'V_VAR' + TZFIELD%CSTDNAME = '' + TZFIELD%CLONGNAME = 'V_VAR' + TZFIELD%CUNITS = 'm2 s-2' + TZFIELD%CDIR = 'XY' + TZFIELD%CCOMMENT = 'X_Y_Z_V_VAR' + TZFIELD%NGRID = 1 + TZFIELD%NTYPE = TYPEREAL + TZFIELD%NDIMS = 3 + TZFIELD%LTIMEDEP = .TRUE. + CALL IO_Field_write(TPFILE,TZFIELD,ZFLX) +END IF +! +!!! wait for the computation of PRUS (that uses ZTMP1_DEVICE and ZTMP3_DEVICE) +!$acc wait(1) +! +! +! +! Complete the V tendency +IF (.NOT. L2D) THEN +#ifndef MNH_OPENACC + IF (.NOT. LFLAT) THEN + PRVS(:,:,:)=PRVS & + -DYM(PRHODJ * ZFLX / MYF(PDYY) ) & + +DZF( PDZY / MZM(PDYY) * & + MYM( MZM(PRHODJ*ZFLX) * PINV_PDZZ ) ) + ELSE + PRVS(:,:,:)=PRVS -DYM(PRHODJ * ZFLX / MYF(PDYY) ) + END IF +! +! Contribution to the dynamic production of TKE: + IF (KSPLT==1) ZWORK(:,:,:) = - ZFLX(:,:,:) * GY_V_M_PVM +#else + CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE) + !$acc kernels async(10) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + ! + !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX + !$acc wait(10) + ! + CALL DYM_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE ) + IF (.NOT. LFLAT) THEN + CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE ) + !$acc kernels + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + CALL DZF_DEVICE(1,IKU,1,ZTMP2_DEVICE,ZTMP4_DEVICE ) + !$acc kernels async(1) + PRVS(:,:,:)=PRVS(:,:,:) & + -ZTMP3_DEVICE(:,:,:) & + +ZTMP4_DEVICE(:,:,:) + !$acc end kernels + ELSE + !$acc kernels async(1) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + END IF +! Contribution to the dynamic production of TKE: + IF (KSPLT==1) THEN + !$acc kernels async(2) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + ENDIF +#endif +ELSE + !$acc kernels async(2) + ZWORK(:,:,:) = 0. + !$acc end kernels +END IF +! +IF (KSPLT==1) THEN + ! + ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) + ! + !$acc kernels async(2) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDV_DY(:,:,1) + ZWORK(:,:,IKB+1) ) + !$acc end kernels + ! + !$acc kernels async(2) + PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) + !$acc end kernels +END IF +! +! Storage in the LES configuration +! +IF (LLES_CALL .AND. KSPLT==1) THEN + CALL SECOND_MNH(ZTIME1) +!$acc data copy(X_LES_SUBGRID_V2,X_LES_RES_ddxa_V_SBG_UaV) + CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_V2 ) +#ifndef MNH_OPENACC + CALL LES_MEAN_SUBGRID( -ZWORK, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) +#else + ! + !!! wait for the computation of ZWORK and PDP + !$acc wait(2) + ! + !$acc kernels + ZTMP1_DEVICE = -ZWORK + !$acc end kernels + CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) + ! +#endif +!$acc end data + CALL SECOND_MNH(ZTIME2) + XTIME_LES = XTIME_LES + ZTIME2 - ZTIME1 +END IF +! +!* 11. < W'W'> +! ------- +! +! Computes the W variance +IF (.NOT. L2D) THEN + !$acc kernels async(2) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK) & + - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & + -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & + +GY_V_M_PVM(JI,JJ,JK) ) ) + END DO !CONCURRENT + !$acc end kernels + !! & to be tested + !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP +ELSE + !$acc kernels async(2) + ZFLX(:,:,:)= (2./3.) * PTKEM & + - XCMFS * PK *( (4./3.) * GZ_W_M_PWM & + -(2./3.) * ( GX_U_M_PUM ) ) + !$acc end kernels + !! & to be tested + !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP +END IF +! +!$acc kernels async(2) +ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) +!$acc end kernels +! +!!! wait for the computation of ZWORK, PDP and ZFLX +!$acc wait(2) +! +! +!$acc kernels async(2) +ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDW_DZ(:,:,1) +!$acc end kernels +! + +! & to be tested +! - 2.* XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & +! (-2./3.) * PTP(:,:,IKB:IKB) +! extrapolates this flux under the ground with the surface flux +!$acc kernels async(3) +#ifndef MNH_BITREP +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * ZDIRSINZW(:,:)**2 & + + PTAU33M(:,:) * PDIRCOSZW(:,:)**2 & + +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) +#else +!$acc loop independent collapse(2) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) +ZFLX(JI,JJ,IKB-1) = & + PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & + + PTAU33M(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ)) & + +2. * PCDUEFF(JI,JJ)* PUSLOPEM(JI,JJ) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) +END DO ! CONCURRENT +#endif +!$acc end kernels + ! +! +!!! wait for the computation of ZFLX(:,:,IKB-1) and ZFLX(:,:,IKB) +!$acc wait(2) async(3) +! +!$acc kernels async(3) +ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) - ZFLX(:,:,IKB) +!$acc end kernels +! +IF ( OCLOSE_OUT .AND. OTURB_FLX ) THEN + !$acc wait(3) + !$acc update self(ZFLX) + ! stores <W W> + TZFIELD%CMNHNAME = 'W_VAR' + TZFIELD%CSTDNAME = '' + TZFIELD%CLONGNAME = 'W_VAR' + TZFIELD%CUNITS = 'm2 s-2' + TZFIELD%CDIR = 'XY' + TZFIELD%CCOMMENT = 'X_Y_Z_W_VAR' + TZFIELD%NGRID = 1 + TZFIELD%NTYPE = TYPEREAL + TZFIELD%NDIMS = 3 + TZFIELD%LTIMEDEP = .TRUE. + CALL IO_Field_write(TPFILE,TZFIELD,ZFLX) +END IF +! +! +!!! wait for the computation of PRVS (that uses ZTMP1_DEVICE and ZTMP3_DEVICE) +!$acc wait(1) +! + +! +! Complete the W tendency +! +!PRWS(:,:,:)=PRWS(:,:,:) - DZM( PRHODJ*ZFLX/MZF(PDZZ) ) +!$acc kernels async(2) +ZDFDDWDZ(:,:,:) = - XCMFS * PK(:,:,:) * (4./3.) +!$acc end kernels +!$acc kernels async(2) +ZDFDDWDZ(:,:,:IKB) = 0. +!$acc end kernels +! +!!! wait for the computation of ZFLX(:,:,IKB-1) and ZDFDDWDZ +!$acc wait(3) async(2) +!$acc wait(2) +! +CALL TRIDIAG_W(PWM,ZFLX,ZDFDDWDZ,PTSTEP,ZMZF_DZZ,PRHODJ,ZWP) +! +#ifndef MNH_OPENACC +PRWS = PRWS(:,:,:) + MZM(PRHODJ(:,:,:))*(ZWP(:,:,:)-PWM(:,:,:))/PTSTEP +#else +CALL MZM_DEVICE(PRHODJ(:,:,:),ZTMP1_DEVICE) +!$acc kernels async(1) +PRWS = PRWS(:,:,:) + ZTMP1_DEVICE *(ZWP(:,:,:)-PWM(:,:,:))/PTSTEP +!$acc end kernels +#endif +! +!* recomputes flux using guess of W +! +#ifndef MNH_OPENACC +GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ) +#else +CALL GZ_W_M_DEVICE(1,IKU,1,ZWP,PDZZ,GZ_W_M_ZWP) +#endif +!$acc kernels async(2) +!$acc loop independent collapse(3) +DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) + ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) & + - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK)) +END DO !CONCURRENT +!$acc end kernels +! +IF (KSPLT==1) THEN + !Contribution to the dynamic production of TKE: + !$acc kernels async(2) + !$acc loop independent collapse(3) + DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK) + END DO !CONCURRENT + !$acc end kernels + ! + ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) + ! + !$acc kernels async(2) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDW_DZ(:,:,1) + ZWORK(:,:,IKB+1) ) + !$acc end kernels + ! + !$acc kernels async(2) + PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) + !$acc end kernels +END IF +! +! Storage in the LES configuration +! +! +IF (LLES_CALL .AND. KSPLT==1) THEN + CALL SECOND_MNH(ZTIME1) +#ifndef MNH_OPENACC + CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_W2 ) + CALL LES_MEAN_SUBGRID( -ZWORK, X_LES_RES_ddxa_W_SBG_UaW , .TRUE.) + CALL LES_MEAN_SUBGRID( GZ_M_M(PTHLM,PDZZ)*ZFLX, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.) + CALL LES_MEAN_SUBGRID(ZFLX*MZF(GZ_M_W(1,IKU,1,PTHLM,PDZZ)),X_LES_RES_ddz_Thl_SBG_W2) + IF (KRR>=1) THEN + CALL LES_MEAN_SUBGRID( GZ_M_M(PRM(:,:,:,1),PDZZ)*ZFLX, & + X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.) + CALL LES_MEAN_SUBGRID(ZFLX*MZF(GZ_M_W(1,IKU,1,PRM(:,:,:,1),PDZZ)), & + X_LES_RES_ddz_Rt_SBG_W2) + END IF + DO JSV=1,NSV + CALL LES_MEAN_SUBGRID( GZ_M_M(PSVM(:,:,:,JSV),PDZZ)*ZFLX, & + X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.) + CALL LES_MEAN_SUBGRID(ZFLX*MZF(GZ_M_W(1,IKU,1,PSVM(:,:,:,JSV),PDZZ)), & + X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) + END DO +#else +!$acc data copy(X_LES_SUBGRID_W2,X_LES_RES_ddxa_W_SBG_UaW,X_LES_RES_ddxa_Thl_SBG_UaW,X_LES_RES_ddz_Thl_SBG_W2) + ! + CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_W2 ) + ! + ! + !!! wait for the computation of ZFLX, ZDP and ZWORK + !$acc wait(2) + ! + !$acc kernels + ZTMP1_DEVICE = -ZWORK + !$acc end kernels + CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_W_SBG_UaW , .TRUE.) + ! + CALL GZ_M_M_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE) + !$acc kernels + ZTMP2_DEVICE = ZTMP1_DEVICE * ZFLX + !$acc end kernels + CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.) + ! + CALL GZ_M_W_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE) + CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE) + !$acc kernels + ZTMP3_DEVICE = ZFLX*ZTMP2_DEVICE + !$acc end kernels + CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE,X_LES_RES_ddz_Thl_SBG_W2) + ! +!$acc end data + ! + IF (KRR>=1) THEN +!$acc data copy(X_LES_RES_ddxa_Rt_SBG_UaW,X_LES_RES_ddz_Rt_SBG_W2) + ! + CALL GZ_M_M_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) + !$acc kernels + ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX + !$acc end kernels + CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.) + ! + CALL GZ_M_W_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) + CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE) + !$acc kernels + ZTMP3_DEVICE = ZFLX*ZTMP2_DEVICE + !$acc end kernels + CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Rt_SBG_W2) + ! +!$acc end data + END IF +!$acc data copy(X_LES_RES_ddxa_Sv_SBG_UaW,X_LES_RES_ddz_Sv_SBG_W2) + DO JSV=1,NSV + ! + ! + CALL GZ_M_M_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) + !$acc kernels + ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX + !$acc end kernels + CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, & + X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.) + ! + CALL GZ_M_W_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) + CALL MZF_DEVICE(1,IKU,1,ZTMP1_DEVICE,ZTMP2_DEVICE) + !$acc kernels + ZTMP3_DEVICE = ZFLX*ZTMP2_DEVICE + !$acc end kernels + CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) + ! + ! + END DO +!$acc end data +#endif + CALL SECOND_MNH(ZTIME2) + XTIME_LES = XTIME_LES + ZTIME2 - ZTIME1 +END IF +! +! +!!! wait for the computation of ZFLX, ZDP and ZWORK +!$acc wait(2) +!!! wait for the computation of PRWS +!$acc wait(1) +! +!!! et un dernier wait pour etre sur +!$acc wait +! +CALL CLEANLIST_ll(TZFIELDS_ll) + +if ( mppdb_initialized ) then + !Check all inout arrays + call Mppdb_check( prus, "Turb_hor_dyn_corr end:prus" ) + call Mppdb_check( prvs, "Turb_hor_dyn_corr end:prvs" ) + call Mppdb_check( prws, "Turb_hor_dyn_corr end:prws" ) + call Mppdb_check( pdp, "Turb_hor_dyn_corr end:pdp" ) + call Mppdb_check( ptp, "Turb_hor_dyn_corr end:ptp" ) +end if + +!$acc end data + +#ifndef MNH_OPENACC +DEALLOCATE (ZFLX, ZWORK, ZDIRSINZW, ZCOEFF, ZDZZ, & + GX_U_M_PUM, GY_V_M_PVM, GZ_W_M_PWM, GZ_W_M_ZWP, & + ZMZF_DZZ, ZDFDDWDZ, ZWP, & + ZDU_DZ_DZS_DX, ZDV_DZ_DZS_DY, ZDU_DX, ZDV_DY, ZDW_DZ ) +#else +CALL MNH_REL_ZT3D(IZTMP1_DEVICE, IZTMP2_DEVICE, IZTMP3_DEVICE, IZTMP4_DEVICE ) +CALL MNH_REL_ZT3D(IZFLX, IZWORK, IZDIRSINZW, & + IGX_U_M_PUM, IGY_V_M_PVM, IGZ_W_M_PWM, IGZ_W_M_ZWP, & + IZMZF_DZZ, IZDFDDWDZ, IZWP, & + IZDU_DZ_DZS_DX, IZDV_DZ_DZS_DY, IZDU_DX, IZDV_DY, IZDW_DZ, & + IZCOEFF, IZDZZ ) + +#endif + + +!$acc end data + +END SUBROUTINE TURB_HOR_DYN_CORR -- GitLab