diff --git a/src/LIB/SURCOUCHE/src/mode_exchange_ll.f90 b/src/LIB/SURCOUCHE/src/mode_exchange_ll.f90 index a087d3ca94dd56e9aa8c593eea6d0e1ea347ddf9..9aba7c897d549ce040e03333614cecaf1027c0de 100644 --- a/src/LIB/SURCOUCHE/src/mode_exchange_ll.f90 +++ b/src/LIB/SURCOUCHE/src/mode_exchange_ll.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1998-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1998-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -794,23 +794,41 @@ INTEGER :: NB_REQ INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TSEND_TRANS_BX, & TCRRT_COMDATA%TRECV_TRANS_BX, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TSEND_TRANS_BX, & + TCRRT_COMDATA%TRECV_TRANS_BX, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TSEND_TRANS_BX, & TCRRT_COMDATA%TRECV_TRANS_BX, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TSEND_TRANS_BX, & + TCRRT_COMDATA%TRECV_TRANS_BX, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! + +!$acc end data + !------------------------------------------------------------------------------- ! END SUBROUTINE REMAP_2WAY_X_ll @@ -866,10 +884,13 @@ INTEGER :: NB_REQ ! !* 0. DECLARATIONS ! - USE MODD_ARGSLIST_ll, ONLY : LIST_ll USE MODD_VAR_ll, ONLY : TCRRT_COMDATA ! - USE MODE_ARGSLIST_ll, ONLY : ADD3DFIELD_ll, CLEANLIST_ll + USE MODE_MPPDB + +! #ifdef MNH_OPENACC + USE MODI_GET_HALO +! #endif ! !* 0.1 declarations of arguments ! @@ -879,35 +900,56 @@ INTEGER :: NB_REQ ! !* 0.2 declarations of local variables ! - TYPE(LIST_ll), POINTER :: TZLIST ! !------------------------------------------------------------------------------- + +CALL MPPDB_CHECK( PFIELDIN, 'REMAP_X_2WAY_ll beg:PFIELDIN' ) + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TRECV_TRANS_BX, & TCRRT_COMDATA%TSEND_TRANS_BX, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TRECV_TRANS_BX, & + TCRRT_COMDATA%TSEND_TRANS_BX, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TRECV_TRANS_BX, & TCRRT_COMDATA%TSEND_TRANS_BX, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TRECV_TRANS_BX, & + TCRRT_COMDATA%TSEND_TRANS_BX, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !* 3. UPDATE HALO : ! ----------- ! - NULLIFY(TZLIST) - CALL ADD3DFIELD_ll( TZLIST, PFIELDOUT, 'REMAP_X_2WAY_ll::PFIELDOUT' ) -! - CALL UPDATE_HALO_ll(TZLIST, KINFO) - CALL CLEANLIST_ll(TZLIST) +#ifndef MNH_OPENACC + CALL GET_HALO( PFIELDOUT, HNAME = 'REMAP_X_2WAY_ll' ) +#else + CALL GET_HALO_D( PFIELDOUT, HNAME = 'UPDATE_HALO_ll::GET_HALO::REMAP_X_2WAY_ll' ) +#endif ! + +!$acc end data + +CALL MPPDB_CHECK( PFIELDOUT, 'REMAP_X_2WAY_ll end:PFIELDOUT' ) + !------------------------------------------------------------------------------- ! END SUBROUTINE REMAP_X_2WAY_ll @@ -957,6 +999,8 @@ INTEGER :: NB_REQ !* 0. DECLARATIONS ! USE MODD_VAR_ll, ONLY : TCRRT_COMDATA + + USE MODE_MPPDB ! ! !* 0.1 declarations of arguments @@ -966,22 +1010,51 @@ INTEGER :: NB_REQ INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PFIELDIN, "REMAP_Y_X_ll beg:PFIELDIN" ) + CALL MPPDB_CHECK( PFIELDOUT, "REMAP_Y_X_ll beg:PFIELDOUT" ) +END IF + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TSEND_TRANS_XY, & TCRRT_COMDATA%TRECV_TRANS_XY, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TSEND_TRANS_XY, & + TCRRT_COMDATA%TRECV_TRANS_XY, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TSEND_TRANS_XY, & TCRRT_COMDATA%TRECV_TRANS_XY, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TSEND_TRANS_XY, & + TCRRT_COMDATA%TRECV_TRANS_XY, & + PFIELDIN, PFIELDOUT, KINFO) +#endif + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PFIELDIN, "REMAP_Y_X_ll end:PFIELDIN" ) + CALL MPPDB_CHECK( PFIELDOUT, "REMAP_Y_X_ll end:PFIELDOUT" ) +END IF ! !------------------------------------------------------------------------------- ! @@ -1032,6 +1105,8 @@ INTEGER :: NB_REQ !* 0. DECLARATIONS ! USE MODD_VAR_ll, ONLY : TCRRT_COMDATA + + USE MODE_MPPDB ! !* 0.1 declarations of arguments ! @@ -1040,22 +1115,51 @@ INTEGER :: NB_REQ INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PFIELDIN, "REMAP_Y_X_ll beg:PFIELDIN" ) + CALL MPPDB_CHECK( PFIELDOUT, "REMAP_Y_X_ll beg:PFIELDOUT" ) +END IF + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TRECV_TRANS_XY, & TCRRT_COMDATA%TSEND_TRANS_XY, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TRECV_TRANS_XY, & + TCRRT_COMDATA%TSEND_TRANS_XY, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TRECV_TRANS_XY, & TCRRT_COMDATA%TSEND_TRANS_XY, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TRECV_TRANS_XY, & + TCRRT_COMDATA%TSEND_TRANS_XY, & + PFIELDIN, PFIELDOUT, KINFO) +#endif + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PFIELDIN, "REMAP_Y_X_ll end:PFIELDIN" ) + CALL MPPDB_CHECK( PFIELDOUT, "REMAP_Y_X_ll end:PFIELDOUT" ) +END IF ! !------------------------------------------------------------------------------- ! @@ -1397,6 +1501,128 @@ INTEGER :: NB_REQ ! END SUBROUTINE COPY_CRSPD_TRANS ! +#ifdef MNH_OPENACC +! ####################################################################### + SUBROUTINE COPY_CRSPD_TRANS_DEVICE(TPSEND, TPRECV, PFIELDIN, PFIELDOUT, KINFO) +! ####################################################################### +! +!!**** *COPY_CRSPD_TRANS* - +! +!! Purpose +!! ------- +! copy the zones a process sends to itself, instead of sending them +!! via MPI. +!! +!!** Method +!! ------ +! we go over all the zones of the TPSEND variable and find +! out those whose NUMBER equals IP. +! In this case the recipient of a ZONE is the same as the sender. +! To find out where the zone has to be copied we go over the +! TPRECV variable. +! +!! External +!! -------- +! Module MODE_EXCHANGE_ll +! COPY_ZONE_TRANS +! +!! Implicit Arguments +!! ------------------ +! Module MODD_ARGSLIST_ll +! type LIST_ll +! +! Module MODD_STRUCTURE_ll +! type CRSPD_ll +! +! Module MODD_VAR_ll +! IP - Number of local processor=subdomain +! +!! Reference +!! --------- +! +!! Author +!! ------ +! N. Gicquel * CERFACS - CNRM * +!! +!! Modifications +!! ------------- +!! 1 october 1998 +! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! + USE MODD_ARGSLIST_ll, ONLY : LIST_ll + USE MODD_STRUCTURE_ll, ONLY : CRSPD_ll + USE MODD_VAR_ll, ONLY : IP +! + IMPLICIT NONE +! +!* 0.1 declarations of arguments +! + TYPE(CRSPD_ll), POINTER :: TPSEND ! CRSPD to be sent + TYPE(CRSPD_ll), POINTER :: TPRECV ! CRSPD to be received + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PFIELDIN + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PFIELDOUT + INTEGER :: KINFO ! return status +! +!* 0.2 declarations of local variables +! + TYPE(CRSPD_ll), POINTER :: TZSEND, TZRECV + INTEGER :: JSEND +! +!------------------------------------------------------------------------------- +! +!* 1. GO OVER THE TPSEND_CRSPD LIST OF ZONES +! -------------------------------------- +! + IF (.NOT.ASSOCIATED(TPSEND)) THEN + RETURN + ENDIF +! +!$acc data present( PFIELDIN, PFIELDOUT ) + TZSEND => TPSEND + DO JSEND = 1, TPSEND%NCARD +! +!* 1.1 Test whether a zone is sent to the same proc +! + IF (TZSEND%TELT%NUMBER == IP) THEN +! +!* 1.2 If so, go over the TPRECV list of zones +!* and test whether the zone to be received corresponds to +!* the zone to be sent +! + TZRECV => TPRECV + DO WHILE (ASSOCIATED(TZRECV)) + IF (TZRECV%TELT%NUMBER == IP & + .AND. TZRECV%TELT%MSSGTAG == TZSEND%TELT%MSSGTAG) THEN +! +!* 1.2.1 If so, copy the zone +! + CALL COPY_ZONE_TRANS_DEVICE(TZSEND%TELT, TZRECV%TELT, PFIELDIN, PFIELDOUT, & + KINFO) +! + ENDIF +! + TZRECV => TZRECV%TNEXT +! + ENDDO +! + ENDIF +! + TZSEND => TZSEND%TNEXT +! + ENDDO + +!$acc end data + +! +!------------------------------------------------------------------------------- +! + END SUBROUTINE COPY_CRSPD_TRANS_DEVICE +! +#endif ! ###################################################################### SUBROUTINE COPY_ZONE_TRANS(TPSEND, TPRECV, PFIELDIN, PFIELDOUT, KINFO) ! ###################################################################### @@ -1482,6 +1708,95 @@ INTEGER :: NB_REQ ! END SUBROUTINE COPY_ZONE_TRANS ! +#ifdef MNH_OPENACC +! ###################################################################### + SUBROUTINE COPY_ZONE_TRANS_DEVICE(TPSEND, TPRECV, PFIELDIN, PFIELDOUT, KINFO) +! ###################################################################### +! +!!**** *COPY_ZONE_TRANS* - +! +!! Purpose +!! ------- +! this routine copies the values of the PFIELDIN field situated +! in the TPSEND zone, into the PFIELDOUT field at the TPRECV zone +! +!! Implicit Arguments +!! ------------------ +! Module MODD_ARGSLIST_ll +! type LIST_ll +! +! Module MODD_STRUCTURE_ll +! type ZONE_ll +! +!! Reference +!! --------- +! +!! Author +!! ------ +! N. Gicquel * CERFACS - CNRM * +!! +!! Modifications +!! ------------- +!! 1 october 1998 +! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! + USE MODD_ARGSLIST_ll, ONLY : LIST_ll + USE MODD_STRUCTURE_ll, ONLY : ZONE_ll +! + IMPLICIT NONE +! +!* 0.1 declarations of arguments +! + + TYPE(ZONE_ll) :: TPSEND ! ZONE_ll to be sent + TYPE(ZONE_ll) :: TPRECV ! ZONE_ll to be received + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PFIELDIN + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PFIELDOUT + INTEGER :: KINFO ! return status +! +!* 0.2 declarations of local variables +! + TYPE(LIST_ll), POINTER :: TZLIST + INTEGER :: IIBS, IIES, IJBS, IJES, IKBS, IKES, IIBR, IIER, & + IJBR, IJER, IKBR, IKER +! +!------------------------------------------------------------------------------- +! +!* 1. INITIALISE DIMENSIONS +! --------------------- +! + IIBS = TPSEND%NXOR + IIES = TPSEND%NXEND + IJBS = TPSEND%NYOR + IJES = TPSEND%NYEND + IKBS = TPSEND%NZOR + IKES = TPSEND%NZEND +! + IIBR = TPRECV%NXOR + IIER = TPRECV%NXEND + IJBR = TPRECV%NYOR + IJER = TPRECV%NYEND + IKBR = TPRECV%NZOR + IKER = TPRECV%NZEND +! +!* 2. COPY THE VALUES OF PFIELDIN SITUATED IN THE TPSEND ZONE +!* IN THE ENTRIES OF PFIELDOUT DEFINED BY TPRECV +! ------------------------------------------------------- +! +!$acc kernels present( PFIELDIN, PFIELDOUT ) + PFIELDOUT(IIBR:IIER,IJBR:IJER,IKBR:IKER) = & + PFIELDIN(IIBS:IIES,IJBS:IJES,IKBS:IKES) +!$acc end kernels +! +!------------------------------------------------------------------------------- +! + END SUBROUTINE COPY_ZONE_TRANS_DEVICE +! +#endif ! ######################################################## SUBROUTINE FILLIN_BUFFER(TPFIELD, TPZONE, PBUFFER, KINC) ! ######################################################## @@ -2238,6 +2553,429 @@ INTEGER :: NB_REQ,NFIRST_REQ_RECV ! END SUBROUTINE SEND_RECV_FIELD ! +#ifdef MNH_OPENACC +! ###################################################### + SUBROUTINE SEND_RECV_FIELD_DEVICE(TPCRSPDSEND, TPCRSPDRECV, & + PFIELDIN, PFIELDOUT, KINFO) +! ###################################################### +! +!!**** *SEND_RECV_FIELD* - +! +!! Purpose +!! ------- +! This routine sends the data of the PFIELDIN field +! to the correspondants of the TPCRSPDSEND list +! and receives the data of the PFIELDOUT field +! from the correspondants of the TPCRSPDRECV list +! +!!** Method +!! ------ +! +! This routine is based on the following rule : +! one sent for one received (if it is possible); +! The algorithm is the following : +! +! while (there is some messages to send) +! OR (there is some messages to receive) +! do +! if (there is some messages to send) +! send the first of the list +! end if +! if (there is some messages to receive) +! try to receive +! end if +! done +! +! The receptions are non-blocking and don't follow necessarly +! the order of the TPCRSPDRECV list +! +!! External +!! ------- +! Module MODE_TOOLS_ll +! GET_MAX_SIZE +! +!! Implicit Arguments +!! ------------------ +! Module MODD_STRUCTURE_ll +! type CRSPD_ll, ZONE_ll +! +! Module MODD_ARGSLIST_ll +! type LIST_ll +! +! Module MODD_VAR_ll +! IP - Number of local processor=subdomain +! NCOMBUFFSIZE1 - buffer size +! NTRANS_COM - mpi communicator +! NNEXTTAG, NMAXTAG - variable to define message tag +! +! Module MODD_PARAMETERS_ll +! JPVEXT - vertical halo size +! +! Module MODD_DIM_ll +! NKMAX_TMP_ll - maximum vertical dimension +! +!! Reference +!! --------- +! +!! Author +!! ------ +! N. Gicquel * CERFACS - CNRM * +! J. Escobar 18/08/2018 : Bug on MPI_RECV <-> uninitialized IMAXSIZESEND/IMAXSIZERECV variables +! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! + USE MODD_ARGSLIST_ll, ONLY : LIST_ll + use modd_precision, only: MNHREAL_MPI + USE MODD_STRUCTURE_ll, ONLY : CRSPD_ll, ZONE_ll + USE MODD_VAR_ll, ONLY : NCOMBUFFSIZE1, IP, NTRANS_COM, & + NNEXTTAG, NMAXTAG + USE MODD_PARAMETERS_ll, ONLY : JPVEXT + USE MODD_DIM_ll, ONLY : NKMAX_TMP_ll +! +#ifdef MNH_OPENACC + USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif + USE MODE_TOOLS_ll, ONLY : GET_MAX_SIZE +! +!JUANZ + USE MODD_CONFZ, ONLY : LMNH_MPI_BSEND +!JUANZ + IMPLICIT NONE +! +!* 0.1 declarations of arguments +! + TYPE(CRSPD_ll), POINTER :: TPCRSPDSEND, TPCRSPDRECV + REAL, DIMENSION(:, :, :), INTENT(INOUT) :: PFIELDIN + REAL, DIMENSION(:, :, :), INTENT(INOUT) :: PFIELDOUT +! + INTEGER :: KINFO +! +!* 0.2 declarations of local variables +! + INTEGER :: JINC, JI, JJ, JK ! Loop and counter variables + INTEGER :: FOUND, KERROR +! + TYPE(CRSPD_ll), POINTER :: TZMAILSEND, TZMAILRECV + TYPE(ZONE_ll), POINTER :: TZZONESEND, TZZONERECV +! +! JUAN +!if defined (MNH_MPI_ISEND) +#ifndef MNH_OPENACC + REAL, DIMENSION (:,:), ALLOCATABLE :: TZBUFFER +#else + REAL, DIMENSION (:,:), POINTER, CONTIGUOUS :: TZBUFFER +#endif +!!$#else +!!$ REAL, DIMENSION (:), ALLOCATABLE :: TZBUFFER +!!$#endif +! JUAN +! + INTEGER IRECVSTATUS(MPI_STATUS_SIZE) ! Status of completed receive request +! + LOGICAL :: IRECVFLAG + INTEGER :: IMSGTAG, ISENDERPROC +! + INTEGER :: IRECVNB, ISENDNB ! Total numbers of receive and send to do + INTEGER :: IRECVDONE +! + INTEGER, SAVE :: ITAGOFFSET = 0 +! + INTEGER :: IMAXSIZESEND, IMAXSIZERECV, IBUFFSIZE +! +!JUAN + INTEGER :: JKMAX,JJMIN,JJMAX,JIMIN,JIMAX + INTEGER :: JKOR,JKEND,JJOR,JJEND,JIOR,JIEND + + INTEGER :: JIJ,JIJMAX + INTEGER :: JIBOX,JJBOX +!JUAN +! JUAN +!if defined (MNH_MPI_ISEND) +INTEGER,PARAMETER :: MPI_MAX_REQ = 1024 +INTEGER,SAVE,DIMENSION(MPI_MAX_REQ) :: REQ_TAB +INTEGER,SAVE,DIMENSION(MPI_STATUS_SIZE,MPI_MAX_REQ) :: STATUS_TAB +INTEGER :: NB_REQ,NFIRST_REQ_RECV +!endif +! JUAN +!------------------------------------------------------------------------------- + +!$acc data present( PFIELDIN, PFIELDOUT ) + +! +!* 1. INITIALISATIONS +! --------------- +! + IRECVNB = 0 + ISENDNB = 0 + IRECVDONE = 0 +! +!* 1.1 computation of the buffer'size +! + IF (.NOT.ASSOCIATED(TPCRSPDSEND)) THEN + ISENDNB = 0 + IMAXSIZESEND = 0 + ELSE + ISENDNB = TPCRSPDSEND%NCARDDIF + IMAXSIZESEND = GET_MAX_SIZE(TPCRSPDSEND) + ENDIF +! + IF (.NOT.ASSOCIATED(TPCRSPDRECV)) THEN + IRECVNB = 0 + IMAXSIZERECV = 0 + ELSE + IRECVNB = TPCRSPDRECV%NCARDDIF + IMAXSIZERECV = GET_MAX_SIZE(TPCRSPDRECV) + ENDIF +! + IBUFFSIZE = IMAXSIZESEND + IF (IMAXSIZERECV > IBUFFSIZE) IBUFFSIZE = IMAXSIZERECV +! + IBUFFSIZE = IBUFFSIZE * (NKMAX_TMP_ll + 2 * JPVEXT) +! + !Pin positions in the pools of MNH memory + CALL MNH_MEM_POSITION_PIN() + + IF ( .NOT. LMNH_MPI_BSEND) THEN + CALL MNH_MEM_GET( TZBUFFER, IBUFFSIZE, ISENDNB + IRECVNB ) + else + CALL MNH_MEM_GET( TZBUFFER, IBUFFSIZE, 1 ) + endif +!$acc kernels present( TZBUFFER ) + TZBUFFER(:,:) = 0.0 +!$acc end kernels +! + TZMAILRECV => TPCRSPDRECV + TZMAILSEND => TPCRSPDSEND +! +!NZJUAN CALL MPI_BARRIER(NTRANS_COM, KERROR) +! +!------------------------------------------------------------------------------- +! +!* 2. MAIN LOOP +! --------- +! +! JUAN +!if defined (MNH_MPI_ISEND) + IF ( .NOT. LMNH_MPI_BSEND) THEN + NB_REQ = 0 + endif + ! JUAN + DO WHILE (ASSOCIATED(TZMAILSEND)) +! +!* 2.1 if there is still something to send +! + IF (ASSOCIATED(TZMAILSEND)) THEN + TZZONESEND => TZMAILSEND%TELT + IF (TZZONESEND%NUMBER /= IP) THEN +! JUAN +!if defined (MNH_MPI_ISEND) + IF ( .NOT. LMNH_MPI_BSEND) THEN + NB_REQ = NB_REQ + 1 + endif +! JUAN +! Z axe + JKMAX = SIZE(PFIELDIN,3) + JKOR = TZZONESEND%NZOR + JKEND = TZZONESEND%NZEND +! Y axe + JJMAX = SIZE(PFIELDIN,2) + JJOR = TZZONESEND%NYOR + JJEND = TZZONESEND%NYEND +! X axe + JIMAX = SIZE(PFIELDIN,1) + JIOR = TZZONESEND%NXOR + JIEND = TZZONESEND%NXEND + + JINC = 0 + JIBOX = JIEND - JIOR + 1 + JJBOX = JJEND - JJOR + 1 + JIJMAX = JIBOX*JJBOX + +!acc kernels present( TZBUFFER ) + DO JK=TZZONESEND%NZOR, TZZONESEND%NZEND +!$acc kernels present( TZBUFFER ) +!CDIR NODEP +!OCL NOVREC + DO JIJ=1,JIJMAX + JI = JIOR + MOD( JIJ -1 , JIBOX ) + JJ = JJOR + ( JIJ -1 ) / JIBOX +!JUAN +!if defined (MNH_MPI_ISEND) + IF ( .NOT. LMNH_MPI_BSEND) THEN + TZBUFFER(JIJ+JINC,NB_REQ) = PFIELDIN(JI, JJ, JK) + else + TZBUFFER(JIJ+JINC,1) = PFIELDIN(JI, JJ, JK) + endif + !JUAN + ENDDO + JINC = JINC + JIJMAX +!$acc end kernels + ENDDO +!acc end kernels +! +!if defined(MNH_MPI_BSEND) +!$acc host_data use_device( TZBUFFER ) + IF (LMNH_MPI_BSEND) THEN + CALL MPI_BSEND(TZBUFFER, JINC, MNHREAL_MPI, TZZONESEND%NUMBER - 1, & + TZZONESEND%MSSGTAG + ITAGOFFSET, NTRANS_COM, KERROR) + else + + CALL MPI_ISEND(TZBUFFER(1,NB_REQ), JINC, MNHREAL_MPI, TZZONESEND%NUMBER - 1, & + TZZONESEND%MSSGTAG + ITAGOFFSET, NTRANS_COM, REQ_TAB(NB_REQ), KERROR) + + endif +!$acc end host_data + ENDIF + TZMAILSEND => TZMAILSEND%TNEXT + ENDIF +! + ENDDO + +!NZJUAN CALL MPI_BARRIER(NTRANS_COM, KERROR) + +! JUAN +!if defined (MNH_MPI_ISEND) + IF ( .NOT. LMNH_MPI_BSEND) THEN + NFIRST_REQ_RECV = NB_REQ + endif +! JUAN + + DO WHILE (ASSOCIATED(TZMAILRECV)) + TZZONERECV => TZMAILRECV%TELT + IF (TZZONERECV%NUMBER == IP) THEN + TZMAILRECV => TZMAILRECV%TNEXT + ELSE + ! JUAN + !if defined (MNH_MPI_ISEND) + IF ( .NOT. LMNH_MPI_BSEND) THEN + NB_REQ = NB_REQ + 1 + !JUAN NZ CALL MPI_IRECV(TZBUFFER(1,NB_REQ), NCOMBUFFSIZE1, MNHREAL_MPI, & +!$acc host_data use_device( TZBUFFER ) + CALL MPI_IRECV(TZBUFFER(1,NB_REQ), IBUFFSIZE, MNHREAL_MPI, & + TZZONERECV%NUMBER-1, TZZONERECV%MSSGTAG + ITAGOFFSET, & + NTRANS_COM, REQ_TAB(NB_REQ), KERROR) +!$acc end host_data + else + !JUAN NZ CALL MPI_RECV(TZBUFFER, NCOMBUFFSIZE1, MNHREAL_MPI, TZZONERECV%NUMBER-1, & +!$acc host_data use_device( TZBUFFER ) + CALL MPI_RECV(TZBUFFER, IBUFFSIZE, MNHREAL_MPI, TZZONERECV%NUMBER-1, & + TZZONERECV%MSSGTAG + ITAGOFFSET, NTRANS_COM, IRECVSTATUS, KERROR) +!$acc end host_data + !JUAN + ! Z axe + JKMAX = SIZE(PFIELDOUT,3) + JKOR = TZZONERECV%NZOR + JKEND = TZZONERECV%NZEND + ! Y axe + JJMAX = SIZE(PFIELDOUT,2) + JJOR = TZZONERECV%NYOR + JJEND = TZZONERECV%NYEND + ! X axe + JIMAX = SIZE(PFIELDOUT,1) + JIOR = TZZONERECV%NXOR + JIEND = TZZONERECV%NXEND + ! + JINC = 0 + JIBOX = JIEND - JIOR + 1 + JJBOX = JJEND - JJOR + 1 + JIJMAX = JIBOX*JJBOX +!$acc kernels present( TZBUFFER ) + DO JK = TZZONERECV%NZOR, TZZONERECV%NZEND + JINC = ( JK - TZZONERECV%NZOR ) * JIJMAX + !CDIR NODEP + !OCL NOVREC +!$acc loop independent + DO JIJ=1,JIJMAX + JI = JIOR + MOD( JIJ -1 , JIBOX ) + JJ = JJOR + ( JIJ -1 ) / JIBOX + PFIELDOUT(JI, JJ, JK) = TZBUFFER(JIJ+JINC,1) + ENDDO +! JINC = JINC + JIJMAX + ENDDO +!$acc end kernels + ! + endif + ! JUAN + TZMAILRECV => TZMAILRECV%TNEXT + !JUAN + ENDIF + ! + ENDDO + + +!if defined(MNH_MPI_ISEND) + IF ( .NOT. LMNH_MPI_BSEND) THEN + + CALL MPI_WAITALL(NB_REQ,REQ_TAB,STATUS_TAB,KINFO) + + + TZMAILRECV => TPCRSPDRECV + NB_REQ = NFIRST_REQ_RECV + + DO WHILE (ASSOCIATED(TZMAILRECV)) + TZZONERECV => TZMAILRECV%TELT + IF (TZZONERECV%NUMBER == IP) THEN + TZMAILRECV => TZMAILRECV%TNEXT + ELSE + NB_REQ = NB_REQ + 1 + TZMAILRECV => TZMAILRECV%TNEXT + + !JUAN + ! Z axe + JKMAX = SIZE(PFIELDOUT,3) + JKOR = TZZONERECV%NZOR + JKEND = TZZONERECV%NZEND + ! Y axe + JJMAX = SIZE(PFIELDOUT,2) + JJOR = TZZONERECV%NYOR + JJEND = TZZONERECV%NYEND + ! X axe + JIMAX = SIZE(PFIELDOUT,1) + JIOR = TZZONERECV%NXOR + JIEND = TZZONERECV%NXEND + ! + JINC = 0 + JIBOX = JIEND - JIOR + 1 + JJBOX = JJEND - JJOR + 1 + JIJMAX = JIBOX*JJBOX +!$acc kernels present( TZBUFFER ) + DO JK = TZZONERECV%NZOR, TZZONERECV%NZEND + JINC = ( JK - TZZONERECV%NZOR ) * JIJMAX + !CDIR NODEP + !OCL NOVREC +!$acc loop independent + DO JIJ=1,JIJMAX + JI = JIOR + MOD( JIJ -1 , JIBOX ) + JJ = JJOR + ( JIJ -1 ) / JIBOX + PFIELDOUT(JI, JJ, JK) = TZBUFFER(JIJ+JINC,NB_REQ) + ENDDO + JINC = JINC + JIJMAX + ENDDO +!$acc end kernels + ! + ENDIF + + ! + ENDDO + endif + ! + !NZJUAN CALL MPI_BARRIER(NTRANS_COM, KERROR) + ! + ITAGOFFSET = MOD((ITAGOFFSET + NNEXTTAG), NMAXTAG) +! + !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN + CALL MNH_MEM_RELEASE() + +!$acc end data +! +!------------------------------------------------------------------------------- +! + END SUBROUTINE SEND_RECV_FIELD_DEVICE +! +#endif ! ############################################################## SUBROUTINE SEND_RECV_CRSPD(TPCRSPDSEND, TPCRSPDRECV, & TPFIELDLISTSEND, TPFIELDLISTRECV, & diff --git a/src/LIB/SURCOUCHE/src/mode_splittingz_ll.f90 b/src/LIB/SURCOUCHE/src/mode_splittingz_ll.f90 index fc5c40069db48cc06299afa288b986b0cf1d54db..e629d30aaf6a3ac7a04b7d03a8a1fcbbcd854556 100644 --- a/src/LIB/SURCOUCHE/src/mode_splittingz_ll.f90 +++ b/src/LIB/SURCOUCHE/src/mode_splittingz_ll.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 2011-2019 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 2011-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -1065,9 +1065,15 @@ CONTAINS ! !* 0. DECLARATIONS ! +#ifndef MNH_OPENACC USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD, COPY_CRSPD_TRANS +#else + USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD_DEVICE, COPY_CRSPD_TRANS_DEVICE +#endif USE MODD_VAR_ll , ONLY : TCRRT_COMDATA USE MODD_CONFZ , ONLY : NZ_SPLITTING ! for debug IZ=1=flat_inv; IZ=2=flat_invz ; IZ=1+2=the two + + USE MODE_MPPDB ! IMPLICIT NONE ! @@ -1081,31 +1087,65 @@ CONTAINS ! ! !------------------------------------------------------------------------------- + + IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PFIELDIN, "REMAP_SXP1_YP2_Z_SX_YP2_ZP1_ll beg:PFIELDIN" ) + CALL MPPDB_CHECK( PFIELDOUT, "REMAP_SXP1_YP2_Z_SX_YP2_ZP1_ll beg:PFIELDOUT" ) + END IF + +!$acc data present( PFIELDIN, PFIELDOUT ) ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! - IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN + IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & + TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & + TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ELSE ! !------------------------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL ALL_SEND_RECV(TCRRT_COMDATA%TSEND_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & TCRRT_COMDATA%TRECV_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL ALL_SEND_RECV_DEVICE(TCRRT_COMDATA%TSEND_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & + TCRRT_COMDATA%TRECV_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ENDIF + +!$acc end data + IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PFIELDIN, "REMAP_SXP1_YP2_Z_SX_YP2_ZP1_ll end:PFIELDIN" ) + CALL MPPDB_CHECK( PFIELDOUT, "REMAP_SXP1_YP2_Z_SX_YP2_ZP1_ll end:PFIELDOUT" ) + END IF + ! END SUBROUTINE REMAP_SXP1_YP2_Z_SX_YP2_ZP1_ll @@ -1195,7 +1235,11 @@ CONTAINS ! !* 0. DECLARATIONS ! +#ifndef MNH_OPENACC USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD, COPY_CRSPD_TRANS +#else + USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD_DEVICE, COPY_CRSPD_TRANS_DEVICE +#endif USE MODD_VAR_ll , ONLY : TCRRT_COMDATA USE MODD_CONFZ , ONLY : NZ_SPLITTING ! for debug IZ=1=flat_inv; IZ=2=flat_invz ; IZ=1+2=the two ! @@ -1208,28 +1252,52 @@ CONTAINS INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & + TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TRECV_SXP1_YP2_Z_SX_YP2_ZP1, & + TCRRT_COMDATA%TSEND_SXP1_YP2_Z_SX_YP2_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ELSE +#ifndef MNH_OPENACC CALL ALL_SEND_RECV(TCRRT_COMDATA%TRECV_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & TCRRT_COMDATA%TSEND_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL ALL_SEND_RECV_DEVICE(TCRRT_COMDATA%TRECV_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & + TCRRT_COMDATA%TSEND_BOX_SXP1_YP2_Z_SX_YP2_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ENDIF + +!$acc end data + ! !------------------------------------------------------------------------------- ! @@ -1258,7 +1326,11 @@ CONTAINS ! !* 0. DECLARATIONS ! +#ifndef MNH_OPENACC USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD, COPY_CRSPD_TRANS +#else + USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD_DEVICE, COPY_CRSPD_TRANS_DEVICE +#endif USE MODD_VAR_ll , ONLY : TCRRT_COMDATA USE MODD_CONFZ , ONLY : NZ_SPLITTING ! for debug IZ=1=flat_inv; IZ=2=flat_invz ; IZ=1+2=the two ! @@ -1271,32 +1343,53 @@ CONTAINS INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- +!$acc data present( PFIELDIN, PFIELDOUT ) ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & + TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & + TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ELSE ! !------------------------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL ALL_SEND_RECV(TCRRT_COMDATA%TSEND_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & TCRRT_COMDATA%TRECV_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL ALL_SEND_RECV_DEVICE(TCRRT_COMDATA%TSEND_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & + TCRRT_COMDATA%TRECV_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ENDIF ! + +!$acc end data !------------------------------------------------------------------------------- ! END SUBROUTINE REMAP_SX_YP2_ZP1_SXP2_Y_ZP1_ll @@ -1324,7 +1417,11 @@ CONTAINS ! !* 0. DECLARATIONS ! +#ifndef MNH_OPENACC USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD, COPY_CRSPD_TRANS +#else + USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD_DEVICE, COPY_CRSPD_TRANS_DEVICE +#endif USE MODD_VAR_ll , ONLY : TCRRT_COMDATA USE MODD_CONFZ , ONLY : NZ_SPLITTING ! for debug IZ=1=flat_inv; IZ=2=flat_invz ; IZ=1+2=the two ! @@ -1342,25 +1439,43 @@ CONTAINS ! ------------------------------------------------------------- ! IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & + TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TRECV_SX_YP2_ZP1_SXP2_Y_ZP1, & + TCRRT_COMDATA%TSEND_SX_YP2_ZP1_SXP2_Y_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ELSE ! !------------------------------------------------------------------------------- ! +#ifndef MNH_OPENACC CALL ALL_SEND_RECV(TCRRT_COMDATA%TRECV_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & TCRRT_COMDATA%TSEND_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL ALL_SEND_RECV_DEVICE(TCRRT_COMDATA%TRECV_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & + TCRRT_COMDATA%TSEND_BOX_SX_YP2_ZP1_SXP2_Y_ZP1, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ENDIF ! !------------------------------------------------------------------------------- @@ -1390,7 +1505,11 @@ CONTAINS ! !* 0. DECLARATIONS ! +#ifndef MNH_OPENACC USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD, COPY_CRSPD_TRANS +#else + USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD_DEVICE, COPY_CRSPD_TRANS_DEVICE +#endif USE MODD_VAR_ll , ONLY : TCRRT_COMDATA USE MODD_CONFZ , ONLY : NZ_SPLITTING ! for debug IZ=1=flat_inv; IZ=2=flat_invz ; IZ=1+2=the two ! @@ -1403,28 +1522,52 @@ CONTAINS INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & + TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & + TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ELSE +#ifndef MNH_OPENACC CALL ALL_SEND_RECV(TCRRT_COMDATA%TSEND_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & TCRRT_COMDATA%TRECV_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL ALL_SEND_RECV_DEVICE(TCRRT_COMDATA%TSEND_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & + TCRRT_COMDATA%TRECV_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ENDIF + +!$acc end data + ! !------------------------------------------------------------------------------- ! @@ -1519,7 +1662,11 @@ CONTAINS ! !* 0. DECLARATIONS ! +#ifndef MNH_OPENACC USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD, COPY_CRSPD_TRANS +#else + USE MODE_EXCHANGE_ll, ONLY : SEND_RECV_FIELD_DEVICE, COPY_CRSPD_TRANS_DEVICE +#endif USE MODD_VAR_ll , ONLY : TCRRT_COMDATA USE MODD_CONFZ , ONLY : NZ_SPLITTING ! for debug IZ=1=flat_inv; IZ=2=flat_invz ; IZ=1+2=the two ! @@ -1532,28 +1679,52 @@ CONTAINS INTEGER :: KINFO ! return status ! !------------------------------------------------------------------------------- + +!$acc data present( PFIELDIN, PFIELDOUT ) + ! !* 1. UPDATE THE ZONES NOT SENT OR RECEIVED BY THE PROCESSOR ITSELF ! ------------------------------------------------------------- ! IF ( IAND(NZ_SPLITTING,4) .EQ. 0 ) THEN +#ifndef MNH_OPENACC CALL SEND_RECV_FIELD(TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL SEND_RECV_FIELD_DEVICE(TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & + TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ! !------------------------------------------------------------------------------- ! !* 2. UPDATE THE ZONES THE PROCESSOR SENDS OR RECEIVED FROM ITSELF ! ------------------------------------------------------------ ! +#ifndef MNH_OPENACC CALL COPY_CRSPD_TRANS(TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL COPY_CRSPD_TRANS_DEVICE(TCRRT_COMDATA%TRECV_SXP2_Y_ZP1_SXP2_YP1_Z, & + TCRRT_COMDATA%TSEND_SXP2_Y_ZP1_SXP2_YP1_Z, & + PFIELDIN, PFIELDOUT, KINFO) +#endif ELSE +#ifndef MNH_OPENACC CALL ALL_SEND_RECV(TCRRT_COMDATA%TRECV_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & TCRRT_COMDATA%TSEND_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & PFIELDIN, PFIELDOUT, KINFO) +#else + CALL ALL_SEND_RECV_DEVICE(TCRRT_COMDATA%TRECV_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & + TCRRT_COMDATA%TSEND_BOX_SXP2_Y_ZP1_SXP2_YP1_Z, & + PFIELDIN, PFIELDOUT, KINFO) +#endif END IF + +!$acc end data + ! !------------------------------------------------------------------------------- ! @@ -1859,6 +2030,122 @@ END FUNCTION LSOUTHZ_ll DEALLOCATE(ZSEND,ZRECV) END SUBROUTINE ALL_SEND_RECV +#ifdef MNH_OPENACC + SUBROUTINE ALL_SEND_RECV_DEVICE(TSEND_BOX_FROM,TRECV_BOX_TO, & + PFIELDIN, PFIELDOUT, KINFO) + ! + use modd_precision, only: MNHREAL_MPI + USE MODD_STRUCTURE_ll, ONLY: BOX_ll + USE MODD_VAR_ll, ONLY: NMNH_COMM_WORLD + +#ifdef MNH_OPENACC + USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif + ! + IMPLICIT NONE + ! + ! Argument + ! + TYPE(BOX_ll) , POINTER :: TSEND_BOX_FROM,TRECV_BOX_TO + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PFIELDIN ! field to be sent + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PFIELDOUT ! reception field + INTEGER :: KINFO ! return status + ! + ! local var + ! + INTEGER :: JB,JI,JJ,JK, JCNT ! loop + INTEGER :: JCNT0 +#ifndef MNH_OPENACC + REAL, DIMENSION(:), ALLOCATABLE :: ZSEND ! buffer to be sent + REAL, DIMENSION(:), ALLOCATABLE :: ZRECV ! buffer to be recv +#else + REAL, DIMENSION(:), POINTER, CONTIGUOUS :: ZSEND ! buffer to be sent + REAL, DIMENSION(:), POINTER, CONTIGUOUS :: ZRECV ! buffer to be recv +#endif + ! + +!$acc data present( PFIELDIN, PFIELDOUT ) + +#ifndef MNH_OPENACC + ALLOCATE(ZSEND(TSEND_BOX_FROM%NSIZE)) + ALLOCATE(ZRECV(TRECV_BOX_TO%NSIZE)) +#else + !Pin positions in the pools of MNH memory + CALL MNH_MEM_POSITION_PIN() + + CALL MNH_MEM_GET( ZSEND, TSEND_BOX_FROM%NSIZE ) + CALL MNH_MEM_GET( ZRECV, TRECV_BOX_TO%NSIZE ) +#endif + ! +!$acc update self( PFIELDIN ) +!acc kernels present( ZSEND ) + JCNT = 0 +JCNT0 = 0 + DO JB = 1, TSEND_BOX_FROM%NBOX + IF ( TSEND_BOX_FROM%NCNT(JB) .NE. 0 ) THEN +!acc kernels present( ZSEND ) + DO JK=TSEND_BOX_FROM%NZOR(JB),TSEND_BOX_FROM%NZEND(JB) +! JCNT0 = JCNT + DO JJ=TSEND_BOX_FROM%NYOR(JB),TSEND_BOX_FROM%NYEND(JB) +!acc kernels present( ZSEND ) + DO JI=TSEND_BOX_FROM%NXOR(JB),TSEND_BOX_FROM%NXEND(JB) +! JCNT = JCNT0 + ( JJ - TSEND_BOX_FROM%NYOR(JB) ) * (TSEND_BOX_FROM%NXEND(JB)-TSEND_BOX_FROM%NXOR(JB)+1) & +! + JI - TSEND_BOX_FROM%NXOR(JB) + 1 + JCNT = JCNT + 1 + ZSEND(JCNT) = PFIELDIN(JI,JJ,JK) + END DO +!acc end kernels + END DO + JCNT0 = JCNT0 + ( TSEND_BOX_FROM%NXEND(JB) - TSEND_BOX_FROM%NXOR(JB) + 1 ) & + * ( TSEND_BOX_FROM%NYEND(JB) - TSEND_BOX_FROM%NYOR(JB) + 1 ) + END DO +!acc end kernels + END IF + END DO +!acc end kernels +!$acc update device( ZSEND ) + ! +!$acc host_data use_device( ZSEND, ZRECV ) + CALL mpi_alltoallv(ZSEND,TSEND_BOX_FROM%NCNT,TSEND_BOX_FROM%NSTRT,MNHREAL_MPI,& + ZRECV,TRECV_BOX_TO%NCNT ,TRECV_BOX_TO%NSTRT ,MNHREAL_MPI,& + TSEND_BOX_FROM%NCOM,KINFO) +!$acc end host_data + ! +!$acc update self( ZRECV ) +!acc kernels present( ZRECV ) + JCNT = 0 + PFIELDOUT = 0.0 + DO JB = 1, TRECV_BOX_TO%NBOX + IF ( TRECV_BOX_TO%NCNT(JB) .NE. 0 ) THEN +!acc kernels present( ZRECV ) + DO JK=TRECV_BOX_TO%NZOR(JB),TRECV_BOX_TO%NZEND(JB) + DO JJ=TRECV_BOX_TO%NYOR(JB),TRECV_BOX_TO%NYEND(JB) +!acc kernels present( ZRECV ) + DO JI=TRECV_BOX_TO%NXOR(JB),TRECV_BOX_TO%NXEND(JB) + JCNT = JCNT + 1 + PFIELDOUT(JI,JJ,JK) = ZRECV(JCNT) + END DO +!acc end kernels + END DO + END DO +!acc end kernels + END IF + END DO +!acc end kernels +!$acc update device( PFIELDOUT ) + ! +#ifndef MNH_OPENACC + DEALLOCATE(ZSEND,ZRECV) +#else + !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN + CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + + END SUBROUTINE ALL_SEND_RECV_DEVICE +#endif + ! ########################################## SUBROUTINE GET_ORZ_ll( HSPLIT, KXOR, KYOR ) ! ########################################## diff --git a/src/MNH/compute_spectre.f90 b/src/MNH/compute_spectre.f90 index fe9b93f5a3d0e5e86d42fa611db5e67fcd69e011..294453d7868b757c5bfd1dcbafcf8549dd9b493a 100644 --- a/src/MNH/compute_spectre.f90 +++ b/src/MNH/compute_spectre.f90 @@ -150,6 +150,9 @@ INTEGER :: IRESP ! return code in FM routines REAL :: ZMOY_C, ZMOY_S, ZVAR_C, ZVAR_S, ZVAR_S2 !computation of statistical moments !------------------------------------------------------------------------------- ! +#ifdef MNH_OPENACC +CALL PRINT_MSG(NVERB_FATAL,'GEN','COMPUTE_SPECTRE','OpenACC: not yet ported') +#endif ILUOUT = TLUOUT%NLU ! !* 1. COMPUTE LOOP BOUNDS diff --git a/src/MNH/conjgrad.f90 b/src/MNH/conjgrad.f90 index 1e290402211817736c2c73ec41206dbf3046037a..c38478a7f0c16fea7b06d7ca1a24578e60f4d7f6 100644 --- a/src/MNH/conjgrad.f90 +++ b/src/MNH/conjgrad.f90 @@ -130,6 +130,11 @@ END MODULE MODI_CONJGRAD !* 0. DECLARATIONS ! ------------ ! +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +USE MODE_MPPDB + USE MODI_DOTPROD USE MODI_FLAT_INV USE MODI_QLAP @@ -185,6 +190,7 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PPHI ! solution of the equation ! INTEGER :: JM ! loop index ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZDELTA ! array containing the auxilary field DELTA of the CG method ! @@ -199,18 +205,72 @@ REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZWORKD ! work ! REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZWORKP ! work ! array containing the result of the F inversion * Q (P) +#else +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDELTA ! array containing the auxilary field DELTA of the CG method +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZP ! array containing the auxilary field P of the CG method +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZWORK ! work ! array containing the source term to be multiplied by the F inverse +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZWORKD ! work array containing the result of the F inversion * Q (DELTA) +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZWORKP ! work array containing the result of the F inversion * Q (P) +#endif ! REAL :: ZALPHA, ZLAMBDA ! amplitude of the descent in the Conjugate ! directions REAL :: ZDOTPP ! dot product of ZWORKP by itself -! +#ifdef MNH_OPENACC +INTEGER :: JIU,JJU,JKU +#endif + !------------------------------------------------------------------------------- + + +!$acc data present( PDXX, PDYY, PDZX, PDZY, PDZZ, PRHODJ, PTHETAV ) & +!$acc & present( PRHOM, PAF, PBF, PCF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PY ) & +!$acc & present( PPHI ) + +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"CONJGRAD beg:PDXX") + CALL MPPDB_CHECK(PDYY,"CONJGRAD beg:PDYY") + CALL MPPDB_CHECK(PDZX,"CONJGRAD beg:PDZX") + CALL MPPDB_CHECK(PDZY,"CONJGRAD beg:PDZY") + CALL MPPDB_CHECK(PDZZ,"CONJGRAD beg:PDZZ") + CALL MPPDB_CHECK(PRHODJ,"CONJGRAD beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"CONJGRAD beg:PTHETAV") + CALL MPPDB_CHECK(PRHOM,"CONJGRAD beg:PRHOM") + CALL MPPDB_CHECK(PAF,"CONJGRAD beg:PAF") + CALL MPPDB_CHECK(PBF,"CONJGRAD beg:PBF") + CALL MPPDB_CHECK(PCF,"CONJGRAD beg:PCF") + CALL MPPDB_CHECK(PTRIGSX,"CONJGRAD beg:PTRIGSX") + CALL MPPDB_CHECK(PTRIGSY,"CONJGRAD beg:PTRIGSY") + CALL MPPDB_CHECK(KIFAXX,"CONJGRAD beg:KIFAXX") + CALL MPPDB_CHECK(KIFAXY,"CONJGRAD beg:KIFAXY") + CALL MPPDB_CHECK(PY,"CONJGRAD beg:PY") + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"CONJGRAD beg:PPHI") +END IF ! !* 1. INITIALIZATIONS ! --------------- ! +#ifdef MNH_OPENACC +JIU = size(PPHI, 1 ) +JJU = size(PPHI, 2 ) +JKU = size(PPHI, 3 ) + +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZDELTA, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZP, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZWORK, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZWORKD, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZWORKP, JIU, JJU, JKU ) +#endif + ZLAMBDA = 0. +!$acc kernels ZP(:,:,:) = 0. +!$acc end kernels ! !------------------------------------------------------------------------------- ! @@ -221,15 +281,24 @@ DO JM = 1,KITR ! !* 2.1 compute the new pressure function ! +!$acc kernels PPHI(:,:,:) = PPHI(:,:,:) + ZLAMBDA * ZP(:,:,:) ! the case JM =0 is special because PPHI is not changed +!$acc end kernels ! !* 2.2 compute the auxiliary field DELTA ! ! -1 ! compute the vector DELTA = F * ( Y - Q ( PHI ) ) ! +#ifndef MNH_OPENACC ZWORK(:,:,:) = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PPHI) ! Q (PHI) +#else + CALL QLAP_DEVICE( ZWORK, HLBCX, HLBCY, PDXX, PDYY, PDZX, PDZY, PDZZ, PRHODJ, PTHETAV, PPHI ) ! Q (PHI) +#endif +! +!$acc kernels ZWORK(:,:,:) = PY(:,:,:) - ZWORK(:,:,:) ! Y - Q (PHI) +!$acc end kernels ! CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, &! -1 PTRIGSX,PTRIGSY,KIFAXX,KIFAXY,ZWORK,ZDELTA) ! F (Y - Q (PHI))) @@ -240,21 +309,33 @@ DO JM = 1,KITR ! compute the vector P = DELTA + alpha F * Q ( DELTA ) ! IF (JM == 1) THEN +!$acc kernels ZP(:,:,:) = ZDELTA(:,:,:) ! P = DELTA at the first solver iteration +!$acc end kernels ELSE +#ifndef MNH_OPENACC ZWORK(:,:,:) = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZDELTA) ! Q ( DELTA ) +#else + CALL QLAP_DEVICE( ZWORK, HLBCX, HLBCY, PDXX, PDYY, PDZX, PDZY, PDZZ, PRHODJ, PTHETAV, ZDELTA ) ! Q ( DELTA ) +#endif CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! -1 PTRIGSX,PTRIGSY,KIFAXX,KIFAXY,ZWORK,ZWORKD) ! F * Q ( DELTA ) ! ZALPHA = - DOTPROD(ZWORKD,ZWORKP,HLBCX,HLBCY)/ZDOTPP ! ZWORKP,ZDOTPP come from the previous solver iteration (section 2.4) +!$acc kernels ZP(:,:,:) = ZDELTA(:,:,:) + ZALPHA * ZP(:,:,:) ! new vector P +!$acc end kernels ! END IF ! !* 2.4 compute LAMBDA ! ! +#ifndef MNH_OPENACC ZWORK(:,:,:) = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZP) ! Q ( P ) +#else + CALL QLAP_DEVICE( ZWORK, HLBCX, HLBCY, PDXX, PDYY, PDZX, PDZY, PDZZ, PRHODJ, PTHETAV, ZP ) ! Q ( P ) +#endif CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF,& ! -1 PTRIGSX,PTRIGSY,KIFAXX,KIFAXY,ZWORK,ZWORKP) ! F * Q ( P ) ! @@ -271,7 +352,22 @@ END DO ! end of the loop for the iterative solver !* 3. COMPUTE THE FINAL PRESSURE FUNCTION ! ----------------------------------- ! +!$acc kernels PPHI(:,:,:) = PPHI(:,:,:) + ZLAMBDA * ZP(:,:,:) +!$acc end kernels ! !------------------------------------------------------------------------------- + +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"CONJGRAD end:PPHI") +END IF + +!$acc end data + END SUBROUTINE CONJGRAD diff --git a/src/MNH/conresol.f90 b/src/MNH/conresol.f90 index 02d322ad7bdaa7d0a1c3c76b22f4642e7484af86..61ae8417aa1a7f3ebc75644c4ca936b9e12c9e50 100644 --- a/src/MNH/conresol.f90 +++ b/src/MNH/conresol.f90 @@ -129,6 +129,11 @@ END MODULE MODI_CONRESOL !* 0. DECLARATIONS ! ------------ ! +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +USE MODE_MPPDB +! USE MODI_DOTPROD USE MODI_FLAT_INV USE MODI_QLAP @@ -184,26 +189,83 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PPHI ! solution of the equation ! INTEGER :: JM ! loop index ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZDELTA, ZKSI ! array containing the auxilary fields DELTA and KSI of the CR method REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZP, ZQ ! array containing the auxilary fields P and Q of the CR method REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZRESIDUE ! array containing the error field at each iteration Q(PHI) - Y +#else +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDELTA, ZKSI ! array containing the auxilary fields DELTA and KSI of the CR method +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZP, ZQ ! array containing the auxilary fields P and Q of the CR method +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZRESIDUE ! array containing the error field at each iteration Q(PHI) - Y +#endif ! REAL :: ZALPHA, ZLAMBDA ! amplitude of the descent in the Conjugate ! directions REAL :: ZDOT_DELTA ! dot product of ZDELTA by itself +#ifdef MNH_OPENACC +INTEGER :: JIU,JJU,JKU +#endif ! !------------------------------------------------------------------------------- + +!$acc data present( PDXX, PDYY, PDZX, PDZY, PDZZ, PRHODJ, PTHETAV ) & +!$acc & present( PRHOM, PAF, PBF, PCF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PY ) & +!$acc & present( PPHI ) + +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"CRESI beg:PDXX") + CALL MPPDB_CHECK(PDYY,"CRESI beg:PDYY") + CALL MPPDB_CHECK(PDZX,"CRESI beg:PDZX") + CALL MPPDB_CHECK(PDZY,"CRESI beg:PDZY") + CALL MPPDB_CHECK(PDZZ,"CRESI beg:PDZZ") + CALL MPPDB_CHECK(PRHODJ,"CRESI beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"CRESI beg:PTHETAV") + CALL MPPDB_CHECK(PRHOM,"CRESI beg:PRHOM") + CALL MPPDB_CHECK(PAF,"CRESI beg:PAF") + CALL MPPDB_CHECK(PBF,"CRESI beg:PBF") + CALL MPPDB_CHECK(PCF,"CRESI beg:PCF") + CALL MPPDB_CHECK(PTRIGSX,"CRESI beg:PTRIGSX") + CALL MPPDB_CHECK(PTRIGSY,"CRESI beg:PTRIGSY") + CALL MPPDB_CHECK(KIFAXX,"CRESI beg:KIFAXX") + CALL MPPDB_CHECK(KIFAXY,"CRESI beg:KIFAXY") + CALL MPPDB_CHECK(PY,"CRESI beg:PY") + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"CRESI beg:PPHI") +END IF ! !* 1. INITIALIZATIONS ! --------------- ! -! +#ifdef MNH_OPENACC +JIU = size(PPHI, 1 ) +JJU = size(PPHI, 2 ) +JKU = size(PPHI, 3 ) + +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZDELTA, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZKSI, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZP, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZQ, JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZRESIDUE, JIU, JJU, JKU ) +#endif + +! !* 1.1 compute the vector: r^(0) = Q(PHI) - Y ! +#ifndef MNH_OPENACC ZRESIDUE(:,:,:) = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PPHI) - PY(:,:,:) +#else +CALL QLAP_DEVICE(ZRESIDUE,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PPHI) +!$acc kernels present( PY, ZRESIDUE ) +ZRESIDUE(:,:,:) = ZRESIDUE(:,:,:) - PY(:,:,:) +!$acc end kernels +#endif ! !* 1.2 compute the vector: p^(0) = F^(-1)*( Q(PHI) - Y ) ! @@ -212,7 +274,11 @@ CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! !* 1.3 compute the vector: delta^(0) = Q ( p^(0) ) ! +#ifndef MNH_OPENACC ZDELTA = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZP) +#else +CALL QLAP_DEVICE(ZDELTA,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZP) +#endif ! !------------------------------------------------------------------------------- ! @@ -228,7 +294,9 @@ DO JM = 1,KITR ! !* 2.2 update the pressure function PHI ! +!$acc kernels PPHI(:,:,:) = PPHI(:,:,:) + ZLAMBDA * ZP(:,:,:) +!$acc end kernels ! ! IF( JM == KITR ) EXIT @@ -236,7 +304,9 @@ DO JM = 1,KITR ! !* 2.3 update the residual error: r ! +!$acc kernels ZRESIDUE(:,:,:) = ZRESIDUE(:,:,:) + ZLAMBDA * ZDELTA(:,:,:) +!$acc end kernels ! !* 2.4 compute the vector: q = F^(-1)*( Q(PHI) - Y ) ! @@ -245,7 +315,11 @@ DO JM = 1,KITR ! !* 2.5 compute the auxiliary field: ksi = Q ( q ) ! +#ifndef MNH_OPENACC ZKSI= QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZQ) +#else + CALL QLAP_DEVICE(ZKSI,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZQ) +#endif ! -1 !* 2.6 compute the step ALPHA ! @@ -253,12 +327,25 @@ DO JM = 1,KITR ! !* 2.7 update p and DELTA ! +!$acc kernels ZP(:,:,:) = ZQ(:,:,:) + ZALPHA * ZP(:,:,:) ZDELTA(:,:,:) = ZKSI(:,:,:) + ZALPHA * ZDELTA(:,:,:) +!$acc end kernels ! END DO ! end of the loop for the iterative solver ! -! +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"CONRESOL end:PPHI") +END IF + +!$acc end data + !------------------------------------------------------------------------------- ! END SUBROUTINE CONRESOL diff --git a/src/MNH/conresolz.f90 b/src/MNH/conresolz.f90 index e6d50b07f4710711d416072241d50dfa565228e0..ad63b26a572ca49ca7a8db16cc178ad1753f39fe 100644 --- a/src/MNH/conresolz.f90 +++ b/src/MNH/conresolz.f90 @@ -139,6 +139,11 @@ END MODULE MODI_CONRESOLZ !* 0. DECLARATIONS ! ------------ ! +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +USE MODE_MPPDB +! USE MODI_DOTPROD USE MODI_FLAT_INVZ USE MODI_QLAP @@ -200,39 +205,110 @@ REAL, DIMENSION(:,:,:), INTENT(IN) :: PBF_SXP2_YP1_Z ! elements of the tri ! INTEGER :: JM ! loop index ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZDELTA, ZKSI ! array containing the auxilary fields DELTA and KSI of the CR method REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZP, ZQ ! array containing the auxilary fields P and Q of the CR method REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZRESIDUE ! array containing the error field at each iteration Q(PHI) - Y +#else +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZDELTA, ZKSI ! array containing the auxilary fields DELTA and KSI of the CR method +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZP, ZQ ! array containing the auxilary fields P and Q of the CR method +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZRESIDUE ! array containing the error field at each iteration Q(PHI) - Y +#endif ! REAL :: ZALPHA, ZLAMBDA ! amplitude of the descent in the Conjugate ! directions REAL :: ZDOT_DELTA ! dot product of ZDELTA by itself ! +#ifdef MNH_OPENACC +INTEGER :: JIU,JJU,JKU +#endif !------------------------------------------------------------------------------- + +!$acc data present( PPHI ) & +!$acc & present( PRHOM, PAF, PBF, PCF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PBFB, PBF_SXP2_YP1_Z, PTHETAV, PY ) & +!$acc & present( PDXX, PDYY, PDZX, PDZY, PDZZ,PRHODJ ) + +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"CONRESOLZ beg:PDXX") + CALL MPPDB_CHECK(PDYY,"CONRESOLZ beg:PDYY") + CALL MPPDB_CHECK(PDZX,"CONRESOLZ beg:PDZX") + CALL MPPDB_CHECK(PDZY,"CONRESOLZ beg:PDZY") + CALL MPPDB_CHECK(PDZZ,"CONRESOLZ beg:PDZZ") + CALL MPPDB_CHECK(PRHODJ,"CONRESOLZ beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"CONRESOLZ beg:PTHETAV") + CALL MPPDB_CHECK(PBF,"CONRESOLZ beg:PBF") + CALL MPPDB_CHECK(PY,"CONRESOLZ beg:PY") + CALL MPPDB_CHECK(PBFB,"CONRESOLZ beg:PBFB") +! CALL MPPDB_CHECK(PBF_SXP2_YP1_Z,"CONRESOLZ beg:PBF_SXP2_YP1_Z") + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"CONRESOLZ beg:PPHI") +END IF ! !* 1. INITIALIZATIONS ! --------------- ! +#ifdef MNH_OPENACC +JIU = size(PPHI, 1 ) +JJU = size(PPHI, 2 ) +JKU = size(PPHI, 3 ) + +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZDELTA , JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZKSI , JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZP , JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZQ , JIU, JJU, JKU ) +CALL MNH_MEM_GET( ZRESIDUE , JIU, JJU, JKU ) +#endif ! !* 1.1 compute the vector: r^(0) = Q(PHI) - Y ! +#ifndef MNH_OPENACC ZRESIDUE(:,:,:) = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PPHI) - PY(:,:,:) +#else +CALL QLAP_DEVICE(ZRESIDUE,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PPHI) +!$acc kernels present( PY, ZRESIDUE ) +ZRESIDUE(:,:,:) = ZRESIDUE(:,:,:) - PY(:,:,:) +!$acc end kernels +#endif ! !* 1.2 compute the vector: p^(0) = F^(-1)*( Q(PHI) - Y ) ! +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK(PY,"CONRESOLZ bef FLAT_INVZ:PY") + + CALL MPPDB_CHECK(PRHOM,"CONRESOLZ bef FLAT_INVZ:PRHOM") + CALL MPPDB_CHECK(PAF,"CONRESOLZ bef FLAT_INVZ:PAF") + CALL MPPDB_CHECK(PBF,"CONRESOLZ bef FLAT_INVZ:PBF") + CALL MPPDB_CHECK(PCF,"CONRESOLZ bef FLAT_INVZ:PCF") + CALL MPPDB_CHECK(PTRIGSX,"CONRESOLZ bef FLAT_INVZ:PTRIGSX") + CALL MPPDB_CHECK(PTRIGSY,"CONRESOLZ bef FLAT_INVZ:PTRIGSY") + CALL MPPDB_CHECK(ZRESIDUE,"CONRESOLZ bef FLAT_INVZ:ZRESIDUE") + CALL MPPDB_CHECK(PBFB,"CONRESOLZ bef FLAT_INVZ:PBFB") +! CALL MPPDB_CHECK(PBF_SXP2_YP1_Z,"CONRESOLZ bef FLAT_INVZ:PBF_SXP2_YP1_Z") +END IF CALL FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & PTRIGSX,PTRIGSY,KIFAXX,KIFAXY,ZRESIDUE,ZP,& PBFB,& PBF_SXP2_YP1_Z) !JUAN Z_SPLITING +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK(ZP,"CONRESOLZ aft FLAT_INVZ:ZP") +END IF !JUAN print*, "size ZP=",SIZE(ZP) !JUAN print*, "size ZRESIDUE=",SIZE(ZRESIDUE) ! !* 1.3 compute the vector: delta^(0) = Q ( p^(0) ) ! +#ifndef MNH_OPENACC ZDELTA = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZP) +#else +CALL QLAP_DEVICE(ZDELTA,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZP) +#endif ! !------------------------------------------------------------------------------- ! @@ -248,7 +324,9 @@ DO JM = 1,KITR ! !* 2.2 update the pressure function PHI ! +!$acc kernels PPHI(:,:,:) = PPHI(:,:,:) + ZLAMBDA * ZP(:,:,:) +!$acc end kernels ! ! IF( JM == KITR ) EXIT @@ -256,7 +334,9 @@ DO JM = 1,KITR ! !* 2.3 update the residual error: r ! +!$acc kernels ZRESIDUE(:,:,:) = ZRESIDUE(:,:,:) + ZLAMBDA * ZDELTA(:,:,:) +!$acc end kernels ! !* 2.4 compute the vector: q = F^(-1)*( Q(PHI) - Y ) ! @@ -267,7 +347,11 @@ CALL FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! !* 2.5 compute the auxiliary field: ksi = Q ( q ) ! +#ifndef MNH_OPENACC ZKSI= QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZQ) +#else + CALL QLAP_DEVICE(ZKSI,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,ZQ) +#endif ! -1 !* 2.6 compute the step ALPHA ! @@ -275,12 +359,25 @@ CALL FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! !* 2.7 update p and DELTA ! +!$acc kernels ZP(:,:,:) = ZQ(:,:,:) + ZALPHA * ZP(:,:,:) ZDELTA(:,:,:) = ZKSI(:,:,:) + ZALPHA * ZDELTA(:,:,:) +!$acc end kernels ! END DO ! end of the loop for the iterative solver ! -! +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"CONRESOLZ end:PPHI") +END IF +! !------------------------------------------------------------------------------- ! END SUBROUTINE CONRESOLZ diff --git a/src/MNH/dotprod.f90 b/src/MNH/dotprod.f90 index ac6e40652448c7995146f0d72a11a3d7882ad97a..2d0ac3e214d2307841b133bb563a5580581cda41 100644 --- a/src/MNH/dotprod.f90 +++ b/src/MNH/dotprod.f90 @@ -1,13 +1,8 @@ -!MNH_LIC Copyright 1994-2014 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence -!MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +!MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. !----------------------------------------------------------------- -!--------------- special set of characters for RCS information -!----------------------------------------------------------------- -! $Source$ $Revision$ -! MASDEV4_7 solver 2006/05/18 13:07:25 -!----------------------------------------------------------------- ! ################### MODULE MODI_DOTPROD ! ################### @@ -41,13 +36,13 @@ END MODULE MODI_DOTPROD !! !! PURPOSE !! ------- -! The purpose of this function is to compute dot product of the vectors +! The purpose of this function is to compute dot product of the vectors ! stored in the arrays PA, PB. The elements of PA and PB are localized at ! mass points. ! !!** METHOD -!! ------ -!! The scalar product DOTPROD of 2 vectors A and B is defined by : +!! ------ +!! The scalar product DOTPROD of 2 vectors A and B is defined by : !! DOTPROD = SUM( A(i,j,k)* B(i,j,k) ) !! The bounds for the summation depend on the l.b.c. !! @@ -58,7 +53,7 @@ END MODULE MODI_DOTPROD !! IMPLICIT ARGUMENTS !! ------------------ !! Module MODD_PARAMETERS: declaration of parameter variables -!! JPHEXT, JPVEXT: define the number of marginal points out of the +!! JPHEXT, JPVEXT: define the number of marginal points out of the !! physical domain along horizontal and vertical directions respectively !! Module MODD_CONF: model configurations !! L2D: logical switch for 2D model version @@ -73,7 +68,7 @@ END MODULE MODI_DOTPROD !! !! MODIFICATIONS !! ------------- -!! Original 25/07/94 +!! Original 25/07/94 !! J.-P. Pinty 12/11/99 Parallelization !! !------------------------------------------------------------------------------- @@ -85,10 +80,15 @@ USE MODD_PARAMETERS USE MODD_CONF ! USE MODE_ll +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +USE MODE_MPPDB !JUAN USE MODE_REPRO_SUM !JUAN ! +! IMPLICIT NONE ! !* 0.1 Declarations of arguments and result @@ -96,8 +96,8 @@ IMPLICIT NONE ! REAL, DIMENSION(:,:,:), INTENT(IN) :: PA, PB ! input vectors ! -CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type -CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type ! REAL :: PDOTPROD ! dot product ! @@ -117,15 +117,24 @@ INTEGER :: ILBXB,ILBYB,ILBXE,ILBYE ! loop indices depending on the ! lateral boundary conditions ! INTEGER :: IINFO_ll -!JUAN16 +#ifndef MNH_OPENACC REAL, ALLOCATABLE, DIMENSION(:,:) :: ZDOTPROD -!JUAN16 +#else +REAL, POINTER, CONTIGUOUS, DIMENSION(:,:) :: ZDOTPROD +#endif ! !------------------------------------------------------------------------------- ! !* 1. COMPUTE LOOP BOUNDS -! ------------------- -! +!------------------- +if ( mppdb_initialized ) then + !Check all in arrays + call Mppdb_check( PA, "Dotprod beg:PA" ) + call Mppdb_check( PB, "Dotprod beg:PB" ) +end if + +!$acc data present( PA, PB ) + CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) ! IKB=1+JPVEXT @@ -163,22 +172,39 @@ ELSE ! 2d version ILBYE = IJB ENDIF ! -!* 2. COMPUTE THE DOT PRODUCT +!* 2. COMPUTE THE DOT PRODUCT ! ----------------------- ! !JUAN16 +#ifndef MNH_OPENACC ALLOCATE(ZDOTPROD(ILBXB:ILBXE,ILBYB:ILBYE)) -ZDOTPROD = 0. -DO JK = IKB-1,IKE+1 - DO JJ = ILBYB,ILBYE - DO JI = ILBXB,ILBXE +#else +CALL MNH_MEM_POSITION_PIN() +CALL MNH_MEM_GET(ZDOTPROD, ILBXB,ILBXE ,ILBYB,ILBYE ) +#endif +!$acc kernels present(ZDOTPROD) +ZDOTPROD(:,:) = 0. +#ifdef MNH_COMPILER_NVHPC + !$acc loop independent collapse(2) +#endif + DO CONCURRENT (JI=ILBXB:ILBXE,JJ=ILBYB:ILBYE) + !$acc loop seq + DO JK = IKB-1,IKE+1 ZDOTPROD(JI,JJ) = ZDOTPROD(JI,JJ) + PA(JI,JJ,JK) * PB(JI,JJ,JK) END DO END DO -END DO +!$acc end kernels +!$acc update host(ZDOTPROD) PDOTPROD = SUM_DD_R2_ll(ZDOTPROD) !JUAN16 -! +#ifndef MNH_OPENACC +DEALLOCATE(ZDOTPROD) +#else +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + !------------------------------------------------------------------------------- ! END FUNCTION DOTPROD diff --git a/src/MNH/fft.f90 b/src/MNH/fft.f90 index e34b3765bf8daf623de6e9d041e253f65bd04869..4d7536c30873167f3b702ae5b9c04bd5f8f6c420 100644 --- a/src/MNH/fft.f90 +++ b/src/MNH/fft.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -85,6 +85,7 @@ END SUBROUTINE SET99 SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT ) + USE MODE_MPPDB IMPLICIT NONE @@ -141,6 +142,16 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, INTEGER :: II, JJ, IX, IZ INTEGER :: I0 + IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK( TRIGS, "FFT991 beg:TRIGS" ) + CALL MPPDB_CHECK( IFAX, "FFT991 beg:IFAX" ) + !Check all INOUT arrays + CALL MPPDB_CHECK( A, "FFT991 beg:A" ) + END IF + +!$acc data present( A, WORK ) + ! Initialisation of WORK useful to compare results with MPPDB_CHECK (otherwise all values are not set by FFT991 WORK(:) = 0. @@ -163,20 +174,26 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, ISTART=1 DO NB=1,NBLOX IA=ISTART +!$acc kernels !CDIR NODEP !*vocl loop,novrec +!$acc loop independent private( I ) DO J=1,NVEX I = ISTART + ( J - 1 ) * JUMP A(I+INC)=0.5*A(I) END DO +!$acc end kernels IF ( MOD(N,2) == 0 ) THEN +!$acc kernels I0 = ISTART + N * INC !CDIR NODEP !*vocl loop,novrec +!$acc loop independent private( I ) DO J=1,NVEX I = I0 + ( J - 1 ) * JUMP A(I)=0.5*A(I) END DO +!$acc end kernels END IF IA=ISTART+INC ILA=1 @@ -207,6 +224,7 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, ! IF NECESSARY, COPY RESULTS BACK TO A ! ------------------------------------ IF ( MOD(NFAX,2) == 1 ) THEN +!$acc kernels IBASE=1 JBASE=IA DO JJ=1,NVEX @@ -220,12 +238,15 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, IBASE=IBASE+NX JBASE=JBASE+JUMP END DO +!$acc end kernels END IF ! ! FILL IN ZEROS AT END ! -------------------- +!$acc kernels A(ISTART+N*INC::JUMP) = 0.0 A(ISTART+(N+1)*INC::JUMP) = 0.0 +!$acc end kernels ! ISTART=ISTART+NVEX*JUMP NVEX=1020 @@ -266,6 +287,7 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, ! IF NECESSARY, COPY RESULTS BACK TO A ! ------------------------------------ IF ( MOD(NFAX,2) == 1 ) THEN +!$acc kernels IBASE=1 JBASE=IA DO JJ=1,NVEX @@ -279,19 +301,25 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, IBASE=IBASE+NX JBASE=JBASE+JUMP END DO +!$acc end kernels END IF ! ! SHIFT A(0) & FILL IN ZERO IMAG PARTS ! ------------------------------------ +!$acc kernels !CDIR NODEP !*vocl loop,novrec +!$acc loop independent private( I ) DO J=1,NVEX IX = ISTART + ( J - 1 ) * JUMP A(IX)=A(IX+INC) A(IX+INC)=0.0 END DO +!$acc end kernels IF ( MOD(N,2) == 0 ) THEN +!$acc kernels A(ISTART+(N+1)*INC::JUMP) = 0.0 +!$acc end kernels END IF ! ISTART=ISTART+NVEX*JUMP @@ -314,11 +342,22 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, INC, JUMP, N, ILOT, ISIGN, KSZA, KSZW, ENDIF END IF +!$acc end data + + IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( A, "FFT991 end:A" ) + !Check all OUT arrays + CALL MPPDB_CHECK( WORK, "FFT991 end:WORK" ) + END IF END SUBROUTINE FFT991 SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KSZ4,KSZ5) +#ifdef MNH_OPENACC + USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif IMPLICIT NONE @@ -355,7 +394,11 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS ! !----------------------------------------------------------------------- ! +#ifndef MNH_OPENACC REAL A10(1020),A11(1020),A20(1020),A21(1020),B10(1020),B11(1020),B20(1020),B21(1020) +#else + REAL, DIMENSION(:), POINTER, CONTIGUOUS :: A10, A11, A20, A21, B10, B11, B20, B21 +#endif INTEGER :: M, IINK, JINK, JUMP, KSTOP INTEGER :: IBAD, IBASE, JBASE, IGO @@ -365,12 +408,30 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS REAL :: C1, C2, C3, C4, C5, S1, S2, S3, S4, S5 REAL :: QQRT5, SIN45, SSIN36, SSIN45, SSIN60, SSIN72 +#ifdef MNH_OPENACC + !Pin positions in the pools of MNH memory + CALL MNH_MEM_POSITION_PIN() + + CALL MNH_MEM_GET( A10, 1020 ) + CALL MNH_MEM_GET( A11, 1020 ) + CALL MNH_MEM_GET( A20, 1020 ) + CALL MNH_MEM_GET( A21, 1020 ) + CALL MNH_MEM_GET( B10, 1020 ) + CALL MNH_MEM_GET( B11, 1020 ) + CALL MNH_MEM_GET( B20, 1020 ) + CALL MNH_MEM_GET( B21, 1020 ) +#endif + +!$acc data present( A, B, C, D , A10, A11, A20, A21, B10, B11, B20, B21 ) copyin( TRIGS ) + +!acc kernels M=N/IFAC IINK=ILA*INC1 JINK=ILA*INC2 JUMP=(IFAC-1)*JINK KSTOP=(N-IFAC)/(2*IFAC) +!acc end kernels IBAD=1 IF (ILOT.GT.1020) GO TO 910 IBASE=0 @@ -393,21 +454,25 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 C(JA+J)=A(IA+I)+A(IB+I) C(JB+J)=A(IA+I)-A(IB+I) END DO +!acc end kernels IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IA=IA+IINK IINK=2*IINK IB=IB-IINK @@ -415,6 +480,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JBASE=JBASE+JUMP JUMP=2*JUMP+JINK IF ( IA /= IB ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K C1=TRIGS(KB+1) @@ -426,6 +492,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -441,8 +508,10 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IB=IB-IINK JBASE=JBASE+JUMP END DO +!$acc end kernels END IF IF ( IA <= IB ) THEN +!$acc kernels IBASE=0 DO IL=1,ILA I=IBASE @@ -450,6 +519,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -459,16 +529,19 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -478,6 +551,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -495,12 +569,14 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -511,6 +587,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IA=IA+IINK IINK=2*IINK IB=IB+IINK @@ -518,6 +595,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JBASE=JBASE+JUMP JUMP=2*JUMP+JINK IF ( IA /= IC ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -532,6 +610,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -566,9 +645,11 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IC=IC-IINK JBASE=JBASE+JUMP END DO +!$acc end kernels END IF IF ( IA <= IC ) THEN +!$acc kernels IBASE=0 DO IL=1,ILA I=IBASE @@ -576,6 +657,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -586,10 +668,12 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels SSIN60=2.0*XSIN60 DO IL=1,ILA I=IBASE @@ -597,6 +681,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -607,6 +692,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -627,12 +713,14 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M) THEN +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -644,6 +732,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IA=IA+IINK IINK=2*IINK IB=IB+IINK @@ -652,6 +741,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JBASE=JBASE+JUMP JUMP=2*JUMP+JINK IF ( IB /= IC ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -669,6 +759,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -702,9 +793,11 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS ID=ID-IINK JBASE=JBASE+JUMP END DO +!$acc end kernels END IF IF ( IB <= IC ) THEN +!$acc kernels IBASE=0 SIN45=SQRT(0.5) DO IL=1,ILA @@ -713,6 +806,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -724,16 +818,19 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE ! ILA == M +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -745,6 +842,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -767,12 +865,14 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -793,6 +893,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IA=IA+IINK IINK=2*IINK IB=IB+IINK @@ -802,6 +903,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JBASE=JBASE+JUMP JUMP=2*JUMP+JINK IF ( IB /= ID ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -822,6 +924,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -860,9 +963,11 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IE=IE-IINK JBASE=JBASE+JUMP END DO +!$acc end kernels END IF IF ( IB <= ID ) THEN +!$acc kernels IBASE=0 DO IL=1,ILA I=IBASE @@ -870,6 +975,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -890,10 +996,12 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels QQRT5=2.0*XQRT5 SSIN36=2.0*XSIN36 SSIN72=2.0*XSIN72 @@ -903,6 +1011,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -919,6 +1028,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -943,12 +1053,14 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO IL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -966,6 +1078,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IA=IA+IINK IINK=2*IINK IB=IB+IINK @@ -976,6 +1089,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JBASE=JBASE+JUMP JUMP=2*JUMP+JINK IF ( IC /= ID ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -999,6 +1113,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1044,9 +1159,11 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF=IF-IINK JBASE=JBASE+JUMP END DO +!$acc end kernels END IF IF ( IC <= ID ) THEN +!$acc kernels IBASE=0 DO IL=1,ILA I=IBASE @@ -1054,6 +1171,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1069,10 +1187,12 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels SSIN60=2.0*XSIN60 DO IL=1,ILA I=IBASE @@ -1080,6 +1200,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1097,6 +1218,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -1109,6 +1231,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN IBAD = 3 ELSE +!$acc kernels IA=1 IB=IA+ILA*INC1 IC=IB+2*ILA*INC1 @@ -1130,6 +1253,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1149,6 +1273,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IBAD=0 END IF !ILA==M @@ -1162,6 +1287,14 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS 910 continue IERR=IBAD +#ifdef MNH_OPENACC + !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN + CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + + END SUBROUTINE RPASSM @@ -1213,6 +1346,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS REAL :: B0, B1, B2, B3, B4, B5, B6, B10, B11, B20, B21 REAL :: SIN45, ZQRT5, ZSIN36, ZSIN45, ZSIN60, ZSIN72 + +!$acc data present( A, B, C, D ) copyin( TRIGS ) + + + M=N/IFAC IINK=ILA*INC1 JINK=ILA*INC2 @@ -1240,12 +1378,14 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO JL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1255,12 +1395,14 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels JA=JA+JINK JINK=2*JINK JB=JB-JINK IBASE=IBASE+IJUMP IJUMP=2*IJUMP+IINK IF ( JA /= JB ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K C1=TRIGS(KB+1) @@ -1272,6 +1414,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1287,9 +1430,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JA=JA+JINK JB=JB-JINK END DO +!$acc end kernels END IF IF ( JA <= JB ) THEN +!$acc kernels JBASE=0 DO JL=1,ILA I=IBASE @@ -1297,6 +1442,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1306,10 +1452,12 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels Z=1.0/REAL(N) DO JL=1,ILA I=IBASE @@ -1317,6 +1465,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1326,6 +1475,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF @@ -1346,12 +1496,14 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO JL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1362,6 +1514,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels JA=JA+JINK JINK=2*JINK JB=JB+JINK @@ -1369,6 +1522,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+IJUMP IJUMP=2*IJUMP+IINK IF ( JA /= JC ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -1383,6 +1537,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1407,9 +1562,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JB=JB+JINK JC=JC-JINK END DO +!$acc end kernels END IF IF ( JA <= JC ) THEN +!$acc kernels JBASE=0 DO JL=1,ILA I=IBASE @@ -1417,6 +1574,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1427,10 +1585,12 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels Z=1.0/REAL(N) ZSIN60=Z*XSIN60 DO JL=1,ILA @@ -1439,6 +1599,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1449,6 +1610,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -1469,12 +1631,14 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JD=JB IF ( ILA /= M ) THEN +!$acc kernels DO JL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1486,6 +1650,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels JA=JA+JINK JINK=2*JINK JB=JB+JINK @@ -1494,6 +1659,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+IJUMP IJUMP=2*IJUMP+IINK IF ( JB /= JC ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -1511,6 +1677,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1540,9 +1707,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JC=JC-JINK JD=JD-JINK END DO +!$acc end kernels END IF IF ( JB <= JC ) THEN +!$acc kernels SIN45=SQRT(0.5) JBASE=0 DO JL=1,ILA @@ -1551,6 +1720,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1562,9 +1732,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE ! ILA == M +!$acc kernels Z=1.0/REAL(N) DO JL=1,ILA I=IBASE @@ -1572,6 +1744,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1583,6 +1756,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -1604,12 +1778,14 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JE=JB IF ( ILA /= M ) THEN +!$acc kernels DO JL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1628,6 +1804,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels JA=JA+JINK JINK=2*JINK JB=JB+JINK @@ -1637,6 +1814,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+IJUMP IJUMP=2*IJUMP+IINK IF ( JB /= JD ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -1657,6 +1835,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1701,9 +1880,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JD=JD-JINK JE=JE-JINK END DO +!$acc end kernels END IF IF ( JB <= JD ) THEN +!$acc kernels JBASE=0 DO JL=1,ILA I=IBASE @@ -1711,6 +1892,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1729,9 +1911,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels Z=1.0/REAL(N) ZQRT5=Z*XQRT5 ZSIN36=Z*XSIN36 @@ -1742,6 +1926,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1760,6 +1945,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -1784,12 +1970,14 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN +!$acc kernels DO JL=1,ILA I=IBASE J=JBASE !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1805,6 +1993,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels JA=JA+JINK JINK=2*JINK JB=JB+JINK @@ -1815,6 +2004,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+IJUMP IJUMP=2*IJUMP+IINK IF (JC /= JD ) THEN +!$acc kernels DO K=ILA,KSTOP,ILA KB=K+K KC=KB+KB @@ -1838,6 +2028,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1887,9 +2078,11 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS JE=JE-JINK JF=JF-JINK END DO +!$acc end kernels END IF IF ( JC <= JD ) THEN +!$acc kernels JBASE=0 DO JL=1,ILA I=IBASE @@ -1897,6 +2090,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1910,10 +2104,12 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF ELSE +!$acc kernels Z=1.0/REAL(N) ZSIN60=Z*XSIN60 DO JL=1,ILA @@ -1922,6 +2118,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1937,6 +2134,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels END IF IBAD=0 @@ -1950,6 +2148,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IF ( ILA /= M ) THEN IBAD = 3 ELSE +!$acc kernels IA=1 IB=IA+IINK IC=IB+IINK @@ -1972,6 +2171,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC +!$acc loop independent private( I, J ) DO IJK=1,ILOT I = IBASE + (IJK - 1 ) * INC3 J = JBASE + (IJK - 1 ) * INC4 @@ -1995,6 +2195,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS IBASE=IBASE+INC1 JBASE=JBASE+INC2 END DO +!$acc end kernels IBAD=0 END IF @@ -2010,6 +2211,8 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC1,INC2,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KS 910 CONTINUE IERR=IBAD +!$acc end data + END SUBROUTINE QPASSM END MODULE MODE_FFT diff --git a/src/MNH/fft55.f90 b/src/MNH/fft55.f90 index e38287d59f2a1e3a6e687865beff1cc837b6158d..96d59414805831ad5edcb552fb553f09c8497e8e 100644 --- a/src/MNH/fft55.f90 +++ b/src/MNH/fft55.f90 @@ -82,6 +82,8 @@ CONTAINS ! ! USE MODE_FFT, ONLY: FFT991 +USE MODE_MPPDB + IMPLICIT NONE ! !* 0.1 declarations of arguments @@ -123,8 +125,19 @@ REAL, DIMENSION(:), POINTER :: ZA REAL, DIMENSION(:), POINTER :: ZWORK ! !------------------------------------------------------------------------------- + +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK( PTRIGS, "FFT55 beg:PTRIGS" ) + CALL MPPDB_CHECK( KIFAX, "FFT55 beg:KIFAX" ) + !Check all INOUT arrays + CALL MPPDB_CHECK( PA, "FFT55 beg:PA" ) +END IF + ZA(1:KJUMP*KLOT) => PA(:,:,:) ZWORK(1:KJUMP*KLOT) => PWORK(:,:,:) + +!$acc data present( ZA, ZWORK, PTRIGS ) ! !* 1. COMPUTE LOOP BOUNDS ! ------------------- @@ -145,8 +158,10 @@ IF (KISIGN.EQ.1) THEN ! 2.1 preprocessing ! ------------- ! +!$acc kernels ZSCALE=0.5*REAL(KN) ! this loop works for odd and even case +!$acc loop gang independent private( IJA, IJB, IIA0, IIB0, ZRI, ZCO ) DO JK=1,(KN-1)/2 IJA=JK+1 IJB=KN+1-JK @@ -159,6 +174,7 @@ IF (KISIGN.EQ.1) THEN ! !ocl novrec !CDIR NODEP +!$acc loop vector independent private( IIA, IIB, ZT1, ZT2 ) DO JJ=1,INVEX IIA=IIA0+(JJ-1)*KJUMP IIB=IIB0+(JJ-1)*KJUMP @@ -180,6 +196,7 @@ IF (KISIGN.EQ.1) THEN ! !ocl novrec !CDIR NODEP +!$acc loop independent private( IIA, IIB ) DO JJ=1,INVEX IIA=IIA0+(JJ-1)*KJUMP IIB=IIB0+(JJ-1)*KJUMP @@ -188,6 +205,7 @@ IF (KISIGN.EQ.1) THEN !cvj IIA=IIA+KJUMP !cvj IIB=IIB+KJUMP ENDDO +!$acc end kernels ! ! 2.2 periodic Fourier analysis ! ------------------------- @@ -198,12 +216,15 @@ IF (KISIGN.EQ.1) THEN ! ! 2.3 postprocessing ! -------------- +!$acc kernels +!$acc loop seq DO JK=2,KN,2 !cvj IJA=ISTART+(JK-1) IJA0=ISTART+(JK-1) ! !ocl novrec !CDIR NODEP +!$acc loop vector independent private( IJA ) DO JJ=1,INVEX IJA=IJA0+(JJ-1)*KJUMP ZA(IJA)=ZA(IJA+1)-ZA(IJA+2) @@ -211,6 +232,7 @@ IF (KISIGN.EQ.1) THEN !cvj IJA=IJA+KJUMP ENDDO ENDDO +!$acc end kernels ! ISTART=ISTART+INVEX*KJUMP INVEX=1020 @@ -224,6 +246,7 @@ ELSE ! ISTART=1 DO JNB=1,INBLOX +!$acc kernels ! ! 3.1 preprocessing ! ------------- @@ -235,6 +258,7 @@ ELSE ! !ocl novrec !CDIR NODEP +!$acc loop independent private( IIA, IIB ) DO JJ=1,INVEX IIA=IIA0+(JJ-1)*KJUMP IIB=IIB0+(JJ-1)*KJUMP @@ -245,12 +269,14 @@ ELSE ENDDO ! ! this loop works for odd and even case +!$acc loop seq DO JK=KN-2+IEVEN,2,-2 !cvj IIA=ISTART+(JK-1) IIA0=ISTART+(JK-1) ! !ocl novrec !CDIR NODEP +!$acc loop vector independent private( IIA ) DO JJ=1,INVEX IIA=IIA0+(JJ-1)*KJUMP ZA(IIA+2)=ZA(IIA+1)-ZA(IIA) @@ -258,6 +284,7 @@ ELSE !cvj IIA=IIA+KJUMP ENDDO ENDDO +!$acc end kernels ! ! 3.2 periodic Fourier synthesis ! -------------------------- @@ -271,6 +298,8 @@ ELSE ! ZSCALE=0.5/REAL(KN) ! this loop works for odd and even case +!$acc kernels present( ZA ) +!$acc loop gang independent private( IIA, IIB, IJA0, IJB0, ZRI, ZCO ) DO JK=1,(KN-1)/2 IIA=JK+1 IIB=KN-JK+1 @@ -283,6 +312,7 @@ ELSE ! !ocl novrec !CDIR NODEP +!$acc loop vector independent private( IJA, IJB, ZT1, ZT2 ) DO JJ=1,INVEX IJA=IJA0+(JJ-1)*KJUMP IJB=IJB0+(JJ-1)*KJUMP @@ -303,6 +333,7 @@ ELSE ! !ocl novrec !CDIR NODEP +!$acc loop independent private( IJA, IJB ) DO JJ=1,INVEX IJA=IJA0+(JJ-1)*KJUMP IJB=IJB0+(JJ-1)*KJUMP @@ -311,12 +342,22 @@ ELSE !cvj IJA=IJA+KJUMP !cvj IJB=IJB+KJUMP ENDDO +!$acc end kernels ! ISTART=ISTART+INVEX*KJUMP INVEX=1020 ENDDO ! ENDIF + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK( PA, "FFT55 end:PA" ) + !Check all OUT arrays + CALL MPPDB_CHECK( PWORK, "FFT55 end:PWORK" ) +END IF ! !------------------------------------------------------------------------------- ! diff --git a/src/MNH/flat_inv.f90 b/src/MNH/flat_inv.f90 index c38ee7be211450e3c7962cba0fb61775501881f2..502475f0b5fee58b71c9b547949104650026766a 100644 --- a/src/MNH/flat_inv.f90 +++ b/src/MNH/flat_inv.f90 @@ -129,6 +129,10 @@ USE MODD_PARAMETERS USE MODE_FFT, ONLY: FFT991 USE MODE_FFT55, ONLY: FFT55 USE MODE_ll +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +use mode_mppdb ! ! IMPLICIT NONE @@ -166,8 +170,13 @@ REAL, DIMENSION(:,:,:), INTENT(OUT):: PF_1_Y ! solution of the equation ! !* 0.2 declaration of local variables ! +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZY ! work array to store the RHS of the equation REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZAF ! work array to expand PAF +#else +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZY ! work array to store the RHS of the equation +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZAF ! work array to expand PAF +#endif INTEGER :: IIB ! indice I for the first inner mass point along x INTEGER :: IIE ! indice I for the last inner mass point along x INTEGER :: IIMAX ! number of inner mass points along the x direction @@ -197,6 +206,7 @@ INTEGER :: INC2X,INC2Y ! increment between the start of one data vector and ! the next for the FFT along x,y resp. ! ! +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZWORKX ! work array used by ! the FFT. It has been enlarged in order to be sufficient for 2D and 3D cases REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZWORKY ! work array used by @@ -216,9 +226,42 @@ INTEGER :: IIX,IJX,IIY,IJY ! dimensions of the extended x or y slices subdomain ! REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_YT ! array in Y slices distribution transpose REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_YRT ! array in Y slices distribution transpose +#else +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORKX ! work array used by +! the FFT. It has been enlarged in order to be sufficient for 2D and 3D cases +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORKY ! work array used by +! the FFT. It has been enlarged in order to be sufficient for 2D and 3D cases +! +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZGAM +! intermediate arrays +REAL, DIMENSION(:,:), POINTER, CONTIGUOUS :: ZBETX ! for the tridiag. +! matrix inversion +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_X ! array in X slices distribution +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_Y ! array in Y slices distribution +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_YR ! array in Y slices distribution +! +INTEGER :: IINFO_ll ! return code of parallel routine +! +INTEGER :: IIX,IJX,IIY,IJY ! dimensions of the extended x or y slices subdomain +! +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_YT ! array in Y slices distribution transpose +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_YRT ! array in Y slices distribution transpose +#endif REAL, DIMENSION(:), POINTER, CONTIGUOUS :: ZBAND1D REAL, DIMENSION(:), POINTER, CONTIGUOUS :: ZWORK1D !------------------------------------------------------------------------------- +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK( PRHOM, "FLAT_INV beg:PRHOM" ) + CALL MPPDB_CHECK( PAF, "FLAT_INV beg:PAF" ) + CALL MPPDB_CHECK( PCF, "FLAT_INV beg:PCF" ) + CALL MPPDB_CHECK( PBF, "FLAT_INV beg:PBF" ) + CALL MPPDB_CHECK( PTRIGSX, "FLAT_INV beg:PTRIGSX" ) + CALL MPPDB_CHECK( PTRIGSY, "FLAT_INV beg:PTRIGSY" ) + CALL MPPDB_CHECK( KIFAXX, "FLAT_INV beg:KIFAXX" ) + CALL MPPDB_CHECK( KIFAXY, "FLAT_INV beg:KIFAXY" ) + CALL MPPDB_CHECK( PY, "FLAT_INV beg:PY" ) +END IF ! !* 1. COMPUTE LOOP BOUNDS ! ------------------- @@ -234,6 +277,7 @@ IKB=1+JPVEXT IKE=IKU - JPVEXT IKMAX=IKE-IKB+1 ! +#ifndef MNH_OPENACC ALLOCATE( ZY (SIZE( PY, 1 ), SIZE( PY, 2 ), SIZE( PY, 3 ) ) ) ALLOCATE( ZAF(SIZE( PBF, 1 ), SIZE( PBF, 2 ), SIZE( PBF, 3 )) ) @@ -248,12 +292,38 @@ IF (.NOT. L2D) THEN ALLOCATE(ZBAND_YT(IJY,IIY,IKU)) ALLOCATE(ZBAND_YRT(IJY,IIY,IKU)) END IF +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZY, SIZE( PY, 1 ), SIZE( PY, 2 ), SIZE( PY, 3 ) ) +CALL MNH_MEM_GET( ZAF, SIZE( PBF, 1 ), SIZE( PBF, 2 ), SIZE( PBF, 3 ) ) + +CALL MNH_MEM_GET( ZBAND_X, IIX, IJX, IKU ) +CALL MNH_MEM_GET( ZBAND_Y, IIY, IJY, IKU ) +CALL MNH_MEM_GET( ZBAND_YR, IIY, IJY, IKU ) +CALL MNH_MEM_GET( ZWORKX, IIX, IJX, IKU ) +CALL MNH_MEM_GET( ZWORKY, IIY, IJY, IKU ) +CALL MNH_MEM_GET( ZBETX, IIY, IJY ) +CALL MNH_MEM_GET( ZGAM, IIY, IJY, IKU ) +IF ( .NOT. L2D ) THEN + CALL MNH_MEM_GET( ZBAND_YT, IJY, IIY, IKU ) + CALL MNH_MEM_GET( ZBAND_YRT, IJY, IIY, IKU ) +ELSE + CALL MNH_MEM_GET( ZBAND_YT, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_YRT, 0, 0, 0 ) +END IF +#endif ! !------------------------------------------------------------------------------- ! !* 2. COMPUTE THE ARRAY INCREMENTS FOR THE FFT ! ---------------------------------------- ! + +!$acc data present( PRHOM, PAF, PCF, PBF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PY, PF_1_Y ) & +!$acc & present( ZY, ZAF, ZBAND_X, ZBAND_Y, ZBAND_YR, ZWORKX, ZWORKY, ZBETX, ZGAM, ZBAND_YT, ZBAND_YRT ) + IF(.NOT. L2D) THEN ! ILOTX = IJX*IKU @@ -279,8 +349,10 @@ ENDIF ! !* 3.1 copy the RHS in a local array REMAP functions will shift the indices for the FFT ! +!$acc kernels PF_1_Y(:,:,:) = 0. ZY(:,:,:) = PY(:,:,:) +!$acc end kernels ! !* 3.2 form homogeneous boundary condition used by the FFT for non-periodic ! cases @@ -290,19 +362,23 @@ ZY(:,:,:) = PY(:,:,:) IF (HLBCX(1) /= 'CYCL') THEN ! IF (LWEST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB, IJE ZY(IIB,JJ,JK) = ZY(IIB,JJ,JK) + PY(IIB-1,JJ,JK) END DO END DO +!$acc end kernels END IF ! IF (LEAST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB, IJE ZY(IIE,JJ,JK) = ZY(IIE,JJ,JK) - PY(IIE+1,JJ,JK) END DO END DO +!$acc end kernels END IF END IF ! @@ -310,26 +386,32 @@ END IF ! IF (HLBCY(1) /= 'CYCL'.AND. (.NOT. L2D)) THEN IF (LSOUTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB, IIE ZY(JI,IJB,JK) = ZY(JI,IJB,JK) + PY(JI,IJB-1,JK) END DO END DO +!$acc end kernels END IF ! IF (LNORTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB, IIE ZY(JI,IJE,JK) = ZY(JI,IJE,JK) - PY(JI,IJE+1,JK) END DO END DO +!$acc end kernels END IF END IF ! ! !* 3.3 2way structure -> xslice structure, + data shift ! +!$acc kernels ZBAND_X(:,:,:) = 0. +!$acc end kernels CALL REMAP_2WAY_X_ll(ZY,ZBAND_X,IINFO_ll) ! ! @@ -345,12 +427,13 @@ IF (HLBCX(1) == 'CYCL') THEN CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSX, KIFAXX, INC1X, INC2X, IIMAX, ILOTX,-1, & SIZE(ZBAND1D), SIZE(ZWORK1D), size(PTRIGSX) ) ELSE - CALL FFT55(ZBAND_X(1:,1:,IKB-1:),ZWORKX,PTRIGSX,KIFAXX,INC2X, & - IIMAX,ILOTX,-1 ) + CALL FFT55( ZBAND_X(1:,1:,IKB-1:), ZWORKX, PTRIGSX, KIFAXX, INC2X, IIMAX, ILOTX, -1 ) END IF ! ! -ZBAND_Y=0. +!$acc kernels +ZBAND_Y(:,:,:) = 0. +!$acc end kernels CALL REMAP_X_Y_ll(ZBAND_X,ZBAND_Y,IINFO_ll) ! IF (.NOT. L2D) THEN @@ -365,8 +448,7 @@ IF (.NOT. L2D) THEN CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSY, KIFAXY, INC1Y, INC2Y, IJMAX, ILOTY,-1, & SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSY) ) ELSE - CALL FFT55(ZBAND_YT(1:,1:,IKB-1:),ZWORKY,PTRIGSY,KIFAXY,INC2Y, & - IJMAX,ILOTY,-1 ) + CALL FFT55( ZBAND_YT(1:,1:,IKB-1:), ZWORKY, PTRIGSY, KIFAXY, INC2Y, IJMAX, ILOTY, -1 ) END IF ! END IF @@ -375,9 +457,13 @@ END IF ! average of the pressure field equal to zero. IF (LWEST_ll(HSPLITTING='Y')) THEN IF (L2D) THEN +!$acc kernels ZBAND_Y(1,1,IKE+1)=0 +!$acc end kernels ELSE +!$acc kernels ZBAND_YT(1,1,IKE+1)=0. +!$acc end kernels END IF END IF ! @@ -389,10 +475,15 @@ END IF CALL FAST_SPREAD(PAF,ZAF,IIY,IJY,IKU) ! IF (LWEST_ll(HSPLITTING='Y')) THEN +!$acc kernels ZAF(1,1,IKE+1)=0. !singular matrix corresponding to the horizontal average +!$acc end kernels END IF ! IF (L2D) THEN +#ifdef MNH_OPENACC +CALL PRINT_MSG( NVERB_WARNING, 'GEN', 'FLAT_INV', 'L2D=T not tested' ) +#endif CALL FAST_SUBSTITUTION_2D(ZBAND_YR,ZBETX,PBF,ZGAM,PCF,ZAF & ,ZBAND_Y,IIY,IJY,IKU) ELSE @@ -414,8 +505,7 @@ IF (.NOT. L2D) THEN CALL FFT991( ZBAND1D, ZBAND_YRT, PTRIGSY, KIFAXY, INC1Y, INC2Y, IJMAX,ILOTY, +1, & SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSY) ) ELSE - CALL FFT55( ZBAND_YRT(1:,1:,IKB-1:),ZWORKY,PTRIGSY,KIFAXY,INC2Y, & - IJMAX,ILOTY,+1 ) + CALL FFT55( ZBAND_YRT(1:,1:,IKB-1:), ZWORKY, PTRIGSY, KIFAXY, INC2Y, IJMAX, ILOTY, +1 ) END IF ! array transposition J --> I CALL FAST_TRANSPOSE(ZBAND_YRT,ZBAND_YR,IJY,IIY,IKU) @@ -423,7 +513,9 @@ ENDIF ! ! Transposition Y-> X ! +!$acc kernels ZBAND_X(:,:,:) = 0. +!$acc end kernels CALL REMAP_Y_X_ll(ZBAND_YR,ZBAND_X,IINFO_ll) ! ! @@ -431,10 +523,9 @@ IF (HLBCX(1) == 'CYCL') THEN ZBAND1D( 1 : SIZE(ZBAND_X,1)*SIZE(ZBAND_X,2)*SIZE(ZBAND_X(:,:,IKB-1:),3) ) => ZBAND_X(:,:,IKB-1:) ZWORK1D( 1 : SIZE(ZWORKX) ) => ZWORKX(:,:,:) CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSX, KIFAXX, INC1X, INC2X, IIMAX, ILOTX, +1, & - SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSX)) + SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSX) ) ELSE - CALL FFT55( ZBAND_X(1:,1:,IKB-1:),ZWORKX,PTRIGSX,KIFAXX,INC2X, & - IIMAX,ILOTX,+1 ) + CALL FFT55( ZBAND_X(1:,1:,IKB-1:), ZWORKX, PTRIGSX, KIFAXX, INC2X, IIMAX, ILOTX, +1 ) END IF ! !------------------------------------------------------------------------------- @@ -455,34 +546,42 @@ IF (HLBCX(1) /= 'CYCL') THEN ZDXM2 = PDXHATM*PDXHATM ! IF (LWEST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB,IJE PF_1_Y(IIB-1,JJ,JK) = PF_1_Y(IIB,JJ,JK) - PY(IIB-1,JJ,JK)*ZDXM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! IF (LEAST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB,IJE PF_1_Y(IIE+1,JJ,JK) = PF_1_Y(IIE,JJ,JK) + PY(IIE+1,JJ,JK)*ZDXM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! ! we set the solution at the corner point by the condition: ! dxm ( P ) = 0 IF (LWEST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JJ = IJB,IJE PF_1_Y(IIB-1,JJ,IKB-1) = PF_1_Y(IIB,JJ,IKB-1) PF_1_Y(IIB-1,JJ,IKE+1) = PF_1_Y(IIB,JJ,IKE+1) END DO +!$acc end kernels END IF IF (LEAST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JJ = IJB,IJE PF_1_Y(IIE+1,JJ,IKB-1) = PF_1_Y(IIE,JJ,IKB-1) PF_1_Y(IIE+1,JJ,IKE+1) = PF_1_Y(IIE,JJ,IKE+1) END DO +!$acc end kernels END IF ! ELSE @@ -502,35 +601,43 @@ IF (.NOT.L2D) THEN ZDYM2 = PDYHATM*PDYHATM ! IF (LSOUTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB,IIE PF_1_Y(JI,IJB-1,JK) = PF_1_Y(JI,IJB,JK) - PY(JI,IJB-1,JK)*ZDYM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! IF (LNORTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB,IIE PF_1_Y(JI,IJE+1,JK) = PF_1_Y(JI,IJE,JK) + PY(JI,IJE+1,JK)*ZDYM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! we set the solution at the corner point by the condition: ! dym ( P ) = 0 ! IF (LSOUTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JI = IIB,IIE PF_1_Y(JI,IJB-1,IKB-1) = PF_1_Y(JI,IJB,IKB-1) PF_1_Y(JI,IJB-1,IKE+1) = PF_1_Y(JI,IJB,IKE+1) END DO +!$acc end kernels END IF ! IF (LNORTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JI = IIB,IIE PF_1_Y(JI,IJE+1,IKB-1) = PF_1_Y(JI,IJE,IKB-1) PF_1_Y(JI,IJE+1,IKE+1) = PF_1_Y(JI,IJE,IKE+1) END DO +!$acc end kernels END IF ELSE ! @@ -547,22 +654,31 @@ END IF IF (.NOT. L2D .AND. HLBCX(1)/='CYCL' .AND. HLBCY(1)/='CYCL') THEN ! the following verticals are not used IF ( (LWEST_ll(HSPLITTING='B')).AND.(LSOUTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIB-1,IJB-1,:)=0. +!$acc end kernels END IF ! IF ( (LWEST_ll(HSPLITTING='B')).AND.(LNORTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIB-1,IJE+1,:)=0. +!$acc end kernels END IF ! IF ( (LEAST_ll(HSPLITTING='B')).AND.(LSOUTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIE+1,IJB-1,:)=0. +!$acc end kernels END IF ! IF ( (LEAST_ll(HSPLITTING='B')).AND.(LNORTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIE+1,IJE+1,:)=0. +!$acc end kernels END IF END IF ! +#ifndef MNH_OPENACC DEALLOCATE(ZY) DEALLOCATE(ZAF) DEALLOCATE(ZBAND_X) @@ -574,6 +690,17 @@ DEALLOCATE(ZWORKX) DEALLOCATE(ZWORKY) DEALLOCATE(ZBETX) DEALLOCATE(ZGAM) +#else +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all OUT arrays + CALL MPPDB_CHECK( PF_1_Y, "FLAT_INV end:PF_1_Y" ) +END IF ! !------------------------------------------------------------------------------- ! @@ -586,6 +713,9 @@ CONTAINS INTEGER :: IJI,II,IJ,IIJ ! index in array X and XT INTEGER :: JK ! +!$acc data present( PX, PXT ) + +!$acc kernels DO JK=1,KNK ! PERMUTATION(PX,PXT) !CDIR NODEP @@ -601,7 +731,10 @@ CONTAINS END DO END DO -! +!$acc end kernels + +!$acc end data +! END SUBROUTINE FAST_TRANSPOSE SUBROUTINE FAST_SUBSTITUTION_3D(PBAND_YR,PBETX,PPBF,PGAM,PPCF,PAF & @@ -618,6 +751,9 @@ INTEGER :: JK ! initialization ! ! +!$acc data present( PBAND_YR, PGAM, PBAND_Y, PPBF, PAF, PBETX, PPCF ) + +!$acc kernels PBAND_YR = 0.0 PBETX(:) = PPBF(:,IKB-1) PBAND_YR(:,IKB-1) = PBAND_Y(:,IKB-1) & @@ -644,7 +780,10 @@ DO JK = IKE,IKB-1,-1 PBAND_YR(:,JK) = PBAND_YR(:,JK) - & PGAM(:,JK+1)*PBAND_YR(:,JK+1) END DO -! +!$acc end kernels + +!$acc end data +! ! END SUBROUTINE FAST_SUBSTITUTION_3D ! @@ -665,6 +804,9 @@ INTEGER :: JK ! initialization ! ! +!$acc data present( PBAND_YR, PGAM, PBAND_Y, PPBF, PAF, PBETX, PPCF ) + +!$acc kernels PBAND_YR = 0.0 PBETX(:,1) = PPBF(:,1,IKB-1) PBAND_YR(:,1,IKB-1) = PBAND_Y(:,1,IKB-1) & @@ -690,7 +832,10 @@ DO JK = IKE,IKB-1,-1 PBAND_YR(:,1,JK) = PBAND_YR(:,1,JK) - & PGAM(:,1,JK+1)*PBAND_YR(:,1,JK+1) END DO -! +!$acc end kernels + +!$acc end data +! ! END SUBROUTINE FAST_SUBSTITUTION_2D @@ -701,11 +846,17 @@ REAL, DIMENSION(KIY*KJY,KKU), INTENT(OUT) :: PTAB3D INTEGER :: JIJ,JK ! +!$acc data present( PTAB1D, PTAB3D ) + +!$acc kernels DO JK=1,KKU DO JIJ=1,KIY*KJY PTAB3D(JIJ,JK) = PTAB1D(JK) ENDDO ENDDO +!$acc end kernels + +!$acc end data ! END SUBROUTINE FAST_SPREAD ! diff --git a/src/MNH/flat_invz.f90 b/src/MNH/flat_invz.f90 index 930a2efbaaf07be4495456288c6e8f7c3c19af9f..41538cd0e5e625a1da6862977fd905e669683493 100644 --- a/src/MNH/flat_invz.f90 +++ b/src/MNH/flat_invz.f90 @@ -151,6 +151,10 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & USE MODE_FFT, ONLY: FFT991 USE MODE_FFT55, ONLY: FFT55 +#ifdef MNH_OPENACC + USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif + use mode_mppdb ! IMPLICIT NONE ! @@ -193,8 +197,13 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! !* 0.2 declaration of local variables ! +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZY_B ! work array to store the RHS of the equation REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZAF ! work array to expand PAF +#else + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZY_B ! work array to store the RHS of the equation + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZAF ! work array to expand PAF +#endif INTEGER :: IIB ! indice I for the first inner mass point along x INTEGER :: IIE ! indice I for the last inner mass point along x INTEGER :: IIMAX ! number of inner mass points along the x direction @@ -230,6 +239,7 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! the next for the FFT along zx,zy splitting resp. !JUANE ! +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZWORKX ! work array used by ! the FFT. It has been enlarged in order to be sufficient for 2D and 3D cases REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZWORKY ! work array used by @@ -249,6 +259,27 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_YT ! array in Y slices distribution transpose REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_YRT ! array in Y slices distribution transpose +#else + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORKX ! work array used by + ! the FFT. It has been enlarged in order to be sufficient for 2D and 3D cases + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORKY ! work array used by + ! the FFT. It has been enlarged in order to be sufficient for 2D and 3D cases + ! + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZGAM + ! intermediate arrays + REAL, DIMENSION(:,:), POINTER, CONTIGUOUS :: ZBETX ! for the tridiag. + ! matrix inversion + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_X ! array in X slices distribution + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_Y ! array in Y slices distribution + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_YR ! array in Y slices distribution + ! + INTEGER :: IINFO_ll ! return code of parallel routine + ! + INTEGER :: IIX,IJX,IIY,IJY ! dimensions of the extended x or y slices subdomain + ! + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_YT ! array in Y slices distribution transpose + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_YRT ! array in Y slices distribution transpose +#endif ! REAL(kind=MNHTIME), DIMENSION(2) :: ZT0,ZT1 !JUAN Z_SPLITTING @@ -262,16 +293,23 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & INTEGER :: II_SX_YP2_ZP1,IJ_SX_YP2_ZP1,IK_SX_YP2_ZP1 ! dimensions of SX_YP2_ZP1 slices INTEGER :: II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1,IK_SXP2_Y_ZP1 ! dimensions of SXP2_Y_ZP1 slices ! +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP2_YP1_Z ! array in SXP2_YP1_Z slices distribution REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SX_YP2_ZP1 ! array in SX_YP2_ZP1 slices distribution REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP2_Y_ZP1 ! array in SXP2_Y_ZP1 slices distribution REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP2_Y_ZP1R ! array in SXP2_Y_ZP1 slices distribution +! REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP2_Y_ZP1RBIS ! array in SXP2_Y_ZP1 slices distribution +! REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_B ! array in B slices distribution REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP1_YP2_Z ! array in SXP1_YP2_Z slices distribution REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZWORK_SX_YP2_ZP1 ! work array for SX_YP2_ZP1 FFT REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZWORK_SXP2_Y_ZP1 ! work array for SXP2_Y_ZP1 FFT REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP2_Y_ZP1T ! array in SXP2_Y_ZP1T slices distribution transpose REAL, DIMENSION(:,:,:), ALLOCATABLE, TARGET :: ZBAND_SXP2_Y_ZP1RT ! array in SXP2_Y_ZP1T slices distribution transpose +! REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZAF_B ! work array in B slices for expand PAF +! REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZGAM_B ! work array in B slices +! REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZBAND_BR ! result of substitution in B slices +! REAL, DIMENSION(:,:) , ALLOCATABLE :: ZBETX_B ! pivot of substitution in B slices ! ! JUAN P1/P2 SPLITTING ! @@ -279,6 +317,31 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZGAM_SXP2_YP1_Z REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZBAND_SXP2_YP1_ZR REAL, DIMENSION(:,:) , ALLOCATABLE :: ZBETX_SXP2_YP1_Z +#else + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_YP1_Z ! array in SXP2_YP1_Z slices distribution + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SX_YP2_ZP1 ! array in SX_YP2_ZP1 slices distribution + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_Y_ZP1 ! array in SXP2_Y_ZP1 slices distribution + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_Y_ZP1R ! array in SXP2_Y_ZP1 slices distribution +! REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_Y_ZP1RBIS ! array in SXP2_Y_ZP1 slices distribution +! REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_B ! array in B slices distribution + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP1_YP2_Z ! array in SXP1_YP2_Z slices distribution + + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORK_SX_YP2_ZP1 ! work array for SX_YP2_ZP1 FFT + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORK_SXP2_Y_ZP1 ! work array for SXP2_Y_ZP1 FFT + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_Y_ZP1T ! array in SXP2_Y_ZP1T slices distribution transpose + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_Y_ZP1RT ! array in SXP2_Y_ZP1T slices distribution transpose +! REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZAF_B ! work array in B slices for expand PAF +! REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZGAM_B ! work array in B slices +! REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_BR ! result of substitution in B slices +! REAL, DIMENSION(:,:) , POINTER, CONTIGUOUS :: ZBETX_B ! pivot of substitution in B slices + ! + ! JUAN P1/P2 SPLITTING + ! + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZAF_SXP2_YP1_Z + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZGAM_SXP2_YP1_Z + REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZBAND_SXP2_YP1_ZR + REAL, DIMENSION(:,:) , POINTER, CONTIGUOUS :: ZBETX_SXP2_YP1_Z +#endif ! INTEGER :: IIBI,IJBI,IIEI,IJEI ! INTEGER :: IERROR @@ -303,6 +366,7 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & IKB=1+JPVEXT IKE=IKU - JPVEXT ! +#ifndef MNH_OPENACC ALLOCATE( ZY_B(SIZE( PY, 1 ), SIZE( PY, 2 ), SIZE( PY, 3 ) ) ) ALLOCATE( ZAF (SIZE( PBF, 1 ), SIZE( PBF, 2 ), SIZE( PBF, 3 )) ) ! @@ -319,6 +383,40 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ALLOCATE(ZBAND_YRT(IJY,IIY,IKU)) END IF END IF ! NZ_SPLITTING +#else + !Pin positions in the pools of MNH memory + CALL MNH_MEM_POSITION_PIN() + + CALL MNH_MEM_GET( ZY_B, SIZE( PY, 1 ), SIZE( PY, 2 ), SIZE( PY, 3 ) ) + CALL MNH_MEM_GET( ZAF, SIZE( PBF, 1 ), SIZE( PBF, 2 ), SIZE( PBF, 3 ) ) + + IF ( IAND( NZ_SPLITTING, 1) > 0 ) THEN + CALL MNH_MEM_GET( ZBAND_X, IIX, IJX, IKU ) + CALL MNH_MEM_GET( ZBAND_Y, IIY, IJY, IKU ) + CALL MNH_MEM_GET( ZBAND_YR, IIY, IJY, IKU ) + CALL MNH_MEM_GET( ZWORKX, IIX, IJX, IKU ) + CALL MNH_MEM_GET( ZWORKY, IIY, IJY, IKU ) + CALL MNH_MEM_GET( ZBETX, IIY, IJY ) + CALL MNH_MEM_GET( ZGAM, IIY, IJY, IKU ) + IF ( .NOT. L2D ) THEN + CALL MNH_MEM_GET( ZBAND_YT, IJY, IIY, IKU ) + CALL MNH_MEM_GET( ZBAND_YRT, IJY, IIY, IKU ) + ELSE + CALL MNH_MEM_GET( ZBAND_YT, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_YRT, 0, 0, 0 ) + END IF + ELSE + CALL MNH_MEM_GET( ZBAND_X, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_Y, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_YR, 0, 0, 0 ) + CALL MNH_MEM_GET( ZWORKX, 0, 0, 0 ) + CALL MNH_MEM_GET( ZWORKY, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBETX, 0 ,0 ) + CALL MNH_MEM_GET( ZGAM, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_YT, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_YRT, 0, 0, 0 ) + END IF +#endif ! !JUAN Z_SPLITTING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! @@ -329,11 +427,14 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL GET_DIM_EXTZ_LL('SX_YP2_ZP1',II_SX_YP2_ZP1,IJ_SX_YP2_ZP1,IK_SX_YP2_ZP1) CALL GET_DIM_EXTZ_LL('SXP2_Y_ZP1',II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1,IK_SXP2_Y_ZP1) ! +#ifndef MNH_OPENACC ALLOCATE(ZBAND_SXP1_YP2_Z(II_SXP1_YP2_Z,IJ_SXP1_YP2_Z,IK_SXP1_YP2_Z)) ALLOCATE(ZBAND_SXP2_YP1_Z(II_SXP2_YP1_Z,IJ_SXP2_YP1_Z,IK_SXP2_YP1_Z)) ALLOCATE(ZBAND_SX_YP2_ZP1(II_SX_YP2_ZP1+2,IJ_SX_YP2_ZP1,IK_SX_YP2_ZP1)) ! add 2 points=0.0 at end for FFT ALLOCATE(ZBAND_SXP2_Y_ZP1(II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1+2,IK_SXP2_Y_ZP1)) ! idem ALLOCATE(ZBAND_SXP2_Y_ZP1R(II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1+2,IK_SXP2_Y_ZP1)) +!Not used ALLOCATE(ZBAND_SXP2_Y_ZP1RBIS(II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1+2,IK_SXP2_Y_ZP1)) +!Not used ALLOCATE(ZBAND_B (II_B,IJ_B,IK_B)) ALLOCATE(ZWORK_SX_YP2_ZP1(II_SX_YP2_ZP1+2,IJ_SX_YP2_ZP1,IK_SX_YP2_ZP1)) ! idem ALLOCATE(ZWORK_SXP2_Y_ZP1(II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1+2,IK_SXP2_Y_ZP1)) ! idem IF (.NOT. L2D) THEN @@ -343,6 +444,10 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ALLOCATE(ZBAND_SXP2_Y_ZP1T( 0, 0, 0 )) ALLOCATE(ZBAND_SXP2_Y_ZP1RT(0, 0, 0 )) END IF +!Not used ALLOCATE(ZAF_B(II_B,IJ_B,IK_B)) +!Not used ALLOCATE(ZGAM_B(II_B,IJ_B,IK_B)) +!Not used ALLOCATE(ZBAND_BR(II_B,IJ_B,IK_B)) +!Not used ALLOCATE(ZBETX_B(II_B,IJ_B)) ! ! JUAN P1/P2 SPLITTING ! @@ -350,9 +455,76 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ALLOCATE(ZGAM_SXP2_YP1_Z(II_SXP2_YP1_Z,IJ_SXP2_YP1_Z,IK_SXP2_YP1_Z)) ALLOCATE(ZBAND_SXP2_YP1_ZR(II_SXP2_YP1_Z,IJ_SXP2_YP1_Z,IK_SXP2_YP1_Z)) ALLOCATE(ZBETX_SXP2_YP1_Z(II_SXP2_YP1_Z,IJ_SXP2_YP1_Z)) +#else + CALL MNH_MEM_GET( ZBAND_SXP1_YP2_Z, II_SXP1_YP2_Z, IJ_SXP1_YP2_Z, IK_SXP1_YP2_Z ) + CALL MNH_MEM_GET( ZBAND_SXP2_YP1_Z, II_SXP2_YP1_Z, IJ_SXP2_YP1_Z, IK_SXP2_YP1_Z ) + CALL MNH_MEM_GET( ZBAND_SX_YP2_ZP1, II_SX_YP2_ZP1+2, IJ_SX_YP2_ZP1, IK_SX_YP2_ZP1 ) ! add 2 points=0.0 at end for FFT + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1, II_SXP2_Y_ZP1, IJ_SXP2_Y_ZP1+2, IK_SXP2_Y_ZP1 ) ! idem + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1R, II_SXP2_Y_ZP1, IJ_SXP2_Y_ZP1+2, IK_SXP2_Y_ZP1 ) +!Not used CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1RBIS, II_SXP2_Y_ZP1, IJ_SXP2_Y_ZP1+2, IK_SXP2_Y_ZP1 ) +!Not used CALL MNH_MEM_GET( ZBAND_B, II_B, IJ_B,IK_B ) + CALL MNH_MEM_GET( ZWORK_SX_YP2_ZP1, II_SX_YP2_ZP1+2, IJ_SX_YP2_ZP1, IK_SX_YP2_ZP1 ) ! idem + CALL MNH_MEM_GET( ZWORK_SXP2_Y_ZP1, II_SXP2_Y_ZP1, IJ_SXP2_Y_ZP1+2, IK_SXP2_Y_ZP1 ) ! idem + IF (.NOT. L2D) THEN + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1T, IJ_SXP2_Y_ZP1+2, II_SXP2_Y_ZP1, IK_SXP2_Y_ZP1 ) ! idem + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1RT, IJ_SXP2_Y_ZP1+2, II_SXP2_Y_ZP1, IK_SXP2_Y_ZP1 ) ! idem + ELSE + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1T, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1RT, 0, 0, 0 ) + END IF +!Not used CALL MNH_MEM_GET( ZAF_B, II_B, IJ_B, IK_B ) +!Not used CALL MNH_MEM_GET( ZGAM_B, II_B, IJ_B, IK_B ) +!Not used CALL MNH_MEM_GET( ZBAND_BR, II_B, IJ_B, IK_B ) +!Not used CALL MNH_MEM_GET( ZBETX_B, II_B, IJ_B ) + ! + ! JUAN P1/P2 SPLITTING + ! + CALL MNH_MEM_GET( ZAF_SXP2_YP1_Z, II_SXP2_YP1_Z, IJ_SXP2_YP1_Z, IK_SXP2_YP1_Z ) + CALL MNH_MEM_GET( ZGAM_SXP2_YP1_Z, II_SXP2_YP1_Z, IJ_SXP2_YP1_Z, IK_SXP2_YP1_Z ) + CALL MNH_MEM_GET( ZBAND_SXP2_YP1_ZR, II_SXP2_YP1_Z, IJ_SXP2_YP1_Z, IK_SXP2_YP1_Z ) + CALL MNH_MEM_GET( ZBETX_SXP2_YP1_Z, II_SXP2_YP1_Z, IJ_SXP2_YP1_Z ) +#endif ! ! JUAN P1/P2 SPLITTING ! + ZBAND_SXP1_YP2_Z(:,:,:) = XUNDEF + ZBAND_SXP2_YP1_Z(:,:,:) = XUNDEF + ZBAND_SX_YP2_ZP1(:,:,:) = XUNDEF + ZBAND_SXP2_Y_ZP1(:,:,:) = XUNDEF + ZBAND_SXP2_Y_ZP1R(:,:,:) = XUNDEF +! ZBAND_SXP2_Y_ZP1RBIS(:,:,:) = XUNDEF +! ZBAND_B(:,:,:) = XUNDEF + ZWORK_SX_YP2_ZP1(:,:,:) = XUNDEF + ZWORK_SXP2_Y_ZP1(:,:,:) = XUNDEF + IF ( .NOT. L2D ) THEN + ZBAND_SXP2_Y_ZP1T(:,:,:) = XUNDEF + ZBAND_SXP2_Y_ZP1RT(:,:,:) = XUNDEF + END IF +#ifdef MNH_OPENACC + ELSE + CALL MNH_MEM_GET( ZBAND_SXP1_YP2_Z, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_YP1_Z, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SX_YP2_ZP1, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1R, 0, 0, 0 ) +!Not used CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1RBIS, 0, 0, 0 ) +!Not used CALL MNH_MEM_GET( ZBAND_B, 0, 0 ) + CALL MNH_MEM_GET( ZWORK_SX_YP2_ZP1, 0, 0, 0 ) + CALL MNH_MEM_GET( ZWORK_SXP2_Y_ZP1, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1T, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_Y_ZP1RT, 0, 0, 0 ) +!Not used CALL MNH_MEM_GET( ZAF_B, 0, 0, 0 ) +!Not used CALL MNH_MEM_GET( ZGAM_B, 0, 0, 0 ) +!Not used CALL MNH_MEM_GET( ZBAND_BR, 0, 0, 0 ) +!Not used CALL MNH_MEM_GET( ZBETX_B, 0, 0 ) + ! + ! JUAN P1/P2 SPLITTING + ! + CALL MNH_MEM_GET( ZAF_SXP2_YP1_Z, 0, 0, 0 ) + CALL MNH_MEM_GET( ZGAM_SXP2_YP1_Z, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBAND_SXP2_YP1_ZR, 0, 0, 0 ) + CALL MNH_MEM_GET( ZBETX_SXP2_YP1_Z, 0, 0 ) +#endif ENDIF ! NZ_SPLITTING ! !JUAN Z_SPLITTING @@ -362,6 +534,14 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & !* 2. COMPUTE THE ARRAY INCREMENTS FOR THE FFT ! ---------------------------------------- ! +!$acc data present( PRHOM, PAF, PBF, PCF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PY, PBFB, PBF_SXP2_YP1_Z ) & +!$acc & present( PF_1_Y ) & +!$acc & present( ZWORKX, ZWORKY, ZWORK_SX_YP2_ZP1, ZWORK_SXP2_Y_ZP1 , & +!$acc & ZBAND_X, ZBAND_Y, ZBAND_YRT, ZBAND_YR, ZBAND_YT, & +!$acc & ZBAND_SX_YP2_ZP1, ZBAND_SXP2_Y_ZP1, ZBAND_SXP2_Y_ZP1T, ZBAND_SXP2_Y_ZP1R, ZBAND_SXP2_Y_ZP1RT ) & +!$acc & present( ZY_B, ZBAND_SXP1_YP2_Z, ZBAND_SXP2_YP1_Z, ZAF_SXP2_YP1_Z, ZBAND_SXP2_YP1_ZR, ZGAM, ZGAM_SXP2_YP1_Z, & +!$acc & ZBETX, ZBETX_SXP2_YP1_Z ) + IF(.NOT. L2D) THEN ! ILOTX = IJX*IKU @@ -400,8 +580,10 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! !* 3.1 copy the RHS in a local array REMAP functions will shift the indices for the FFT ! +!$acc kernels PF_1_Y(:,:,:) = 0. ZY_B(:,:,:) = PY(:,:,:) +!$acc end kernels ! !* 3.2 form homogeneous boundary condition used by the FFT for non-periodic ! cases @@ -411,19 +593,23 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & IF (HLBCX(1) /= 'CYCL') THEN ! IF (LWEST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB, IJE ZY_B(IIB,JJ,JK) = ZY_B(IIB,JJ,JK) + PY(IIB-1,JJ,JK) END DO END DO +!$acc end kernels END IF ! IF (LEAST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB, IJE ZY_B(IIE,JJ,JK) = ZY_B(IIE,JJ,JK) - PY(IIE+1,JJ,JK) END DO END DO +!$acc end kernels END IF END IF ! @@ -431,19 +617,23 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! IF (HLBCY(1) /= 'CYCL'.AND. (.NOT. L2D)) THEN IF (LSOUTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB, IIE ZY_B(JI,IJB,JK) = ZY_B(JI,IJB,JK) + PY(JI,IJB-1,JK) END DO END DO +!$acc end kernels END IF ! IF (LNORTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB, IIE ZY_B(JI,IJE,JK) = ZY_B(JI,IJE,JK) - PY(JI,IJE+1,JK) END DO END DO +!$acc end kernels END IF END IF ! @@ -451,7 +641,12 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & !* 3.3 2way structure -> xslice structure, + data shift ! IF ( IAND(NZ_SPLITTING,1) > 0 ) THEN +#ifdef MNH_OPENACC +CALL PRINT_MSG( NVERB_WARNING, 'GEN', 'FLAT_INVZ', 'OpenACC: IAND(NZ_SPLITTING,1) > 0 not tested' ) +#endif +!$acc kernels ZBAND_X(:,:,:) = 0.0 +!$acc end kernels CALL REMAP_2WAY_X_ll(ZY_B,ZBAND_X,IINFO_ll) END IF ! @@ -461,8 +656,10 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & !!$ ZBAND_B = ZY_B !!$ print*,"IP=",IP," SIZE(ZY_B)=",SIZE(ZY_B,1),SIZE(ZY_B,2)," IIBI:IIEI,IJBI:IJEI=" , IIBI,IIEI,IJBI,IJEI !!$ print*,"IP=",IP," SIZE(ZBAND_SXP1_YP2_Z)=",SIZE(ZBAND_SXP1_YP2_Z,1),SIZE(ZBAND_SXP1_YP2_Z,2) +!$acc kernels ZBAND_SXP1_YP2_Z(:,:,:) = ZY_B (IIBI:IIEI,IJBI:IJEI,:) ZBAND_SX_YP2_ZP1(:,:,:) = 0. +!$acc end kernels CALL SECOND_MNH2(ZT0) CALL REMAP_SXP1_YP2_Z_SX_YP2_ZP1_ll(ZBAND_SXP1_YP2_Z,ZBAND_SX_YP2_ZP1,IINFO_ll) @@ -489,11 +686,12 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSX, KIFAXX, INC1X, INC2X, IIMAX, ILOTX,-1, & SIZE(ZBAND1D), SIZE(ZWORK1D), size(PTRIGSX) ) ELSE - CALL FFT55(ZBAND_X(1:,1:,IKB-1:),ZWORKX,PTRIGSX,KIFAXX,INC2X, & - IIMAX,ILOTX,-1 ) + CALL FFT55( ZBAND_X(1:,1:,IKB-1:), ZWORKX, PTRIGSX, KIFAXX, INC2X, IIMAX, ILOTX, -1 ) END IF ! - ZBAND_Y=0. +!$acc kernels + ZBAND_Y(:,:,:) = 0. +!$acc end kernels CALL REMAP_X_Y_ll(ZBAND_X,ZBAND_Y,IINFO_ll) END IF ! NZ_SPLITTING ! @@ -501,6 +699,9 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! IF ( IAND(NZ_SPLITTING,2) > 0 ) THEN IF (HLBCX(1) == 'CYCL') THEN +#ifdef MNH_OPENACC +CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'FLAT_INVZ', 'OpenACC: HLBCX(1)==CYCL not ported' ) +#endif ZBAND1D( 1 : SIZE(ZBAND_SX_YP2_ZP1) ) => ZBAND_SX_YP2_ZP1(:,:,:) ZWORK1D( 1 : SIZE(ZWORK_SX_YP2_ZP1) ) => ZWORK_SX_YP2_ZP1(:,:,:) CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSX, KIFAXX, INC1_SX_YP2_ZP1, INC2_SX_YP2_ZP1, IIMAX, ILOT_SX_YP2_ZP1, -1, & @@ -512,7 +713,9 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL FFT55(ZBAND_SX_YP2_ZP1,ZWORK_SX_YP2_ZP1,PTRIGSX,KIFAXX,INC2_SX_YP2_ZP1, & IIMAX,ILOT_SX_YP2_ZP1,-1 ) END IF +!$acc kernels ZBAND_SXP2_Y_ZP1(:,:,:) = 0. +!$acc end kernels CALL SECOND_MNH2(ZT0) CALL REMAP_SX_YP2_ZP1_SXP2_Y_ZP1_ll(ZBAND_SX_YP2_ZP1,ZBAND_SXP2_Y_ZP1,IINFO_ll) CALL SECOND_MNH2(ZT1) @@ -538,8 +741,7 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSY, KIFAXY, INC1Y, INC2Y, IJMAX, ILOTY,-1, & SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSY) ) ELSE - CALL FFT55(ZBAND_YT(1:,1:,IKB-1:),ZWORKY,PTRIGSY,KIFAXY,INC2Y, & - IJMAX,ILOTY,-1 ) + CALL FFT55( ZBAND_YT(1:,1:,IKB-1:), ZWORKY, PTRIGSY, KIFAXY, INC2Y, IJMAX, ILOTY, -1 ) END IF ! END IF @@ -553,16 +755,18 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL FAST_TRANSPOSE(ZBAND_SXP2_Y_ZP1,ZBAND_SXP2_Y_ZP1T,II_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1+2,IK_SXP2_Y_ZP1) ! IF (HLBCY(1) == 'CYCL') THEN +#ifdef MNH_OPENACC +CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'FLAT_INVZ', 'OpenACC: HLBCY(1)==CYCL not ported' ) +#endif ZBAND1D( 1 : SIZE(ZBAND_SXP2_Y_ZP1T) ) => ZBAND_SXP2_Y_ZP1T(:,:,:) ZWORK1D( 1 : SIZE(ZWORK_SXP2_Y_ZP1) ) => ZWORK_SXP2_Y_ZP1(:,:,:) - CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSY, KIFAXY, INC1_SXP2_Y_ZP1,INC2_SXP2_Y_ZP1, IJMAX, ILOT_SXP2_Y_ZP1, -1, & + CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSY, KIFAXY, INC1_SXP2_Y_ZP1, INC2_SXP2_Y_ZP1, IJMAX, ILOT_SXP2_Y_ZP1, -1, & SIZE(ZBAND1D), SIZE(ZWORK1D), size(PTRIGSY) ) ! move (N+1) values in (2) values to avoid to lost them ZBAND_SXP2_Y_ZP1T(2,:,:) = ZBAND_SXP2_Y_ZP1T(INC2_SXP2_Y_ZP1-1,:,:) !ZBAND_SXP2_Y_ZP1T(INC2_SXP2_Y_ZP1-1,:,:) = 0.0 ELSE - CALL FFT55(ZBAND_SXP2_Y_ZP1T,ZWORK_SXP2_Y_ZP1,PTRIGSY,KIFAXY,INC2_SXP2_Y_ZP1, & - IJMAX,ILOT_SXP2_Y_ZP1,-1 ) + CALL FFT55( ZBAND_SXP2_Y_ZP1T, ZWORK_SXP2_Y_ZP1, PTRIGSY, KIFAXY, INC2_SXP2_Y_ZP1, IJMAX, ILOT_SXP2_Y_ZP1, -1 ) END IF END IF END IF ! NZ_SPLITTING @@ -579,16 +783,23 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & IF (L2D) THEN ZBAND_Y(1,1,IKE+1)=0. ELSE +!$acc kernels ZBAND_YT(1,1,IKE+1)=0. +!$acc end kernels END IF END IF CALL FAST_SPREAD(PAF,ZAF,IIY,IJY,IKU) ! IF (LWEST_ll(HSPLITTING='Y')) THEN +!$acc kernels ZAF(1,1,IKE+1)=0. !singular matrix corresponding to the horizontal average +!$acc end kernels END IF ! IF (L2D) THEN +#ifdef MNH_OPENACC +CALL PRINT_MSG( NVERB_WARNING, 'GEN', 'FLAT_INVZ', 'L2D=T not tested' ) +#endif CALL FAST_SUBSTITUTION_2D(ZBAND_YR,ZBETX,PBF,ZGAM,PCF,ZAF & ,ZBAND_Y,IIY,IJY,IKU) ELSE @@ -635,7 +846,9 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! IF ( ( IAND(NZ_SPLITTING,2) > 0 ) .AND. ( IAND(NZ_SPLITTING,8) > 0 )) THEN CALL FAST_TRANSPOSE(ZBAND_SXP2_Y_ZP1T,ZBAND_SXP2_Y_ZP1,IJ_SXP2_Y_ZP1+2,II_SXP2_Y_ZP1,IK_SXP2_Y_ZP1) +!$acc kernels ZBAND_SXP2_YP1_Z(:,:,:) = 0.0 +!$acc end kernels CALL SECOND_MNH2(ZT0) CALL REMAP_SXP2_Y_ZP1_SXP2_YP1_Z_ll(ZBAND_SXP2_Y_ZP1,ZBAND_SXP2_YP1_Z,IINFO_ll) CALL SECOND_MNH2(ZT1) @@ -647,13 +860,17 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & IF (L2D) THEN !Juan a faire ZBAND_Y(1,1,IKE+1)=0 ELSE +!$acc kernels ZBAND_SXP2_YP1_Z(1,1,IKE+1)=0. +!$acc end kernels END IF END IF CALL FAST_SPREAD(PAF,ZAF_SXP2_YP1_Z,II_SXP2_YP1_Z,IJ_SXP2_YP1_Z,IK_SXP2_YP1_Z) ! IF (LWESTZ_ll(HSPLITTING='SXP2_YP1_Z').AND.LSOUTHZ_ll(HSPLITTING='SXP2_YP1_Z')) THEN +!$acc kernels ZAF_SXP2_YP1_Z(1,1,IKE+1)=0. !singular matrix corresponding to the horizontal average +!$acc end kernels END IF ! IF (L2D) THEN @@ -684,8 +901,7 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSY, KIFAXY, INC1Y, INC2Y, IJMAX, ILOTY, +1, & SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSY) ) ELSE - CALL FFT55( ZBAND_YRT(1:,1:,IKB-1:),ZWORKY,PTRIGSY,KIFAXY,INC2Y, & - IJMAX,ILOTY,+1 ) + CALL FFT55( ZBAND_YRT(1:,1:,IKB-1:), ZWORKY, PTRIGSY, KIFAXY, INC2Y, IJMAX, ILOTY, +1 ) END IF ! array transposition J --> I CALL FAST_TRANSPOSE(ZBAND_YRT,ZBAND_YR,IJY,IIY,IKU) @@ -694,7 +910,9 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! ! Transposition Y-> X ! +!$acc kernels ZBAND_X(:,:,:) = 0. +!$acc end kernels CALL REMAP_Y_X_ll(ZBAND_YR,ZBAND_X,IINFO_ll) ! IF (HLBCX(1) == 'CYCL') THEN @@ -703,8 +921,7 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSX, KIFAXX, INC1X, INC2X, IIMAX, ILOTX, +1, & SIZE(ZBAND1D), SIZE(ZWORK1D), SIZE(PTRIGSX) ) ELSE - CALL FFT55( ZBAND_X(1:,1:,IKB-1:),ZWORKX,PTRIGSX,KIFAXX,INC2X, & - IIMAX,ILOTX,+1 ) + CALL FFT55( ZBAND_X(1:,1:,IKB-1:), ZWORKX, PTRIGSX, KIFAXX, INC2X, IIMAX, ILOTX, +1 ) END IF END IF ! NZ_SPLITTING @@ -732,15 +949,16 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & IF (.NOT. L2D) THEN IF (HLBCY(1) == 'CYCL') THEN ! re-set (N+1) values with (2) values ( stored here to avoid to lost them ) +!$acc kernels ZBAND_SXP2_Y_ZP1RT(INC2_SXP2_Y_ZP1-1,:,:) = ZBAND_SXP2_Y_ZP1RT(2,:,:) ZBAND_SXP2_Y_ZP1RT(2,:,:) = 0.0 +!$acc end kernels ZBAND1D( 1 : SIZE(ZBAND_SXP2_Y_ZP1RT) ) => ZBAND_SXP2_Y_ZP1RT(:,:,:) ZWORK1D( 1 : SIZE(ZWORK_SXP2_Y_ZP1) ) => ZWORK_SXP2_Y_ZP1(:,:,:) CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSY, KIFAXY, INC1_SXP2_Y_ZP1, INC2_SXP2_Y_ZP1, IJMAX, & ILOT_SXP2_Y_ZP1, +1, SIZE(ZBAND1D), SIZE(ZWORK1D), size(PTRIGSY) ) ELSE - CALL FFT55(ZBAND_SXP2_Y_ZP1RT,ZWORK_SXP2_Y_ZP1,PTRIGSY,KIFAXY,INC2_SXP2_Y_ZP1, & - IJMAX,ILOT_SXP2_Y_ZP1,+1 ) + CALL FFT55( ZBAND_SXP2_Y_ZP1RT, ZWORK_SXP2_Y_ZP1, PTRIGSY, KIFAXY, INC2_SXP2_Y_ZP1, IJMAX, ILOT_SXP2_Y_ZP1, +1 ) END IF ! array transposition J --> I CALL FAST_TRANSPOSE(ZBAND_SXP2_Y_ZP1RT,ZBAND_SXP2_Y_ZP1R,IJ_SXP2_Y_ZP1+2,II_SXP2_Y_ZP1,IK_SXP2_Y_ZP1) @@ -749,7 +967,9 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! ! Transposition Y-> X ! +!$acc kernels ZBAND_SX_YP2_ZP1(:,:,:) = 0. +!$acc end kernels CALL SECOND_MNH2(ZT0) CALL REMAP_SXP2_Y_ZP1_SX_YP2_ZP1_ll(ZBAND_SXP2_Y_ZP1R,ZBAND_SX_YP2_ZP1,IINFO_ll) CALL SECOND_MNH2(ZT1) @@ -757,15 +977,16 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! IF (HLBCX(1) == 'CYCL') THEN ! re-set (N+1) values with (2) values ( stored here to avoid to lost them ) +!$acc kernels ZBAND_SX_YP2_ZP1(INC2_SX_YP2_ZP1-1,:,:) = ZBAND_SX_YP2_ZP1(2,:,:) ZBAND_SX_YP2_ZP1(2,:,:) = 0.0 +!$acc end kernels ZBAND1D( 1 : SIZE(ZBAND_SX_YP2_ZP1) ) => ZBAND_SX_YP2_ZP1(:,:,:) ZWORK1D( 1 : SIZE(ZWORK_SX_YP2_ZP1) ) => ZWORK_SX_YP2_ZP1(:,:,:) CALL FFT991( ZBAND1D, ZWORK1D, PTRIGSX, KIFAXX, INC1_SX_YP2_ZP1, INC2_SX_YP2_ZP1, IIMAX, & ILOT_SX_YP2_ZP1, +1, SIZE(ZBAND1D), SIZE(ZWORK1D), size(PTRIGSX) ) ELSE - CALL FFT55(ZBAND_SX_YP2_ZP1,ZWORK_SX_YP2_ZP1,PTRIGSX,KIFAXX,INC2_SX_YP2_ZP1, & - IIMAX,ILOT_SX_YP2_ZP1,+1 ) + CALL FFT55( ZBAND_SX_YP2_ZP1, ZWORK_SX_YP2_ZP1, PTRIGSX, KIFAXX, INC2_SX_YP2_ZP1, IIMAX, ILOT_SX_YP2_ZP1, +1 ) END IF END IF ! NZ_SPLITTING @@ -790,13 +1011,15 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & CALL REMAP_SX_YP2_ZP1_SXP1_YP2_Z_ll(ZBAND_SX_YP2_ZP1,ZBAND_SXP1_YP2_Z,IINFO_ll) CALL SECOND_MNH2(ZT1) TIMEZ%T_MAP_SX_YP2_ZP1_B = TIMEZ%T_MAP_SX_YP2_ZP1_B + ZT1 - ZT0 - IF ( IAND(NZ_SPLITTING,1) > 0 ) THEN - ! for test save 2D value - CALL GET_HALO(ZBAND_B) - ZBAND_BR = PF_1_Y - ZBAND_B - END IF +! IF ( IAND(NZ_SPLITTING,1) > 0 ) THEN +! ! for test save 2D value +! CALL GET_HALO(ZBAND_B) +! ZBAND_BR(:,:,:) = PF_1_Y(:,:,:) - ZBAND_B(:,:,:) +! END IF +!$acc kernels PF_1_Y(IIBI:IIEI,IJBI:IJEI,:) = ZBAND_SXP1_YP2_Z(:,:,:) !PF_1_Y = ZBAND_B +!$acc end kernels END IF ! !* 7.2 complete the lateral boundaries @@ -808,34 +1031,42 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ZDXM2 = PDXHATM*PDXHATM ! IF (LWEST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB,IJE PF_1_Y(IIB-1,JJ,JK) = PF_1_Y(IIB,JJ,JK) - PY(IIB-1,JJ,JK)*ZDXM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! IF (LEAST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JJ = IJB,IJE PF_1_Y(IIE+1,JJ,JK) = PF_1_Y(IIE,JJ,JK) + PY(IIE+1,JJ,JK)*ZDXM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! ! we set the solution at the corner point by the condition: ! dxm ( P ) = 0 IF (LWEST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JJ = IJB,IJE PF_1_Y(IIB-1,JJ,IKB-1) = PF_1_Y(IIB,JJ,IKB-1) PF_1_Y(IIB-1,JJ,IKE+1) = PF_1_Y(IIB,JJ,IKE+1) END DO +!$acc end kernels END IF IF (LEAST_ll(HSPLITTING='B')) THEN +!$acc kernels DO JJ = IJB,IJE PF_1_Y(IIE+1,JJ,IKB-1) = PF_1_Y(IIE,JJ,IKB-1) PF_1_Y(IIE+1,JJ,IKE+1) = PF_1_Y(IIE,JJ,IKE+1) END DO +!$acc end kernels END IF ! ELSE @@ -855,35 +1086,43 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ZDYM2 = PDYHATM*PDYHATM ! IF (LSOUTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB,IIE PF_1_Y(JI,IJB-1,JK) = PF_1_Y(JI,IJB,JK) - PY(JI,IJB-1,JK)*ZDYM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! IF (LNORTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JK=IKB,IKE DO JI = IIB,IIE PF_1_Y(JI,IJE+1,JK) = PF_1_Y(JI,IJE,JK) + PY(JI,IJE+1,JK)*ZDYM2/PRHOM(JK) END DO END DO +!$acc end kernels END IF ! we set the solution at the corner point by the condition: ! dym ( P ) = 0 ! IF (LSOUTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JI = IIB,IIE PF_1_Y(JI,IJB-1,IKB-1) = PF_1_Y(JI,IJB,IKB-1) PF_1_Y(JI,IJB-1,IKE+1) = PF_1_Y(JI,IJB,IKE+1) END DO +!$acc end kernels END IF ! IF (LNORTH_ll(HSPLITTING='B')) THEN +!$acc kernels DO JI = IIB,IIE PF_1_Y(JI,IJE+1,IKB-1) = PF_1_Y(JI,IJE,IKB-1) PF_1_Y(JI,IJE+1,IKE+1) = PF_1_Y(JI,IJE,IKE+1) END DO +!$acc end kernels END IF ELSE ! @@ -898,25 +1137,37 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & END IF ! !JUAN Z_SPLITTING - ! +#ifndef MNH_OPENACC CALL GET_HALO( PF_1_Y, HNAME='PF_1_Y' ) +#else + CALL GET_HALO_D( PF_1_Y, HNAME='UPDATE_HALO_ll::GET_HALO::PF_1_Y' ) +#endif + ! !JUAN Z_SPLITTING IF (.NOT. L2D .AND. HLBCX(1)/='CYCL' .AND. HLBCY(1)/='CYCL') THEN ! the following verticals are not used IF ( (LWEST_ll(HSPLITTING='B')).AND.(LSOUTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIB-1,IJB-1,:)=0. +!$acc end kernels END IF ! IF ( (LWEST_ll(HSPLITTING='B')).AND.(LNORTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIB-1,IJE+1,:)=0. +!$acc end kernels END IF ! IF ( (LEAST_ll(HSPLITTING='B')).AND.(LSOUTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIE+1,IJB-1,:)=0. +!$acc end kernels END IF ! IF ( (LEAST_ll(HSPLITTING='B')).AND.(LNORTH_ll(HSPLITTING='B')) ) THEN +!$acc kernels PF_1_Y(IIE+1,IJE+1,:)=0. +!$acc end kernels END IF END IF ! @@ -934,6 +1185,10 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & !ZY_B = SQRT(ZY_B * ZY_B) !print*,"MAX ZY_B=",MAXVAL(ZY_B) ! + +!$acc end data + +#ifndef MNH_OPENACC IF ( IAND(NZ_SPLITTING,1) > 0 ) THEN DEALLOCATE(ZBAND_X) DEALLOCATE(ZBAND_Y) @@ -952,18 +1207,27 @@ SUBROUTINE FLAT_INVZ(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & DEALLOCATE(ZBAND_SX_YP2_ZP1) DEALLOCATE(ZBAND_SXP2_Y_ZP1) DEALLOCATE(ZBAND_SXP2_Y_ZP1R) +! DEALLOCATE(ZBAND_B) DEALLOCATE(ZWORK_SX_YP2_ZP1) DEALLOCATE(ZWORK_SXP2_Y_ZP1) IF (.NOT. L2D) THEN DEALLOCATE(ZBAND_SXP2_Y_ZP1T) DEALLOCATE(ZBAND_SXP2_Y_ZP1RT) END IF +! DEALLOCATE(ZAF_B) +! DEALLOCATE(ZGAM_B) +! DEALLOCATE(ZBAND_BR) +! DEALLOCATE(ZBETX_B) ! JUAN P1/P2 SPLITTING DEALLOCATE(ZAF_SXP2_YP1_Z) DEALLOCATE(ZGAM_SXP2_YP1_Z) DEALLOCATE(ZBAND_SXP2_YP1_ZR) DEALLOCATE(ZBETX_SXP2_YP1_Z) END IF +#else + !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN + CALL MNH_MEM_RELEASE() +#endif !JUAN SCALASCA TEST ! CALL MPI_BARRIER(NTRANS_COM, IERROR) !JUAN SCALASCA TEST @@ -979,6 +1243,9 @@ CONTAINS INTEGER :: IJI,II,IJ,IIJ ! index in array X and XT INTEGER :: JK ! +!$acc data present( PX, PXT ) + +!$acc kernels DO JK=1,KNK ! PERMUTATION(PX,PXT) !CDIR NODEP @@ -994,7 +1261,10 @@ CONTAINS END DO END DO - ! +!$acc end kernels + +!$acc end data + ! END SUBROUTINE FAST_TRANSPOSE SUBROUTINE FAST_SUBSTITUTION_3D(PBAND_YR,PBETX,PPBF,PGAM,PPCF,PAF & @@ -1011,6 +1281,9 @@ CONTAINS ! initialization ! ! +!$acc data present( PBAND_YR, PGAM, PBAND_Y, PPBF, PAF, PBETX, PPCF ) + +!$acc kernels PBAND_YR = 0.0 PBETX(:) = PPBF(:,IKB-1) PBAND_YR(:,IKB-1) = PBAND_Y(:,IKB-1) & @@ -1037,7 +1310,10 @@ CONTAINS PBAND_YR(:,JK) = PBAND_YR(:,JK) - & PGAM(:,JK+1)*PBAND_YR(:,JK+1) END DO - ! +!$acc end kernels + +!$acc end data + ! ! END SUBROUTINE FAST_SUBSTITUTION_3D ! @@ -1058,6 +1334,9 @@ CONTAINS ! initialization ! ! +!$acc data present( PBAND_YR, PGAM, PBAND_Y, PPBF, PAF, PBETX, PPCF ) + +!$acc kernels PBAND_YR = 0.0 PBETX(:,1) = PPBF(:,1,IKB-1) PBAND_YR(:,1,IKB-1) = PBAND_Y(:,1,IKB-1) & @@ -1083,7 +1362,10 @@ CONTAINS PBAND_YR(:,1,JK) = PBAND_YR(:,1,JK) - & PGAM(:,1,JK+1)*PBAND_YR(:,1,JK+1) END DO - ! +!$acc end kernels + +!$acc end data + ! ! END SUBROUTINE FAST_SUBSTITUTION_2D @@ -1094,11 +1376,17 @@ CONTAINS INTEGER :: JIJ,JK ! +!$acc data present( PTAB1D, PTAB3D ) + +!$acc kernels DO JK=1,KKU DO JIJ=1,KIY*KJY PTAB3D(JIJ,JK) = PTAB1D(JK) ENDDO ENDDO +!$acc end kernels + +!$acc end data ! END SUBROUTINE FAST_SPREAD ! diff --git a/src/MNH/gdiv.f90 b/src/MNH/gdiv.f90 index b22065908c6e897d61706999fd1374582060f1d4..1ae26eddfe5067825a40df141381cc574532764b 100644 --- a/src/MNH/gdiv.f90 +++ b/src/MNH/gdiv.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2020 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -32,6 +32,30 @@ REAL, DIMENSION(:,:,:), INTENT(OUT) :: PGDIV ! divergence at ! a mass point ! END SUBROUTINE GDIV +! + SUBROUTINE GDIV_DEVICE(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PU,PV,PW,PGDIV) +! +IMPLICIT NONE +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type +! + ! Metric coefficients: +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX ! d*xx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDYY ! d*yy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZX ! d*zx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZY ! d*zy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZZ ! d*zz +! + ! Field components +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PU ! along x +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PV ! along y +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PW ! along z +! +REAL, DIMENSION(:,:,:), INTENT(OUT) :: PGDIV ! divergence at + ! a mass point +! +END SUBROUTINE GDIV_DEVICE ! END INTERFACE ! @@ -106,6 +130,8 @@ USE MODI_CONTRAV ! USE MODE_ll ! +USE MODE_MPPDB +! IMPLICIT NONE ! !* 0.1 declarations of arguments @@ -147,6 +173,18 @@ INTEGER :: JI,JJ,JK ! loop indexes ! ! !------------------------------------------------------------------------------- +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"GDIV beg:PDXX") + CALL MPPDB_CHECK(PDYY,"GDIV beg:PDYY") + CALL MPPDB_CHECK(PDZZ,"GDIV beg:PDZZ") + !Check all INOUT arrays + CALL MPPDB_CHECK(PDZX,"GDIV beg:PDZX") + CALL MPPDB_CHECK(PDZY,"GDIV beg:PDZY") + CALL MPPDB_CHECK(PU,"GDIV beg:PU") + CALL MPPDB_CHECK(PV,"GDIV beg:PV") + CALL MPPDB_CHECK(PW,"GDIV beg:PW") +END IF ! !* 1. COMPUTE LOOP BOUNDS ! ------------------- @@ -180,8 +218,10 @@ CALL CONTRAV(HLBCX,HLBCY,PU,PV,PW,PDXX,PDYY,PDZZ,PDZX,PDZY,ZUC,ZVC,ZWC,4) !* 3. COMPUTE THE DIVERGENCE ! ---------------------- ! -PGDIV=0. !usefull for the four corners and halo zones +PGDIV=0. !useful for the four corners and halo zones ! +Z1(:,:,:)=0.;Z2(:,:,:)=0.;Z3(:,:,:)=0. !Set to 0. to allow check with mppdb_check + Z1(IIB:IIE,:,:)=ZUC(IIB+1:IIE+1,:,:)-ZUC(IIB:IIE,:,:) Z2(:,IJB:IJE,:)=ZVC(:,IJB+1:IJE+1,:)-ZVC(:,IJB:IJE,:) Z3(:,:,IKB:IKE)=ZWC(:,:,IKB+1:IKE+1)-ZWC(:,:,IKB:IKE) @@ -190,7 +230,7 @@ PGDIV(IIB:IIE,IJB:IJE,IKB:IKE)= Z1(IIB:IIE,IJB:IJE,IKB:IKE) + & Z2(IIB:IIE,IJB:IJE,IKB:IKE) + & Z3(IIB:IIE,IJB:IJE,IKB:IKE) ! only the divergences computed - ! in the inner mass points are meaningful + ! in the inner mass points are meaningful ! !------------------------------------------------------------------------------- ! @@ -273,6 +313,386 @@ IF (.NOT. L2D .AND. HLBCY(2) /= 'CYCL' .AND. LNORTH_ll()) THEN END IF ! ! +! +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PDZX,"GDIV end:PDZX") + CALL MPPDB_CHECK(PDZY,"GDIV end:PDZY") + CALL MPPDB_CHECK(PU,"GDIV end:PU") + CALL MPPDB_CHECK(PV,"GDIV end:PV") + CALL MPPDB_CHECK(PW,"GDIV end:PW") + !Check all OUT arrays + CALL MPPDB_CHECK(PGDIV,"GDIV end:PGDIV") +END IF !------------------------------------------------------------------------------- ! END SUBROUTINE GDIV + +#ifdef MNH_OPENACC +! #################################################################### + SUBROUTINE GDIV_DEVICE(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PU,PV,PW,PGDIV) +! #################################################################### +! +!!**** *GDIV * - Compute J times the divergence of 1/J times a vector defined +!! by its cartesian components +!! +!! PURPOSE +!! ------- +! The purpose of this function is to compute J times the divergence of +! 1/J times a vector which cartesian components are (U, V, W). The result +! is localized at a mass point: +! +! GDIV = dxf (UC) + dyf (VC) + dzf (WC) +! +! where UC, VC, WC are the contravariant components of the vector. +! The array is completed outside the physical domain by the value of the +! normal component at the boundary. +! +!!** METHOD +!! ------ +!! First, we compute the contravariant components by using +!! the suboutine CONTRAV (The metric coefficients are dummy arguments). Then +!! we use the Shuman finite difference operators DXF, DYF, DZF to compute +!! the divergence. The result is localized at a mass point. +!! +!! EXTERNAL +!! -------- +!! SUBROUTINE CONTRAV : compute the contavariant components +!! +!! IMPLICIT ARGUMENTS +!! ------------------ +!! Module MODD_PARAMETERS: declaration of parameter variables +!! JPHEXT, JPVEXT: define the number of marginal points out of the +!! physical domain along horizontal and vertical directions respectively +!! Module MODD_CONF: model configurations +!! L2D: logical for 2D model version +!! Module MODI_SHUMAN : interface for the Shuman operators +!! Module MODI_CONTRAV : interface for the contravariant components +!! +!! REFERENCE +!! --------- +!! Book2 of documentation (routine GDIV) +!! +!! +!! AUTHOR +!! ------ +!! P. Hereil and J. Stein * Meteo France * +!! +!! MODIFICATIONS +!! ------------- +!! Original 11/07/94 +!! 17/07/97 ( J. Stein and V. Masson) initialize the corner +!! verticals +!! 30/09/97 ( J. Stein ) bug correction for the case of +!! non-vanishing oro. at the open lbc +!! Modification 15/06/98 (D.Lugato, R.Guivarch) Parallelisation +!! 22/08/02 (P Jabouille) simplification of parallel coding +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +USE MODD_PARAMETERS +USE MODD_CONF +USE MODI_CONTRAV +! +USE MODE_ll +! +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +USE MODE_MPPDB +! +IMPLICIT NONE +! +!* 0.1 declarations of arguments +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type +! + ! Metric coefficients: +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX ! d*xx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDYY ! d*yy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZX ! d*zx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZY ! d*zy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZZ ! d*zz +! + ! Field components +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PU ! along x +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PV ! along y +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PW ! along z +! +REAL, DIMENSION(:,:,:), INTENT(OUT) :: PGDIV ! divergence at + ! a mass point +! +!* 0.2 declarations of local variables +! + ! Contravariant components along: +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZUC ! x +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZVC ! y +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZWC ! z +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: Z1,Z2,Z3 !work arrays +! +INTEGER :: IIB ! indice I for the first inner mass point along x +INTEGER :: IIE ! indice I for the last inner mass point along x +INTEGER :: IJB ! indice J for the first inner mass point along y +INTEGER :: IJE ! indice J for the last inner mass point along y +INTEGER :: IKB ! indice K for the first inner mass point along z +INTEGER :: IKE ! indice K for the last inner mass point along z +! +INTEGER :: JI,JJ,JK ! loop indexes +! +INTEGER :: IIU,IJU,IKU +! +LOGICAL :: GWEST,GEAST,GSOUTH,GNORTH +! +!------------------------------------------------------------------------------- +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"GDIV beg:PDXX") + CALL MPPDB_CHECK(PDYY,"GDIV beg:PDYY") + CALL MPPDB_CHECK(PDZZ,"GDIV beg:PDZZ") + !Check all INOUT arrays + CALL MPPDB_CHECK(PDZX,"GDIV beg:PDZX") + CALL MPPDB_CHECK(PDZY,"GDIV beg:PDZY") + CALL MPPDB_CHECK(PU,"GDIV beg:PU") + CALL MPPDB_CHECK(PV,"GDIV beg:PV") + CALL MPPDB_CHECK(PW,"GDIV beg:PW") +END IF + +!$acc data present( PDXX, PDYY, PDZX, PDZY, PDZZ, PU, PV, PW, PGDIV ) + +! +!* 1. COMPUTE LOOP BOUNDS +! ------------------- +! +! +CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) +IKB=1+JPVEXT +IKE=SIZE(PU,3) - JPVEXT +! +! +IIU=SIZE(PU,1) +IJU=SIZE(PU,2) +IKU=SIZE(PU,3) +! +#ifndef MNH_OPENACC +ALLOCATE(ZUC(IIU,IJU,IKU),ZVC(IIU,IJU,IKU),ZWC(IIU,IJU,IKU)) +ALLOCATE(Z1(IIU,IJU,IKU),Z2(IIU,IJU,IKU),Z3(IIU,IJU,IKU)) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZUC, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZVC, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZWC, IIU, IJU, IKU ) +CALL MNH_MEM_GET( Z1, IIU, IJU, IKU ) +CALL MNH_MEM_GET( Z2, IIU, IJU, IKU ) +CALL MNH_MEM_GET( Z3, IIU, IJU, IKU ) +!$acc data present( ZUC, ZVC, ZWC, Z1, Z2, Z3 ) +#endif +! +GWEST = ( HLBCX(1) /= 'CYCL' .AND. LWEST_ll() ) +GEAST = ( HLBCX(2) /= 'CYCL' .AND. LEAST_ll() ) +GSOUTH = ( .NOT. L2D .AND. HLBCY(1) /= 'CYCL' .AND. LSOUTH_ll() ) +GNORTH = ( .NOT. L2D .AND. HLBCY(2) /= 'CYCL' .AND. LNORTH_ll() ) +! +!------------------------------------------------------------------------------- +! +!* 2. COMPUTE THE CONTRAVARIANT COMPONENTS +! ------------------------------------ +! +!* 2.1 prepare the boundary conditions +! + +! +!$acc kernels + DO CONCURRENT ( JI=1:IIU,JJ=1:IJU ) + PU(JI,JJ,IKB-1)=PU(JI,JJ,IKB) + PU(JI,JJ,IKE+1)=PU(JI,JJ,IKE) + PV(JI,JJ,IKB-1)=PV(JI,JJ,IKB) + PV(JI,JJ,IKE+1)=PV(JI,JJ,IKE) + END DO +!$acc end kernels +! +! +!* 2.1 compute the contravariant components +! +#ifndef MNH_OPENACC +CALL CONTRAV(HLBCX,HLBCY,PU,PV,PW,PDXX,PDYY,PDZZ,PDZX,PDZY,ZUC,ZVC,ZWC,4) +#else +CALL CONTRAV_DEVICE(HLBCX,HLBCY,PU,PV,PW,PDXX,PDYY,PDZZ,PDZX,PDZY,ZUC,ZVC,ZWC,4, & + ODATA_ON_DEVICE=.TRUE.) +#endif +! +!------------------------------------------------------------------------------- +! +!* 3. COMPUTE THE DIVERGENCE +! ---------------------- +! +!$acc kernels +Z1(:,:,:)=0. +Z2(:,:,:)=0. +Z3(:,:,:)=0. + +#ifdef MNH_COMPILER_NVHPC +!$acc loop independent collapse(3) +#endif +DO CONCURRENT (JI=1:IIU,JJ=1:IJU,JK=1:IKU) + PGDIV(JI,JJ,JK)=0. !useful for the four corners and halo zones +ENDDO +! +#ifdef MNH_COMPILER_NVHPC +!$acc loop independent collapse(3) +#endif +DO CONCURRENT (JI=IIB:IIE,JJ=1:IJU,JK=1:IKU) + Z1(JI,JJ,JK)=ZUC(JI+IIB+1-(IIB) ,JJ,JK)-ZUC(JI,JJ,JK) +ENDDO +#ifdef MNH_COMPILER_NVHPC +!$acc loop independent collapse(3) +#endif +DO CONCURRENT (JI=1:IIU,JJ=IJB:IJE,JK=1:IKU) + Z2(JI,JJ,JK)=ZVC(JI,JJ+IJB+1-(IJB) ,JK)-ZVC(JI,JJ,JK) +ENDDO +#ifdef MNH_COMPILER_NVHPC +!$acc loop independent collapse(3) +#endif +DO CONCURRENT (JI=1:IIU,JJ=1:IJU,JK=IKB:IKE) + Z3(JI,JJ,JK)=ZWC(JI,JJ,JK+IKB+1-(IKB) )-ZWC(JI,JJ,JK) +ENDDO +! +PGDIV(IIB:IIE,IJB:IJE,IKB:IKE)= Z1(IIB:IIE,IJB:IJE,IKB:IKE) + & + Z2(IIB:IIE,IJB:IJE,IKB:IKE) + & + Z3(IIB:IIE,IJB:IJE,IKB:IKE) + ! only the divergences computed + ! in the inner mass points are meaningful +!$acc end kernels +! +!------------------------------------------------------------------------------- +! +!* 4. SET DIVERGENCE AT THE OUTER POINTS +! ---------------------------------- +! +!* 4.1 set divergence at the upper and lower boundary +! +! we set the divergence equal to the vertical contravariant component above +! and under the physical domain +!$acc kernels async +DO JJ=IJB,IJE + DO JI=IIB,IIE + PGDIV(JI,JJ,IKB-1)=ZWC(JI,JJ,IKB) + PGDIV(JI,JJ,IKE+1)=ZWC(JI,JJ,IKE+1) + END DO +END DO +!$acc end kernels +! +!* 4.2 set divergence at the lateral boundaries +! +! we set the divergence equal to the horizontal contravariant component at +! the right and the left of the physical domain in both horizontal directions +! for non-periodic cases +! +IF( GWEST ) THEN + !$acc kernels async + DO JK=IKB,IKE + DO JJ=IJB,IJE + PGDIV(IIB-1,JJ,JK)=ZUC(IIB,JJ,JK) + END DO + END DO + !$acc end kernels +END IF +! +IF( GEAST ) THEN + !$acc kernels async + DO JK=IKB,IKE + DO JJ=IJB,IJE + PGDIV(IIE+1,JJ,JK)=ZUC(IIE+1,JJ,JK) + END DO + END DO + !$acc end kernels +END IF +! +! +IF ( GSOUTH ) THEN + !$acc kernels async + DO JK=IKB,IKE + DO JI=IIB,IIE + PGDIV(JI,IJB-1,JK)=ZVC(JI,IJB,JK) + END DO + END DO + !$acc end kernels +END IF +! +IF ( GNORTH ) THEN + !$acc kernels async + DO JK=IKB,IKE + DO JI=IIB,IIE + PGDIV(JI,IJE+1,JK)=ZVC(JI,IJE+1,JK) + END DO + END DO + !$acc end kernels +END IF +! +! wait on GPU for all boundary condition update +!$acc wait +! +!* 4.3 set divergence at the corner points +! +! it is the following of the condition of copy the horizontal component +! under the bottom of the model +! +IF( GWEST ) THEN + !$acc kernels async + PGDIV(IIB-1,IJB:IJE,IKB-1)=PGDIV(IIB-1,IJB:IJE,IKB) + PGDIV(IIB-1,IJB:IJE,IKE+1)=PGDIV(IIB-1,IJB:IJE,IKE) + !$acc end kernels +END IF +! +IF ( GEAST ) THEN + !$acc kernels async + PGDIV(IIE+1,IJB:IJE,IKB-1)=PGDIV(IIE+1,IJB:IJE,IKB) + PGDIV(IIE+1,IJB:IJE,IKE+1)=PGDIV(IIE+1,IJB:IJE,IKE) + !$acc end kernels +END IF +! +IF ( GSOUTH ) THEN + !$acc kernels async + PGDIV(IIB:IIE,IJB-1,IKB-1)=PGDIV(IIB:IIE,IJB-1,IKB) + PGDIV(IIB:IIE,IJB-1,IKE+1)=PGDIV(IIB:IIE,IJB-1,IKE) + !$acc end kernels +END IF +! +IF ( GNORTH ) THEN + !$acc kernels async + PGDIV(IIB:IIE,IJE+1,IKB-1)=PGDIV(IIB:IIE,IJE+1,IKB) + PGDIV(IIB:IIE,IJE+1,IKE+1)=PGDIV(IIB:IIE,IJE+1,IKE) + !$acc end kernels +END IF +! +! wait on GPU for all corner update +!$acc wait +! +#ifndef MNH_OPENACC +DEALLOCATE( ZUC, ZVC, ZWC, Z1, Z2, Z3 ) +#else +!$acc end data +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PDZX,"GDIV end:PDZX") + CALL MPPDB_CHECK(PDZY,"GDIV end:PDZY") + CALL MPPDB_CHECK(PU,"GDIV end:PU") + CALL MPPDB_CHECK(PV,"GDIV end:PV") + CALL MPPDB_CHECK(PW,"GDIV end:PW") + !Check all OUT arrays + CALL MPPDB_CHECK(PGDIV,"GDIV end:PGDIV") +END IF +!------------------------------------------------------------------------------- +! +END SUBROUTINE GDIV_DEVICE +#endif diff --git a/src/MNH/mass_leak.f90 b/src/MNH/mass_leak.f90 index 20faa17c6e1866af765ce6512fd804dad5dda48c..5dbbe0e5be5703aa61076158b65b085611ea5233 100644 --- a/src/MNH/mass_leak.f90 +++ b/src/MNH/mass_leak.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1995-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1995-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -104,6 +104,9 @@ USE MODD_PARAMETERS ! USE MODE_ll USE MODE_MPPDB +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif !JUAN USE MODE_REPRO_SUM !JUAN @@ -128,7 +131,11 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRVS ! momentum tendencies ! !JUAN16 REAL :: ZLEAK ! total leak of mass +#ifndef MNH_OPENACC REAL, ALLOCATABLE, DIMENSION (:,:) :: ZLEAK_W_2D , ZLEAK_E_2D , ZLEAK_S_2D , ZLEAK_N_2D +#else +REAL, POINTER, CONTIGUOUS, DIMENSION (:,:) :: ZLEAK_W_2D , ZLEAK_E_2D , ZLEAK_S_2D , ZLEAK_N_2D +#endif !JUAN16 REAL :: ZUSTOP ! wind correction! @@ -143,9 +150,12 @@ INTEGER :: JJ ! Loop index in y direction INTEGER :: JK ! Loop index in z direction ! INTEGER :: IINFO_ll ! return code of parallel routine +! +LOGICAL :: GWEST,GEAST,GSOUTH,GNORTH REAL :: ZLEAK_W,ZLEAK_E,ZLEAK_S,ZLEAK_N -! +!$acc data present( PDXX, PDYY, PRHODJ, PRUS, PRVS ) + IF (MPPDB_INITIALIZED) THEN !Check all IN arrays CALL MPPDB_CHECK(PDXX, "MASS_LEAK beg:PDXX") @@ -165,6 +175,10 @@ CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) IKB = 1 + JPVEXT IKE = SIZE(PRUS,3) - JPVEXT ! +GWEST = ( HLBCX(1) /= 'CYCL' .AND. LWEST_ll() ) +GEAST = ( HLBCX(2) /= 'CYCL' .AND. LEAST_ll() ) +GSOUTH = ( HLBCY(1) /= 'CYCL' .AND. LSOUTH_ll() ) +GNORTH = ( HLBCY(2) /= 'CYCL' .AND. LNORTH_ll() ) ! !------------------------------------------------------------------------------- ! @@ -177,52 +191,89 @@ ZLEAK_W=0. ZLEAK_S=0. ZLEAK_N=0. ! +#ifndef MNH_OPENACC IF( HLBCX(1) /= 'CYCL' ) THEN ALLOCATE( ZLEAK_W_2D(IIB:IIB,IJB:IJE)) - ZLEAK_W_2D = 0.0 - IF (LWEST_ll()) THEN + ALLOCATE( ZLEAK_E_2D(IIE+1:IIE+1,IJB:IJE)) +END IF +IF( HLBCY(1) /= 'CYCL' ) THEN + ALLOCATE( ZLEAK_S_2D(IIB:IIE,IJB:IJB)) + ALLOCATE( ZLEAK_N_2D(IIB:IIE,IJE+1:IJE+1)) +END IF +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() +IF( HLBCX(1) /= 'CYCL' ) THEN + CALL MNH_MEM_GET(ZLEAK_W_2D , IIB ,IIB , IJB,IJE ) + CALL MNH_MEM_GET(ZLEAK_E_2D , IIE+1,IIE+1 , IJB,IJE ) +END IF +IF( HLBCY(1) /= 'CYCL' ) THEN + CALL MNH_MEM_GET(ZLEAK_S_2D , IIB,IIE , IJB ,IJB ) + CALL MNH_MEM_GET(ZLEAK_N_2D , IIB,IIE , IJE+1,IJE+1 ) +END IF +#endif +! +IF( HLBCX(1) /= 'CYCL' ) THEN + !$acc kernels present(ZLEAK_W_2D) async + ZLEAK_W_2D(:,:) = 0.0 + IF( GWEST ) THEN + !$acc loop seq DO JK=IKB,IKE DO JJ=IJB,IJE ZLEAK_W_2D(IIB,JJ) = ZLEAK_W_2D(IIB,JJ) - 1./PDXX(IIB,JJ,JK) *PRUS(IIB,JJ,JK) END DO - END DO + END DO END IF -! - ALLOCATE( ZLEAK_E_2D(IIE+1:IIE+1,IJB:IJE)) - ZLEAK_E_2D = 0.0 - IF (LEAST_ll()) THEN - DO JK=IKB,IKE - DO JJ=IJB,IJE - ZLEAK_E_2D(IIE+1,JJ) = ZLEAK_E_2D(IIE+1,JJ) + 1./PDXX(IIE+1,JJ,JK)*PRUS(IIE+1,JJ,JK) - END DO - END DO - END IF -! + !$acc end kernels + ! + !$acc kernels present(ZLEAK_E_2D) async + ZLEAK_E_2D(:,:) = 0.0 + IF( GEAST ) THEN + !$acc loop seq + DO JK=IKB,IKE + DO JJ=IJB,IJE + ZLEAK_E_2D(IIE+1,JJ) = ZLEAK_E_2D(IIE+1,JJ) + 1./PDXX(IIE+1,JJ,JK)*PRUS(IIE+1,JJ,JK) + END DO + END DO + END IF + !$acc end kernels + ! + !$acc wait + ! + !$acc update host(ZLEAK_W_2D,ZLEAK_E_2D) ZLEAK_W = SUM_DD_R2_ll(ZLEAK_W_2D) ZLEAK_E = SUM_DD_R2_ll(ZLEAK_E_2D) END IF ! IF( HLBCY(1) /= 'CYCL' ) THEN - ALLOCATE( ZLEAK_S_2D(IIB:IIE,IJB:IJB)) - ZLEAK_S_2D = 0.0 - IF (LSOUTH_ll()) THEN - DO JI=IIB,IIE - DO JK=IKB,IKE + ! + !$acc kernels present(ZLEAK_S_2D) async + ZLEAK_S_2D(:,:) = 0.0 + IF( GSOUTH ) THEN + !$acc loop seq + DO JK=IKB,IKE + DO JI=IIB,IIE ZLEAK_S_2D(JI,IJB) = ZLEAK_S_2D(JI,IJB) - 1./PDYY(JI,IJB,JK) *PRVS(JI,IJB,JK) END DO END DO END IF + !$acc end kernels ! - ALLOCATE( ZLEAK_N_2D(IIB:IIE,IJE+1:IJE+1)) - ZLEAK_N_2D = 0.0 - IF (LNORTH_ll()) THEN - DO JI=IIB,IIE - DO JK=IKB,IKE + !$acc kernels present(ZLEAK_N_2D) async + ZLEAK_N_2D(:,:) = 0.0 + IF ( GNORTH ) THEN + !$acc loop seq + DO JK=IKB,IKE + DO JI=IIB,IIE ZLEAK_N_2D(JI,IJE+1) = ZLEAK_N_2D(JI,IJE+1) + 1./PDYY(JI,IJE+1,JK)*PRVS(JI,IJE+1,JK) END DO END DO END IF + !$acc end kernels + ! + !$acc wait ! + !$acc update host(ZLEAK_S_2D,ZLEAK_N_2D) ZLEAK_S = SUM_DD_R2_ll(ZLEAK_S_2D) ZLEAK_N = SUM_DD_R2_ll(ZLEAK_N_2D) END IF @@ -239,18 +290,31 @@ ZLEAK = ZLEAK_E + ZLEAK_W + ZLEAK_S + ZLEAK_N ZUSTOP=ZLEAK ZUSTOP=ZUSTOP/PLINMASS ! -IF (HLBCX(1)=='OPEN' .AND. LWEST_ll() ) & - PRUS(IIB,:,:)=PRUS(IIB,:,:)+ZUSTOP*0.5*(PRHODJ(IIB,:,:)+PRHODJ(IIB-1,:,:)) +IF (HLBCX(1)=='OPEN' .AND. LWEST_ll() ) THEN + !$acc kernels async + PRUS(IIB,:,:)=PRUS(IIB,:,:)+ZUSTOP*0.5*(PRHODJ(IIB,:,:)+PRHODJ(IIB-1,:,:)) + !$acc end kernels +END IF ! -IF (HLBCX(2)=='OPEN' .AND. LEAST_ll() ) & - PRUS(IIE+1,:,:)=PRUS(IIE+1,:,:)-ZUSTOP*0.5*(PRHODJ(IIE+1,:,:)+PRHODJ(IIE,:,:)) +IF (HLBCX(2)=='OPEN' .AND. LEAST_ll() ) THEN + !$acc kernels async + PRUS(IIE+1,:,:)=PRUS(IIE+1,:,:)-ZUSTOP*0.5*(PRHODJ(IIE+1,:,:)+PRHODJ(IIE,:,:)) + !$acc end kernels +END IF ! -IF (HLBCY(1)=='OPEN' .AND. LSOUTH_ll() ) & - PRVS(:,IJB,:)=PRVS(:,IJB,:)+ZUSTOP*0.5*(PRHODJ(:,IJB,:)+PRHODJ(:,IJB-1,:)) +IF (HLBCY(1)=='OPEN' .AND. LSOUTH_ll() ) THEN + !$acc kernels async + PRVS(:,IJB,:)=PRVS(:,IJB,:)+ZUSTOP*0.5*(PRHODJ(:,IJB,:)+PRHODJ(:,IJB-1,:)) + !$acc end kernels +END IF ! -IF (HLBCY(2)=='OPEN' .AND. LNORTH_ll() ) & - PRVS(:,IJE+1,:)=PRVS(:,IJE+1,:)-ZUSTOP*0.5*(PRHODJ(:,IJE+1,:)+PRHODJ(:,IJE,:)) +IF (HLBCY(2)=='OPEN' .AND. LNORTH_ll() ) THEN + !$acc kernels async + PRVS(:,IJE+1,:)=PRVS(:,IJE+1,:)-ZUSTOP*0.5*(PRHODJ(:,IJE+1,:)+PRHODJ(:,IJE,:)) + !$acc end kernels +END IF ! +!$acc wait ! ! IF (MPPDB_INITIALIZED) THEN @@ -258,6 +322,21 @@ IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(PRUS,"MASS_LEAK end:PRUS") CALL MPPDB_CHECK(PRVS,"MASS_LEAK end:PRVS") END IF + +#ifndef MNH_OPENACC +IF( HLBCX(1) /= 'CYCL' ) THEN + DEALLOCATE(ZLEAK_W_2D,ZLEAK_E_2D) +END IF +IF( HLBCY(1) /= 'CYCL' ) THEN + DEALLOCATE( ZLEAK_S_2D,ZLEAK_N_2D) +END IF +#else +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + !------------------------------------------------------------------------------- ! END SUBROUTINE MASS_LEAK diff --git a/src/MNH/modeln.f90 b/src/MNH/modeln.f90 index 321e097e185c1671a776e030e80f7173e741dbb8..effbaa577f42feb9b90f084fc635cbecf7767479 100644 --- a/src/MNH/modeln.f90 +++ b/src/MNH/modeln.f90 @@ -1816,6 +1816,9 @@ IF(.NOT. L1D) THEN XRVS_PRES = XRVS XRWS_PRES = XRWS ! +!$acc data copyin( XRHOM, XAF, XBFY, XCF, XTRIGSX, XTRIGSY, NIFAXX, NIFAXY, XBFB, XBF_SXP2_YP1_Z ) & +!$acc & copyin( XTHT, XRT, XRHODREF, XTHVREF, XRVREF, XEXNREF, XPABST ) & +!$acc & present( XRHODJ, XDXX, XDYY, XDZZ, XDZX, XDZY, XRUS, XRVS, XRWS ) CALL PRESSUREZ( CLBCX,CLBCY,CPRESOPT,NITR,LITRADJ,KTCOUNT, XRELAX,IMI, & XRHODJ,XDXX,XDYY,XDZZ,XDZX,XDZY,XDXHATM,XDYHATM,XRHOM, & XAF,XBFY,XCF,XTRIGSX,XTRIGSY,NIFAXX,NIFAXY, & @@ -1824,6 +1827,7 @@ IF(.NOT. L1D) THEN XRUS, XRVS, XRWS, XPABST, & XBFB,& XBF_SXP2_YP1_Z) !JUAN Z_SPLITING +!$acc end data ! XRUS_PRES = XRUS - XRUS_PRES + ZRUS XRVS_PRES = XRVS - XRVS_PRES + ZRVS diff --git a/src/MNH/p_abs.f90 b/src/MNH/p_abs.f90 index 16f888bc871cd2519f079ed9883dd8e8aa7fb27f..91f4f669fdf3ef21554e345d17d0e7dd0d215b3f 100644 --- a/src/MNH/p_abs.f90 +++ b/src/MNH/p_abs.f90 @@ -118,14 +118,19 @@ USE MODD_PARAMETERS USE MODD_REF, ONLY: LBOUSS ! USE MODE_ll +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif USE MODE_REPRO_SUM ! #ifdef MNH_BITREP USE MODI_BITREP #endif ! +USE MODE_MPPDB +! IMPLICIT NONE -! +! !* 0.1 Declarations of dummy arguments : ! ! @@ -163,7 +168,7 @@ INTEGER :: IIB ! indice I Beginning in x direction INTEGER :: IJB ! indice J Beginning in y direction INTEGER :: IKB ! indice K Beginning in z direction INTEGER :: IIE ! indice I End in x direction -INTEGER :: IJE ! indice J End in y direction +INTEGER :: IJE ! indice J End in y direction INTEGER :: IKE ! indice K End in z direction INTEGER :: JI ! Loop index in x direction INTEGER :: JJ ! Loop index in y direction @@ -177,34 +182,75 @@ REAL :: ZMASSGUESS ! guess of mass resulting of the pressure function ! provided by the pressure solveur, to an arbitary constant REAL :: ZWATERMASST ! Total mass of water Mw !JUAN16 +#ifndef MNH_OPENACC REAL, ALLOCATABLE, DIMENSION(:,:) :: ZMASS_O_PI_2D,ZMASSGUESS_2D,ZWATERMASST_2D +#else +REAL, DIMENSION(:,:) , POINTER , CONTIGUOUS :: ZMASS_O_PI_2D,ZMASSGUESS_2D,ZWATERMASST_2D +#endif !JUAN16 REAL :: ZPI0 ! constant to retrieve the absolute Exner pressure INTEGER :: JWATER ! loop index on the different types of water -REAL, DIMENSION(SIZE(PTHT,1),SIZE(PTHT,2),SIZE(PTHT,3)) & +#ifndef MNH_OPENACC +REAL, DIMENSION(:,:,:) , ALLOCATABLE & +#else +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS & +#endif :: ZRTOT, ZRHOREF, ZWORK REAL :: ZPHI0 ! INTEGER :: IINFO_ll ! +LOGICAL :: GPRVREF0 +! +INTEGER :: IIU,IJU !------------------------------------------------------------------------------- +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PTHT,"P_ABS beg:PTHT") + CALL MPPDB_CHECK(PRT,"P_ABS beg:PRT") + CALL MPPDB_CHECK(PRHODJ,"P_ABS beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"P_ABS beg:PTHETAV") + CALL MPPDB_CHECK(PRHODREF,"P_ABS beg:PRHODREF") + CALL MPPDB_CHECK(PTHVREF,"P_ABS beg:PTHVREF") + CALL MPPDB_CHECK(PRVREF,"P_ABS beg:PRVREF") + CALL MPPDB_CHECK(PEXNREF,"P_ABS beg:PEXNREF") + CALL MPPDB_CHECK(PPHIT,"P_ABS beg:PPHIT") +END IF ! !* 1. COMPUTE DIMENSIONS OF ARRAYS AND OTHER INDICES: ! ---------------------------------------------- ! +IIU = SIZE(PTHT,1) +IJU = SIZE(PTHT,2) IKU = SIZE(PTHT,3) IKB = 1 + JPVEXT IKE = IKU - JPVEXT ! CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) ! -ALLOCATE(ZMASS_O_PI_2D(IIB:IIE,IJB:IJE)) -ALLOCATE(ZMASSGUESS_2D(IIB:IIE,IJB:IJE)) -ALLOCATE(ZWATERMASST_2D(IIB:IIE,IJB:IJE)) +GPRVREF0 = ( SIZE(PRVREF,1) == 0 ) +! ! ZP00_O_RD = XP00 / XRD ZCVD_O_RD = (XCPD - XRD) / XRD ! +#ifndef MNH_OPENACC +ALLOCATE(ZMASS_O_PI_2D(IIB:IIE,IJB:IJE)) +ALLOCATE(ZMASSGUESS_2D(IIB:IIE,IJB:IJE)) +ALLOCATE(ZWATERMASST_2D(IIB:IIE,IJB:IJE)) +ALLOCATE (ZRTOT(IIU,IJU,IKU), ZRHOREF(IIU,IJU,IKU), ZWORK(IIU,IJU,IKU)) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET(ZMASS_O_PI_2D , IIB,IIE , IJB,IJE) +CALL MNH_MEM_GET(ZMASSGUESS_2D , IIB,IIE , IJB,IJE) +CALL MNH_MEM_GET(ZWATERMASST_2D , IIB,IIE , IJB,IJE) +CALL MNH_MEM_GET( ZRTOT, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZRHOREF, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZWORK, IIU, IJU, IKU ) +#endif + !------------------------------------------------------------------------------- ! ! @@ -214,11 +260,13 @@ ZCVD_O_RD = (XCPD - XRD) / XRD ! ! IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN -! + ! + !$acc kernels IF(KRR > 0) THEN ! ! compute the mixing ratio of the total water (ZRTOT) ZRTOT(:,:,:) = PRT(:,:,:,1) + !$acc loop seq DO JWATER = 2 , 1+KRRL+KRRI ZRTOT(:,:,:) = ZRTOT(:,:,:) + PRT(:,:,:,JWATER) END DO @@ -229,20 +277,23 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN ZMASSGUESS_2D = 0. ZMASS_O_PI_2D = 0. ZWATERMASST_2D = 0. + !$acc end kernels ! IF ( CEQNSYS == 'DUR' ) THEN + !$acc kernels ! compute the Jacobian in ZWORK - IF ( SIZE(PRVREF,1) == 0 ) THEN + IF ( GPRVREF0 ) THEN ZWORK(:,:,:)= PRHODJ * XTH00 / ( PRHODREF * PTHVREF ) ELSE ZWORK(:,:,:)=PRHODJ * XTH00 & / ( PRHODREF * PTHVREF * (1. + PRVREF) ) END IF ! + !$acc loop seq DO JK = IKB,IKE - DO JJ = IJB,IJE - DO JI = IIB,IIE - ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & + !$acc_nv loop independent collapse(2) + DO CONCURRENT (JI = IIB:IIE , JJ = IJB:IJE ) + ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & #ifndef MNH_BITREP (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else @@ -252,10 +303,10 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN ZMASS_O_PI_2D(JI,JJ) = ZMASS_O_PI_2D(JI,JJ) + ZWORK(JI,JJ,JK) / PTHETAV(JI,JJ,JK) ZWATERMASST_2D(JI,JJ) = ZWATERMASST_2D(JI,JJ) + & ZRTOT(JI,JJ,JK) * ZWORK(JI,JJ,JK) * PRHODREF(JI,JJ,JK) - END DO - END DO + END DO END DO -! + !$acc end kernels + ! ELSE DO JK = IKB,IKE DO JJ = IJB,IJE @@ -277,33 +328,40 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN END IF ! ! + !$acc update host(ZMASSGUESS_2D,ZMASS_O_PI_2D,ZWATERMASST_2D) ZMASSGUESS = SUM_DD_R2_ll(ZMASSGUESS_2D) ZMASS_O_PI = SUM_DD_R2_ll(ZMASS_O_PI_2D) ZWATERMASST = SUM_DD_R2_ll(ZWATERMASST_2D) ! ZMASS_O_PI = ZMASS_O_PI*ZP00_O_RD*ZCVD_O_RD ZPI0 = (PDRYMASST + ZWATERMASST - ZP00_O_RD*ZMASSGUESS ) / ZMASS_O_PI + !$acc kernels PPHIT(:,:,:) = PPHIT(:,:,:) + ZPI0 + !$acc end kernels ! ! ! ! Second iteration ! + !$acc kernels ZMASSGUESS_2D = 0. + !$acc end kernels IF ( CEQNSYS == 'DUR' ) THEN - DO JK = IKB,IKE - DO JJ = IJB,IJE - DO JI = IIB,IIE - ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & + !$acc kernels + !$acc loop seq + DO JK = IKB,IKE + !$acc_nv loop independent collapse(2) + DO CONCURRENT (JI = IIB:IIE , JJ = IJB:IJE ) + ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & #ifndef MNH_BITREP - (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & + (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else - BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & + BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & #endif - * ZWORK(JI,JJ,JK) / PTHETAV(JI,JJ,JK) + * ZWORK(JI,JJ,JK) / PTHETAV(JI,JJ,JK) END DO - END DO - END DO + END DO + !$acc end kernels ELSE DO JK = IKB,IKE DO JJ = IJB,IJE @@ -320,11 +378,13 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN END DO END IF ! - + !$acc update host(ZMASSGUESS_2D) ZMASSGUESS = SUM_DD_R2_ll(ZMASSGUESS_2D) ! ZPI0 = (PDRYMASST + ZWATERMASST - ZP00_O_RD*ZMASSGUESS ) / ZMASS_O_PI + !$acc kernels PPHIT(:,:,:) = PPHIT(:,:,:) + ZPI0 + !$acc end kernels ! ! ELSEIF( CEQNSYS == 'LHE' ) THEN @@ -413,6 +473,16 @@ ELSEIF( CEQNSYS == 'LHE' ) THEN ! END IF ! +#ifndef MNH_OPENACC +DEALLOCATE(ZMASS_O_PI_2D,ZMASSGUESS_2D,ZWATERMASST_2D) +DEALLOCATE (ZRTOT, ZRHOREF, ZWORK) +#else +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK(PPHIT,"P_ABS end:PPHIT") +END IF !------------------------------------------------------------------------------- ! END SUBROUTINE P_ABS diff --git a/src/MNH/pressurez.f90 b/src/MNH/pressurez.f90 index ee3bbf18be8ebcf6b0de7e34f6f6a86576f6fbc9..a3716704e5a9d3d671542a4141c87d5c4ce6cf98 100644 --- a/src/MNH/pressurez.f90 +++ b/src/MNH/pressurez.f90 @@ -243,6 +243,9 @@ USE MODD_VAR_ll, ONLY: NMNH_COMM_WORLD , NPROC ! use mode_budget, only: Budget_store_end USE MODE_ll +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif USE MODE_MPPDB USE MODE_MSG USE MODE_SUM2_ll, ONLY: GMAXLOC_ll @@ -256,12 +259,18 @@ USE MODI_CONRESOLZ USE MODI_FLAT_INV USE MODI_FLAT_INVZ USE MODI_GDIV +#ifdef MNH_OPENACC +USE MODI_GET_HALO +#endif USE MODI_GRADIENT_M USE MODI_IBM_BALANCE USE MODI_MASS_LEAK USE MODI_P_ABS USE MODI_RICHARDSON USE MODI_SHUMAN +#ifdef MNH_OPENACC +USE MODI_SHUMAN_DEVICE +#endif ! IMPLICIT NONE ! @@ -347,7 +356,11 @@ REAL, OPTIONAL :: PRESIDUAL ! ! Metric coefficients: ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PPABST,1),SIZE(PPABST,2),SIZE(PPABST,3)) :: ZDV_SOURCE +#else +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZDV_SOURCE +#endif ! ! divergence of the sources ! INTEGER :: IIB ! indice I for the first inner mass point along x @@ -359,11 +372,19 @@ INTEGER :: IKE ! indice K for the last inner mass point along z INTEGER :: ILUOUT ! Logical unit of output listing INTEGER :: IRESP ! Return code of FM routines ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PPABST,1),SIZE(PPABST,2),SIZE(PPABST,3)) :: ZTHETAV, & ! virtual potential temperature ZPHIT ! MAE + DUR => Exner function perturbation ! LHE => Exner function perturbation * CPD * THVREF +#else +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZTHETAV, & + ! virtual potential temperature + ZPHIT + ! MAE + DUR => Exner function perturbation + ! LHE => Exner function perturbation * CPD * THVREF +#endif ! REAL :: ZPHI0 REAL :: ZRV_OV_RD ! XRV / XRD @@ -374,16 +395,31 @@ INTEGER :: IIU,IJU,IKU ! array sizes in I,J,K INTEGER :: JK ! loop index on the vertical levels INTEGER :: JI,JJ ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PDXX,1),SIZE(PDXX,3)) :: ZPABS_S ! local pressure on southern side REAL, DIMENSION(SIZE(PDXX,1),SIZE(PDXX,3)) :: ZPABS_N ! local pressure on northern side REAL, DIMENSION(SIZE(PDYY,2),SIZE(PDXX,3)) :: ZPABS_E ! local pressure on eastern side REAL, DIMENSION(SIZE(PDYY,2),SIZE(PDXX,3)) :: ZPABS_W ! local pressure on western side +#else +REAL, POINTER, CONTIGUOUS, DIMENSION(:,:) :: ZPABS_S ! local pressure on southern side +REAL, POINTER, CONTIGUOUS, DIMENSION(:,:) :: ZPABS_N ! local pressure on northern side +REAL, POINTER, CONTIGUOUS, DIMENSION(:,:) :: ZPABS_E ! local pressure on eastern side +REAL, POINTER, CONTIGUOUS, DIMENSION(:,:) :: ZPABS_W ! local pressure on western side +#endif INTEGER :: IINFO_ll,KINFO TYPE(LIST_ll), POINTER :: TZFIELDS_ll, TZFIELDS2_ll ! list of fields to exchange ! INTEGER :: IIB_I,IIE_I,IJB_I,IJE_I INTEGER :: IIMAX_ll,IJMAX_ll ! +#ifdef MNH_OPENACC +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZPRHODJ, ZGZ_M_W +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZMXM_PRHODJ, ZMYM_PRHODJ, ZMZM_PRHODJ +#endif +! +! +LOGICAL :: GWEST,GEAST,GSOUTH,GNORTH +LOGICAL :: GSOUTH2D,GNORTH2D,GPRVREF0 ! !------------------------------------------------------------------------------ IF (MPPDB_INITIALIZED) THEN @@ -415,6 +451,10 @@ IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(PRWS,"PRESSUREZ beg:PRWS") CALL MPPDB_CHECK(PPABST,"PRESSUREZ beg:PPABST") END IF +!$acc data present( PRHODJ, PDXX, PDYY, PDZZ, PDZX, PDZY, PTHT, PRT, PRHODREF, PTHVREF, PRVREF, PEXNREF ) & +!$acc & present( PRUS, PRVS, PRWS, PPABST ) & +!$acc & present( PRHOT, PAF, PBF, PCF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PBFB, PBF_SXP2_YP1_Z ) + !------------------------------------------------------------------------------- ! !* 1. PRELIMINARIES @@ -436,10 +476,41 @@ IKB= 1+JPVEXT IKU= SIZE(PPABST,3) IKE= IKU - JPVEXT ! +GWEST = ( HLBCX(1) /= 'CYCL' .AND. LWEST_ll() ) +GEAST = ( HLBCX(2) /= 'CYCL' .AND. LEAST_ll() ) +GSOUTH = ( .NOT. L2D .AND. HLBCY(1) /= 'CYCL' .AND. LSOUTH_ll() ) +GNORTH = ( .NOT. L2D .AND. HLBCY(2) /= 'CYCL' .AND. LNORTH_ll() ) +GSOUTH2D = ( L2D .AND. LSOUTH_ll() ) +GNORTH2D = ( L2D .AND. LNORTH_ll() ) +! +GPRVREF0 = ( SIZE(PRVREF,1) == 0 ) +! +#ifdef MNH_OPENACC +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZDV_SOURCE, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZTHETAV, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZPHIT, IIU, IJU, IKU ) +! +CALL MNH_MEM_GET( ZPABS_S, IIU,IKU ) +CALL MNH_MEM_GET( ZPABS_N, IIU,IKU ) +CALL MNH_MEM_GET( ZPABS_E, IJU,IKU ) +CALL MNH_MEM_GET( ZPABS_W, IJU,IKU ) +! +CALL MNH_MEM_GET( ZPRHODJ, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZGZ_M_W, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZMXM_PRHODJ, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZMYM_PRHODJ, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZMZM_PRHODJ, IIU, IJU, IKU ) +#endif +! +!$acc kernels ZPABS_S(:,:) = 0. ZPABS_N(:,:) = 0. ZPABS_E(:,:) = 0. ZPABS_W(:,:) = 0. +!$acc end kernels ! Done in model_n before call to Rad_bound ! if ( lbudget_u ) call Budget_store_init( tbudgets(NBUDGET_U), 'PRES', prus(:, :, :) ) @@ -461,25 +532,44 @@ END IF ! -------------------------------------------------- ! IF (LIBM) THEN +#ifdef MNH_OPENACC + CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'PRESSUREZ', 'OpenACC: IBM not yet ported' ) +#endif +!$acc kernels WHERE(XIBM_LS(:,:,:,2).GT.-XIBM_EPSI) PRUS(:,:,:) = 0. WHERE(XIBM_LS(:,:,:,3).GT.-XIBM_EPSI) PRVS(:,:,:) = 0. WHERE(XIBM_LS(:,:,:,4).GT.-XIBM_EPSI) PRWS(:,:,:) = 0. +!$acc end kernels ENDIF ! -CALL MPPDB_CHECK3D(PRUS,"pressurez 4-before update_halo_ll::PRUS",PRECISION) -CALL MPPDB_CHECK3D(PRVS,"pressurez 4-before update_halo_ll::PRVS",PRECISION) -CALL MPPDB_CHECK3D(PRWS,"pressurez 4-before update_halo_ll::PRWS",PRECISION) +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK3D(PRUS,"pressurez 4-before update_halo_ll::PRUS",PRECISION) + CALL MPPDB_CHECK3D(PRVS,"pressurez 4-before update_halo_ll::PRVS",PRECISION) + CALL MPPDB_CHECK3D(PRWS,"pressurez 4-before update_halo_ll::PRWS",PRECISION) +END IF +#ifndef MNH_OPENACC NULLIFY(TZFIELDS_ll) CALL ADD3DFIELD_ll( TZFIELDS_ll, PRUS, 'PRESSUREZ::PRUS' ) CALL ADD3DFIELD_ll( TZFIELDS_ll, PRVS, 'PRESSUREZ::PRVS' ) CALL ADD3DFIELD_ll( TZFIELDS_ll, PRWS, 'PRESSUREZ::PRWS' ) CALL UPDATE_HALO_ll(TZFIELDS_ll,IINFO_ll) CALL CLEANLIST_ll(TZFIELDS_ll) -CALL MPPDB_CHECK3D(PRUS,"pressurez 4-after update_halo_ll::PRUS",PRECISION) -CALL MPPDB_CHECK3D(PRVS,"pressurez 4-after update_halo_ll::PRVS",PRECISION) -CALL MPPDB_CHECK3D(PRWS,"pressurez 4-after update_halo_ll::PRWS",PRECISION) +#else +CALL GET_HALO_D(PRUS,HNAME='UPDATE_HALO_ll::PRESSUREZ::PRUS' ) +CALL GET_HALO_D(PRVS,HNAME='UPDATE_HALO_ll::PRESSUREZ::PRVS' ) +CALL GET_HALO_D(PRWS,HNAME='UPDATE_HALO_ll::PRESSUREZ::PRWS' ) +#endif +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK3D(PRUS,"pressurez 4-after update_halo_ll::PRUS",PRECISION) + CALL MPPDB_CHECK3D(PRVS,"pressurez 4-after update_halo_ll::PRVS",PRECISION) + CALL MPPDB_CHECK3D(PRWS,"pressurez 4-after update_halo_ll::PRWS",PRECISION) +END IF ! +#ifndef MNH_OPENACC CALL GDIV(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRUS,PRVS,PRWS,ZDV_SOURCE) +#else +CALL GDIV_DEVICE(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRUS,PRVS,PRWS,ZDV_SOURCE) +#endif ! IF (LIBM) THEN CALL IBM_BALANCE(XIBM_LS,XIBM_SU,PRUS,PRVS,PRWS,ZDV_SOURCE) @@ -487,15 +577,27 @@ ENDIF ! ! The non-homogenous Neuman problem is transformed in an homogenous Neuman ! problem in the non-periodic cases -IF (HLBCX(1) /= 'CYCL') THEN - IF (LWEST_ll()) ZDV_SOURCE(IIB-1,:,:) = 0. - IF (LEAST_ll()) ZDV_SOURCE(IIE+1,:,:) = 0. -ENDIF -! -IF (.NOT. L2D .AND. HLBCY(1) /= 'CYCL') THEN - IF (LSOUTH_ll()) ZDV_SOURCE(:,IJB-1,:) = 0. - IF (LNORTH_ll()) ZDV_SOURCE(:,IJE+1,:) = 0. -ENDIF +IF ( GWEST ) THEN + !$acc kernels async + ZDV_SOURCE(IIB-1,:,:) = 0. + !$acc end kernels +END IF +IF ( GEAST ) THEN + !$acc kernels async + ZDV_SOURCE(IIE+1,:,:) = 0. + !$acc end kernels +END IF +IF ( GSOUTH ) THEN + !$acc kernels async + ZDV_SOURCE(:,IJB-1,:) = 0. + !$acc end kernels +END IF +IF ( GNORTH ) THEN + !$acc kernels async + ZDV_SOURCE(:,IJE+1,:) = 0. + !$acc end kernels +END IF +!$acc wait IF (LIBM) THEN ! @@ -524,11 +626,13 @@ ENDIF ! ------------------------------------------------------- ! IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN + !$acc kernels IF(KRR > 0) THEN ! ! compute the ratio : 1 + total water mass / dry air mass ZRV_OV_RD = XRV / XRD ZTHETAV(:,:,:) = 1. + PRT(:,:,:,1) + !$acc loop seq DO JWATER = 2 , 1+KRRL+KRRI ZTHETAV(:,:,:) = ZTHETAV(:,:,:) + PRT(:,:,:,JWATER) END DO @@ -539,6 +643,7 @@ IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN ! compute the virtual potential temperature when water is absent ZTHETAV(:,:,:) = PTHT(:,:,:) END IF + !$acc end kernels ! IF (LIBM) THEN WHERE (XIBM_LS(:,:,:,1).GT.-XIBM_EPSI) @@ -546,13 +651,28 @@ IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN ENDWHERE ENDIF ! +#ifndef MNH_OPENACC #ifndef MNH_BITREP ZPHIT(:,:,:)=(PPABST(:,:,:)/XP00)**(XRD/XCPD)-PEXNREF(:,:,:) #else ZPHIT(:,:,:)=BR_POW(PPABST(:,:,:)/XP00,XRD/XCPD)-PEXNREF(:,:,:) #endif +#else + !$acc kernels + DO CONCURRENT ( JI=1:IIU,JJ=1:IJU,JK=1:IKU ) +#ifndef MNH_BITREP + ZPHIT(JI,JJ,JK)=(PPABST(JI,JJ,JK)/XP00)**(XRD/XCPD)-PEXNREF(JI,JJ,JK) +#else + ZPHIT(JI,JJ,JK)=BR_POW((PPABST(JI,JJ,JK)/XP00),(XRD/XCPD))-PEXNREF(JI,JJ,JK) +#endif + END DO + !$acc end kernels +#endif ! ELSEIF(CEQNSYS=='LHE') THEN +#ifdef MNH_OPENACC + CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'PRESSUREZ', 'OpenACC: CEQNSYS=LHE not yet ported' ) +#endif IF ( .NOT. LOCEAN) THEN ZPHIT(:,:,:)= ((PPABST(:,:,:)/XP00)**(XRD/XCPD)-PEXNREF(:,:,:)) & * XCPD * PTHVREF(:,:,:) @@ -565,6 +685,10 @@ ELSEIF(CEQNSYS=='LHE') THEN END IF ! IF(CEQNSYS=='LHE'.AND. LFLAT .AND. LCARTESIAN .AND. .NOT. LIBM) THEN +#ifdef MNH_OPENACC + CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'PRESSUREZ', 'OpenACC: CEQNSYS=LHE.AND. LFLAT .AND. LCARTESIAN' // & + ' .AND. .NOT. LIBM not yet tested' ) +#endif ! flat cartesian LHE case -> exact solution IF ( HPRESOPT /= "ZRESI" ) THEN CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOT,PAF,PBF,PCF, & @@ -594,7 +718,7 @@ ELSE KIFAXX,KIFAXY,KITR,ZDV_SOURCE,ZPHIT) ! CASE('ZRESI') ! Conjugate Residual method - CALL CONRESOLZ(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,ZTHETAV, & + CALL CONRESOLZ(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,ZTHETAV, & PDXHATM,PDYHATM,PRHOT,PAF,PBF,PCF,PTRIGSX,PTRIGSY, & KIFAXX,KIFAXY,KITR,ZDV_SOURCE,ZPHIT, & PBFB,& @@ -607,32 +731,41 @@ END IF !* 6. ADD THE PRESSURE GRADIENT TO THE SOURCES ! ---------------------------------------- ! -IF ( HLBCX(1) /= 'CYCL' ) THEN - IF(LWEST_ll()) THEN +IF ( GWEST ) THEN + !$acc kernels async ZPHIT(IIB-1,:,IKB-1) = ZPHIT(IIB,:,IKB) ZPHIT(IIB-1,:,IKE+1) = ZPHIT(IIB,:,IKE) - ENDIF - IF(LEAST_ll()) THEN + !$acc end kernels +END IF +IF ( GEAST ) THEN + !$acc kernels async ZPHIT(IIE+1,:,IKB-1) = ZPHIT(IIE,:,IKB) ZPHIT(IIE+1,:,IKE+1) = ZPHIT(IIE,:,IKE) - ENDIF -ENDIF + !$acc end kernels +END IF ! -IF ( HLBCY(1) /= 'CYCL' ) THEN - IF (LSOUTH_ll()) THEN +IF ( GSOUTH ) THEN + !$acc kernels async ZPHIT(:,IJB-1,IKB-1) = ZPHIT(:,IJB,IKB) ZPHIT(:,IJB-1,IKE+1) = ZPHIT(:,IJB,IKE) - ENDIF - IF (LNORTH_ll()) THEN - ZPHIT(:,IJE+1,IKB-1) = ZPHIT(:,IJE,IKB) - ZPHIT(:,IJE+1,IKE+1) = ZPHIT(:,IJE,IKE) - ENDIF -ENDIF -! -IF ( L2D ) THEN - IF (LSOUTH_ll()) ZPHIT(:,IJB-1,:) = ZPHIT(:,IJB,:) - IF (LNORTH_ll()) ZPHIT(:,IJE+1,:) = ZPHIT(:,IJB,:) -ENDIF + !$acc end kernels +END IF +IF ( GNORTH ) THEN + !$acc kernels async + ZPHIT(:,IJE+1,IKB-1) = ZPHIT(:,IJE,IKB-1) + !$acc end kernels +END IF +IF ( GSOUTH2D ) THEN + !$acc kernels async + ZPHIT(:,IJB-1,:) = ZPHIT(:,IJB,:) + !$acc end kernels +END IF +IF ( GNORTH2D ) THEN + !$acc kernels async + ZPHIT(:,IJE+1,:) = ZPHIT(:,IJB,:) + !$acc end kernels +END IF +!$acc wait ! IF (LIBM) THEN ! @@ -660,40 +793,74 @@ IF (LIBM) THEN ! ENDIF ! +#ifndef MNH_OPENACC ZDV_SOURCE = GX_M_U(1,IKU,1,ZPHIT,PDXX,PDZZ,PDZX) +#else +CALL GX_M_U_DEVICE(1,IKU,1,ZPHIT,PDXX,PDZZ,PDZX,ZDV_SOURCE) +#endif ! -IF ( HLBCX(1) /= 'CYCL' ) THEN - IF(LWEST_ll()) THEN +IF ( GWEST ) THEN !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! - DO JK=2,IKU-1 - ZDV_SOURCE(IIB,:,JK)= & - (ZPHIT(IIB,:,JK) - ZPHIT(IIB-1,:,JK) - 0.5 * ( & - PDZX(IIB,:,JK) * (ZPHIT(IIB,:,JK)-ZPHIT(IIB,:,JK-1)) / PDZZ(IIB,:,JK) & - +PDZX(IIB,:,JK+1) * (ZPHIT(IIB,:,JK+1)-ZPHIT(IIB,:,JK)) / PDZZ(IIB,:,JK+1) & + !$acc kernels async +#ifdef MNH_COMPILER_NVHPC + !$acc loop independent collapse(2) +#else + !$acc loop independent +#endif + DO CONCURRENT (JJ=1:IJU , JK=2:IKU-1) + ZDV_SOURCE(IIB,JJ,JK)= & + (ZPHIT(IIB,JJ,JK) - ZPHIT(IIB-1,JJ,JK) - 0.5 * ( & + PDZX(IIB,JJ,JK) * (ZPHIT(IIB,JJ,JK)-ZPHIT(IIB,JJ,JK-1)) / PDZZ(IIB,JJ,JK) & + +PDZX(IIB,JJ,JK+1) * (ZPHIT(IIB,JJ,JK+1)-ZPHIT(IIB,JJ,JK)) / PDZZ(IIB,JJ,JK+1) & ) & - ) / PDXX(IIB,:,JK) - END DO - ENDIF + ) / PDXX(IIB,JJ,JK) + END DO + !$acc end kernels +ENDIF ! - IF(LEAST_ll()) THEN - DO JK=2,IKU-1 - ZDV_SOURCE(IIE+1,:,JK)= & - (ZPHIT(IIE+1,:,JK) - ZPHIT(IIE+1-1,:,JK) - 0.5 * ( & - PDZX(IIE+1,:,JK) * (ZPHIT(IIE+1-1,:,JK)-ZPHIT(IIE+1-1,:,JK-1)) & - / PDZZ(IIE+1-1,:,JK) & - +PDZX(IIE+1,:,JK+1) * (ZPHIT(IIE+1-1,:,JK+1)-ZPHIT(IIE+1-1,:,JK)) & - / PDZZ(IIE+1-1,:,JK+1) & +IF( GEAST ) THEN + !$acc kernels async +#ifdef MNH_COMPILER_NVHPC + !$acc loop independent collapse(2) +#else + !$acc loop independent +#endif + DO CONCURRENT (JJ=1:IJU , JK=2:IKU-1) + ZDV_SOURCE(IIE+1,JJ,JK)= & + (ZPHIT(IIE+1,JJ,JK) - ZPHIT(IIE+1-1,JJ,JK) - 0.5 * ( & + PDZX(IIE+1,JJ,JK) * (ZPHIT(IIE+1-1,JJ,JK)-ZPHIT(IIE+1-1,JJ,JK-1)) & + / PDZZ(IIE+1-1,JJ,JK) & + +PDZX(IIE+1,JJ,JK+1) * (ZPHIT(IIE+1-1,JJ,JK+1)-ZPHIT(IIE+1-1,JJ,JK)) & + / PDZZ(IIE+1-1,JJ,JK+1) & ) & - ) / PDXX(IIE+1,:,JK) - END DO - END IF + ) / PDXX(IIE+1,JJ,JK) + END DO + !$acc end kernels END IF +!$acc wait ! -CALL MPPDB_CHECK3DM("before MXM PRESSUREZ :PRU/V/WS",PRECISION,PRUS,PRVS,PRWS) +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK3DM("before MXM PRESSUREZ :PRU/V/WS",PRECISION,PRUS,PRVS,PRWS) +END IF IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN +#ifndef MNH_OPENACC PRUS = PRUS - MXM(PRHODJ * XCPD * ZTHETAV) * ZDV_SOURCE PRWS = PRWS - MZM(PRHODJ * XCPD * ZTHETAV) * GZ_M_W(1,IKU,1,ZPHIT,PDZZ) +#else + !$acc kernels present(ZPRHODJ) + ZPRHODJ(:,:,:) = PRHODJ(:,:,:) * XCPD * ZTHETAV(:,:,:) + !$acc end kernels + CALL MXM_DEVICE(ZPRHODJ, ZMXM_PRHODJ) + CALL MZM_DEVICE(ZPRHODJ, ZMZM_PRHODJ) + CALL GZ_M_W_DEVICE(1,IKU,1,ZPHIT,PDZZ,ZGZ_M_W) + !$acc kernels + !dir$ concurrent + PRUS(:,:,:) = PRUS(:,:,:) - ZMXM_PRHODJ(:,:,:) * ZDV_SOURCE(:,:,:) + !dir$ concurrent + PRWS(:,:,:) = PRWS(:,:,:) - ZMZM_PRHODJ(:,:,:) * ZGZ_M_W(:,:,:) + !$acc end kernels +#endif ELSEIF(CEQNSYS=='LHE') THEN PRUS = PRUS - MXM(PRHODJ) * ZDV_SOURCE PRWS = PRWS - MZM(PRHODJ) * GZ_M_W(1,IKU,1,ZPHIT,PDZZ) @@ -701,76 +868,130 @@ END IF ! IF(.NOT. L2D) THEN ! - ZDV_SOURCE = GY_M_V(1,IKU,1,ZPHIT,PDYY,PDZZ,PDZY) +#ifndef MNH_OPENACC + ZDV_SOURCE = GY_M_V(1,IKU,1,ZPHIT,PDYY,PDZZ,PDZY) +#else + CALL GY_M_V_DEVICE(1,IKU,1,ZPHIT,PDYY,PDZZ,PDZY,ZDV_SOURCE) +#endif ! - IF ( HLBCY(1) /= 'CYCL' ) THEN - IF (LSOUTH_ll()) THEN + IF ( GSOUTH ) THEN !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! !!!!!!!!!!!!!!!! FUJI compiler directive !!!!!!!!!! - DO JK=2,IKU-1 - ZDV_SOURCE(:,IJB,JK)= & - (ZPHIT(:,IJB,JK) - ZPHIT(:,IJB-1,JK) - 0.5 * ( & - PDZY(:,IJB,JK) * (ZPHIT(:,IJB,JK)-ZPHIT(:,IJB,JK-1)) / PDZZ(:,IJB,JK) & - +PDZY(:,IJB,JK+1) * (ZPHIT(:,IJB,JK+1)-ZPHIT(:,IJB,JK)) / PDZZ(:,IJB,JK+1) & - ) & - ) / PDYY(:,IJB,JK) + !$acc kernels async +#ifdef MNH_COMPILER_NVHPC + !$acc loop independent collapse(2) +#else + !$acc loop independent +#endif + DO CONCURRENT (JI=1:IIU , JK=2:IKU-1) + ZDV_SOURCE(JI,IJB,JK)= & + (ZPHIT(JI,IJB,JK) - ZPHIT(JI,IJB-1,JK) - 0.5 * ( & + PDZY(JI,IJB,JK) * (ZPHIT(JI,IJB,JK)-ZPHIT(JI,IJB,JK-1)) / PDZZ(JI,IJB,JK) & + +PDZY(JI,IJB,JK+1) * (ZPHIT(JI,IJB,JK+1)-ZPHIT(JI,IJB,JK)) / PDZZ(JI,IJB,JK+1) & + ) & + ) / PDYY(JI,IJB,JK) END DO - END IF - ! - IF (LNORTH_ll()) THEN - DO JK=2,IKU-1 - ZDV_SOURCE(:,IJE+1,JK)= & - (ZPHIT(:,IJE+1,JK) - ZPHIT(:,IJE+1-1,JK) - 0.5 * ( & - PDZY(:,IJE+1,JK) * (ZPHIT(:,IJE+1-1,JK)-ZPHIT(:,IJE+1-1,JK-1)) & - / PDZZ(:,IJE+1-1,JK) & - +PDZY(:,IJE+1,JK+1) * (ZPHIT(:,IJE+1-1,JK+1)-ZPHIT(:,IJE+1-1,JK)) & - / PDZZ(:,IJE+1-1,JK+1) & - ) & - ) / PDYY(:,IJE+1,JK) + !$acc end kernels + END IF + ! + IF ( GNORTH ) THEN + !$acc kernels async +#ifdef MNH_COMPILER_NVHPC + !$acc loop independent collapse(2) +#else + !$acc loop independent +#endif + DO CONCURRENT (JI=1:IIU , JK=2:IKU-1) + ZDV_SOURCE(JI,IJE+1,JK)= & + (ZPHIT(JI,IJE+1,JK) - ZPHIT(JI,IJE+1-1,JK) - 0.5 * ( & + PDZY(JI,IJE+1,JK) * (ZPHIT(JI,IJE+1-1,JK)-ZPHIT(JI,IJE+1-1,JK-1)) & + / PDZZ(JI,IJE+1-1,JK) & + +PDZY(JI,IJE+1,JK+1) * (ZPHIT(JI,IJE+1-1,JK+1)-ZPHIT(JI,IJE+1-1,JK)) & + / PDZZ(JI,IJE+1-1,JK+1) & + ) & + ) / PDYY(JI,IJE+1,JK) END DO - END IF + !$acc end kernels END IF +!$acc wait ! - CALL MPPDB_CHECK3DM("before MYM PRESSUREZ :PRU/V/WS",PRECISION,PRUS,PRVS,PRWS) +IF (MPPDB_INITIALIZED) THEN + CALL MPPDB_CHECK3DM("before MYM PRESSUREZ :PRU/V/WS",PRECISION,PRUS,PRVS,PRWS) +END IF IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN +#ifndef MNH_OPENACC PRVS = PRVS - MYM(PRHODJ * XCPD * ZTHETAV) * ZDV_SOURCE +#else + CALL MYM_DEVICE(ZPRHODJ,ZMYM_PRHODJ) + !$acc kernels + !dir$ concurrent + PRVS(:,:,:) = PRVS(:,:,:) - ZMYM_PRHODJ(:,:,:) * ZDV_SOURCE(:,:,:) + !$acc end kernels +#endif ELSEIF(CEQNSYS=='LHE') THEN +#ifndef MNH_OPENACC PRVS = PRVS - MYM(PRHODJ) * ZDV_SOURCE +#else + CALL MYM_DEVICE(PRHODJ,ZMYM_PRHODJ) + !$acc kernels + !dir$ concurrent + PRVS(:,:,:) = PRVS(:,:,:) - ZMYM_PRHODJ(:,:,:) * ZDV_SOURCE(:,:,:) + !$acc end kernels +#endif END IF END IF ! !! same boundary conditions as in gdiv ... !! (provisory coding) !! (necessary when NVERB=1) !! - PRUS(:,:,IKB-1)=PRUS(:,:,IKB) - PRUS(:,:,IKE+1)=PRUS(:,:,IKE) - PRVS(:,:,IKB-1)=PRVS(:,:,IKB) - PRVS(:,:,IKE+1)=PRVS(:,:,IKE) -! +!$acc kernels +PRUS(:,:,IKB-1)=PRUS(:,:,IKB) +PRUS(:,:,IKE+1)=PRUS(:,:,IKE) +PRVS(:,:,IKB-1)=PRVS(:,:,IKB) +PRVS(:,:,IKE+1)=PRVS(:,:,IKE) +!$acc end kernels +! +#ifndef MNH_OPENACC NULLIFY(TZFIELDS2_ll) CALL ADD3DFIELD_ll( TZFIELDS2_ll, PRUS, 'PRESSUREZ::PRUS' ) CALL ADD3DFIELD_ll( TZFIELDS2_ll, PRVS, 'PRESSUREZ::PRVS' ) CALL ADD3DFIELD_ll( TZFIELDS2_ll, PRWS, 'PRESSUREZ::PRWS' ) CALL UPDATE_HALO_ll(TZFIELDS2_ll,IINFO_ll) CALL CLEANLIST_ll(TZFIELDS2_ll) +#else +CALL GET_HALO_D(PRUS,HNAME='UPDATE_HALO_ll::PRESSUREZ::PRUS' ) +CALL GET_HALO_D(PRVS,HNAME='UPDATE_HALO_ll::PRESSUREZ::PRVS' ) +CALL GET_HALO_D(PRWS,HNAME='UPDATE_HALO_ll::PRESSUREZ::PRWS' ) +#endif ! ! compute the residual divergence +#ifndef MNH_OPENACC CALL GDIV(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRUS,PRVS,PRWS,ZDV_SOURCE) +#else +CALL GDIV_DEVICE(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRUS,PRVS,PRWS,ZDV_SOURCE) +#endif ! IF (LIBM) THEN ZDV_SOURCE(:,:,:)=ZDV_SOURCE(:,:,:)*XIBM_SU(:,:,:,2) ENDIF ! IF ( CEQNSYS=='DUR' ) THEN - IF ( SIZE(PRVREF,1) == 0 ) THEN + IF ( GPRVREF0 ) THEN + !$acc kernels ZDV_SOURCE=ZDV_SOURCE/PRHODJ/XTH00*PRHODREF*PTHVREF + !$acc end kernels ELSE + !$acc kernels ZDV_SOURCE=ZDV_SOURCE/PRHODJ/XTH00*PRHODREF*PTHVREF*(1.+PRVREF) + !$acc end kernels END IF ELSEIF( CEQNSYS=='MAE' .OR. CEQNSYS=='LHE' ) THEN + !$acc kernels ZDV_SOURCE=ZDV_SOURCE/PRHODJ*PRHODREF + !$acc end kernels END IF ! +!$acc update host(ZDV_SOURCE) ZMAXVAL=MAX_ll(ABS(ZDV_SOURCE),IINFO_ll) !JUANZ IF (PRESENT(PRESIDUAL)) PRESIDUAL = ZMAXVAL @@ -830,76 +1051,98 @@ IF ((ZMAX_ll > 1.E-12) .AND. KTCOUNT >0 ) THEN PRVREF, PEXNREF, ZPHIT, ZPHI0 ) ! IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN + !$acc kernels #ifndef MNH_BITREP - PPABST(:,:,:)=XP00*(ZPHIT+PEXNREF)**(XCPD/XRD) + PPABST(:,:,:)=XP00*(ZPHIT(:,:,:)+PEXNREF(:,:,:))**(XCPD/XRD) #else - PPABST(:,:,:)=XP00*BR_POW((ZPHIT+PEXNREF),(XCPD/XRD)) + DO CONCURRENT(JI=1:IIU,JJ=1:IJU,JK=1:IKU) + PPABST(JI,JJ,JK)=XP00*BR_POW((ZPHIT(JI,JJ,JK)+PEXNREF(JI,JJ,JK)),(XCPD/XRD)) + END DO #endif + !$acc end kernels ELSEIF(CEQNSYS=='LHE') THEN IF (.NOT. LOCEAN) THEN - ! Deep atmosphere case : computing of PI fluctuation ; ZPHI0 (computed in P_ABS routine) is added + !$acc kernels + ! Deep atmosphere case : computing of PI fluctuation ; ZPHI0 (computed in P_ABS routine) is added XPHIT(:,:,:) = (ZPHIT+ZPHI0)/(XCPD*PTHVREF) ! Absolute Pressure PPABST(:,:,:)=XP00*(XPHIT(:,:,:)+PEXNREF)**(XCPD/XRD) ! Computing press fluctuation XPHIT(:,:,:) = PPABST(:,:,:) - XP00*PEXNREF**(XCPD/XRD) + !$acc end kernels ELSE + !$acc kernels ! Shallow atmosphere ou ocean XPHIT(:,:,:) = (ZPHIT+ZPHI0)*PRHODREF PPABST(:,:,:)=XPHIT(:,:,:) + XP00*PEXNREF**(XCPD/XRD) + !$acc end kernels END IF ENDIF ! - IF( HLBCX(1) == 'CYCL' ) THEN - IF (LWEST_ll()) THEN - ZPABS_W(:,:)= PPABST(IIB,:,:) - END IF -! - IF (LEAST_ll()) THEN - ZPABS_E(:,:)= PPABST(IIE+1,:,:) - END IF -! + IF( GWEST ) THEN + !$acc kernels async + ZPABS_W(:,:)= PPABST(IIB,:,:) + !$acc end kernels END IF ! - IF( HLBCY(1) == 'CYCL' ) THEN - IF (LSOUTH_ll()) THEN - ZPABS_S(:,:)= PPABST(:,IJB,:) - END IF -! - IF (LNORTH_ll()) THEN - ZPABS_N(:,:)= PPABST(:,IJE+1,:) - END IF + IF ( GEAST ) THEN + !$acc kernels async + ZPABS_E(:,:)= PPABST(IIE+1,:,:) + !$acc end kernels + END IF ! + IF( GSOUTH ) THEN + !$acc kernels async + ZPABS_S(:,:)= PPABST(:,IJB,:) + !$acc end kernels END IF ! + IF ( GNORTH ) THEN + !$acc kernels async + ZPABS_N(:,:)= PPABST(:,IJE+1,:) + !$acc end kernels + END IF + ! + !$acc wait + ! +#ifndef MNH_OPENACC CALL ADD3DFIELD_ll( TZFIELDS_ll, PPABST, 'PRESSUREZ::PPABST' ) CALL UPDATE_HALO_ll(TZFIELDS_ll,IINFO_ll) CALL CLEANLIST_ll(TZFIELDS_ll) +#else + CALL GET_HALO_D( PPABST,HNAME='UPDATE_HALO_ll::PRESSUREZ::PPABST' ) +#endif ! - IF( HLBCX(1) == 'CYCL' ) THEN - IF (LWEST_ll()) THEN - PPABST(IIB,:,:) = ZPABS_W(:,:) - END IF -! - IF (LEAST_ll()) THEN - PPABST(IIE+1,:,:) = ZPABS_E(:,:) - END IF -! + IF( GWEST ) THEN + !$acc kernels async + PPABST(IIB,:,:) = ZPABS_W(:,:) + !$acc end kernels END IF ! - IF( HLBCY(1) == 'CYCL' ) THEN - IF (LSOUTH_ll()) THEN - PPABST(:,IJB,:) = ZPABS_S(:,:) - END IF + IF ( GEAST ) THEN + !$acc kernels async + PPABST(IIE+1,:,:) = ZPABS_E(:,:) + !$acc end kernels + END IF ! - IF (LNORTH_ll()) THEN - PPABST(:,IJE+1,:) = ZPABS_N(:,:) - END IF + IF( GSOUTH ) THEN + !$acc kernels async + PPABST(:,IJB,:) = ZPABS_S(:,:) + !$acc end kernels + END IF ! + IF ( GNORTH ) THEN + !$acc kernels async + PPABST(:,IJE+1,:) = ZPABS_N(:,:) + !$acc end kernels END IF ! -END IF +!$acc wait ! +END IF + +!$acc update self( PRUS, PRVS, PRWS, PPABST ) + IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PRUS,"PRESSUREZ end:PRUS") @@ -907,6 +1150,13 @@ IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(PRWS,"PRESSUREZ end:PRWS") CALL MPPDB_CHECK(PPABST,"PRESSUREZ end:PPABST") END IF +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + !------------------------------------------------------------------------------- ! END SUBROUTINE PRESSUREZ diff --git a/src/MNH/qlap.f90 b/src/MNH/qlap.f90 index 24c49ab38f58702aec4c40a6996f0575a3d2f8e5..a16622cf669221a9e951fab6fe4ab913ec93fb05 100644 --- a/src/MNH/qlap.f90 +++ b/src/MNH/qlap.f90 @@ -1,4 +1,4 @@ -!MNH_LIC Copyright 1994-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC Copyright 1994-2022 CNRS, Meteo-France and Universite Paul Sabatier !MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence !MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt !MNH_LIC for details. version 1. @@ -33,6 +33,33 @@ REAL, DIMENSION(SIZE(PY,1),SIZE(PY,2),SIZE(PY,3)) :: PQLAP ! final divergence ! END FUNCTION QLAP ! +! +#ifdef MNH_OPENACC +SUBROUTINE QLAP_DEVICE(PQLAP,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PY) +! +IMPLICIT NONE +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PQLAP ! final divergence +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type +! + ! Metric coefficients: +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX ! d*xx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDYY ! d*yy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZX ! d*zx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZY ! d*zy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZZ ! d*zz +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRHODJ ! density of reference * J +REAL, DIMENSION(:,:,:), INTENT(IN) :: PTHETAV ! virtual potential temp. at time t +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PY ! field components +! + +! +END SUBROUTINE QLAP_DEVICE +#endif END INTERFACE ! END MODULE MODI_QLAP @@ -44,50 +71,50 @@ END MODULE MODI_QLAP RESULT(PQLAP) ! ######################################################################### ! -!!**** *QLAP * - compute the complete quasi-laplacien QLAP of a field P +!!**** *QLAP * - compute the complete quasi-laplacien QLAP of a field P !! !! PURPOSE !! ------- ! This function computes the quasi-laplacien QLAP of the scalar field P ! localized at a mass point, with non-vanishing orography. -! The result is localized at a mass point and defined by: +! The result is localized at a mass point and defined by: ! for Durran and MAE anelastic equations -! ( ( GX_M_U (PY) ) ) +! ( ( GX_M_U (PY) ) ) ! PQLAP = GDIV ( rho * CPd * Thetav * J ( GX_M_V (PY) ) ) -! ( ( GX_M_W (PY) ) ) +! ( ( GX_M_W (PY) ) ) ! or for Lipps and Hemler -! ( ( GX_M_U (PY) ) ) +! ( ( GX_M_U (PY) ) ) ! PQLAP = GDIV ( rho * J ( GX_M_V (PY) ) ) -! ( ( GX_M_W (PY) ) ) +! ( ( GX_M_W (PY) ) ) ! Where GX_M_.. are the cartesian components of the gradient of PY and -! GDIV is the operator acting on a vector AA: -! -! GDIV ( AA ) = J * divergence (1/J AA ) -! +! GDIV is the operator acting on a vector AA: +! +! GDIV ( AA ) = J * divergence (1/J AA ) +! !!** METHOD !! ------ -!! First, we compute the gradients along x, y , z of the P field. The -!! result is multiplied by rhod * CPd * Thetav or rhod depending on the -!! chosen anelastic system where the suffixes indicate +!! First, we compute the gradients along x, y , z of the P field. The +!! result is multiplied by rhod * CPd * Thetav or rhod depending on the +!! chosen anelastic system where the suffixes indicate !! d dry and v for virtual. -!! Then, the pseudo-divergence ( J * DIV (1/J o ) ) is computed by the +!! Then, the pseudo-divergence ( J * DIV (1/J o ) ) is computed by the !! subroutine GDIV. The result is localized at a mass point. !! !! EXTERNAL !! -------- -!! Function GX_M_U : compute the gradient along x -!! Function GY_M_V : compute the gradient along y -!! Function GZ_M_W : compute the gradient along z -!! FUNCTION MXM: compute an average in the x direction for a variable +!! Function GX_M_U : compute the gradient along x +!! Function GY_M_V : compute the gradient along y +!! Function GZ_M_W : compute the gradient along z +!! FUNCTION MXM: compute an average in the x direction for a variable !! at a mass localization -!! FUNCTION MYM: compute an average in the y direction for a variable +!! FUNCTION MYM: compute an average in the y direction for a variable !! at a mass localization -!! FUNCTION MZM: compute an average in the z direction for a variable +!! FUNCTION MZM: compute an average in the z direction for a variable !! at a mass localization !! Subroutine GDIV : compute J times the divergence of 1/J times a vector !! !! IMPLICIT ARGUMENTS -!! ------------------ +!! ------------------ !! Module MODD_PARAMETERS: JPHEXT, JPVEXT !! Module MODD_CONF: L2D,CEQNSYS !! Module MODD_CST : XCPD @@ -102,13 +129,13 @@ END MODULE MODI_QLAP !! !! MODIFICATIONS !! ------------- -!! Original 11/07/94 +!! Original 11/07/94 !! Modification 16/03/95 change the argument list of the gradient !! 14/01/97 New anelastic equation ( Stein ) !! 17/12/97 include the case of non-vanishing orography !! at the lbc ( Stein ) !! 06/12 V.Masson : update_halo due to CONTRAV changes -!! J.Escobar : 15/09/2015 : WENO5 & JPHEXT <> 1 +!! J.Escobar : 15/09/2015 : WENO5 & JPHEXT <> 1 ! P. Wautelet 20/05/2019: add name argument to ADDnFIELD_ll + new ADD4DFIELD_ll subroutine ! F. Auguste 02/21: add IBM !------------------------------------------------------------------------------- @@ -135,14 +162,14 @@ IMPLICIT NONE !* 0.1 declarations of arguments ! ! -CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type -CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type ! ! Metric coefficients: -REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX ! d*xx -REAL, DIMENSION(:,:,:), INTENT(IN) :: PDYY ! d*yy -REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZX ! d*zx -REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZY ! d*zy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX ! d*xx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDYY ! d*yy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZX ! d*zx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZY ! d*zy REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZZ ! d*zz ! REAL, DIMENSION(:,:,:), INTENT(IN) :: PRHODJ ! density of reference * J @@ -150,7 +177,7 @@ REAL, DIMENSION(:,:,:), INTENT(IN) :: PTHETAV ! virtual potential temp. at ti ! REAL, DIMENSION(:,:,:), INTENT(IN) :: PY ! field components ! -REAL, DIMENSION(SIZE(PY,1),SIZE(PY,2),SIZE(PY,3)) :: PQLAP ! final divergence +REAL, DIMENSION(SIZE(PY,1),SIZE(PY,2),SIZE(PY,3)) :: PQLAP ! final divergence ! !* 0.2 declarations of local variables ! @@ -166,6 +193,17 @@ TYPE(LIST_ll), POINTER :: TZFIELDS_ll ! list of fields to exchange INTEGER :: IINFO_ll INTEGER :: IIB,IIE,IJB,IJE,IKB,IKE !------------------------------------------------------------------------------- +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"QLAP beg:PDXX") + CALL MPPDB_CHECK(PDYY,"QLAP beg:PDYY") + CALL MPPDB_CHECK(PDZX,"QLAP beg:PDZX") + CALL MPPDB_CHECK(PDZY,"QLAP beg:PDZY") + CALL MPPDB_CHECK(PDZZ,"QLAP beg:PDZZ") + CALL MPPDB_CHECK(PRHODJ,"QLAP beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"QLAP beg:PTHETAV") + CALL MPPDB_CHECK(PY,"QLAP beg:PY") +END IF ! ! !* 1. COMPUTE THE GRADIENT COMPONENTS @@ -177,7 +215,7 @@ IKU=SIZE(PY,3) IKE = IKU - JPVEXT IKB = 1 + JPVEXT ! -ZU = GX_M_U(1,IKU,1,PY,PDXX,PDZZ,PDZX) +ZU(:,:,:) = GX_M_U( 1, IKU, 1, PY(:,:,:), PDXX(:,:,:), PDZZ(:,:,:), PDZX(:,:,:) ) CALL MPPDB_CHECK3D(ZU,'QLAP::ZU',PRECISION) ! IF ( HLBCX(1) /= 'CYCL' .AND. LWEST_ll() ) THEN @@ -185,7 +223,7 @@ IF ( HLBCX(1) /= 'CYCL' .AND. LWEST_ll() ) THEN DO JJ=1,IJU ZU(IIB,JJ,JK)= (PY(IIB,JJ,JK) - PY(IIB-1,JJ,JK) - 0.5 * ( & PDZX(IIB,JJ,JK) * (PY(IIB,JJ,JK)-PY(IIB,JJ,JK-1)) / PDZZ(IIB,JJ,JK) & - +PDZX(IIB,JJ,JK+1) * (PY(IIB,JJ,JK+1)-PY(IIB,JJ,JK)) / PDZZ(IIB,JJ,JK+1) & + +PDZX(IIB,JJ,JK+1) * (PY(IIB,JJ,JK+1)-PY(IIB,JJ,JK)) / PDZZ(IIB,JJ,JK+1) & ) ) / PDXX(IIB,JJ,JK) END DO END DO @@ -197,25 +235,25 @@ IF ( HLBCX(1) /= 'CYCL' .AND. LEAST_ll() ) THEN DO JJ=1,IJU ZU(IIE+1,JJ,JK)= (PY(IIE+1,JJ,JK) - PY(IIE+1-1,JJ,JK) - 0.5 * ( & PDZX(IIE+1,JJ,JK) * (PY(IIE+1-1,JJ,JK)-PY(IIE+1-1,JJ,JK-1)) / PDZZ(IIE+1-1,JJ,JK) & - +PDZX(IIE+1,JJ,JK+1) * (PY(IIE+1-1,JJ,JK+1)-PY(IIE+1-1,JJ,JK)) / PDZZ(IIE+1-1,JJ,JK+1)& + +PDZX(IIE+1,JJ,JK+1) * (PY(IIE+1-1,JJ,JK+1)-PY(IIE+1-1,JJ,JK)) / PDZZ(IIE+1-1,JJ,JK+1)& ) ) / PDXX(IIE+1,JJ,JK) END DO END DO END IF CALL MPPDB_CHECK3D(ZU,'QLAP::ZU/E',PRECISION) ! -IF(.NOT. L2D) THEN +IF(.NOT. L2D) THEN ! - ZV = GY_M_V(1,IKU,1,PY,PDYY,PDZZ,PDZY) + ZV(:,:,:) = GY_M_V( 1, IKU, 1, PY(:,:,:), PDYY(:,:,:), PDZZ(:,:,:), PDZY(:,:,:) ) CALL MPPDB_CHECK3D(ZV,'QLAP::ZV',PRECISION) ! - IF ( HLBCY(1) /= 'CYCL' .AND. LSOUTH_ll() ) THEN + IF ( HLBCY(1) /= 'CYCL' .AND. LSOUTH_ll() ) THEN DO JK=2,IKU-1 DO JI=1,IIU ZV(JI,IJB,JK)= (PY(JI,IJB,JK) - PY(JI,IJB-1,JK) - 0.5 * ( & PDZY(JI,IJB,JK) * (PY(JI,IJB,JK)-PY(JI,IJB,JK-1)) / PDZZ(JI,IJB,JK) & - +PDZY(JI,IJB,JK+1) * (PY(JI,IJB,JK+1)-PY(JI,IJB,JK)) / PDZZ(JI,IJB,JK+1) & - ) ) / PDYY(JI,IJB,JK) + +PDZY(JI,IJB,JK+1) * (PY(JI,IJB,JK+1)-PY(JI,IJB,JK)) / PDZZ(JI,IJB,JK+1) & + ) ) / PDYY(JI,IJB,JK) END DO END DO END IF @@ -227,33 +265,33 @@ IF(.NOT. L2D) THEN ZV(JI,IJE+1,JK)= (PY(JI,IJE+1,JK) - PY(JI,IJE+1-1,JK) - 0.5 * ( & PDZY(JI,IJE+1,JK) * (PY(JI,IJE+1-1,JK)-PY(JI,IJE+1-1,JK-1)) / PDZZ(JI,IJE+1-1,JK) & +PDZY(JI,IJE+1,JK+1) * (PY(JI,IJE+1-1,JK+1)-PY(JI,IJE+1-1,JK)) / PDZZ(JI,IJE+1-1,JK+1)& - ) ) / PDYY(JI,IJE+1,JK) + ) ) / PDYY(JI,IJE+1,JK) END DO END DO END IF CALL MPPDB_CHECK3D(ZV,'QLAP::ZV/N',PRECISION) ! ELSE - ZV=0. + ZV(:,:,:) = 0. ENDIF ! IF ( CEQNSYS == 'DUR' .OR. CEQNSYS == 'MAE' ) THEN - ZU = MXM(PRHODJ * XCPD * PTHETAV) * ZU - IF(.NOT. L2D) THEN - ZV = MYM(PRHODJ * XCPD * PTHETAV) * ZV + ZU(:,:,:) = MXM( PRHODJ(:,:,:) * XCPD * PTHETAV(:,:,:) ) * ZU(:,:,:) + IF(.NOT. L2D) THEN + ZV(:,:,:) = MYM( PRHODJ(:,:,:) * XCPD * PTHETAV(:,:,:) ) * ZV(:,:,:) END IF - ZW = MZM(PRHODJ * XCPD * PTHETAV) * GZ_M_W(1,IKU,1,PY,PDZZ) -ELSEIF ( CEQNSYS == 'LHE' ) THEN - ZU = MXM(PRHODJ) * ZU - IF(.NOT. L2D) THEN - ZV = MYM(PRHODJ) * ZV + ZW(:,:,:) = MZM( PRHODJ(:,:,:) * XCPD * PTHETAV(:,:,:) ) * GZ_M_W( 1, IKU, 1, PY(:,:,:), PDZZ(:,:,:) ) +ELSEIF ( CEQNSYS == 'LHE' ) THEN + ZU(:,:,:) = MXM( PRHODJ(:,:,:) ) * ZU(:,:,:) + IF(.NOT. L2D) THEN + ZV(:,:,:) = MYM( PRHODJ(:,:,:) ) * ZV(:,:,:) ENDIF - ZW = MZM(PRHODJ) * GZ_M_W(1,IKU,1,PY,PDZZ) + ZW(:,:,:) = MZM( PRHODJ(:,:,:) ) * GZ_M_W( 1, IKU, 1, PY(:,:,:), PDZZ(:,:,:) ) END IF ! !------------------------------------------------------------------------------- ! -!* 2. COMPUTE THE DIVERGENCE +!* 2. COMPUTE THE DIVERGENCE ! ---------------------- ! NULLIFY(TZFIELDS_ll) @@ -284,6 +322,330 @@ IF (LIBM) THEN ! ENDIF ! +IF (MPPDB_INITIALIZED) THEN + !Check all OUT arrays + CALL MPPDB_CHECK(PQLAP,"QLAP end:PQLAP") +END IF !------------------------------------------------------------------------------- ! END FUNCTION QLAP + +#ifdef MNH_OPENACC +! ######################################################################### + SUBROUTINE QLAP_DEVICE(PQLAP,HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PY) +! ######################################################################### +! +!!**** *QLAP * - compute the complete quasi-laplacien QLAP of a field P +!! +!! PURPOSE +!! ------- +! This function computes the quasi-laplacien QLAP of the scalar field P +! localized at a mass point, with non-vanishing orography. +! The result is localized at a mass point and defined by: +! for Durran and MAE anelastic equations +! ( ( GX_M_U (PY) ) ) +! PQLAP = GDIV ( rho * CPd * Thetav * J ( GX_M_V (PY) ) ) +! ( ( GX_M_W (PY) ) ) +! or for Lipps and Hemler +! ( ( GX_M_U (PY) ) ) +! PQLAP = GDIV ( rho * J ( GX_M_V (PY) ) ) +! ( ( GX_M_W (PY) ) ) +! Where GX_M_.. are the cartesian components of the gradient of PY and +! GDIV is the operator acting on a vector AA: +! +! GDIV ( AA ) = J * divergence (1/J AA ) +! +!!** METHOD +!! ------ +!! First, we compute the gradients along x, y , z of the P field. The +!! result is multiplied by rhod * CPd * Thetav or rhod depending on the +!! chosen anelastic system where the suffixes indicate +!! d dry and v for virtual. +!! Then, the pseudo-divergence ( J * DIV (1/J o ) ) is computed by the +!! subroutine GDIV. The result is localized at a mass point. +!! +!! EXTERNAL +!! -------- +!! Function GX_M_U : compute the gradient along x +!! Function GY_M_V : compute the gradient along y +!! Function GZ_M_W : compute the gradient along z +!! FUNCTION MXM: compute an average in the x direction for a variable +!! at a mass localization +!! FUNCTION MYM: compute an average in the y direction for a variable +!! at a mass localization +!! FUNCTION MZM: compute an average in the z direction for a variable +!! at a mass localization +!! Subroutine GDIV : compute J times the divergence of 1/J times a vector +!! +!! IMPLICIT ARGUMENTS +!! ------------------ +!! Module MODD_PARAMETERS: JPHEXT, JPVEXT +!! Module MODD_CONF: L2D,CEQNSYS +!! Module MODD_CST : XCPD +!! +!! REFERENCE +!! --------- +!! Pressure solver documentation ( Scientific documentation ) +!! +!! AUTHOR +!! ------ +!! P. Hereil and J. Stein * Meteo France * +!! +!! MODIFICATIONS +!! ------------- +!! Original 11/07/94 +!! Modification 16/03/95 change the argument list of the gradient +!! 14/01/97 New anelastic equation ( Stein ) +!! 17/12/97 include the case of non-vanishing orography +!! at the lbc ( Stein ) +!! 06/12 V.Masson : update_halo due to CONTRAV changes +!! J.Escobar : 15/09/2015 : WENO5 & JPHEXT <> 1 +! P. Wautelet 20/05/2019: add name argument to ADDnFIELD_ll + new ADD4DFIELD_ll subroutine +! F. Auguste 02/21: add IBM +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +USE MODE_ll +! +USE MODD_PARAMETERS +USE MODD_CONF +USE MODD_CST +USE MODI_GDIV +USE MODI_GRADIENT_M +USE MODI_SHUMAN +USE MODI_SHUMAN_DEVICE +! +USE MODE_MPPDB +! +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +! +USE MODI_GET_HALO +USE MODD_IBM_PARAM_n, ONLY: XIBM_LS, LIBM, XIBM_SU +USE MODI_IBM_BALANCE +! +IMPLICIT NONE +! +!* 0.1 declarations of arguments +! +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PQLAP ! final divergence +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! x-direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! y-direction LBC type +! + ! Metric coefficients: +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDXX ! d*xx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDYY ! d*yy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZX ! d*zx +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZY ! d*zy +REAL, DIMENSION(:,:,:), INTENT(IN) :: PDZZ ! d*zz +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRHODJ ! density of reference * J +REAL, DIMENSION(:,:,:), INTENT(IN) :: PTHETAV ! virtual potential temp. at time t +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PY ! field components +! +!* 0.2 declarations of local variables +! +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZU ! rho*J*gradient along x +! +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZV ! rho*J*gradient along y +! +REAL, DIMENSION(:,:,:) , POINTER , CONTIGUOUS :: ZW ! rho*J*gradient along z +! +INTEGER :: IIU,IJU,IKU ! I,J,K array sizes +INTEGER :: JK,JJ,JI ! vertical loop index +TYPE(LIST_ll), POINTER :: TZFIELDS_ll ! list of fields to exchange +INTEGER :: IINFO_ll +INTEGER :: IIB,IIE,IJB,IJE,IKB,IKE +! +REAL, DIMENSION(:,:,:), pointer , contiguous :: ZMXM,ZMYM,ZMZM,ZRHODJ,ZGZMW +! +LOGICAL :: GWEST,GEAST,GSOUTH,GNORTH +!------------------------------------------------------------------------------- +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"QLAP beg:PDXX") + CALL MPPDB_CHECK(PDYY,"QLAP beg:PDYY") + CALL MPPDB_CHECK(PDZX,"QLAP beg:PDZX") + CALL MPPDB_CHECK(PDZY,"QLAP beg:PDZY") + CALL MPPDB_CHECK(PDZZ,"QLAP beg:PDZZ") + CALL MPPDB_CHECK(PRHODJ,"QLAP beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"QLAP beg:PTHETAV") + CALL MPPDB_CHECK(PY,"QLAP beg:PY") +END IF +! +! +!* 1. COMPUTE THE GRADIENT COMPONENTS +! ------------------------------- +! +CALL GET_DIM_EXT_ll('B',IIU,IJU) +CALL GET_INDICE_ll (IIB,IJB,IIE,IJE) +IKU=SIZE(PY,3) +IKE = IKU - JPVEXT +IKB = 1 + JPVEXT +! +GWEST = ( HLBCX(1) /= 'CYCL' .AND. LWEST_ll() ) +GEAST = ( HLBCX(2) /= 'CYCL' .AND. LEAST_ll() ) +GSOUTH = ( HLBCY(1) /= 'CYCL' .AND. LSOUTH_ll() ) +GNORTH = ( HLBCY(2) /= 'CYCL' .AND. LNORTH_ll() ) +! +CALL MNH_MEM_POSITION_PIN() +CALL MNH_MEM_GET( ZU, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZV, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZW, IIU, IJU, IKU ) +! +CALL MNH_MEM_GET( ZMXM, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZMYM, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZMZM, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZRHODJ, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZGZMW, IIU, IJU, IKU ) +! +CALL GX_M_U_DEVICE( 1, IKU, 1, PY(:,:,:), PDXX(:,:,:), PDZZ(:,:,:), PDZX(:,:,:), ZU(:,:,:) ) +CALL MPPDB_CHECK3D(ZU,'QLAP::ZU',PRECISION) +! +IF ( GWEST ) THEN + !$acc kernels async + DO JK=2,IKU-1 + DO JJ=1,IJU + ZU(IIB,JJ,JK)= (PY(IIB,JJ,JK) - PY(IIB-1,JJ,JK) - 0.5 * ( & + PDZX(IIB,JJ,JK) * (PY(IIB,JJ,JK)-PY(IIB,JJ,JK-1)) / PDZZ(IIB,JJ,JK) & + +PDZX(IIB,JJ,JK+1) * (PY(IIB,JJ,JK+1)-PY(IIB,JJ,JK)) / PDZZ(IIB,JJ,JK+1) & + ) ) / PDXX(IIB,JJ,JK) + END DO + END DO + !$acc end kernels +END IF +! +IF ( GEAST ) THEN + !$acc kernels async + DO JK=2,IKU-1 + DO JJ=1,IJU + ZU(IIE+1,JJ,JK)= (PY(IIE+1,JJ,JK) - PY(IIE+1-1,JJ,JK) - 0.5 * ( & + PDZX(IIE+1,JJ,JK) * (PY(IIE+1-1,JJ,JK)-PY(IIE+1-1,JJ,JK-1)) / PDZZ(IIE+1-1,JJ,JK) & + +PDZX(IIE+1,JJ,JK+1) * (PY(IIE+1-1,JJ,JK+1)-PY(IIE+1-1,JJ,JK)) / PDZZ(IIE+1-1,JJ,JK+1)& + ) ) / PDXX(IIE+1,JJ,JK) + END DO + END DO + !$acc end kernels +END IF +!$acc wait +! +CALL MPPDB_CHECK3D(ZU,'QLAP::ZU/W',PRECISION) +CALL MPPDB_CHECK3D(ZU,'QLAP::ZU/E',PRECISION) +! +IF(.NOT. L2D) THEN +! + CALL GY_M_V_DEVICE( 1, IKU, 1, PY(:,:,:), PDYY(:,:,:), PDZZ(:,:,:), PDZY(:,:,:), ZV(:,:,:) ) + CALL MPPDB_CHECK3D(ZV,'QLAP::ZV',PRECISION) +! + IF ( GSOUTH ) THEN + !$acc kernels async + DO JK=2,IKU-1 + DO JI=1,IIU + ZV(JI,IJB,JK)= (PY(JI,IJB,JK) - PY(JI,IJB-1,JK) - 0.5 * ( & + PDZY(JI,IJB,JK) * (PY(JI,IJB,JK)-PY(JI,IJB,JK-1)) / PDZZ(JI,IJB,JK) & + +PDZY(JI,IJB,JK+1) * (PY(JI,IJB,JK+1)-PY(JI,IJB,JK)) / PDZZ(JI,IJB,JK+1) & + ) ) / PDYY(JI,IJB,JK) + END DO + END DO + !$acc end kernels + END IF + + + IF ( GNORTH ) THEN + !$acc kernels async + DO JK=2,IKU-1 + DO JI=1,IIU + ZV(JI,IJE+1,JK)= (PY(JI,IJE+1,JK) - PY(JI,IJE+1-1,JK) - 0.5 * ( & + PDZY(JI,IJE+1,JK) * (PY(JI,IJE+1-1,JK)-PY(JI,IJE+1-1,JK-1)) & + / PDZZ(JI,IJE+1-1,JK) & + +PDZY(JI,IJE+1,JK+1) * (PY(JI,IJE+1-1,JK+1)-PY(JI,IJE+1-1,JK)) & + / PDZZ(JI,IJE+1-1,JK+1)& + ) ) / PDYY(JI,IJE+1,JK) + END DO + END DO + !$acc end kernels + END IF + !$acc wait + CALL MPPDB_CHECK3D(ZV,'QLAP::ZV/S',PRECISION) + CALL MPPDB_CHECK3D(ZV,'QLAP::ZV/N',PRECISION) +! +ELSE + ZV(:,:,:) = 0. +ENDIF +! +IF ( CEQNSYS == 'DUR' .OR. CEQNSYS == 'MAE' ) THEN + !$acc kernels + ZRHODJ(:,:,:) = PRHODJ(:,:,:) * XCPD * PTHETAV(:,:,:) + !$acc end kernels + CALL MXM_DEVICE( ZRHODJ(:,:,:), ZMXM(:,:,:) ) + !$acc kernels present(ZU) + ZU(:,:,:) = ZMXM(:,:,:) * ZU(:,:,:) + !$acc end kernels + IF(.NOT. L2D) THEN + CALL MYM_DEVICE( ZRHODJ(:,:,:), ZMYM(:,:,:) ) + !$acc kernels present(ZV) + ZV(:,:,:) = ZMYM(:,:,:) * ZV(:,:,:) + !$acc end kernels + END IF + CALL MZM_DEVICE( ZRHODJ(:,:,:), ZMZM(:,:,:) ) + CALL GZ_M_W_DEVICE( 1, IKU, 1, PY(:,:,:), PDZZ(:,:,:), ZGZMW(:,:,:) ) + !$acc kernels present(ZW) + ZW(:,:,:) = ZMZM(:,:,:) * ZGZMW(:,:,:) + !$acc end kernels +ELSE IF ( CEQNSYS == 'LHE' ) THEN + ZU(:,:,:) = MXM( PRHODJ(:,:,:) ) * ZU(:,:,:) + IF(.NOT. L2D) THEN + ZV(:,:,:) = MYM( PRHODJ(:,:,:) ) * ZV(:,:,:) + ENDIF + ZW(:,:,:) = MZM( PRHODJ(:,:,:) ) * GZ_M_W( 1, IKU, 1, PY(:,:,:), PDZZ(:,:,:) ) +END IF +! +!------------------------------------------------------------------------------- +! +!* 2. COMPUTE THE DIVERGENCE +! ---------------------- +! +CALL GET_HALO_D(ZU,HNAME='QLAP::ZU') +CALL GET_HALO_D(ZV,HNAME='QLAP::ZV') +CALL GET_HALO_D(ZW,HNAME='QLAP::ZW') +! +#ifndef MNH_OPENACC +CALL GDIV(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,ZU,ZV,ZW,PQLAP) +#else +CALL GDIV_DEVICE(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,ZU,ZV,ZW,PQLAP) +#endif +! +IF (LIBM) THEN + ! + CALL IBM_BALANCE(XIBM_LS,XIBM_SU,ZU,ZV,ZW,PQLAP) + ! + PQLAP(:,:,IKB-1) = PQLAP(:,:,IKB-1)*XIBM_SU(:,:,IKB,1) + PQLAP(:,:,IKE+1) = PQLAP(:,:,IKE+1)*XIBM_SU(:,:,IKE,1) + ! + IF ( HLBCX(1) /= 'CYCL' ) THEN + IF(LWEST_ll()) PQLAP(IIB-1,:,:) = PQLAP(IIB-1,:,:)*XIBM_SU(IIB,:,:,1) + IF(LEAST_ll()) PQLAP(IIE+1,:,:) = PQLAP(IIE+1,:,:)*XIBM_SU(IIE,:,:,1) + ENDIF + ! + IF ( HLBCY(1) /= 'CYCL' ) THEN + IF (LSOUTH_ll()) PQLAP(:,IJB-1,:) = PQLAP(:,IJB-1,:)*XIBM_SU(:,IJB,:,1) + IF (LNORTH_ll()) PQLAP(:,IJE+1,:) = PQLAP(:,IJE+1,:)*XIBM_SU(:,IJE,:,1) + ENDIF + ! +ENDIF +! +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() + +IF (MPPDB_INITIALIZED) THEN + !Check all OUT arrays + CALL MPPDB_CHECK(PQLAP,"QLAP end:PQLAP") +END IF +!------------------------------------------------------------------------------- +! +END SUBROUTINE QLAP_DEVICE +#endif diff --git a/src/MNH/richardson.f90 b/src/MNH/richardson.f90 index de6643c686223559ebe0cc44c5ddad9db98c26da..fbce9d7c3e566e6a7066ce50c69c4143ca101f53 100644 --- a/src/MNH/richardson.f90 +++ b/src/MNH/richardson.f90 @@ -131,6 +131,12 @@ END MODULE MODI_RICHARDSON !* 0. DECLARATIONS ! ------------ USE MODD_CONF + +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +USE MODE_MPPDB + USE MODI_QLAP USE MODI_FLAT_INV ! @@ -189,9 +195,47 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PPHI ! solution of the equation ! INTEGER :: JM ! loop index ! +#ifndef MNH_OPENACC REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZF_1_Y ! RHS of the preconditioned problem REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZCORREC ! iterative correction to the solution REAL, DIMENSION(SIZE(PPHI,1),SIZE(PPHI,2),SIZE(PPHI,3)) :: ZWORK ! quasi-laplacien Q of the iterative solution +#else +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZF_1_Y ! RHS of the preconditioned problem +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZCORREC ! iterative correction to the solution +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZWORK ! quasi-laplacien Q of the iterative solution +#endif +! +! +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PDXX,"RICHARDSON beg:PDXX") + CALL MPPDB_CHECK(PDYY,"RICHARDSON beg:PDYY") + CALL MPPDB_CHECK(PDZX,"RICHARDSON beg:PDZX") + CALL MPPDB_CHECK(PDZY,"RICHARDSON beg:PDZY") + CALL MPPDB_CHECK(PDZZ,"RICHARDSON beg:PDZZ") + CALL MPPDB_CHECK(PRHODJ,"RICHARDSON beg:PRHODJ") + CALL MPPDB_CHECK(PTHETAV,"RICHARDSON beg:PTHETAV") + CALL MPPDB_CHECK(PRHOM,"RICHARDSON beg:PRHOM") + CALL MPPDB_CHECK(PAF,"RICHARDSON beg:PAF") + CALL MPPDB_CHECK(PCF,"RICHARDSON beg:PCF") + CALL MPPDB_CHECK(PBF,"RICHARDSON beg:PBF") + CALL MPPDB_CHECK(PY,"RICHARDSON beg:PY") + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"RICHARDSON beg:PPHI") +END IF + +!$acc data present( PDXX, PDYY, PDZX, PDZY, PDZZ ) & +!$acc & present( PRHODJ, PTHETAV, PRHOM, PAF, PBF, PCF, PTRIGSX, PTRIGSY, KIFAXX, KIFAXY, PY ) & +!$acc & present( PPHI ) + +#ifdef MNH_OPENACC +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZF_1_Y, SIZE( PPHI, 1 ), SIZE( PPHI, 2 ), SIZE( PPHI, 3 ) ) +CALL MNH_MEM_GET( ZCORREC, SIZE( PPHI, 1 ), SIZE( PPHI, 2 ), SIZE( PPHI, 3 ) ) +CALL MNH_MEM_GET( ZWORK, SIZE( PPHI, 1 ), SIZE( PPHI, 2 ), SIZE( PPHI, 3 ) ) +#endif !------------------------------------------------------------------------------- ! !* 1. COMPUTE THE RHS OF THE PRECONDITIONED PROBLEM @@ -206,23 +250,45 @@ CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! -1 ! -------------- ! IF ( KTCOUNT < 1 .OR. ( KTCOUNT == 1 .AND. CCONF == 'START') ) THEN +!$acc kernels PPHI(:,:,:) = ZF_1_Y(:,:,:) ! when no first guess is available, we take the solution for the flat problem +!$acc end kernels END IF ! DO JM = 1,KITR ! +#ifndef MNH_OPENACC ZWORK(:,:,:) = QLAP(HLBCX,HLBCY,PDXX,PDYY,PDZX,PDZY,PDZZ,PRHODJ,PTHETAV,PPHI) ! Q (PHI) +#else + CALL QLAP_DEVICE( ZWORK, HLBCX, HLBCY, PDXX, PDYY, PDZX, PDZY, PDZZ, PRHODJ, PTHETAV, PPHI ) ! Q (PHI) +#endif ! +!$acc kernels ZCORREC(:,:,:) = 0. +!$acc end kernels ! CALL FLAT_INV(HLBCX,HLBCY,PDXHATM,PDYHATM,PRHOM,PAF,PBF,PCF, & ! -1 PTRIGSX,PTRIGSY,KIFAXX,KIFAXY,ZWORK,ZCORREC ) ! F * Q (PHI) ! ! -1 -1 ! update the iterative solution PHI = PHI + relax* (F (Y) - F * Q (PHI)) -! +!$acc kernels PPHI(:,:,:) = PPHI(:,:,:) + PRELAX * ( ZF_1_Y(:,:,:) - ZCORREC(:,:,:) ) +!$acc end kernels +! END DO + +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +!$acc end data + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PPHI,"RICHARDSON end:PPHI") +END IF !------------------------------------------------------------------------------- ! END SUBROUTINE RICHARDSON