diff --git a/src/ZSOLVER/get_halo.f90 b/src/ZSOLVER/get_halo.f90 index 717d08414fc4a38bb9b8d37d26d2719aa1c5affc..7dec4456cba761f8600a66e3bdf78a86b3c860de 100644 --- a/src/ZSOLVER/get_halo.f90 +++ b/src/ZSOLVER/get_halo.f90 @@ -63,6 +63,19 @@ INTERFACE ! END SUBROUTINE GET_HALO2_DD END INTERFACE +INTERFACE + SUBROUTINE GET_HALO2_DF(PSRC, TP_PSRC_HALO2_ll, HNAME) + ! + USE MODD_ARGSLIST_ll, ONLY : HALO2LIST_ll + ! + IMPLICIT NONE + ! + REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PSRC ! variable at t + TYPE(HALO2LIST_ll), POINTER :: TP_PSRC_HALO2_ll ! halo2 for SRC + character(len=*), optional, intent(in) :: HNAME ! Name of the field to be added + ! + END SUBROUTINE GET_HALO2_DF +END INTERFACE INTERFACE SUBROUTINE GET_HALO_D(PSRC, HDIR, HNAME) IMPLICIT NONE @@ -243,13 +256,16 @@ MODULE MODD_HALO_D REAL, SAVE , ALLOCATABLE, DIMENSION(:,:) :: ZNORTHC_2D_IN, ZSOUTHC_2D_IN, ZWESTC_2D_IN, ZEASTC_2D_IN REAL, SAVE , ALLOCATABLE, DIMENSION(:,:) :: ZNORTHC_2D_OUT, ZSOUTHC_2D_OUT, ZWESTC_2D_OUT, ZEASTC_2D_OUT - - + + REAL, SAVE , ALLOCATABLE, DIMENSION(:,:,:) :: ZNORTH2F_IN, ZSOUTH2F_IN, ZWEST2F_IN, ZEAST2F_IN + REAL, SAVE , ALLOCATABLE, DIMENSION(:,:,:) :: ZNORTH2F_OUT, ZSOUTH2F_OUT, ZWEST2F_OUT, ZEAST2F_OUT + LOGICAL, SAVE :: GFIRST_GET_HALO_D = .TRUE. LOGICAL, SAVE :: GFIRST_INIT_HALO_D = .TRUE. INTEGER, SAVE :: IHALO_1 INTEGER, SAVE :: NP_NORTH,NP_SOUTH,NP_WEST,NP_EAST + INTEGER, SAVE :: IHALO2,IHALO2_1 CONTAINS @@ -267,7 +283,9 @@ CONTAINS IF (GFIRST_INIT_HALO_D) THEN ! - IHALO_1 = NHALO-1 + IHALO_1 = NHALO-1 + IHALO2 = MAX(2,NHALO) + IHALO2_1 = IHALO2-1 ! ! Init HALO ! @@ -338,6 +356,20 @@ CONTAINS ALLOCATE ( ZWESTC_2D_OUT ( 1:IIB-1 , IJB:IJE ) ) ALLOCATE ( ZEASTC_2D_OUT ( IIE+1:IIU , IJB:IJE ) ) !$acc enter data create (ZNORTHC_2D_OUT, ZSOUTHC_2D_OUT, ZWESTC_2D_OUT, ZEASTC_2D_OUT) + ! + ! Init HALO2 for Full update in 1 time <-> GET_HALO + GET_HALO2 + ! + ALLOCATE ( ZSOUTH2F_IN ( IIB:IIE , IJB:IJB+IHALO2_1 , IKU ) ) + ALLOCATE ( ZNORTH2F_IN ( IIB:IIE , IJE-IHALO2_1:IJE , IKU ) ) + ALLOCATE ( ZWEST2F_IN ( IIB:IIB+IHALO2_1 , IJB:IJE , IKU ) ) + ALLOCATE ( ZEAST2F_IN ( IIE-IHALO2_1:IIE , IJB:IJE , IKU ) ) + !$acc enter data create (ZNORTH2F_IN, ZSOUTH2F_IN, ZWEST2F_IN, ZEAST2F_IN) + ! + ALLOCATE ( ZSOUTH2F_OUT ( IIB:IIE , IJB-IHALO2:IJB-1 , IKU ) ) + ALLOCATE ( ZNORTH2F_OUT ( IIB:IIE , IJE+1:IJE+IHALO2 , IKU ) ) + ALLOCATE ( ZWEST2F_OUT ( IIB-IHALO2:IIB-1 , IJB:IJE , IKU ) ) + ALLOCATE ( ZEAST2F_OUT ( IIE+1:IIE+IHALO2 , IJB:IJE , IKU ) ) + !$acc enter data create (ZNORTH2F_OUT, ZSOUTH2F_OUT, ZWEST2F_OUT, ZEAST2F_OUT) IF (.NOT. GWEST ) THEN NP_WEST = ( IP-1 -1 ) + 1 @@ -2104,6 +2136,268 @@ END IF !$acc end data END SUBROUTINE GET_HALO2_DD +!------------------------------------------------------------------------------- +! ######################################## + SUBROUTINE GET_HALO2_DF(PSRC, TP_PSRC_HALO2F_ll, HNAME) +! ######################################## +#define MNH_GPUDIRECT +! +USE MODD_HALO_D +USE MODE_ll +USE MODD_ARGSLIST_ll, ONLY : LIST_ll +USE MODD_PARAMETERS, ONLY : JPHEXT +! +USE MODD_IO, ONLY : GSMONOPROC +USE MODE_MNH_ZWORK, ONLY : GWEST , GEAST, GSOUTH , GNORTH +USE MODE_MNH_ZWORK, ONLY : IIU,IJU,IKU +USE MODE_MNH_ZWORK, ONLY : IIB,IJB ,IIE,IJE +! +USE MODD_CONF, ONLY : NHALO +USE MODE_DEVICE +USE MODE_MPPDB +! +USE MODD_VAR_ll, ONLY : IP,NPROC,NP1,NP2 +USE MODD_VAR_ll, ONLY : NMNH_COMM_WORLD +USE MODD_MPIF, ONLY : MPI_STATUSES_IGNORE +USE MODD_PRECISION, ONLY : MNHREAL_MPI +! +IMPLICIT NONE +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PSRC ! variable at t +TYPE(HALO2LIST_ll), POINTER :: TP_PSRC_HALO2F_ll ! halo2 for SRC +character(len=*), optional, intent(in) :: HNAME ! Name of the field to be added +! +character(len=:), allocatable :: yname +INTEGER :: IERROR ! error return code + +INTEGER,PARAMETER :: IS_WEST=1 , IS_EAST=2, IS_SOUTH=3, IS_NORTH=4 + +LOGICAL :: LX , LY + +INTEGER :: INB_REQ , IREQ(8) +INTEGER :: IERR + +REAL , DIMENSION(:,:) , POINTER , CONTIGUOUS :: ZH2F_EAST,ZH2F_WEST,ZH2F_NORTH,ZH2F_SOUTH + +if ( NPROC == 1 ) RETURN + +!$acc data present ( PSRC ) & +!$acc present (ZNORTH2F_IN, ZSOUTH2F_IN, ZWEST2F_IN, ZEAST2F_IN) & +!$acc present (ZNORTH2F_OUT, ZSOUTH2F_OUT, ZWEST2F_OUT, ZEAST2F_OUT) +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +LX = .TRUE. +LY = .TRUE. + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +INB_REQ = 0 + +! +! Post the recieve of Zxxxx_IN buffer first via MPI(Gpu_direct) +! + +IF (LX) THEN + IF (.NOT. GWEST) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZWEST2F_OUT) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_IRECV(ZWEST2F_OUT,SIZE(ZWEST2F_OUT),MNHREAL_MPI,NP_WEST-1,1000+IS_EAST,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + END IF + IF (.NOT.GEAST) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZEAST2F_OUT) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_IRECV(ZEAST2F_OUT,SIZE(ZEAST2F_OUT),MNHREAL_MPI,NP_EAST-1,1000+IS_WEST,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + ENDIF +END IF + +IF (LY) THEN + IF (.NOT.GSOUTH) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZSOUTH2F_OUT) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_IRECV(ZSOUTH2F_OUT,SIZE(ZSOUTH2F_OUT),MNHREAL_MPI,NP_SOUTH-1,1000+IS_NORTH,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + ENDIF + IF (.NOT.GNORTH) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZNORTH2F_OUT) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_IRECV(ZNORTH2F_OUT,SIZE(ZNORTH2F_OUT),MNHREAL_MPI,NP_NORTH-1,1000+IS_SOUTH,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + ENDIF +END IF + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +!Copy the halo on the device PSRC to Zxxxx_IN + +IF (LX) THEN + IF (.NOT. GWEST) THEN + !$acc kernels async(IS_WEST) + ZWEST2F_IN ( IIB:IIB+IHALO2_1 , IJB:IJE , : ) = PSRC( IIB:IIB+IHALO2_1 , IJB:IJE , : ) +!!$ ZWEST2F_IN ( : , : ) = PSRC( IIB+1 , : , : ) + !$acc end kernels + END IF + IF (.NOT.GEAST) THEN + !$acc kernels async(IS_EAST) + ZEAST2F_IN ( IIE-IHALO2_1:IIE , IJB:IJE , : ) = PSRC( IIE-IHALO2_1:IIE , IJB:IJE , : ) +!!$ ZEAST2F_IN ( : , : ) = PSRC( IIE-1 , : , : ) + !$acc end kernels + ENDIF +END IF +IF (LY) THEN + IF (.NOT.GSOUTH) THEN + !$acc kernels async(IS_SOUTH) + ZSOUTH2F_IN ( IIB:IIE , IJB:IJB+IHALO2_1 , : ) = PSRC( IIB:IIE , IJB:IJB+IHALO2_1 , : ) +!!$ ZSOUTH2F_IN ( : , : ) = PSRC( : , IJB+1 , : ) + !$acc end kernels + ENDIF + IF (.NOT.GNORTH) THEN + !$acc kernels async(IS_NORTH) + ZNORTH2F_IN ( IIB:IIE , IJE-IHALO2_1:IJE , : ) = PSRC( IIB:IIE , IJE-IHALO2_1:IJE , : ) +!!$ ZNORTH2F_IN ( : , : ) = PSRC( : , IJE-1 , : ) + !$acc end kernels + ENDIF +ENDIF +!$acc wait + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! +! Send Zxxxx2F_IN buffer via MPI(Gpu_direct) or copy to host +! +IF (LX) THEN + IF (.NOT. GWEST) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZWEST2F_IN) +#else + !$acc update host(ZWEST2F_IN) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_ISEND(ZWEST2F_IN,SIZE(ZWEST2F_IN) ,MNHREAL_MPI,NP_WEST-1,1000+IS_WEST,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + END IF + IF (.NOT.GEAST) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZEAST2F_IN) +#else + !$acc update host(ZEAST2F_IN) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_ISEND(ZEAST2F_IN,SIZE(ZEAST2F_IN) ,MNHREAL_MPI,NP_EAST-1,1000+IS_EAST,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + ENDIF +END IF + +IF (LY) THEN + IF (.NOT.GSOUTH) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZSOUTH2F_IN) +#else + !$acc update host(ZSOUTH2F_IN) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_ISEND(ZSOUTH2F_IN,SIZE(ZSOUTH2F_IN) ,MNHREAL_MPI,NP_SOUTH-1,1000+IS_SOUTH,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + ENDIF + IF (.NOT.GNORTH) THEN +#ifdef MNH_GPUDIRECT + !$acc host_data use_device(ZNORTH2F_IN) +#else + !$acc update host(ZNORTH2F_IN) +#endif + INB_REQ = INB_REQ + 1 + CALL MPI_ISEND(ZNORTH2F_IN,SIZE(ZNORTH2F_IN) ,MNHREAL_MPI,NP_NORTH-1,1000+IS_NORTH,NMNH_COMM_WORLD,IREQ(INB_REQ),IERR) +#ifdef MNH_GPUDIRECT + !$acc end host_data +#endif + ENDIF +ENDIF + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +IF ( INB_REQ > 0 ) THEN + CALL MPI_WAITALL(INB_REQ,IREQ,MPI_STATUSES_IGNORE,IERR) +END IF + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +! Update halo in PSRC + %HALO2 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + +IF (LX) THEN + IF (.NOT.GWEST) THEN +#ifndef MNH_GPUDIRECT + !$acc update device(ZWEST2F_OUT) async(IS_WEST) +#endif + ZH2F_WEST => TP_PSRC_HALO2F_ll%HALO2%WEST + !$acc kernels async(IS_WEST) + PSRC( 1:IIB-1 , IJB:IJE , : ) = ZWEST2F_OUT( 1:IIB-1 , IJB:IJE , : ) + ZH2F_WEST( IJB:IJE , : ) = ZWEST2F_OUT( IIB-2, IJB:IJE , : ) + !$acc end kernels + ENDIF + IF (.NOT.GEAST) THEN +#ifndef MNH_GPUDIRECT + !$acc update device(ZEAST2F_OUT) async(IS_EAST) +#endif + ZH2F_EAST => TP_PSRC_HALO2F_ll%HALO2%EAST + !$acc kernels async(IS_EAST) + PSRC( IIE+1:IIU , IJB:IJE , : ) = ZEAST2F_OUT( IIE+1:IIU , IJB:IJE , : ) + ZH2F_EAST( IJB:IJE , : ) = ZEAST2F_OUT( IIE+2 , IJB:IJE , : ) + !$acc end kernels + ENDIF +END IF +IF (LY) THEN + IF (.NOT.GSOUTH) THEN +#ifndef MNH_GPUDIRECT + !$acc update device(ZSOUTH2F_OUT) async(IS_SOUTH) +#endif + ZH2F_SOUTH => TP_PSRC_HALO2F_ll%HALO2%SOUTH + !$acc kernels async(IS_SOUTH) + PSRC( IIB:IIE , 1:IJB-1 , : ) = ZSOUTH2F_OUT( IIB:IIE , 1:IJB-1 , : ) + ZH2F_SOUTH( IIB:IIE , : ) = ZSOUTH2F_OUT( IIB:IIE , IJB-2 , : ) + !$acc end kernels + ENDIF + IF (.NOT.GNORTH) THEN +#ifndef MNH_GPUDIRECT + !$acc update device(ZNORTH2F_OUT) async(IS_NORTH) +#endif + ZH2F_NORTH => TP_PSRC_HALO2F_ll%HALO2%NORTH + !$acc kernels async(IS_NORTH) + PSRC( IIB:IIE , IJE+1:IJU , : ) = ZNORTH2F_OUT ( IIB:IIE , IJE+1:IJU , : ) + ZH2F_NORTH( IIB:IIE , : ) = ZNORTH2F_OUT ( IIB:IIE , IJE+2 , : ) + !$acc end kernels + ENDIF +END IF +!$acc wait + +!$acc end data + +END SUBROUTINE GET_HALO2_DF ! ! ################################################### SUBROUTINE GET_HALO2_D(PSRC, TP_PSRC_HALO2_ll, HNAME)