diff --git a/tensorproductmultigrid_Source/communication.f90 b/tensorproductmultigrid_Source/communication.f90 index ed3b86f9ebfa78bcfdb97b97b64409f1421055b0..8fc5fe85790e5dcf21ee8015d422a4522a19c91d 100644 --- a/tensorproductmultigrid_Source/communication.f90 +++ b/tensorproductmultigrid_Source/communication.f90 @@ -47,6 +47,7 @@ module communication public::comm_preinitialise public::comm_initialise public::comm_finalise +public::scalarprod_mnh public::scalarprod public::boundary_mnh public::haloswap_mnh @@ -378,6 +379,54 @@ contains !================================================================== ! Scalar product of two fields !================================================================== + subroutine scalarprod_mnh(m, a, b, s) + implicit none + integer, intent(in) :: m + type(scalar3d), intent(in) :: a + type(scalar3d), intent(in) :: b + real(kind=rl), intent(out) :: s + integer :: nprocs, rank, ierr + integer :: p_horiz(2) + integer :: stepsize + integer, parameter :: dim_horiz = 2 + real(kind=rl) :: local_sum, global_sum + integer :: nlocal, nz, i + real(kind=rl) :: ddot + + nlocal = a%ix_max-a%ix_min+1 + nz = a%grid_param%nz + ! Work out coordinates of processor + call mpi_comm_size(MPI_COMM_HORIZ,nprocs,ierr) + call mpi_comm_rank(MPI_COMM_HORIZ,rank,ierr) + stepsize = 2**(pproc-m) + if (nprocs > 1) then + ! Only inlcude local sum if the processor coordinates + ! are multiples of stepsize + call mpi_cart_coords(MPI_COMM_HORIZ,rank,dim_horiz,p_horiz,ierr) + if ( (stepsize == 1) .or. & + ( (stepsize > 1) .and. & + (mod(p_horiz(1),stepsize)==0) .and. & + (mod(p_horiz(2),stepsize)==0) ) ) then + local_sum = 0.0_rl + do i = 1, nlocal + local_sum = local_sum & + + ddot((nz+2)*nlocal,a%s(0,1,i),1,b%s(0,1,i),1) + end do + else + local_sum = 0.0_rl + end if + call mpi_allreduce(local_sum,global_sum,1,MPI_DOUBLE_PRECISION, & + MPI_SUM,MPI_COMM_HORIZ,ierr) + else + global_sum = 0.0_rl + do i = 1, nlocal + global_sum = global_sum & + + ddot((nz+2)*nlocal,a%s(0,1,i),1,b%s(0,1,i),1) + end do + end if + s = global_sum + end subroutine scalarprod_mnh +!------------------------------------------------------------------------------- subroutine scalarprod(m, a, b, s) implicit none integer, intent(in) :: m diff --git a/tensorproductmultigrid_Source/conjugategradient.f90 b/tensorproductmultigrid_Source/conjugategradient.f90 index d0253bf9a93d5febcb8081da1284312a080c7404..320e272c9ffbd58d44279094ad1edcd57656c184 100644 --- a/tensorproductmultigrid_Source/conjugategradient.f90 +++ b/tensorproductmultigrid_Source/conjugategradient.f90 @@ -225,9 +225,9 @@ contains if (LUseO) call dcopy(n_lin,z%s,1,p%s,1) if (LUseT) call dcopy(n_lin,z%st,1,p%st,1) ! rz_old = <r,z> - call scalarprod(m,r,z,rz_old) + call scalarprod_mnh(m,r,z,rz_old) ! res0 <- ||r|| - call scalarprod(m,r,r,res0) + call scalarprod_mnh(m,r,r,res0) res0 = dsqrt(res0) if (cg_param%verbose > 0) then if (i_am_master_mpi) then @@ -241,7 +241,7 @@ contains call haloswap_mnh(level,m,p) call apply_mnh(p,Ap) ! alpha <- res_old / <p,A.p> - call scalarprod(m,p,Ap,pAp) + call scalarprod_mnh(m,p,Ap,pAp) alpha = rz_old/pAp ! x <- x + alpha*p if (LUseO) call daxpy(n_lin,alpha,p%s,1,u%s,1) @@ -249,7 +249,7 @@ contains ! r <- r - alpha*A.p if (LUseO) call daxpy(n_lin,-alpha,Ap%s,1,r%s,1) if (LUseT) call daxpy(n_lin,-alpha,Ap%st,1,r%st,1) - call scalarprod(m,r,r,res) + call scalarprod_mnh(m,r,r,res) res = dsqrt(res) if (cg_param%verbose > 1) then if (i_am_master_mpi) then @@ -272,7 +272,7 @@ contains if (LUseO) call dcopy(n_lin,r%s,1,z%s,1) if (LUseT) call dcopy(n_lin,r%st,1,z%st,1) end if - call scalarprod(m,r,z,rz) + call scalarprod_mnh(m,r,z,rz) ! p <- res/res_old*p if (LUseO) call dscal(n_lin,rz/rz_old,p%s,1) if (LUseT) call dscal(n_lin,rz/rz_old,p%st,1) diff --git a/tensorproductmultigrid_Source/multigrid.f90 b/tensorproductmultigrid_Source/multigrid.f90 index 5ec2a7a631fbc233e3153807790fb761a1607006..897ac633f0481030fec97ea9d2cd507879fa3bb6 100644 --- a/tensorproductmultigrid_Source/multigrid.f90 +++ b/tensorproductmultigrid_Source/multigrid.f90 @@ -1494,7 +1494,7 @@ contains if (LUseT) call dcopy(n_gridpoints,xu_mg(level,pproc)%st,1,pp%st,1) ! Calculate rz_old = <pp,b> call start_timer(t_scalprod) - call scalarprod(pproc,pp,xb_mg(level,pproc),rz_old) + call scalarprod_mnh(pproc,pp,xb_mg(level,pproc),rz_old) call finish_timer(t_scalprod) do iter = 1, maxiter ! Apply matrix q <- A.pp @@ -1503,7 +1503,7 @@ contains call finish_timer(t_apply) ! Calculate pq <- <pp,q> call start_timer(t_scalprod) - call scalarprod(pproc,pp,q,pq) + call scalarprod_mnh(pproc,pp,q,pq) call finish_timer(t_scalprod) alpha = rz_old/pq ! x <- x + alpha*p @@ -1539,7 +1539,7 @@ contains if (LUseO) call dcopy(n_gridpoints,xu_mg(level,pproc)%s,1,q%s,1) if (LUseT) call dcopy(n_gridpoints,xu_mg(level,pproc)%st,1,q%st,1) call start_timer(t_scalprod) - call scalarprod(pproc,q,xb_mg(level,pproc),rz) + call scalarprod_mnh(pproc,q,xb_mg(level,pproc),rz) call finish_timer(t_scalprod) beta = rz/rz_old ! p <- beta*p