discretisation.f90

!=== COPYRIGHT AND LICENSE STATEMENT ===
!
!  This file is part of the TensorProductMultigrid code.
!  
!  (c) The copyright relating to this work is owned jointly by the
!  Crown, Met Office and NERC [2014]. However, it has been created
!  with the help of the GungHo Consortium, whose members are identified
!  at https://puma.nerc.ac.uk/trac/GungHo/wiki .
!  
!  Main Developer: Eike Mueller
!  
!  TensorProductMultigrid is free software: you can redistribute it and/or
!  modify it under the terms of the GNU Lesser General Public License as
!  published by the Free Software Foundation, either version 3 of the
!  License, or (at your option) any later version.
!  
!  TensorProductMultigrid is distributed in the hope that it will be useful,
!  but WITHOUT ANY WARRANTY; without even the implied warranty of
!  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!  GNU Lesser General Public License for more details.
!  
!  You should have received a copy of the GNU Lesser General Public License
!  along with TensorProductMultigrid (see files COPYING and COPYING.LESSER).
!  If not, see <http://www.gnu.org/licenses/>.
!
!=== COPYRIGHT AND LICENSE STATEMENT ===


!==================================================================
!
!  Discretisation module of the model problem
!
!
!  -omega2 * (d^2/dx^2 + d^2/dy^2 + lambda2 * d^2/dz^2 ) u
!                                                + delta u = RHS
!  [Cartesian]
!
!  or
!
!  -omega2 * (laplace_{2d} + lambda2/r^2 d/dr (r^2 d/dr)) u
!                                                + delta u = RHS
!  [Spherical]
!
!  We use a cell centered finite volume discretisation with
!  The equation is discretised either in a unit cube or on 1/6th
!  of a cubed sphere grid.
!
!  The vertical grid spacing is not necessarily uniform and can
!  be chosen by specifying the vertical grid in a vector.
!
!  The following boundary conditions are used:
!
!  * Dirichlet in the horizontal
!  * Neumann in the vertical
!
!  For delta = 0 the operator reduces to the Poisson operator.
!
!    Eike Mueller, University of Bath, Feb 2012
!
!==================================================================

module discretisation

  use parameters
  use messages
  use datatypes
  use communication
#ifndef MNH
  use mpi
#else
  use modd_mpif
#endif 

  implicit none

private

  type model_parameters
    real(kind=rl) :: omega2   ! omega^2
    real(kind=rl) :: lambda2  ! lambda^2
    real(kind=rl) :: delta    ! delta
  end type model_parameters

! --- Stencil ---
!

! Grid traversal direction in SOR
  integer, parameter :: DIRECTION_FORWARD = 1
  integer, parameter :: DIRECTION_BACKWARD = 2

! Ordering in SOR
  ! Lexicographic ordering
  integer, parameter :: ORDERING_LEX = 1
  ! Red-black ordering
  integer, parameter :: ORDERING_RB = 2

  type smoother_parameters
    ! smoother
    integer :: smoother
    ! relaxation parameter
    real(kind=rl) :: rho
    ! ordering of degrees of freedom
    integer :: ordering
  end type smoother_parameters

  ! Allowed smoothers
  integer, parameter :: SMOOTHER_LINE_SOR = 3
  integer, parameter :: SMOOTHER_LINE_SSOR = 4
  integer, parameter :: SMOOTHER_LINE_JAC = 6

  ! Number of levels
  integer :: nlev

  ! Grid parameters
  type(grid_parameters) :: grid_param

  ! Model parameters
  type(model_parameters) :: model_param

  ! Smoother parameters
  type(smoother_parameters) :: smoother_param

  ! Arrays for measuring the residual reduction
  real(kind=rl), allocatable :: log_resreduction(:)
  integer, allocatable :: nsmooth_total(:)

  ! Data structure for storing the vertical discretisation
  type vertical_coefficients
    real(kind=rl), pointer , contiguous :: a(:)
    real(kind=rl), pointer , contiguous :: b(:)
    real(kind=rl), pointer , contiguous :: c(:)
    real(kind=rl), pointer , contiguous :: d(:)
  end type vertical_coefficients

  ! Stoarge for vertical coefficients
  type(vertical_coefficients) :: vert_coeff

  type Temp_jacobi
     real(kind=rl), pointer , contiguous :: r(:)
     real(kind=rl), pointer , contiguous :: c(:), utmp(:)
     real(kind=rl), pointer , contiguous :: u0(:,:,:) 
     real(kind=rl), pointer , contiguous :: ut0(:,:,:)
     type(scalar3d) , pointer            :: Sr,Sc,Sut0,Sutmp
  end type Temp_jacobi

  integer , parameter :: max_lev = 128
  type (Temp_jacobi) , save , dimension(max_lev) :: Tjacobi

  real(kind=rl), pointer , contiguous , dimension(:) :: zt1d_discretisation
  INTEGER                                            :: zt1d_discretisation_size = 10000000
  INTEGER , parameter                                :: zt1d_discretisation_size_factor = 4 * 8
  INTEGER                                            :: nt1d_discretisation_top = 0 , nt1d_discretisation_max = 0
  
  INTEGER , ALLOCATABLE, DIMENSION (:)               :: nt1d_discretisation_pointer , nt1d_discretisation_pointer_ksize
  INTEGER , parameter                                :: zt1d_discretisation_pointer_size = 128
  INTEGER                                            :: nt1d_discretisation_pointer_top = 0 , nt1d_discretisation_pointer_max = 0
  
  logical                                            :: gfirst_call_zt1d_discretisation = .true.
  

public::discretisation_initialise_mnh
public::discretisation_initialise
public::discretisation_finalise
public::smooth_mnh
public::smooth
public::line_SOR
public::line_SSOR
public::line_jacobi_mnh
public::line_jacobi
public::calculate_residual_mnh
public::calculate_residual
public::apply_mnh
public::apply
public::model_parameters
public::smoother_parameters
public::volume_of_element
public::SMOOTHER_LINE_SOR
public::SMOOTHER_LINE_SSOR
public::SMOOTHER_LINE_JAC
public::DIRECTION_FORWARD
public::DIRECTION_BACKWARD
public::ORDERING_LEX
public::ORDERING_RB
public::zt1d_discretisation_init
public::zt1d_discretisation_allocate3d

contains

!==================================================================
! Initialise module
!==================================================================

  subroutine zt1d_discretisation_init(kiu,kju,kku)
    implicit none

    integer , optional , intent(in) :: kiu,kju,kku ! size of 1 3D MNH array

    if (gfirst_call_zt1d_discretisation) then
       !
       gfirst_call_zt1d_discretisation = .false.
       !
       if (present(kiu)) then
          zt1d_discretisation_size = kiu*kju*kku * zt1d_discretisation_size_factor
       end if
       allocate(zt1d_discretisation(zt1d_discretisation_size))
       zt1d_discretisation = 0.0
       !$acc enter data copyin(zt1d_discretisation)
       !
       allocate(nt1d_discretisation_pointer(zt1d_discretisation_pointer_size))
       nt1d_discretisation_pointer = 0.0
       allocate(nt1d_discretisation_pointer_ksize(zt1d_discretisation_pointer_size))
       nt1d_discretisation_pointer_ksize = 0.0
       !
    end if
    
  end subroutine zt1d_discretisation_init

  function zt1d_discretisation_allocate3d(ptab3d,kib,kie,kjb,kje,kkb,kke) result(kindex)
    implicit none
    real(kind=rl), pointer , contiguous , dimension(:,:,:) :: ptab3d
    !real(kind=rl), pointer , dimension(:,:,:) :: ptab3d
    integer , intent(in)                                   :: kib,kie,kjb,kje,kkb,kke
    integer                                                :: kindex

    ! local var

    integer :: ksize, kbeg, kend

    if ( nt1d_discretisation_pointer_top == zt1d_discretisation_pointer_size ) then
       print*,"ERROR zt1d_discretisation_allocate3d:: zt1d_discretisation_pointer_size to small=",zt1d_discretisation_pointer_size
       print*,"ERROR zt1d_discretisation_allocate3d:: Augment it "
       stop 
    end if

    ksize = (kie-kib+1)*(kje-kjb+1)*(kke-kkb+1)

    kbeg = nt1d_discretisation_top + 1
    kend  = nt1d_discretisation_top + ksize

    if ( kend > zt1d_discretisation_size ) then
       print*,"ERROR zt1d_discretisation_allocate3d:: zt1d_discretisation_size to small=",zt1d_discretisation_size
       print*,"ERROR zt1d_discretisation_allocate3d:: Augment it "
       stop       
    end if

    ptab3d(kib:kie,kjb:kje,kkb:kke) => zt1d_discretisation(kbeg:)

    nt1d_discretisation_pointer_top = nt1d_discretisation_pointer_top + 1
    nt1d_discretisation_top = kend
    
    nt1d_discretisation_pointer(nt1d_discretisation_pointer_top)       = kend
    nt1d_discretisation_pointer_ksize(nt1d_discretisation_pointer_top) = ksize

    if (  nt1d_discretisation_pointer_top > nt1d_discretisation_pointer_max ) then
       nt1d_discretisation_pointer_max =  nt1d_discretisation_pointer_top
!!$       print*,"zt1d_discretisation_allocate3d:: nt1d_discretisation_pointer_max=",nt1d_discretisation_pointer_max
!!$       call flush(6)
    endif
    
    if (  nt1d_discretisation_top > nt1d_discretisation_max ) then
       nt1d_discretisation_max =  nt1d_discretisation_top
!!$       print*,"zt1d_discretisation_allocate3d:: nt1d_discretisation_max=",nt1d_discretisation_max
!!$       call flush(6)
    endif

    kindex = nt1d_discretisation_pointer_top
  end function zt1d_discretisation_allocate3d

  
 subroutine discretisation_initialise_mnh(grid_param_in, &
                                       model_param_in, &
                                       smoother_param_in, &
                                       nlev_in, &
                                       PA_K,PB_K,PC_K,PD_K)
    implicit none
    type(grid_parameters), intent(in)   :: grid_param_in
    type(model_parameters), intent(in)   :: model_param_in
    type(smoother_parameters), intent(in)   :: smoother_param_in
    integer, intent(in) :: nlev_in
    real(kind=rl) , optional , intent (in) :: PA_K(:),PB_K(:),PC_K(:),PD_K(:)

    ! local var
    integer :: k

    grid_param = grid_param_in
    model_param = model_param_in
    smoother_param = smoother_param_in
    nlev = nlev_in
    allocate(log_resreduction(nlev))
    allocate(nsmooth_total(nlev))
    log_resreduction(:) = 0.0_rl
    nsmooth_total(:) = 0
    allocate(r_grid(grid_param%nz+1))
    if (grid_param%graded) then
      do k=1,grid_param%nz+1
        r_grid(k) = grid_param%H*(1.0_rl*(k-1.0_rl)/grid_param%nz)**2
      end do
    else
      do k=1,grid_param%nz+1
        r_grid(k) = grid_param%H*(1.0_rl*(k-1.0_rl)/grid_param%nz)
      end do
    end if
    ! Allocate arrays for vertical discretisation matrices
    ! and calculate matrix entries
    allocate(vert_coeff%a(grid_param%nz))
    allocate(vert_coeff%b(grid_param%nz))
    allocate(vert_coeff%c(grid_param%nz))
    allocate(vert_coeff%d(grid_param%nz))
    call construct_vertical_coeff_mnh(PA_K,PB_K,PC_K,PD_K)
    !$acc enter data copyin(vert_coeff%a,vert_coeff%b,vert_coeff%c,vert_coeff%d)
  end subroutine discretisation_initialise_mnh

  subroutine discretisation_initialise(grid_param_in, &
                                       model_param_in, &
                                       smoother_param_in, &
                                       nlev_in)
    implicit none
    type(grid_parameters), intent(in)   :: grid_param_in
    type(model_parameters), intent(in)   :: model_param_in
    type(smoother_parameters), intent(in)   :: smoother_param_in
    integer, intent(in) :: nlev_in
    integer :: k
    grid_param = grid_param_in
    model_param = model_param_in
    smoother_param = smoother_param_in
    nlev = nlev_in
    allocate(log_resreduction(nlev))
    allocate(nsmooth_total(nlev))
    log_resreduction(:) = 0.0_rl
    nsmooth_total(:) = 0
    allocate(r_grid(grid_param%nz+1))
    if (grid_param%graded) then
      do k=1,grid_param%nz+1
        r_grid(k) = grid_param%H*(1.0_rl*(k-1.0_rl)/grid_param%nz)**2
      end do
    else
      do k=1,grid_param%nz+1
        r_grid(k) = grid_param%H*(1.0_rl*(k-1.0_rl)/grid_param%nz)
      end do
    end if
#ifdef CARTESIANGEOMETRY
#else
    r_grid(:) = 1.0_rl + r_grid(:)
#endif
    ! Allocate arrays for vertical discretisation matrices
    ! and calculate matrix entries
    allocate(vert_coeff%a(grid_param%nz))
    allocate(vert_coeff%b(grid_param%nz))
    allocate(vert_coeff%c(grid_param%nz))
    allocate(vert_coeff%d(grid_param%nz))
    call construct_vertical_coeff()
  end subroutine discretisation_initialise

!==================================================================
! Finalise module
!==================================================================
  subroutine discretisation_finalise()
    implicit none
    integer :: level
    real(kind=rl) :: rho_avg
#ifdef MEASURESMOOTHINGRATE
    if (i_am_master_mpi) then
      write(STDOUT,'("Average smoothing rates:")')
      do level=nlev,1,-1
        if (nsmooth_total(level) > 0) then
          rho_avg = exp(log_resreduction(level)/nsmooth_total(level))
        else
          rho_avg = 1.0_rl
        end if
        write(STDOUT,'("rho_{avg}(",I3,") = ",E10.4," ( ",I5," x )")') &
          level, rho_avg, nsmooth_total(level)
      end do
    end if
#endif
    deallocate(log_resreduction)
    deallocate(nsmooth_total)
    deallocate(r_grid)
    ! Deallocate storage for vertical discretisation matrices
    deallocate(vert_coeff%a)
    deallocate(vert_coeff%b)
    deallocate(vert_coeff%c)
    deallocate(vert_coeff%d)
  end subroutine discretisation_finalise

!==================================================================
! Construct alpha_{i',j'} and |T_{ij}| needed for the
! horizontal stencil
! ( alpha_{i+1,j},
!   alpha_{i-1,j},
!   alpha_{i,j+1},
!   alpha_{i,j-1},
!   alpha_{ij})
! (ix,iy) are LOCAL indices of the grid boxes, which are
! converted to global indices
!==================================================================
  subroutine construct_alpha_T_mnh(grid_param,ix,iy,alpha_T,Tij)
    implicit none
    type(grid_parameters), intent(in) :: grid_param
    integer, intent(in) :: ix
    integer, intent(in) :: iy
    real(kind=rl), intent(inout), dimension(5) :: alpha_T
    real(kind=rl), intent(out) :: Tij

    !local var
    real(kind=rl)              :: h, rho_i, sigma_j
    real(kind=rl)              :: xcoef 
    logical                    :: l2nd       

    h = grid_param%L/grid_param%n
    ! Cartesian coefficients
    Tij = h**2
    ! optimisation for newman MNH case = all coef constant
    alpha_T(1:4) = 1.0_rl
    alpha_T(5) = 4.0_rl
    return
    xcoef = 0.5_rl ! 0.0
    l2nd = .false. ! .true. ! .false.
    alpha_T(1) = 1.0
    alpha_T(2) = 1.0
    if (ix == grid_param%n) then
      alpha_T(1) = xcoef * 2.0_rl
      if (l2nd) alpha_T(2) = 2.0_rl
    end if
    if (ix == 1) then
      alpha_T(2) = xcoef * 2.0_rl
      if (l2nd) alpha_T(1) = 2.0_rl
    end if
    alpha_T(3) = 1.0
    alpha_T(4) = 1.0
    if (iy == grid_param%n) then
      alpha_T(3) = xcoef * 2.0_rl
      if (l2nd) alpha_T(4) = 2.0
    end if
    if (iy == 1) then
      alpha_T(4) = xcoef * 2.0_rl
      if (l2nd) alpha_T(3) = 2.0
    end if

    alpha_T(5) = alpha_T(1) + alpha_T(2) + alpha_T(3) + alpha_T(4)
  end subroutine construct_alpha_T_mnh
! constant coef for MNH
  subroutine construct_alpha_T_cst_mnh(grid_param,alpha_T,Tij)
    implicit none
    type(grid_parameters), intent(in) :: grid_param
    real(kind=rl), intent(inout), dimension(5) :: alpha_T
    real(kind=rl), intent(out) :: Tij

    !local var
    real(kind=rl)              :: h

    h = grid_param%L/grid_param%n
    ! Cartesian coefficients
    Tij = h**2
    ! optimisation for newman MNH case = all coef constant
    alpha_T(1:4) = 1.0_rl
    alpha_T(5) = 4.0_rl
 
  end subroutine construct_alpha_T_cst_mnh
!==================================================================
  subroutine construct_alpha_T(grid_param,ix,iy,alpha_T,Tij)
    implicit none
    type(grid_parameters), intent(in) :: grid_param
    integer, intent(in) :: ix
    integer, intent(in) :: iy
    real(kind=rl), intent(inout), dimension(5) :: alpha_T
    real(kind=rl), intent(out) :: Tij
    real(kind=rl)              :: h, rho_i, sigma_j
#ifdef CARTESIANGEOMETRY
    h = grid_param%L/grid_param%n
    ! Cartesian coefficients
    Tij = h**2
    if (ix == grid_param%n) then
      alpha_T(1) = 2.0_rl  
    else
      alpha_T(1) = 1.0_rl
    end if
    if (ix == 1) then
      alpha_T(2) = 2.0_rl  
    else
      alpha_T(2) = 1.0_rl
    end if
    if (iy == grid_param%n) then
      alpha_T(3) = 2.0_rl  
    else
      alpha_T(3) = 1.0_rl
    end if
    if (iy == 1) then
      alpha_T(4) = 2.0_rl  
    else
      alpha_T(4) = 1.0_rl
    end if
#else
    ! Coefficients in cubed sphere geometry
    ! (rho_i,sigma_j) \in [-1,1] x [-1,1] are the coordinates of the
    ! cubed sphere segment
    h = 2.0_rl/grid_param%n
    Tij = volume_of_element(ix,iy,grid_param)
    rho_i = 2.0_rl*(1.0_rl*ix-0.5_rl)/grid_param%n-1.0_rl
    sigma_j = 2.0_rl*(1.0_rl*iy-0.5_rl)/grid_param%n-1.0_rl
    ! alpha_{i+1,j}
    if (ix == grid_param%n) then
      alpha_T(1) = 2.0_rl*DSQRT((1.0_rl+(rho_i+0.25_rl*h)**2)/(1.0_rl+sigma_j**2))
    else
      alpha_T(1) = DSQRT((1.0_rl+(rho_i+0.5_rl*h)**2)/(1.0_rl+sigma_j**2))
    end if
    ! alpha_{i-1,j}
    if (ix == 1) then
      alpha_T(2) = 2.0_rl*DSQRT((1.0_rl+(rho_i-0.25_rl*h)**2)/(1.0_rl+sigma_j**2))
    else
      alpha_T(2) = DSQRT((1.0_rl+(rho_i-0.5_rl*h)**2)/(1.0_rl+sigma_j**2))
    end if
    ! alpha_{i,j+1}
    if (iy == grid_param%n) then
      alpha_T(3) = 2.0_rl*DSQRT((1.0_rl+(sigma_j+0.25_rl*h)**2)/(1.0_rl+rho_i**2))
    else
      alpha_T(3) = DSQRT((1.0_rl+(sigma_j+0.5_rl*h)**2)/(1.0_rl+rho_i**2))
    end if
    ! alpha_{i,j-1}
    if (iy == 1) then
      alpha_T(4) = 2.0_rl*DSQRT((1.0_rl+(sigma_j-0.25_rl*h)**2)/(1.0_rl+rho_i**2))
    else
      alpha_T(4) = DSQRT((1.0_rl+(sigma_j-0.5_rl*h)**2)/(1.0_rl+rho_i**2))
    end if
#endif
    alpha_T(5) = alpha_T(1) + alpha_T(2) + alpha_T(3) + alpha_T(4)
  end subroutine construct_alpha_T
!==================================================================
! Construct coefficients of tridiagonal matrix A_T
! describing the coupling in the vertical direction and the
! diagonal matrix diag(d)
!==================================================================
subroutine construct_vertical_coeff_mnh(PA_K,PB_K,PC_K,PD_K)
  implicit none
  real(kind=rl) , optional , intent (in) :: PA_K(:),PB_K(:),PC_K(:),PD_K(:)
  !local var
  real(kind=rl) :: a_k_tmp, b_k_tmp, c_k_tmp, d_k_tmp
  real(kind=rl) :: omega2, lambda2, delta, vol_r, surface_k, surface_kp1
  integer :: k
  
  IF (.NOT. PRESENT(PA_K)) THEN
  omega2 = model_param%omega2
  lambda2 = model_param%lambda2
  delta = model_param%delta
  do k = 1, grid_param%nz

    vol_r = r_grid(k+1)-r_grid(k)
    surface_k = 1.0_rl
    surface_kp1 = 1.0_rl

    ! Diagonal element
    a_k_tmp = delta*vol_r
    ! off diagonal elements
    ! Boundary conditions
    ! Top
    if (k == grid_param%nz) then
      if (grid_param%vertbc == VERTBC_DIRICHLET) then
        b_k_tmp = - 2.0_rl * omega2*lambda2 &
                           * surface_kp1/(r_grid(k+1)-r_grid(k))
      else
        b_k_tmp = 0.0_rl
      end if
    else
      b_k_tmp = - 2.0_rl*omega2*lambda2 &
              * surface_kp1/(r_grid(k+2)-r_grid(k))
    end if
    ! Bottom
    if (k == 1) then
      if (grid_param%vertbc == VERTBC_DIRICHLET) then
        c_k_tmp = - 2.0_rl * omega2*lambda2 &
                           * surface_k/(r_grid(k+1)-r_grid(k))
      else
        c_k_tmp = 0.0_rl
      end if
    else
      c_k_tmp = - 2.0_rl * omega2 * lambda2 &
              * surface_k/(r_grid(k+1)-r_grid(k-1))
    end if
    ! Diagonal matrix d_k
    d_k_tmp = - omega2 * (r_grid(k+1)-r_grid(k))
    vert_coeff%a(k) = a_k_tmp/d_k_tmp
    vert_coeff%b(k) = b_k_tmp/d_k_tmp
    vert_coeff%c(k) = c_k_tmp/d_k_tmp
    vert_coeff%d(k) = d_k_tmp
  end do
  ELSE
  do k = 1, grid_param%nz
    vert_coeff%a(k) = PA_K(k)
    vert_coeff%b(k) = PB_K(k)
    vert_coeff%c(k) = PC_K(k)
    vert_coeff%d(k) = PD_K(k)
  end do
  ENDIF
end subroutine construct_vertical_coeff_mnh

subroutine construct_vertical_coeff()
  implicit none
  real(kind=rl) :: a_k_tmp, b_k_tmp, c_k_tmp, d_k_tmp
  real(kind=rl) :: omega2, lambda2, delta, vol_r, surface_k, surface_kp1
  integer :: k
  omega2 = model_param%omega2
  lambda2 = model_param%lambda2
  delta = model_param%delta
  do k = 1, grid_param%nz
#ifdef  CARTESIANGEOMETRY
    vol_r = r_grid(k+1)-r_grid(k)
    surface_k = 1.0_rl
    surface_kp1 = 1.0_rl
#else
    vol_r = (r_grid(k+1)**3 - r_grid(k)**3)/3.0_rl
    surface_k = r_grid(k)**2
    surface_kp1 = r_grid(k+1)**2
#endif
    ! Diagonal element
    a_k_tmp = delta*vol_r
    ! off diagonal elements
    ! Boundary conditions
    ! Top
    if (k == grid_param%nz) then
      if (grid_param%vertbc == VERTBC_DIRICHLET) then
        b_k_tmp = - 2.0_rl * omega2*lambda2 &
                           * surface_kp1/(r_grid(k+1)-r_grid(k))
      else
        b_k_tmp = 0.0_rl
      end if
    else
      b_k_tmp = - 2.0_rl*omega2*lambda2 &
              * surface_kp1/(r_grid(k+2)-r_grid(k))
    end if
    ! Bottom
    if (k == 1) then
      if (grid_param%vertbc == VERTBC_DIRICHLET) then
        c_k_tmp = - 2.0_rl * omega2*lambda2 &
                           * surface_k/(r_grid(k+1)-r_grid(k))
      else
        c_k_tmp = 0.0_rl
      end if
    else
      c_k_tmp = - 2.0_rl * omega2 * lambda2 &
              * surface_k/(r_grid(k+1)-r_grid(k-1))
    end if
    ! Diagonal matrix d_k
    d_k_tmp = - omega2 * (r_grid(k+1)-r_grid(k))
    vert_coeff%a(k) = a_k_tmp/d_k_tmp
    vert_coeff%b(k) = b_k_tmp/d_k_tmp
    vert_coeff%c(k) = c_k_tmp/d_k_tmp
    vert_coeff%d(k) = d_k_tmp
  end do
end subroutine construct_vertical_coeff

!==================================================================
! Calculate local residual r = b - A.u
!==================================================================
  subroutine calculate_residual_mnh(level,m,b,u,r)
    implicit none
    integer, intent(in)                :: level
    integer, intent(in)                :: m
    type(scalar3d), intent(in)         :: b
    type(scalar3d), intent(inout)      :: u
    type(scalar3d), intent(inout)      :: r
    integer :: ix,iy,iz
    integer ::  iib,iie,ijb,ije,ikb,ike

    real , dimension(:,:,:) , pointer , contiguous :: zr_st , zb_st

    ! r <- A.u
    !call boundary_mnh(u)
    call apply_mnh(u,r)
    ! r <- b - r = b - A.u
    if (LUseO) then
       do ix=u%icompx_min,u%icompx_max
          do iy=u%icompy_min,u%icompy_max
             do iz=1,u%grid_param%nz
                r%s(iz,iy,ix) = b%s(iz,iy,ix) - r%s(iz,iy,ix)
             end do
          end do
       end do
    endif
    if (LUseT) then
!!$       do iz=1,u%grid_param%nz
!!$          do iy=u%icompy_min,u%icompy_max
!!$             do ix=u%icompx_min,u%icompx_max
!!$                r%st(ix,iy,iz) = b%st(ix,iy,iz) - r%st(ix,iy,iz)
!!$             end do
!!$          end do
!!$       end do
       !-----------------
       iib=u%icompx_min
       iie=u%icompx_max
       ijb=u%icompy_min
       ije=u%icompy_max
       ikb=1
       ike=u%grid_param%nz

       zr_st => r%st
       zb_st => b%st
       !$acc kernels present(zr_st,zb_st)
       zr_st(iib:iie,ijb:ije,ikb:ike) = zb_st(iib:iie,ijb:ije,ikb:ike) - zr_st(iib:iie,ijb:ije,ikb:ike)
       !$acc end kernels
    endif

  end subroutine calculate_residual_mnh

  subroutine calculate_residual(level,m,b,u,r)
    implicit none
    integer, intent(in)                :: level
    integer, intent(in)                :: m
    type(scalar3d), intent(in)         :: b
    type(scalar3d), intent(inout)      :: u
    type(scalar3d), intent(inout)      :: r
    integer :: ix,iy,iz


    ! r <- A.u
    call apply(u,r)
    ! r <- b - r = b - A.u
    do ix=u%icompx_min,u%icompx_max
      do iy=u%icompy_min,u%icompy_max
        do iz=1,u%grid_param%nz
          r%s(iz,iy,ix) = b%s(iz,iy,ix) - r%s(iz,iy,ix)
        end do
      end do
    end do
  end subroutine calculate_residual

!==================================================================
! Apply operator v = A.u
!==================================================================
  subroutine apply_mnh(u,v)
    implicit none
    type(scalar3d), intent(inout)      :: u
    type(scalar3d), intent(inout)      :: v

    ! local var
    real(kind=rl), dimension(5) :: alpha_T
    real(kind=rl) :: Tij
    real(kind=rl) :: a_k, b_k, c_k, d_k
    integer :: ix,iy,iz
    real(kind=rl) :: tmp
    integer :: iib,iie,ijb,ije

    real(kind=rl), dimension(:,:,:) , pointer , contiguous :: zv_st , zu_st  
    real(kind=rl), dimension(:)     , pointer , contiguous :: za_k, zb_k, zc_k, zd_k
    integer :: ii,ij
    integer :: ize

    integer :: nib,nie,njb,nje,nzb,nze

    call boundary_mnh(u)

    if (LUseO) then     
       do ix=u%icompx_min,u%icompx_max
          do iy=u%icompy_min,u%icompy_max
             ! Construct horizontal part of stencil
             call construct_alpha_T_mnh(u%grid_param,  &
                  ix+u%ix_min-1, &
                  iy+u%iy_min-1, &
                  alpha_T,Tij)
             do iz=1,u%grid_param%nz
                a_k = vert_coeff%a(iz)
                b_k = vert_coeff%b(iz)
                c_k = vert_coeff%c(iz)
                d_k = vert_coeff%d(iz)
                tmp = ((a_k-b_k-c_k)*Tij ) * u%s(iz,iy,ix)
                if (iz < grid_param%nz) then
                   tmp = tmp + b_k*Tij * u%s(iz+1,iy,ix)
                end if
                if (iz > 1) then
                   tmp = tmp + c_k*Tij * u%s(iz-1,iy,ix)
                end if
                if ((iz > 1) .and. (iz < grid_param%nz)) then
                   tmp = tmp - alpha_T(5) * u%s(iz,  iy  ,ix  ) &
                        + alpha_T(1) * u%s(iz,  iy  ,ix+1) &
                        + alpha_T(2) * u%s(iz,  iy  ,ix-1) &
                        + alpha_T(3) * u%s(iz,  iy+1,ix  ) &
                        + alpha_T(4) * u%s(iz,  iy-1,ix  )
                end if
                v%s(iz,iy,ix) = d_k*tmp 
             end do
          end do
       end do
    endif
    if (LUseT) then 
       call construct_alpha_T_cst_mnh(u%grid_param,alpha_T,Tij)    
       !-----------------------------------------------------------
       iib=u%icompx_min
       iie=u%icompx_max
       ijb=u%icompy_min
       ije=u%icompy_max
       ize=u%grid_param%nz
       !
       zv_st => v%st
       zu_st => u%st
       zb_k => vert_coeff%b
       zc_k => vert_coeff%c
       zd_k => vert_coeff%d

       nib = Lbound(zv_st,1) ; nie = Ubound(zv_st,1)
       njb = Lbound(zv_st,2) ; nje = Ubound(zv_st,2)
       nzb = Lbound(zv_st,3) ; nze = Ubound(zv_st,3)

       call apply_mnh_dim(zv_st,zu_st,zb_k,zc_k,zd_k)
    
    endif

  contains

    subroutine apply_mnh_dim(pzv_st,pzu_st,pzb_k,pzc_k,pzd_k)

      implicit none

      real :: pzv_st(nib:nie,njb:nje,nzb:nze), &
              pzu_st(nib:nie,njb:nje,nzb:nze), &
              pzb_k(ize),pzc_k(ize),pzd_k(ize)

      !$acc kernels present_cr(pzb_k,pzv_st,pzu_st)
      iz=1
      !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije )
         pzv_st(ii,ij,iz) = pzd_k(iz)* ( (-pzb_k(iz)-pzc_k(iz))*Tij * pzu_st(ii,ij,iz  )  &
                                          +pzb_k(iz)           *Tij * pzu_st(ii,ij,iz+1)  )
      !$mnh_end_do()
      
      !
      !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije , iz=2:ize-1 )
         pzv_st(ii,ij,iz) = pzd_k(iz)* ( ((-pzb_k(iz)-pzc_k(iz))*Tij - 4.0_rl ) * pzu_st(ii,ij,iz)   &
                                           +pzb_k(iz)           *Tij            * pzu_st(ii,ij,iz+1) &
                                                     +pzc_k(iz) *Tij            * pzu_st(ii,ij,iz-1) &
                                     +                                            pzu_st(ii+1,ij,iz) &
                                     +                                            pzu_st(ii-1,ij,iz) &
                                     +                                            pzu_st(ii,ij+1,iz) &
                                     +                                            pzu_st(ii,ij-1,iz) &
                                       )
       !$mnh_end_do()
       !
       iz=ize
       !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije )       
             pzv_st(ii,ij,iz) = pzd_k(iz)*  (  (-pzb_k(iz)-pzc_k(iz))*Tij  * pzu_st(ii,ij,iz)    &
                                                          +pzc_k(iz) *Tij  * pzu_st(ii,ij,iz-1)  )
       !$mnh_end_do()
       !$acc end kernels     
    
     end subroutine apply_mnh_dim

  end subroutine apply_mnh

  subroutine apply(u,v)
    implicit none
    type(scalar3d), intent(in)         :: u
    type(scalar3d), intent(inout)      :: v
    real(kind=rl), dimension(5) :: alpha_T
    real(kind=rl) :: Tij
    real(kind=rl) :: a_k, b_k, c_k, d_k
    integer :: ix,iy,iz
    real(kind=rl) :: tmp

    do ix=u%icompx_min,u%icompx_max
      do iy=u%icompy_min,u%icompy_max
        ! Construct horizontal part of stencil
        call construct_alpha_T(u%grid_param,  &
                               ix+u%ix_min-1, &
                               iy+u%iy_min-1, &
                               alpha_T,Tij)
        do iz=1,u%grid_param%nz
          a_k = vert_coeff%a(iz)
          b_k = vert_coeff%b(iz)
          c_k = vert_coeff%c(iz)
          d_k = vert_coeff%d(iz)
          tmp = ((a_k-b_k-c_k)*Tij - alpha_T(5)) * u%s(iz,iy,ix)
          if (iz < grid_param%nz) then
            tmp = tmp + b_k*Tij * u%s(iz+1,iy,ix)
          end if
          if (iz > 1) then
            tmp = tmp + c_k*Tij * u%s(iz-1,iy,ix)
          end if
          tmp = tmp + alpha_T(1) * u%s(iz,  iy  ,ix+1) &
                    + alpha_T(2) * u%s(iz,  iy  ,ix-1) &
                    + alpha_T(3) * u%s(iz,  iy+1,ix  ) &
                    + alpha_T(4) * u%s(iz,  iy-1,ix  )
          v%s(iz,iy,ix) = d_k*tmp
        end do
      end do
    end do
  end subroutine apply
!==================================================================
!==================================================================
!
!     S M O O T H E R S
!
!==================================================================
!==================================================================

!==================================================================
! Perform nsmooth smoother iterations
!==================================================================
  subroutine smooth_mnh(level,m,nsmooth,direction,b,u)
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    integer, intent(in) :: nsmooth       ! Number of smoothing steps
    integer, intent(in) :: direction     ! Direction
    type(scalar3d), intent(inout) :: b   ! RHS
    type(scalar3d), intent(inout) :: u   ! solution vector
    integer :: i
    real(kind=rl) :: log_res_initial, log_res_final
    type(scalar3d) :: r
    integer :: halo_size
    integer :: nlocal, nz

#ifdef MEASURESMOOTHINGRATE
    r%ix_min = u%ix_min
    r%ix_max = u%ix_max
    r%iy_min = u%iy_min
    r%iy_max = u%iy_max
    r%icompx_min = u%icompx_min
    r%icompx_max = u%icompx_max
    r%icompy_min = u%icompy_min
    r%icompy_max = u%icompy_max
    r%halo_size = u%halo_size
    r%isactive = u%isactive
    r%grid_param = u%grid_param
    nlocal = r%ix_max-r%ix_min+1
    halo_size = r%halo_size
    nz = r%grid_param%nz

    if (LUseO) then
       allocate(r%s(0:nz+1,                       &
                    1-halo_size:nlocal+halo_size, &
                    1-halo_size:nlocal+halo_size))
    end if

    if (LUseT) then
       allocate(r%st(1-halo_size:nlocal+halo_size, &
                     1-halo_size:nlocal+halo_size, &
                     0:nz+1))
       !$acc enter data create (r%st)
    end if

    call calculate_residual(level,m,b,u,r)
    log_res_initial = log(l2norm(r))
#endif
    ! Carry out nsmooth iterations of the smoother
    if (smoother_param%smoother == SMOOTHER_LINE_SOR) then
      do i=1,nsmooth
        call line_SOR_mnh(level,m,direction,b,u)
      end do
    else if (smoother_param%smoother == SMOOTHER_LINE_SSOR) then
      do i=1,nsmooth
        call line_SSOR_mnh(level,m,direction,b,u)
      end do
    else if (smoother_param%smoother == SMOOTHER_LINE_JAC) then
      do i=1,nsmooth
        call line_jacobi_mnh(level,m,b,u)
      end do
    end if
#ifdef MEASURESMOOTHINGRATE
    call calculate_residual_mnh(level,m,b,u,r)
    log_res_final = log(l2norm(r))
    log_resreduction(level) = log_resreduction(level) &
                            + (log_res_final - log_res_initial)
    nsmooth_total(level) = nsmooth_total(level) + nsmooth
    if (LUseO) deallocate(r%s)
    if (LUseT) then
       !$acc exit data delete(r%st)
       deallocate(r%st)       
    end if
#endif
  end subroutine smooth_mnh
!==================================================================
! Perform nsmooth smoother iterations
!==================================================================
  subroutine smooth(level,m,nsmooth,direction,b,u)
    implicit none
    integer, intent(in) :: level
    integer, intent(in) :: m
    integer, intent(in) :: nsmooth       ! Number of smoothing steps
    integer, intent(in) :: direction     ! Direction
    type(scalar3d), intent(inout) :: b   ! RHS
    type(scalar3d), intent(inout) :: u   ! solution vector
    integer :: i
    real(kind=rl) :: log_res_initial, log_res_final
    type(scalar3d) :: r
    integer :: halo_size
    integer :: nlocal, nz

#ifdef MEASURESMOOTHINGRATE
    r%ix_min = u%ix_min
    r%ix_max = u%ix_max
    r%iy_min = u%iy_min
    r%iy_max = u%iy_max
    r%icompx_min = u%icompx_min
    r%icompx_max = u%icompx_max
    r%icompy_min = u%icompy_min
    r%icompy_max = u%icompy_max
    r%halo_size = u%halo_size
    r%isactive = u%isactive
    r%grid_param = u%grid_param
    nlocal = r%ix_max-r%ix_min+1
    halo_size = r%halo_size
    nz = r%grid_param%nz
    if (LUseO) then