Skip to content
Snippets Groups Projects
discretisation.f90 26.9 KiB
Newer Older
  • Learn to ignore specific revisions
  • 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879
    !=== COPYRIGHT AND LICENSE STATEMENT ===
    !
    !  This file is part of the TensorProductMultigrid code.
    !  
    !  (c) The copyright relating to this work is owned jointly by the
    !  Crown, Met Office and NERC [2014]. However, it has been created
    !  with the help of the GungHo Consortium, whose members are identified
    !  at https://puma.nerc.ac.uk/trac/GungHo/wiki .
    !  
    !  Main Developer: Eike Mueller
    !  
    !  TensorProductMultigrid is free software: you can redistribute it and/or
    !  modify it under the terms of the GNU Lesser General Public License as
    !  published by the Free Software Foundation, either version 3 of the
    !  License, or (at your option) any later version.
    !  
    !  TensorProductMultigrid is distributed in the hope that it will be useful,
    !  but WITHOUT ANY WARRANTY; without even the implied warranty of
    !  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    !  GNU Lesser General Public License for more details.
    !  
    !  You should have received a copy of the GNU Lesser General Public License
    !  along with TensorProductMultigrid (see files COPYING and COPYING.LESSER).
    !  If not, see <http://www.gnu.org/licenses/>.
    !
    !=== COPYRIGHT AND LICENSE STATEMENT ===
    
    
    !==================================================================
    !
    !  Discretisation module of the model problem
    !
    !
    !  -omega2 * (d^2/dx^2 + d^2/dy^2 + lambda2 * d^2/dz^2 ) u
    !                                                + delta u = RHS
    !  [Cartesian]
    !
    !  or
    !
    !  -omega2 * (laplace_{2d} + lambda2/r^2 d/dr (r^2 d/dr)) u
    !                                                + delta u = RHS
    !  [Spherical]
    !
    !  We use a cell centered finite volume discretisation with
    !  The equation is discretised either in a unit cube or on 1/6th
    !  of a cubed sphere grid.
    !
    !  The vertical grid spacing is not necessarily uniform and can
    !  be chosen by specifying the vertical grid in a vector.
    !
    !  The following boundary conditions are used:
    !
    !  * Dirichlet in the horizontal
    !  * Neumann in the vertical
    !
    !  For delta = 0 the operator reduces to the Poisson operator.
    !
    !    Eike Mueller, University of Bath, Feb 2012
    !
    !==================================================================
    
    module discretisation
    
      use parameters
      use messages
      use datatypes
      use communication
      use mpi
    
      implicit none
    
    private
    
      type model_parameters
        real(kind=rl) :: omega2   ! omega^2
        real(kind=rl) :: lambda2  ! lambda^2
        real(kind=rl) :: delta    ! delta
      end type model_parameters
    
    ! --- Stencil ---
    !
    
    ! Grid traversal direction in SOR
      integer, parameter :: DIRECTION_FORWARD = 1
      integer, parameter :: DIRECTION_BACKWARD = 2
    
    ! Ordering in SOR
      ! Lexicographic ordering
      integer, parameter :: ORDERING_LEX = 1
      ! Red-black ordering
      integer, parameter :: ORDERING_RB = 2
    
      type smoother_parameters
        ! smoother
        integer :: smoother
        ! relaxation parameter
        real(kind=rl) :: rho
        ! ordering of degrees of freedom
        integer :: ordering
      end type smoother_parameters
    
      ! Allowed smoothers
      integer, parameter :: SMOOTHER_LINE_SOR = 3
      integer, parameter :: SMOOTHER_LINE_SSOR = 4
      integer, parameter :: SMOOTHER_LINE_JAC = 6
    
      ! Number of levels
      integer :: nlev
    
      ! Grid parameters
      type(grid_parameters) :: grid_param
    
      ! Model parameters
      type(model_parameters) :: model_param
    
      ! Smoother parameters
      type(smoother_parameters) :: smoother_param
    
      ! Arrays for measuring the residual reduction
      real(kind=rl), allocatable :: log_resreduction(:)
      integer, allocatable :: nsmooth_total(:)
    
      ! Data structure for storing the vertical discretisation
      type vertical_coefficients
        real(kind=rl), allocatable :: a(:)
        real(kind=rl), allocatable :: b(:)
        real(kind=rl), allocatable :: c(:)
        real(kind=rl), allocatable :: d(:)
      end type vertical_coefficients
    
      ! Stoarge for vertical coefficients
      type(vertical_coefficients) :: vert_coeff
    
    public::discretisation_initialise
    public::discretisation_finalise
    public::smooth
    public::line_SOR
    public::line_SSOR
    public::line_jacobi
    public::calculate_residual
    public::apply
    public::model_parameters
    public::smoother_parameters
    public::volume_of_element
    public::SMOOTHER_LINE_SOR
    public::SMOOTHER_LINE_SSOR
    public::SMOOTHER_LINE_JAC
    public::DIRECTION_FORWARD
    public::DIRECTION_BACKWARD
    public::ORDERING_LEX
    public::ORDERING_RB
    
    contains
    
    !==================================================================
    ! Initialise module
    !==================================================================
      subroutine discretisation_initialise(grid_param_in, &
                                           model_param_in, &
                                           smoother_param_in, &
                                           nlev_in)
        implicit none
        type(grid_parameters), intent(in)   :: grid_param_in
        type(model_parameters), intent(in)   :: model_param_in
        type(smoother_parameters), intent(in)   :: smoother_param_in
        integer, intent(in) :: nlev_in
        integer :: k
        grid_param = grid_param_in
        model_param = model_param_in
        smoother_param = smoother_param_in
        nlev = nlev_in
        allocate(log_resreduction(nlev))
        allocate(nsmooth_total(nlev))
        log_resreduction(:) = 0.0_rl
        nsmooth_total(:) = 0
        allocate(r_grid(grid_param%nz+1))
        if (grid_param%graded) then
          do k=1,grid_param%nz+1
            r_grid(k) = grid_param%H*(1.0_rl*(k-1.0_rl)/grid_param%nz)**2
          end do
        else
          do k=1,grid_param%nz+1
            r_grid(k) = grid_param%H*(1.0_rl*(k-1.0_rl)/grid_param%nz)
          end do
        end if
    #ifdef CARTESIANGEOMETRY
    #else
        r_grid(:) = 1.0_rl + r_grid(:)
    #endif
        ! Allocate arrays for vertical discretisation matrices
        ! and calculate matrix entries
        allocate(vert_coeff%a(grid_param%nz))
        allocate(vert_coeff%b(grid_param%nz))
        allocate(vert_coeff%c(grid_param%nz))
        allocate(vert_coeff%d(grid_param%nz))
        call construct_vertical_coeff()
      end subroutine discretisation_initialise
    
    !==================================================================
    ! Finalise module
    !==================================================================
      subroutine discretisation_finalise()
        implicit none
        integer :: level
        real(kind=rl) :: rho_avg
    #ifdef MEASURESMOOTHINGRATE
        if (i_am_master_mpi) then
          write(STDOUT,'("Average smoothing rates:")')
          do level=nlev,1,-1
            if (nsmooth_total(level) > 0) then
              rho_avg = exp(log_resreduction(level)/nsmooth_total(level))
            else
              rho_avg = 1.0_rl
            end if
            write(STDOUT,'("rho_{avg}(",I3,") = ",E10.4," ( ",I5," x )")') &
              level, rho_avg, nsmooth_total(level)
          end do
        end if
    #endif
        deallocate(log_resreduction)
        deallocate(nsmooth_total)
        deallocate(r_grid)
        ! Deallocate storage for vertical discretisation matrices
        deallocate(vert_coeff%a)
        deallocate(vert_coeff%b)
        deallocate(vert_coeff%c)
        deallocate(vert_coeff%d)
      end subroutine discretisation_finalise
    
    !==================================================================
    ! Construct alpha_{i',j'} and |T_{ij}| needed for the
    ! horizontal stencil
    ! ( alpha_{i+1,j},
    !   alpha_{i-1,j},
    !   alpha_{i,j+1},
    !   alpha_{i,j-1},
    !   alpha_{ij})
    ! (ix,iy) are LOCAL indices of the grid boxes, which are
    ! converted to global indices
    !==================================================================
      subroutine construct_alpha_T(grid_param,ix,iy,alpha_T,Tij)
        implicit none
        type(grid_parameters), intent(in) :: grid_param
        integer, intent(in) :: ix
        integer, intent(in) :: iy
        real(kind=rl), intent(inout), dimension(5) :: alpha_T
        real(kind=rl), intent(out) :: Tij
        real(kind=rl)                  :: h, rho_i, sigma_j
    #ifdef CARTESIANGEOMETRY
        h = grid_param%L/grid_param%n
        ! Cartesian coefficients
        Tij = h**2
        if (ix == grid_param%n) then
          alpha_T(1) = 2.0_rl
        else
          alpha_T(1) = 1.0_rl
        end if
        if (ix == 1) then
          alpha_T(2) = 2.0_rl
        else
          alpha_T(2) = 1.0_rl
        end if
        if (iy == grid_param%n) then
          alpha_T(3) = 2.0_rl
        else
          alpha_T(3) = 1.0_rl
        end if
        if (iy == 1) then
          alpha_T(4) = 2.0_rl
        else
          alpha_T(4) = 1.0_rl
        end if
    #else
        ! Coefficients in cubed sphere geometry
        ! (rho_i,sigma_j) \in [-1,1] x [-1,1] are the coordinates of the
        ! cubed sphere segment
        h = 2.0_rl/grid_param%n
        Tij = volume_of_element(ix,iy,grid_param)
        rho_i = 2.0_rl*(1.0_rl*ix-0.5_rl)/grid_param%n-1.0_rl
        sigma_j = 2.0_rl*(1.0_rl*iy-0.5_rl)/grid_param%n-1.0_rl
        ! alpha_{i+1,j}
        if (ix == grid_param%n) then
          alpha_T(1) = 2.0_rl*DSQRT((1.0_rl+(rho_i+0.25_rl*h)**2)/(1.0_rl+sigma_j**2))
        else
          alpha_T(1) = DSQRT((1.0_rl+(rho_i+0.5_rl*h)**2)/(1.0_rl+sigma_j**2))
        end if
        ! alpha_{i-1,j}
        if (ix == 1) then
          alpha_T(2) = 2.0_rl*DSQRT((1.0_rl+(rho_i-0.25_rl*h)**2)/(1.0_rl+sigma_j**2))
        else
          alpha_T(2) = DSQRT((1.0_rl+(rho_i-0.5_rl*h)**2)/(1.0_rl+sigma_j**2))
        end if
        ! alpha_{i,j+1}
        if (iy == grid_param%n) then
          alpha_T(3) = 2.0_rl*DSQRT((1.0_rl+(sigma_j+0.25_rl*h)**2)/(1.0_rl+rho_i**2))
        else
          alpha_T(3) = DSQRT((1.0_rl+(sigma_j+0.5_rl*h)**2)/(1.0_rl+rho_i**2))
        end if
        ! alpha_{i,j-1}
        if (iy == 1) then
          alpha_T(4) = 2.0_rl*DSQRT((1.0_rl+(sigma_j-0.25_rl*h)**2)/(1.0_rl+rho_i**2))
        else
          alpha_T(4) = DSQRT((1.0_rl+(sigma_j-0.5_rl*h)**2)/(1.0_rl+rho_i**2))
        end if
    #endif
        alpha_T(5) = alpha_T(1) + alpha_T(2) + alpha_T(3) + alpha_T(4)
      end subroutine construct_alpha_T
    
    !==================================================================
    ! Construct coefficients of tridiagonal matrix A_T
    ! describing the coupling in the vertical direction and the
    ! diagonal matrix diag(d)
    !==================================================================
    subroutine construct_vertical_coeff()
      implicit none
      real(kind=rl) :: a_k_tmp, b_k_tmp, c_k_tmp, d_k_tmp
      real(kind=rl) :: omega2, lambda2, delta, vol_r, surface_k, surface_kp1
      integer :: k
      omega2 = model_param%omega2
      lambda2 = model_param%lambda2
      delta = model_param%delta
      do k = 1, grid_param%nz
    #ifdef  CARTESIANGEOMETRY
        vol_r = r_grid(k+1)-r_grid(k)
        surface_k = 1.0_rl
        surface_kp1 = 1.0_rl
    #else
        vol_r = (r_grid(k+1)**3 - r_grid(k)**3)/3.0_rl
        surface_k = r_grid(k)**2
        surface_kp1 = r_grid(k+1)**2
    #endif
        ! Diagonal element
        a_k_tmp = delta*vol_r
        ! off diagonal elements
        ! Boundary conditions
        ! Top
        if (k == grid_param%nz) then
          if (grid_param%vertbc == VERTBC_DIRICHLET) then
            b_k_tmp = - 2.0_rl * omega2*lambda2 &
                               * surface_kp1/(r_grid(k+1)-r_grid(k))
          else
            b_k_tmp = 0.0_rl
          end if
        else
          b_k_tmp = - 2.0_rl*omega2*lambda2 &
                  * surface_kp1/(r_grid(k+2)-r_grid(k))
        end if
        ! Bottom
        if (k == 1) then
          if (grid_param%vertbc == VERTBC_DIRICHLET) then
            c_k_tmp = - 2.0_rl * omega2*lambda2 &
                               * surface_k/(r_grid(k+1)-r_grid(k))
          else
            c_k_tmp = 0.0_rl
          end if
        else
          c_k_tmp = - 2.0_rl * omega2 * lambda2 &
                  * surface_k/(r_grid(k+1)-r_grid(k-1))
        end if
        ! Diagonal matrix d_k
        d_k_tmp = - omega2 * (r_grid(k+1)-r_grid(k))
        vert_coeff%a(k) = a_k_tmp/d_k_tmp
        vert_coeff%b(k) = b_k_tmp/d_k_tmp
        vert_coeff%c(k) = c_k_tmp/d_k_tmp
        vert_coeff%d(k) = d_k_tmp
      end do
    end subroutine construct_vertical_coeff
    
    !==================================================================
    ! Calculate local residual r = b - A.u
    !==================================================================
      subroutine calculate_residual(level,m,b,u,r)
        implicit none
        integer, intent(in)                :: level
        integer, intent(in)                :: m
        type(scalar3d), intent(in)         :: b
        type(scalar3d), intent(inout)      :: u
        type(scalar3d), intent(inout)      :: r
        integer :: ix,iy,iz
    
        ! r <- A.u
        call apply(u,r)
        ! r <- b - r = b - A.u
        do ix=u%icompx_min,u%icompx_max
          do iy=u%icompy_min,u%icompy_max
            do iz=1,u%grid_param%nz
              r%s(iz,iy,ix) = b%s(iz,iy,ix) - r%s(iz,iy,ix)
            end do
          end do
        end do
      end subroutine calculate_residual
    
    !==================================================================
    ! Apply operator v = A.u
    !==================================================================
      subroutine apply(u,v)
        implicit none
        type(scalar3d), intent(in)         :: u
        type(scalar3d), intent(inout)      :: v
        real(kind=rl), dimension(5) :: alpha_T
        real(kind=rl) :: Tij
        real(kind=rl) :: a_k, b_k, c_k, d_k
        integer :: ix,iy,iz
        real(kind=rl) :: tmp
    
        do ix=u%icompx_min,u%icompx_max
          do iy=u%icompy_min,u%icompy_max
            ! Construct horizontal part of stencil
            call construct_alpha_T(u%grid_param,  &
                                   ix+u%ix_min-1, &
                                   iy+u%iy_min-1, &
                                   alpha_T,Tij)
            do iz=1,u%grid_param%nz
              a_k = vert_coeff%a(iz)
              b_k = vert_coeff%b(iz)
              c_k = vert_coeff%c(iz)
              d_k = vert_coeff%d(iz)
              tmp = ((a_k-b_k-c_k)*Tij - alpha_T(5)) * u%s(iz,iy,ix)
              if (iz < grid_param%nz) then
                tmp = tmp + b_k*Tij * u%s(iz+1,iy,ix)
              end if
              if (iz > 1) then
                tmp = tmp + c_k*Tij * u%s(iz-1,iy,ix)
              end if
              tmp = tmp + alpha_T(1) * u%s(iz,  iy  ,ix+1) &
                        + alpha_T(2) * u%s(iz,  iy  ,ix-1) &
                        + alpha_T(3) * u%s(iz,  iy+1,ix  ) &
                        + alpha_T(4) * u%s(iz,  iy-1,ix  )
              v%s(iz,iy,ix) = d_k*tmp
            end do
          end do
        end do
      end subroutine apply
    
    !==================================================================
    !==================================================================
    !
    !     S M O O T H E R S
    !
    !==================================================================
    !==================================================================
    
    !==================================================================
    ! Perform nsmooth smoother iterations
    !==================================================================
      subroutine smooth(level,m,nsmooth,direction,b,u)
        implicit none
        integer, intent(in) :: level
        integer, intent(in) :: m
        integer, intent(in) :: nsmooth       ! Number of smoothing steps
        integer, intent(in) :: direction     ! Direction
        type(scalar3d), intent(inout) :: b   ! RHS
        type(scalar3d), intent(inout) :: u   ! solution vector
        integer :: i
        real(kind=rl) :: log_res_initial, log_res_final
        type(scalar3d) :: r
        integer :: halo_size
        integer :: nlocal, nz
    
    #ifdef MEASURESMOOTHINGRATE
        r%ix_min = u%ix_min
        r%ix_max = u%ix_max
        r%iy_min = u%iy_min
        r%iy_max = u%iy_max
        r%icompx_min = u%icompx_min
        r%icompx_max = u%icompx_max
        r%icompy_min = u%icompy_min
        r%icompy_max = u%icompy_max
        r%halo_size = u%halo_size
        r%isactive = u%isactive
        r%grid_param = u%grid_param
        nlocal = r%ix_max-r%ix_min+1
        halo_size = r%halo_size
        nz = r%grid_param%nz
        allocate(r%s(0:nz+1,                       &
                     1-halo_size:nlocal+halo_size, &
                     1-halo_size:nlocal+halo_size))
        call calculate_residual(level,m,b,u,r)
        log_res_initial = log(l2norm(r))
    #endif
        ! Carry out nsmooth iterations of the smoother
        if (smoother_param%smoother == SMOOTHER_LINE_SOR) then
          do i=1,nsmooth
            call line_SOR(level,m,direction,b,u)
          end do
        else if (smoother_param%smoother == SMOOTHER_LINE_SSOR) then
          do i=1,nsmooth
            call line_SSOR(level,m,direction,b,u)
          end do
        else if (smoother_param%smoother == SMOOTHER_LINE_JAC) then
          do i=1,nsmooth
            call line_jacobi(level,m,b,u)
          end do
        end if
    #ifdef MEASURESMOOTHINGRATE
        call calculate_residual(level,m,b,u,r)
        log_res_final = log(l2norm(r))
        log_resreduction(level) = log_resreduction(level) &
                                + (log_res_final - log_res_initial)
        nsmooth_total(level) = nsmooth_total(level) + nsmooth
        deallocate(r%s)
    #endif
      end subroutine smooth
    
    !==================================================================
    ! SOR line smoother
    !==================================================================
      subroutine line_SOR(level,m,direction,b,u)
    
        implicit none
    
        integer, intent(in)                :: level
        integer, intent(in)                :: m
        integer, intent(in)                :: direction
        type(scalar3d), intent(in)         :: b
        type(scalar3d), intent(inout)      :: u
        real(kind=rl), allocatable :: r(:)
        integer :: nz, nlocal
        real(kind=rl), allocatable :: c(:), utmp(:)
        integer :: ixmin(5), ixmax(5), dix
        integer :: iymin(5), iymax(5), diy
        integer :: color
        integer :: nsweeps, isweep
        integer :: ordering
        real(kind=rl) :: rho
        integer, dimension(4) :: send_requests, recv_requests
        integer :: tmp, ierr
        integer :: iblock
        logical :: overlap_comms
    
        ordering = smoother_param%ordering
        rho = smoother_param%rho
    
        nz = u%grid_param%nz
    
        ! Create residual vector
        allocate(r(nz))
        ! Allocate memory for auxiliary vectors for Thomas algorithm
        allocate(c(nz))
        allocate(utmp(nz))
        nlocal = u%ix_max-u%ix_min+1
    #ifdef OVERLAPCOMMS
        overlap_comms = (nlocal > 2)
    #else
        overlap_comms = .false.
    #endif
        ! Block 1 (N)
        ixmin(1) = 1
        ixmax(1) = nlocal
        iymin(1) = 1
        iymax(1) = 1
        ! Block 2 (S)
        ixmin(2) = 1
        ixmax(2) = nlocal
        iymin(2) = nlocal
        iymax(2) = nlocal
        ! Block 3 (W)
        ixmin(3) = 1
        ixmax(3) = 1
        iymin(3) = 2
        iymax(3) = nlocal-1
        ! Block 4 (E)
        ixmin(4) = nlocal
        ixmax(4) = nlocal
        iymin(4) = 2
        iymax(4) = nlocal-1
        ! Block 5 (INTERIOR)
        if (overlap_comms) then
          ixmin(5) = 2
          ixmax(5) = nlocal-1
          iymin(5) = 2
          iymax(5) = nlocal-1
        else
          ! If there are no interior cells, do not overlap
          ! communications and calculations, just loop over interior cells
          ixmin(5) = 1
          ixmax(5) = nlocal
          iymin(5) = 1
          iymax(5) = nlocal
        end if
        dix = +1
        diy = +1
        color = 1
        ! When iteration backwards over the grid, reverse the direction
        if (direction == DIRECTION_BACKWARD) then
          do iblock = 1, 5
            tmp = ixmax(iblock)
            ixmax(iblock) = ixmin(iblock)
            ixmin(iblock) = tmp
            tmp = iymax(iblock)
            iymax(iblock) = iymin(iblock)
            iymin(iblock) = tmp
          end do
          dix = -1
          diy = -1
          color = 0
        end if
        nsweeps = 1
        if (ordering == ORDERING_LEX) then
          nsweeps = 1
        else if (ordering == ORDERING_RB) then
          nsweeps = 2
        end if
        do isweep = 1, nsweeps
          if (overlap_comms) then
            ! Loop over cells next to boundary (iblock = 1,...,4)
            do iblock = 1, 4
              call loop_over_grid(iblock)
            end do
            ! Initiate halo exchange
            call ihaloswap(level,m,u,send_requests,recv_requests)
          end if
          ! Loop over INTERIOR cells
          iblock = 5
          call loop_over_grid(iblock)
          if (overlap_comms) then
            if (m > 0) then
              call mpi_waitall(4,recv_requests, MPI_STATUSES_IGNORE, ierr)
            end if
          else
            call haloswap(level,m,u)
          end if
          color = 1-color
        end do
    
        ! Free memory again
        deallocate(r)
        deallocate(c)
        deallocate(utmp)
    
        contains
    
        !------------------------------------------------------------------
        ! Loop over grid, for a given block
        !------------------------------------------------------------------
        subroutine loop_over_grid(iblock)
          implicit none
          integer, intent(in) :: iblock
          integer :: ix,iy,iz
          do ix=ixmin(iblock),ixmax(iblock),dix
            do iy=iymin(iblock),iymax(iblock),diy
              if (ordering == ORDERING_RB) then
                if (mod((ix+u%ix_min)+(iy+u%iy_min),2) .ne. color) cycle
              end if
              call apply_tridiag_solve(ix,iy,r,c,b,         &
                                       u%s(1:nz,iy  ,ix+1), &
                                       u%s(1:nz,iy  ,ix-1), &
                                       u%s(1:nz,iy+1,ix  ), &
                                       u%s(1:nz,iy-1,ix  ), &
                                       utmp)
               ! Add to field with overrelaxation-factor
              do iz=1,nz
                u%s(iz,iy,ix) = (1.0_rl-rho)*u%s(iz,iy,ix) + rho*utmp(iz)
              end do
            end do
          end do
        end subroutine loop_over_grid
    
      end subroutine line_SOR
    
    !==================================================================
    ! SSOR line smoother
    !==================================================================
      subroutine line_SSOR(level,m,direction,b,u)
        implicit none
        integer, intent(in)                :: level
        integer, intent(in)                :: m
        integer, intent(in)                :: direction
        type(scalar3d), intent(in)         :: b
        type(scalar3d), intent(inout)      :: u
        if (direction == DIRECTION_FORWARD) then
          call line_SOR(level,m,DIRECTION_FORWARD,b,u)
          call line_SOR(level,m,DIRECTION_BACKWARD,b,u)
        else
          call line_SOR(level,m,DIRECTION_BACKWARD,b,u)
          call line_SOR(level,m,DIRECTION_FORWARD,b,u)
        end if
      end subroutine line_SSOR
    
    !==================================================================
    ! Jacobi line smoother
    !==================================================================
      subroutine line_Jacobi(level,m,b,u)
        implicit none
        integer, intent(in)                :: level
        integer, intent(in)                :: m
        type(scalar3d), intent(in)         :: b
        type(scalar3d), intent(inout)      :: u
        real(kind=rl), allocatable :: r(:)
        integer :: ix,iy,iz, nz
        real(kind=rl), dimension(5) :: alpha_T
        real(kind=rl), allocatable :: c(:), utmp(:)
        real(kind=rl), allocatable :: u0(:,:,:)
        integer :: nlocal, halo_size
        real(kind=rl) :: rho
        logical :: overlap_comms
        integer, dimension(4) :: send_requests, recv_requests
        integer :: ixmin(5), ixmax(5)
        integer :: iymin(5), iymax(5)
        integer :: iblock, ierr
    
        ! Set optimal smoothing parameter on each level
        rho = 2.0_rl/(2.0_rl+4.0_rl*model_param%omega2*u%grid_param%n**2/(1.0_rl+4.0_rl*model_param%omega2*u%grid_param%n**2))
    
        nz = u%grid_param%nz
        nlocal = u%ix_max -u%ix_min + 1
        halo_size = u%halo_size
    
    #ifdef OVERLAPCOMMS
        overlap_comms = (nlocal > 2)
    #else
        overlap_comms = .false.
    #endif
    
        ! Block 1 (N)
        ixmin(1) = 1
        ixmax(1) = nlocal
        iymin(1) = 1
        iymax(1) = 1
        ! Block 2 (S)
        ixmin(2) = 1
        ixmax(2) = nlocal
        iymin(2) = nlocal
        iymax(2) = nlocal
        ! Block 3 (W)
        ixmin(3) = 1
        ixmax(3) = 1
        iymin(3) = 2
        iymax(3) = nlocal-1
        ! Block 4 (E)
        ixmin(4) = nlocal
        ixmax(4) = nlocal
        iymin(4) = 2
        iymax(4) = nlocal-1
        ! Block 5 (INTERIOR)
        if (overlap_comms) then
          ixmin(5) = 2
          ixmax(5) = nlocal-1
          iymin(5) = 2
          iymax(5) = nlocal-1
        else
          ! If there are no interior cells, do not overlap
          ! communications and calculations, just loop over interior cells
          ixmin(5) = 1
          ixmax(5) = nlocal
          iymin(5) = 1
          iymax(5) = nlocal
        end if
    
        ! Temporary vector
        allocate(u0(0:u%grid_param%nz+1,            &
                    1-halo_size:nlocal+halo_size,   &
                    1-halo_size:nlocal+halo_size) )
        u0(:,:,:) = u%s(:,:,:)
        ! Create residual vector
        allocate(r(nz))
        ! Allocate memory for auxiliary vectors for Thomas algorithm
        allocate(c(nz))
        allocate(utmp(nz))
    
        ! Loop over grid
        if (overlap_comms) then
        ! Loop over cells next to boundary (iblock = 1,...,4)
          do iblock = 1, 4
            call loop_over_grid(iblock)
          end do
          ! Initiate halo exchange
          call ihaloswap(level,m,u,send_requests,recv_requests)
        end if
        ! Loop over INTERIOR cells
        iblock = 5
        call loop_over_grid(iblock)
        if (overlap_comms) then
          if (m > 0) then
            call mpi_waitall(4,recv_requests, MPI_STATUSES_IGNORE, ierr)
          end if
        else
          call haloswap(level,m,u)
        end if
    
        ! Free memory again
        deallocate(r)
        deallocate(c)
        deallocate(u0)
        deallocate(utmp)
    
      contains
    
      subroutine loop_over_grid(iblock)
        implicit none
        integer, intent(in) :: iblock
        integer :: ix,iy,iz
        do ix=ixmin(iblock),ixmax(iblock)
          do iy=iymin(iblock),iymax(iblock)
            call apply_tridiag_solve(ix,iy,r,c,b,        &
                                     u0(1:nz,iy  ,ix+1), &
                                     u0(1:nz,iy  ,ix-1), &
                                     u0(1:nz,iy+1,ix  ), &
                                     u0(1:nz,iy-1,ix  ), &
                                     utmp)
            ! Add correction
            do iz=1,nz
              u%s(iz,iy,ix) = rho*utmp(iz) + (1.0_rl-rho)*u0(iz,iy,ix)
            end do
          end do
        end do
      end subroutine loop_over_grid
    
      end subroutine line_Jacobi
    
    !==================================================================
    ! At a given horizontal position (ix,iy) (local coordinates),
    ! calculate
    !
    ! u_out = T(ix,iy)^{-1} (b_(ix,iy)
    !       - sum_{ix',iy' != ix,iy} A_{(ix,iy),(ix',iy')}*u_in(ix',iy'))
    !
    !==================================================================
      subroutine apply_tridiag_solve(ix,iy,r,c,b, &
                                     u_in_1,      &
                                     u_in_2,      &
                                     u_in_3,      &
                                     u_in_4,      &
                                     u_out)
    
        implicit none
        integer, intent(in) :: ix
        integer, intent(in) :: iy
        real(kind=rl), intent(inout), dimension(:) :: r
        real(kind=rl), intent(inout), dimension(:) :: c
        type(scalar3d), intent(in) :: b
        real(kind=rl), intent(in), dimension(:) :: u_in_1
        real(kind=rl), intent(in), dimension(:) :: u_in_2
        real(kind=rl), intent(in), dimension(:) :: u_in_3
        real(kind=rl), intent(in), dimension(:) :: u_in_4
        real(kind=rl), intent(inout), dimension(:) :: u_out
        real(kind=rl), dimension(5) :: alpha_T
        real(kind=rl) :: Tij
        real(kind=rl) :: alpha_div_Tij, tmp, b_k_tmp, c_k_tmp
        integer :: iz, nz
    
        nz = b%grid_param%nz
    
        call construct_alpha_T(b%grid_param,  &
                               ix+b%ix_min-1, &
                               iy+b%iy_min-1, &
                               alpha_T,Tij)
        ! Calculate r_i = b_i - A_{ij} u_i
        do iz=1,nz
          r(iz) = b%s(iz,iy,ix) - vert_coeff%d(iz) * ( &
                    alpha_T(1) * u_in_1(iz) + &
                    alpha_T(2) * u_in_2(iz) + &
                    alpha_T(3) * u_in_3(iz) + &
                    alpha_T(4) * u_in_4(iz) )
        end do
    
        ! Thomas algorithm
        ! STEP 1: Create modified coefficients
        iz = 1
        alpha_div_Tij = alpha_T(5)/Tij
        tmp = (vert_coeff%a(iz)-vert_coeff%b(iz)-vert_coeff%c(iz)) &
                 - alpha_div_Tij
        c(iz) = vert_coeff%b(iz)/tmp
        u_out(iz) = r(iz) / (tmp*Tij*vert_coeff%d(iz))
        do iz=2,nz
          b_k_tmp = vert_coeff%b(iz)
          c_k_tmp = vert_coeff%c(iz)
          tmp = ((vert_coeff%a(iz)-b_k_tmp-c_k_tmp)-alpha_div_Tij) &
              - c(iz-1)*c_k_tmp
          c(iz) = b_k_tmp / tmp
          u_out(iz) = (r(iz) / (Tij*vert_coeff%d(iz)) - u_out(iz-1)*c_k_tmp) / tmp
        end do
        ! STEP 2: back substitution
        do iz=nz-1,1,-1
          u_out(iz) = u_out(iz) - c(iz) * u_out(iz+1)
        end do
      end subroutine apply_tridiag_solve
    
    end module discretisation