From 150a0121e87fbb32e80fbf857d2a5082c1f9d08f Mon Sep 17 00:00:00 2001
From: Juan Escobar <escj@aero.obs-mip.fr>
Date: Thu, 18 Jul 2019 15:03:28 +0200
Subject: [PATCH] Juan 18/07/2019:discretisation.f90, optimisation of tridiag,
 split loop to compute tmp_k first

---
 .../discretisation.f90                        | 28 ++++++++-----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/tensorproductmultigrid_Source/discretisation.f90 b/tensorproductmultigrid_Source/discretisation.f90
index 7fca1ce70..a42888a1d 100644
--- a/tensorproductmultigrid_Source/discretisation.f90
+++ b/tensorproductmultigrid_Source/discretisation.f90
@@ -1792,7 +1792,7 @@ end subroutine line_Jacobi_mnh
     integer :: iz, nz
 
     real, dimension(:,:,:) , pointer :: zSr_st , zb_st , zSu_in_st , zSu_out_st
-    real, dimension(:)     , pointer :: za_k, zb_k, zc_k, zd_k
+    real, dimension(:)     , pointer :: za_k, zb_k, zc_k, zd_k , tmp_k
     integer :: ii,ij
 
     nz = b%grid_param%nz
@@ -1809,8 +1809,9 @@ end subroutine line_Jacobi_mnh
       zb_k  => vert_coeff%b
       zc_k  => vert_coeff%c
       zd_k  => vert_coeff%d
+      allocate(tmp_k(size(zb_k)))
 
-       !$acc kernels
+      !$acc kernels
        iz=1 
        zSr_st(iib:iie,ijb:ije,iz) = zb_st(iib:iie,ijb:ije,iz)
        do iz=2,nz-1
@@ -1831,29 +1832,21 @@ end subroutine line_Jacobi_mnh
        c(iz) = vert_coeff%b(iz)/tmp
        zSu_out_st(iib:iie,ijb:ije,iz) = zSr_st(iib:iie,ijb:ije,iz) / (tmp*Tij*zd_k(iz))
        !
-       !acc loop seq
        do iz=2,nz-1
           b_k_tmp = zb_k(iz)
           c_k_tmp = zc_k(iz)
-          tmp = ((za_k(iz)-b_k_tmp-c_k_tmp)-alpha_div_Tij) &
+          tmp_k(iz) = ((za_k(iz)-b_k_tmp-c_k_tmp)-alpha_div_Tij) &
                - c(iz-1)*c_k_tmp
-          c(iz) = b_k_tmp / tmp
-
-!!$          zSu_out_st(iib:iie,ijb:ije,iz) = (zSr_st(iib:iie,ijb:ije,iz) / (Tij*zd_k(iz)) & 
-!!$                                         - zSu_out_st(iib:iie,ijb:ije,iz-1)*c_k_tmp) / tmp
-
+          c(iz) = b_k_tmp / tmp_k(iz)
+       end do
+       do iz=2,nz-1
           !$acc loop independent collapse(2)
           do ij=ijb,ije
              do ii=iib,iie
                 zSu_out_st(ii,ij,iz) = (zSr_st(ii,ij,iz) / (Tij*zd_k(iz)) & 
-                                     - zSu_out_st(ii,ij,iz-1)*c_k_tmp) / tmp
+                                     - zSu_out_st(ii,ij,iz-1)*zc_k(iz)) / tmp_k(iz) ! zc_k(iz), not the stale c_k_tmp
              end do
-          end do
-!!$          do concurrent ( ij=ijb:ije ,  ii=iib:iie )
-!!$             zSu_out_st(ii,ij,iz) = (zSr_st(ii,ij,iz) / (Tij*zd_k(iz)) & 
-!!$                  - zSu_out_st(ii,ij,iz-1)*c_k_tmp) / tmp
-!!$          end do
-          
+          end do          
        end do
        !
        iz=nz
@@ -1871,6 +1864,9 @@ end subroutine line_Jacobi_mnh
                                         - c(iz) * zSu_out_st(iib:iie,ijb:ije,iz+1)
        end do
        !$acc end kernels
+
+       deallocate(tmp_k)
+
     end if
 
   end subroutine apply_tridiag_solve_mnh_allT
-- 
GitLab