From 763d448c702c3a82f6fc3c86225b489733f14d03 Mon Sep 17 00:00:00 2001
From: Juan Escobar <juan.escobar@aero.obs-mip.fr>
Date: Wed, 18 Aug 2021 15:40:30 +0200
Subject: [PATCH] Juan 18/08/2021: add acc kernels to init routines

---
 .../datatypes.f90                             |   7 +-
 .../multigrid.f90                             |  35 ++--
 .../profiles.f90                              | 157 ++++++++++++------
 3 files changed, 132 insertions(+), 67 deletions(-)

diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90
index 4ff30cd06..d0ddee6da 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/datatypes.f90
@@ -142,6 +142,8 @@ private
                                                        ! direction
     integer, parameter                  :: dim_horiz = 2 ! horiz. dimension
 
+    real(kind=rl) , dimension(:,:,:), pointer , contiguous :: zphi_st
+
     phi%grid_param = grid_param
     call mpi_comm_size(comm_horiz, nproc, ierr)
     nlocal = grid_param%n/sqrt(1.0*nproc)
@@ -190,11 +192,12 @@ private
     phi%s(:,:,:) = 0.0_rl
     end if
     if (LUseT) then
-    allocate(phi%st(1-halo_size:nlocal+halo_size, &
+    allocate(zphi_st(1-halo_size:nlocal+halo_size, &
                     1-halo_size:nlocal+halo_size, &
                     0:grid_param%nz+1))
+    phi%st => zphi_st
     !$acc kernels
-    phi%st(:,:,:) = 0.0_rl
+    zphi_st(:,:,:) = 0.0_rl
     !$acc end kernels
     end if
 
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90
index e642d1722..bb1a7d3f7 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90
@@ -166,7 +166,7 @@ contains
     integer                            :: vertbc
     character(len=32)                  :: t_label
 
-
+    real , dimension(:,:,:) , pointer , contiguous :: zxu_mg_st,zxb_mg_st,zxr_mg_st
 
     if (i_am_master_mpi) &
       write(STDOUT,*) '*** Initialising multigrid ***'
@@ -272,18 +272,27 @@ contains
       endif
 
       if (LUseT) then
-      allocate(xu_mg(level,m)%st(1-halo_size:nlocal+halo_size, &
-                                 1-halo_size:nlocal+halo_size, &
-                                 0:nz+1))
-      allocate(xb_mg(level,m)%st(1-halo_size:nlocal+halo_size, &
-                                 1-halo_size:nlocal+halo_size, &
-                                 0:nz+1))
-      allocate(xr_mg(level,m)%st(1-halo_size:nlocal+halo_size, &
-                                 1-halo_size:nlocal+halo_size, &
-                                 0:nz+1))
-      xu_mg(level,m)%st(:,:,:) = 0.0_rl
-      xb_mg(level,m)%st(:,:,:) = 0.0_rl
-      xr_mg(level,m)%st(:,:,:) = 0.0_rl
+         allocate(zxu_mg_st(1-halo_size:nlocal+halo_size, &
+              1-halo_size:nlocal+halo_size, &
+              0:nz+1))
+         xu_mg(level,m)%st => zxu_mg_st
+         
+         allocate(zxb_mg_st(1-halo_size:nlocal+halo_size, &
+              1-halo_size:nlocal+halo_size, &
+              0:nz+1))
+         xb_mg(level,m)%st => zxb_mg_st
+         
+         allocate(zxr_mg_st(1-halo_size:nlocal+halo_size, &
+              1-halo_size:nlocal+halo_size, &
+              0:nz+1))
+          xr_mg(level,m)%st => zxr_mg_st
+
+      
+      !$acc kernels
+      zxu_mg_st(:,:,:) = 0.0_rl
+      zxb_mg_st(:,:,:) = 0.0_rl
+      zxr_mg_st(:,:,:) = 0.0_rl
+      !$acc end kernels
       endif
 
       ! NB: 1st coordinate is in the y-direction of the processor grid,
diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90
index 489210645..e20e0d221 100644
--- a/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90
+++ b/src/ZSOLVER/tensorproductmultigrid_Source/profiles.f90
@@ -65,23 +65,40 @@ private
     integer       , optional, intent(in) :: KIB,KIE,KIU,KJB,KJE,KJU,KKU
     real(kind=rl) , optional, intent(in) :: PU(:,:,:)
 
+    real , dimension(:,:,:) , pointer , contiguous :: zu_st
+
     ix_min = u%ix_min
     ix_max = u%ix_max
     iy_min = u%iy_min
     iy_max = u%iy_max
     IF (.NOT. PRESENT(KIB) ) THEN
-    ! Initialise RHS
-    do ix=ix_min, ix_max
-      do iy=iy_min, iy_max
-        do iz=1,u%grid_param%nz
- 
-           if (LUseO) u%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl
-           if (LUseT) u%st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl
-
-        end do
-      end do
-    end do
-    ELSE
+       ! Initialise RHS
+        if (LUseO) then
+           do ix=ix_min, ix_max
+              do iy=iy_min, iy_max
+                 do iz=1,u%grid_param%nz
+                    
+                    u%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl
+                    
+                 end do
+              end do
+           end do
+        end if
+        if (LUseT) then
+           zu_st => u%st
+           !$acc kernels loop independent collapse(3)
+           do iz=1,u%grid_param%nz
+              do iy=iy_min, iy_max
+                 do ix=ix_min, ix_max
+                    
+                    zu_st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl
+                    
+                 end do
+              end do
+           end do
+           !$acc end kernels
+        end if
+     ELSE
    ! Initialise RHS
        if (LUseO) then
           do ix=ix_min, ix_max
@@ -93,13 +110,16 @@ private
           end do
        end if
        if (LUseT) then
+          zu_st => u%st
+          !$acc kernels loop independent collapse(3)
           do iz=1,u%grid_param%nz
              do iy=iy_min, iy_max
                 do ix=ix_min, ix_max
-                   u%st(ix-ix_min+1,iy-iy_min+1,iz) = PU(IX-ix_min+KIB,IY-iy_min+KJB,IZ)
+                   zu_st(ix-ix_min+1,iy-iy_min+1,iz) = PU(IX-ix_min+KIB,IY-iy_min+KJB,IZ)
                 end do
              end do
           end do
+          !$acc end kernels
        end if
     END IF
   end subroutine initialise_u_mnh
@@ -160,53 +180,86 @@ private
     integer       , optional, intent(in) :: KIB,KIE,KIU,KJB,KJE,KJU,KKU
     real(kind=rl) , optional, intent(in) :: PY(:,:,:)
 
+    real , dimension(:,:,:) , pointer , contiguous :: zb_st
+
     ix_min = b%ix_min
     ix_max = b%ix_max
     iy_min = b%iy_min
     iy_max = b%iy_max
     IF (.NOT. PRESENT(KIB) ) THEN
     ! Initialise RHS
-    do ix=ix_min, ix_max
-      do iy=iy_min, iy_max
-        do iz=1,b%grid_param%nz
-          x = 1.0_rl*((ix-0.5_rl)/(1.0_rl*b%grid_param%n))
-          y = 1.0_rl*((iy-0.5_rl)/(1.0_rl*b%grid_param%n))
-          z = 1.0_rl*((iz-0.5_rl)/(1.0_rl*b%grid_param%nz))
-
-          if (LUseO) b%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl
-          if (LUseT) b%st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl
-
-          if ( ( x .ge. 0.1_rl ) .and. ( x .le. 0.4_rl ) &
-            .and. (y .ge. 0.3_rl ) .and. ( y .le. 0.6_rl ) &
-            .and. (z .ge. 0.2_rl ) .and. ( z .le. 0.7_rl ) ) &
-            then
-            if (LUseO) b%s(iz,iy-iy_min+1,ix-ix_min+1) = 1.0_rl 
-            if (LUseT) b%st(ix-ix_min+1,iy-iy_min+1,iz) = 1.0_rl 
-          end if
-
-        end do
-      end do
-    end do
+       if (LUseO) then   
+          do ix=ix_min, ix_max
+             do iy=iy_min, iy_max
+                do iz=1,b%grid_param%nz
+                   x = 1.0_rl*((ix-0.5_rl)/(1.0_rl*b%grid_param%n))
+                   y = 1.0_rl*((iy-0.5_rl)/(1.0_rl*b%grid_param%n))
+                   z = 1.0_rl*((iz-0.5_rl)/(1.0_rl*b%grid_param%nz))
+                   
+                   b%s(iz,iy-iy_min+1,ix-ix_min+1) = 0.0_rl
+                   
+                   if ( ( x .ge. 0.1_rl ) .and. ( x .le. 0.4_rl ) &
+                        .and. (y .ge. 0.3_rl ) .and. ( y .le. 0.6_rl ) &
+                        .and. (z .ge. 0.2_rl ) .and. ( z .le. 0.7_rl ) ) &
+                        then
+                      
+                      b%s(iz,iy-iy_min+1,ix-ix_min+1) = 1.0_rl 
+                      
+                   end if
+                   
+                end do
+             end do
+          end do
+       endif
+       if (LUseT) then
+          zb_st => b%st
+          !$acc kernels loop independent collapse(3)
+          do iz=1,b%grid_param%nz
+             do iy=iy_min, iy_max
+                do ix=ix_min, ix_max
+                   x = 1.0_rl*((ix-0.5_rl)/(1.0_rl*b%grid_param%n))
+                   y = 1.0_rl*((iy-0.5_rl)/(1.0_rl*b%grid_param%n))
+                   z = 1.0_rl*((iz-0.5_rl)/(1.0_rl*b%grid_param%nz))
+                   
+                   zb_st(ix-ix_min+1,iy-iy_min+1,iz) = 0.0_rl
+                   
+                   if ( ( x .ge. 0.1_rl ) .and. ( x .le. 0.4_rl ) &
+                        .and. (y .ge. 0.3_rl ) .and. ( y .le. 0.6_rl ) &
+                        .and. (z .ge. 0.2_rl ) .and. ( z .le. 0.7_rl ) ) &
+                        then
+                      
+                      zb_st(ix-ix_min+1,iy-iy_min+1,iz) = 1.0_rl
+                      
+                   end if
+                   
+                end do
+             end do
+          end do
+          !$acc end kernels
+       end if
     ELSE
    ! Initialise RHS
-      if (LUseO) then    
-         do ix=ix_min, ix_max
-            do iy=iy_min, iy_max
-               do iz=1,b%grid_param%nz
-                  b%s(iz,iy-iy_min+1,ix-ix_min+1) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ)
-               end do
-            end do
-         end do
-      end if
-     if (LUseT) then   
-        do iz=1,b%grid_param%nz 
-           do iy=iy_min, iy_max
-              do ix=ix_min, ix_max
-                  b%st(ix-ix_min+1,iy-iy_min+1,iz) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ)
-               end do
-            end do
-         end do
-      end if
+       if (LUseO) then    
+          do ix=ix_min, ix_max
+             do iy=iy_min, iy_max
+                do iz=1,b%grid_param%nz
+                   b%s(iz,iy-iy_min+1,ix-ix_min+1) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ)
+                end do
+             end do
+          end do
+       end if
+       if (LUseT) then
+          zb_st => b%st
+          !$acc kernels loop independent collapse(3)
+          do iz=1,b%grid_param%nz 
+             do iy=iy_min, iy_max
+                do ix=ix_min, ix_max
+                   zb_st(ix-ix_min+1,iy-iy_min+1,iz) = PY(IX-ix_min+KIB,IY-iy_min+KJB,IZ)
+                end do
+             end do
+          end do
+          !$acc end kernels
+       end if
     END IF
   end subroutine initialise_rhs_mnh
 !==================================================================
-- 
GitLab