From 7fa37be529b9669c9d601ca4f111caefa97822ca Mon Sep 17 00:00:00 2001
From: Juan ESCOBAR <juan.escobar@aero.obs-mip.fr>
Date: Fri, 4 Nov 2022 18:42:12 +0100
Subject: [PATCH] Juan 04/11/2022:mode_double_double.f90, CCE Optimization,
 declare SUM_DD_R2_ll_DEVICE & use "!$acc parallel" to avoid multiple kernel
 lunch in seq loop

---
 src/LIB/SURCOUCHE/src/mode_double_double.f90 | 52 ++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/LIB/SURCOUCHE/src/mode_double_double.f90 b/src/LIB/SURCOUCHE/src/mode_double_double.f90
index b9e58898f..8b70ae69e 100644
--- a/src/LIB/SURCOUCHE/src/mode_double_double.f90
+++ b/src/LIB/SURCOUCHE/src/mode_double_double.f90
@@ -215,6 +215,58 @@ CONTAINS
     c = ddc%R
   END FUNCTION SUM_DD_R2_LL
 
+  FUNCTION SUM_DD_R2_ll_DEVICE (a) RESULT(c)
+    !----------------------------------------------------------------------
+    ! 
+    ! Purpose: 
+    ! Modification of original codes written by David H. Bailey    
+    ! This subroutine computes ddc = ddb + a
+    ! Could be inlined by compiler <=> elemental function
+    ! 
+    !----------------------------------------------------------------------
+    USE mode_reduce_sum, ONLY: REDUCESUM_ll
+    !
+    ! Arguments
+    !
+    REAL                                     :: c     ! result
+    REAL,DIMENSION(:,:), INTENT(in)          :: a     ! input
+    !
+    ! Local workspace
+    !
+    TYPE(DOUBLE_DOUBLE)                      :: ddc  
+    TYPE(DOUBLE_DOUBLE),DIMENSION(SIZE(a,1)) :: ddb   
+    REAL               ,DIMENSION(SIZE(a,1)) :: e, t1, t2
+    INTEGER                                  :: i,j
+    INTEGER                                  :: IINFO_ll 
+    !
+    !-----------------------------------------------------------------------
+    !
+    !   Compute dda + ddb using Knuth's trick.
+    !$acc kernels
+    ddb%R = 0.0
+    ddb%E = 0.0
+    !$acc end kernels
+    !$acc parallel
+    !$acc loop seq
+    DO j=1,SIZE(a,2)
+       !$acc loop independent
+       DO i=1,SIZE(a,1)
+          t1(i) = a(i,j)  + ddb(i)%R
+          e(i)  = t1(i) - a(i,j)
+          t2(i) = ((ddb(i)%R - e(i)) + (a(i,j) - (t1(i) - e(i)))) &
+               + ddb(i)%E
+          !
+          !   The result is t1 + t2, after normalization.
+          ddb(i)%R = t1(i) + t2(i)
+          ddb(i)%E = t2(i) - ((t1(i) + t2(i)) - t1(i)) 
+       END DO
+    END DO
+    !$acc end parallel
+    ddc = SUM_DD_DD1(ddb)
+    CALL REDUCESUM_ll(ddc,IINFO_ll)
+    c = ddc%R
+  END FUNCTION SUM_DD_R2_LL_DEVICE
+
  FUNCTION SUM_DD_R2_R1_ll (a) RESULT(c)
     !----------------------------------------------------------------------
     ! 
-- 
GitLab