From b39dfb02580ca23a53f92f898264bdaac520a618 Mon Sep 17 00:00:00 2001
From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr>
Date: Mon, 17 Jul 2023 11:21:45 +0200
Subject: [PATCH] Philippe 17/07/2023: FFT: invert loops for better performance
 and to allow correct parallelization by NVHPC OpenACC

---
 src/MNH/fft.f90 | 336 ++++++++++++++++++++++++------------------------
 1 file changed, 168 insertions(+), 168 deletions(-)

diff --git a/src/MNH/fft.f90 b/src/MNH/fft.f90
index 7bfd0ef4b..79a2bee6b 100644
--- a/src/MNH/fft.f90
+++ b/src/MNH/fft.f90
@@ -415,14 +415,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II)
             PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II)
           END DO
@@ -450,14 +450,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IIA = IIA0 + (IIK-KLA)/KLA * IINK
             IIB = IIB0 - (IIK-KLA)/KLA * IINK
 !$acc loop independent
-            DO IIL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-              DO IJK=1,KLOT
-                II =         IIL - 1 + (IJK - 1 ) * KINC3
-                IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+              DO IIL=0,KLA-1
+                II =          IJK * KINC3 + IIL
+                IJ = IJBASE + IJK * KINC4 + IIL
                 PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II)
                 PD(IJA+IJ)=PB(IIA+II)-PB(IIB+II)
                 PC(IJB+IJ)=ZC1*(PA(IIA+II)-PA(IIB+II))-ZS1*(PB(IIA+II)+PB(IIB+II))
@@ -474,14 +474,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IIBASE=0
 !$acc loop independent
-          DO IIL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+            DO IIL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IIL
+              IJ = IJBASE + IJK * KINC4 + IIL
               PC(IJA+IJ)=PA(IIA+II)
               PC(IJB+IJ)=-PB(IIA+II)
             END DO
@@ -495,14 +495,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II))
             PC(IJB+IJ)=2.0*(PA(IIA+II)-PA(IIB+II))
           END DO
@@ -529,14 +529,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II)
             PC(IJB+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))-(XSIN60*(PB(IIB+II)))
             PC(IJC+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))+(XSIN60*(PB(IIB+II)))
@@ -570,14 +570,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IIB = IIB0 + (IIK-KLA)/KLA * IINK
             IIC = IIC0 - (IIK-KLA)/KLA * IINK
 !$acc loop independent
-            DO IIL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-              DO IJK=1,KLOT
-                II =         IIL - 1 + (IJK - 1 ) * KINC3
-                IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+              DO IIL=0,KLA-1
+                II =          IJK * KINC3 + IIL
+                IJ = IJBASE + IJK * KINC4 + IIL
                 PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))
                 PD(IJA+IJ)=PB(IIA+II)+(PB(IIB+II)-PB(IIC+II))
                 PC(IJB+IJ)=                                                      &
@@ -614,14 +614,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IIBASE=0
 !$acc loop independent
-          DO IIL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+            DO IIL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IIL
+              IJ = IJBASE + IJK * KINC4 + IIL
               PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II)
               PC(IJB+IJ)=(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II))
               PC(IJC+IJ)=-(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II))
@@ -637,14 +637,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
         ZSSIN60=2.0*XSIN60
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II))
             PC(IJB+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))-(ZSSIN60*PB(IIB+II))
             PC(IJC+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))+(ZSSIN60*PB(IIB+II))
@@ -675,14 +675,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+PA(IIB+II)
             PC(IJB+IJ)=(PA(IIA+II)-PA(IIC+II))-PB(IIB+II)
             PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-PA(IIB+II)
@@ -723,14 +723,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IIC = IIC0 - (IIK-KLA)/KLA * IINK
             IID = IID0 - (IIK-KLA)/KLA * IINK
 !$acc loop independent
-            DO IIL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-              DO IJK=1,KLOT
-                II =         IIL - 1 + (IJK - 1 ) * KINC3
-                IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+              DO IIL=0,KLA-1
+                II =          IJK * KINC3 + IIL
+                IJ = IJBASE + IJK * KINC4 + IIL
                 PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))
                 PD(IJA+IJ)=(PB(IIA+II)-PB(IIC+II))+(PB(IIB+II)-PB(IID+II))
                 PC(IJC+IJ)=                                     &
@@ -767,14 +767,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
           IIBASE=0
           ZSIN45=SQRT(0.5)
 !$acc loop independent
-          DO IIL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+            DO IIL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IIL
+              IJ = IJBASE + IJK * KINC4 + IIL
               PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II)
               PC(IJB+IJ)=ZSIN45*((PA(IIA+II)-PA(IIB+II))-(PB(IIA+II)+PB(IIB+II)))
               PC(IJC+IJ)=PB(IIB+II)-PB(IIA+II)
@@ -790,14 +790,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))+PA(IIB+II))
             PC(IJB+IJ)=2.0*((PA(IIA+II)-PA(IIC+II))-PB(IIB+II))
             PC(IJC+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))-PA(IIB+II))
@@ -831,14 +831,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))
             PC(IJB+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))+XQRT5*(PA(IIB+II)-PA(IIC+ &
               II)))                                                        &
@@ -894,14 +894,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IID = IID0 - (IIK-KLA)/KLA * IINK
             IIE = IIE0 - (IIK-KLA)/KLA * IINK
 !$acc loop independent
-            DO IIL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA10, ZA11, ZA20, ZA21, ZB10, ZB11, ZB20, ZB21 )
-              DO IJK=1,KLOT
-                II =         IIL - 1 + (IJK - 1 ) * KINC3
-                IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+              DO IIL=0,KLA-1
+                II =          IJK * KINC3 + IIL
+                IJ = IJBASE + IJK * KINC4 + IIL
 
                 ZA10=(PA(IIA+II)-0.25*((PA(IIB+II)+PA(IIE+II))+(PA(IIC+II)+PA(IID+II)))) &
                     +XQRT5*((PA(IIB+II)+PA(IIE+II))-(PA(IIC+II)+PA(IID+II)))
@@ -942,14 +942,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IIBASE=0
 !$acc loop independent
-          DO IIL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+            DO IIL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IIL
+              IJ = IJBASE + IJK * KINC4 + IIL
               PC(IJA+IJ)=(PA(IIA+II)+PA(IIB+II))+PA(IIC+II)
               PC(IJB+IJ)=(XQRT5*(PA(IIA+II)-PA(IIB+II))+(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ &
                 II)))                                                        &
@@ -977,14 +977,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
         ZSSIN36=2.0*XSIN36
         ZSSIN72=2.0*XSIN72
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=2.0*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)))
             PC(IJB+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))                 &
                 +ZQQRT5*(PA(IIB+II)-PA(IIC+II)))-(ZSSIN72*PB(IIB+II)+ZSSIN36*PB(IIC+II))
@@ -1025,14 +1025,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+(PA(IIB+II)+PA(IIC+II))
             PC(IJD+IJ)=(PA(IIA+II)-PA(IID+II))-(PA(IIB+II)-PA(IIC+II))
             PC(IJB+IJ)=((PA(IIA+II)-PA(IID+II))+0.5*(PA(IIB+II)-PA(IIC+II))) &
@@ -1092,14 +1092,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IIE = IIE0 - (IIK-KLA)/KLA * IINK
             IIF = IIF0 - (IIK-KLA)/KLA * IINK
 !$acc loop independent
-            DO IIL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA11, ZA20, ZA21, ZB11, ZB20, ZB21 )
-              DO IJK=1,KLOT
-                II =         IIL - 1 + (IJK - 1 ) * KINC3
-                IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+              DO IIL=0,KLA-1
+                II =          IJK * KINC3 + IIL
+                IJ = IJBASE + IJK * KINC4 + IIL
 
                 ZA11= (PA(IIE+II)+PA(IIB+II))+(PA(IIC+II)+PA(IIF+II))
                 ZA20=(PA(IIA+II)+PA(IID+II))-0.5*ZA11
@@ -1147,14 +1147,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IIBASE=0
 !$acc loop independent
-          DO IIL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+            DO IIL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IIL
+              IJ = IJBASE + IJK * KINC4 + IIL
               PC(IJA+IJ)=PA(IIB+II)+(PA(IIA+II)+PA(IIC+II))
               PC(IJD+IJ)=PB(IIB+II)-(PB(IIA+II)+PB(IIC+II))
               PC(IJB+IJ)=(XSIN60*(PA(IIA+II)-PA(IIC+II)))-(0.5*(PB(IIA+II)+PB(IIC+II))+PB(IIB+ &
@@ -1175,14 +1175,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
         ZSSIN60=2.0*XSIN60
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=(2.0*(PA(IIA+II)+PA(IID+II)))+(2.0*(PA(IIB+II)+PA(IIC+II)))
             PC(IJD+IJ)=(2.0*(PA(IIA+II)-PA(IID+II)))-(2.0*(PA(IIB+II)-PA(IIC+II)))
             PC(IJB+IJ)=(2.0*(PA(IIA+II)-PA(IID+II))+(PA(IIB+II)-PA(IIC+II))) &
@@ -1227,14 +1227,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
         ZSSIN45=SQRT(2.0)
 
 !$acc loop independent
-        DO IIL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4
+          DO IIL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IIL
+            IJ = IJBASE + IJK * KINC4 + IIL
             PC(IJA+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)))
             PC(IJE+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))-(PA(IIB+II)+PA(IID+II)))
             PC(IJC+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))-PA(IIC+II))-(PB(IIB+II)-PB(IID+II)))
@@ -1350,14 +1350,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II)
             PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II)
           END DO
@@ -1384,14 +1384,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IJA = IJA0 + (IIK-KLA)/KLA * JINK
             IJB = IJB0 - (IIK-KLA)/KLA * JINK
 !$acc loop independent
-            DO IJL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-              DO IJK=1,KLOT
-                II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-                IJ =         IJL - 1 + (IJK - 1 ) * KINC4
+              DO IJL=0,KLA-1
+                II = IIBASE + IJK * KINC3 + IJL
+                IJ =          IJK * KINC4 + IJL
                 PC(IJA+IJ)=PA(IIA+II)+(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))
                 PC(IJB+IJ)=PA(IIA+II)-(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))
                 PD(IJA+IJ)=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+PB(IIA+II)
@@ -1409,14 +1409,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IJBASE=0
 !$acc loop independent
-          DO IJL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+            DO IJL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IJL
+              IJ = IJBASE + IJK * KINC4 + IJL
               PC(IJA+IJ)=PA(IIA+II)
               PD(IJA+IJ)=-PA(IIB+II)
             END DO
@@ -1431,14 +1431,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
         ZZ=1.0/REAL(KN)
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=ZZ*(PA(IIA+II)+PA(IIB+II))
             PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIB+II))
           END DO
@@ -1468,14 +1468,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))
             PC(IJB+IJ)=PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II))
             PD(IJB+IJ)=XSIN60*(PA(IIC+II)-PA(IIB+II))
@@ -1509,14 +1509,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IJB = IJB0 + (IIK-KLA)/KLA * JINK
             IJC = IJC0 - (IIK-KLA)/KLA * JINK
 !$acc loop independent
-            DO IJL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZB1, ZB2, ZB3 )
-              DO IJK=1,KLOT
-                II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-                IJ =         IJL - 1 + (IJK - 1 ) * KINC4
+              DO IJL=0,KLA-1
+                II = IIBASE + IJK * KINC3 + IJL
+                IJ =          IJK * KINC4 + IJL
                 ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))
                 ZB1=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+(ZC2*PB(IIC+II)-ZS2*PA(IIC+II))
                 ZA2=PA(IIA+II)-0.5*ZA1
@@ -1543,14 +1543,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IJBASE=0
 !$acc loop independent
-          DO IJL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+            DO IJL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IJL
+              IJ = IJBASE + IJK * KINC4 + IJL
               PC(IJA+IJ)=PA(IIA+II)+0.5*(PA(IIB+II)-PA(IIC+II))
               PD(IJA+IJ)=-XSIN60*(PA(IIB+II)+PA(IIC+II))
               PC(IJB+IJ)=PA(IIA+II)-(PA(IIB+II)-PA(IIC+II))
@@ -1567,14 +1567,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
         ZZ=1.0/REAL(KN)
         ZZSIN60=ZZ*XSIN60
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=ZZ*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)))
             PC(IJB+IJ)=ZZ*(PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)))
             PD(IJB+IJ)=ZZSIN60*(PA(IIC+II)-PA(IIB+II))
@@ -1605,14 +1605,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
       IF ( KLA /= IM ) THEN
 !$acc kernels
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))
             PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))
             PC(IJB+IJ)=PA(IIA+II)-PA(IIC+II)
@@ -1653,14 +1653,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IJC = IJC0 - (IIK-KLA)/KLA * JINK
             IJD = IJD0 - (IIK-KLA)/KLA * JINK
 !$acc loop independent
-            DO IJL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA0, ZA1, ZA2, ZA3, ZB0, ZB1, ZB2, ZB3 )
-              DO IJK=1,KLOT
-                II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-                IJ =         IJL - 1 + (IJK - 1 ) * KINC4
+              DO IJL=0,KLA-1
+                II = IIBASE + IJK * KINC3 + IJL
+                IJ =          IJK * KINC4 + IJL
                 ZA0=PA(IIA+II)+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))
                 ZA2=PA(IIA+II)-(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))
                 ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC3*PA(IID+II)+ZS3*PB(IID+II))
@@ -1693,14 +1693,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
           ZSIN45=SQRT(0.5)
           IJBASE=0
 !$acc loop independent
-          DO IJL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+            DO IJL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IJL
+              IJ = IJBASE + IJK * KINC4 + IJL
               PC(IJA+IJ)=PA(IIA+II)+ZSIN45*(PA(IIB+II)-PA(IID+II))
               PC(IJB+IJ)=PA(IIA+II)-ZSIN45*(PA(IIB+II)-PA(IID+II))
               PD(IJA+IJ)=-PA(IIC+II)-ZSIN45*(PA(IIB+II)+PA(IID+II))
@@ -1716,14 +1716,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
         ZZ=1.0/REAL(KN)
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)))
             PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II)))
             PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIC+II))
@@ -1756,14 +1756,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
       IF ( KLA /= IM ) THEN
 !$acc kernels
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             ZA1=PA(IIB+II)+PA(IIE+II)
             ZA3=PA(IIB+II)-PA(IIE+II)
             ZA2=PA(IIC+II)+PA(IID+II)
@@ -1817,15 +1817,15 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IJD = IJD0 - (IIK-KLA)/KLA * JINK
             IJE = IJE0 - (IIK-KLA)/KLA * JINK
 !$acc loop independent
-            DO IJL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6, ZA10, ZA11, ZA20, ZA21, &
 !$acc &                         ZB1, ZB2, ZB3, ZB4, ZB5, ZB6, ZB10, ZB11, ZB20, ZB21 )
-              DO IJK=1,KLOT
-                II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-                IJ =         IJL - 1 + (IJK - 1 ) * KINC4
+              DO IJL=0,KLA-1
+                II = IIBASE + IJK * KINC3 + IJL
+                IJ =          IJK * KINC4 + IJL
                 ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC4*PA(IIE+II)+ZS4*PB(IIE+II))
                 ZA3=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))-(ZC4*PA(IIE+II)+ZS4*PB(IIE+II))
                 ZA2=(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))+(ZC3*PA(IID+II)+ZS3*PB(IID+II))
@@ -1872,14 +1872,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IJBASE=0
 !$acc loop independent
-          DO IJL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 )
-            DO IJK=1,KLOT
-              II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+            DO IJL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IJL
+              IJ = IJBASE + IJK * KINC4 + IJL
               ZA1=PA(IIB+II)+PA(IIE+II)
               ZA3=PA(IIB+II)-PA(IIE+II)
               ZA2=PA(IIC+II)+PA(IID+II)
@@ -1905,14 +1905,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
         ZZSIN36=ZZ*XSIN36
         ZZSIN72=ZZ*XSIN72
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             ZA1=PA(IIB+II)+PA(IIE+II)
             ZA3=PA(IIB+II)-PA(IIE+II)
             ZA2=PA(IIC+II)+PA(IID+II)
@@ -1955,14 +1955,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 
 !$acc kernels
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA11 )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II))
             PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+ZA11
             PC(IJC+IJ)=(PA(IIA+II)+PA(IID+II)-0.5*ZA11)
@@ -2020,14 +2020,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
             IJE = IJE0 - (IIK-KLA)/KLA * JINK
             IJF = IJF0 - (IIK-KLA)/KLA * JINK
 !$acc loop independent
-            DO IJL=1,KLA
+            DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA11, ZA20, ZA21, ZB1, ZB2, ZB3, ZB4, ZB5, ZB11, ZB20, ZB21 )
-              DO IJK=1,KLOT
-                II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-                IJ =         IJL - 1 + (IJK - 1 ) * KINC4
+              DO IJL=0,KLA-1
+                II = IIBASE + IJK * KINC3 + IJL
+                IJ =          IJK * KINC4 + IJL
                 ZA1=ZC1*PA(IIB+II)+ZS1*PB(IIB+II)
                 ZB1=ZC1*PB(IIB+II)-ZS1*PA(IIB+II)
                 ZA2=ZC2*PA(IIC+II)+ZS2*PB(IIC+II)
@@ -2079,14 +2079,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
 !$acc kernels
           IJBASE=0
 !$acc loop independent
-          DO IJL=1,KLA
+          DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-            DO IJK=1,KLOT
-              II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-              IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+            DO IJL=0,KLA-1
+              II = IIBASE + IJK * KINC3 + IJL
+              IJ = IJBASE + IJK * KINC4 + IJL
               PC(IJA+IJ)=(PA(IIA+II)+0.5*(PA(IIC+II)-PA(IIE+II)))+ XSIN60*(PA(IIB+II)-PA(IIF+II))
               PD(IJA+IJ)=-(PA(IID+II)+0.5*(PA(IIB+II)+PA(IIF+II)))-XSIN60*(PA(IIC+II)+PA(IIE+II))
               PC(IJB+IJ)=PA(IIA+II)-(PA(IIC+II)-PA(IIE+II))
@@ -2106,14 +2106,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
         ZZ=1.0/REAL(KN)
         ZZSIN60=ZZ*XSIN60
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ, ZA11 )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II))
             PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IID+II))+ZA11)
             PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IID+II))-0.5*ZA11)
@@ -2158,14 +2158,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE
         ZZSIN45=ZZ*SQRT(0.5)
 
 !$acc loop independent
-        DO IJL=1,KLA
+        DO IJK=0,KLOT-1
 !CDIR$ IVDEP
 !!CDIR NODEP
 !*VOCL LOOP,NOVREC
 !$acc loop independent private( II, IJ )
-          DO IJK=1,KLOT
-            II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3
-            IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4
+          DO IJL=0,KLA-1
+            II = IIBASE + IJK * KINC3 + IJL
+            IJ = IJBASE + IJK * KINC4 + IJL
             PC(IJA+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))+ &
                 ((PA(IID+II)+PA(IIH+II))+(PA(IIB+II)+PA(IIF+II))))
             PC(IJE+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))- &
-- 
GitLab