From b39dfb02580ca23a53f92f898264bdaac520a618 Mon Sep 17 00:00:00 2001 From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr> Date: Mon, 17 Jul 2023 11:21:45 +0200 Subject: [PATCH] Philippe 17/07/2023: FFT: invert loops for better performance and to allow correct parallelization by NVHPC OpenACC --- src/MNH/fft.f90 | 336 ++++++++++++++++++++++++------------------------ 1 file changed, 168 insertions(+), 168 deletions(-) diff --git a/src/MNH/fft.f90 b/src/MNH/fft.f90 index 7bfd0ef4b..79a2bee6b 100644 --- a/src/MNH/fft.f90 +++ b/src/MNH/fft.f90 @@ -415,14 +415,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II) END DO @@ -450,14 +450,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIA = IIA0 + (IIK-KLA)/KLA * IINK IIB = IIB0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PD(IJA+IJ)=PB(IIA+II)-PB(IIB+II) PC(IJB+IJ)=ZC1*(PA(IIA+II)-PA(IIB+II))-ZS1*(PB(IIA+II)+PB(IIB+II)) @@ -474,14 +474,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IIBASE=0 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II) PC(IJB+IJ)=-PB(IIA+II) END DO @@ -495,14 +495,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II)) PC(IJB+IJ)=2.0*(PA(IIA+II)-PA(IIB+II)) END DO @@ -529,14 +529,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))-(XSIN60*(PB(IIB+II))) PC(IJC+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))+(XSIN60*(PB(IIB+II))) @@ -570,14 +570,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIB = IIB0 + (IIK-KLA)/KLA * IINK IIC = IIC0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) PD(IJA+IJ)=PB(IIA+II)+(PB(IIB+II)-PB(IIC+II)) PC(IJB+IJ)= & @@ -614,14 +614,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IIBASE=0 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II)) PC(IJC+IJ)=-(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II)) @@ -637,14 +637,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZSSIN60=2.0*XSIN60 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II)) PC(IJB+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))-(ZSSIN60*PB(IIB+II)) PC(IJC+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))+(ZSSIN60*PB(IIB+II)) @@ -675,14 +675,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+PA(IIB+II) PC(IJB+IJ)=(PA(IIA+II)-PA(IIC+II))-PB(IIB+II) PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-PA(IIB+II) @@ -723,14 +723,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIC = IIC0 - (IIK-KLA)/KLA * IINK IID = IID0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)) PD(IJA+IJ)=(PB(IIA+II)-PB(IIC+II))+(PB(IIB+II)-PB(IID+II)) PC(IJC+IJ)= & @@ -767,14 +767,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIBASE=0 ZSIN45=SQRT(0.5) !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=ZSIN45*((PA(IIA+II)-PA(IIB+II))-(PB(IIA+II)+PB(IIB+II))) PC(IJC+IJ)=PB(IIB+II)-PB(IIA+II) @@ -790,14 +790,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))+PA(IIB+II)) PC(IJB+IJ)=2.0*((PA(IIA+II)-PA(IIC+II))-PB(IIB+II)) PC(IJC+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) @@ -831,14 +831,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) PC(IJB+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))+XQRT5*(PA(IIB+II)-PA(IIC+ & II))) & @@ -894,14 +894,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IID = IID0 - (IIK-KLA)/KLA * IINK IIE = IIE0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA10, ZA11, ZA20, ZA21, ZB10, ZB11, ZB20, ZB21 ) - DO IJK=1,KLOT - II = IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL ZA10=(PA(IIA+II)-0.25*((PA(IIB+II)+PA(IIE+II))+(PA(IIC+II)+PA(IID+II)))) & +XQRT5*((PA(IIB+II)+PA(IIE+II))-(PA(IIC+II)+PA(IID+II))) @@ -942,14 +942,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IIBASE=0 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IIB+II))+PA(IIC+II) PC(IJB+IJ)=(XQRT5*(PA(IIA+II)-PA(IIB+II))+(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ & II))) & @@ -977,14 +977,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZSSIN36=2.0*XSIN36 ZSSIN72=2.0*XSIN72 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))) PC(IJB+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II))) & +ZQQRT5*(PA(IIB+II)-PA(IIC+II)))-(ZSSIN72*PB(IIB+II)+ZSSIN36*PB(IIC+II)) @@ -1025,14 +1025,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+(PA(IIB+II)+PA(IIC+II)) PC(IJD+IJ)=(PA(IIA+II)-PA(IID+II))-(PA(IIB+II)-PA(IIC+II)) PC(IJB+IJ)=((PA(IIA+II)-PA(IID+II))+0.5*(PA(IIB+II)-PA(IIC+II))) & @@ -1092,14 +1092,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIE = IIE0 - (IIK-KLA)/KLA * IINK IIF = IIF0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA11, ZA20, ZA21, ZB11, ZB20, ZB21 ) - DO IJK=1,KLOT - II = IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL ZA11= (PA(IIE+II)+PA(IIB+II))+(PA(IIC+II)+PA(IIF+II)) ZA20=(PA(IIA+II)+PA(IID+II))-0.5*ZA11 @@ -1147,14 +1147,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IIBASE=0 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIB+II)+(PA(IIA+II)+PA(IIC+II)) PC(IJD+IJ)=PB(IIB+II)-(PB(IIA+II)+PB(IIC+II)) PC(IJB+IJ)=(XSIN60*(PA(IIA+II)-PA(IIC+II)))-(0.5*(PB(IIA+II)+PB(IIC+II))+PB(IIB+ & @@ -1175,14 +1175,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZSSIN60=2.0*XSIN60 !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(2.0*(PA(IIA+II)+PA(IID+II)))+(2.0*(PA(IIB+II)+PA(IIC+II))) PC(IJD+IJ)=(2.0*(PA(IIA+II)-PA(IID+II)))-(2.0*(PA(IIB+II)-PA(IIC+II))) PC(IJB+IJ)=(2.0*(PA(IIA+II)-PA(IID+II))+(PA(IIB+II)-PA(IIC+II))) & @@ -1227,14 +1227,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZSSIN45=SQRT(2.0) !$acc loop independent - DO IIL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + DO IIL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IIL + IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))) PC(IJE+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) PC(IJC+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))-PA(IIC+II))-(PB(IIB+II)-PB(IID+II))) @@ -1350,14 +1350,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II) END DO @@ -1384,14 +1384,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJA = IJA0 + (IIK-KLA)/KLA * JINK IJB = IJB0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+(ZC1*PA(IIB+II)+ZS1*PB(IIB+II)) PC(IJB+IJ)=PA(IIA+II)-(ZC1*PA(IIB+II)+ZS1*PB(IIB+II)) PD(IJA+IJ)=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+PB(IIA+II) @@ -1409,14 +1409,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IJBASE=0 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II) PD(IJA+IJ)=-PA(IIB+II) END DO @@ -1431,14 +1431,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZZ=1.0/REAL(KN) !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*(PA(IIA+II)+PA(IIB+II)) PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIB+II)) END DO @@ -1468,14 +1468,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) PC(IJB+IJ)=PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)) PD(IJB+IJ)=XSIN60*(PA(IIC+II)-PA(IIB+II)) @@ -1509,14 +1509,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJB = IJB0 + (IIK-KLA)/KLA * JINK IJC = IJC0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZB1, ZB2, ZB3 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJK * KINC4 + IJL ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) ZB1=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+(ZC2*PB(IIC+II)-ZS2*PA(IIC+II)) ZA2=PA(IIA+II)-0.5*ZA1 @@ -1543,14 +1543,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IJBASE=0 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+0.5*(PA(IIB+II)-PA(IIC+II)) PD(IJA+IJ)=-XSIN60*(PA(IIB+II)+PA(IIC+II)) PC(IJB+IJ)=PA(IIA+II)-(PA(IIB+II)-PA(IIC+II)) @@ -1567,14 +1567,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZZ=1.0/REAL(KN) ZZSIN60=ZZ*XSIN60 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))) PC(IJB+IJ)=ZZ*(PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II))) PD(IJB+IJ)=ZZSIN60*(PA(IIC+II)-PA(IIB+II)) @@ -1605,14 +1605,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)) PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II)) PC(IJB+IJ)=PA(IIA+II)-PA(IIC+II) @@ -1653,14 +1653,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJC = IJC0 - (IIK-KLA)/KLA * JINK IJD = IJD0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA0, ZA1, ZA2, ZA3, ZB0, ZB1, ZB2, ZB3 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJK * KINC4 + IJL ZA0=PA(IIA+II)+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) ZA2=PA(IIA+II)-(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC3*PA(IID+II)+ZS3*PB(IID+II)) @@ -1693,14 +1693,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZSIN45=SQRT(0.5) IJBASE=0 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+ZSIN45*(PA(IIB+II)-PA(IID+II)) PC(IJB+IJ)=PA(IIA+II)-ZSIN45*(PA(IIB+II)-PA(IID+II)) PD(IJA+IJ)=-PA(IIC+II)-ZSIN45*(PA(IIB+II)+PA(IID+II)) @@ -1716,14 +1716,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZZ=1.0/REAL(KN) !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))) PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIC+II)) @@ -1756,14 +1756,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL ZA1=PA(IIB+II)+PA(IIE+II) ZA3=PA(IIB+II)-PA(IIE+II) ZA2=PA(IIC+II)+PA(IID+II) @@ -1817,15 +1817,15 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJD = IJD0 - (IIK-KLA)/KLA * JINK IJE = IJE0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6, ZA10, ZA11, ZA20, ZA21, & !$acc & ZB1, ZB2, ZB3, ZB4, ZB5, ZB6, ZB10, ZB11, ZB20, ZB21 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJK * KINC4 + IJL ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC4*PA(IIE+II)+ZS4*PB(IIE+II)) ZA3=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))-(ZC4*PA(IIE+II)+ZS4*PB(IIE+II)) ZA2=(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))+(ZC3*PA(IID+II)+ZS3*PB(IID+II)) @@ -1872,14 +1872,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IJBASE=0 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL ZA1=PA(IIB+II)+PA(IIE+II) ZA3=PA(IIB+II)-PA(IIE+II) ZA2=PA(IIC+II)+PA(IID+II) @@ -1905,14 +1905,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZZSIN36=ZZ*XSIN36 ZZSIN72=ZZ*XSIN72 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL ZA1=PA(IIB+II)+PA(IIE+II) ZA3=PA(IIB+II)-PA(IIE+II) ZA2=PA(IIC+II)+PA(IID+II) @@ -1955,14 +1955,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA11 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II)) PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+ZA11 PC(IJC+IJ)=(PA(IIA+II)+PA(IID+II)-0.5*ZA11) @@ -2020,14 +2020,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJE = IJE0 - (IIK-KLA)/KLA * JINK IJF = IJF0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA11, ZA20, ZA21, ZB1, ZB2, ZB3, ZB4, ZB5, ZB11, ZB20, ZB21 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJK * KINC4 + IJL ZA1=ZC1*PA(IIB+II)+ZS1*PB(IIB+II) ZB1=ZC1*PB(IIB+II)-ZS1*PA(IIB+II) ZA2=ZC2*PA(IIC+II)+ZS2*PB(IIC+II) @@ -2079,14 +2079,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IJBASE=0 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=(PA(IIA+II)+0.5*(PA(IIC+II)-PA(IIE+II)))+ XSIN60*(PA(IIB+II)-PA(IIF+II)) PD(IJA+IJ)=-(PA(IID+II)+0.5*(PA(IIB+II)+PA(IIF+II)))-XSIN60*(PA(IIC+II)+PA(IIE+II)) PC(IJB+IJ)=PA(IIA+II)-(PA(IIC+II)-PA(IIE+II)) @@ -2106,14 +2106,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZZ=1.0/REAL(KN) ZZSIN60=ZZ*XSIN60 !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ, ZA11 ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II)) PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IID+II))+ZA11) PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IID+II))-0.5*ZA11) @@ -2158,14 +2158,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZZSIN45=ZZ*SQRT(0.5) !$acc loop independent - DO IJL=1,KLA + DO IJK=0,KLOT-1 !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC !$acc loop independent private( II, IJ ) - DO IJK=1,KLOT - II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 - IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + DO IJL=0,KLA-1 + II = IIBASE + IJK * KINC3 + IJL + IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))+ & ((PA(IID+II)+PA(IIH+II))+(PA(IIB+II)+PA(IIF+II)))) PC(IJE+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))- & -- GitLab