From 4e676ee7d62679db6ccf78bc573a99c002f62ed0 Mon Sep 17 00:00:00 2001 From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr> Date: Mon, 15 May 2023 16:18:18 +0200 Subject: [PATCH] Philippe 15/05/2023: FFT: respect DOCTOR norm --- src/MNH/fft.f90 | 2779 ++++++++++++++++++++++++----------------------- 1 file changed, 1391 insertions(+), 1388 deletions(-) diff --git a/src/MNH/fft.f90 b/src/MNH/fft.f90 index 1cdf1a0ef..e75f2032a 100644 --- a/src/MNH/fft.f90 +++ b/src/MNH/fft.f90 @@ -28,171 +28,171 @@ REAL, PARAMETER :: XQRT5 = 0.559016994374947 CONTAINS -SUBROUTINE SET99( TRIGS, IFAX, N ) - ! SUBROUTINE 'SET99' - COMPUTES FACTORS OF N & TRIGONOMETRIC +SUBROUTINE SET99( PTRIGS, KFAX, KN ) + ! SUBROUTINE 'SET99' - COMPUTES FACTORS OF KN & TRIGONOMETRIC ! FUNCTIONS REQUIRED BY FFT991 IMPLICIT NONE - REAL, DIMENSION(N), INTENT(INOUT) :: TRIGS - INTEGER, DIMENSION(19), INTENT(INOUT) :: IFAX - INTEGER, INTENT(IN) :: N + REAL, DIMENSION(KN), INTENT(INOUT) :: PTRIGS + INTEGER, DIMENSION(19), INTENT(INOUT) :: KFAX + INTEGER, INTENT(IN) :: KN - INTEGER, PARAMETER, DIMENSION(7) :: NLFAX = [ 6, 8, 5, 4, 3, 2, 1 ] + INTEGER, PARAMETER, DIMENSION(7) :: ILFAX = [ 6, 8, 5, 4, 3, 2, 1 ] - INTEGER :: I, IFAC, IL, K, NFAX, NU - INTEGER, DIMENSION(10) :: JFAX - REAL :: ANGLE, DEL + INTEGER :: II, IFAC, IL, IK, IFAX, IU + INTEGER, DIMENSION(10) :: IJFAX + REAL :: ZANGLE, ZDEL - DEL=4.0*ASIN(1.0)/REAL(N) - DO K = 0, (N/2)-1 - ANGLE=REAL(K)*DEL - TRIGS(2*K+1)=COS(ANGLE) - TRIGS(2*K+2)=SIN(ANGLE) + ZDEL=4.0*ASIN(1.0)/REAL(KN) + DO IK = 0, (KN/2)-1 + ZANGLE=REAL(IK)*ZDEL + PTRIGS(2*IK+1)=COS(ZANGLE) + PTRIGS(2*IK+2)=SIN(ZANGLE) END DO - ! FIND FACTORS OF N (8,6,5,4,3,2; ONLY ONE 8 ALLOWED) + ! FIND FACTORS OF KN (8,6,5,4,3,2; ONLY ONE 8 ALLOWED) ! LOOK FOR SIXES FIRST, STORE FACTORS IN DESCENDING ORDER - NU=N + IU=KN IFAC=6 - K=0 + IK=0 IL=1 DO - IF ( MOD(NU,IFAC) == 0 ) THEN - K=K+1 - JFAX(K)=IFAC - IF ( IFAC == 8 .AND. K /= 1 ) THEN - JFAX(1)=8 - JFAX(K)=6 + IF ( MOD(IU,IFAC) == 0 ) THEN + IK=IK+1 + IJFAX(IK)=IFAC + IF ( IFAC == 8 .AND. IK /= 1 ) THEN + IJFAX(1)=8 + IJFAX(IK)=6 END IF - NU=NU/IFAC - IF ( NU == 1 ) EXIT + IU=IU/IFAC + IF ( IU == 1 ) EXIT IF ( IFAC /= 8 ) CYCLE END IF IL=IL+1 - IFAC=NLFAX(IL) + IFAC=ILFAX(IL) IF ( IFAC <= 1 ) CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'SET99', 'illegal factor' ) END DO ! NOW REVERSE ORDER OF FACTORS - NFAX=K - IFAX(1)=NFAX - DO I=1,NFAX - IFAX(NFAX+2-I)=JFAX(I) + IFAX=IK + KFAX(1)=IFAX + DO II=1,IFAX + KFAX(IFAX+2-II)=IJFAX(II) END DO - IFAX(10)=N + KFAX(10)=KN END SUBROUTINE SET99 -SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT ) +SUBROUTINE FFT991( PA, PWORK, PTRIGS, PFAX, KJUMP, KN, KLOT, KSIGN, KSZA, KSZW, KSZT ) USE MODE_MPPDB IMPLICIT NONE - REAL, DIMENSION(KSZA), INTENT(INOUT) :: A - REAL, DIMENSION(KSZW), INTENT(OUT) :: WORK - REAL, DIMENSION(KSZT), INTENT(IN) :: TRIGS - INTEGER, DIMENSION(19), INTENT(IN) :: IFAX - INTEGER, INTENT(IN) :: JUMP, N, ILOT, ISIGN - INTEGER, INTENT(IN) :: KSZA, KSZW, KSZT + REAL, DIMENSION(KSZA), INTENT(INOUT) :: PA + REAL, DIMENSION(KSZW), INTENT(OUT) :: PWORK + REAL, DIMENSION(KSZT), INTENT(IN) :: PTRIGS + INTEGER, DIMENSION(19), INTENT(IN) :: PFAX + INTEGER, INTENT(IN) :: KJUMP, KN, KLOT, KSIGN + INTEGER, INTENT(IN) :: KSZA, KSZW, KSZT ! ! SUBROUTINE 'FFT991' - MULTIPLE FAST REAL PERIODIC TRANSFORM ! SUPERSEDES PREVIOUS ROUTINE 'FFT991' ! -! REAL TRANSFORM OF LENGTH N PERFORMED BY REMOVING REDUNDANT -! OPERATIONS FROM COMPLEX TRANSFORM OF LENGTH N +! REAL TRANSFORM OF LENGTH KN PERFORMED BY REMOVING REDUNDANT +! OPERATIONS FROM COMPLEX TRANSFORM OF LENGTH KN ! -! A IS THE ARRAY CONTAINING INPUT & OUTPUT DATA -! WORK IS AN AREA OF SIZE (N+1)*MIN(ILOT,LOT) -! TRIGS IS A PREVIOUSLY PREPARED LIST OF TRIG FUNCTION VALUES -! IFAX IS A PREVIOUSLY PREPARED LIST OF FACTORS OF N +! PA IS THE ARRAY CONTAINING INPUT & OUTPUT DATA +! PWORK IS AN AREA OF SIZE (KN+1)*MIN(KLOT,LOT) +! PTRIGS IS PA PREVIOUSLY PREPARED LIST OF TRIG FUNCTION VALUES +! KFAX IS PA PREVIOUSLY PREPARED LIST OF FACTORS OF KN ! JUMP IS THE INCREMENT BETWEEN THE START OF EACH DATA VECTOR -! N IS THE LENGTH OF THE DATA VECTORS -! ILOT IS THE NUMBER OF DATA VECTORS -! ISIGN = +1 FOR TRANSFORM FROM SPECTRAL TO GRIDPOINT +! KN IS THE LENGTH OF THE DATA VECTORS +! KLOT IS THE NUMBER OF DATA VECTORS +! KSIGN = +1 FOR TRANSFORM FROM SPECTRAL TO GRIDPOINT ! = -1 FOR TRANSFORM FROM GRIDPOINT TO SPECTRAL ! ! ORDERING OF COEFFICIENTS: -! A(0),B(0),A(1),B(1),A(2),B(2),...,A(N/2),B(N/2) -! WHERE B(0)=B(N/2)=0; (N+2) LOCATIONS REQUIRED +! PA(0),PB(0),PA(1),PB(1),PA(2),PB(2),...,PA(KN/2),PB(KN/2) +! WHERE PB(0)=PB(KN/2)=0; (KN+2) LOCATIONS REQUIRED ! ! ORDERING OF DATA: -! X(0),X(1),X(2),...,X(N-1), 0 , 0 ; (N+2) LOCATIONS REQUIRED +! X(0),X(1),X(2),...,X(KN-1), 0 , 0 ; (KN+2) LOCATIONS REQUIRED ! ! VECTORIZATION IS ACHIEVED ON CRAY BY DOING THE TRANSFORMS ! IN PARALLEL ! -! N MUST BE COMPOSED OF FACTORS 2,3 & 5 BUT DOES NOT HAVE TO BE EVEN +! KN MUST BE COMPOSED OF FACTORS 2,3 & 5 BUT DOES NOT HAVE TO BE EVEN ! ! DEFINITION OF TRANSFORMS: ! ------------------------- ! -! ISIGN=+1: X(J)=SUM(K=0,...,N-1)(C(K)*EXP(2*I*J*K*PI/N)) -! WHERE C(K)=A(K)+I*B(K) AND C(N-K)=A(K)-I*B(K) +! KSIGN=+1: X(J)=SUM(K=0,...,KN-1)(PC(K)*EXP(2*I*J*K*PI/KN)) +! WHERE PC(K)=PA(K)+I*PB(K) AND PC(KN-K)=PA(K)-I*PB(K) ! -! ISIGN=-1: A(K)=(1/N)*SUM(J=0,...,N-1)(X(J)*COS(2*J*K*PI/N)) -! B(K)=-(1/N)*SUM(J=0,...,N-1)(X(J)*SIN(2*J*K*PI/N)) +! KSIGN=-1: PA(K)=(1/KN)*SUM(J=0,...,KN-1)(X(J)*COS(2*J*K*PI/KN)) +! PB(K)=-(1/KN)*SUM(J=0,...,KN-1)(X(J)*SIN(2*J*K*PI/KN)) ! - INTEGER :: NFAX, NX, NBLOX, NVEX + INTEGER :: IFAX, INX, INBLOX, INVEX INTEGER :: I, J, IA, ISTART, ILA, IGO - INTEGER :: NB, K, IFAC, IERR - INTEGER :: IBASE, JBASE + INTEGER :: INB, IK, IFAC, IERR + INTEGER :: IIBASE, IJBASE INTEGER :: II, JJ, IX, IZ INTEGER :: I0 IF (MPPDB_INITIALIZED) THEN !Check all IN arrays - CALL MPPDB_CHECK( TRIGS, "FFT991 beg:TRIGS" ) - CALL MPPDB_CHECK( IFAX, "FFT991 beg:IFAX" ) + CALL MPPDB_CHECK( PTRIGS, "FFT991 beg:PTRIGS" ) + CALL MPPDB_CHECK( PFAX, "FFT991 beg:KFAX" ) !Check all INOUT arrays - CALL MPPDB_CHECK( A, "FFT991 beg:A" ) + CALL MPPDB_CHECK( PA, "FFT991 beg:PA" ) END IF -!$acc data present( A, WORK ) +!$acc data present( PA, PWORK ) - ! Initialisation of WORK useful to compare results with MPPDB_CHECK (otherwise all values are not set by FFT991 - WORK(:) = 0. + ! Initialisation of PWORK useful to compare results with MPPDB_CHECK (otherwise all values are not set by FFT991 + PWORK(:) = 0. #if 0 - !PW: Original version: but in that case, intent of TRIGS and IFAX are not correct - IF(IFAX(10).NE.N) CALL SET99(TRIGS,IFAX,N) + !PW: Original version: but in that case, intent of PTRIGS and KFAX are not correct + IF(PFAX(10).NE.KN) CALL SET99(PTRIGS,KFAX,KN) #else - IF(IFAX(10).NE.N) CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'FFT991', 'TRIGS and IFAX not initialised (set99 not yet called)' ) + IF(PFAX(10).NE.KN) CALL PRINT_MSG( NVERB_FATAL, 'GEN', 'FFT991', 'PTRIGS and KFAX not initialised (set99 not yet called)' ) #endif - NFAX=IFAX(1) - NX=N+1 - IF (MOD(N,2).EQ.1) NX=N - NBLOX=1+(ILOT-1)/NVECLEN - NVEX=ILOT-(NBLOX-1)*NVECLEN + IFAX=PFAX(1) + INX=KN+1 + IF (MOD(KN,2).EQ.1) INX=KN + INBLOX=1+(KLOT-1)/NVECLEN + INVEX=KLOT-(INBLOX-1)*NVECLEN - IF ( ISIGN == 1 ) THEN + IF ( KSIGN == 1 ) THEN ! -! ISIGN=+1, SPECTRAL TO GRIDPOINT TRANSFORM +! KSIGN=+1, SPECTRAL TO GRIDPOINT TRANSFORM ! ----------------------------------------- ISTART=1 - DO NB=1,NBLOX + DO INB=1,INBLOX IA=ISTART !$acc kernels !CDIR NODEP !*vocl loop,novrec !$acc loop independent private( I ) - DO J=1,NVEX - I = ISTART + ( J - 1 ) * JUMP - A(I+1)=0.5*A(I) + DO J=1,INVEX + I = ISTART + ( J - 1 ) * KJUMP + PA(I+1)=0.5*PA(I) END DO !$acc end kernels - IF ( MOD(N,2) == 0 ) THEN + IF ( MOD(KN,2) == 0 ) THEN !$acc kernels - I0 = ISTART + N + I0 = ISTART + KN !CDIR NODEP !*vocl loop,novrec !$acc loop independent private( I ) - DO J=1,NVEX - I = I0 + ( J - 1 ) * JUMP - A(I)=0.5*A(I) + DO J=1,INVEX + I = I0 + ( J - 1 ) * KJUMP + PA(I)=0.5*PA(I) END DO !$acc end kernels END IF @@ -200,21 +200,21 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT ILA=1 IGO=+1 ! - DO K=1,NFAX - IFAC=IFAX(K+1) + DO IK=1,IFAX + IFAC=PFAX(IK+1) IERR=-1 IF ( IGO == 1 ) THEN - CALL RPASSM(A(IA:),A(IA+ILA:),WORK(1:),WORK(IFAC*ILA+1:), & - TRIGS(:), & - JUMP,NX,NVEX,N,IFAC,ILA,IERR, & - SIZE(A(IA:)),SIZE(A(IA+ILA:)),SIZE(WORK(1:)), & - SIZE(WORK(IFAC*ILA+1:)),SIZE(TRIGS(:))) + CALL RPASSM(PA(IA:),PA(IA+ILA:),PWORK(1:),PWORK(IFAC*ILA+1:), & + PTRIGS(:), & + KJUMP,INX,INVEX,KN,IFAC,ILA,IERR, & + SIZE(PA(IA:)),SIZE(PA(IA+ILA:)),SIZE(PWORK(1:)), & + SIZE(PWORK(IFAC*ILA+1:)),SIZE(PTRIGS(:))) ELSE - CALL RPASSM(WORK(1:),WORK(ILA+1:),A(IA:),A(IA+IFAC*ILA:), & - TRIGS(:), & - NX,JUMP,NVEX,N,IFAC,ILA,IERR, & - SIZE(WORK(1:)),SIZE(WORK(ILA+1:)),SIZE(A(IA:)), & - SIZE(A(IA+IFAC*ILA:)),SIZE(TRIGS(:))) + CALL RPASSM(PWORK(1:),PWORK(ILA+1:),PA(IA:),PA(IA+IFAC*ILA:), & + PTRIGS(:), & + INX,KJUMP,INVEX,KN,IFAC,ILA,IERR, & + SIZE(PWORK(1:)),SIZE(PWORK(ILA+1:)),SIZE(PA(IA:)), & + SIZE(PA(IA+IFAC*ILA:)),SIZE(PTRIGS(:))) END IF IF (IERR.NE.0) GO TO 500 ILA=IFAC*ILA @@ -222,17 +222,17 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT IA=ISTART END DO ! -! IF NECESSARY, COPY RESULTS BACK TO A -! ------------------------------------ - IF ( MOD(NFAX,2) == 1 ) THEN +! IF NECESSARY, COPY RESULTS BACK TO PA +! ------------------------------------- + IF ( MOD(IFAX,2) == 1 ) THEN !$acc kernels !CDIR NODEP !*vocl loop,novrec -!$acc loop independent private( IBASE, JBASE ) - DO JJ = 1, NVEX - IBASE = 1 + (JJ-1) * NX - JBASE = IA + (JJ-1) * JUMP - A(JBASE:JBASE+N-1) = WORK(IBASE:IBASE+N-1) +!$acc loop independent private( IIBASE, IJBASE ) + DO JJ = 1, INVEX + IIBASE = 1 + (JJ-1) * INX + IJBASE = IA + (JJ-1) * KJUMP + PA(IJBASE:IJBASE+KN-1) = PWORK(IIBASE:IIBASE+KN-1) END DO !$acc end kernels END IF @@ -240,81 +240,81 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT ! FILL IN ZEROS AT END ! -------------------- !$acc kernels - A(ISTART+N::JUMP) = 0.0 - A(ISTART+N+1::JUMP) = 0.0 + PA(ISTART+KN::KJUMP) = 0.0 + PA(ISTART+KN+1::KJUMP) = 0.0 !$acc end kernels ! - ISTART=ISTART+NVEX*JUMP - NVEX=NVECLEN + ISTART=ISTART+INVEX*KJUMP + INVEX=NVECLEN END DO - ELSE IF ( ISIGN == -1 ) THEN + ELSE IF ( KSIGN == -1 ) THEN ! -! ISIGN=-1, GRIDPOINT TO SPECTRAL TRANSFORM +! KSIGN=-1, GRIDPOINT TO SPECTRAL TRANSFORM ! ----------------------------------------- ISTART=1 - DO NB=1,NBLOX + DO INB=1,INBLOX IA=ISTART - ILA=N + ILA=KN IGO=+1 ! - DO K=1,NFAX - IFAC=IFAX(NFAX+2-K) + DO IK=1,IFAX + IFAC=PFAX(IFAX+2-IK) ILA=ILA/IFAC IERR=-1 IF ( IGO == 1 ) THEN - CALL QPASSM(A(IA:),A(IA+IFAC*ILA:),WORK(1:),WORK(ILA+1:), & - TRIGS(:), & - JUMP,NX,NVEX,N,IFAC,ILA,IERR, & - SIZE(A(IA:)),SIZE(A(IA+IFAC*ILA:)),SIZE(WORK(1:)), & - SIZE(WORK(ILA+1:)),SIZE(TRIGS(:))) + CALL QPASSM(PA(IA:),PA(IA+IFAC*ILA:),PWORK(1:),PWORK(ILA+1:), & + PTRIGS(:), & + KJUMP,INX,INVEX,KN,IFAC,ILA,IERR, & + SIZE(PA(IA:)),SIZE(PA(IA+IFAC*ILA:)),SIZE(PWORK(1:)), & + SIZE(PWORK(ILA+1:)),SIZE(PTRIGS(:))) ELSE - CALL QPASSM(WORK(1:),WORK(IFAC*ILA+1:),A(IA:),A(IA+ILA:), & - TRIGS(:), & - NX,JUMP,NVEX,N,IFAC,ILA,IERR, & - SIZE(WORK(1:)),SIZE(WORK(IFAC*ILA+1:)),SIZE(A(IA:)), & - SIZE(A(IA+ILA:)),SIZE(TRIGS(:))) + CALL QPASSM(PWORK(1:),PWORK(IFAC*ILA+1:),PA(IA:),PA(IA+ILA:), & + PTRIGS(:), & + INX,KJUMP,INVEX,KN,IFAC,ILA,IERR, & + SIZE(PWORK(1:)),SIZE(PWORK(IFAC*ILA+1:)),SIZE(PA(IA:)), & + SIZE(PA(IA+ILA:)),SIZE(PTRIGS(:))) END IF IF (IERR.NE.0) GO TO 500 IGO=-IGO IA=ISTART+1 END DO ! -! IF NECESSARY, COPY RESULTS BACK TO A -! ------------------------------------ - IF ( MOD(NFAX,2) == 1 ) THEN +! IF NECESSARY, COPY RESULTS BACK TO PA +! ------------------------------------- + IF ( MOD(IFAX,2) == 1 ) THEN !$acc kernels !CDIR NODEP !*vocl loop,novrec -!$acc loop independent private( IBASE, JBASE ) - DO JJ = 1, NVEX - IBASE = 1 + (JJ-1) * NX - JBASE = IA + (JJ-1) * JUMP - A(JBASE:JBASE+N-1) = WORK(IBASE:IBASE+N-1) +!$acc loop independent private( IIBASE, IJBASE ) + DO JJ = 1, INVEX + IIBASE = 1 + (JJ-1) * INX + IJBASE = IA + (JJ-1) * KJUMP + PA(IJBASE:IJBASE+KN-1) = PWORK(IIBASE:IIBASE+KN-1) END DO !$acc end kernels END IF ! -! SHIFT A(0) & FILL IN ZERO IMAG PARTS -! ------------------------------------ +! SHIFT PA(0) & FILL IN ZERO IMAG PARTS +! ------------------------------------- !$acc kernels !CDIR NODEP !*vocl loop,novrec !$acc loop independent private( I ) - DO J=1,NVEX - IX = ISTART + ( J - 1 ) * JUMP - A(IX)=A(IX+1) - A(IX+1)=0.0 + DO J=1,INVEX + IX = ISTART + ( J - 1 ) * KJUMP + PA(IX)=PA(IX+1) + PA(IX+1)=0.0 END DO !$acc end kernels - IF ( MOD(N,2) == 0 ) THEN + IF ( MOD(KN,2) == 0 ) THEN !$acc kernels - A(ISTART+N+1::JUMP) = 0.0 + PA(ISTART+KN+1::KJUMP) = 0.0 !$acc end kernels END IF ! - ISTART=ISTART+NVEX*JUMP - NVEX=NVECLEN + ISTART=ISTART+INVEX*KJUMP + INVEX=NVECLEN END DO END IF ! @@ -329,7 +329,7 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT 540 FORMAT( 'FACTOR =',I3,', NOT CATERED FOR') ELSE WRITE(6,560) IFAC - 560 FORMAT('FACTOR =',I3,', ONLY CATERED FOR IF ILA*IFAC=N') + 560 FORMAT('FACTOR =',I3,', ONLY CATERED FOR IF ILA*IFAC=KN') ENDIF END IF @@ -337,167 +337,167 @@ SUBROUTINE FFT991( A, WORK, TRIGS, IFAX, JUMP, N, ILOT, ISIGN, KSZA, KSZW, KSZT IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays - CALL MPPDB_CHECK( A, "FFT991 end:A" ) + CALL MPPDB_CHECK( PA, "FFT991 end:PA" ) !Check all OUT arrays - CALL MPPDB_CHECK( WORK, "FFT991 end:WORK" ) + CALL MPPDB_CHECK( PWORK, "FFT991 end:PWORK" ) END IF END SUBROUTINE FFT991 -SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KSZ4,KSZ5) +SUBROUTINE RPASSM(PA,PB,PC,PD,PTRIGS,KINC3,KINC4,KLOT,KN,KFAC,KLA,KERR,KSZ1,KSZ2,KSZ3,KSZ4,KSZ5) IMPLICIT NONE - REAL, DIMENSION(KSZ1), INTENT(IN) :: A - REAL, DIMENSION(KSZ2), INTENT(IN) :: B - REAL, DIMENSION(KSZ3), INTENT(INOUT) :: C - REAL, DIMENSION(KSZ4), INTENT(INOUT) :: D - REAL, DIMENSION(KSZ5), INTENT(IN) :: TRIGS - INTEGER, INTENT(IN) :: INC3, INC4, ILOT, N, IFAC, ILA - INTEGER, INTENT(OUT) :: IERR + REAL, DIMENSION(KSZ1), INTENT(IN) :: PA + REAL, DIMENSION(KSZ2), INTENT(IN) :: PB + REAL, DIMENSION(KSZ3), INTENT(INOUT) :: PC + REAL, DIMENSION(KSZ4), INTENT(INOUT) :: PD + REAL, DIMENSION(KSZ5), INTENT(IN) :: PTRIGS + INTEGER, INTENT(IN) :: KINC3, KINC4, KLOT, KN, KFAC, KLA + INTEGER, INTENT(OUT) :: KERR INTEGER, INTENT(IN) :: KSZ1,KSZ2,KSZ3,KSZ4,KSZ5 ! ! SUBROUTINE 'RPASSM' - PERFORMS ONE PASS THROUGH DATA AS PART ! OF MULTIPLE REAL FFT (FOURIER SYNTHESIS) ROUTINE ! -! A IS FIRST REAL INPUT VECTOR -! EQUIVALENCE B(1) WITH A (ILA+1) -! C IS FIRST REAL OUTPUT VECTOR -! EQUIVALENCE D(1) WITH C(IFAC*ILA+1) -! TRIGS IS A PRECALCULATED LIST OF SINES & COSINES -! INC3 IS THE INCREMENT BETWEEN INPUT VECTORS A -! INC4 IS THE INCREMENT BETWEEN OUTPUT VECTORS C -! ILOT IS THE NUMBER OF VECTORS -! N IS THE LENGTH OF THE VECTORS -! IFAC IS THE CURRENT FACTOR OF N -! ILA IS THE PRODUCT OF PREVIOUS FACTORS -! IERR IS AN ERROR INDICATOR: +! KA IS FIRST REAL INPUT VECTOR +! EQUIVALENCE PB(1) WITH PA (KLA+1) +! PC IS FIRST REAL OUTPUT VECTOR +! EQUIVALENCE PD(1) WITH PC(KFAC*KLA+1) +! PTRIGS IS PA PRECALCULATED LIST OF SINES & COSINES +! KINC3 IS THE INCREMENT BETWEEN INPUT VECTORS PA +! KINC4 IS THE INCREMENT BETWEEN OUTPUT VECTORS PC +! KLOT IS THE NUMBER OF VECTORS +! KN IS THE LENGTH OF THE VECTORS +! KFAC IS THE CURRENT FACTOR OF KN +! KLA IS THE PRODUCT OF PREVIOUS FACTORS +! KERR IS AN ERROR INDICATOR: ! 0 - PASS COMPLETED WITHOUT ERROR -! 1 - ILOT GREATER THAN NVECLEN -! 2 - IFAC NOT CATERED FOR -! 3 - IFAC ONLY CATERED FOR IF ILA=N/IFAC +! 1 - KLOT GREATER THAN NVECLEN +! 2 - KFAC NOT CATERED FOR +! 3 - KFAC ONLY CATERED FOR IF KLA=KN/KFAC ! !----------------------------------------------------------------------- ! - INTEGER :: M, IINK, JINK, JUMP, KSTOP - INTEGER :: IBAD, IBASE, JBASE, IGO - INTEGER :: I, J, IA, IB, JA, JB - INTEGER :: IL, IJK, K, KB, IC, JC, KC - INTEGER :: ID, JD, KD, IE, JE, KE, IF, JF, KF, JG, JH - INTEGER :: IA0, IB0, IC0, ID0, IE0, IF0, JBASE0 - REAL :: A10, A11, A20, A21, B10, B11, B20, B21 - REAL :: C1, C2, C3, C4, C5, S1, S2, S3, S4, S5 - REAL :: QQRT5, SIN45, SSIN36, SSIN45, SSIN60, SSIN72 + INTEGER :: IM, IINK, JINK, JUMP, ISTOP + INTEGER :: IBAD, IIBASE, IJBASE, IGO + INTEGER :: II, IJ, IIA, IIB, IJA, IJB + INTEGER :: IIL, IJK, IIK, IKB, IIC, IJC, IKC + INTEGER :: IID, IJD, IKD, IIE, IJE, IKE, IIF, IJF, IKF, IJG, IJH + INTEGER :: IIA0, IIB0, IIC0, IID0, IIE0, IIF0, IJBASE0 + REAL :: ZA10, ZA11, ZA20, ZA21, ZB10, ZB11, ZB20, ZB21 + REAL :: ZC1, ZC2, ZC3, ZC4, ZC5, ZS1, ZS2, ZS3, ZS4, ZS5 + REAL :: ZQQRT5, ZSIN45, ZSSIN36, ZSSIN45, ZSSIN60, ZSSIN72 -!$acc data present( A, B, C, D ) copyin( TRIGS ) +!$acc data present( PA, PB, PC, PD ) copyin( PTRIGS ) !acc kernels - M=N/IFAC - IINK=ILA - JINK=ILA - JUMP=(IFAC-1)*JINK - KSTOP=(N-IFAC)/(2*IFAC) + IM=KN/KFAC + IINK=KLA + JINK=KLA + JUMP=(KFAC-1)*JINK + ISTOP=(KN-KFAC)/(2*KFAC) !acc end kernels IBAD=1 - IF (ILOT.GT.NVECLEN) GO TO 910 - IBASE=0 - JBASE=0 - IGO=IFAC-1 + IF (KLOT.GT.NVECLEN) GO TO 910 + IIBASE=0 + IJBASE=0 + IGO=KFAC-1 IF (IGO.EQ.7) IGO=6 IBAD=2 IF (IGO.LT.1.OR.IGO.GT.6) GO TO 910 - SELECT CASE ( IFAC ) + SELECT CASE ( KFAC ) CASE ( 2 ) ! ! CODING FOR FACTOR 2 ! ------------------- - IA=1 - IB=IA+2*M-ILA - JA=1 - JB=JA+JINK + IIA=1 + IIB=IIA+2*IM-KLA + IJA=1 + IJB=IJA+JINK - IF ( ILA /= M ) THEN + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+A(IB+I) - C(JB+J)=A(IA+I)-A(IB+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) + PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II) END DO END DO - IBASE=IBASE+ILA - JBASE=JBASE+ILA + IIBASE=IIBASE+KLA + IJBASE=IJBASE+KLA !$acc end kernels - IA=IA+IINK + IIA=IIA+IINK IINK=2*IINK - IB=IB-IINK - IBASE=0 - JBASE=JBASE+JUMP + IIB=IIB-IINK + IIBASE=0 + IJBASE=IJBASE+JUMP JUMP=2*JUMP+JINK - IF ( IA /= IB ) THEN + IF ( IIA /= IIB ) THEN !$acc kernels - JBASE0 = JBASE - IA0 = IA - IB0 = IB -!$acc loop independent private( KB, C1, S1, JBASE, IA, IB ) - DO K=ILA,KSTOP,ILA - KB=K+K - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - JBASE = JBASE0 + (K-ILA)/ILA * (ILA + JUMP ) - IA = IA0 + (K-ILA)/ILA * IINK - IB = IB0 - (K-ILA)/ILA * IINK + IJBASE0 = IJBASE + IIA0 = IIA + IIB0 = IIB +!$acc loop independent private( IKB, ZC1, ZS1, IJBASE, IIA, IIB ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + IJBASE = IJBASE0 + (IIK-KLA)/KLA * (KLA + JUMP ) + IIA = IIA0 + (IIK-KLA)/KLA * IINK + IIB = IIB0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+A(IB+I) - D(JA+J)=B(IA+I)-B(IB+I) - C(JB+J)=C1*(A(IA+I)-A(IB+I))-S1*(B(IA+I)+B(IB+I)) - D(JB+J)=S1*(A(IA+I)-A(IB+I))+C1*(B(IA+I)+B(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) + PD(IJA+IJ)=PB(IIA+II)-PB(IIB+II) + PC(IJB+IJ)=ZC1*(PA(IIA+II)-PA(IIB+II))-ZS1*(PB(IIA+II)+PB(IIB+II)) + PD(IJB+IJ)=ZS1*(PA(IIA+II)-PA(IIB+II))+ZC1*(PB(IIA+II)+PB(IIB+II)) END DO END DO END DO - JBASE = JBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + JUMP ) - IA = IA0 + ((KSTOP-ILA)/ILA+1) * IINK - IB = IB0 - ((KSTOP-ILA)/ILA+1) * IINK + IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) + IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK + IIB = IIB0 - ((ISTOP-KLA)/KLA+1) * IINK !$acc end kernels END IF - IF ( IA <= IB ) THEN + IF ( IIA <= IIB ) THEN !$acc kernels - IBASE=0 + IIBASE=0 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I) - C(JB+J)=-B(IA+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II) + PC(IJB+IJ)=-PB(IIA+II) END DO END DO - IBASE=IBASE+ILA - JBASE=JBASE+ILA + IIBASE=IIBASE+KLA + IJBASE=IJBASE+KLA !$acc end kernels END IF @@ -505,20 +505,20 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=2.0*(A(IA+I)+A(IB+I)) - C(JB+J)=2.0*(A(IA+I)-A(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II)) + PC(IJB+IJ)=2.0*(PA(IIA+II)-PA(IIB+II)) END DO END DO - IBASE=IBASE+ILA - JBASE=JBASE+ILA + IIBASE=IIBASE+KLA + IJBASE=IJBASE+KLA !$acc end kernels END IF IBAD=0 @@ -528,140 +528,140 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 3 ! ------------------- - IA=1 - IB=IA+2*M-ILA - IC=IB - JA=1 - JB=JA+JINK - JC=JB+JINK + IIA=1 + IIB=IIA+2*IM-KLA + IIC=IIB + IJA=1 + IJB=IJA+JINK + IJC=IJB+JINK - IF ( ILA /= M ) THEN + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+A(IB+I) - C(JB+J)=(A(IA+I)-0.5*A(IB+I))-(XSIN60*(B(IB+I))) - C(JC+J)=(A(IA+I)-0.5*A(IB+I))+(XSIN60*(B(IB+I))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) + PC(IJB+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))-(XSIN60*(PB(IIB+II))) + PC(IJC+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))+(XSIN60*(PB(IIB+II))) END DO END DO - IBASE=IBASE+ILA - JBASE=JBASE+ILA + IIBASE=IIBASE+KLA + IJBASE=IJBASE+KLA !$acc end kernels - IA=IA+IINK + IIA=IIA+IINK IINK=2*IINK - IB=IB+IINK - IC=IC-IINK - JBASE=JBASE+JUMP + IIB=IIB+IINK + IIC=IIC-IINK + IJBASE=IJBASE+JUMP JUMP=2*JUMP+JINK - IF ( IA /= IC ) THEN + IF ( IIA /= IIC ) THEN !$acc kernels - JBASE0 = JBASE - IA0 = IA - IB0 = IB - IC0 = IC -!$acc loop independent private( KB, KC, C1, S1, C2, S2, C3, S3, JBASE, IA, IB, IC ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - JBASE = JBASE0 + (K-ILA)/ILA * (ILA + JUMP ) - IA = IA0 + (K-ILA)/ILA * IINK - IB = IB0 + (K-ILA)/ILA * IINK - IC = IC0 - (K-ILA)/ILA * IINK + IJBASE0 = IJBASE + IIA0 = IIA + IIB0 = IIB + IIC0 = IIC +!$acc loop independent private( IKB, IKC, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, IJBASE, IIA, IIB, IIC ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + IJBASE = IJBASE0 + (IIK-KLA)/KLA * (KLA + JUMP ) + IIA = IIA0 + (IIK-KLA)/KLA * IINK + IIB = IIB0 + (IIK-KLA)/KLA * IINK + IIC = IIC0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+(A(IB+I)+A(IC+I)) - D(JA+J)=B(IA+I)+(B(IB+I)-B(IC+I)) - C(JB+J)= & - C1*((A(IA+I)-0.5*(A(IB+I)+A(IC+I)))-(XSIN60*(B(IB+I)+B(IC+ & - I)))) & - -S1*((B(IA+I)-0.5*(B(IB+I)-B(IC+I)))+(XSIN60*(A(IB+I)-A(IC+ & - I)))) - D(JB+J)= & - S1*((A(IA+I)-0.5*(A(IB+I)+A(IC+I)))-(XSIN60*(B(IB+I)+B(IC+ & - I)))) & - +C1*((B(IA+I)-0.5*(B(IB+I)-B(IC+I)))+(XSIN60*(A(IB+I)-A(IC+ & - I)))) - C(JC+J)= & - C2*((A(IA+I)-0.5*(A(IB+I)+A(IC+I)))+(XSIN60*(B(IB+I)+B(IC+ & - I)))) & - -S2*((B(IA+I)-0.5*(B(IB+I)-B(IC+I)))-(XSIN60*(A(IB+I)-A(IC+ & - I)))) - D(JC+J)= & - S2*((A(IA+I)-0.5*(A(IB+I)+A(IC+I)))+(XSIN60*(B(IB+I)+B(IC+ & - I)))) & - +C2*((B(IA+I)-0.5*(B(IB+I)-B(IC+I)))-(XSIN60*(A(IB+I)-A(IC+ & - I)))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) + PD(IJA+IJ)=PB(IIA+II)+(PB(IIB+II)-PB(IIC+II)) + PC(IJB+IJ)= & + ZC1*((PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)))-(XSIN60*(PB(IIB+II)+PB(IIC+ & + II)))) & + -ZS1*((PB(IIA+II)-0.5*(PB(IIB+II)-PB(IIC+II)))+(XSIN60*(PA(IIB+II)-PA(IIC+ & + II)))) + PD(IJB+IJ)= & + ZS1*((PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)))-(XSIN60*(PB(IIB+II)+PB(IIC+ & + II)))) & + +ZC1*((PB(IIA+II)-0.5*(PB(IIB+II)-PB(IIC+II)))+(XSIN60*(PA(IIB+II)-PA(IIC+ & + II)))) + PC(IJC+IJ)= & + ZC2*((PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)))+(XSIN60*(PB(IIB+II)+PB(IIC+ & + II)))) & + -ZS2*((PB(IIA+II)-0.5*(PB(IIB+II)-PB(IIC+II)))-(XSIN60*(PA(IIB+II)-PA(IIC+ & + II)))) + PD(IJC+IJ)= & + ZS2*((PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)))+(XSIN60*(PB(IIB+II)+PB(IIC+ & + II)))) & + +ZC2*((PB(IIA+II)-0.5*(PB(IIB+II)-PB(IIC+II)))-(XSIN60*(PA(IIB+II)-PA(IIC+ & + II)))) END DO END DO END DO - JBASE = JBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + JUMP ) - IA = IA0 + ((KSTOP-ILA)/ILA+1) * IINK - IB = IB0 + ((KSTOP-ILA)/ILA+1) * IINK - IC = IC0 - ((KSTOP-ILA)/ILA+1) * IINK + IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) + IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK + IIB = IIB0 + ((ISTOP-KLA)/KLA+1) * IINK + IIC = IIC0 - ((ISTOP-KLA)/KLA+1) * IINK !$acc end kernels END IF - IF ( IA <= IC ) THEN + IF ( IIA <= IIC ) THEN !$acc kernels - IBASE=0 + IIBASE=0 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+A(IB+I) - C(JB+J)=(0.5*A(IA+I)-A(IB+I))-(XSIN60*B(IA+I)) - C(JC+J)=-(0.5*A(IA+I)-A(IB+I))-(XSIN60*B(IA+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) + PC(IJB+IJ)=(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II)) + PC(IJC+IJ)=-(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - SSIN60=2.0*XSIN60 + ZSSIN60=2.0*XSIN60 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=2.0*(A(IA+I)+A(IB+I)) - C(JB+J)=(2.0*A(IA+I)-A(IB+I))-(SSIN60*B(IB+I)) - C(JC+J)=(2.0*A(IA+I)-A(IB+I))+(SSIN60*B(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II)) + PC(IJB+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))-(ZSSIN60*PB(IIB+II)) + PC(IJC+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))+(ZSSIN60*PB(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -672,150 +672,150 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 4 ! ------------------- - IA=1 - IB=IA+2*M-ILA - IC=IB+2*M - ID=IB - JA=1 - JB=JA+JINK - JC=JB+JINK - JD=JC+JINK + IIA=1 + IIB=IIA+2*IM-KLA + IIC=IIB+2*IM + IID=IIB + IJA=1 + IJB=IJA+JINK + IJC=IJB+JINK + IJD=IJC+JINK - IF ( ILA /= M) THEN + IF ( KLA /= IM) THEN !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(A(IA+I)+A(IC+I))+A(IB+I) - C(JB+J)=(A(IA+I)-A(IC+I))-B(IB+I) - C(JC+J)=(A(IA+I)+A(IC+I))-A(IB+I) - C(JD+J)=(A(IA+I)-A(IC+I))+B(IB+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+PA(IIB+II) + PC(IJB+IJ)=(PA(IIA+II)-PA(IIC+II))-PB(IIB+II) + PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-PA(IIB+II) + PC(IJD+IJ)=(PA(IIA+II)-PA(IIC+II))+PB(IIB+II) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - IA=IA+IINK + IIA=IIA+IINK IINK=2*IINK - IB=IB+IINK - IC=IC-IINK - ID=ID-IINK - JBASE=JBASE+JUMP + IIB=IIB+IINK + IIC=IIC-IINK + IID=IID-IINK + IJBASE=IJBASE+JUMP JUMP=2*JUMP+JINK - IF ( IB /= IC ) THEN + IF ( IIB /= IIC ) THEN !$acc kernels - JBASE0 = JBASE - IA0 = IA - IB0 = IB - IC0 = IC - ID0 = ID -!$acc loop independent private( KB, KC, KD, C1, S1, C2, S2, C3, S3, JBASE, IA, IB, IC, ID ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - KD=KC+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - C3=TRIGS(KD+1) - S3=TRIGS(KD+2) - JBASE = JBASE0 + (K-ILA)/ILA * (ILA + JUMP ) - IA = IA0 + (K-ILA)/ILA * IINK - IB = IB0 + (K-ILA)/ILA * IINK - IC = IC0 - (K-ILA)/ILA * IINK - ID = ID0 - (K-ILA)/ILA * IINK + IJBASE0 = IJBASE + IIA0 = IIA + IIB0 = IIB + IIC0 = IIC + IID0 = IID +!$acc loop independent private( IKB, IKC, IKD, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, IJBASE, IIA, IIB, IIC, IID ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + IKD=IKC+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + ZC3=PTRIGS(IKD+1) + ZS3=PTRIGS(IKD+2) + IJBASE = IJBASE0 + (IIK-KLA)/KLA * (KLA + JUMP ) + IIA = IIA0 + (IIK-KLA)/KLA * IINK + IIB = IIB0 + (IIK-KLA)/KLA * IINK + IIC = IIC0 - (IIK-KLA)/KLA * IINK + IID = IID0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I)) - D(JA+J)=(B(IA+I)-B(IC+I))+(B(IB+I)-B(ID+I)) - C(JC+J)= & - C2*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) & - -S2*((B(IA+I)-B(IC+I))-(B(IB+I)-B(ID+I))) - D(JC+J)= & - S2*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) & - +C2*((B(IA+I)-B(IC+I))-(B(IB+I)-B(ID+I))) - C(JB+J)= & - C1*((A(IA+I)-A(IC+I))-(B(IB+I)+B(ID+I))) & - -S1*((B(IA+I)+B(IC+I))+(A(IB+I)-A(ID+I))) - D(JB+J)= & - S1*((A(IA+I)-A(IC+I))-(B(IB+I)+B(ID+I))) & - +C1*((B(IA+I)+B(IC+I))+(A(IB+I)-A(ID+I))) - C(JD+J)= & - C3*((A(IA+I)-A(IC+I))+(B(IB+I)+B(ID+I))) & - -S3*((B(IA+I)+B(IC+I))-(A(IB+I)-A(ID+I))) - D(JD+J)= & - S3*((A(IA+I)-A(IC+I))+(B(IB+I)+B(ID+I))) & - +C3*((B(IA+I)+B(IC+I))-(A(IB+I)-A(ID+I))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)) + PD(IJA+IJ)=(PB(IIA+II)-PB(IIC+II))+(PB(IIB+II)-PB(IID+II)) + PC(IJC+IJ)= & + ZC2*((PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) & + -ZS2*((PB(IIA+II)-PB(IIC+II))-(PB(IIB+II)-PB(IID+II))) + PD(IJC+IJ)= & + ZS2*((PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) & + +ZC2*((PB(IIA+II)-PB(IIC+II))-(PB(IIB+II)-PB(IID+II))) + PC(IJB+IJ)= & + ZC1*((PA(IIA+II)-PA(IIC+II))-(PB(IIB+II)+PB(IID+II))) & + -ZS1*((PB(IIA+II)+PB(IIC+II))+(PA(IIB+II)-PA(IID+II))) + PD(IJB+IJ)= & + ZS1*((PA(IIA+II)-PA(IIC+II))-(PB(IIB+II)+PB(IID+II))) & + +ZC1*((PB(IIA+II)+PB(IIC+II))+(PA(IIB+II)-PA(IID+II))) + PC(IJD+IJ)= & + ZC3*((PA(IIA+II)-PA(IIC+II))+(PB(IIB+II)+PB(IID+II))) & + -ZS3*((PB(IIA+II)+PB(IIC+II))-(PA(IIB+II)-PA(IID+II))) + PD(IJD+IJ)= & + ZS3*((PA(IIA+II)-PA(IIC+II))+(PB(IIB+II)+PB(IID+II))) & + +ZC3*((PB(IIA+II)+PB(IIC+II))-(PA(IIB+II)-PA(IID+II))) END DO END DO END DO - JBASE = JBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + JUMP ) - IA = IA0 + ((KSTOP-ILA)/ILA+1) * IINK - IB = IB0 + ((KSTOP-ILA)/ILA+1) * IINK - IC = IC0 - ((KSTOP-ILA)/ILA+1) * IINK - ID = ID0 - ((KSTOP-ILA)/ILA+1) * IINK + IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) + IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK + IIB = IIB0 + ((ISTOP-KLA)/KLA+1) * IINK + IIC = IIC0 - ((ISTOP-KLA)/KLA+1) * IINK + IID = IID0 - ((ISTOP-KLA)/KLA+1) * IINK !$acc end kernels END IF - IF ( IB <= IC ) THEN + IF ( IIB <= IIC ) THEN !$acc kernels - IBASE=0 - SIN45=SQRT(0.5) + IIBASE=0 + ZSIN45=SQRT(0.5) !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+A(IB+I) - C(JB+J)=SIN45*((A(IA+I)-A(IB+I))-(B(IA+I)+B(IB+I))) - C(JC+J)=B(IB+I)-B(IA+I) - C(JD+J)=-SIN45*((A(IA+I)-A(IB+I))+(B(IA+I)+B(IB+I))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) + PC(IJB+IJ)=ZSIN45*((PA(IIA+II)-PA(IIB+II))-(PB(IIA+II)+PB(IIB+II))) + PC(IJC+IJ)=PB(IIB+II)-PB(IIA+II) + PC(IJD+IJ)=-ZSIN45*((PA(IIA+II)-PA(IIB+II))+(PB(IIA+II)+PB(IIB+II))) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF - ELSE ! ILA == M + ELSE ! KLA == M !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=2.0*((A(IA+I)+A(IC+I))+A(IB+I)) - C(JB+J)=2.0*((A(IA+I)-A(IC+I))-B(IB+I)) - C(JC+J)=2.0*((A(IA+I)+A(IC+I))-A(IB+I)) - C(JD+J)=2.0*((A(IA+I)-A(IC+I))+B(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))+PA(IIB+II)) + PC(IJB+IJ)=2.0*((PA(IIA+II)-PA(IIC+II))-PB(IIB+II)) + PC(IJC+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) + PC(IJD+IJ)=2.0*((PA(IIA+II)-PA(IIC+II))+PB(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -826,188 +826,188 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 5 ! ------------------- - IA=1 - IB=IA+2*M-ILA - IC=IB+2*M - ID=IC - IE=IB - JA=1 - JB=JA+JINK - JC=JB+JINK - JD=JC+JINK - JE=JD+JINK - - IF ( ILA /= M ) THEN + IIA=1 + IIB=IIA+2*IM-KLA + IIC=IIB+2*IM + IID=IIC + IIE=IIB + IJA=1 + IJB=IJA+JINK + IJC=IJB+JINK + IJD=IJC+JINK + IJE=IJD+JINK + + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+(A(IB+I)+A(IC+I)) - C(JB+J)=((A(IA+I)-0.25*(A(IB+I)+A(IC+I)))+XQRT5*(A(IB+I)-A(IC+ & - I))) & - -(XSIN72*B(IB+I)+XSIN36*B(IC+I)) - C(JC+J)=((A(IA+I)-0.25*(A(IB+I)+A(IC+I)))-XQRT5*(A(IB+I)-A(IC+ & - I))) & - -(XSIN36*B(IB+I)-XSIN72*B(IC+I)) - C(JD+J)=((A(IA+I)-0.25*(A(IB+I)+A(IC+I)))-XQRT5*(A(IB+I)-A(IC+ & - I))) & - +(XSIN36*B(IB+I)-XSIN72*B(IC+I)) - C(JE+J)=((A(IA+I)-0.25*(A(IB+I)+A(IC+I)))+XQRT5*(A(IB+I)-A(IC+ & - I))) & - +(XSIN72*B(IB+I)+XSIN36*B(IC+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) + PC(IJB+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))+XQRT5*(PA(IIB+II)-PA(IIC+ & + II))) & + -(XSIN72*PB(IIB+II)+XSIN36*PB(IIC+II)) + PC(IJC+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))-XQRT5*(PA(IIB+II)-PA(IIC+ & + II))) & + -(XSIN36*PB(IIB+II)-XSIN72*PB(IIC+II)) + PC(IJD+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))-XQRT5*(PA(IIB+II)-PA(IIC+ & + II))) & + +(XSIN36*PB(IIB+II)-XSIN72*PB(IIC+II)) + PC(IJE+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))+XQRT5*(PA(IIB+II)-PA(IIC+ & + II))) & + +(XSIN72*PB(IIB+II)+XSIN36*PB(IIC+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - IA=IA+IINK + IIA=IIA+IINK IINK=2*IINK - IB=IB+IINK - IC=IC+IINK - ID=ID-IINK - IE=IE-IINK - JBASE=JBASE+JUMP + IIB=IIB+IINK + IIC=IIC+IINK + IID=IID-IINK + IIE=IIE-IINK + IJBASE=IJBASE+JUMP JUMP=2*JUMP+JINK - IF ( IB /= ID ) THEN + IF ( IIB /= IID ) THEN !$acc kernels - JBASE0 = JBASE - IA0 = IA - IB0 = IB - IC0 = IC - ID0 = ID - IE0 = IE -!$acc loop independent private( KB, KC, KD, KE, C1, S1, C2, S2, C3, S3, C4, S4, JBASE, IA, IB, IC, ID, IE ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - KD=KC+KB - KE=KD+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - C3=TRIGS(KD+1) - S3=TRIGS(KD+2) - C4=TRIGS(KE+1) - S4=TRIGS(KE+2) - JBASE = JBASE0 + (K-ILA)/ILA * (ILA + JUMP ) - IA = IA0 + (K-ILA)/ILA * IINK - IB = IB0 + (K-ILA)/ILA * IINK - IC = IC0 + (K-ILA)/ILA * IINK - ID = ID0 - (K-ILA)/ILA * IINK - IE = IE0 - (K-ILA)/ILA * IINK + IJBASE0 = IJBASE + IIA0 = IIA + IIB0 = IIB + IIC0 = IIC + IID0 = IID + IIE0 = IIE +!$acc loop independent private( IKB, IKC, IKD, IKE, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, ZC4, ZS4, IJBASE, IIA, IIB, IIC, IID, IIE ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + IKD=IKC+IKB + IKE=IKD+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + ZC3=PTRIGS(IKD+1) + ZS3=PTRIGS(IKD+2) + ZC4=PTRIGS(IKE+1) + ZS4=PTRIGS(IKE+2) + IJBASE = IJBASE0 + (IIK-KLA)/KLA * (KLA + JUMP ) + IIA = IIA0 + (IIK-KLA)/KLA * IINK + IIB = IIB0 + (IIK-KLA)/KLA * IINK + IIC = IIC0 + (IIK-KLA)/KLA * IINK + IID = IID0 - (IIK-KLA)/KLA * IINK + IIE = IIE0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A10, A11, A20, A21, B10, B11, B20, B21 ) - DO IJK=1,ILOT - I = IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - - A10=(A(IA+I)-0.25*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & - +XQRT5*((A(IB+I)+A(IE+I))-(A(IC+I)+A(ID+I))) - A20=(A(IA+I)-0.25*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & - -XQRT5*((A(IB+I)+A(IE+I))-(A(IC+I)+A(ID+I))) - B10=(B(IA+I)-0.25*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & - +XQRT5*((B(IB+I)-B(IE+I))-(B(IC+I)-B(ID+I))) - B20=(B(IA+I)-0.25*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & - -XQRT5*((B(IB+I)-B(IE+I))-(B(IC+I)-B(ID+I))) - A11=XSIN72*(B(IB+I)+B(IE+I))+XSIN36*(B(IC+I)+B(ID+I)) - A21=XSIN36*(B(IB+I)+B(IE+I))-XSIN72*(B(IC+I)+B(ID+I)) - B11=XSIN72*(A(IB+I)-A(IE+I))+XSIN36*(A(IC+I)-A(ID+I)) - B21=XSIN36*(A(IB+I)-A(IE+I))-XSIN72*(A(IC+I)-A(ID+I)) - - C(JA+J)=A(IA+I)+((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I))) - D(JA+J)=B(IA+I)+((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I))) - C(JB+J)=C1*(A10-A11)-S1*(B10+B11) - D(JB+J)=S1*(A10-A11)+C1*(B10+B11) - C(JE+J)=C4*(A10+A11)-S4*(B10-B11) - D(JE+J)=S4*(A10+A11)+C4*(B10-B11) - C(JC+J)=C2*(A20-A21)-S2*(B20+B21) - D(JC+J)=S2*(A20-A21)+C2*(B20+B21) - C(JD+J)=C3*(A20+A21)-S3*(B20-B21) - D(JD+J)=S3*(A20+A21)+C3*(B20-B21) +!$acc loop independent private( II, IJ, ZA10, ZA11, ZA20, ZA21, ZB10, ZB11, ZB20, ZB21 ) + DO IJK=1,KLOT + II = IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + + ZA10=(PA(IIA+II)-0.25*((PA(IIB+II)+PA(IIE+II))+(PA(IIC+II)+PA(IID+II)))) & + +XQRT5*((PA(IIB+II)+PA(IIE+II))-(PA(IIC+II)+PA(IID+II))) + ZA20=(PA(IIA+II)-0.25*((PA(IIB+II)+PA(IIE+II))+(PA(IIC+II)+PA(IID+II)))) & + -XQRT5*((PA(IIB+II)+PA(IIE+II))-(PA(IIC+II)+PA(IID+II))) + ZB10=(PB(IIA+II)-0.25*((PB(IIB+II)-PB(IIE+II))+(PB(IIC+II)-PB(IID+II)))) & + +XQRT5*((PB(IIB+II)-PB(IIE+II))-(PB(IIC+II)-PB(IID+II))) + ZB20=(PB(IIA+II)-0.25*((PB(IIB+II)-PB(IIE+II))+(PB(IIC+II)-PB(IID+II)))) & + -XQRT5*((PB(IIB+II)-PB(IIE+II))-(PB(IIC+II)-PB(IID+II))) + ZA11=XSIN72*(PB(IIB+II)+PB(IIE+II))+XSIN36*(PB(IIC+II)+PB(IID+II)) + ZA21=XSIN36*(PB(IIB+II)+PB(IIE+II))-XSIN72*(PB(IIC+II)+PB(IID+II)) + ZB11=XSIN72*(PA(IIB+II)-PA(IIE+II))+XSIN36*(PA(IIC+II)-PA(IID+II)) + ZB21=XSIN36*(PA(IIB+II)-PA(IIE+II))-XSIN72*(PA(IIC+II)-PA(IID+II)) + + PC(IJA+IJ)=PA(IIA+II)+((PA(IIB+II)+PA(IIE+II))+(PA(IIC+II)+PA(IID+II))) + PD(IJA+IJ)=PB(IIA+II)+((PB(IIB+II)-PB(IIE+II))+(PB(IIC+II)-PB(IID+II))) + PC(IJB+IJ)=ZC1*(ZA10-ZA11)-ZS1*(ZB10+ZB11) + PD(IJB+IJ)=ZS1*(ZA10-ZA11)+ZC1*(ZB10+ZB11) + PC(IJE+IJ)=ZC4*(ZA10+ZA11)-ZS4*(ZB10-ZB11) + PD(IJE+IJ)=ZS4*(ZA10+ZA11)+ZC4*(ZB10-ZB11) + PC(IJC+IJ)=ZC2*(ZA20-ZA21)-ZS2*(ZB20+ZB21) + PD(IJC+IJ)=ZS2*(ZA20-ZA21)+ZC2*(ZB20+ZB21) + PC(IJD+IJ)=ZC3*(ZA20+ZA21)-ZS3*(ZB20-ZB21) + PD(IJD+IJ)=ZS3*(ZA20+ZA21)+ZC3*(ZB20-ZB21) END DO END DO END DO - JBASE = JBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + JUMP ) - IA = IA0 + ((KSTOP-ILA)/ILA+1) * IINK - IB = IB0 + ((KSTOP-ILA)/ILA+1) * IINK - IC = IC0 + ((KSTOP-ILA)/ILA+1) * IINK - ID = ID0 - ((KSTOP-ILA)/ILA+1) * IINK - IE = IE0 - ((KSTOP-ILA)/ILA+1) * IINK + IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) + IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK + IIB = IIB0 + ((ISTOP-KLA)/KLA+1) * IINK + IIC = IIC0 + ((ISTOP-KLA)/KLA+1) * IINK + IID = IID0 - ((ISTOP-KLA)/KLA+1) * IINK + IIE = IIE0 - ((ISTOP-KLA)/KLA+1) * IINK !$acc end kernels END IF - IF ( IB <= ID ) THEN + IF ( IIB <= IID ) THEN !$acc kernels - IBASE=0 + IIBASE=0 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(A(IA+I)+A(IB+I))+A(IC+I) - C(JB+J)=(XQRT5*(A(IA+I)-A(IB+I))+(0.25*(A(IA+I)+A(IB+I))-A(IC+ & - I))) & - -(XSIN36*B(IA+I)+XSIN72*B(IB+I)) - C(JE+J)=-(XQRT5*(A(IA+I)-A(IB+I))+(0.25*(A(IA+I)+A(IB+I))-A(IC+ & - I))) & - -(XSIN36*B(IA+I)+XSIN72*B(IB+I)) - C(JC+J)=(XQRT5*(A(IA+I)-A(IB+I))-(0.25*(A(IA+I)+A(IB+I))-A(IC+ & - I))) & - -(XSIN72*B(IA+I)-XSIN36*B(IB+I)) - C(JD+J)=-(XQRT5*(A(IA+I)-A(IB+I))-(0.25*(A(IA+I)+A(IB+I))-A(IC+ & - I))) & - -(XSIN72*B(IA+I)-XSIN36*B(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(PA(IIA+II)+PA(IIB+II))+PA(IIC+II) + PC(IJB+IJ)=(XQRT5*(PA(IIA+II)-PA(IIB+II))+(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ & + II))) & + -(XSIN36*PB(IIA+II)+XSIN72*PB(IIB+II)) + PC(IJE+IJ)=-(XQRT5*(PA(IIA+II)-PA(IIB+II))+(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ & + II))) & + -(XSIN36*PB(IIA+II)+XSIN72*PB(IIB+II)) + PC(IJC+IJ)=(XQRT5*(PA(IIA+II)-PA(IIB+II))-(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ & + II))) & + -(XSIN72*PB(IIA+II)-XSIN36*PB(IIB+II)) + PC(IJD+IJ)=-(XQRT5*(PA(IIA+II)-PA(IIB+II))-(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ & + II))) & + -(XSIN72*PB(IIA+II)-XSIN36*PB(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - QQRT5=2.0*XQRT5 - SSIN36=2.0*XSIN36 - SSIN72=2.0*XSIN72 + ZQQRT5=2.0*XQRT5 + ZSSIN36=2.0*XSIN36 + ZSSIN72=2.0*XSIN72 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=2.0*(A(IA+I)+(A(IB+I)+A(IC+I))) - C(JB+J)=(2.0*(A(IA+I)-0.25*(A(IB+I)+A(IC+I))) & - +QQRT5*(A(IB+I)-A(IC+I)))-(SSIN72*B(IB+I)+SSIN36*B(IC+I)) - C(JC+J)=(2.0*(A(IA+I)-0.25*(A(IB+I)+A(IC+I))) & - -QQRT5*(A(IB+I)-A(IC+I)))-(SSIN36*B(IB+I)-SSIN72*B(IC+I)) - C(JD+J)=(2.0*(A(IA+I)-0.25*(A(IB+I)+A(IC+I))) & - -QQRT5*(A(IB+I)-A(IC+I)))+(SSIN36*B(IB+I)-SSIN72*B(IC+I)) - C(JE+J)=(2.0*(A(IA+I)-0.25*(A(IB+I)+A(IC+I))) & - +QQRT5*(A(IB+I)-A(IC+I)))+(SSIN72*B(IB+I)+SSIN36*B(IC+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=2.0*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))) + PC(IJB+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II))) & + +ZQQRT5*(PA(IIB+II)-PA(IIC+II)))-(ZSSIN72*PB(IIB+II)+ZSSIN36*PB(IIC+II)) + PC(IJC+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II))) & + -ZQQRT5*(PA(IIB+II)-PA(IIC+II)))-(ZSSIN36*PB(IIB+II)-ZSSIN72*PB(IIC+II)) + PC(IJD+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II))) & + -ZQQRT5*(PA(IIB+II)-PA(IIC+II)))+(ZSSIN36*PB(IIB+II)-ZSSIN72*PB(IIC+II)) + PC(IJE+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II))) & + +ZQQRT5*(PA(IIB+II)-PA(IIC+II)))+(ZSSIN72*PB(IIB+II)+ZSSIN36*PB(IIC+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -1018,194 +1018,195 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 6 ! ------------------- - IA=1 - IB=IA+2*M-ILA - IC=IB+2*M - ID=IC+2*M - IE=IC - IF=IB - JA=1 - JB=JA+JINK - JC=JB+JINK - JD=JC+JINK - JE=JD+JINK - JF=JE+JINK - - IF ( ILA /= M ) THEN + IIA=1 + IIB=IIA+2*IM-KLA + IIC=IIB+2*IM + IID=IIC+2*IM + IIE=IIC + IIF=IIB + IJA=1 + IJB=IJA+JINK + IJC=IJB+JINK + IJD=IJC+JINK + IJE=IJD+JINK + IJF=IJE+JINK + + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(A(IA+I)+A(ID+I))+(A(IB+I)+A(IC+I)) - C(JD+J)=(A(IA+I)-A(ID+I))-(A(IB+I)-A(IC+I)) - C(JB+J)=((A(IA+I)-A(ID+I))+0.5*(A(IB+I)-A(IC+I))) & - -(XSIN60*(B(IB+I)+B(IC+I))) - C(JF+J)=((A(IA+I)-A(ID+I))+0.5*(A(IB+I)-A(IC+I))) & - +(XSIN60*(B(IB+I)+B(IC+I))) - C(JC+J)=((A(IA+I)+A(ID+I))-0.5*(A(IB+I)+A(IC+I))) & - -(XSIN60*(B(IB+I)-B(IC+I))) - C(JE+J)=((A(IA+I)+A(ID+I))-0.5*(A(IB+I)+A(IC+I))) & - +(XSIN60*(B(IB+I)-B(IC+I))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+(PA(IIB+II)+PA(IIC+II)) + PC(IJD+IJ)=(PA(IIA+II)-PA(IID+II))-(PA(IIB+II)-PA(IIC+II)) + PC(IJB+IJ)=((PA(IIA+II)-PA(IID+II))+0.5*(PA(IIB+II)-PA(IIC+II))) & + -(XSIN60*(PB(IIB+II)+PB(IIC+II))) + PC(IJF+IJ)=((PA(IIA+II)-PA(IID+II))+0.5*(PA(IIB+II)-PA(IIC+II))) & + +(XSIN60*(PB(IIB+II)+PB(IIC+II))) + PC(IJC+IJ)=((PA(IIA+II)+PA(IID+II))-0.5*(PA(IIB+II)+PA(IIC+II))) & + -(XSIN60*(PB(IIB+II)-PB(IIC+II))) + PC(IJE+IJ)=((PA(IIA+II)+PA(IID+II))-0.5*(PA(IIB+II)+PA(IIC+II))) & + +(XSIN60*(PB(IIB+II)-PB(IIC+II))) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - IA=IA+IINK + IIA=IIA+IINK IINK=2*IINK - IB=IB+IINK - IC=IC+IINK - ID=ID-IINK - IE=IE-IINK - IF=IF-IINK - JBASE=JBASE+JUMP + IIB=IIB+IINK + IIC=IIC+IINK + IID=IID-IINK + IIE=IIE-IINK + IIF=IIF-IINK + IJBASE=IJBASE+JUMP JUMP=2*JUMP+JINK - IF ( IC /= ID ) THEN + IF ( IIC /= IID ) THEN !$acc kernels - JBASE0 = JBASE - IA0 = IA - IB0 = IB - IC0 = IC - ID0 = ID - IE0 = IE - IF0 = IF -!$acc loop independent private( KB, KC, KD, KE, KF, C1, S1, C2, S2, C3, S3, C4, S4, C5, S5, JBASE, IA, IB, IC, ID, IE, IF ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - KD=KC+KB - KE=KD+KB - KF=KE+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - C3=TRIGS(KD+1) - S3=TRIGS(KD+2) - C4=TRIGS(KE+1) - S4=TRIGS(KE+2) - C5=TRIGS(KF+1) - S5=TRIGS(KF+2) - JBASE = JBASE0 + (K-ILA)/ILA * (ILA + JUMP ) - IA = IA0 + (K-ILA)/ILA * IINK - IB = IB0 + (K-ILA)/ILA * IINK - IC = IC0 + (K-ILA)/ILA * IINK - ID = ID0 - (K-ILA)/ILA * IINK - IE = IE0 - (K-ILA)/ILA * IINK - IF = IF0 - (K-ILA)/ILA * IINK + IJBASE0 = IJBASE + IIA0 = IIA + IIB0 = IIB + IIC0 = IIC + IID0 = IID + IIE0 = IIE + IIF0 = IIF +!$acc loop independent private( IKB, IKC, IKD, IKE, IKF, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, ZC4, ZS4, ZC5, ZS5, & +!$acc & IJBASE, IIA, IIB, IIC, IID, IIE, IIF ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + IKD=IKC+IKB + IKE=IKD+IKB + IKF=IKE+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + ZC3=PTRIGS(IKD+1) + ZS3=PTRIGS(IKD+2) + ZC4=PTRIGS(IKE+1) + ZS4=PTRIGS(IKE+2) + ZC5=PTRIGS(IKF+1) + ZS5=PTRIGS(IKF+2) + IJBASE = IJBASE0 + (IIK-KLA)/KLA * (KLA + JUMP ) + IIA = IIA0 + (IIK-KLA)/KLA * IINK + IIB = IIB0 + (IIK-KLA)/KLA * IINK + IIC = IIC0 + (IIK-KLA)/KLA * IINK + IID = IID0 - (IIK-KLA)/KLA * IINK + IIE = IIE0 - (IIK-KLA)/KLA * IINK + IIF = IIF0 - (IIK-KLA)/KLA * IINK !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A11, A20, A21, B11, B20, B21 ) - DO IJK=1,ILOT - I = IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - - A11= (A(IE+I)+A(IB+I))+(A(IC+I)+A(IF+I)) - A20=(A(IA+I)+A(ID+I))-0.5*A11 - A21=XSIN60*((A(IE+I)+A(IB+I))-(A(IC+I)+A(IF+I))) - B11= (B(IB+I)-B(IE+I))+(B(IC+I)-B(IF+I)) - B20=(B(IA+I)-B(ID+I))-0.5*B11 - B21=XSIN60*((B(IB+I)-B(IE+I))-(B(IC+I)-B(IF+I))) - - C(JA+J)=(A(IA+I)+A(ID+I))+A11 - D(JA+J)=(B(IA+I)-B(ID+I))+B11 - C(JC+J)=C2*(A20-B21)-S2*(B20+A21) - D(JC+J)=S2*(A20-B21)+C2*(B20+A21) - C(JE+J)=C4*(A20+B21)-S4*(B20-A21) - D(JE+J)=S4*(A20+B21)+C4*(B20-A21) - - A11=(A(IE+I)-A(IB+I))+(A(IC+I)-A(IF+I)) - B11=(B(IE+I)+B(IB+I))-(B(IC+I)+B(IF+I)) - A20=(A(IA+I)-A(ID+I))-0.5*A11 - A21=XSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) - B20=(B(IA+I)+B(ID+I))+0.5*B11 - B21=XSIN60*((B(IE+I)+B(IB+I))+(B(IC+I)+B(IF+I))) - - C(JD+J)= & - C3*((A(IA+I)-A(ID+I))+A11)-S3*((B(IA+I)+B(ID+I))-B11) - D(JD+J)= & - S3*((A(IA+I)-A(ID+I))+A11)+C3*((B(IA+I)+B(ID+I))-B11) - C(JB+J)=C1*(A20-B21)-S1*(B20-A21) - D(JB+J)=S1*(A20-B21)+C1*(B20-A21) - C(JF+J)=C5*(A20+B21)-S5*(B20+A21) - D(JF+J)=S5*(A20+B21)+C5*(B20+A21) +!$acc loop independent private( II, IJ, ZA11, ZA20, ZA21, ZB11, ZB20, ZB21 ) + DO IJK=1,KLOT + II = IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + + ZA11= (PA(IIE+II)+PA(IIB+II))+(PA(IIC+II)+PA(IIF+II)) + ZA20=(PA(IIA+II)+PA(IID+II))-0.5*ZA11 + ZA21=XSIN60*((PA(IIE+II)+PA(IIB+II))-(PA(IIC+II)+PA(IIF+II))) + ZB11= (PB(IIB+II)-PB(IIE+II))+(PB(IIC+II)-PB(IIF+II)) + ZB20=(PB(IIA+II)-PB(IID+II))-0.5*ZB11 + ZB21=XSIN60*((PB(IIB+II)-PB(IIE+II))-(PB(IIC+II)-PB(IIF+II))) + + PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+ZA11 + PD(IJA+IJ)=(PB(IIA+II)-PB(IID+II))+ZB11 + PC(IJC+IJ)=ZC2*(ZA20-ZB21)-ZS2*(ZB20+ZA21) + PD(IJC+IJ)=ZS2*(ZA20-ZB21)+ZC2*(ZB20+ZA21) + PC(IJE+IJ)=ZC4*(ZA20+ZB21)-ZS4*(ZB20-ZA21) + PD(IJE+IJ)=ZS4*(ZA20+ZB21)+ZC4*(ZB20-ZA21) + + ZA11=(PA(IIE+II)-PA(IIB+II))+(PA(IIC+II)-PA(IIF+II)) + ZB11=(PB(IIE+II)+PB(IIB+II))-(PB(IIC+II)+PB(IIF+II)) + ZA20=(PA(IIA+II)-PA(IID+II))-0.5*ZA11 + ZA21=XSIN60*((PA(IIE+II)-PA(IIB+II))-(PA(IIC+II)-PA(IIF+II))) + ZB20=(PB(IIA+II)+PB(IID+II))+0.5*ZB11 + ZB21=XSIN60*((PB(IIE+II)+PB(IIB+II))+(PB(IIC+II)+PB(IIF+II))) + + PC(IJD+IJ)= & + ZC3*((PA(IIA+II)-PA(IID+II))+ZA11)-ZS3*((PB(IIA+II)+PB(IID+II))-ZB11) + PD(IJD+IJ)= & + ZS3*((PA(IIA+II)-PA(IID+II))+ZA11)+ZC3*((PB(IIA+II)+PB(IID+II))-ZB11) + PC(IJB+IJ)=ZC1*(ZA20-ZB21)-ZS1*(ZB20-ZA21) + PD(IJB+IJ)=ZS1*(ZA20-ZB21)+ZC1*(ZB20-ZA21) + PC(IJF+IJ)=ZC5*(ZA20+ZB21)-ZS5*(ZB20+ZA21) + PD(IJF+IJ)=ZS5*(ZA20+ZB21)+ZC5*(ZB20+ZA21) END DO END DO END DO - JBASE = JBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + JUMP ) - IA = IA0 + ((KSTOP-ILA)/ILA+1) * IINK - IB = IB0 + ((KSTOP-ILA)/ILA+1) * IINK - IC = IC0 + ((KSTOP-ILA)/ILA+1) * IINK - ID = ID0 - ((KSTOP-ILA)/ILA+1) * IINK - IE = IE0 - ((KSTOP-ILA)/ILA+1) * IINK - IF = IF0 - ((KSTOP-ILA)/ILA+1) * IINK + IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) + IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK + IIB = IIB0 + ((ISTOP-KLA)/KLA+1) * IINK + IIC = IIC0 + ((ISTOP-KLA)/KLA+1) * IINK + IID = IID0 - ((ISTOP-KLA)/KLA+1) * IINK + IIE = IIE0 - ((ISTOP-KLA)/KLA+1) * IINK + IIF = IIF0 - ((ISTOP-KLA)/KLA+1) * IINK !$acc end kernels END IF - IF ( IC <= ID ) THEN + IF ( IIC <= IID ) THEN !$acc kernels - IBASE=0 + IIBASE=0 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IB+I)+(A(IA+I)+A(IC+I)) - C(JD+J)=B(IB+I)-(B(IA+I)+B(IC+I)) - C(JB+J)=(XSIN60*(A(IA+I)-A(IC+I)))-(0.5*(B(IA+I)+B(IC+I))+B(IB+ & - I)) - C(JF+J)=-(XSIN60*(A(IA+I)-A(IC+I)))-(0.5*(B(IA+I)+B(IC+I))+B(IB+ & - I)) - C(JC+J)=XSIN60*(B(IC+I)-B(IA+I))+(0.5*(A(IA+I)+A(IC+I))-A(IB+I)) - C(JE+J)=XSIN60*(B(IC+I)-B(IA+I))-(0.5*(A(IA+I)+A(IC+I))-A(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIB+II)+(PA(IIA+II)+PA(IIC+II)) + PC(IJD+IJ)=PB(IIB+II)-(PB(IIA+II)+PB(IIC+II)) + PC(IJB+IJ)=(XSIN60*(PA(IIA+II)-PA(IIC+II)))-(0.5*(PB(IIA+II)+PB(IIC+II))+PB(IIB+ & + II)) + PC(IJF+IJ)=-(XSIN60*(PA(IIA+II)-PA(IIC+II)))-(0.5*(PB(IIA+II)+PB(IIC+II))+PB(IIB+ & + II)) + PC(IJC+IJ)=XSIN60*(PB(IIC+II)-PB(IIA+II))+(0.5*(PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) + PC(IJE+IJ)=XSIN60*(PB(IIC+II)-PB(IIA+II))-(0.5*(PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - SSIN60=2.0*XSIN60 + ZSSIN60=2.0*XSIN60 !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(2.0*(A(IA+I)+A(ID+I)))+(2.0*(A(IB+I)+A(IC+I))) - C(JD+J)=(2.0*(A(IA+I)-A(ID+I)))-(2.0*(A(IB+I)-A(IC+I))) - C(JB+J)=(2.0*(A(IA+I)-A(ID+I))+(A(IB+I)-A(IC+I))) & - -(SSIN60*(B(IB+I)+B(IC+I))) - C(JF+J)=(2.0*(A(IA+I)-A(ID+I))+(A(IB+I)-A(IC+I))) & - +(SSIN60*(B(IB+I)+B(IC+I))) - C(JC+J)=(2.0*(A(IA+I)+A(ID+I))-(A(IB+I)+A(IC+I))) & - -(SSIN60*(B(IB+I)-B(IC+I))) - C(JE+J)=(2.0*(A(IA+I)+A(ID+I))-(A(IB+I)+A(IC+I))) & - +(SSIN60*(B(IB+I)-B(IC+I))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(2.0*(PA(IIA+II)+PA(IID+II)))+(2.0*(PA(IIB+II)+PA(IIC+II))) + PC(IJD+IJ)=(2.0*(PA(IIA+II)-PA(IID+II)))-(2.0*(PA(IIB+II)-PA(IIC+II))) + PC(IJB+IJ)=(2.0*(PA(IIA+II)-PA(IID+II))+(PA(IIB+II)-PA(IIC+II))) & + -(ZSSIN60*(PB(IIB+II)+PB(IIC+II))) + PC(IJF+IJ)=(2.0*(PA(IIA+II)-PA(IID+II))+(PA(IIB+II)-PA(IIC+II))) & + +(ZSSIN60*(PB(IIB+II)+PB(IIC+II))) + PC(IJC+IJ)=(2.0*(PA(IIA+II)+PA(IID+II))-(PA(IIB+II)+PA(IIC+II))) & + -(ZSSIN60*(PB(IIB+II)-PB(IIC+II))) + PC(IJE+IJ)=(2.0*(PA(IIA+II)+PA(IID+II))-(PA(IIB+II)+PA(IIC+II))) & + +(ZSSIN60*(PB(IIB+II)-PB(IIC+II))) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -1216,53 +1217,53 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 8 ! ------------------- - IF ( ILA /= M ) THEN + IF ( KLA /= IM ) THEN IBAD = 3 ELSE !$acc kernels - IA=1 - IB=IA+ILA - IC=IB+2*ILA - ID=IC+2*ILA - IE=ID+2*ILA - JA=1 - JB=JA+JINK - JC=JB+JINK - JD=JC+JINK - JE=JD+JINK - JF=JE+JINK - JG=JF+JINK - JH=JG+JINK - SSIN45=SQRT(2.0) + IIA=1 + IIB=IIA+KLA + IIC=IIB+2*KLA + IID=IIC+2*KLA + IIE=IID+2*KLA + IJA=1 + IJB=IJA+JINK + IJC=IJB+JINK + IJD=IJC+JINK + IJE=IJD+JINK + IJF=IJE+JINK + IJG=IJF+JINK + IJH=IJG+JINK + ZSSIN45=SQRT(2.0) !$acc loop independent - DO IL=1,ILA + DO IIL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + IL - 1 + (IJK - 1 ) * INC3 - J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=2.0*(((A(IA+I)+A(IE+I))+A(IC+I))+(A(IB+I)+A(ID+I))) - C(JE+J)=2.0*(((A(IA+I)+A(IE+I))+A(IC+I))-(A(IB+I)+A(ID+I))) - C(JC+J)=2.0*(((A(IA+I)+A(IE+I))-A(IC+I))-(B(IB+I)-B(ID+I))) - C(JG+J)=2.0*(((A(IA+I)+A(IE+I))-A(IC+I))+(B(IB+I)-B(ID+I))) - C(JB+J)=2.0*((A(IA+I)-A(IE+I))-B(IC+I)) & - +SSIN45*((A(IB+I)-A(ID+I))-(B(IB+I)+B(ID+I))) - C(JF+J)=2.0*((A(IA+I)-A(IE+I))-B(IC+I)) & - -SSIN45*((A(IB+I)-A(ID+I))-(B(IB+I)+B(ID+I))) - C(JD+J)=2.0*((A(IA+I)-A(IE+I))+B(IC+I)) & - -SSIN45*((A(IB+I)-A(ID+I))+(B(IB+I)+B(ID+I))) - C(JH+J)=2.0*((A(IA+I)-A(IE+I))+B(IC+I)) & - +SSIN45*((A(IB+I)-A(ID+I))+(B(IB+I)+B(ID+I))) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IIL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IIL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))) + PC(IJE+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) + PC(IJC+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))-PA(IIC+II))-(PB(IIB+II)-PB(IID+II))) + PC(IJG+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))-PA(IIC+II))+(PB(IIB+II)-PB(IID+II))) + PC(IJB+IJ)=2.0*((PA(IIA+II)-PA(IIE+II))-PB(IIC+II)) & + +ZSSIN45*((PA(IIB+II)-PA(IID+II))-(PB(IIB+II)+PB(IID+II))) + PC(IJF+IJ)=2.0*((PA(IIA+II)-PA(IIE+II))-PB(IIC+II)) & + -ZSSIN45*((PA(IIB+II)-PA(IID+II))-(PB(IIB+II)+PB(IID+II))) + PC(IJD+IJ)=2.0*((PA(IIA+II)-PA(IIE+II))+PB(IIC+II)) & + -ZSSIN45*((PA(IIB+II)-PA(IID+II))+(PB(IIB+II)+PB(IID+II))) + PC(IJH+IJ)=2.0*((PA(IIA+II)-PA(IIE+II))+PB(IIC+II)) & + +ZSSIN45*((PA(IIB+II)-PA(IID+II))+(PB(IIB+II)+PB(IID+II))) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels IBAD=0 - END IF !ILA==M + END IF !KLA==M CASE DEFAULT CALL PRINT_MSG( NVERB_ERROR, 'GEN', 'RPASSM', 'invalid factor' ) @@ -1272,7 +1273,7 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! RETURN ! ------ 910 continue - IERR=IBAD + KERR=IBAD !$acc end data @@ -1281,180 +1282,180 @@ END SUBROUTINE RPASSM -SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KSZ4,KSZ5) +SUBROUTINE QPASSM(PA,PB,PC,PD,PTRIGS,KINC3,KINC4,KLOT,KN,KFAC,KLA,KERR,KSZ1,KSZ2,KSZ3,KSZ4,KSZ5) IMPLICIT NONE - REAL, DIMENSION(KSZ1), INTENT(IN) :: A - REAL, DIMENSION(KSZ2), INTENT(IN) :: B - REAL, DIMENSION(KSZ3), INTENT(INOUT) :: C - REAL, DIMENSION(KSZ4), INTENT(INOUT) :: D - REAL, DIMENSION(KSZ5), INTENT(IN) :: TRIGS - INTEGER, INTENT(IN) :: INC3, INC4, ILOT, N, IFAC, ILA - INTEGER, INTENT(OUT) :: IERR + REAL, DIMENSION(KSZ1), INTENT(IN) :: PA + REAL, DIMENSION(KSZ2), INTENT(IN) :: PB + REAL, DIMENSION(KSZ3), INTENT(INOUT) :: PC + REAL, DIMENSION(KSZ4), INTENT(INOUT) :: PD + REAL, DIMENSION(KSZ5), INTENT(IN) :: PTRIGS + INTEGER, INTENT(IN) :: KINC3, KINC4, KLOT, KN, KFAC, KLA + INTEGER, INTENT(OUT) :: KERR INTEGER, INTENT(IN) :: KSZ1, KSZ2, KSZ3, KSZ4, KSZ5 ! ! SUBROUTINE 'QPASSM' - PERFORMS ONE PASS THROUGH DATA AS PART ! OF MULTIPLE REAL FFT (FOURIER ANALYSIS) ROUTINE ! -! A IS FIRST REAL INPUT VECTOR -! EQUIVALENCE B(1) WITH A(IFAC*ILA+1) -! C IS FIRST REAL OUTPUT VECTOR -! EQUIVALENCE D(1) WITH C(ILA+1) -! TRIGS IS A PRECALCULATED LIST OF SINES & COSINES -! INC3 IS THE INCREMENT BETWEEN INPUT VECTORS A -! INC4 IS THE INCREMENT BETWEEN OUTPUT VECTORS C -! ILOT IS THE NUMBER OF VECTORS -! N IS THE LENGTH OF THE VECTORS -! IFAC IS THE CURRENT FACTOR OF N -! ILA = N/(PRODUCT OF FACTORS USED SO FAR) -! IERR IS AN ERROR INDICATOR: +! PA IS FIRST REAL INPUT VECTOR +! EQUIVALENCE PB(1) WITH PA(KFAC*KLA+1) +! PC IS FIRST REAL OUTPUT VECTOR +! EQUIVALENCE PD(1) WITH PC(KLA+1) +! PTRIGS IS PA PRECALCULATED LIST OF SINES & COSINES +! KINC3 IS THE INCREMENT BETWEEN INPUT VECTORS PA +! KINC4 IS THE INCREMENT BETWEEN OUTPUT VECTORS PC +! KLOT IS THE NUMBER OF VECTORS +! KN IS THE LENGTH OF THE VECTORS +! KFAC IS THE CURRENT FACTOR OF KN +! KLA = KN/(PRODUCT OF FACTORS USED SO FAR) +! KERR IS AN ERROR INDICATOR: ! 0 - PASS COMPLETED WITHOUT ERROR -! 1 - ILOT GREATER THAN NVECLEN -! 2 - IFAC NOT CATERED FOR -! 3 - IFAC ONLY CATERED FOR IF ILA=N/IFAC +! 1 - KLOT GREATER THAN NVECLEN +! 2 - KFAC NOT CATERED FOR +! 3 - KFAC ONLY CATERED FOR IF KLA=KN/KFAC ! !----------------------------------------------------------------------- ! - INTEGER :: M, IINK, JINK, IJUMP, KSTOP - INTEGER :: IBAD, IBASE, JBASE, IGO - INTEGER :: I, J, IA, IB, JA, JB - INTEGER :: jL, IJK, K, KB, IC, JC, KC - INTEGER :: ID, JD, KD, IE, JE, KE, IF, JF, KF, IG, IH - INTEGER :: IBASE0, JA0, JB0, JC0, JD0, JE0, JF0 - REAL :: C1, C2, C3, C4, C5, S1, S2, S3, S4, S5, Z - REAL :: A0, A1, A2, A3, A4, A5, A6, A10, A11, A20, A21 - REAL :: B0, B1, B2, B3, B4, B5, B6, B10, B11, B20, B21 - REAL :: SIN45, ZQRT5, ZSIN36, ZSIN45, ZSIN60, ZSIN72 + INTEGER :: IM, IINK, JINK, IJUMP, ISTOP + INTEGER :: IBAD, IIBASE, IJBASE, IGO + INTEGER :: II, IJ, IIA, IIB, IJA, IJB + INTEGER :: IJL, IJK, IIK, IKB, IIC, IJC, IKC + INTEGER :: IID, IJD, IKD, IIE, IJE, IKE, IIF, IJF, IKF, IIG, IIH + INTEGER :: IBASE0, IJA0, IJB0, IJC0, IJD0, IJE0, IJF0 + REAL :: ZC1, ZC2, ZC3, ZC4, ZC5, ZS1, ZS2, ZS3, ZS4, ZS5, ZZ + REAL :: ZA0, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6, ZA10, ZA11, ZA20, ZA21 + REAL :: ZB0, ZB1, ZB2, ZB3, ZB4, ZB5, ZB6, ZB10, ZB11, ZB20, ZB21 + REAL :: ZSIN45, ZZQRT5, ZZSIN36, ZZSIN45, ZZSIN60, ZZSIN72 -!$acc data present( A, B, C, D ) copyin( TRIGS ) +!$acc data present( PA, PB, PC, PD ) copyin( PTRIGS ) - M=N/IFAC - IINK=ILA - JINK=ILA - IJUMP=(IFAC-1)*IINK - KSTOP=(N-IFAC)/(2*IFAC) + IM=KN/KFAC + IINK=KLA + JINK=KLA + IJUMP=(KFAC-1)*IINK + ISTOP=(KN-KFAC)/(2*KFAC) IBAD=1 - IF (ILOT.GT.NVECLEN) GO TO 910 - IBASE=0 - JBASE=0 - IGO=IFAC-1 + IF (KLOT.GT.NVECLEN) GO TO 910 + IIBASE=0 + IJBASE=0 + IGO=KFAC-1 IF (IGO.EQ.7) IGO=6 IBAD=2 IF (IGO.LT.1.OR.IGO.GT.6) GO TO 910 - SELECT CASE( IFAC ) + SELECT CASE( KFAC ) ! ! CODING FOR FACTOR 2 ! ------------------- CASE ( 2 ) - IA=1 - IB=IA+IINK - JA=1 - JB=JA+2*M-ILA + IIA=1 + IIB=IIA+IINK + IJA=1 + IJB=IJA+2*IM-KLA - IF ( ILA /= M ) THEN + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+A(IB+I) - C(JB+J)=A(IA+I)-A(IB+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) + PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - JA=JA+JINK + IJA=IJA+JINK JINK=2*JINK - JB=JB-JINK - IBASE=IBASE+IJUMP + IJB=IJB-JINK + IIBASE=IIBASE+IJUMP IJUMP=2*IJUMP+IINK - IF ( JA /= JB ) THEN + IF ( IJA /= IJB ) THEN !$acc kernels - IBASE0 = IBASE - JA0 = JA - JB0 = JB -!$acc loop independent private( KB, C1, S1, IBASE, JA, JB ) - DO K=ILA,KSTOP,ILA - KB=K+K - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - IBASE = IBASE0 + (K-ILA)/ILA * (ILA + IJUMP ) - JA = JA0 + (K-ILA)/ILA * JINK - JB = JB0 - (K-ILA)/ILA * JINK + IBASE0 = IIBASE + IJA0 = IJA + IJB0 = IJB +!$acc loop independent private( IKB, ZC1, ZS1, IIBASE, IJA, IJB ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + IIBASE = IBASE0 + (IIK-KLA)/KLA * (KLA + IJUMP ) + IJA = IJA0 + (IIK-KLA)/KLA * JINK + IJB = IJB0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+(C1*A(IB+I)+S1*B(IB+I)) - C(JB+J)=A(IA+I)-(C1*A(IB+I)+S1*B(IB+I)) - D(JA+J)=(C1*B(IB+I)-S1*A(IB+I))+B(IA+I) - D(JB+J)=(C1*B(IB+I)-S1*A(IB+I))-B(IA+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+(ZC1*PA(IIB+II)+ZS1*PB(IIB+II)) + PC(IJB+IJ)=PA(IIA+II)-(ZC1*PA(IIB+II)+ZS1*PB(IIB+II)) + PD(IJA+IJ)=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+PB(IIA+II) + PD(IJB+IJ)=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))-PB(IIA+II) END DO END DO END DO - IBASE = IBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + IJUMP ) - JA = JA0 + ((KSTOP-ILA)/ILA+1) * JINK - JB = JB0 - ((KSTOP-ILA)/ILA+1) * JINK + IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) + IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK + IJB = IJB0 - ((ISTOP-KLA)/KLA+1) * JINK !$acc end kernels END IF - IF ( JA <= JB ) THEN + IF ( IJA <= IJB ) THEN !$acc kernels - JBASE=0 + IJBASE=0 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I) - D(JA+J)=-A(IB+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II) + PD(IJA+IJ)=-PA(IIB+II) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - Z=1.0/REAL(N) + ZZ=1.0/REAL(KN) !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=Z*(A(IA+I)+A(IB+I)) - C(JB+J)=Z*(A(IA+I)-A(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=ZZ*(PA(IIA+II)+PA(IIB+II)) + PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -1467,131 +1468,131 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 3 !C ------------------- - IA=1 - IB=IA+IINK - IC=IB+IINK - JA=1 - JB=JA+2*M-ILA - JC=JB + IIA=1 + IIB=IIA+IINK + IIC=IIB+IINK + IJA=1 + IJB=IJA+2*IM-KLA + IJC=IJB - IF ( ILA /= M ) THEN + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+(A(IB+I)+A(IC+I)) - C(JB+J)=A(IA+I)-0.5*(A(IB+I)+A(IC+I)) - D(JB+J)=XSIN60*(A(IC+I)-A(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) + PC(IJB+IJ)=PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)) + PD(IJB+IJ)=XSIN60*(PA(IIC+II)-PA(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - JA=JA+JINK + IJA=IJA+JINK JINK=2*JINK - JB=JB+JINK - JC=JC-JINK - IBASE=IBASE+IJUMP + IJB=IJB+JINK + IJC=IJC-JINK + IIBASE=IIBASE+IJUMP IJUMP=2*IJUMP+IINK - IF ( JA /= JC ) THEN + IF ( IJA /= IJC ) THEN !$acc kernels - IBASE0 = IBASE - JA0 = JA - JB0 = JB - JC0 = JC -!$acc loop independent private( KB, KC, C1, S1, C2, S2, IBASE, JA, JB, JC ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - IBASE = IBASE0 + (K-ILA)/ILA * (ILA + IJUMP ) - JA = JA0 + (K-ILA)/ILA * JINK - JB = JB0 + (K-ILA)/ILA * JINK - JC = JC0 - (K-ILA)/ILA * JINK + IBASE0 = IIBASE + IJA0 = IJA + IJB0 = IJB + IJC0 = IJC +!$acc loop independent private( IKB, IKC, ZC1, ZS1, ZC2, ZS2, IIBASE, IJA, IJB, IJC ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + IIBASE = IBASE0 + (IIK-KLA)/KLA * (KLA + IJUMP ) + IJA = IJA0 + (IIK-KLA)/KLA * JINK + IJB = IJB0 + (IIK-KLA)/KLA * JINK + IJC = IJC0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A1, A2, A3, B1, B2, B3 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JL - 1 + (IJK - 1 ) * INC4 - A1=(C1*A(IB+I)+S1*B(IB+I))+(C2*A(IC+I)+S2*B(IC+I)) - B1=(C1*B(IB+I)-S1*A(IB+I))+(C2*B(IC+I)-S2*A(IC+I)) - A2=A(IA+I)-0.5*A1 - B2=B(IA+I)-0.5*B1 - A3=XSIN60*((C1*A(IB+I)+S1*B(IB+I))-(C2*A(IC+I)+S2*B(IC+I))) - B3=XSIN60*((C1*B(IB+I)-S1*A(IB+I))-(C2*B(IC+I)-S2*A(IC+I))) - C(JA+J)=A(IA+I)+A1 - D(JA+J)=B(IA+I)+B1 - C(JB+J)=A2+B3 - D(JB+J)=B2-A3 - C(JC+J)=A2-B3 - D(JC+J)=-(B2+A3) +!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZB1, ZB2, ZB3 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJL - 1 + (IJK - 1 ) * KINC4 + ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) + ZB1=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+(ZC2*PB(IIC+II)-ZS2*PA(IIC+II)) + ZA2=PA(IIA+II)-0.5*ZA1 + ZB2=PB(IIA+II)-0.5*ZB1 + ZA3=XSIN60*((ZC1*PA(IIB+II)+ZS1*PB(IIB+II))-(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))) + ZB3=XSIN60*((ZC1*PB(IIB+II)-ZS1*PA(IIB+II))-(ZC2*PB(IIC+II)-ZS2*PA(IIC+II))) + PC(IJA+IJ)=PA(IIA+II)+ZA1 + PD(IJA+IJ)=PB(IIA+II)+ZB1 + PC(IJB+IJ)=ZA2+ZB3 + PD(IJB+IJ)=ZB2-ZA3 + PC(IJC+IJ)=ZA2-ZB3 + PD(IJC+IJ)=-(ZB2+ZA3) END DO END DO END DO - IBASE = IBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + IJUMP ) - JA = JA0 + ((KSTOP-ILA)/ILA+1) * JINK - JB = JB0 + ((KSTOP-ILA)/ILA+1) * JINK - JC = JC0 - ((KSTOP-ILA)/ILA+1) * JINK + IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) + IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK + IJB = IJB0 + ((ISTOP-KLA)/KLA+1) * JINK + IJC = IJC0 - ((ISTOP-KLA)/KLA+1) * JINK !$acc end kernels END IF - IF ( JA <= JC ) THEN + IF ( IJA <= IJC ) THEN !$acc kernels - JBASE=0 + IJBASE=0 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+0.5*(A(IB+I)-A(IC+I)) - D(JA+J)=-XSIN60*(A(IB+I)+A(IC+I)) - C(JB+J)=A(IA+I)-(A(IB+I)-A(IC+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+0.5*(PA(IIB+II)-PA(IIC+II)) + PD(IJA+IJ)=-XSIN60*(PA(IIB+II)+PA(IIC+II)) + PC(IJB+IJ)=PA(IIA+II)-(PA(IIB+II)-PA(IIC+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - Z=1.0/REAL(N) - ZSIN60=Z*XSIN60 + ZZ=1.0/REAL(KN) + ZZSIN60=ZZ*XSIN60 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=Z*(A(IA+I)+(A(IB+I)+A(IC+I))) - C(JB+J)=Z*(A(IA+I)-0.5*(A(IB+I)+A(IC+I))) - D(JB+J)=ZSIN60*(A(IC+I)-A(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=ZZ*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))) + PC(IJB+IJ)=ZZ*(PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II))) + PD(IJB+IJ)=ZZSIN60*(PA(IIC+II)-PA(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -1603,145 +1604,145 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 4 ! ------------------- - IA=1 - IB=IA+IINK - IC=IB+IINK - ID=IC+IINK - JA=1 - JB=JA+2*M-ILA - JC=JB+2*M - JD=JB - - IF ( ILA /= M ) THEN + IIA=1 + IIB=IIA+IINK + IIC=IIB+IINK + IID=IIC+IINK + IJA=1 + IJB=IJA+2*IM-KLA + IJC=IJB+2*IM + IJD=IJB + + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I)) - C(JC+J)=(A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I)) - C(JB+J)=A(IA+I)-A(IC+I) - D(JB+J)=A(ID+I)-A(IB+I) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)) + PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II)) + PC(IJB+IJ)=PA(IIA+II)-PA(IIC+II) + PD(IJB+IJ)=PA(IID+II)-PA(IIB+II) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - JA=JA+JINK + IJA=IJA+JINK JINK=2*JINK - JB=JB+JINK - JC=JC-JINK - JD=JD-JINK - IBASE=IBASE+IJUMP + IJB=IJB+JINK + IJC=IJC-JINK + IJD=IJD-JINK + IIBASE=IIBASE+IJUMP IJUMP=2*IJUMP+IINK - IF ( JB /= JC ) THEN + IF ( IJB /= IJC ) THEN !$acc kernels - IBASE0 = IBASE - JA0 = JA - JB0 = JB - JC0 = JC - JD0 = JD -!$acc loop independent private( KB, KC, KD, C1, S1, C2, S2, C3, S3, IBASE, JA, JB, JC, JD ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - KD=KC+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - C3=TRIGS(KD+1) - S3=TRIGS(KD+2) - IBASE = IBASE0 + (K-ILA)/ILA * (ILA + IJUMP ) - JA = JA0 + (K-ILA)/ILA * JINK - JB = JB0 + (K-ILA)/ILA * JINK - JC = JC0 - (K-ILA)/ILA * JINK - JD = JD0 - (K-ILA)/ILA * JINK + IBASE0 = IIBASE + IJA0 = IJA + IJB0 = IJB + IJC0 = IJC + IJD0 = IJD +!$acc loop independent private( IKB, IKC, IKD, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, IIBASE, IJA, IJB, IJC, IJD ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + IKD=IKC+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + ZC3=PTRIGS(IKD+1) + ZS3=PTRIGS(IKD+2) + IIBASE = IBASE0 + (IIK-KLA)/KLA * (KLA + IJUMP ) + IJA = IJA0 + (IIK-KLA)/KLA * JINK + IJB = IJB0 + (IIK-KLA)/KLA * JINK + IJC = IJC0 - (IIK-KLA)/KLA * JINK + IJD = IJD0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A0, A1, A2, A3, B0, B1, B2, B3 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JL - 1 + (IJK - 1 ) * INC4 - A0=A(IA+I)+(C2*A(IC+I)+S2*B(IC+I)) - A2=A(IA+I)-(C2*A(IC+I)+S2*B(IC+I)) - A1=(C1*A(IB+I)+S1*B(IB+I))+(C3*A(ID+I)+S3*B(ID+I)) - A3=(C1*A(IB+I)+S1*B(IB+I))-(C3*A(ID+I)+S3*B(ID+I)) - B0=B(IA+I)+(C2*B(IC+I)-S2*A(IC+I)) - B2=B(IA+I)-(C2*B(IC+I)-S2*A(IC+I)) - B1=(C1*B(IB+I)-S1*A(IB+I))+(C3*B(ID+I)-S3*A(ID+I)) - B3=(C1*B(IB+I)-S1*A(IB+I))-(C3*B(ID+I)-S3*A(ID+I)) - C(JA+J)=A0+A1 - C(JC+J)=A0-A1 - D(JA+J)=B0+B1 - D(JC+J)=B1-B0 - C(JB+J)=A2+B3 - C(JD+J)=A2-B3 - D(JB+J)=B2-A3 - D(JD+J)=-(B2+A3) +!$acc loop independent private( II, IJ, ZA0, ZA1, ZA2, ZA3, ZB0, ZB1, ZB2, ZB3 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJL - 1 + (IJK - 1 ) * KINC4 + ZA0=PA(IIA+II)+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) + ZA2=PA(IIA+II)-(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) + ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC3*PA(IID+II)+ZS3*PB(IID+II)) + ZA3=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))-(ZC3*PA(IID+II)+ZS3*PB(IID+II)) + ZB0=PB(IIA+II)+(ZC2*PB(IIC+II)-ZS2*PA(IIC+II)) + ZB2=PB(IIA+II)-(ZC2*PB(IIC+II)-ZS2*PA(IIC+II)) + ZB1=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+(ZC3*PB(IID+II)-ZS3*PA(IID+II)) + ZB3=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))-(ZC3*PB(IID+II)-ZS3*PA(IID+II)) + PC(IJA+IJ)=ZA0+ZA1 + PC(IJC+IJ)=ZA0-ZA1 + PD(IJA+IJ)=ZB0+ZB1 + PD(IJC+IJ)=ZB1-ZB0 + PC(IJB+IJ)=ZA2+ZB3 + PC(IJD+IJ)=ZA2-ZB3 + PD(IJB+IJ)=ZB2-ZA3 + PD(IJD+IJ)=-(ZB2+ZA3) END DO END DO END DO - IBASE = IBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + IJUMP ) - JA = JA0 + ((KSTOP-ILA)/ILA+1) * JINK - JB = JB0 + ((KSTOP-ILA)/ILA+1) * JINK - JC = JC0 - ((KSTOP-ILA)/ILA+1) * JINK - JD = JD0 - ((KSTOP-ILA)/ILA+1) * JINK + IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) + IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK + IJB = IJB0 + ((ISTOP-KLA)/KLA+1) * JINK + IJC = IJC0 - ((ISTOP-KLA)/KLA+1) * JINK + IJD = IJD0 - ((ISTOP-KLA)/KLA+1) * JINK !$acc end kernels END IF - IF ( JB <= JC ) THEN + IF ( IJB <= IJC ) THEN !$acc kernels - SIN45=SQRT(0.5) - JBASE=0 + ZSIN45=SQRT(0.5) + IJBASE=0 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=A(IA+I)+SIN45*(A(IB+I)-A(ID+I)) - C(JB+J)=A(IA+I)-SIN45*(A(IB+I)-A(ID+I)) - D(JA+J)=-A(IC+I)-SIN45*(A(IB+I)+A(ID+I)) - D(JB+J)=A(IC+I)-SIN45*(A(IB+I)+A(ID+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=PA(IIA+II)+ZSIN45*(PA(IIB+II)-PA(IID+II)) + PC(IJB+IJ)=PA(IIA+II)-ZSIN45*(PA(IIB+II)-PA(IID+II)) + PD(IJA+IJ)=-PA(IIC+II)-ZSIN45*(PA(IIB+II)+PA(IID+II)) + PD(IJB+IJ)=PA(IIC+II)-ZSIN45*(PA(IIB+II)+PA(IID+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF - ELSE ! ILA == M + ELSE ! KLA == M !$acc kernels - Z=1.0/REAL(N) + ZZ=1.0/REAL(KN) !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=Z*((A(IA+I)+A(IC+I))+(A(IB+I)+A(ID+I))) - C(JC+J)=Z*((A(IA+I)+A(IC+I))-(A(IB+I)+A(ID+I))) - C(JB+J)=Z*(A(IA+I)-A(IC+I)) - D(JB+J)=Z*(A(ID+I)-A(IB+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))) + PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) + PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIC+II)) + PD(IJB+IJ)=ZZ*(PA(IID+II)-PA(IIB+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF IBAD=0 @@ -1752,191 +1753,192 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 5 ! ------------------- - IA=1 - IB=IA+IINK - IC=IB+IINK - ID=IC+IINK - IE=ID+IINK - JA=1 - JB=JA+2*M-ILA - JC=JB+2*M - JD=JC - JE=JB - - IF ( ILA /= M ) THEN + IIA=1 + IIB=IIA+IINK + IIC=IIB+IINK + IID=IIC+IINK + IIE=IID+IINK + IJA=1 + IJB=IJA+2*IM-KLA + IJC=IJB+2*IM + IJD=IJC + IJE=IJB + + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - A1=A(IB+I)+A(IE+I) - A3=A(IB+I)-A(IE+I) - A2=A(IC+I)+A(ID+I) - A4=A(IC+I)-A(ID+I) - A5=A(IA+I)-0.25*(A1+A2) - A6=XQRT5*(A1-A2) - C(JA+J)=A(IA+I)+(A1+A2) - C(JB+J)=A5+A6 - C(JC+J)=A5-A6 - D(JB+J)=-XSIN72*A3-XSIN36*A4 - D(JC+J)=-XSIN36*A3+XSIN72*A4 +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + ZA1=PA(IIB+II)+PA(IIE+II) + ZA3=PA(IIB+II)-PA(IIE+II) + ZA2=PA(IIC+II)+PA(IID+II) + ZA4=PA(IIC+II)-PA(IID+II) + ZA5=PA(IIA+II)-0.25*(ZA1+ZA2) + ZA6=XQRT5*(ZA1-ZA2) + PC(IJA+IJ)=PA(IIA+II)+(ZA1+ZA2) + PC(IJB+IJ)=ZA5+ZA6 + PC(IJC+IJ)=ZA5-ZA6 + PD(IJB+IJ)=-XSIN72*ZA3-XSIN36*ZA4 + PD(IJC+IJ)=-XSIN36*ZA3+XSIN72*ZA4 END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - JA=JA+JINK + IJA=IJA+JINK JINK=2*JINK - JB=JB+JINK - JC=JC+JINK - JD=JD-JINK - JE=JE-JINK - IBASE=IBASE+IJUMP + IJB=IJB+JINK + IJC=IJC+JINK + IJD=IJD-JINK + IJE=IJE-JINK + IIBASE=IIBASE+IJUMP IJUMP=2*IJUMP+IINK - IF ( JB /= JD ) THEN + IF ( IJB /= IJD ) THEN !$acc kernels - IBASE0 = IBASE - JA0 = JA - JB0 = JB - JC0 = JC - JD0 = JD - JE0 = JE -!$acc loop independent private( KB, KC, KD, KE, C1, S1, C2, S2, C3, S3, C4, S4, IBASE, JA, JB, JC, JD, JE ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - KD=KC+KB - KE=KD+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - C3=TRIGS(KD+1) - S3=TRIGS(KD+2) - C4=TRIGS(KE+1) - S4=TRIGS(KE+2) - IBASE = IBASE0 + (K-ILA)/ILA * (ILA + IJUMP ) - JA = JA0 + (K-ILA)/ILA * JINK - JB = JB0 + (K-ILA)/ILA * JINK - JC = JC0 + (K-ILA)/ILA * JINK - JD = JD0 - (K-ILA)/ILA * JINK - JE = JE0 - (K-ILA)/ILA * JINK + IBASE0 = IIBASE + IJA0 = IJA + IJB0 = IJB + IJC0 = IJC + IJD0 = IJD + IJE0 = IJE +!$acc loop independent private( IKB, IKC, IKD, IKE, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, ZC4, ZS4, IIBASE, IJA, IJB, IJC, IJD, IJE ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + IKD=IKC+IKB + IKE=IKD+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + ZC3=PTRIGS(IKD+1) + ZS3=PTRIGS(IKD+2) + ZC4=PTRIGS(IKE+1) + ZS4=PTRIGS(IKE+2) + IIBASE = IBASE0 + (IIK-KLA)/KLA * (KLA + IJUMP ) + IJA = IJA0 + (IIK-KLA)/KLA * JINK + IJB = IJB0 + (IIK-KLA)/KLA * JINK + IJC = IJC0 + (IIK-KLA)/KLA * JINK + IJD = IJD0 - (IIK-KLA)/KLA * JINK + IJE = IJE0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A6, A10, A11, A20, A21, B1, B2, B3, B4, B5, B6, B10, B11, B20, B21 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JL - 1 + (IJK - 1 ) * INC4 - A1=(C1*A(IB+I)+S1*B(IB+I))+(C4*A(IE+I)+S4*B(IE+I)) - A3=(C1*A(IB+I)+S1*B(IB+I))-(C4*A(IE+I)+S4*B(IE+I)) - A2=(C2*A(IC+I)+S2*B(IC+I))+(C3*A(ID+I)+S3*B(ID+I)) - A4=(C2*A(IC+I)+S2*B(IC+I))-(C3*A(ID+I)+S3*B(ID+I)) - B1=(C1*B(IB+I)-S1*A(IB+I))+(C4*B(IE+I)-S4*A(IE+I)) - B3=(C1*B(IB+I)-S1*A(IB+I))-(C4*B(IE+I)-S4*A(IE+I)) - B2=(C2*B(IC+I)-S2*A(IC+I))+(C3*B(ID+I)-S3*A(ID+I)) - B4=(C2*B(IC+I)-S2*A(IC+I))-(C3*B(ID+I)-S3*A(ID+I)) - A5=A(IA+I)-0.25*(A1+A2) - A6=XQRT5*(A1-A2) - B5=B(IA+I)-0.25*(B1+B2) - B6=XQRT5*(B1-B2) - A10=A5+A6 - A20=A5-A6 - B10=B5+B6 - B20=B5-B6 - A11=XSIN72*B3+XSIN36*B4 - A21=XSIN36*B3-XSIN72*B4 - B11=XSIN72*A3+XSIN36*A4 - B21=XSIN36*A3-XSIN72*A4 - C(JA+J)=A(IA+I)+(A1+A2) - C(JB+J)=A10+A11 - C(JE+J)=A10-A11 - C(JC+J)=A20+A21 - C(JD+J)=A20-A21 - D(JA+J)=B(IA+I)+(B1+B2) - D(JB+J)=B10-B11 - D(JE+J)=-(B10+B11) - D(JC+J)=B20-B21 - D(JD+J)=-(B20+B21) +!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6, ZA10, ZA11, ZA20, ZA21, & +!$acc & ZB1, ZB2, ZB3, ZB4, ZB5, ZB6, ZB10, ZB11, ZB20, ZB21 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJL - 1 + (IJK - 1 ) * KINC4 + ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC4*PA(IIE+II)+ZS4*PB(IIE+II)) + ZA3=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))-(ZC4*PA(IIE+II)+ZS4*PB(IIE+II)) + ZA2=(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))+(ZC3*PA(IID+II)+ZS3*PB(IID+II)) + ZA4=(ZC2*PA(IIC+II)+ZS2*PB(IIC+II))-(ZC3*PA(IID+II)+ZS3*PB(IID+II)) + ZB1=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+(ZC4*PB(IIE+II)-ZS4*PA(IIE+II)) + ZB3=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))-(ZC4*PB(IIE+II)-ZS4*PA(IIE+II)) + ZB2=(ZC2*PB(IIC+II)-ZS2*PA(IIC+II))+(ZC3*PB(IID+II)-ZS3*PA(IID+II)) + ZB4=(ZC2*PB(IIC+II)-ZS2*PA(IIC+II))-(ZC3*PB(IID+II)-ZS3*PA(IID+II)) + ZA5=PA(IIA+II)-0.25*(ZA1+ZA2) + ZA6=XQRT5*(ZA1-ZA2) + ZB5=PB(IIA+II)-0.25*(ZB1+ZB2) + ZB6=XQRT5*(ZB1-ZB2) + ZA10=ZA5+ZA6 + ZA20=ZA5-ZA6 + ZB10=ZB5+ZB6 + ZB20=ZB5-ZB6 + ZA11=XSIN72*ZB3+XSIN36*ZB4 + ZA21=XSIN36*ZB3-XSIN72*ZB4 + ZB11=XSIN72*ZA3+XSIN36*ZA4 + ZB21=XSIN36*ZA3-XSIN72*ZA4 + PC(IJA+IJ)=PA(IIA+II)+(ZA1+ZA2) + PC(IJB+IJ)=ZA10+ZA11 + PC(IJE+IJ)=ZA10-ZA11 + PC(IJC+IJ)=ZA20+ZA21 + PC(IJD+IJ)=ZA20-ZA21 + PD(IJA+IJ)=PB(IIA+II)+(ZB1+ZB2) + PD(IJB+IJ)=ZB10-ZB11 + PD(IJE+IJ)=-(ZB10+ZB11) + PD(IJC+IJ)=ZB20-ZB21 + PD(IJD+IJ)=-(ZB20+ZB21) END DO END DO END DO - IBASE = IBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + IJUMP ) - JA = JA0 + ((KSTOP-ILA)/ILA+1) * JINK - JB = JB0 + ((KSTOP-ILA)/ILA+1) * JINK - JC = JC0 + ((KSTOP-ILA)/ILA+1) * JINK - JD = JD0 - ((KSTOP-ILA)/ILA+1) * JINK - JE = JE0 - ((KSTOP-ILA)/ILA+1) * JINK + IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) + IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK + IJB = IJB0 + ((ISTOP-KLA)/KLA+1) * JINK + IJC = IJC0 + ((ISTOP-KLA)/KLA+1) * JINK + IJD = IJD0 - ((ISTOP-KLA)/KLA+1) * JINK + IJE = IJE0 - ((ISTOP-KLA)/KLA+1) * JINK !$acc end kernels END IF - IF ( JB <= JD ) THEN + IF ( IJB <= IJD ) THEN !$acc kernels - JBASE=0 + IJBASE=0 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A6 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - A1=A(IB+I)+A(IE+I) - A3=A(IB+I)-A(IE+I) - A2=A(IC+I)+A(ID+I) - A4=A(IC+I)-A(ID+I) - A5=A(IA+I)+0.25*(A3-A4) - A6=XQRT5*(A3+A4) - C(JA+J)=A5+A6 - C(JB+J)=A5-A6 - C(JC+J)=A(IA+I)-(A3-A4) - D(JA+J)=-XSIN36*A1-XSIN72*A2 - D(JB+J)=-XSIN72*A1+XSIN36*A2 +!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + ZA1=PA(IIB+II)+PA(IIE+II) + ZA3=PA(IIB+II)-PA(IIE+II) + ZA2=PA(IIC+II)+PA(IID+II) + ZA4=PA(IIC+II)-PA(IID+II) + ZA5=PA(IIA+II)+0.25*(ZA3-ZA4) + ZA6=XQRT5*(ZA3+ZA4) + PC(IJA+IJ)=ZA5+ZA6 + PC(IJB+IJ)=ZA5-ZA6 + PC(IJC+IJ)=PA(IIA+II)-(ZA3-ZA4) + PD(IJA+IJ)=-XSIN36*ZA1-XSIN72*ZA2 + PD(IJB+IJ)=-XSIN72*ZA1+XSIN36*ZA2 END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - Z=1.0/REAL(N) - ZQRT5=Z*XQRT5 - ZSIN36=Z*XSIN36 - ZSIN72=Z*XSIN72 + ZZ=1.0/REAL(KN) + ZZQRT5=ZZ*XQRT5 + ZZSIN36=ZZ*XSIN36 + ZZSIN72=ZZ*XSIN72 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A6 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - A1=A(IB+I)+A(IE+I) - A3=A(IB+I)-A(IE+I) - A2=A(IC+I)+A(ID+I) - A4=A(IC+I)-A(ID+I) - A5=Z*(A(IA+I)-0.25*(A1+A2)) - A6=ZQRT5*(A1-A2) - C(JA+J)=Z*(A(IA+I)+(A1+A2)) - C(JB+J)=A5+A6 - C(JC+J)=A5-A6 - D(JB+J)=-ZSIN72*A3-ZSIN36*A4 - D(JC+J)=-ZSIN36*A3+ZSIN72*A4 +!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + ZA1=PA(IIB+II)+PA(IIE+II) + ZA3=PA(IIB+II)-PA(IIE+II) + ZA2=PA(IIC+II)+PA(IID+II) + ZA4=PA(IIC+II)-PA(IID+II) + ZA5=ZZ*(PA(IIA+II)-0.25*(ZA1+ZA2)) + ZA6=ZZQRT5*(ZA1-ZA2) + PC(IJA+IJ)=ZZ*(PA(IIA+II)+(ZA1+ZA2)) + PC(IJB+IJ)=ZA5+ZA6 + PC(IJC+IJ)=ZA5-ZA6 + PD(IJB+IJ)=-ZZSIN72*ZA3-ZZSIN36*ZA4 + PD(IJC+IJ)=-ZZSIN36*ZA3+ZZSIN72*ZA4 END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF IBAD=0 @@ -1947,193 +1949,194 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 6 ! ------------------- - IA=1 - IB=IA+IINK - IC=IB+IINK - ID=IC+IINK - IE=ID+IINK - IF=IE+IINK - JA=1 - JB=JA+2*M-ILA - JC=JB+2*M - JD=JC+2*M - JE=JC - JF=JB - - IF ( ILA /= M ) THEN + IIA=1 + IIB=IIA+IINK + IIC=IIB+IINK + IID=IIC+IINK + IIE=IID+IINK + IIF=IIE+IINK + IJA=1 + IJB=IJA+2*IM-KLA + IJC=IJB+2*IM + IJD=IJC+2*IM + IJE=IJC + IJF=IJB + + IF ( KLA /= IM ) THEN !$acc kernels !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A11 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - A11=(A(IC+I)+A(IF+I))+(A(IB+I)+A(IE+I)) - C(JA+J)=(A(IA+I)+A(ID+I))+A11 - C(JC+J)=(A(IA+I)+A(ID+I)-0.5*A11) - D(JC+J)=XSIN60*((A(IC+I)+A(IF+I))-(A(IB+I)+A(IE+I))) - A11=(A(IC+I)-A(IF+I))+(A(IE+I)-A(IB+I)) - C(JB+J)=(A(IA+I)-A(ID+I))-0.5*A11 - D(JB+J)=XSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) - C(JD+J)=(A(IA+I)-A(ID+I))+A11 +!$acc loop independent private( II, IJ, ZA11 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II)) + PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+ZA11 + PC(IJC+IJ)=(PA(IIA+II)+PA(IID+II)-0.5*ZA11) + PD(IJC+IJ)=XSIN60*((PA(IIC+II)+PA(IIF+II))-(PA(IIB+II)+PA(IIE+II))) + ZA11=(PA(IIC+II)-PA(IIF+II))+(PA(IIE+II)-PA(IIB+II)) + PC(IJB+IJ)=(PA(IIA+II)-PA(IID+II))-0.5*ZA11 + PD(IJB+IJ)=XSIN60*((PA(IIE+II)-PA(IIB+II))-(PA(IIC+II)-PA(IIF+II))) + PC(IJD+IJ)=(PA(IIA+II)-PA(IID+II))+ZA11 END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels - JA=JA+JINK + IJA=IJA+JINK JINK=2*JINK - JB=JB+JINK - JC=JC+JINK - JD=JD-JINK - JE=JE-JINK - JF=JF-JINK - IBASE=IBASE+IJUMP + IJB=IJB+JINK + IJC=IJC+JINK + IJD=IJD-JINK + IJE=IJE-JINK + IJF=IJF-JINK + IIBASE=IIBASE+IJUMP IJUMP=2*IJUMP+IINK - IF (JC /= JD ) THEN + IF (IJC /= IJD ) THEN !$acc kernels - IBASE0 = IBASE - JA0 = JA - JB0 = JB - JC0 = JC - JD0 = JD - JE0 = JE - JF0 = JF -!$acc loop independent private( KB, KC, KD, KE, KF, C1, S1, C2, S2, C3, S3, C4, S4, C5, S5, IBASE, JA, JB, JC, JD, JE, JF ) - DO K=ILA,KSTOP,ILA - KB=K+K - KC=KB+KB - KD=KC+KB - KE=KD+KB - KF=KE+KB - C1=TRIGS(KB+1) - S1=TRIGS(KB+2) - C2=TRIGS(KC+1) - S2=TRIGS(KC+2) - C3=TRIGS(KD+1) - S3=TRIGS(KD+2) - C4=TRIGS(KE+1) - S4=TRIGS(KE+2) - C5=TRIGS(KF+1) - S5=TRIGS(KF+2) - IBASE = IBASE0 + (K-ILA)/ILA * (ILA + IJUMP ) - JA = JA0 + (K-ILA)/ILA * JINK - JB = JB0 + (K-ILA)/ILA * JINK - JC = JC0 + (K-ILA)/ILA * JINK - JD = JD0 - (K-ILA)/ILA * JINK - JE = JE0 - (K-ILA)/ILA * JINK - JF = JF0 - (K-ILA)/ILA * JINK + IBASE0 = IIBASE + IJA0 = IJA + IJB0 = IJB + IJC0 = IJC + IJD0 = IJD + IJE0 = IJE + IJF0 = IJF +!$acc loop independent private( IKB, IKC, IKD, IKE, IKF, ZC1, ZS1, ZC2, ZS2, ZC3, ZS3, ZC4, ZS4, ZC5, ZS5, & +!$acc & IIBASE, IJA, IJB, IJC, IJD, IJE, IJF ) + DO IIK=KLA,ISTOP,KLA + IKB=IIK+IIK + IKC=IKB+IKB + IKD=IKC+IKB + IKE=IKD+IKB + IKF=IKE+IKB + ZC1=PTRIGS(IKB+1) + ZS1=PTRIGS(IKB+2) + ZC2=PTRIGS(IKC+1) + ZS2=PTRIGS(IKC+2) + ZC3=PTRIGS(IKD+1) + ZS3=PTRIGS(IKD+2) + ZC4=PTRIGS(IKE+1) + ZS4=PTRIGS(IKE+2) + ZC5=PTRIGS(IKF+1) + ZS5=PTRIGS(IKF+2) + IIBASE = IBASE0 + (IIK-KLA)/KLA * (KLA + IJUMP ) + IJA = IJA0 + (IIK-KLA)/KLA * JINK + IJB = IJB0 + (IIK-KLA)/KLA * JINK + IJC = IJC0 + (IIK-KLA)/KLA * JINK + IJD = IJD0 - (IIK-KLA)/KLA * JINK + IJE = IJE0 - (IIK-KLA)/KLA * JINK + IJF = IJF0 - (IIK-KLA)/KLA * JINK !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A11, A20, A21, B1, B2, B3, B4, B5, B11, B20, B21 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JL - 1 + (IJK - 1 ) * INC4 - A1=C1*A(IB+I)+S1*B(IB+I) - B1=C1*B(IB+I)-S1*A(IB+I) - A2=C2*A(IC+I)+S2*B(IC+I) - B2=C2*B(IC+I)-S2*A(IC+I) - A3=C3*A(ID+I)+S3*B(ID+I) - B3=C3*B(ID+I)-S3*A(ID+I) - A4=C4*A(IE+I)+S4*B(IE+I) - B4=C4*B(IE+I)-S4*A(IE+I) - A5=C5*A(IF+I)+S5*B(IF+I) - B5=C5*B(IF+I)-S5*A(IF+I) - A11=(A2+A5)+(A1+A4) - A20=(A(IA+I)+A3)-0.5*A11 - A21=XSIN60*((A2+A5)-(A1+A4)) - B11=(B2+B5)+(B1+B4) - B20=(B(IA+I)+B3)-0.5*B11 - B21=XSIN60*((B2+B5)-(B1+B4)) - C(JA+J)=(A(IA+I)+A3)+A11 - D(JA+J)=(B(IA+I)+B3)+B11 - C(JC+J)=A20-B21 - D(JC+J)=A21+B20 - C(JE+J)=A20+B21 - D(JE+J)=A21-B20 - A11=(A2-A5)+(A4-A1) - A20=(A(IA+I)-A3)-0.5*A11 - A21=XSIN60*((A4-A1)-(A2-A5)) - B11=(B5-B2)-(B4-B1) - B20=(B3-B(IA+I))-0.5*B11 - B21=XSIN60*((B5-B2)+(B4-B1)) - C(JB+J)=A20-B21 - D(JB+J)=A21-B20 - C(JD+J)=A11+(A(IA+I)-A3) - D(JD+J)=B11+(B3-B(IA+I)) - C(JF+J)=A20+B21 - D(JF+J)=A21+B20 +!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA11, ZA20, ZA21, ZB1, ZB2, ZB3, ZB4, ZB5, ZB11, ZB20, ZB21 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJL - 1 + (IJK - 1 ) * KINC4 + ZA1=ZC1*PA(IIB+II)+ZS1*PB(IIB+II) + ZB1=ZC1*PB(IIB+II)-ZS1*PA(IIB+II) + ZA2=ZC2*PA(IIC+II)+ZS2*PB(IIC+II) + ZB2=ZC2*PB(IIC+II)-ZS2*PA(IIC+II) + ZA3=ZC3*PA(IID+II)+ZS3*PB(IID+II) + ZB3=ZC3*PB(IID+II)-ZS3*PA(IID+II) + ZA4=ZC4*PA(IIE+II)+ZS4*PB(IIE+II) + ZB4=ZC4*PB(IIE+II)-ZS4*PA(IIE+II) + ZA5=ZC5*PA(IIF+II)+ZS5*PB(IIF+II) + ZB5=ZC5*PB(IIF+II)-ZS5*PA(IIF+II) + ZA11=(ZA2+ZA5)+(ZA1+ZA4) + ZA20=(PA(IIA+II)+ZA3)-0.5*ZA11 + ZA21=XSIN60*((ZA2+ZA5)-(ZA1+ZA4)) + ZB11=(ZB2+ZB5)+(ZB1+ZB4) + ZB20=(PB(IIA+II)+ZB3)-0.5*ZB11 + ZB21=XSIN60*((ZB2+ZB5)-(ZB1+ZB4)) + PC(IJA+IJ)=(PA(IIA+II)+ZA3)+ZA11 + PD(IJA+IJ)=(PB(IIA+II)+ZB3)+ZB11 + PC(IJC+IJ)=ZA20-ZB21 + PD(IJC+IJ)=ZA21+ZB20 + PC(IJE+IJ)=ZA20+ZB21 + PD(IJE+IJ)=ZA21-ZB20 + ZA11=(ZA2-ZA5)+(ZA4-ZA1) + ZA20=(PA(IIA+II)-ZA3)-0.5*ZA11 + ZA21=XSIN60*((ZA4-ZA1)-(ZA2-ZA5)) + ZB11=(ZB5-ZB2)-(ZB4-ZB1) + ZB20=(ZB3-PB(IIA+II))-0.5*ZB11 + ZB21=XSIN60*((ZB5-ZB2)+(ZB4-ZB1)) + PC(IJB+IJ)=ZA20-ZB21 + PD(IJB+IJ)=ZA21-ZB20 + PC(IJD+IJ)=ZA11+(PA(IIA+II)-ZA3) + PD(IJD+IJ)=ZB11+(ZB3-PB(IIA+II)) + PC(IJF+IJ)=ZA20+ZB21 + PD(IJF+IJ)=ZA21+ZB20 END DO END DO END DO - IBASE = IBASE0 + ((KSTOP-ILA)/ILA+1) * ( ILA + IJUMP ) - JA = JA0 + ((KSTOP-ILA)/ILA+1) * JINK - JB = JB0 + ((KSTOP-ILA)/ILA+1) * JINK - JC = JC0 + ((KSTOP-ILA)/ILA+1) * JINK - JD = JD0 - ((KSTOP-ILA)/ILA+1) * JINK - JE = JE0 - ((KSTOP-ILA)/ILA+1) * JINK - JF = JF0 - ((KSTOP-ILA)/ILA+1) * JINK + IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) + IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK + IJB = IJB0 + ((ISTOP-KLA)/KLA+1) * JINK + IJC = IJC0 + ((ISTOP-KLA)/KLA+1) * JINK + IJD = IJD0 - ((ISTOP-KLA)/KLA+1) * JINK + IJE = IJE0 - ((ISTOP-KLA)/KLA+1) * JINK + IJF = IJF0 - ((ISTOP-KLA)/KLA+1) * JINK !$acc end kernels END IF - IF ( JC <= JD ) THEN + IF ( IJC <= IJD ) THEN !$acc kernels - JBASE=0 + IJBASE=0 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=(A(IA+I)+0.5*(A(IC+I)-A(IE+I)))+ XSIN60*(A(IB+I)-A(IF+I)) - D(JA+J)=-(A(ID+I)+0.5*(A(IB+I)+A(IF+I)))-XSIN60*(A(IC+I)+A(IE+I)) - C(JB+J)=A(IA+I)-(A(IC+I)-A(IE+I)) - D(JB+J)=A(ID+I)-(A(IB+I)+A(IF+I)) - C(JC+J)=(A(IA+I)+0.5*(A(IC+I)-A(IE+I)))-XSIN60*(A(IB+I)-A(IF+I)) - D(JC+J)=-(A(ID+I)+0.5*(A(IB+I)+A(IF+I)))+XSIN60*(A(IC+I)+A(IE+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=(PA(IIA+II)+0.5*(PA(IIC+II)-PA(IIE+II)))+ XSIN60*(PA(IIB+II)-PA(IIF+II)) + PD(IJA+IJ)=-(PA(IID+II)+0.5*(PA(IIB+II)+PA(IIF+II)))-XSIN60*(PA(IIC+II)+PA(IIE+II)) + PC(IJB+IJ)=PA(IIA+II)-(PA(IIC+II)-PA(IIE+II)) + PD(IJB+IJ)=PA(IID+II)-(PA(IIB+II)+PA(IIF+II)) + PC(IJC+IJ)=(PA(IIA+II)+0.5*(PA(IIC+II)-PA(IIE+II)))-XSIN60*(PA(IIB+II)-PA(IIF+II)) + PD(IJC+IJ)=-(PA(IID+II)+0.5*(PA(IIB+II)+PA(IIF+II)))+XSIN60*(PA(IIC+II)+PA(IIE+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF ELSE !$acc kernels - Z=1.0/REAL(N) - ZSIN60=Z*XSIN60 + ZZ=1.0/REAL(KN) + ZZSIN60=ZZ*XSIN60 !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J, A11 ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - A11=(A(IC+I)+A(IF+I))+(A(IB+I)+A(IE+I)) - C(JA+J)=Z*((A(IA+I)+A(ID+I))+A11) - C(JC+J)=Z*((A(IA+I)+A(ID+I))-0.5*A11) - D(JC+J)=ZSIN60*((A(IC+I)+A(IF+I))-(A(IB+I)+A(IE+I))) - A11=(A(IC+I)-A(IF+I))+(A(IE+I)-A(IB+I)) - C(JB+J)=Z*((A(IA+I)-A(ID+I))-0.5*A11) - D(JB+J)=ZSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) - C(JD+J)=Z*((A(IA+I)-A(ID+I))+A11) +!$acc loop independent private( II, IJ, ZA11 ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II)) + PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IID+II))+ZA11) + PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IID+II))-0.5*ZA11) + PD(IJC+IJ)=ZZSIN60*((PA(IIC+II)+PA(IIF+II))-(PA(IIB+II)+PA(IIE+II))) + ZA11=(PA(IIC+II)-PA(IIF+II))+(PA(IIE+II)-PA(IIB+II)) + PC(IJB+IJ)=ZZ*((PA(IIA+II)-PA(IID+II))-0.5*ZA11) + PD(IJB+IJ)=ZZSIN60*((PA(IIE+II)-PA(IIB+II))-(PA(IIC+II)-PA(IIF+II))) + PC(IJD+IJ)=ZZ*((PA(IIA+II)-PA(IID+II))+ZA11) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels END IF @@ -2145,53 +2148,53 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! ! CODING FOR FACTOR 8 ! ------------------- - IF ( ILA /= M ) THEN + IF ( KLA /= IM ) THEN IBAD = 3 ELSE !$acc kernels - IA=1 - IB=IA+IINK - IC=IB+IINK - ID=IC+IINK - IE=ID+IINK - IF=IE+IINK - IG=IF+IINK - IH=IG+IINK - JA=1 - JB=JA+ILA - JC=JB+2*M - JD=JC+2*M - JE=JD+2*M - Z=1.0/REAL(N) - ZSIN45=Z*SQRT(0.5) + IIA=1 + IIB=IIA+IINK + IIC=IIB+IINK + IID=IIC+IINK + IIE=IID+IINK + IIF=IIE+IINK + IIG=IIF+IINK + IIH=IIG+IINK + IJA=1 + IJB=IJA+KLA + IJC=IJB+2*IM + IJD=IJC+2*IM + IJE=IJD+2*IM + ZZ=1.0/REAL(KN) + ZZSIN45=ZZ*SQRT(0.5) !$acc loop independent - DO JL=1,ILA + DO IJL=1,KLA !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) - DO IJK=1,ILOT - I = IBASE + JL - 1 + (IJK - 1 ) * INC3 - J = JBASE + JL - 1 + (IJK - 1 ) * INC4 - C(JA+J)=Z*(((A(IA+I)+A(IE+I))+(A(IC+I)+A(IG+I)))+ & - ((A(ID+I)+A(IH+I))+(A(IB+I)+A(IF+I)))) - C(JE+J)=Z*(((A(IA+I)+A(IE+I))+(A(IC+I)+A(IG+I)))- & - ((A(ID+I)+A(IH+I))+(A(IB+I)+A(IF+I)))) - C(JC+J)=Z*((A(IA+I)+A(IE+I))-(A(IC+I)+A(IG+I))) - D(JC+J)=Z*((A(ID+I)+A(IH+I))-(A(IB+I)+A(IF+I))) - C(JB+J)=Z*(A(IA+I)-A(IE+I)) & - +ZSIN45*((A(IH+I)-A(ID+I))-(A(IF+I)-A(IB+I))) - C(JD+J)=Z*(A(IA+I)-A(IE+I)) & - -ZSIN45*((A(IH+I)-A(ID+I))-(A(IF+I)-A(IB+I))) - D(JB+J)=ZSIN45*((A(IH+I)-A(ID+I))+(A(IF+I)-A(IB+I))) & - +Z*(A(IG+I)-A(IC+I)) - D(JD+J)=ZSIN45*((A(IH+I)-A(ID+I))+(A(IF+I)-A(IB+I))) & - -Z*(A(IG+I)-A(IC+I)) +!$acc loop independent private( II, IJ ) + DO IJK=1,KLOT + II = IIBASE + IJL - 1 + (IJK - 1 ) * KINC3 + IJ = IJBASE + IJL - 1 + (IJK - 1 ) * KINC4 + PC(IJA+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))+ & + ((PA(IID+II)+PA(IIH+II))+(PA(IIB+II)+PA(IIF+II)))) + PC(IJE+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))- & + ((PA(IID+II)+PA(IIH+II))+(PA(IIB+II)+PA(IIF+II)))) + PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IIE+II))-(PA(IIC+II)+PA(IIG+II))) + PD(IJC+IJ)=ZZ*((PA(IID+II)+PA(IIH+II))-(PA(IIB+II)+PA(IIF+II))) + PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIE+II)) & + +ZZSIN45*((PA(IIH+II)-PA(IID+II))-(PA(IIF+II)-PA(IIB+II))) + PC(IJD+IJ)=ZZ*(PA(IIA+II)-PA(IIE+II)) & + -ZZSIN45*((PA(IIH+II)-PA(IID+II))-(PA(IIF+II)-PA(IIB+II))) + PD(IJB+IJ)=ZZSIN45*((PA(IIH+II)-PA(IID+II))+(PA(IIF+II)-PA(IIB+II))) & + +ZZ*(PA(IIG+II)-PA(IIC+II)) + PD(IJD+IJ)=ZZSIN45*((PA(IIH+II)-PA(IID+II))+(PA(IIF+II)-PA(IIB+II))) & + -ZZ*(PA(IIG+II)-PA(IIC+II)) END DO END DO - IBASE = IBASE + ILA - JBASE = JBASE + ILA + IIBASE = IIBASE + KLA + IJBASE = IJBASE + KLA !$acc end kernels IBAD=0 END IF @@ -2206,7 +2209,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! RETURN ! ------ 910 CONTINUE - IERR=IBAD + KERR=IBAD !$acc end data -- GitLab