diff --git a/src/MNH/fft.f90 b/src/MNH/fft.f90 index ef31e249f6de21d6482094cfc558ee1f28b28c0f..1cdf1a0efc85ffc0639a1ca8627e7781984eaf88 100644 --- a/src/MNH/fft.f90 +++ b/src/MNH/fft.f90 @@ -346,9 +346,6 @@ END SUBROUTINE FFT991 SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KSZ4,KSZ5) -#ifdef MNH_OPENACC - USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE -#endif IMPLICIT NONE @@ -383,36 +380,18 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS ! !----------------------------------------------------------------------- ! -#ifndef MNH_OPENACC - REAL A10(NVECLEN),A11(NVECLEN),A20(NVECLEN),A21(NVECLEN),B10(NVECLEN),B11(NVECLEN),B20(NVECLEN),B21(NVECLEN) -#else - REAL, DIMENSION(:), POINTER, CONTIGUOUS :: A10, A11, A20, A21, B10, B11, B20, B21 -#endif - INTEGER :: M, IINK, JINK, JUMP, KSTOP INTEGER :: IBAD, IBASE, JBASE, IGO INTEGER :: I, J, IA, IB, JA, JB INTEGER :: IL, IJK, K, KB, IC, JC, KC INTEGER :: ID, JD, KD, IE, JE, KE, IF, JF, KF, JG, JH INTEGER :: IA0, IB0, IC0, ID0, IE0, IF0, JBASE0 + REAL :: A10, A11, A20, A21, B10, B11, B20, B21 REAL :: C1, C2, C3, C4, C5, S1, S2, S3, S4, S5 REAL :: QQRT5, SIN45, SSIN36, SSIN45, SSIN60, SSIN72 -#ifdef MNH_OPENACC - !Pin positions in the pools of MNH memory - CALL MNH_MEM_POSITION_PIN() - - CALL MNH_MEM_GET( A10, NVECLEN ) - CALL MNH_MEM_GET( A11, NVECLEN ) - CALL MNH_MEM_GET( A20, NVECLEN ) - CALL MNH_MEM_GET( A21, NVECLEN ) - CALL MNH_MEM_GET( B10, NVECLEN ) - CALL MNH_MEM_GET( B11, NVECLEN ) - CALL MNH_MEM_GET( B20, NVECLEN ) - CALL MNH_MEM_GET( B21, NVECLEN ) -#endif -!$acc data present( A, B, C, D , A10, A11, A20, A21, B10, B11, B20, B21 ) copyin( TRIGS ) +!$acc data present( A, B, C, D ) copyin( TRIGS ) !acc kernels M=N/IFAC @@ -929,34 +908,34 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A10, A11, A20, A21, B10, B11, B20, B21 ) DO IJK=1,ILOT I = IL - 1 + (IJK - 1 ) * INC3 J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - A10(IJK)=(A(IA+I)-0.25*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & + A10=(A(IA+I)-0.25*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & +XQRT5*((A(IB+I)+A(IE+I))-(A(IC+I)+A(ID+I))) - A20(IJK)=(A(IA+I)-0.25*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & + A20=(A(IA+I)-0.25*((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I)))) & -XQRT5*((A(IB+I)+A(IE+I))-(A(IC+I)+A(ID+I))) - B10(IJK)=(B(IA+I)-0.25*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & + B10=(B(IA+I)-0.25*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & +XQRT5*((B(IB+I)-B(IE+I))-(B(IC+I)-B(ID+I))) - B20(IJK)=(B(IA+I)-0.25*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & + B20=(B(IA+I)-0.25*((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I)))) & -XQRT5*((B(IB+I)-B(IE+I))-(B(IC+I)-B(ID+I))) - A11(IJK)=XSIN72*(B(IB+I)+B(IE+I))+XSIN36*(B(IC+I)+B(ID+I)) - A21(IJK)=XSIN36*(B(IB+I)+B(IE+I))-XSIN72*(B(IC+I)+B(ID+I)) - B11(IJK)=XSIN72*(A(IB+I)-A(IE+I))+XSIN36*(A(IC+I)-A(ID+I)) - B21(IJK)=XSIN36*(A(IB+I)-A(IE+I))-XSIN72*(A(IC+I)-A(ID+I)) + A11=XSIN72*(B(IB+I)+B(IE+I))+XSIN36*(B(IC+I)+B(ID+I)) + A21=XSIN36*(B(IB+I)+B(IE+I))-XSIN72*(B(IC+I)+B(ID+I)) + B11=XSIN72*(A(IB+I)-A(IE+I))+XSIN36*(A(IC+I)-A(ID+I)) + B21=XSIN36*(A(IB+I)-A(IE+I))-XSIN72*(A(IC+I)-A(ID+I)) C(JA+J)=A(IA+I)+((A(IB+I)+A(IE+I))+(A(IC+I)+A(ID+I))) D(JA+J)=B(IA+I)+((B(IB+I)-B(IE+I))+(B(IC+I)-B(ID+I))) - C(JB+J)=C1*(A10(IJK)-A11(IJK))-S1*(B10(IJK)+B11(IJK)) - D(JB+J)=S1*(A10(IJK)-A11(IJK))+C1*(B10(IJK)+B11(IJK)) - C(JE+J)=C4*(A10(IJK)+A11(IJK))-S4*(B10(IJK)-B11(IJK)) - D(JE+J)=S4*(A10(IJK)+A11(IJK))+C4*(B10(IJK)-B11(IJK)) - C(JC+J)=C2*(A20(IJK)-A21(IJK))-S2*(B20(IJK)+B21(IJK)) - D(JC+J)=S2*(A20(IJK)-A21(IJK))+C2*(B20(IJK)+B21(IJK)) - C(JD+J)=C3*(A20(IJK)+A21(IJK))-S3*(B20(IJK)-B21(IJK)) - D(JD+J)=S3*(A20(IJK)+A21(IJK))+C3*(B20(IJK)-B21(IJK)) + C(JB+J)=C1*(A10-A11)-S1*(B10+B11) + D(JB+J)=S1*(A10-A11)+C1*(B10+B11) + C(JE+J)=C4*(A10+A11)-S4*(B10-B11) + D(JE+J)=S4*(A10+A11)+C4*(B10-B11) + C(JC+J)=C2*(A20-A21)-S2*(B20+B21) + D(JC+J)=S2*(A20-A21)+C2*(B20+B21) + C(JD+J)=C3*(A20+A21)-S3*(B20-B21) + D(JD+J)=S3*(A20+A21)+C3*(B20-B21) END DO END DO END DO @@ -1126,40 +1105,40 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A11, A20, A21, B11, B20, B21 ) DO IJK=1,ILOT I = IL - 1 + (IJK - 1 ) * INC3 J = JBASE + IL - 1 + (IJK - 1 ) * INC4 - A11(IJK)= (A(IE+I)+A(IB+I))+(A(IC+I)+A(IF+I)) - A20(IJK)=(A(IA+I)+A(ID+I))-0.5*A11(IJK) - A21(IJK)=XSIN60*((A(IE+I)+A(IB+I))-(A(IC+I)+A(IF+I))) - B11(IJK)= (B(IB+I)-B(IE+I))+(B(IC+I)-B(IF+I)) - B20(IJK)=(B(IA+I)-B(ID+I))-0.5*B11(IJK) - B21(IJK)=XSIN60*((B(IB+I)-B(IE+I))-(B(IC+I)-B(IF+I))) - - C(JA+J)=(A(IA+I)+A(ID+I))+A11(IJK) - D(JA+J)=(B(IA+I)-B(ID+I))+B11(IJK) - C(JC+J)=C2*(A20(IJK)-B21(IJK))-S2*(B20(IJK)+A21(IJK)) - D(JC+J)=S2*(A20(IJK)-B21(IJK))+C2*(B20(IJK)+A21(IJK)) - C(JE+J)=C4*(A20(IJK)+B21(IJK))-S4*(B20(IJK)-A21(IJK)) - D(JE+J)=S4*(A20(IJK)+B21(IJK))+C4*(B20(IJK)-A21(IJK)) - - A11(IJK)=(A(IE+I)-A(IB+I))+(A(IC+I)-A(IF+I)) - B11(IJK)=(B(IE+I)+B(IB+I))-(B(IC+I)+B(IF+I)) - A20(IJK)=(A(IA+I)-A(ID+I))-0.5*A11(IJK) - A21(IJK)=XSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) - B20(IJK)=(B(IA+I)+B(ID+I))+0.5*B11(IJK) - B21(IJK)=XSIN60*((B(IE+I)+B(IB+I))+(B(IC+I)+B(IF+I))) + A11= (A(IE+I)+A(IB+I))+(A(IC+I)+A(IF+I)) + A20=(A(IA+I)+A(ID+I))-0.5*A11 + A21=XSIN60*((A(IE+I)+A(IB+I))-(A(IC+I)+A(IF+I))) + B11= (B(IB+I)-B(IE+I))+(B(IC+I)-B(IF+I)) + B20=(B(IA+I)-B(ID+I))-0.5*B11 + B21=XSIN60*((B(IB+I)-B(IE+I))-(B(IC+I)-B(IF+I))) + + C(JA+J)=(A(IA+I)+A(ID+I))+A11 + D(JA+J)=(B(IA+I)-B(ID+I))+B11 + C(JC+J)=C2*(A20-B21)-S2*(B20+A21) + D(JC+J)=S2*(A20-B21)+C2*(B20+A21) + C(JE+J)=C4*(A20+B21)-S4*(B20-A21) + D(JE+J)=S4*(A20+B21)+C4*(B20-A21) + + A11=(A(IE+I)-A(IB+I))+(A(IC+I)-A(IF+I)) + B11=(B(IE+I)+B(IB+I))-(B(IC+I)+B(IF+I)) + A20=(A(IA+I)-A(ID+I))-0.5*A11 + A21=XSIN60*((A(IE+I)-A(IB+I))-(A(IC+I)-A(IF+I))) + B20=(B(IA+I)+B(ID+I))+0.5*B11 + B21=XSIN60*((B(IE+I)+B(IB+I))+(B(IC+I)+B(IF+I))) C(JD+J)= & - C3*((A(IA+I)-A(ID+I))+A11(IJK))-S3*((B(IA+I)+B(ID+I))-B11(IJK)) + C3*((A(IA+I)-A(ID+I))+A11)-S3*((B(IA+I)+B(ID+I))-B11) D(JD+J)= & - S3*((A(IA+I)-A(ID+I))+A11(IJK))+C3*((B(IA+I)+B(ID+I))-B11(IJK)) - C(JB+J)=C1*(A20(IJK)-B21(IJK))-S1*(B20(IJK)-A21(IJK)) - D(JB+J)=S1*(A20(IJK)-B21(IJK))+C1*(B20(IJK)-A21(IJK)) - C(JF+J)=C5*(A20(IJK)+B21(IJK))-S5*(B20(IJK)+A21(IJK)) - D(JF+J)=S5*(A20(IJK)+B21(IJK))+C5*(B20(IJK)+A21(IJK)) + S3*((A(IA+I)-A(ID+I))+A11)+C3*((B(IA+I)+B(ID+I))-B11) + C(JB+J)=C1*(A20-B21)-S1*(B20-A21) + D(JB+J)=S1*(A20-B21)+C1*(B20-A21) + C(JF+J)=C5*(A20+B21)-S5*(B20+A21) + D(JF+J)=S5*(A20+B21)+C5*(B20+A21) END DO END DO END DO @@ -1295,11 +1274,6 @@ SUBROUTINE RPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS 910 continue IERR=IBAD -#ifdef MNH_OPENACC - !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN - CALL MNH_MEM_RELEASE() -#endif - !$acc end data @@ -1549,7 +1523,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A1, A2, A3, B1, B2, B3 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JL - 1 + (IJK - 1 ) * INC4 @@ -1693,7 +1667,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A0, A1, A2, A3, B0, B1, B2, B3 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JL - 1 + (IJK - 1 ) * INC4 @@ -1857,7 +1831,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A6, A10, A11, A20, A21, B1, B2, B3, B4, B5, B6, B10, B11, B20, B21 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JL - 1 + (IJK - 1 ) * INC4 @@ -1911,7 +1885,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A6 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JBASE + JL - 1 + (IJK - 1 ) * INC4 @@ -1944,7 +1918,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A6 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JBASE + JL - 1 + (IJK - 1 ) * INC4 @@ -1994,7 +1968,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A11 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JBASE + JL - 1 + (IJK - 1 ) * INC4 @@ -2058,7 +2032,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A1, A2, A3, A4, A5, A11, A20, A21, B1, B2, B3, B4, B5, B11, B20, B21 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JL - 1 + (IJK - 1 ) * INC4 @@ -2144,7 +2118,7 @@ SUBROUTINE QPASSM(A,B,C,D,TRIGS,INC3,INC4,ILOT,N,IFAC,ILA,IERR,KSZ1,KSZ2,KSZ3,KS !CDIR$ IVDEP !!CDIR NODEP !*VOCL LOOP,NOVREC -!$acc loop independent private( I, J ) +!$acc loop independent private( I, J, A11 ) DO IJK=1,ILOT I = IBASE + JL - 1 + (IJK - 1 ) * INC3 J = JBASE + JL - 1 + (IJK - 1 ) * INC4