diff --git a/src/MNH/fft.f90 b/src/MNH/fft.f90 index 79a2bee6bfc9ae02957fea5effba3ccbcc0d338f..eabc0591d61b423eaa794142969065f5843169ad 100644 --- a/src/MNH/fft.f90 +++ b/src/MNH/fft.f90 @@ -151,13 +151,15 @@ SUBROUTINE FFT991( PA, PWORK, PTRIGS, PFAX, KJUMP, KN, KLOT, KSIGN ) CALL MPPDB_CHECK( PA, "FFT991 beg:PA" ) END IF -ZWORK(1 : SIZE(PWORK)) => PWORK(:,:,:) + ZWORK(1 : SIZE(PWORK)) => PWORK(:,:,:) !$acc data present( PA, ZWORK ) - ! Initialisation of ZWORK useful to compare results with MPPDB_CHECK (otherwise all values are not set by FFT991 - ZWORK(:) = 0. + IF ( MPPDB_INITIALIZED ) THEN + ! Initialisation of ZWORK useful to compare results with MPPDB_CHECK (otherwise all values are not set by FFT991 + ZWORK(:) = 0. !$acc update device( ZWORK ) + END IF #if 0 !PW: Original version: but in that case, intent of PTRIGS and KFAX are not correct @@ -414,19 +416,12 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II) - END DO - END DO + !$mnh_end_do() IIBASE=IIBASE+KLA IJBASE=IJBASE+KLA !$acc end kernels @@ -449,21 +444,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJBASE = IJBASE0 + (IIK-KLA)/KLA * (KLA + JUMP ) IIA = IIA0 + (IIK-KLA)/KLA * IINK IIB = IIB0 - (IIK-KLA)/KLA * IINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PD(IJA+IJ)=PB(IIA+II)-PB(IIB+II) PC(IJB+IJ)=ZC1*(PA(IIA+II)-PA(IIB+II))-ZS1*(PB(IIA+II)+PB(IIB+II)) PD(IJB+IJ)=ZS1*(PA(IIA+II)-PA(IIB+II))+ZC1*(PB(IIA+II)+PB(IIB+II)) - END DO - END DO + !$mnh_end_do() END DO IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK @@ -473,19 +461,12 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IIA <= IIB ) THEN !$acc kernels IIBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II) PC(IJB+IJ)=-PB(IIA+II) - END DO - END DO + !$mnh_end_do() IIBASE=IIBASE+KLA IJBASE=IJBASE+KLA !$acc end kernels @@ -494,19 +475,12 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ELSE !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II)) PC(IJB+IJ)=2.0*(PA(IIA+II)-PA(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE=IIBASE+KLA IJBASE=IJBASE+KLA !$acc end kernels @@ -528,20 +502,13 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))-(XSIN60*(PB(IIB+II))) PC(IJC+IJ)=(PA(IIA+II)-0.5*PA(IIB+II))+(XSIN60*(PB(IIB+II))) - END DO - END DO + !$mnh_end_do() IIBASE=IIBASE+KLA IJBASE=IJBASE+KLA !$acc end kernels @@ -569,13 +536,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIA = IIA0 + (IIK-KLA)/KLA * IINK IIB = IIB0 + (IIK-KLA)/KLA * IINK IIC = IIC0 - (IIK-KLA)/KLA * IINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) @@ -600,8 +561,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE II)))) & +ZC2*((PB(IIA+II)-0.5*(PB(IIB+II)-PB(IIC+II)))-(XSIN60*(PA(IIB+II)-PA(IIC+ & II)))) - END DO - END DO + !$mnh_end_do() END DO IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK @@ -613,20 +573,13 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IIA <= IIC ) THEN !$acc kernels IIBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II)) PC(IJC+IJ)=-(0.5*PA(IIA+II)-PA(IIB+II))-(XSIN60*PB(IIA+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -636,20 +589,13 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZSSIN60=2.0*XSIN60 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(PA(IIA+II)+PA(IIB+II)) PC(IJB+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))-(ZSSIN60*PB(IIB+II)) PC(IJC+IJ)=(2.0*PA(IIA+II)-PA(IIB+II))+(ZSSIN60*PB(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -674,21 +620,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+PA(IIB+II) PC(IJB+IJ)=(PA(IIA+II)-PA(IIC+II))-PB(IIB+II) PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-PA(IIB+II) PC(IJD+IJ)=(PA(IIA+II)-PA(IIC+II))+PB(IIB+II) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -722,13 +661,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIB = IIB0 + (IIK-KLA)/KLA * IINK IIC = IIC0 - (IIK-KLA)/KLA * IINK IID = IID0 - (IIK-KLA)/KLA * IINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)) @@ -751,8 +684,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJD+IJ)= & ZS3*((PA(IIA+II)-PA(IIC+II))+(PB(IIB+II)+PB(IID+II))) & +ZC3*((PB(IIA+II)+PB(IIC+II))-(PA(IIB+II)-PA(IID+II))) - END DO - END DO + !$mnh_end_do() END DO IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK @@ -766,21 +698,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels IIBASE=0 ZSIN45=SQRT(0.5) -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=ZSIN45*((PA(IIA+II)-PA(IIB+II))-(PB(IIA+II)+PB(IIB+II))) PC(IJC+IJ)=PB(IIB+II)-PB(IIA+II) PC(IJD+IJ)=-ZSIN45*((PA(IIA+II)-PA(IIB+II))+(PB(IIA+II)+PB(IIB+II))) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -789,21 +714,14 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ELSE ! KLA == M !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))+PA(IIB+II)) PC(IJB+IJ)=2.0*((PA(IIA+II)-PA(IIC+II))-PB(IIB+II)) PC(IJC+IJ)=2.0*((PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) PC(IJD+IJ)=2.0*((PA(IIA+II)-PA(IIC+II))+PB(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -830,13 +748,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) @@ -852,8 +764,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJE+IJ)=((PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II)))+XQRT5*(PA(IIB+II)-PA(IIC+ & II))) & +(XSIN72*PB(IIB+II)+XSIN36*PB(IIC+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -893,13 +804,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIC = IIC0 + (IIK-KLA)/KLA * IINK IID = IID0 - (IIK-KLA)/KLA * IINK IIE = IIE0 - (IIK-KLA)/KLA * IINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA10, ZA11, ZA20, ZA21, ZB10, ZB11, ZB20, ZB21 ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL @@ -926,8 +831,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJC+IJ)=ZS2*(ZA20-ZA21)+ZC2*(ZB20+ZB21) PC(IJD+IJ)=ZC3*(ZA20+ZA21)-ZS3*(ZB20-ZB21) PD(IJD+IJ)=ZS3*(ZA20+ZA21)+ZC3*(ZB20-ZB21) - END DO - END DO + !$mnh_end_do() END DO IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK @@ -941,13 +845,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IIB <= IID ) THEN !$acc kernels IIBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IIB+II))+PA(IIC+II) @@ -963,8 +861,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJD+IJ)=-(XQRT5*(PA(IIA+II)-PA(IIB+II))-(0.25*(PA(IIA+II)+PA(IIB+II))-PA(IIC+ & II))) & -(XSIN72*PB(IIA+II)-XSIN36*PB(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -976,13 +873,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZQQRT5=2.0*XQRT5 ZSSIN36=2.0*XSIN36 ZSSIN72=2.0*XSIN72 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))) @@ -994,8 +885,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE -ZQQRT5*(PA(IIB+II)-PA(IIC+II)))+(ZSSIN36*PB(IIB+II)-ZSSIN72*PB(IIC+II)) PC(IJE+IJ)=(2.0*(PA(IIA+II)-0.25*(PA(IIB+II)+PA(IIC+II))) & +ZQQRT5*(PA(IIB+II)-PA(IIC+II)))+(ZSSIN72*PB(IIB+II)+ZSSIN36*PB(IIC+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1024,13 +914,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(PA(IIA+II)+PA(IID+II))+(PA(IIB+II)+PA(IIC+II)) @@ -1043,8 +927,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE -(XSIN60*(PB(IIB+II)-PB(IIC+II))) PC(IJE+IJ)=((PA(IIA+II)+PA(IID+II))-0.5*(PA(IIB+II)+PA(IIC+II))) & +(XSIN60*(PB(IIB+II)-PB(IIC+II))) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1091,13 +974,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IID = IID0 - (IIK-KLA)/KLA * IINK IIE = IIE0 - (IIK-KLA)/KLA * IINK IIF = IIF0 - (IIK-KLA)/KLA * IINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA11, ZA20, ZA21, ZB11, ZB20, ZB21 ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL @@ -1130,8 +1007,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJB+IJ)=ZS1*(ZA20-ZB21)+ZC1*(ZB20-ZA21) PC(IJF+IJ)=ZC5*(ZA20+ZB21)-ZS5*(ZB20+ZA21) PD(IJF+IJ)=ZS5*(ZA20+ZB21)+ZC5*(ZB20+ZA21) - END DO - END DO + !$mnh_end_do() END DO IJBASE = IJBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + JUMP ) IIA = IIA0 + ((ISTOP-KLA)/KLA+1) * IINK @@ -1146,13 +1022,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IIC <= IID ) THEN !$acc kernels IIBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=PA(IIB+II)+(PA(IIA+II)+PA(IIC+II)) @@ -1163,8 +1033,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE II)) PC(IJC+IJ)=XSIN60*(PB(IIC+II)-PB(IIA+II))+(0.5*(PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) PC(IJE+IJ)=XSIN60*(PB(IIC+II)-PB(IIA+II))-(0.5*(PA(IIA+II)+PA(IIC+II))-PA(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1174,13 +1043,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZSSIN60=2.0*XSIN60 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=(2.0*(PA(IIA+II)+PA(IID+II)))+(2.0*(PA(IIB+II)+PA(IIC+II))) @@ -1193,8 +1056,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE -(ZSSIN60*(PB(IIB+II)-PB(IIC+II))) PC(IJE+IJ)=(2.0*(PA(IIA+II)+PA(IID+II))-(PA(IIB+II)+PA(IIC+II))) & +(ZSSIN60*(PB(IIB+II)-PB(IIC+II))) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1226,13 +1088,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJH=IJG+JINK ZSSIN45=SQRT(2.0) -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IIL=0,KLA-1 + !$mnh_do_concurrent ( IIL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IIL IJ = IJBASE + IJK * KINC4 + IIL PC(IJA+IJ)=2.0*(((PA(IIA+II)+PA(IIE+II))+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))) @@ -1247,8 +1103,7 @@ SUBROUTINE RPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE -ZSSIN45*((PA(IIB+II)-PA(IID+II))+(PB(IIB+II)+PB(IID+II))) PC(IJH+IJ)=2.0*((PA(IIA+II)-PA(IIE+II))+PB(IIC+II)) & +ZSSIN45*((PA(IIB+II)-PA(IID+II))+(PB(IIB+II)+PB(IID+II))) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1349,19 +1204,12 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+PA(IIB+II) PC(IJB+IJ)=PA(IIA+II)-PA(IIB+II) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1383,21 +1231,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IIBASE = IBASE0 + (IIK-KLA)/KLA * (KLA + IJUMP ) IJA = IJA0 + (IIK-KLA)/KLA * JINK IJB = IJB0 - (IIK-KLA)/KLA * JINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+(ZC1*PA(IIB+II)+ZS1*PB(IIB+II)) PC(IJB+IJ)=PA(IIA+II)-(ZC1*PA(IIB+II)+ZS1*PB(IIB+II)) PD(IJA+IJ)=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))+PB(IIA+II) PD(IJB+IJ)=(ZC1*PB(IIB+II)-ZS1*PA(IIB+II))-PB(IIA+II) - END DO - END DO + !$mnh_end_do() END DO IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK @@ -1408,19 +1249,12 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IJA <= IJB ) THEN !$acc kernels IJBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II) PD(IJA+IJ)=-PA(IIB+II) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1430,19 +1264,12 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZZ=1.0/REAL(KN) -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*(PA(IIA+II)+PA(IIB+II)) PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1467,20 +1294,13 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+(PA(IIB+II)+PA(IIC+II)) PC(IJB+IJ)=PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II)) PD(IJB+IJ)=XSIN60*(PA(IIC+II)-PA(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1508,13 +1328,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJA = IJA0 + (IIK-KLA)/KLA * JINK IJB = IJB0 + (IIK-KLA)/KLA * JINK IJC = IJC0 - (IIK-KLA)/KLA * JINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZB1, ZB2, ZB3 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJK * KINC4 + IJL ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) @@ -1529,8 +1343,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJB+IJ)=ZB2-ZA3 PC(IJC+IJ)=ZA2-ZB3 PD(IJC+IJ)=-(ZB2+ZA3) - END DO - END DO + !$mnh_end_do() END DO IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK @@ -1542,20 +1355,13 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IJA <= IJC ) THEN !$acc kernels IJBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+0.5*(PA(IIB+II)-PA(IIC+II)) PD(IJA+IJ)=-XSIN60*(PA(IIB+II)+PA(IIC+II)) PC(IJB+IJ)=PA(IIA+II)-(PA(IIB+II)-PA(IIC+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1566,20 +1372,13 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZZ=1.0/REAL(KN) ZZSIN60=ZZ*XSIN60 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*(PA(IIA+II)+(PA(IIB+II)+PA(IIC+II))) PC(IJB+IJ)=ZZ*(PA(IIA+II)-0.5*(PA(IIB+II)+PA(IIC+II))) PD(IJB+IJ)=ZZSIN60*(PA(IIC+II)-PA(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1604,21 +1403,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=(PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II)) PC(IJC+IJ)=(PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II)) PC(IJB+IJ)=PA(IIA+II)-PA(IIC+II) PD(IJB+IJ)=PA(IID+II)-PA(IIB+II) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1652,13 +1444,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJB = IJB0 + (IIK-KLA)/KLA * JINK IJC = IJC0 - (IIK-KLA)/KLA * JINK IJD = IJD0 - (IIK-KLA)/KLA * JINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA0, ZA1, ZA2, ZA3, ZB0, ZB1, ZB2, ZB3 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJK * KINC4 + IJL ZA0=PA(IIA+II)+(ZC2*PA(IIC+II)+ZS2*PB(IIC+II)) @@ -1677,8 +1463,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJD+IJ)=ZA2-ZB3 PD(IJB+IJ)=ZB2-ZA3 PD(IJD+IJ)=-(ZB2+ZA3) - END DO - END DO + !$mnh_end_do() END DO IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK @@ -1692,21 +1477,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZSIN45=SQRT(0.5) IJBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=PA(IIA+II)+ZSIN45*(PA(IIB+II)-PA(IID+II)) PC(IJB+IJ)=PA(IIA+II)-ZSIN45*(PA(IIB+II)-PA(IID+II)) PD(IJA+IJ)=-PA(IIC+II)-ZSIN45*(PA(IIB+II)+PA(IID+II)) PD(IJB+IJ)=PA(IIC+II)-ZSIN45*(PA(IIB+II)+PA(IID+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1715,21 +1493,14 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ELSE ! KLA == M !$acc kernels ZZ=1.0/REAL(KN) -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))+(PA(IIB+II)+PA(IID+II))) PC(IJC+IJ)=ZZ*((PA(IIA+II)+PA(IIC+II))-(PA(IIB+II)+PA(IID+II))) PC(IJB+IJ)=ZZ*(PA(IIA+II)-PA(IIC+II)) PD(IJB+IJ)=ZZ*(PA(IID+II)-PA(IIB+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1755,13 +1526,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL ZA1=PA(IIB+II)+PA(IIE+II) @@ -1775,8 +1540,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJC+IJ)=ZA5-ZA6 PD(IJB+IJ)=-XSIN72*ZA3-XSIN36*ZA4 PD(IJC+IJ)=-XSIN36*ZA3+XSIN72*ZA4 - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1816,14 +1580,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJC = IJC0 + (IIK-KLA)/KLA * JINK IJD = IJD0 - (IIK-KLA)/KLA * JINK IJE = IJE0 - (IIK-KLA)/KLA * JINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6, ZA10, ZA11, ZA20, ZA21, & -!$acc & ZB1, ZB2, ZB3, ZB4, ZB5, ZB6, ZB10, ZB11, ZB20, ZB21 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJK * KINC4 + IJL ZA1=(ZC1*PA(IIB+II)+ZS1*PB(IIB+II))+(ZC4*PA(IIE+II)+ZS4*PB(IIE+II)) @@ -1856,8 +1613,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJE+IJ)=-(ZB10+ZB11) PD(IJC+IJ)=ZB20-ZB21 PD(IJD+IJ)=-(ZB20+ZB21) - END DO - END DO + !$mnh_end_do() END DO IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK @@ -1871,13 +1627,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IJB <= IJD ) THEN !$acc kernels IJBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL ZA1=PA(IIB+II)+PA(IIE+II) @@ -1891,8 +1641,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJC+IJ)=PA(IIA+II)-(ZA3-ZA4) PD(IJA+IJ)=-XSIN36*ZA1-XSIN72*ZA2 PD(IJB+IJ)=-XSIN72*ZA1+XSIN36*ZA2 - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1904,13 +1653,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZZQRT5=ZZ*XQRT5 ZZSIN36=ZZ*XSIN36 ZZSIN72=ZZ*XSIN72 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA6 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL ZA1=PA(IIB+II)+PA(IIE+II) @@ -1924,8 +1667,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJC+IJ)=ZA5-ZA6 PD(IJB+IJ)=-ZZSIN72*ZA3-ZZSIN36*ZA4 PD(IJC+IJ)=-ZZSIN36*ZA3+ZZSIN72*ZA4 - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -1954,13 +1696,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( KLA /= IM ) THEN !$acc kernels -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA11 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II)) @@ -1971,8 +1707,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJB+IJ)=(PA(IIA+II)-PA(IID+II))-0.5*ZA11 PD(IJB+IJ)=XSIN60*((PA(IIE+II)-PA(IIB+II))-(PA(IIC+II)-PA(IIF+II))) PC(IJD+IJ)=(PA(IIA+II)-PA(IID+II))+ZA11 - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -2019,13 +1754,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IJD = IJD0 - (IIK-KLA)/KLA * JINK IJE = IJE0 - (IIK-KLA)/KLA * JINK IJF = IJF0 - (IIK-KLA)/KLA * JINK -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA1, ZA2, ZA3, ZA4, ZA5, ZA11, ZA20, ZA21, ZB1, ZB2, ZB3, ZB4, ZB5, ZB11, ZB20, ZB21 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJK * KINC4 + IJL ZA1=ZC1*PA(IIB+II)+ZS1*PB(IIB+II) @@ -2062,8 +1791,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJD+IJ)=ZB11+(ZB3-PB(IIA+II)) PC(IJF+IJ)=ZA20+ZB21 PD(IJF+IJ)=ZA21+ZB20 - END DO - END DO + !$mnh_end_do() END DO IIBASE = IBASE0 + ((ISTOP-KLA)/KLA+1) * ( KLA + IJUMP ) IJA = IJA0 + ((ISTOP-KLA)/KLA+1) * JINK @@ -2078,13 +1806,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE IF ( IJC <= IJD ) THEN !$acc kernels IJBASE=0 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=(PA(IIA+II)+0.5*(PA(IIC+II)-PA(IIE+II)))+ XSIN60*(PA(IIB+II)-PA(IIF+II)) @@ -2093,8 +1815,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PD(IJB+IJ)=PA(IID+II)-(PA(IIB+II)+PA(IIF+II)) PC(IJC+IJ)=(PA(IIA+II)+0.5*(PA(IIC+II)-PA(IIE+II)))-XSIN60*(PA(IIB+II)-PA(IIF+II)) PD(IJC+IJ)=-(PA(IID+II)+0.5*(PA(IIB+II)+PA(IIF+II)))+XSIN60*(PA(IIC+II)+PA(IIE+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -2105,13 +1826,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE !$acc kernels ZZ=1.0/REAL(KN) ZZSIN60=ZZ*XSIN60 -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ, ZA11 ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL ZA11=(PA(IIC+II)+PA(IIF+II))+(PA(IIB+II)+PA(IIE+II)) @@ -2122,8 +1837,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE PC(IJB+IJ)=ZZ*((PA(IIA+II)-PA(IID+II))-0.5*ZA11) PD(IJB+IJ)=ZZSIN60*((PA(IIE+II)-PA(IIB+II))-(PA(IIC+II)-PA(IIF+II))) PC(IJD+IJ)=ZZ*((PA(IIA+II)-PA(IID+II))+ZA11) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels @@ -2157,13 +1871,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE ZZ=1.0/REAL(KN) ZZSIN45=ZZ*SQRT(0.5) -!$acc loop independent - DO IJK=0,KLOT-1 -!CDIR$ IVDEP -!!CDIR NODEP -!*VOCL LOOP,NOVREC -!$acc loop independent private( II, IJ ) - DO IJL=0,KLA-1 + !$mnh_do_concurrent ( IJL=0:KLA-1, IJK=0:KLOT-1 ) II = IIBASE + IJK * KINC3 + IJL IJ = IJBASE + IJK * KINC4 + IJL PC(IJA+IJ)=ZZ*(((PA(IIA+II)+PA(IIE+II))+(PA(IIC+II)+PA(IIG+II)))+ & @@ -2180,8 +1888,7 @@ SUBROUTINE QPASSM( PA, PB, PC, PD, PTRIGS, KINC3, KINC4, KLOT, KN, KFAC, KLA, KE +ZZ*(PA(IIG+II)-PA(IIC+II)) PD(IJD+IJ)=ZZSIN45*((PA(IIH+II)-PA(IID+II))+(PA(IIF+II)-PA(IIB+II))) & -ZZ*(PA(IIG+II)-PA(IIC+II)) - END DO - END DO + !$mnh_end_do() IIBASE = IIBASE + KLA IJBASE = IJBASE + KLA !$acc end kernels