diff --git a/src/MNH/advec_weno_k_3_aux.f90 b/src/MNH/advec_weno_k_3_aux.f90 index fc7d56ff51318e24425e39b2ddf02522807fcfea..1e233d8a5007118733210db1bddb948152abfb71 100644 --- a/src/MNH/advec_weno_k_3_aux.f90 +++ b/src/MNH/advec_weno_k_3_aux.f90 @@ -433,7 +433,7 @@ CASE ('CYCL') ! IF( LEAST_ll() .AND. .FALSE. ) THEN! East boundary is physical (monoproc) ! -!$acc kernels +!$acc kernels present_cr( ZFPOS1,ZBPOS1,ZOMP1) ! First positive stencil, needs indices i-2, i-1, i ZFPOS1(IW,:,:) = 1./6. * (2.0*PSRC(IE,:,:) - 7.0*PSRC(IW-1,:,:) + 11.0*PSRC(IW,:,:))! Flux IW ZFPOS1(IW-1,:,:) = 1./6. * (2.0*PSRC(IE-1,:,:) - 7.0*PSRC(IE,:,:) + 11.0*PSRC(IW-1,:,:))! Flux IW-1 @@ -443,6 +443,8 @@ ZBPOS1(IW-1,:,:) = 13./12. * (PSRC(IE-1,:,:) - 2.0*PSRC(IE,:,:) + PSRC(IW-1, + 1./4 * (PSRC(IE-1,:,:) - 4.0*PSRC(IE,:,:) + 3.0*PSRC(IW-1,:,:))**2 ! Smoothness indicator IW-1 ZOMP1(IW-1:IW,:,:) = 1./10. / (ZEPS + ZBPOS1(IW-1:IW,:,:))**2! Non-normalized weight IW,IW-1 ! +!$acc end kernels +!$acc kernels present_cr( ZFPOS2,ZBPOS2, ZOMP2) ! Second positive stencil, needs indices i-1, i, i+1 ZFPOS2(IW,:,:) = 1./6.* (-1.0*PSRC(IW-1,:,:) + 5.0*PSRC(IW,:,:) + 2.0*PSRC(IW+1,:,:))! Flux IW ZFPOS2(IW-1,:,:) = 1./6.* (-1.0*PSRC(IE,:,:) + 5.0*PSRC(IW-1,:,:) + 2.0*PSRC(IW,:,:)) ! Flux IW-1 @@ -452,6 +454,8 @@ ZBPOS2(IW-1,:,:) = 13./12 * (PSRC(IE,:,:) - 2.0*PSRC(IW-1,:,:) + PSRC(IW,:,:))** + 1./4 * (PSRC(IE,:,:) - PSRC(IW,:,:))**2! Smoothness indicator IW-1 ZOMP2(IW-1:IW,:,:) = 3./5. / (ZEPS + ZBPOS2(IW-1:IW,:,:))**2! Non-normalized weight IW,IW-1 ! +!$acc end kernels +!$acc kernels present_cr(ZFNEG3,ZBNEG3,ZOMN3) ! Third negative stencil, needs indices i-1, i, i+1 ZFNEG3(IW,:,:) = 1./6 * (-1.0*PSRC(IW-1,:,:) + 5.0*PSRC(IW,:,:) + 2.0*PSRC(IW+1,:,:))! Flux IW ZFNEG3(IW-1,:,:) = 1./6 * (-1.0*PSRC(IE,:,:) + 5.0*PSRC(IW-1,:,:) + 2.0*PSRC(IW,:,:))! Flux IW-1 @@ -464,7 +468,7 @@ ZOMN3(IW-1:IW,:,:) = 3./10. / (ZEPS + ZBNEG3(IW-1:IW,:,:))**2! Non-normalized we !$acc end kernels ELSEIF(IW>3) THEN! East boundary is proc border, with minimum 3 HALO points on west side ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFNEG3,ZBNEG3,ZOMN3,PSRC) ! First positive stencil, needs indices i-2, i-1, i ZFPOS1(IW,:,:) = 1./6. * (2.0*PSRC(IW-2,:,:) - 7.0*PSRC(IW-1,:,:) + 11.0*PSRC(IW,:,:))! Flux IW ZFPOS1(IW-1,:,:) = 1./6. * (2.0*PSRC(IW-3,:,:) - 7.0*PSRC(IW-2,:,:) + 11.0*PSRC(IW-1,:,:))! Flux IW-1 @@ -497,7 +501,7 @@ ZFNEG3(IW,:,:) = 1./6 * (-1.0*PSRC(IW-1,:,:) + 5.0*PSRC(IW,:,:) + 2.0*PSRC(I call Print_msg(NVERB_FATAL,'GEN','ADVEC_WENO_K_3_UX','WENO5/CYCL fluxes calculation needs JPHEXT (&NHALO) >= 3 on west side') ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZFPOS3,ZBPOS3,ZOMP3,ZFNEG1,ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,ZOMN2,PR,PSRC) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(IW,:,:) = 1./6 * (2.0*PSRC(IW,:,:) + 5.0*PSRC(IW+1,:,:) - PSRC(IW+2,:,:)) ! Flux IW ZFPOS3(IW-1,:,:) = 1./6 * (2.0*PSRC(IW-1,:,:) + 5.0*PSRC(IW,:,:) - PSRC(IW+1,:,:)) ! Flux IW-1 @@ -543,7 +547,7 @@ ZFNEG3(IW,:,:) = 1./6 * (-1.0*PSRC(IW-1,:,:) + 5.0*PSRC(IW,:,:) + 2.0*PSRC(I ! Open, or Wall, or Nest boundary condition => WENO order reduction !--------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(PR,ZFPOS1,ZBPOS1,ZFNEG1,ZOMP1,ZOMP2,ZOMN1,ZOMN2,PSRC) ! WENO scheme order 1, IW-1 PR(IW-1,:,:) = PSRC(IW-1,:,:) * (0.5+SIGN(0.5,PRUCT(IW-1,:,:))) + & PSRC(IW,:,:) * & @@ -589,7 +593,8 @@ ELSE ! ! ----- Positive fluxes ----- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PSRC) ! First positive stencil, needs indices i-2, i-1, i ZFPOS1(IW,:,:) = 1./6. * (2.0*PSRC(IW-2,:,:) - 7.0*PSRC(IW-1,:,:) + 11.0*PSRC(IW,:,:)) ! Flux IW ZFPOS1(IW-1,:,:) = 1./6. * (2.0*PSRC(IW-3,:,:) - 7.0*PSRC(IW-2,:,:) + 11.0*PSRC(IW-1,:,:)) ! Flux IW-1 @@ -681,7 +686,8 @@ IF( LEAST_ll() ) THEN ! IF (LWEST_ll() .AND. .FALSE. ) THEN ! West boundary is physical (monoproc) ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PSRC) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(IE-1,:,:) = 1./6 * (2.0*PSRC(IE-1,:,:) + 5.0*PSRC(IE,:,:) - PSRC(IE+1,:,:)) ! Flux IE-1 ZFPOS3(IE,:,:) = 1./6 * (2.0*PSRC(IE,:,:) + 5.0*PSRC(IE+1,:,:) - PSRC(IW,:,:)) ! Flux IE @@ -712,7 +718,8 @@ IF( LEAST_ll() ) THEN ! ELSEIF(IE<=SIZE(PSRC,1)-3) THEN ! West boundary is proc border, with minimum 3 HALO points on east side ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PSRC) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(IE-1,:,:) = 1./6 * (2.0*PSRC(IE-1,:,:) + 5.0*PSRC(IE,:,:) - PSRC(IE+1,:,:)) ! Flux IE-1 ZFPOS3(IE,:,:) = 1./6 * (2.0*PSRC(IE,:,:) + 5.0*PSRC(IE+1,:,:) - PSRC(IE+2,:,:)) ! Flux IE @@ -745,7 +752,8 @@ IF( LEAST_ll() ) THEN call Print_msg(NVERB_FATAL,'GEN','ADVEC_WENO_K_3_UX','WENO5/CYCL fluxes calculation needs JPHEXT (&NHALO) >= 3 on east side') ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3,PSRC) ! First positive stencil, needs indices i-2, i-1, i ZFPOS1(IE-1,:,:) = 1./6. * (2.0*PSRC(IE-3,:,:) - 7.0*PSRC(IE-2,:,:) + 11.0*PSRC(IE-1,:,:)) ! Flux IE-1 ZFPOS1(IE,:,:) = 1./6. * (2.0*PSRC(IE-2,:,:) - 7.0*PSRC(IE-1,:,:) + 11.0*PSRC(IE,:,:)) ! Flux IE @@ -791,7 +799,8 @@ IF( LEAST_ll() ) THEN ! Open, or Wall, or Nest boundary condition => WENO order reduction !--------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,PSRC) ! WENO scheme order 1, IE PR(IE,:,:) = PSRC(IE,:,:) * (0.5+SIGN(0.5,PRUCT(IE,:,:))) + & PSRC(IE+1,:,:) * & @@ -837,7 +846,8 @@ ELSE ! ! ----- Positive fluxes ----- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3,PSRC) ! First positive stencil, needs indices i-2, i-1, i ZFPOS1(IE-1,:,:) = 1./6. * (2.0*PSRC(IE-3,:,:) - 7.0*PSRC(IE-2,:,:) + 11.0*PSRC(IE-1,:,:)) ! Flux IE-1 ZFPOS1(IE,:,:) = 1./6. * (2.0*PSRC(IE-2,:,:) - 7.0*PSRC(IE-1,:,:) + 11.0*PSRC(IE,:,:)) ! Flux IE @@ -911,7 +921,7 @@ ELSE END IF ! IF(LWEST_ll()) !------------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(PR) PR = PR * PRUCT ! Add contravariant flux !$acc end kernels ! @@ -1025,7 +1035,8 @@ CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) !* 0.4. INITIALIZE THE FIELD ! --------------------- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3) PR(:,:,:) = 0.0 ! ZFPOS1 = 0.0 @@ -1136,7 +1147,7 @@ IF( LWEST_ll() ) THEN ! IF(LEAST_ll() .AND. .FALSE. ) THEN ! East border is physical ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZFNEG3,ZBNEG3,ZOMN3) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(IW+1,:,:) = 1./6. * (2.0*PSRC(IE,:,:) - 7.0*PSRC(IW-1,:,:) + 11.0*PSRC(IW,:,:)) ! Flux IW+1 ZFPOS1(IW,:,:) = 1./6. * (2.0*PSRC(IE-1,:,:) - 7.0*PSRC(IE,:,:) + 11.0*PSRC(IW-1,:,:)) ! Flux IW @@ -1167,7 +1178,7 @@ IF( LWEST_ll() ) THEN ! ELSEIF(IW>3) THEN ! East boundary is proc border, with minimum 3 HALO points on west side ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFNEG3,ZBNEG3,ZOMN3,PSRC) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(IW+1,:,:) = 1./6. * (2.0*PSRC(IW-2,:,:) - 7.0*PSRC(IW-1,:,:) + 11.0*PSRC(IW,:,:)) ! Flux IW+1 ZFPOS1(IW,:,:) = 1./6. * (2.0*PSRC(IW-3,:,:) - 7.0*PSRC(IW-2,:,:) + 11.0*PSRC(IW-1,:,:)) ! Flux IW @@ -1200,7 +1211,8 @@ IF( LWEST_ll() ) THEN call Print_msg(NVERB_FATAL,'GEN','ADVEC_WENO_K_3_MX','WENO5/CYCL fluxes calculation needs JPHEXT (&NHALO) >= 3 on west side') ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3,PSRC) ! Third positive stencil, needs indices i-1, i, i+1 ZFPOS3(IW+1,:,:) = 1./6 * (2.0*PSRC(IW,:,:) + 5.0*PSRC(IW+1,:,:) - PSRC(IW+2,:,:)) ! Flux IW+1 ZFPOS3(IW,:,:) = 1./6 * (2.0*PSRC(IW-1,:,:) + 5.0*PSRC(IW,:,:) - PSRC(IW+1,:,:)) ! Flux IW @@ -1247,7 +1259,7 @@ IF( LWEST_ll() ) THEN ! Open, or Wall, or Nest boundary condition => WENO order reduction !--------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(PR,ZFPOS1,ZFPOS2,ZBPOS1,ZBPOS2,ZFNEG1,ZFNEG2,ZBNEG1,ZBNEG2,ZOMP1,ZOMP2,ZOMN1,ZOMN2) ! WENO scheme order 1, IW PR(IW,:,:) = PSRC(IW-1,:,:) * (0.5+SIGN(0.5,PRUCT(IW,:,:))) + & PSRC(IW,:,:) * (0.5-SIGN(0.5,PRUCT(IW,:,:))) @@ -1292,7 +1304,8 @@ ELSE ! ! ----- Positive fluxes ----- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3,PSRC) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(IW+1,:,:) = 1./6. * (2.0*PSRC(IW-2,:,:) - 7.0*PSRC(IW-1,:,:) + 11.0*PSRC(IW,:,:)) ! Flux IW+1 ZFPOS1(IW,:,:) = 1./6. * (2.0*PSRC(IW-3,:,:) - 7.0*PSRC(IW-2,:,:) + 11.0*PSRC(IW-1,:,:)) ! Flux IW @@ -1383,7 +1396,7 @@ IF(LEAST_ll() ) THEN ! IF(LWEST_ll() .AND. .FALSE. ) THEN ! West border is physical ! -!$acc kernels +!$acc kernels present_cr(ZFPOS3,ZBPOS3,ZOMP3,ZFNEG1,ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,ZOMN2,PSRC) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(IE,:,:) = 1./6 * (2.0*PSRC(IE-1,:,:) + 5.0*PSRC(IE,:,:) - PSRC(IE+1,:,:)) ! Flux IE ZFPOS3(IE+1,:,:) = 1./6 * (2.0*PSRC(IE,:,:) + 5.0*PSRC(IE+1,:,:) - PSRC(IW,:,:)) ! Flux IE+1 @@ -1414,7 +1427,7 @@ IF(LEAST_ll() ) THEN ! ELSEIF(IE<=SIZE(PSRC,1)-3) THEN ! West boundary is proc border, with minimum 3 HALO points on east side ! -!$acc kernels +!$acc kernels present_cr(ZFPOS3,ZBPOS3,ZOMP3,ZFNEG1,ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,ZOMN2,PSRC) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(IE,:,:) = 1./6 * (2.0*PSRC(IE-1,:,:) + 5.0*PSRC(IE,:,:) - PSRC(IE+1,:,:)) ! Flux IE ZFPOS3(IE+1,:,:) = 1./6 * (2.0*PSRC(IE,:,:) + 5.0*PSRC(IE+1,:,:) - PSRC(IE+2,:,:)) ! Flux IE+1 @@ -1447,7 +1460,7 @@ IF(LEAST_ll() ) THEN call Print_msg(NVERB_FATAL,'GEN','ADVEC_WENO_K_3_MX','WENO5/CYCL fluxes calculation needs JPHEXT (&NHALO) >= 3 on east side') ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFNEG3,ZBNEG3,ZOMN3,PR,PSRC) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(IE,:,:) = 1./6. * (2.0*PSRC(IE-3,:,:) - 7.0*PSRC(IE-2,:,:) + 11.0*PSRC(IE-1,:,:)) ! Flux IE ZFPOS1(IE+1,:,:) = 1./6. * (2.0*PSRC(IE-2,:,:) - 7.0*PSRC(IE-1,:,:) + 11.0*PSRC(IE,:,:)) ! Flux IE+1 @@ -1493,7 +1506,7 @@ IF(LEAST_ll() ) THEN ! Open, or Wall, or Nest boundary condition => WENO order reduction !--------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZFPOS2,ZBPOS1,ZBPOS2,ZFNEG1,ZBNEG1,ZOMP1,ZOMP2,ZOMN1,ZOMN2,PR,PSRC) ! WENO scheme order 1, IE+1 PR(IE+1,:,:) = PSRC(IE,:,:) * (0.5+SIGN(0.5,PRUCT(IE+1,:,:))) + & PSRC(IE+1,:,:) * (0.5-SIGN(0.5,PRUCT(IE+1,:,:))) @@ -1538,7 +1551,8 @@ ELSE ! ! ----- Positive fluxes ----- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3,PSRC) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(IE,:,:) = 1./6. * (2.0*PSRC(IE-3,:,:) - 7.0*PSRC(IE-2,:,:) + 11.0*PSRC(IE-1,:,:)) ! Flux IE ZFPOS1(IE+1,:,:) = 1./6. * (2.0*PSRC(IE-2,:,:) - 7.0*PSRC(IE-1,:,:) + 11.0*PSRC(IE,:,:)) ! Flux IE+1 @@ -1730,7 +1744,8 @@ CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) !* 0.4. INITIALIZE THE FIELD ! --------------------- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3) PR(:,:,:) = 0.0 ! ZFPOS1 = 0.0 @@ -1840,7 +1855,7 @@ IF(LSOUTH_ll()) THEN ! IF(LNORTH_ll() .AND. .FALSE. ) THEN ! North border is physical ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFNEG3,ZBNEG3,ZOMN3) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(:,IS+1,:) = 1./6. * (2.0*PSRC(:,IN,:) - 7.0*PSRC(:,IS-1,:) + 11.0*PSRC(:,IS,:)) ! Flux IS+1 ZFPOS1(:,IS,:) = 1./6. * (2.0*PSRC(:,IN-1,:) - 7.0*PSRC(:,IN,:) + 11.0*PSRC(:,IS-1,:)) ! Flux IS @@ -1871,7 +1886,7 @@ IF(LSOUTH_ll()) THEN ! ELSEIF(IS>3) THEN ! North boundary is proc border, with minimum 3 HALO points on sounth side ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFNEG3,ZBNEG3,ZOMN3) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(:,IS+1,:) = 1./6. * (2.0*PSRC(:,IS-2,:) - 7.0*PSRC(:,IS-1,:) + 11.0*PSRC(:,IS,:)) ! Flux IS+1 ZFPOS1(:,IS,:) = 1./6. * (2.0*PSRC(:,IS-3,:) - 7.0*PSRC(:,IS-2,:) + 11.0*PSRC(:,IS-1,:)) ! Flux IS @@ -1904,7 +1919,8 @@ IF(LSOUTH_ll()) THEN call Print_msg(NVERB_FATAL,'GEN','ADVEC_WENO_K_3_MY','WENO5/CYCL fluxes calculation needs JPHEXT (&NHALO) >= 3 on south side') ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3) ! Third positive stencil, needs indices i-1, i, i+1 ZFPOS3(:,IS+1,:) = 1./6 * (2.0*PSRC(:,IS,:) + 5.0*PSRC(:,IS+1,:) - PSRC(:,IS+2,:)) ! Flux IS+1 ZFPOS3(:,IS,:) = 1./6 * (2.0*PSRC(:,IS-1,:) + 5.0*PSRC(:,IS,:) - PSRC(:,IS+1,:)) ! Flux IS @@ -1952,7 +1968,7 @@ IF(LSOUTH_ll()) THEN ! Open, or Wall, or Nest boundary condition => WENO order reduction !--------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(PR,ZFPOS1,ZFPOS2,ZBPOS1,ZBPOS2,ZFNEG1,ZFNEG2,ZBNEG1,ZBNEG2,ZOMP1,ZOMP2,ZOMN1,ZOMN2) ! WENO scheme order 1, IS PR(:,IS,:) = PSRC(:,IS-1,:) * (0.5+SIGN(0.5,PRVCT(:,IS,:))) + & PSRC(:,IS,:) * (0.5-SIGN(0.5,PRVCT(:,IS,:))) @@ -1997,7 +2013,8 @@ ELSE ! ! ----- Positive fluxes ----- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(:,IS+1,:) = 1./6. * (2.0*PSRC(:,IS-2,:) - 7.0*PSRC(:,IS-1,:) + 11.0*PSRC(:,IS,:)) ! Flux IS+1 ZFPOS1(:,IS,:) = 1./6. * (2.0*PSRC(:,IS-3,:) - 7.0*PSRC(:,IS-2,:) + 11.0*PSRC(:,IS-1,:)) ! Flux IS @@ -2088,7 +2105,7 @@ IF( LNORTH_ll() ) THEN ! IF (LSOUTH_ll() .AND. .FALSE. ) THEN ! South border is physical ! -!$acc kernels +!$acc kernels present_cr(ZFPOS3,ZBPOS3,ZOMP3,ZFNEG1,ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,ZOMN2) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(:,IN,:) = 1./6 * (2.0*PSRC(:,IN-1,:) + 5.0*PSRC(:,IN,:) - PSRC(:,IN+1,:)) ! Flux IN ZFPOS3(:,IN+1,:) = 1./6 * (2.0*PSRC(:,IN,:) + 5.0*PSRC(:,IN+1,:) - PSRC(:,IS,:)) ! Flux IN+1 @@ -2119,7 +2136,7 @@ IF( LNORTH_ll() ) THEN ! ELSEIF(IN<=SIZE(PSRC,2)-3) THEN ! South boundary is proc border, with minimum 3 HALO points on north side ! -!$acc kernels +!$acc kernels present_cr(ZFPOS3,ZBPOS3,ZOMP3,ZFNEG1,ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,ZOMN2) ! Third positive stencil, needs indices i, i+1, i+2 ZFPOS3(:,IN,:) = 1./6 * (2.0*PSRC(:,IN-1,:) + 5.0*PSRC(:,IN,:) - PSRC(:,IN+1,:)) ! Flux IN ZFPOS3(:,IN+1,:) = 1./6 * (2.0*PSRC(:,IN,:) + 5.0*PSRC(:,IN+1,:) - PSRC(:,IN+2,:)) ! Flux IN+1 @@ -2152,7 +2169,7 @@ IF( LNORTH_ll() ) THEN call Print_msg(NVERB_FATAL,'GEN','ADVEC_WENO_K_3_MY','WENO5/CYCL fluxes calculation needs JPHEXT (&NHALO) >= 3 on south side') ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFNEG3,ZBNEG3,ZOMN3) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(:,IN,:) = 1./6. * (2.0*PSRC(:,IN-3,:) - 7.0*PSRC(:,IN-2,:) + 11.0*PSRC(:,IN-1,:)) ! Flux IN ZFPOS1(:,IN+1,:) = 1./6. * (2.0*PSRC(:,IN-2,:) - 7.0*PSRC(:,IN-1,:) + 11.0*PSRC(:,IN,:)) ! Flux IN+1 @@ -2198,7 +2215,7 @@ IF( LNORTH_ll() ) THEN ! Open, or Wall, or Nest boundary condition => WENO order reduction !--------------------------------------------------------------------------- ! -!$acc kernels +!$acc kernels present_cr(PR,ZFPOS1,ZFPOS2,ZBPOS1,ZBPOS2,ZFNEG1,ZFNEG2,ZBNEG1,ZBNEG2,ZOMP1,ZOMP2,ZOMN1,ZOMN2,PR) ! WENO scheme order 1, IN+1 PR(:,IN+1,:) = PSRC(:,IN,:) * (0.5+SIGN(0.5,PRVCT(:,IN+1,:))) + & PSRC(:,IN+1,:) * (0.5-SIGN(0.5,PRVCT(:,IN+1,:))) @@ -2243,7 +2260,8 @@ ELSE ! ! ----- Positive fluxes ----- ! -!$acc kernels +!$acc kernels present_cr(ZFPOS1,ZBPOS1,ZOMP1,ZFPOS2,ZBPOS2,ZOMP2,ZFPOS3,ZBPOS3,ZOMP3,ZOMN2,ZFNEG1) & +!$acc & present_cr(ZBNEG1,ZOMN1,ZFNEG2,ZBNEG2,PR,ZOMN3) ! First positive stencil, needs indices i-3, i-2, i-1 ZFPOS1(:,IN,:) = 1./6. * (2.0*PSRC(:,IN-3,:) - 7.0*PSRC(:,IN-2,:) + 11.0*PSRC(:,IN-1,:)) ! Flux IN ZFPOS1(:,IN+1,:) = 1./6. * (2.0*PSRC(:,IN-2,:) - 7.0*PSRC(:,IN-1,:) + 11.0*PSRC(:,IN,:)) ! Flux IN+1 diff --git a/src/MNH/bl89.f90 b/src/MNH/bl89.f90 index 01e89df1f4a0a8a0674b2389c4dc5ae03ef4509b..dbd82b24e0a1880ade50638fa82d54a92cf4b21b 100644 --- a/src/MNH/bl89.f90 +++ b/src/MNH/bl89.f90 @@ -358,7 +358,9 @@ END IF !but algorithm must remain the same. !!!!!!!!!!!! -!$acc kernels +!$acc kernels present_cr(ZTEST0,ZPOTE,ZTEST,ZTESTM,ZLWORK1,ZLWORK2,ZLWORK,ZINTE) & +!$acc & present_cr(ZTEST0,ZPOTE,ZTEST,ZTESTM,ZLWORK1,ZLWORK2,ZLWORK,ZINTE) & +!$acc & present_cr(ZLWORK1,ZLWORK2,ZPOTE,ZLWORK2,ZLM) ZDELTVPT(:,IKTB:IKTE)=ZVPT(:,IKTB:IKTE)-ZVPT(:,IKTB-KKL:IKTE-KKL) ZDELTVPT(:,KKU)=ZVPT(:,KKU)-ZVPT(:,KKU-KKL) ZDELTVPT(:,KKA)=0. diff --git a/src/MNH/ice4_sedimentation_split.f90 b/src/MNH/ice4_sedimentation_split.f90 index c6c09affbec29fb50dafaf29292584689550fbae..de1f7577609e07aaae8dd1eaf70fafe5abc8b4f1 100644 --- a/src/MNH/ice4_sedimentation_split.f90 +++ b/src/MNH/ice4_sedimentation_split.f90 @@ -648,7 +648,7 @@ DO WHILE (ANY(ZREMAINT>0.)) !******* for cloud ZWSED(:,:,:) = 0. !$acc loop independent private(JI,JJ,JK,JL,ZZWLBDC,ZRAY,ZZT,ZZWLBDA,ZZCC) - DO JL=1, ISEDIM + DO CONCURRENT( JL = 1 : ISEDIM ) JI=I1(JL) JJ=I2(JL) JK=I3(JL) @@ -680,7 +680,7 @@ DO WHILE (ANY(ZREMAINT>0.)) ! ******* for pristine ice ZWSED(:,:,:) = 0. !$acc loop independent private(JI,JJ,JK,JL) - DO JL=1, ISEDIM + DO CONCURRENT( JL = 1 : ISEDIM ) JI=I1(JL) JJ=I2(JL) JK=I3(JL) @@ -731,7 +731,7 @@ DO WHILE (ANY(ZREMAINT>0.)) !$acc kernels ZWSED(:,:,:) = 0. !$acc loop independent private(JI,JJ,JK,JL) - DO JL=1, ISEDIM + DO CONCURRENT( JL = 1 : ISEDIM ) JI=I1(JL) JJ=I2(JL) JK=I3(JL) diff --git a/src/ZSOLVER/contrav.f90 b/src/ZSOLVER/contrav.f90 index ac9d433709c107859df5a69ba33f72f5d045f418..a1e367d3569d5ede930f11c592beccd279db5f0c 100644 --- a/src/ZSOLVER/contrav.f90 +++ b/src/ZSOLVER/contrav.f90 @@ -615,7 +615,64 @@ CALL GET_INDICE_ll( IIB,IJB,IIE,IJE) IKB=1+JPVEXT IKE=IKU - JPVEXT -!$acc data present( PRUT, PRVT, PRWT, PDXX, PDYY, PDZZ, PDZX, PDZY, PRUCT, PRVCT, PRWCT, Z1, Z2 ) +IF (KADV_ORDER == 4 ) THEN + IF( .NOT. LFLAT) THEN + IF ( GFIRST_CALL_CONTRAV_DEVICE ) THEN + GFIRST_CALL_CONTRAV_DEVICE = .FALSE. + NULLIFY(TZHALO2_U) + NULLIFY(TZHALO2_V) + NULLIFY(TZHALO2_DZX) + NULLIFY(TZHALO2_DZY) + CALL INIT_HALO2_ll(TZHALO2_U,1,IIU,IJU,IKU) + CALL INIT_HALO2_ll(TZHALO2_V,1,IIU,IJU,IKU) + CALL INIT_HALO2_ll(TZHALO2_DZX,1,IIU,IJU,IKU) + CALL INIT_HALO2_ll(TZHALO2_DZY,1,IIU,IJU,IKU) + END IF + ZU_EAST => TZHALO2_U%HALO2%EAST + ZDZX_EAST => TZHALO2_DZX%HALO2%EAST + ZV_NORTH => TZHALO2_V%HALO2%NORTH + ZDZY_NORTH => TZHALO2_DZY%HALO2%NORTH + END IF +END IF + +CALL CONTRAV_DEVICE_DIM(& + PRUT,PRVT,PRWT,& + PDXX,PDYY,PDZZ,PDZX,PDZY,& + PRUCT,PRVCT,PRWCT,Z1,Z2,& + ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) + +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() + +IF (MPPDB_INITIALIZED) THEN + !Check all OUT arrays + CALL MPPDB_CHECK(PRUCT,"CONTRAV end:PRUCT") + CALL MPPDB_CHECK(PRVCT,"CONTRAV end:PRVCT") + CALL MPPDB_CHECK(PRWCT,"CONTRAV end:PRWCT") +END IF + +CONTAINS + + SUBROUTINE CONTRAV_DEVICE_DIM(& + PRUT,PRVT,PRWT,& + PDXX,PDYY,PDZZ,PDZX,PDZY,& + PRUCT,PRVCT,PRWCT,Z1,Z2,& + ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) + + IMPLICIT NONE + + REAL, DIMENSION(IIU, IJU, IKU) :: & + PRUT,PRVT,PRWT,& + PDXX,PDYY,PDZZ,PDZX,PDZY,& + PRUCT,PRVCT,PRWCT,Z1,Z2 + REAL :: ZU_EAST(IJU,IKU),ZV_NORTH(IIU,IKU),ZDZX_EAST(IJU,IKU),ZDZY_NORTH(IIU,IKU) + +! +! Begin Compute +! + +!$acc data present( PRUT, PRVT, PRWT, PDXX, PDYY, PDZZ, PDZX, PDZY, PRUCT, PRVCT, PRWCT, Z1, Z2 ) & +!$acc& present(ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) IF (GDATA_ON_DEVICE) THEN !PW TODO:remplacer (ailleurs aussi...) 1/PDXX... par PINV_PDXX (fait pour la turbulence...) cfr MNH/turb_hor_splt.f90 @@ -642,17 +699,17 @@ IF (KADV_ORDER == 4 ) THEN !!$ NULLIFY(TZFIELD_DZY) !!$ CALL ADD3DFIELD_ll( TZFIELD_DZX, PDZX, 'CONTRAV::PDZX' ) !!$ CALL ADD3DFIELD_ll( TZFIELD_DZY, PDZY, 'CONTRAV::PDZY' ) - IF ( GFIRST_CALL_CONTRAV_DEVICE ) THEN - GFIRST_CALL_CONTRAV_DEVICE = .FALSE. - NULLIFY(TZHALO2_U) - NULLIFY(TZHALO2_V) - NULLIFY(TZHALO2_DZX) - NULLIFY(TZHALO2_DZY) - CALL INIT_HALO2_ll(TZHALO2_U,1,IIU,IJU,IKU) - CALL INIT_HALO2_ll(TZHALO2_V,1,IIU,IJU,IKU) - CALL INIT_HALO2_ll(TZHALO2_DZX,1,IIU,IJU,IKU) - CALL INIT_HALO2_ll(TZHALO2_DZY,1,IIU,IJU,IKU) - END IF +!!$ IF ( GFIRST_CALL_CONTRAV_DEVICE ) THEN +!!$ GFIRST_CALL_CONTRAV_DEVICE = .FALSE. +!!$ NULLIFY(TZHALO2_U) +!!$ NULLIFY(TZHALO2_V) +!!$ NULLIFY(TZHALO2_DZX) +!!$ NULLIFY(TZHALO2_DZY) +!!$ CALL INIT_HALO2_ll(TZHALO2_U,1,IIU,IJU,IKU) +!!$ CALL INIT_HALO2_ll(TZHALO2_V,1,IIU,IJU,IKU) +!!$ CALL INIT_HALO2_ll(TZHALO2_DZX,1,IIU,IJU,IKU) +!!$ CALL INIT_HALO2_ll(TZHALO2_DZY,1,IIU,IJU,IKU) +!!$ END IF !!$ CALL UPDATE_HALO2_ll(TZFIELD_U, TZHALO2_U, IINFO_ll) !!$ CALL UPDATE_HALO2_ll(TZFIELD_V, TZHALO2_V, IINFO_ll) !!$ CALL UPDATE_HALO2_ll(TZFIELD_DZX, TZHALO2_DZX, IINFO_ll) @@ -670,14 +727,14 @@ IF (KADV_ORDER == 4 ) THEN !!$ ALLOCATE(ZU_EAST(IJU,IKU),ZV_NORTH(IIU,IKU),ZDZX_EAST(IJU,IKU),ZDZY_NORTH(IIU,IKU)) !!$ !$acc enter data create( zu_east, zv_north, zdzx_east, zdzy_north ) !!$ !$acc kernels - ZU_EAST => TZHALO2_U%HALO2%EAST - ZDZX_EAST => TZHALO2_DZX%HALO2%EAST - ZV_NORTH => TZHALO2_V%HALO2%NORTH - ZDZY_NORTH => TZHALO2_DZY%HALO2%NORTH +!!$ ZU_EAST => TZHALO2_U%HALO2%EAST +!!$ ZDZX_EAST => TZHALO2_DZX%HALO2%EAST +!!$ ZV_NORTH => TZHALO2_V%HALO2%NORTH +!!$ ZDZY_NORTH => TZHALO2_DZY%HALO2%NORTH !!$ !$acc end kernels !!$!$acc update device(ZU_EAST,ZV_NORTH,ZDZX_EAST,ZDZY_NORTH) - END IF -END IF + END IF ! NOT FLAT +END IF ! KADV_ORDER == 4 ! ! !* 2. Compute the vertical contravariant components (flat case) @@ -897,15 +954,11 @@ END IF FLAT !$acc end data -!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN -CALL MNH_MEM_RELEASE() - -IF (MPPDB_INITIALIZED) THEN - !Check all OUT arrays - CALL MPPDB_CHECK(PRUCT,"CONTRAV end:PRUCT") - CALL MPPDB_CHECK(PRVCT,"CONTRAV end:PRVCT") - CALL MPPDB_CHECK(PRWCT,"CONTRAV end:PRWCT") -END IF +! +! End Compute +! + + END SUBROUTINE CONTRAV_DEVICE_DIM !----------------------------------------------------------------------- END SUBROUTINE CONTRAV_DEVICE diff --git a/src/ZSOLVER/finalize_mnh.f90 b/src/ZSOLVER/finalize_mnh.f90 new file mode 100644 index 0000000000000000000000000000000000000000..f5436387a43ff254b9700ae929d7c3b139d9a34f --- /dev/null +++ b/src/ZSOLVER/finalize_mnh.f90 @@ -0,0 +1,82 @@ +!MNH_LIC Copyright 2021-2021 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence +!MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +!MNH_LIC for details. version 1. +!----------------------------------------------------------------- +! Author: +! P. Wautelet 06/07/2021 +! Modifications: +! +!----------------------------------------------------------------- +MODULE MODE_FINALIZE_MNH + +IMPLICIT NONE + +CONTAINS + +SUBROUTINE FINALIZE_MNH + USE MODD_CONF, only: LCHECK, NMODEL + USE MODD_IO, only: NIO_VERB, NVERB_DEBUG + USE MODD_LUNIT, only: TLUOUT0 + USE MODD_LUNIT_n, only: LUNIT_MODEL + USE MODD_MNH_SURFEX_n, only: SURFEX_DEALLO_LIST +#ifdef CPLOASIS + USE MODD_SFX_OASIS, only: LOASIS +#endif + + USE MODE_INIT_ll, only: END_PARA_ll + USE MODE_IO_FILE, only: IO_File_close + USE MODE_IO_MANAGE_STRUCT, only: IO_Filelist_print +#ifdef MNH_OPENACC + USE MODE_MNH_ZWORK, only: PRINT_FLATPOOL_STATS +#endif + USE multigrid, only: mg_finalise + USE MODE_MPPDB, only: MPPDB_BARRIER + USE MODE_MSG, only: MSG_STATS + +#ifdef CPLOASIS + USE MODI_SFX_OASIS_END +#endif + + IMPLICIT NONE + + INTEGER :: IRESP + INTEGER :: JMODEL + + !Print the list of all files and some statistics on them + IF ( NIO_VERB >= NVERB_DEBUG ) CALL IO_Filelist_print() + +#ifdef MNH_OPENACC + CALL PRINT_FLATPOOL_STATS() +#endif + call mg_finalise() + + !Print the number of printed messages via Print_msg + CALL MSG_STATS() + + !Close all the opened 'output listing' files + IF ( TLUOUT0%LOPENED ) CALL IO_File_close(TLUOUT0) + DO JMODEL = 1, NMODEL + IF ( ASSOCIATED( LUNIT_MODEL(JMODEL)%TLUOUT ) ) THEN + IF ( LUNIT_MODEL(JMODEL)%TLUOUT%LOPENED ) CALL IO_File_close( LUNIT_MODEL(JMODEL)%TLUOUT) + END IF + END DO + + !Finalize the parallel libraries + IF ( LCHECK ) THEN + CALL MPPDB_BARRIER() + ELSE + CALL END_PARA_ll( IRESP ) +#ifdef CPLOASIS + IF ( LOASIS ) THEN + CALL SFX_OASIS_END() + END IF +#endif + END IF + + !Free SURFEX structures if necessary + CALL SURFEX_DEALLO_LIST() + +END SUBROUTINE FINALIZE_MNH + +END MODULE MODE_FINALIZE_MNH diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 index f69262deff758a763c32b3ddf4c5265e4bc413e8..77f0f54eea9d6a6d4963233f27d48f212c42af38 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/communication.f90 @@ -697,6 +697,8 @@ contains integer :: icompx_max,icompy_max real , dimension(:,:,:) , pointer , contiguous :: za_st + + integer :: nib,nie,njb,nje,nzb,nze ! Update Real Boundary for Newman case u(0) = u(1) , etc ... @@ -728,31 +730,45 @@ contains icompx_max = a%icompx_max icompy_max = a%icompy_max - !$acc kernels - if ( ix_min == 1 ) then - !acc kernels - za_st(0,:,:) = za_st(1,:,:) - !acc end kernels - endif - if ( ix_max == n ) then - !acc kernels - za_st(icompx_max+1,:,:) = za_st(icompx_max,:,:) - !acc end kernels - endif - if ( iy_min == 1 ) then - !acc kernels - za_st(:,0,:) = za_st(:,1,:) - !acc end kernels - endif - if ( iy_max == n ) then - !acc kernels - za_st(:,icompy_max+1,:) = za_st(:,icompy_max,:) - !acc end kernels - endif - !$acc end kernels - + nib = Lbound(za_st,1) ; nie = Ubound(za_st,1) + njb = Lbound(za_st,2) ; nje = Ubound(za_st,2) + nzb = Lbound(za_st,3) ; nze = Ubound(za_st,3) + + call boundary_mnh_dim(za_st) + endif + contains + subroutine boundary_mnh_dim(pza_st) + implicit none + + real :: pza_st(nib:nie,njb:nje,nzb:nze) + + !$acc kernels + if ( ix_min == 1 ) then + !acc kernels + pza_st(0,:,:) = pza_st(1,:,:) + !acc end kernels + endif + if ( ix_max == n ) then + !acc kernels + pza_st(icompx_max+1,:,:) = pza_st(icompx_max,:,:) + !acc end kernels + endif + if ( iy_min == 1 ) then + !acc kernels + pza_st(:,0,:) = pza_st(:,1,:) + !acc end kernels + endif + if ( iy_max == n ) then + !acc kernels + pza_st(:,icompy_max+1,:) = pza_st(:,icompy_max,:) + !acc end kernels + endif + !$acc end kernels + + end subroutine boundary_mnh_dim + end subroutine boundary_mnh !================================================================== ! Initiate asynchronous halo exchange @@ -1125,6 +1141,77 @@ contains requests_nsT(:) = MPI_REQUEST_NULL requests_ewT(:) = MPI_REQUEST_NULL ! + ! Init Pointer + ! +#ifdef MNH_GPUDIRECT + if (LUseT) then + ! Send to south + if (Gneighbour_s) then + ztab_halo_st_haloTin => tab_halo_st(level,m)%haloTin + end if + ! Send to north + if (Gneighbour_n) then + ztab_halo_nt_haloTin => tab_halo_nt(level,m)%haloTin + end if + ! Send to east + if (Gneighbour_e) then + ztab_halo_et_haloTin => tab_halo_et(level,m)%haloTin + end if + ! Send to west + if (Gneighbour_w) then + ztab_halo_wt_haloTin => tab_halo_wt(level,m)%haloTin + end if + ! Receive from north + if (Gneighbour_n) then + ztab_halo_nt_haloTout => tab_halo_nt(level,m)%haloTout + end if + ! Receive from south + if (Gneighbour_s) then + ztab_halo_st_haloTout => tab_halo_st(level,m)%haloTout + end if + ! Receive from west + if (Gneighbour_w) then + ztab_halo_wt_haloTout => tab_halo_wt(level,m)%haloTout + end if + ! Receive from east + if (Gneighbour_e) then + ztab_halo_et_haloTout => tab_halo_et(level,m)%haloTout + end if + end if +#endif + ! + call haloswap_mnh_dim(ztab_halo_st_haloTin,ztab_halo_nt_haloTin,& + ztab_halo_et_haloTin,ztab_halo_wt_haloTin,& + ztab_halo_nt_haloTout,ztab_halo_st_haloTout,& + ztab_halo_wt_haloTout,ztab_halo_et_haloTout,& + zst) + ! + end if! (stepsize == 1) ... + if (comm_measuretime) then + call finish_timer(t_haloswap(level,m)) + end if + end if ! (m > 0) + +contains + subroutine haloswap_mnh_dim(pztab_halo_st_haloTin,pztab_halo_nt_haloTin,& + pztab_halo_et_haloTin,pztab_halo_wt_haloTin,& + pztab_halo_nt_haloTout,pztab_halo_st_haloTout,& + pztab_halo_wt_haloTout,pztab_halo_et_haloTout,& + pzst) + + implicit none + real :: pztab_halo_st_haloTin(1:a_n,1:halo_size,1:nz+2), & + pztab_halo_nt_haloTin(1:a_n,1:halo_size,1:nz+2), & + pztab_halo_et_haloTin(1:halo_size,1:a_n+2*halo_size,1:nz+2), & + pztab_halo_wt_haloTin(1:halo_size,1:a_n+2*halo_size,1:nz+2), & + pztab_halo_nt_haloTout(1:a_n,1:halo_size,1:nz+2), & + pztab_halo_st_haloTout(1:a_n,1:halo_size,1:nz+2), & + pztab_halo_wt_haloTout(1:halo_size,1:a_n+2*halo_size,1:nz+2), & + pztab_halo_et_haloTout(1:halo_size,1:a_n+2*halo_size,1:nz+2), & + pzst(1-halo_size:a_n+halo_size,1-halo_size:a_n+halo_size,0:nz+1) + ! + ! Do Comm + ! #ifdef MNH_GPUDIRECT if (LUseT) then ! @@ -1132,37 +1219,37 @@ contains ! ! Send to south if (Gneighbour_s) then - ztab_halo_st_haloTin => tab_halo_st(level,m)%haloTin - !$acc kernels async(IS_SOUTH) present_cr(zst,ztab_halo_st_haloTin) +!!$ pztab_halo_st_haloTin => tab_halo_st(level,m)%haloTin + !$acc kernels async(IS_SOUTH) present_cr(pzst,pztab_halo_st_haloTin) !$mnh_do_concurrent( ii=1:a_n,ij=1:halo_size,ik=1:nz+2 ) - ztab_halo_st_haloTin(ii,ij,ik) = zst(ii,ij+a_n-halo_size,ik-1) + pztab_halo_st_haloTin(ii,ij,ik) = pzst(ii,ij+a_n-halo_size,ik-1) !$mnh_end_do() !$acc end kernels end if ! Send to north if (Gneighbour_n) then - ztab_halo_nt_haloTin => tab_halo_nt(level,m)%haloTin - !$acc kernels async(IS_NORTH) present_cr(zst,ztab_halo_nt_haloTin) +!!$ pztab_halo_nt_haloTin => tab_halo_nt(level,m)%haloTin + !$acc kernels async(IS_NORTH) present_cr(pzst,pztab_halo_nt_haloTin) !$mnh_do_concurrent( ii=1:a_n,ij=1:halo_size,ik=1:nz+2 ) - ztab_halo_nt_haloTin(ii,ij,ik) = zst(ii,ij,ik-1) + pztab_halo_nt_haloTin(ii,ij,ik) = pzst(ii,ij,ik-1) !$mnh_end_do() !$acc end kernels end if ! Send to east if (Gneighbour_e) then - ztab_halo_et_haloTin => tab_halo_et(level,m)%haloTin - !$acc kernels async(IS_EAST) present_cr(zst,ztab_halo_et_haloTin) +!!$ pztab_halo_et_haloTin => tab_halo_et(level,m)%haloTin + !$acc kernels async(IS_EAST) present_cr(pzst,pztab_halo_et_haloTin) !$mnh_do_concurrent( ii=1:halo_size,ij=1:a_n+2*halo_size,ik=1:nz+2 ) - ztab_halo_et_haloTin(ii,ij,ik) = zst(ii+a_n-halo_size,ij-halo_size,ik-1) + pztab_halo_et_haloTin(ii,ij,ik) = pzst(ii+a_n-halo_size,ij-halo_size,ik-1) !$mnh_end_do() !$acc end kernels end if ! Send to west if (Gneighbour_w) then - ztab_halo_wt_haloTin => tab_halo_wt(level,m)%haloTin - !$acc kernels async(IS_WEST) present_cr(zst,ztab_halo_wt_haloTin) +!!$ pztab_halo_wt_haloTin => tab_halo_wt(level,m)%haloTin + !$acc kernels async(IS_WEST) present_cr(pzst,pztab_halo_wt_haloTin) !$mnh_do_concurrent( ii=1:halo_size,ij=1:a_n+2*halo_size,ik=1:nz+2 ) - ztab_halo_wt_haloTin(ii,ij,ik) = zst(ii,ij-halo_size,ik-1) + pztab_halo_wt_haloTin(ii,ij,ik) = pzst(ii,ij-halo_size,ik-1) !$mnh_end_do() !$acc end kernels end if @@ -1177,14 +1264,14 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_n) then - ztab_halo_nt_haloTout => tab_halo_nt(level,m)%haloTout - !$acc host_data use_device(ztab_halo_nt_haloTout) - call mpi_irecv(ztab_halo_nt_haloTout,size(ztab_halo_nt_haloTout), & +!!$ pztab_halo_nt_haloTout => tab_halo_nt(level,m)%haloTout + !$acc host_data use_device(pztab_halo_nt_haloTout) + call mpi_irecv(pztab_halo_nt_haloTout,size(pztab_halo_nt_haloTout), & MPI_DOUBLE_PRECISION,neighbour_n_rank,recvtag, & MPI_COMM_HORIZ, requests_nsT(1), ierr) !$acc end host_data end if - !print*,"mpi_irecv(ztab_halo_nt_haloTout,neighbour_n_rank=",neighbour_n_rank + !print*,"mpi_irecv(pztab_halo_nt_haloTout,neighbour_n_rank=",neighbour_n_rank #else call mpi_irecv(a%st(1,0-(halo_size-1),0),1, & halo_nst(level,m),neighbour_n_rank,recvtag, & @@ -1200,14 +1287,14 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_s) then - ztab_halo_st_haloTout => tab_halo_st(level,m)%haloTout - !$acc host_data use_device (ztab_halo_st_haloTout) - call mpi_irecv(ztab_halo_st_haloTout,size(ztab_halo_st_haloTout), & +!!$ pztab_halo_st_haloTout => tab_halo_st(level,m)%haloTout + !$acc host_data use_device (pztab_halo_st_haloTout) + call mpi_irecv(pztab_halo_st_haloTout,size(pztab_halo_st_haloTout), & MPI_DOUBLE_PRECISION,neighbour_s_rank,recvtag, & MPI_COMM_HORIZ, requests_nsT(2), ierr) !$acc end host_data end if - !print*,"mpi_irecv(ztab_halo_st_haloTout,neighbour_s_rank=",neighbour_s_rank + !print*,"mpi_irecv(pztab_halo_st_haloTout,neighbour_s_rank=",neighbour_s_rank #else call mpi_irecv(a%st(1,a_n+1,0),1, & halo_nst(level,m),neighbour_s_rank,recvtag, & @@ -1229,13 +1316,13 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_s) then - !$acc host_data use_device(ztab_halo_st_haloTin) - call mpi_isend(ztab_halo_st_haloTin,size(ztab_halo_st_haloTin), & + !$acc host_data use_device(pztab_halo_st_haloTin) + call mpi_isend(pztab_halo_st_haloTin,size(pztab_halo_st_haloTin), & MPI_DOUBLE_PRECISION,neighbour_s_rank,sendtag, & MPI_COMM_HORIZ, requests_nsT(3), ierr) !$acc end host_data end if - !print*,"mpi_isend(ztab_halo_st_haloTin,neighbour_s_rank=",neighbour_s_rank + !print*,"mpi_isend(pztab_halo_st_haloTin,neighbour_s_rank=",neighbour_s_rank #else call mpi_isend(a%st(1,a_n-(halo_size-1),0),1, & halo_nst(level,m),neighbour_s_rank,sendtag, & @@ -1251,13 +1338,13 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_n) then - !$acc host_data use_device(ztab_halo_nt_haloTin) - call mpi_isend(ztab_halo_nt_haloTin,size(ztab_halo_nt_haloTin), & + !$acc host_data use_device(pztab_halo_nt_haloTin) + call mpi_isend(pztab_halo_nt_haloTin,size(pztab_halo_nt_haloTin), & MPI_DOUBLE_PRECISION,neighbour_n_rank,sendtag, & MPI_COMM_HORIZ, requests_nsT(4), ierr) !$acc end host_data end if - !print*,"mpi_isend(ztab_halo_nt_haloTin,neighbour_n_rank=",neighbour_n_rank + !print*,"mpi_isend(pztab_halo_nt_haloTin,neighbour_n_rank=",neighbour_n_rank #else call mpi_isend(a%st(1,1,0),1, & halo_nst(level,m),neighbour_n_rank,sendtag, & @@ -1278,14 +1365,14 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_w) then - ztab_halo_wt_haloTout => tab_halo_wt(level,m)%haloTout - !$acc host_data use_device(ztab_halo_wt_haloTout) - call mpi_irecv(ztab_halo_wt_haloTout,size(ztab_halo_wt_haloTout), & +!!$ pztab_halo_wt_haloTout => tab_halo_wt(level,m)%haloTout + !$acc host_data use_device(pztab_halo_wt_haloTout) + call mpi_irecv(pztab_halo_wt_haloTout,size(pztab_halo_wt_haloTout), & MPI_DOUBLE_PRECISION,neighbour_w_rank,recvtag, & MPI_COMM_HORIZ, requests_ewT(1), ierr) !$acc end host_data end if - !print*,"mpi_irecv(ztab_halo_wt_haloTout,neighbour_w_rank=",neighbour_w_rank + !print*,"mpi_irecv(pztab_halo_wt_haloTout,neighbour_w_rank=",neighbour_w_rank #else call mpi_irecv(a%st(0-(halo_size-1),0,0),1, & halo_wet(level,m),neighbour_w_rank,recvtag, & @@ -1301,14 +1388,14 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_e) then - ztab_halo_et_haloTout => tab_halo_et(level,m)%haloTout - !$acc host_data use_device(ztab_halo_et_haloTout) - call mpi_irecv(ztab_halo_et_haloTout,size(ztab_halo_et_haloTout), & +!!$ pztab_halo_et_haloTout => tab_halo_et(level,m)%haloTout + !$acc host_data use_device(pztab_halo_et_haloTout) + call mpi_irecv(pztab_halo_et_haloTout,size(pztab_halo_et_haloTout), & MPI_DOUBLE_PRECISION,neighbour_e_rank,recvtag, & MPI_COMM_HORIZ, requests_ewT(2), ierr) !$acc end host_data end if - !print*,"mpi_irecv(ztab_halo_et_haloTout,neighbour_e_rank=",neighbour_e_rank + !print*,"mpi_irecv(pztab_halo_et_haloTout,neighbour_e_rank=",neighbour_e_rank #else call mpi_irecv(a%st(a_n+1,0,0),1, & halo_wet(level,m),neighbour_e_rank,recvtag, & @@ -1325,13 +1412,13 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_e) then - !$acc host_data use_device(ztab_halo_et_haloTin) - call mpi_isend(ztab_halo_et_haloTin,size(ztab_halo_et_haloTin), & + !$acc host_data use_device(pztab_halo_et_haloTin) + call mpi_isend(pztab_halo_et_haloTin,size(pztab_halo_et_haloTin), & MPI_DOUBLE_PRECISION,neighbour_e_rank,sendtag, & MPI_COMM_HORIZ, requests_ewT(3), ierr) !$acc end host_data end if - !print*,"mpi_isend(ztab_halo_et_haloTin,neighbour_e_rank=",neighbour_e_rank + !print*,"mpi_isend(pztab_halo_et_haloTin,neighbour_e_rank=",neighbour_e_rank #else call mpi_isend(a%st(a_n-(halo_size-1),0,0),1, & halo_wet(level,m),neighbour_e_rank,sendtag, & @@ -1347,13 +1434,13 @@ contains if (LUseT) then #ifdef MNH_GPUDIRECT if (Gneighbour_w) then - !$acc host_data use_device(ztab_halo_wt_haloTin) - call mpi_isend(ztab_halo_wt_haloTin,size(ztab_halo_wt_haloTin), & + !$acc host_data use_device(pztab_halo_wt_haloTin) + call mpi_isend(pztab_halo_wt_haloTin,size(pztab_halo_wt_haloTin), & MPI_DOUBLE_PRECISION,neighbour_w_rank,sendtag, & MPI_COMM_HORIZ, requests_ewT(4), ierr) !$acc end host_data end if - !print*,"mpi_isend(ztab_halo_wt_haloTin,neighbour_w_rank=",neighbour_w_rank + !print*,"mpi_isend(pztab_halo_wt_haloTin,neighbour_w_rank=",neighbour_w_rank #else call mpi_isend(a%st(1,0,0),1, & halo_wet(level,m),neighbour_w_rank,sendtag, & @@ -1372,33 +1459,33 @@ contains if (LUseT) then if (Gneighbour_n) then ! copy north halo for GPU managed - !$acc kernels async(IS_NORTH) present_cr(zst,ztab_halo_nt_haloTout) + !$acc kernels async(IS_NORTH) present_cr(pzst,pztab_halo_nt_haloTout) !$mnh_do_concurrent( ii=1:a_n,ij=1:halo_size,ik=1:nz+2 ) - zst(ii,ij-halo_size,ik-1) = ztab_halo_nt_haloTout(ii,ij,ik) + pzst(ii,ij-halo_size,ik-1) = pztab_halo_nt_haloTout(ii,ij,ik) !$mnh_end_do() !$acc end kernels end if if (Gneighbour_s) then ! copy south halo for GPU managed - !$acc kernels async(IS_SOUTH) present_cr(zst,ztab_halo_st_haloTout) + !$acc kernels async(IS_SOUTH) present_cr(pzst,pztab_halo_st_haloTout) !$mnh_do_concurrent( ii=1:a_n,ij=1:halo_size,ik=1:nz+2 ) - zst(ii,ij+a_n,ik-1) = ztab_halo_st_haloTout(ii,ij,ik) + pzst(ii,ij+a_n,ik-1) = pztab_halo_st_haloTout(ii,ij,ik) !$mnh_end_do() !$acc end kernels end if if (Gneighbour_w) then ! copy west halo for GPU managed - !$acc kernels async(IS_WEST) present_cr(zst,ztab_halo_wt_haloTout) + !$acc kernels async(IS_WEST) present_cr(pzst,pztab_halo_wt_haloTout) !$mnh_do_concurrent( ii=1:halo_size,ij=1:a_n+2*halo_size,ik=1:nz+2 ) - zst(ii-halo_size,ij-halo_size,ik-1) = ztab_halo_wt_haloTout(ii,ij,ik) + pzst(ii-halo_size,ij-halo_size,ik-1) = pztab_halo_wt_haloTout(ii,ij,ik) !$mnh_end_do() !$acc end kernels end if if (Gneighbour_e) then ! copy east halo for GPU managed - !$acc kernels async(IS_EAST) present_cr(zst,ztab_halo_et_haloTout) + !$acc kernels async(IS_EAST) present_cr(pzst,pztab_halo_et_haloTout) !$mnh_do_concurrent( ii=1:halo_size,ij=1:a_n+2*halo_size,ik=1:nz+2 ) - zst(ii+a_n,ij-halo_size,ik-1) = ztab_halo_et_haloTout(ii,ij,ik) + pzst(ii+a_n,ij-halo_size,ik-1) = pztab_halo_et_haloTout(ii,ij,ik) !$mnh_end_do() !$acc end kernels end if @@ -1406,13 +1493,11 @@ contains call acc_wait_haloswap_mnh() end if #endif - end if! (stepsize == 1) ... - if (comm_measuretime) then - call finish_timer(t_haloswap(level,m)) - end if - end if - - contains + ! + ! End Comm + ! + end subroutine haloswap_mnh_dim + subroutine acc_wait_haloswap_mnh() if (Gneighbour_s) then !$acc wait(IS_SOUTH) diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 index afc9d71dcf2276959a97e60486eb2012d92e86ae..edbe5ebfc57b22b3c2543f9445bb52b0d28a70fe 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/discretisation.f90 @@ -663,6 +663,8 @@ end subroutine construct_vertical_coeff real , dimension(:,:,:) , pointer , contiguous :: zr_st , zb_st + integer :: nib,nie,njb,nje,nzb,nze + ! r <- A.u !call boundary_mnh(u) call apply_mnh(u,r) @@ -694,11 +696,29 @@ end subroutine construct_vertical_coeff zr_st => r%st zb_st => b%st - !$acc kernels present(zr_st,zb_st) - zr_st(iib:iie,ijb:ije,ikb:ike) = zb_st(iib:iie,ijb:ije,ikb:ike) - zr_st(iib:iie,ijb:ije,ikb:ike) - !$acc end kernels + + nib = Lbound(zr_st,1) ; nie = Ubound(zr_st,1) + njb = Lbound(zr_st,2) ; nje = Ubound(zr_st,2) + nzb = Lbound(zr_st,3) ; nze = Ubound(zr_st,3) + + call calculate_residual_mnh_dim(zr_st,zb_st) + endif + contains + + subroutine calculate_residual_mnh_dim(pzr_st,pzb_st) + implicit none + + real :: pzr_st(nib:nie,njb:nje,nzb:nze), & + pzb_st(nib:nie,njb:nje,nzb:nze) + + !$acc kernels present(pzr_st,pzb_st) + pzr_st(iib:iie,ijb:ije,ikb:ike) = pzb_st(iib:iie,ijb:ije,ikb:ike) - pzr_st(iib:iie,ijb:ije,ikb:ike) + !$acc end kernels + + end subroutine calculate_residual_mnh_dim + end subroutine calculate_residual_mnh subroutine calculate_residual(level,m,b,u,r) @@ -744,6 +764,8 @@ end subroutine construct_vertical_coeff integer :: ii,ij integer :: ize + integer :: nib,nie,njb,nje,nzb,nze + call boundary_mnh(u) if (LUseO) then @@ -792,37 +814,53 @@ end subroutine construct_vertical_coeff zb_k => vert_coeff%b zc_k => vert_coeff%c zd_k => vert_coeff%d + + nib = Lbound(zv_st,1) ; nie = Ubound(zv_st,1) + njb = Lbound(zv_st,2) ; nje = Ubound(zv_st,2) + nzb = Lbound(zv_st,3) ; nze = Ubound(zv_st,3) + + call apply_mnh_dim(zv_st,zu_st,zb_k,zc_k,zd_k) - !$acc kernels present_cr(zb_k,zv_st) - iz=1 - !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije ) - zv_st(ii,ij,iz) = zd_k(iz)* ( (-zb_k(iz)-zc_k(iz))*Tij * zu_st(ii,ij,iz ) & - +zb_k(iz) *Tij * zu_st(ii,ij,iz+1) ) - !$mnh_end_do() - - ! -!!$ !$acc loop seq -!!$ do iz=2,ize-1 - !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije , iz=2:ize-1 ) - zv_st(ii,ij,iz) = zd_k(iz)* ( ((-zb_k(iz)-zc_k(iz))*Tij - 4.0_rl ) * zu_st(ii,ij,iz) & - +zb_k(iz) *Tij * zu_st(ii,ij,iz+1) & - +zc_k(iz) *Tij * zu_st(ii,ij,iz-1) & - + zu_st(ii+1,ij,iz) & - + zu_st(ii-1,ij,iz) & - + zu_st(ii,ij+1,iz) & - + zu_st(ii,ij-1,iz) & + endif + + contains + + subroutine apply_mnh_dim(pzv_st,pzu_st,pzb_k,pzc_k,pzd_k) + + implicit none + + real :: pzv_st(nib:nie,njb:nje,nzb:nze), & + pzu_st(nib:nie,njb:nje,nzb:nze), & + pzb_k(ize),pzc_k(ize),pzd_k(ize) + + !$acc kernels present_cr(pzb_k,pzv_st,pzu_st) + iz=1 + !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije ) + pzv_st(ii,ij,iz) = pzd_k(iz)* ( (-pzb_k(iz)-pzc_k(iz))*Tij * pzu_st(ii,ij,iz ) & + +pzb_k(iz) *Tij * pzu_st(ii,ij,iz+1) ) + !$mnh_end_do() + + ! + !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije , iz=2:ize-1 ) + pzv_st(ii,ij,iz) = pzd_k(iz)* ( ((-pzb_k(iz)-pzc_k(iz))*Tij - 4.0_rl ) * pzu_st(ii,ij,iz) & + +pzb_k(iz) *Tij * pzu_st(ii,ij,iz+1) & + +pzc_k(iz) *Tij * pzu_st(ii,ij,iz-1) & + + pzu_st(ii+1,ij,iz) & + + pzu_st(ii-1,ij,iz) & + + pzu_st(ii,ij+1,iz) & + + pzu_st(ii,ij-1,iz) & ) !$mnh_end_do() -!!$ end do ! iz=ize !$mnh_do_concurrent( ii=iib:iie , ij=ijb:ije ) - zv_st(ii,ij,iz) = zd_k(iz)* ( (-zb_k(iz)-zc_k(iz))*Tij * zu_st(ii,ij,iz) & - +zc_k(iz) *Tij * zu_st(ii,ij,iz-1) ) + pzv_st(ii,ij,iz) = pzd_k(iz)* ( (-pzb_k(iz)-pzc_k(iz))*Tij * pzu_st(ii,ij,iz) & + +pzc_k(iz) *Tij * pzu_st(ii,ij,iz-1) ) !$mnh_end_do() - !$acc end kernels - endif + !$acc end kernels + end subroutine apply_mnh_dim + end subroutine apply_mnh subroutine apply(u,v) @@ -1661,30 +1699,57 @@ end subroutine construct_vertical_coeff end do end do end if - if (LUseT) then + + if (LUseT) then + iib=ixmin(iblock) iie=ixmax(iblock) ijb=iymin(iblock) ije=iymax(iblock) - + zu_st => u%st zSutmp_st => Sutmp%st zSut0_st => Sut0%st - + call apply_tridiag_solve_mnh_allT(iib,iie,ijb,ije,Sr,c,b, & - Sut0, & - Sutmp,level ) - !$acc kernels present_cr(zsut0_st,zu_st) - !$mnh_do_concurrent( ix=iib:iie , iy=ijb:ije , iz=1:nz ) - zu_st(ix,iy,iz) = & - rho*zSutmp_st(ix,iy,iz) & - + (1.0_rl-rho)*zSut0_st(ix,iy,iz) - !$mnh_end_do() ! concurrent - !$acc end kernels + Sut0, & + Sutmp,level ) + + call loop_over_grid_jacobi_mnh_dim(& + zu_st,zSutmp_st,zSut0_st,& + iib,iie,ijb,ije) + end if end subroutine loop_over_grid_jacobi_mnh +! contains + + subroutine loop_over_grid_jacobi_mnh_dim(& + zu_st,zSutmp_st,zSut0_st,& + iib,iie,ijb,ije) + + implicit none + + real :: zu_st (nib:nie,njb:nje,nkb:nke ),& + zSutmp_st(nib:nie,njb:nje,nkb:nke ),& + zSut0_st (nib:nie,njb:nje,nkb:nke ) + + integer :: iib,iie,ijb,ije + + ! local var + integer :: ix,iy,iz + + !$acc kernels present_cr(zsut0_st,zSutmp_st,zu_st) + !$mnh_do_concurrent( ix=iib:iie , iy=ijb:ije , iz=1:nz ) + zu_st(ix,iy,iz) = & + rho*zSutmp_st(ix,iy,iz) & + + (1.0_rl-rho)*zSut0_st(ix,iy,iz) + !$mnh_end_do() ! concurrent + !$acc end kernels + + end subroutine loop_over_grid_jacobi_mnh_dim + end subroutine line_Jacobi_mnh !================================================================== ! Jacobi line smoother @@ -2122,7 +2187,7 @@ end subroutine line_Jacobi_mnh integer :: ii,ij,iz ! acc kernels present(pzSr_st,pzSu_out_st,pzb_st,pzSu_in_st) - !$acc parallel present(pzSr_st,pzSu_out_st,pzb_st,pzSu_in_st) + !$acc parallel present(pzSr_st,pzSu_out_st,pzb_st,pzSu_in_st) & !$acc & present(pzc_k,pzd_k,ptmp_k,pc_k) !$mnh_do_concurrent ( ii=iib:iie , ij=ijb:ije ) iz=1 diff --git a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 index db1065fe259edd8a850de407f24b5a25e6c1cd19..b72b933725bbe147bc38280112bbe9d0a640f866 100644 --- a/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 +++ b/src/ZSOLVER/tensorproductmultigrid_Source/multigrid.f90 @@ -132,6 +132,8 @@ private type(time), allocatable, dimension(:,:) :: t_coarsesolve type(time), allocatable, dimension(:,:) :: t_total + logical :: Gmg_initialized = .FALSE. + contains !================================================================== @@ -172,6 +174,8 @@ contains real , dimension(:,:,:) , pointer , contiguous :: zxu_mg_st,zxb_mg_st,zxr_mg_st + Gmg_initialized = .TRUE. + if (i_am_master_mpi) & write(STDOUT,*) '*** Initialising multigrid ***' ! Check that cell counts are valid @@ -413,24 +417,28 @@ contains character(len=80) :: s integer :: ierr + If ( .not. Gmg_initialized ) return + if (i_am_master_mpi) & write(STDOUT,*) '*** Finalising multigrid ***' ! Deallocate memory level = mg_param%n_lev m = pproc reduced_m = .false. + !if (mg_param%verbose >= 0 ) call print_timerinfo("--- V-cycle timing results ---") do while (level > 0) - write(s,'("level = ",I3,", m = ",I3)') level,m - call print_timerinfo(s) - call print_elapsed(t_smooth(level,m),.True.,1.0_rl) - call print_elapsed(t_restrict(level,m),.True.,1.0_rl) - call print_elapsed(t_prolongate(level,m),.True.,1.0_rl) - call print_elapsed(t_residual(level,m),.True.,1.0_rl) - call print_elapsed(t_addcorr(level,m),.True.,1.0_rl) - call print_elapsed(t_coarsesolve(level,m),.True.,1.0_rl) - call print_elapsed(t_total(level,m),.True.,1.0_rl) - + if (mg_param%verbose >= 10 ) then + write(s,'("level = ",I3,", m = ",I3)') level,m + call print_timerinfo(s) + call print_elapsed(t_smooth(level,m),.True.,1.0_rl) + call print_elapsed(t_restrict(level,m),.True.,1.0_rl) + call print_elapsed(t_prolongate(level,m),.True.,1.0_rl) + call print_elapsed(t_residual(level,m),.True.,1.0_rl) + call print_elapsed(t_addcorr(level,m),.True.,1.0_rl) + call print_elapsed(t_coarsesolve(level,m),.True.,1.0_rl) + call print_elapsed(t_total(level,m),.True.,1.0_rl) + endif if (LUseO) then deallocate(xu_mg(level,m)%s) deallocate(xb_mg(level,m)%s) @@ -468,7 +476,8 @@ contains deallocate(t_residual) deallocate(t_addcorr) deallocate(t_coarsesolve) - if (i_am_master_mpi) write(STDOUT,'("")') + !if ( (i_am_master_mpi) ) write(STDOUT,'("")') + if ( (i_am_master_mpi) .and. (mg_param%verbose >= 10) ) write(STDOUT,'("")') end subroutine mg_finalise !================================================================== @@ -1763,6 +1772,7 @@ contains end if end if end if + if (mg_param%verbose >= 100 ) then call print_timerinfo("--- Main iteration timing results ---") call print_elapsed(t_apply,.True.,1.0_rl) call print_elapsed(t_res,.True.,1.0_rl) @@ -1770,6 +1780,7 @@ contains call print_elapsed(t_l2norm,.True.,1.0_rl) call print_elapsed(t_scalprod,.True.,1.0_rl) call print_elapsed(t_mainloop,.True.,1.0_rl) + end if !!$ if (i_am_master_mpi) write(STDOUT,'("")') end subroutine mg_solve_mnh