diff --git a/src/MNH/tools.f90 b/src/MNH/tools.f90 index 939db6cd461bf03d5860d148ac8b1b9762a3cb83..d72fe60251fe26df22fbb7fc4c6f7b69120da766 100644 --- a/src/MNH/tools.f90 +++ b/src/MNH/tools.f90 @@ -178,7 +178,7 @@ if (.not. mppdb_initialized ) then ic = 0 -!$acc kernels +! acc kernels !To allow comparisons... (i1 is not fully used) !Can be removed in production @@ -188,7 +188,7 @@ ic = 0 ! different runs of this subroutine BUT final result should be the same !Comment the following line + atomic directives to have consistent values for debugging !Warning: huge impact on performance -!$acc loop private(idx) independent +!$acc parallel loop private(idx) independent do ji = 1, size( ltab, 1 ) if ( ltab(ji ) ) then !$acc atomic capture @@ -198,7 +198,7 @@ ic = 0 i1(idx) = ji end if end do -!$acc end kernels +! acc end kernels else @@ -248,7 +248,7 @@ if (.not. mppdb_initialized ) then ic = 0 -!$acc kernels +! acc kernels !To allow comparisons... (i1/i2 are not fully used) !Can be removed in production @@ -259,7 +259,7 @@ ic = 0 ! different runs of this subroutine BUT final result should be the same !Comment the following line + atomic directives to have consistent values for debugging !Warning: huge impact on performance -!$acc loop collapse(2) private(idx) independent +!$acc parallel loop collapse(2) private(idx) independent do jj = 1, size( ltab, 2 ) do ji = 1, size( ltab, 1 ) if ( ltab(ji, jj ) ) then @@ -272,7 +272,7 @@ ic = 0 end if end do end do -!$acc end kernels +! acc end kernels else @@ -325,7 +325,7 @@ if (.not. mppdb_initialized ) then ic = 0 -!$acc kernels +! acc kernels !To allow comparisons... (i1/i2/i3 are not fully used) !Can be removed in production @@ -337,11 +337,13 @@ ic = 0 ! different runs of this subroutine BUT final result should be the same !Comment the following line + atomic directives to have consistent values for debugging !Warning: huge impact on performance -!$acc loop collapse(3) private(idx) independent -!!$ do jk = 1, size( ltab, 3 ) -!!$ do jj = 1, size( ltab, 2 ) -!!$ do ji = 1, size( ltab, 1 ) - do concurrent ( ji=1:size(ltab,1) , jj=1:size(ltab,2) , jk=1:size(ltab,3 ) ) +!#ifdef MNH_COMPILER_NVHPC +!$acc parallel loop collapse(3) private(idx) independent +!#endif + do jk = 1, size( ltab, 3 ) + do jj = 1, size( ltab, 2 ) + do ji = 1, size( ltab, 1 ) +!!$ do concurrent ( ji=1:size(ltab,1) , jj=1:size(ltab,2) , jk=1:size(ltab,3 ) ) if ( ltab(ji, jj, jk ) ) then !$acc atomic capture ic = ic +1 @@ -352,9 +354,9 @@ ic = 0 i3(idx) = jk end if end do -!!$ end do -!!$ end do -!$acc end kernels + end do + end do +! acc end kernels else