From 0defbfd4852b55095ffdcf113f2fdded883260c1 Mon Sep 17 00:00:00 2001
From: Philippe WAUTELET <philippe.wautelet@aero.obs-mip.fr>
Date: Thu, 31 Jan 2019 17:09:20 +0100
Subject: [PATCH] Philippe 31/01/2019: OpenACC: optimisation of ice4_fast_rg,
 ice4_fast_rh and ice4_fast_rs

---
 src/MNH/ice4_fast_rg.f90 | 103 +++++++++++++++++----------
 src/MNH/ice4_fast_rh.f90 | 150 +++++++++++++++++++++++++--------------
 src/MNH/ice4_fast_rs.f90 | 147 +++++++++++++++++++++-----------------
 3 files changed, 246 insertions(+), 154 deletions(-)

diff --git a/src/MNH/ice4_fast_rg.f90 b/src/MNH/ice4_fast_rg.f90
index cc2cd2f45..ad8cf0525 100644
--- a/src/MNH/ice4_fast_rg.f90
+++ b/src/MNH/ice4_fast_rg.f90
@@ -176,15 +176,16 @@ INTEGER, PARAMETER :: IRCDRYG=1, IRIDRYG=2, IRIWETG=3, IRSDRYG=4, IRSWETG=5, IRR
 INTEGER                            :: IGDRY
 INTEGER                            :: IDX, JJ
 INTEGER                            :: IRR !Workaround of PGI bug with OpenACC (at least up to 18.10 version)
+INTEGER, DIMENSION(SIZE(PRHODREF)) :: I1
+INTEGER, DIMENSION(SIZE(PRHODREF)) :: IVEC1, IVEC2
 LOGICAL                            :: GCRFLIMIT,GDSOFT,GEVLIMIT,GNULLWETG,GWETGPOST !Workaround of PGI bug with OpenACC (at least up to 18.10 version)
 LOGICAL, DIMENSION(SIZE(PRHODREF)) :: GDRY, GLDRYG, GMASK
-INTEGER, DIMENSION(SIZE(PRHODREF)) :: IVEC1, IVEC2
 REAL, DIMENSION(SIZE(PRHODREF))    :: ZVEC1, ZVEC2, ZVEC3
 REAL, DIMENSION(SIZE(PRHODREF))    :: ZZW,         &
                                       ZRDRYG_INIT, & !Initial dry growth rate of the graupeln
                                       ZRWETG_INIT    !Initial wet growth rate of the graupeln
 !
-!$acc declare create(IGDRY,IDX,JJ,GDRY,GLDRYG,GMASK,IVEC1,IVEC2,ZVEC1,ZVEC2,ZVEC3,ZZW,ZRDRYG_INIT,ZRWETG_INIT)
+!$acc declare create(IGDRY,I1,IVEC1,IVEC2,GDRY,GLDRYG,GMASK,ZVEC1,ZVEC2,ZVEC3,ZZW,ZRDRYG_INIT,ZRWETG_INIT)
 !
 !$acc declare copyin(XALPI,XALPW,XBETAI,XBETAW,XGAMW,XCI,XCL,XCPV,XESTT,XGAMI,XLMTT,XLVTT,XMD,XMV,XRV,XTT,   &
 !$acc&               LCRFLIMIT,LEVLIMIT,LNULLWETG,LWETGPOST,                                                 &
@@ -341,17 +342,39 @@ ELSE
 ENDIF
 
 ! Wet and dry collection of rs on graupel (6.2.1)
-GDRY(:)=PRST(:)>XRTMIN(5) .AND. PRGT(:)>XRTMIN(6) .AND. LDCOMPUTE(:)
+IGDRY = 0
+GDRY(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GDRY)
+  IF (PRST(JJ)>XRTMIN(5) .AND. PRGT(JJ)>XRTMIN(6) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGDRY = IGDRY + 1
+    IDX = IGDRY
+!$acc end atomic
+    I1(IDX) = JJ
+    GDRY(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GDRY,IGDRY)
+!$acc update self(IGDRY)
+IF(JJ==-999) print *,'PW: IGDRY=',IGDRY,COUNT(GDRY)
+! !$acc kernels
 IF(GDSOFT) THEN
+!$acc kernels
   WHERE(.NOT. GDRY(:))
     PRG_TEND(:, IRSDRYG)=0.
     PRG_TEND(:, IRSWETG)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRG_TEND(:, IRSDRYG)=0.
   PRG_TEND(:, IRSWETG)=0.
-  IGDRY=COUNT(GDRY(:))
+!$acc end kernels
   IF(IGDRY>0)THEN
+!$acc kernels
     !
     !*       6.2.3  select the (PLBDAG,PLBDAS) couplet
     !
@@ -359,13 +382,9 @@ ELSE
     ZVEC1(1:IGDRY)=PACK(PLBDAG(:), MASK=GDRY(:))
     ZVEC2(1:IGDRY)=PACK(PLBDAS(:), MASK=GDRY(:))
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GDRY(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAG(JJ)
-        ZVEC2(IDX) = PLBDAS(JJ)
-      END IF
+    DO JJ = 1, IGDRY
+      ZVEC1(JJ) = PLBDAG(I1(JJ))
+      ZVEC2(JJ) = PLBDAS(I1(JJ))
     END DO
 #endif
     !
@@ -405,14 +424,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:)=UNPACK(VECTOR=ZVEC3(1:IGDRY), MASK=GDRY(:), FIELD=0.0)
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GDRY(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGDRY
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -437,21 +451,45 @@ ELSE
       PRG_TEND(:, IRSDRYG)=PRG_TEND(:, IRSWETG)*XCOLSG*BR_EXP(XCOLEXSG*(PT(:)-XTT))
 #endif
     END WHERE
+!$acc end kernels
   ENDIF
 ENDIF
+!$acc kernels
 !
 !*       6.2.6  accretion of raindrops on the graupeln
 !
-GDRY(:)=PRRT(:)>XRTMIN(3) .AND. PRGT(:)>XRTMIN(6) .AND. LDCOMPUTE(:)
+IGDRY = 0
+GDRY(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GDRY)
+  IF (PRRT(JJ)>XRTMIN(3) .AND. PRGT(JJ)>XRTMIN(6) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGDRY = IGDRY + 1
+    IDX = IGDRY
+!$acc end atomic
+    I1(IDX) = JJ
+    GDRY(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GDRY,IGDRY)
+!$acc update self(IGDRY)
+IF(JJ==-999) print *,'PW: IGDRY=',IGDRY,COUNT(GDRY)
+! !$acc kernels
 IF(GDSOFT) THEN
+!$acc kernels
   WHERE(.NOT. GDRY(:))
     PRG_TEND(:, IRRDRYG)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRG_TEND(:, IRRDRYG)=0.
-  IGDRY=COUNT(GDRY(:))
+!$acc end kernels
   !
   IF(IGDRY>0) THEN
+!$acc kernels
     !
     !*       6.2.8  select the (PLBDAG,PLBDAR) couplet
     !
@@ -459,13 +497,9 @@ ELSE
     ZVEC1(1:IGDRY)=PACK(PLBDAG(:), MASK=GDRY(:))
     ZVEC2(1:IGDRY)=PACK(PLBDAR(:), MASK=GDRY(:))
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GDRY(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAG(JJ)
-        ZVEC2(IDX) = PLBDAR(JJ)
-      END IF
+    DO JJ = 1, IGDRY
+      ZVEC1(JJ) = PLBDAG(I1(JJ))
+      ZVEC2(JJ) = PLBDAR(I1(JJ))
     END DO
 #endif
     !
@@ -505,14 +539,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:)=UNPACK(VECTOR=ZVEC3(1:IGDRY), MASK=GDRY, FIELD=0.)
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GDRY(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGDRY
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -533,8 +562,10 @@ ELSE
                        XLBRDRYG3/               BR_P2(PLBDAR(:))  )
 #endif
     END WHERE
+!$acc end kernels
   ENDIF
 ENDIF
+!$acc kernels
 
 ZRDRYG_INIT(:)=PRG_TEND(:, IRCDRYG)+PRG_TEND(:, IRIDRYG)+PRG_TEND(:, IRSDRYG)+PRG_TEND(:, IRRDRYG)
 
diff --git a/src/MNH/ice4_fast_rh.f90 b/src/MNH/ice4_fast_rh.f90
index 732164c1c..5eaa5e4c7 100644
--- a/src/MNH/ice4_fast_rh.f90
+++ b/src/MNH/ice4_fast_rh.f90
@@ -160,6 +160,7 @@ INTEGER, PARAMETER :: IRCWETH=1, IRRWETH=2, IRIDRYH=3, IRIWETH=4, IRSDRYH=5, IRS
 !
 INTEGER                            :: IHAIL, IGWET
 INTEGER                            :: IDX, JJ
+INTEGER, DIMENSION(SIZE(PRHODREF)) :: I1
 INTEGER, DIMENSION(SIZE(PRHODREF)) :: IVEC1, IVEC2
 LOGICAL                            :: GCONVHG,GDSOFT,GEVLIMIT,GNULLWETH,GWETHPOST !Workaround of PGI bug with OpenACC (at least up to 18.10 version)
 LOGICAL, DIMENSION(SIZE(PRHODREF)) :: GHAIL, GWET, GMASK, LLWETH, LLDRYH
@@ -168,7 +169,7 @@ REAL, DIMENSION(SIZE(PRHODREF))    :: ZZW, &
                                       ZRDRYH_INIT, ZRWETH_INIT, &
                                       ZRDRYHG
 !
-!$acc declare create(IHAIL,IGWET,IDX,JJ,IVEC1,IVEC2,GHAIL,GWET,GMASK,LLWETH,LLDRYH, &
+!$acc declare create(IHAIL,IGWET,I1,IVEC1,IVEC2,GHAIL,GWET,GMASK,LLWETH,LLDRYH, &
 !$acc&               ZVEC1,ZVEC2,ZVEC3,ZZW,ZRDRYH_INIT,ZRWETH_INIT,ZRDRYHG)
 !
 !$acc declare copyin(XALPI,XALPW,XBETAI,XBETAW,XGAMW,XCI,XCL,XCPV,XESTT,XGAMI,XLMTT,XLVTT,XMD,XMV,XRV,XTT,           &
@@ -271,17 +272,39 @@ ENDIF
 !
 !*       7.2.1  accretion of aggregates on the hailstones
 !
-GWET(:) = PRHT(:)>XRTMIN(7) .AND. PRST(:)>XRTMIN(5) .AND. LDCOMPUTE(:)
+IGWET = 0
+GWET(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GWET)
+  IF (PRHT(JJ)>XRTMIN(7) .AND. PRST(JJ)>XRTMIN(5) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGWET = IGWET + 1
+    IDX = IGWET
+!$acc end atomic
+    I1(IDX) = JJ
+    GWET(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GWET,IGWET)
+!$acc update self(IGWET)
+IF(JJ==-999) print *,'PW: IGWET=',IGWET,COUNT(GWET)
+! !$acc kernels
 IF(GDSOFT) THEN
+!$acc kernels
   WHERE(.NOT. GWET(:))
     PRH_TEND(:, IRSWETH)=0.
     PRH_TEND(:, IRSDRYH)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRH_TEND(:, IRSWETH)=0.
   PRH_TEND(:, IRSDRYH)=0.
-  IGWET=COUNT(GWET(:))
+!$acc end kernels
   IF(IGWET>0)THEN
+!$acc kernels
     !
     !*       7.2.3  select the (PLBDAH,PLBDAS) couplet
     !
@@ -289,13 +312,9 @@ ELSE
     ZVEC1(1:IGWET) = PACK( PLBDAH(:),MASK=GWET(:) )
     ZVEC2(1:IGWET) = PACK( PLBDAS(:),MASK=GWET(:) )
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GWET(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAH(JJ)
-        ZVEC2(IDX) = PLBDAS(JJ)
-      END IF
+    DO JJ = 1, IGWET
+      ZVEC1(JJ) = PLBDAH(I1(JJ))
+      ZVEC2(JJ) = PLBDAS(I1(JJ))
     END DO
 #endif
     !
@@ -335,14 +354,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC3(1:IGWET),MASK=GWET,FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GWET(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGWET
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -365,22 +379,46 @@ ELSE
       PRH_TEND(:, IRSDRYH)=PRH_TEND(:, IRSWETH)*(XCOLSH*BR_EXP(XCOLEXSH*(PT(:)-XTT)))
 #endif
     END WHERE
+!$acc end kernels
   ENDIF
 ENDIF
 !
 !*       7.2.6  accretion of graupeln on the hailstones
 !
-GWET(:) = PRHT(:)>XRTMIN(7) .AND. PRGT(:)>XRTMIN(6) .AND. LDCOMPUTE(:)
+!$acc kernels
+IGWET = 0
+GWET(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GWET)
+  IF (PRHT(JJ)>XRTMIN(7) .AND. PRGT(JJ)>XRTMIN(6) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGWET = IGWET + 1
+    IDX = IGWET
+!$acc end atomic
+    I1(IDX) = JJ
+    GWET(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GWET,IGWET)
+!$acc update self(IGWET)
+IF(JJ==-999) print *,'PW: IGWET=',IGWET,COUNT(GWET)
+! !$acc kernels
 IF(GDSOFT) THEN
+!$acc kernels
   WHERE(.NOT. GWET(:))
     PRH_TEND(:, IRGWETH)=0.
     PRH_TEND(:, IRGDRYH)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRH_TEND(:, IRGWETH)=0.
   PRH_TEND(:, IRGDRYH)=0.
-  IGWET=COUNT(GWET(:))
+!$acc end kernels
   IF(IGWET>0)THEN
+!$acc kernels
     !
     !*       7.2.8  select the (PLBDAH,PLBDAG) couplet
     !
@@ -388,13 +426,9 @@ ELSE
     ZVEC1(1:IGWET) = PACK( PLBDAH(:),MASK=GWET(:) )
     ZVEC2(1:IGWET) = PACK( PLBDAG(:),MASK=GWET(:) )
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GWET(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAH(JJ)
-        ZVEC2(IDX) = PLBDAG(JJ)
-      END IF
+    DO JJ = 1, IGWET
+      ZVEC1(JJ) = PLBDAH(I1(JJ))
+      ZVEC2(JJ) = PLBDAG(I1(JJ))
     END DO
 #endif
     !
@@ -434,14 +468,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC3(1:IGWET),MASK=GWET,FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GWET(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGWET
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -471,20 +500,44 @@ ELSE
       PRH_TEND(:, IRGDRYH)=PRH_TEND(:, IRGDRYH)*(XCOLGH*BR_EXP(XCOLEXGH*(PT(:)-XTT)))
 #endif
     END WHERE
+!$acc end kernels
   END IF
 ENDIF
+!$acc kernels
 !
 !*       7.2.11  accretion of raindrops on the hailstones
 !
-GWET(:) = PRHT(:)>XRTMIN(7) .AND. PRRT(:)>XRTMIN(3) .AND. LDCOMPUTE(:)
+IGWET = 0
+GWET(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GWET)
+  IF (PRHT(JJ)>XRTMIN(7) .AND. PRRT(JJ)>XRTMIN(3) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGWET = IGWET + 1
+    IDX = IGWET
+!$acc end atomic
+    I1(IDX) = JJ
+    GWET(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GWET,IGWET)
+!$acc update self(IGWET)
+IF(JJ==-999) print *,'PW: IGWET=',IGWET,COUNT(GWET)
+! !$acc kernels
 IF(GDSOFT) THEN
+!$acc kernels
   WHERE(.NOT. GWET(:))
     PRH_TEND(:, IRRWETH)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRH_TEND(:, IRRWETH)=0.
-  IGWET=COUNT(GWET(:))
+!$acc end kernels
   IF(IGWET>0)THEN
+!$acc kernels
     !
     !*       7.2.12  select the (PLBDAH,PLBDAR) couplet
     !
@@ -492,13 +545,9 @@ ELSE
     ZVEC1(1:IGWET)=PACK(PLBDAH(:), MASK=GWET(:))
     ZVEC2(1:IGWET)=PACK(PLBDAR(:), MASK=GWET(:))
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GWET(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAH(JJ)
-        ZVEC2(IDX) = PLBDAR(JJ)
-      END IF
+    DO JJ = 1, IGWET
+      ZVEC1(JJ) = PLBDAH(I1(JJ))
+      ZVEC2(JJ) = PLBDAR(I1(JJ))
     END DO
 #endif
     !
@@ -538,14 +587,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:)=UNPACK(VECTOR=ZVEC3(1:IGWET), MASK=GWET, FIELD=0.)
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GWET(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGWET
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -566,8 +610,10 @@ ELSE
                        XLBRWETH3/               BR_P2(PLBDAR(:)) )
 #endif
     END WHERE
+!$acc end kernels
   ENDIF
 ENDIF
+!$acc kernels
 !
 ZRDRYH_INIT(:)=PRH_TEND(:, IRCWETH)+PRH_TEND(:, IRIDRYH)+PRH_TEND(:, IRSDRYH)+PRH_TEND(:, IRRWETH)+PRH_TEND(:, IRGDRYH)
 !
diff --git a/src/MNH/ice4_fast_rs.f90 b/src/MNH/ice4_fast_rs.f90
index ca706b991..ce5262a0c 100644
--- a/src/MNH/ice4_fast_rs.f90
+++ b/src/MNH/ice4_fast_rs.f90
@@ -142,13 +142,15 @@ INTEGER, PARAMETER :: IRCRIMS=1, IRCRIMSS=2, IRSRIMCG=3, IRRACCS=4, IRRACCSS=5,
 !
 INTEGER                            :: IGRIM, IGACC
 INTEGER                            :: IDX, JJ
+INTEGER                            :: ISIZE
+INTEGER, DIMENSION(SIZE(PRHODREF)) :: I1
 INTEGER, DIMENSION(SIZE(PRHODREF)) :: IVEC1, IVEC2
 LOGICAL                            :: GDSOFT,GEVLIMIT !Workaround of PGI bug with OpenACC (at least up to 18.10 version)
 LOGICAL, DIMENSION(SIZE(PRHODREF)) :: GRIM, GACC, GMASK
 REAL, DIMENSION(SIZE(PRHODREF))    :: ZVEC1, ZVEC2, ZVEC3
 REAL, DIMENSION(SIZE(PRHODREF))    :: ZZW, ZZW2, ZZW6, ZFREEZ_RATE
 !
-!$acc declare create(IGRIM,IGACC,IDX,JJ,IVEC1,IVEC2,GRIM,GACC,GMASK,ZVEC1,ZVEC2,ZVEC3,ZZW,ZZW2,ZZW6,ZFREEZ_RATE)
+!$acc declare create(IGRIM,IGACC,I1,IVEC1,IVEC2,GRIM,GACC,GMASK,ZVEC1,ZVEC2,ZVEC3,ZZW,ZZW2,ZZW6,ZFREEZ_RATE)
 !
 !$acc declare copyin(XALPI,XALPW,XBETAI,XBETAW,XCI,XCL,XCPV,XESTT,XGAMI,XGAMW,XLMTT,XLVTT,XMD,XMV,XRV,XTT,  &
 !$acc&               LEVLIMIT, CSNOWRIMING,                                                                 &
@@ -194,6 +196,9 @@ END IF
 !PW: bug?: for the moment I take the value from the CPU as OK. GPU value seems to be lost when this subroutine is called
 GDSOFT = LDSOFT;GEVLIMIT = LEVLIMIT !Workaround of PGI bug with OpenACC (at least up to 18.10 version)
 ! !$acc end kernels
+!$acc kernels
+ISIZE = KSIZE
+!$acc end kernels
 !
 !*       5.0    maximum freezing rate
 !
@@ -234,34 +239,52 @@ END WHERE
 !
 !*       5.1    cloud droplet riming of the aggregates
 !
-GRIM(:) = PRCT(:)>XRTMIN(2) .AND. PRST(:)>XRTMIN(5) .AND. LDCOMPUTE(:)
+IGRIM = 0
+GRIM(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GRIM)
+  IF (PRCT(JJ)>XRTMIN(2) .AND. PRST(JJ)>XRTMIN(5) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGRIM = IGRIM + 1
+    IDX = IGRIM
+!$acc end atomic
+    I1(IDX) = JJ
+    GRIM(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GRIM,IGRIM)
+!$acc update self(IGRIM)
+IF(JJ==-999) print *,'PW: IGRIM=',IGRIM,COUNT(GRIM)
+! !$acc kernels
 !
 ! Collection of cloud droplets by snow: this rate is used for riming (T<0) and for conversion/melting (T>0)
 IF(GDSOFT) THEN
+!$acc kernels
   WHERE(.NOT. GRIM(:))
     PRS_TEND(:, IRCRIMS)=0.
     PRS_TEND(:, IRCRIMSS)=0.
     PRS_TEND(:, IRSRIMCG)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRS_TEND(:, IRCRIMS)=0.
   PRS_TEND(:, IRCRIMSS)=0.
   PRS_TEND(:, IRSRIMCG)=0.
-  IGRIM = COUNT(GRIM(:))
+!$acc end kernels
   !
   IF(IGRIM>0) THEN
     !
     !        5.1.1  select the PLBDAS
     !
+!$acc kernels
 #ifndef _OPENACC
     ZVEC1(1:IGRIM) = PACK( PLBDAS(:),MASK=GRIM(:) )
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GRIM(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAS(JJ)
-      END IF
+    DO JJ = 1, IGRIM
+      ZVEC1(JJ) = PLBDAS(I1(JJ))
     END DO
 #endif
     !
@@ -286,14 +309,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC1(1:IGRIM),MASK=GRIM,FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GRIM(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC1(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGRIM
+      ZZW(I1(JJ)) = ZVEC1(JJ)
     END DO
 #endif
     !
@@ -319,14 +337,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC1(1:IGRIM),MASK=GRIM,FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GRIM(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC1(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGRIM
+      ZZW(I1(JJ)) = ZVEC1(JJ)
     END DO
 #endif
 
@@ -335,14 +348,9 @@ ELSE
 #ifndef _OPENACC
     ZZW2(:) = UNPACK( VECTOR=ZVEC1(1:IGRIM),MASK=GRIM,FIELD=0.0)
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GRIM(JJ)) THEN
-        IDX = IDX + 1
-        ZZW2(JJ) = ZVEC1(IDX)
-      ELSE
-        ZZW2(JJ) = 0.
-      END IF
+    ZZW2(:) = 0.
+    DO JJ = 1, IGRIM
+      ZZW2(I1(JJ)) = ZVEC1(JJ)
     END DO
 #endif
     !
@@ -381,9 +389,11 @@ ELSE
     ELSE
       PRS_TEND(:, IRSRIMCG)=0.
     END IF
+!$acc end kernels
   ENDIF
 ENDIF
 !
+!$acc kernels
 GRIM(:) = GRIM(:) .AND. PT(:)<XTT ! More restrictive GRIM mask to be used for riming by negative temperature only
 PRCRIMSS(:)=0.
 PRCRIMSG(:)=0.
@@ -411,20 +421,42 @@ PA_TH(:) = PA_TH(:) + PRCRIMSG(:)*(PLSFACT(:)-PLVFACT(:))
 !
 !*       5.2    rain accretion onto the aggregates
 !
-GACC(:) = PRRT(:)>XRTMIN(3) .AND. PRST(:)>XRTMIN(5) .AND. LDCOMPUTE(:)
+IGACC = 0
+GACC(:) = .FALSE.
+!$acc loop private(IDX) independent
+DO JJ = 1, SIZE(GACC)
+  IF (PRRT(JJ)>XRTMIN(3) .AND. PRST(JJ)>XRTMIN(5) .AND. LDCOMPUTE(JJ)) THEN
+!$acc atomic capture
+    IGACC = IGACC + 1
+    IDX = IGACC
+!$acc end atomic
+    I1(IDX) = JJ
+    GACC(JJ) = .TRUE.
+  END IF
+END DO
+!$acc end kernels
+!PW: BUG: this is necessary to get correct results (PGI 18.10)
+! !$acc update self(GACC,IGACC)
+!$acc update self(IGACC)
+IF(JJ==-999) print *,'PW: IGACC=',IGACC,COUNT(GACC)
+! !$acc kernels
 IF(GDSOFT) THEN
+!$acc kernels
   GMASK(:) = .NOT. GACC(:)
   WHERE(GMASK(:))
     PRS_TEND(:, IRRACCS)=0.
     PRS_TEND(:, IRRACCSS)=0.
     PRS_TEND(:, IRSACCRG)=0.
   END WHERE
+!$acc end kernels
 ELSE
+!$acc kernels
   PRS_TEND(:, IRRACCS)=0.
   PRS_TEND(:, IRRACCSS)=0.
   PRS_TEND(:, IRSACCRG)=0.
-  IGACC = COUNT(GACC(:))
+!$acc end kernels
   IF(IGACC>0)THEN
+!$acc kernels
     !
     !
     !        5.2.1  select the (PLBDAS,PLBDAR) couplet
@@ -433,13 +465,9 @@ ELSE
     ZVEC1(1:IGACC) = PACK( PLBDAS(:),MASK=GACC(:) )
     ZVEC2(1:IGACC) = PACK( PLBDAR(:),MASK=GACC(:) )
 #else
-    IDX = 0
-    DO JJ = 1,KSIZE
-      IF (GACC(JJ)) THEN
-        IDX = IDX+1
-        ZVEC1(IDX) = PLBDAS(JJ)
-        ZVEC2(IDX) = PLBDAR(JJ)
-      END IF
+    DO JJ = 1, IGACC
+      ZVEC1(JJ) = PLBDAS(I1(JJ))
+      ZVEC2(JJ) = PLBDAR(I1(JJ))
     END DO
 #endif
     !
@@ -479,14 +507,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC3(1:IGACC),MASK=GACC,FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GACC(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGACC
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -523,14 +546,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC3(1:IGACC),MASK=GACC(:),FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GACC(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGACC
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     WHERE(GACC(:))
@@ -550,14 +568,9 @@ ELSE
 #ifndef _OPENACC
     ZZW(:) = UNPACK( VECTOR=ZVEC3(1:IGACC),MASK=GACC,FIELD=0.0 )
 #else
-    IDX = 0
-    DO JJ=1,KSIZE
-      IF (GACC(JJ)) THEN
-        IDX = IDX + 1
-        ZZW(JJ) = ZVEC3(IDX)
-      ELSE
-        ZZW(JJ) = 0.
-      END IF
+    ZZW(:) = 0.
+    DO JJ = 1, IGACC
+      ZZW(I1(JJ)) = ZVEC3(JJ)
     END DO
 #endif
     !
@@ -579,8 +592,10 @@ ELSE
             XLBSACCR3/               BR_P2(PLBDAS(:)) )/PLBDAR(:)
 #endif
     END WHERE
+!$acc end kernels
   ENDIF
 ENDIF
+!$acc kernels
 !
 GACC(:) = GACC(:) .AND. PT(:)<XTT ! More restrictive GACC mask to be used for accretion by negative temperature only
 PRRACCSS(:)=0.
-- 
GitLab