diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/EXSEG1.nam index cbfeca7d31195ad0c0dfbc8ba72acc279b1083f4..e20de675e344204737c1cb73fdce2a285b4f49f8 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/EXSEG1.nam @@ -1,14 +1,15 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - ! LMNH_MPI_BSEND = T , + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T ! MPI_BUFFER_SIZE = 40 / &NAM_LUNITn CINIFILE = "DA0128" , CINIFILEPGD = "PGD00128" / &NAM_CONFn LUSERV = T, LUSERC = T, LUSERR = T, LUSERI = T, LUSERS = T, LUSERG = T, LUSERH = F, LUSECI = T / -&NAM_DYNn XTSTEP = 15.00000 , +&NAM_DYNn XTSTEP = 10.00000 , CPRESOPT = "ZRESI", NITR = 12 !CPRESOPT = "ZSOLV", NITR = 12 XRELAX = 1., LHORELAX_UVWTH = T, LHORELAX_RV = T, LVE_RELAX = T, @@ -19,8 +20,11 @@ CRAD = "ECMW", CCLOUD = "ICE3" ! "NONE" ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/parameters_mg.nam index 5d76f4dff351b9c8873c37072d3f1f0747877d70..6fc7d78601d4f0e0156cfca0d400e2ee8eaab53a 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/003_mesonh_step1/parameters_mg.nam @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 
3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -102,6 +102,7 @@ ¶meters_multigrid verbose = 10, ! Verbosity level n_lev = 8, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 3, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/EXSEG1.nam index f2dcd82899ee955178d92a7bf90d484ac6c78920..eaacc47c3d7c223177ce59b1e8bdb4638b551de0 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/EXSEG1.nam @@ -4,7 +4,8 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - LMNH_MPI_BSEND = F , + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T ! MPI_BUFFER_SIZE = 40 !NZ_PROC=1 / @@ -12,7 +13,7 @@ &NAM_LUNITn CINIFILE = "D0128.1.DAR01.004" , CINIFILEPGD = "PGD00128" / &NAM_CONFn LUSERV = T, LUSERC = T, LUSERR = T, LUSERI = T, LUSERS = T, LUSERG = T, LUSERH = F, LUSECI = T / -&NAM_DYNn XTSTEP = 15.00000 , +&NAM_DYNn XTSTEP = 10.00000 , CPRESOPT = "ZRESI", !CPRESOPT = "ZSOLV", NITR = 12, @@ -24,8 +25,11 @@ CRAD = "ECMWF", CCLOUD = "ICE3", ! "NONE" , ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / @@ -38,7 +42,7 @@ CSEG = "DAR02", CSPLIT = "BSPLITTING" NHALO=1 LCHECK = F / -&NAM_DYN XSEGLEN = 150.0 ! 150.0000 , +&NAM_DYN XSEGLEN = 1000.0 ! 150.0000 , LCORIO = T, LNUMDIFU = F, LNUMDIFTH = F, XALKTOP = 0.001, XALZBOT = 22000. 
/ diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/parameters_mg.nam index 5d76f4dff351b9c8873c37072d3f1f0747877d70..6fc7d78601d4f0e0156cfca0d400e2ee8eaab53a 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_128x128_MNH-55X/004_mesonh_step2/parameters_mg.nam @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -102,6 +102,7 @@ ¶meters_multigrid verbose = 10, ! Verbosity level n_lev = 8, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 3, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/EXSEG1.nam index 1f733b80a6013aaa15ffc2721e7b9076b87bd5b8..5c4e19a79b1c280eaf231ffbb57ab0c389dc8e85 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/EXSEG1.nam @@ -1,14 +1,15 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - ! LMNH_MPI_BSEND = T , - MPI_BUFFER_SIZE = 200 + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T + ! 
MPI_BUFFER_SIZE = 200 / &NAM_LUNITn CINIFILE = "DA0256" , CINIFILEPGD = "PGD00256" / &NAM_CONFn LUSERV = T, LUSERC = T, LUSERR = T, LUSERI = T, LUSERS = T, LUSERG = T, LUSERH = F, LUSECI = T / -&NAM_DYNn XTSTEP = 15.00000 , +&NAM_DYNn XTSTEP = 10.00000 , CPRESOPT = "ZRESI", NITR = 12 !CPRESOPT = "ZSOLV", NITR = 12 XRELAX = 1., LHORELAX_UVWTH = T, LHORELAX_RV = T, LVE_RELAX = T, @@ -19,8 +20,11 @@ CRAD = "ECMW", CCLOUD = "ICE3" ! "NONE" ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/parameters_mg.nam index 209e1d572e4d230aa3a8ad8fd4fc9450a5652197..666001d377973abca1c6bd232931a957a92ec294 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/003_mesonh_step1/parameters_mg.nam @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -104,6 +104,7 @@ ¶meters_multigrid verbose = 10, ! Verbosity level n_lev = 9, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 5, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! 
Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/EXSEG1.nam index c20340edb8d4fd2ba9b4f4b3b6974b72553f674e..0e1f86606c262ce22f364aeb42cf861f582eb417 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/EXSEG1.nam @@ -4,7 +4,8 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - LMNH_MPI_BSEND = F , + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T ! MPI_BUFFER_SIZE = 40 !NZ_PROC=1 / @@ -12,7 +13,7 @@ &NAM_LUNITn CINIFILE = "D0256.1.DAR01.004" , CINIFILEPGD = "PGD00256" / &NAM_CONFn LUSERV = T, LUSERC = T, LUSERR = T, LUSERI = T, LUSERS = T, LUSERG = T, LUSERH = F, LUSECI = T / -&NAM_DYNn XTSTEP = 15.00000 , +&NAM_DYNn XTSTEP = 10.00000 , CPRESOPT = "ZRESI", !CPRESOPT = "ZSOLV", NITR = 12, @@ -24,8 +25,11 @@ CRAD = "ECMWF", CCLOUD = "ICE3", ! "NONE" , ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / @@ -38,7 +42,7 @@ CSEG = "DAR02", CSPLIT = "BSPLITTING" NHALO=1 LCHECK = F / -&NAM_DYN XSEGLEN = 150.0 ! 150.0000 , +&NAM_DYN XSEGLEN = 1000.0 ! 150.0000 , LCORIO = T, LNUMDIFU = F, LNUMDIFTH = F, XALKTOP = 0.001, XALZBOT = 22000. 
/ diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/parameters_mg.nam index 209e1d572e4d230aa3a8ad8fd4fc9450a5652197..291140b8af2a7ab348299dbf713d7933db911ca3 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_256x256_MNH-55X/004_mesonh_step2/parameters_mg.nam @@ -30,7 +30,7 @@ ! * Conjugate gradient parameters ! ********************************************************************* ¶meters_conjugategradient - verbose = 10, ! Verbosity level + verbose = 0 ! 10, ! Verbosity level maxiter = 5, ! Maximal number of iterations resreduction = 1.0e-1, ! Target residual reduction n_prec = 1 ! Number of smoother applications in @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -102,8 +102,9 @@ ! * Multigrid parameters ! ********************************************************************* ¶meters_multigrid - verbose = 10, ! Verbosity level + verbose = 0 ! 10, ! Verbosity level n_lev = 9, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 5, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! 
Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/EXSEG1.nam index 878afa281253ff16017a9c0177e961b3675533fc..9068ed62f22488507480ff5c83a50c2f089d58ab 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/EXSEG1.nam @@ -1,8 +1,9 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - ! LMNH_MPI_BSEND = T , - MPI_BUFFER_SIZE = 1000 + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T + ! MPI_BUFFER_SIZE = 1000 / &NAM_LUNITn CINIFILE = "DA0512" , CINIFILEPGD = "PGD00512" / @@ -19,8 +20,11 @@ CRAD = "ECMW", CCLOUD = "ICE3" ! "NONE" ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/parameters_mg.nam index 1a5d92f6f3e36ecd068964fd6fdf580cb1940fe6..c603e7d6036c2dbd7b2e768bbb94ba5f4b7ecadc 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/003_mesonh_step1/parameters_mg.nam @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -102,6 +102,7 @@ ¶meters_multigrid verbose = 10, ! Verbosity level n_lev = 10, ! Number of levels + iswitch_cpu_gpu = 100 ! 
/!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 5, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/EXSEG1.nam index 8b73157a9f279cde77f15e96c489f18570dfb1dc..e6d8caaf7eb39861092548dad607c92730a656b5 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/EXSEG1.nam @@ -4,8 +4,9 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - ! LMNH_MPI_BSEND = T , - MPI_BUFFER_SIZE = 1000 + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T + ! MPI_BUFFER_SIZE = 1000 / &NAM_LUNITn CINIFILE = "D0512.1.DAR01.004" ! "D0512.1.DAR01.003" , @@ -24,8 +25,11 @@ CRAD = "ECMWF", CCLOUD = "ICE3", ! "NONE" , ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / @@ -38,7 +42,7 @@ CSEG = "DAR02", CSPLIT = "BSPLITTING" NHALO=1 LCHECK = F / -&NAM_DYN XSEGLEN = 100.0 ! 150.0000 , +&NAM_DYN XSEGLEN = 1000.0 ! 150.0000 , LCORIO = T, LNUMDIFU = F, LNUMDIFTH = F, XALKTOP = 0.001, XALZBOT = 22000. 
/ diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/parameters_mg.nam index 1a5d92f6f3e36ecd068964fd6fdf580cb1940fe6..c603e7d6036c2dbd7b2e768bbb94ba5f4b7ecadc 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_512x512_MNH-55X/004_mesonh_step2/parameters_mg.nam @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -102,6 +102,7 @@ ¶meters_multigrid verbose = 10, ! Verbosity level n_lev = 10, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 5, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/EXSEG1.nam index 0898b916f1f8f098f34bdeffa2a342f7296dcd8e..907bcfee050d5e170b3172ca3a9fb654d790a981 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/EXSEG1.nam @@ -1,14 +1,15 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - ! LMNH_MPI_BSEND = T , + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T ! 
MPI_BUFFER_SIZE = 40 / &NAM_LUNITn CINIFILE = "DA0064" , CINIFILEPGD = "PGD00064" / &NAM_CONFn LUSERV = T, LUSERC = T, LUSERR = T, LUSERI = T, LUSERS = T, LUSERG = T, LUSERH = F, LUSECI = T / -&NAM_DYNn XTSTEP = 15.00000 , +&NAM_DYNn XTSTEP = 10.00000 , CPRESOPT = "ZRESI", NITR = 12 !CPRESOPT = "ZSOLV", NITR = 12 XRELAX = 1., LHORELAX_UVWTH = T, LHORELAX_RV = T, LVE_RELAX = T, @@ -19,8 +20,11 @@ CRAD = "ECMW" ! "NONE " ! "FIXE" ! "ECMW", CCLOUD = "ICE3" ! "NONE" ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/parameters_mg.nam index da6e1ac67e7e9cd235de8d628379403ef298c3ad..2436ea121898c1c05da953cf1de0101d62e680a8 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/003_mesonh_step1/parameters_mg.nam @@ -84,7 +84,7 @@ ! parameters of the smoother ! ¶meters_smoother - smoother = 3, ! Smoother method + smoother = 6, ! Smoother method ! 3 = line SOR ! 4 = line SSOR ! 6 = line Jacobi @@ -101,6 +101,7 @@ ¶meters_multigrid verbose = 10, ! Verbosity level n_lev = 7, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 3, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! 
Number of postsmoothing steps diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/EXSEG1.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/EXSEG1.nam index a46063ee7c62303314fc9fc60781fdbb0b17ed81..68a81d3c2cd7f09af2a34d3e86fc8053ea7a27b8 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/EXSEG1.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/EXSEG1.nam @@ -4,7 +4,8 @@ &NAM_CONFZ ! NB_PROCIO_R=1 , ! NB_PROCIO_W=1 , - LMNH_MPI_BSEND = F , + LMNH_MPI_BSEND = F , + LMNH_MPI_ALLTOALLV_REMAP=T ! MPI_BUFFER_SIZE = 40 !NZ_PROC=1 / @@ -12,7 +13,7 @@ &NAM_LUNITn CINIFILE = "D0064.1.DAR01.004" , CINIFILEPGD = "PGD00064" / &NAM_CONFn LUSERV = T, LUSERC = T, LUSERR = T, LUSERI = T, LUSERS = T, LUSERG = T, LUSERH = F, LUSECI = T / -&NAM_DYNn XTSTEP = 15.00000 , +&NAM_DYNn XTSTEP = 10.00000 , CPRESOPT = "ZRESI", !CPRESOPT = "ZSOLV", NITR = 12, @@ -24,8 +25,11 @@ CRAD = "ECMWF", CCLOUD = "ICE3", ! "NONE" , ! "ICE3", CDCONV = "NONE", CSCONV = "NONE" / +&NAM_PARAM_ICE + LRED = F +/ &NAM_PARAM_RADn XDTRAD = 1800., XDTRAD_CLONLY = 1800., LCLEAR_SKY = F, - NRAD_COLNBR = 500 / + NRAD_COLNBR = 40 / &NAM_PARAM_MFSHALLn XIMPL_MF = 1, CMF_UPDRAFT = "EDKF", CMF_CLOUD = "NONE", LMIXUV = T, LMF_FLX = F / &NAM_LBCn CLBCX = 2*"OPEN", CLBCY = 2*"OPEN", XCPHASE = 20. / @@ -39,7 +43,7 @@ CSEG = "DAR02", CSPLIT = "BSPLITTING" NHALO=1 LCHECK = F / -&NAM_DYN XSEGLEN = 150.0 ! 150.0000 , +&NAM_DYN XSEGLEN = 1000.0 ! 150.0000 , LCORIO = T, LNUMDIFU = F, LNUMDIFTH = F, XALKTOP = 0.001, XALZBOT = 22000. 
/ diff --git a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/parameters_mg.nam b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/parameters_mg.nam index 53ff93a8dae18a4a5291904716de46f9ddc0aa6d..89de24348c38d0825cb7cfc706bb6c8b0c8bf1d9 100644 --- a/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/parameters_mg.nam +++ b/MY_RUN/BENCH/CINES_MNH55X/004_Hector_64x64_MNH-55X/004_mesonh_step2/parameters_mg.nam @@ -99,8 +99,9 @@ ! * Multigrid parameters ! ********************************************************************* &parameters_multigrid - verbose = 10, ! Verbosity level + verbose = 0, ! Verbosity level n_lev = 7, ! Number of levels + iswitch_cpu_gpu = 100 ! /!\ =100 deactive switch cpu <-> gpu computation | work only with MANAGED Memory + nvhpc lev_split = 3, ! First level where data is pulled together n_presmooth = 1, ! Number of presmoothing steps n_postsmooth = 1, ! Number of postsmoothing steps
diff --git a/bin/Rocprof b/bin/Rocprof
new file mode 100755
index 0000000000000000000000000000000000000000..c0bc6ab32b105d79ada07cc7cd790b4c8bc5f73b
--- /dev/null
+++ b/bin/Rocprof
@@ -0,0 +1,18 @@
+#!/bin/bash
+if [[ -n ${OMPI_COMM_WORLD_RANK+z} ]]; then
+  # Open MPI (mpirun)
+  export MPI_RANK=${OMPI_COMM_WORLD_RANK}
+elif [[ -n ${MV2_COMM_WORLD_RANK+z} ]]; then
+  # MVAPICH2
+  export MPI_RANK=${MV2_COMM_WORLD_RANK}
+elif [[ -n ${SLURM_PROCID+z} ]]; then
+  # srun
+  export MPI_RANK=${SLURM_PROCID}
+fi
+# One CSV per rank; XYZ, NP, NG, NC and CG are read from the job environment.
+# "$@" (instead of eval "... $*") passes the profiled command line through unmodified.
+outfile="results_Rocprof-MNH${XYZ}_${NP}NP_${NG}NG_${NC}NC_${CG}CG.${MPI_RANK}IP.${SLURM_JOBID}"
+rocprof -o "${outfile}.csv" "$@"
+status=$?
+rm -f -- "${outfile}".{db,json,sysinfo.txt}
+exit "${status}"
diff --git a/bin/set_rocm_device b/bin/set_rocm_device
new file mode 100755
index 0000000000000000000000000000000000000000..74833ee7951714052b0bad633ef53fb6d57747fc
--- /dev/null
+++ b/bin/set_rocm_device
@@ -0,0 +1,23 @@
+#!/bin/bash
+#set -x
+# Select one GPU for this MPI rank through ROCR_VISIBLE_DEVICES, then exec the given command.
+Gpuinfo='rocm-smi -i'
+NB_DEVICE=${NB_DEVICE:-$( ${Gpuinfo} | grep -c GPU )}
+[ "${NB_DEVICE:-0}" -eq 0 ] && NB_DEVICE=1
+
+export GPU_OFFSET=${GPU_OFFSET:-0}
+
+#[[ ${IP} -ge 1 ]] && IP=$(( IP +1 ))
+#[[ ${IP} -ge 2 ]] && IP=$(( IP +1 ))
+# Defaults (rank 0, 1 task, 1 node) avoid a division by zero when neither Open MPI nor Slurm is present.
+export LIP=${OMPI_COMM_WORLD_LOCAL_RANK:-${SLURM_LOCALID:-0}}
+export IP=${OMPI_COMM_WORLD_RANK:-${SLURM_PROCID:-0}}
+export NP=${OMPI_COMM_WORLD_SIZE:-${SLURM_NTASKS:-1}}
+export NN=${OMPI_MCA_orte_num_nodes:-${SLURM_NNODES:-1}}
+export NPN=$(( 1 + (NP-1)/ NN ))
+export HN=$( hostname )
+#export ROCR_VISIBLE_DEVICES=$(( IP % NB_DEVICE ))
+export ROCR_VISIBLE_DEVICES=$(( GPU_OFFSET + LIP / ( 1 + (NPN-1) / NB_DEVICE ) ))
+echo LIP=${LIP} IP=${IP} NP=${NP} NN=${NN} NPN=${NPN} NG=${NB_DEVICE} GPU=${ROCR_VISIBLE_DEVICES} ${HN}
+
+exec "$@"
diff --git a/bin/spll_lst b/bin/spll_lst new file mode 100755 index 0000000000000000000000000000000000000000..5d8e69438a6fa36bb09c2d2d45e0f1c021f4024b --- /dev/null +++ b/bin/spll_lst @@ -0,0 +1,174 @@ +#!/bin/bash +#MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier +#MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence +#MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +#MNH_LIC for details. version 1. +#set -x +if [ $# -ne 2 ] +then +echo ERROR USAGE :: spll NSOURCE file +exit 1 +fi +NSOURCE=$1 +file=$2 +DIRNAME=`dirname $file` +BASENAME=`basename $file` +SUF=`echo $BASENAME | awk -F'.' 
'{print$2}'` +BASENAME2=`basename $BASENAME .$SUF` +LOCAL=$PWD +TOCASE=${TOCASE-"tolower"} +# +export PERL5LIB=$SRC_MESONH/bin +export FILE_WITHOUT_INTERFACE_NEDEED="\ +rttov.*.F90|rttvi.F90|tstrad.*.F90|\ +ch_f77.fx90|nband_model.fx90|BASIC.f90|mode_tmat.f90|\ +ini_cmfshall.f90|mode_double_double.f90|mode_fgau.f90|\ +extern_usersurc_ll.f90|\ +extern_userio.f90|fmreadwrit.f90|fm_read_ll.f90|poub.f90|\ +mode_glt.*.F90|\ +rrtm_.*.F90|srtm_.*.F90|\ +libs4py.f90|\ +ec_meminfo.F90" +# + +if [ "$SUF" = "f" ] +then + +echo "!depfile:$BASENAME2.D" > $DIRNAME/spll_$BASENAME +cat $file >> $DIRNAME/spll_$BASENAME +echo "#" $file > $DIRNAME/$BASENAME2.D + +else + +TMP=/tmp/split.${USER}.$$ +mkdir $TMP +trap "[ -d $TMP ] && rm -rf $TMP" 0 + +export LOC_INTFBDIR=$TMP +export INTFBDIR=$TMP + +#cp $file $TMP/fichier_a_split.f90 +cp $file $TMP/$BASENAME +echo DIRNAME=$DIRNAME BASENAME=$BASENAME + +# +# Split des sources +# +( +cd $TMP +spl ${BASENAME} > liste_file +) +# +#generation de l'interface +# +( +cd $TMP +if [ "`ls modi_* 2>/dev/null`" = "" ] +then +if [ "`echo $BASENAME | egrep -i $FILE_WITHOUT_INTERFACE_NEDEED `" = "" ] +then + for sfile in `cat liste_file` + do + if [ "`egrep -i '^ *module|^ *program' $sfile`" = "" ] + then + if [ "$SUF" = "f90" ] + then + if [ "${CHECK_MODI}" != "" ] + then + #echo "HELLO JUAN OK :: $sfile " + BASENAME_MODI=$( basename $sfile .$SUF ) + echo "!auto_modi_in_contains" > $sfile.tmp + echo "module modi_${BASENAME_MODI}" >> $sfile.tmp + echo " contains " >> $sfile.tmp + cat $sfile >> $sfile.tmp + echo "end module modi_${BASENAME_MODI}" >> $sfile.tmp + mv $sfile.tmp $sfile + else + make_intfbl_f90.pl $sfile + fi + elif [ "$SUF" = "fx90" ] + then + make_intfbl_f77.pl $sfile + fi + fi + done +fi +fi +ls -1 $TMP > liste_file +egrep -v "liste_file" liste_file > liste_file2 +) +# +# génération des dependances +# +( +cd $TMP +echo "#=========== $BASENAME2.$SUF dependence ==========================" > $DIRNAME/$BASENAME2.D + +for sfile in `cat 
liste_file2` +do +# +# preparation of splitted file +# +base=`echo $sfile | awk -F'.' '{print$1}'` +spllbase="spll_$base" +spllfile="$spllbase.$SUF" +echo "#---------------------- splitted $spllfile dependence -----------" >> $DIRNAME/$BASENAME2.D +# +echo "!depfile:$BASENAME2.D" > $spllfile +# + if [ "`grep -l '^[^\!]*\$n' $sfile`" != "" ] + then +# clonage of "$n" file + let iloop=0 + while [ $iloop -lt $NSOURCE ] + do + let iloop=iloop+1 + sed -e 's/$n/'$iloop'/g' $sfile >> $spllfile + done + else + cat $sfile >> $spllfile + fi +# +# if splitted file differente of old one, update ... +# + +if [ "`diff $spllfile $DIRNAME/$spllfile 2>&1 `" != "" ] +then + cp $spllfile $DIRNAME/$spllfile + touch $DIRNAME/$BASENAME2.D +fi +dep=`egrep -i "^[[:space:]]*use " $spllfile | sed -e 's/,/ /g' | awk '{ print '${TOCASE}'($2)".mod"}' | sort -u ` +mod=`egrep -i "^[[:space:]]*module " $spllfile | sed -e 's/,/ /g' | awk '{ print '${TOCASE}'($2)".mod"}' | sort -u | grep -iv procedure ` + +if [ "x$mod" != "x" ] +then +echo $spllbase.o : >> $DIRNAME/$BASENAME2.D +echo $mod : $spllfile $dep >> $DIRNAME/$BASENAME2.D +else +echo $spllbase.o : $spllfile $dep >> $DIRNAME/$BASENAME2.D +fi + +if [ "$SUF" = "f90" ] +then +echo " "' $(F90) -I$(OBJDIR)/MOD $(INC) -c $(F90FLAGS) $<' >> $DIRNAME/$BASENAME2.D +else +echo " "' cp $< $(OBJDIR)/$(*F).f ' >> $DIRNAME/$BASENAME2.D +echo " "' $(FX90) -I$(OBJDIR)/MOD $(INC) -c $(FX90FLAGS) $(OBJDIR)/$(*F).f ' >> $DIRNAME/$BASENAME2.D +echo " "' rm $(OBJDIR)/$(*F).f ' >> $DIRNAME/$BASENAME2.D +fi +echo " -mv $spllbase.o $spllbase.lst "' $(OBJDIR)/. || '"echo OK $spllbase.o " >> $DIRNAME/$BASENAME2.D +for filemod in $mod +do +echo " -mv $filemod "' $(OBJDIR)/MOD/. 
|| '"echo OK $filemod " >> $DIRNAME/$BASENAME2.D +done + +unset dep mod + +done +touch $DIRNAME/$BASENAME2.D + +) +rm -fr $TMP + +fi + diff --git a/bin/spll_new b/bin/spll_new new file mode 100755 index 0000000000000000000000000000000000000000..1db545d5444c9255b4d683ebc7556a2bcd1bd380 --- /dev/null +++ b/bin/spll_new @@ -0,0 +1,174 @@ +#!/bin/bash +#MNH_LIC Copyright 1994-2019 CNRS, Meteo-France and Universite Paul Sabatier +#MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence +#MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +#MNH_LIC for details. version 1. +#set -x +if [ $# -ne 2 ] +then +echo ERROR USAGE :: spll NSOURCE file +exit 1 +fi +NSOURCE=$1 +file=$2 +DIRNAME=`dirname $file` +BASENAME=`basename $file` +SUF=`echo $BASENAME | awk -F'.' '{print$2}'` +BASENAME2=`basename $BASENAME .$SUF` +LOCAL=$PWD +TOCASE=${TOCASE-"tolower"} +# +export PERL5LIB=$SRC_MESONH/bin +export FILE_WITHOUT_INTERFACE_NEDEED="\ +rttov.*.F90|rttvi.F90|tstrad.*.F90|\ +ch_f77.fx90|nband_model.fx90|BASIC.f90|mode_tmat.f90|\ +ini_cmfshall.f90|mode_double_double.f90|mode_fgau.f90|\ +extern_usersurc_ll.f90|\ +extern_userio.f90|fmreadwrit.f90|fm_read_ll.f90|poub.f90|\ +mode_glt.*.F90|\ +rrtm_.*.F90|srtm_.*.F90|\ +libs4py.f90|\ +ec_meminfo.F90" +# + +if [ "$SUF" = "f" ] +then + +echo "!depfile:$BASENAME2.D" > $DIRNAME/spll_$BASENAME +cat $file >> $DIRNAME/spll_$BASENAME +echo "#" $file > $DIRNAME/$BASENAME2.D + +else + +TMP=/tmp/split.${USER}.$$ +mkdir $TMP +trap "[ -d $TMP ] && rm -rf $TMP" 0 + +export LOC_INTFBDIR=$TMP +export INTFBDIR=$TMP + +#cp $file $TMP/fichier_a_split.f90 +cp $file $TMP/$BASENAME +echo DIRNAME=$DIRNAME BASENAME=$BASENAME + +# +# Split des sources +# +( +cd $TMP +spl ${BASENAME} > liste_file +) +# +#generation de l'interface +# +( +cd $TMP +if [ "`ls modi_* 2>/dev/null`" = "" ] +then +if [ "`echo $BASENAME | egrep -i $FILE_WITHOUT_INTERFACE_NEDEED `" = "" ] +then + for sfile in `cat liste_file` + do + if [ "`egrep -i 
'^ *module|^ *program' $sfile`" = "" ] + then + if [ "$SUF" = "f90" ] + then + if [ "${CHECK_MODI}" != "" ] + then + #echo "HELLO JUAN OK :: $sfile " + BASENAME_MODI=$( basename $sfile .$SUF ) + echo "!auto_modi_in_contains" > $sfile.tmp + echo "module modi_${BASENAME_MODI}" >> $sfile.tmp + echo " contains " >> $sfile.tmp + cat $sfile >> $sfile.tmp + echo "end module modi_${BASENAME_MODI}" >> $sfile.tmp + mv $sfile.tmp $sfile + else + make_intfbl_f90.pl $sfile + fi + elif [ "$SUF" = "fx90" ] + then + make_intfbl_f77.pl $sfile + fi + fi + done +fi +fi +ls -1 $TMP > liste_file +egrep -v "liste_file" liste_file > liste_file2 +) +# +# génération des dependances +# +( +cd $TMP +echo "#=========== $BASENAME2.$SUF dependence ==========================" > $DIRNAME/$BASENAME2.D + +for sfile in `cat liste_file2` +do +# +# preparation of splitted file +# +base=`echo $sfile | awk -F'.' '{print$1}'` +spllbase="spll_$base" +spllfile="$spllbase.$SUF" +echo "#---------------------- splitted $spllfile dependence -----------" >> $DIRNAME/$BASENAME2.D +# +echo "!depfile:$BASENAME2.D" > $spllfile +# + if [ "`grep -l '^[^\!]*\$n' $sfile`" != "" ] + then +# clonage of "$n" file + let iloop=0 + while [ $iloop -lt $NSOURCE ] + do + let iloop=iloop+1 + sed -e 's/$n/'$iloop'/g' $sfile >> $spllfile + done + else + cat $sfile >> $spllfile + fi +# +# if splitted file differente of old one, update ... 
+# + +if [ "`diff $spllfile $DIRNAME/$spllfile 2>&1 `" != "" ] +then + cp $spllfile $DIRNAME/$spllfile + touch $DIRNAME/$BASENAME2.D +fi +dep=`egrep -i "^[[:space:]]*use " $spllfile | sed -e 's/,/ /g' | awk '{ print '${TOCASE}'($2)".mod"}' | sort -u ` +mod=`egrep -i "^[[:space:]]*module " $spllfile | sed -e 's/,/ /g' | awk '{ print '${TOCASE}'($2)".mod"}' | sort -u | grep -iv procedure ` + +if [ "x$mod" != "x" ] +then +echo $spllbase.o : >> $DIRNAME/$BASENAME2.D +echo $mod : $spllfile $dep >> $DIRNAME/$BASENAME2.D +else +echo $spllbase.o : $spllfile $dep >> $DIRNAME/$BASENAME2.D +fi + +if [ "$SUF" = "f90" ] +then +echo " "' $(F90) -I$(OBJDIR)/MOD $(INC) -c $(F90FLAGS) $<' >> $DIRNAME/$BASENAME2.D +else +echo " "' cp $< $(OBJDIR)/$(*F).f ' >> $DIRNAME/$BASENAME2.D +echo " "' $(FX90) -I$(OBJDIR)/MOD $(INC) -c $(FX90FLAGS) $(OBJDIR)/$(*F).f ' >> $DIRNAME/$BASENAME2.D +echo " "' rm $(OBJDIR)/$(*F).f ' >> $DIRNAME/$BASENAME2.D +fi +echo " -mv $spllbase.o"' $(OBJDIR)/. || '"echo OK $spllbase.o " >> $DIRNAME/$BASENAME2.D +for filemod in $mod +do +echo " -mv $filemod "' $(OBJDIR)/MOD/. || '"echo OK $filemod " >> $DIRNAME/$BASENAME2.D +done + +unset dep mod + +done +touch $DIRNAME/$BASENAME2.D + +) +rm -fr $TMP + +fi +
diff --git a/src/ARCH_SRC/cray_gu/gu.c b/src/ARCH_SRC/cray_gu/gu.c
new file mode 100644
index 0000000000000000000000000000000000000000..24581c7e06e7b6efaeae025b894cd22f80b30634
--- /dev/null
+++ b/src/ARCH_SRC/cray_gu/gu.c
@@ -0,0 +1,45 @@
+/*
+
+* Copyright -2022 Hewlett Packard Enterprise Development LP
+
+*/
+
+void __cce_floatingpt_gu( void ); // Gradual Underflow.
+
+/* Placed in .init_array so the loader calls it before main(); "used" keeps
+ * this otherwise-unreferenced pointer from being dropped by the optimizer. */
+static void (*p)(void) __attribute__((used, section(".init_array"))) = __cce_floatingpt_gu;
+
+
+/* Bit positions inside the x86 MXCSR control/status register. */
+#define DENORMS_ARE_ZEROS 6
+
+#define FLUSH_TO_ZERO_MASK 15
+
+
+/* Clear the DAZ and FTZ bits of MXCSR so that denormals underflow gradually. */
+void __cce_floatingpt_gu( void )
+
+{
+
+    unsigned int gu_src = (1u << DENORMS_ARE_ZEROS);
+
+    unsigned int gu_dst = (1u << FLUSH_TO_ZERO_MASK);
+
+
+
+    do {
+        /* MXCSR is 32 bits wide: STMXCSR/LDMXCSR use an m32 operand. */
+        unsigned int __mxcsr;
+        /* STMXCSR writes its operand, so it must be declared as an output. */
+        __asm__ volatile("stmxcsr %0" : "=m" (__mxcsr));
+
+        __mxcsr &= ~(gu_src | gu_dst);
+
+        __asm__ volatile("ldmxcsr %0" : : "m" (__mxcsr));
+
+    }
+
+    while (0);
+
+}
diff --git a/src/LIB/BITREP/br_transcendentals.cpp b/src/LIB/BITREP/br_transcendentals.cpp index f0ce340a74baff86ac7b4ab927348d7d55e32391..b837517f961f1ece4499a279a5afdf17fdba48f1 100644 --- a/src/LIB/BITREP/br_transcendentals.cpp +++ b/src/LIB/BITREP/br_transcendentals.cpp @@ -20,9 +20,12 @@ namespace bitrep /************* * CONSTANTS * *************/ - -static const double const_2_over_pi = 6.3661977236758138e-1; +#ifdef MNH_BITREP_OMP +#pragma omp declare target +#endif +static const double const_2_over_pi = 6.3661977236758138e-1; + static const double __sin_cos_coefficient[16] = { 1.590307857061102704e-10, /* sin0 */ @@ -43,7 +46,9 @@ static const double __sin_cos_coefficient[16] = -5.000000000000000000e-01, /* cos6 */ 1.000000000000000000e+00, /* cos7 */ }; - +#ifdef MNH_BITREP_OMP +#pragma omp end declare target +#endif /***************************************** diff --git a/src/LIB/MPIvide/mpivide.c b/src/LIB/MPIvide/mpivide.c index eebe91b71a8e3178634fb98688f9118c8325037b..1a9a8d9a3501b53562c7dd941f307c363c3dd11f 100644 --- a/src/LIB/MPIvide/mpivide.c +++ b/src/LIB/MPIvide/mpivide.c @@ -158,6 +158,20 @@ int *__ierr; *__ierr = 0; } +#pragma weak mpi_sendrecv__ = mpi_sendrecv +#pragma weak mpi_sendrecv_ = mpi_sendrecv +void mpi_sendrecv(void *sendbuf, int *sendcounts, int *sendtype, int *dest , int *sendtag , + void *recvbuf, int *recvcounts, int *recvtype, int *source, int *recvtag , + int *comm, int *__ierr) +{ + int size; + + 
disppass("sendrecv"); + size = findtypesize(*sendtype); + memcpy(recvbuf, sendbuf, (*recvcounts)*size); + + *__ierr = 0; +} #pragma weak mpi_alltoallv__ = mpi_alltoallv #pragma weak mpi_alltoallv_ = mpi_alltoallv diff --git a/src/LIB/SURCOUCHE/src/mode_double_double.f90 b/src/LIB/SURCOUCHE/src/mode_double_double.f90 index b9e58898f24f8dd022c8f70c6dc6031f6e5427cc..8b70ae69e51e816725c6cba1ac39781dad35442f 100644 --- a/src/LIB/SURCOUCHE/src/mode_double_double.f90 +++ b/src/LIB/SURCOUCHE/src/mode_double_double.f90 @@ -215,6 +215,58 @@ CONTAINS c = ddc%R END FUNCTION SUM_DD_R2_LL + FUNCTION SUM_DD_R2_ll_DEVICE (a) RESULT(c) + !---------------------------------------------------------------------- + ! + ! Purpose: + ! Modification of original codes written by David H. Bailey + ! This subroutine computes ddc = ddb + a + ! Could be inlined by compiler <=> elemental function + ! + !---------------------------------------------------------------------- + USE mode_reduce_sum, ONLY: REDUCESUM_ll + ! + ! Arguments + ! + REAL :: c ! result + REAL,DIMENSION(:,:), INTENT(in) :: a ! input + ! + ! Local workspace + ! + TYPE(DOUBLE_DOUBLE) :: ddc + TYPE(DOUBLE_DOUBLE),DIMENSION(SIZE(a,1)) :: ddb + REAL ,DIMENSION(SIZE(a,1)) :: e, t1, t2 + INTEGER :: i,j + INTEGER :: IINFO_ll + ! + !----------------------------------------------------------------------- + ! + ! Compute dda + ddb using Knuth's trick. + !$acc kernels + ddb%R = 0.0 + ddb%E = 0.0 + !$acc end kernels + !$acc parallel + !$acc loop seq + DO j=1,SIZE(a,2) + !$acc loop independent + DO i=1,SIZE(a,1) + t1(i) = a(i,j) + ddb(i)%R + e(i) = t1(i) - a(i,j) + t2(i) = ((ddb(i)%R - e(i)) + (a(i,j) - (t1(i) - e(i)))) & + + ddb(i)%E + ! + ! The result is t1 + t2, after normalization. 
+ ddb(i)%R = t1(i) + t2(i) + ddb(i)%E = t2(i) - ((t1(i) + t2(i)) - t1(i)) + END DO + END DO + !$acc end parallel + ddc = SUM_DD_DD1(ddb) + CALL REDUCESUM_ll(ddc,IINFO_ll) + c = ddc%R + END FUNCTION SUM_DD_R2_LL_DEVICE + FUNCTION SUM_DD_R2_R1_ll (a) RESULT(c) !---------------------------------------------------------------------- ! diff --git a/src/LIB/SURCOUCHE/src/mode_io_field_write.f90 b/src/LIB/SURCOUCHE/src/mode_io_field_write.f90 index f2f43501c84eafb6860bf8ddb716c21532ce769a..d746793eb317597323062eef75cc9284391b2daf 100644 --- a/src/LIB/SURCOUCHE/src/mode_io_field_write.f90 +++ b/src/LIB/SURCOUCHE/src/mode_io_field_write.f90 @@ -1429,7 +1429,7 @@ end subroutine IO_Ndimlist_reduce ! end of MNH_GA #endif !Not global reduction because a broadcast is done in IO_Field_write_error_check - call MPI_REDUCE( -Abs( [ iresp_lfi, iresp_nc4 ] ), iresps(:), 1, MNHINT_MPI, MPI_MIN, & + call MPI_REDUCE( -Abs( [ iresp_lfi, iresp_nc4 ] ), iresps(:), 2, MNHINT_MPI, MPI_MIN, & tpfile%nmaster_rank - 1, tpfile%nmpicomm, ierr ) iresp_lfi = iresps(1) iresp_nc4 = iresps(2) diff --git a/src/MNH/advection_metsv.f90 b/src/MNH/advection_metsv.f90 index d54409639d5a2b9ac09b87ac85ed31599c42bfe3..408861cddf11554929925b44c84373269cecc515 100644 --- a/src/MNH/advection_metsv.f90 +++ b/src/MNH/advection_metsv.f90 @@ -190,7 +190,7 @@ USE MODI_ADV_BOUNDARIES #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif -#ifdef MNH_BITREP_OMP +#ifdef MNH_COMPILER_CCE !$mnh_undef(LOOP) !$mnh_undef(OPENACC) #endif diff --git a/src/MNH/advection_uvw_cen.f90 b/src/MNH/advection_uvw_cen.f90 index b6102eb35422295e84c7d985d0cfdfda1f2af0e5..69f0e1206c52e35695b8a082796e26fa83e3b721 100644 --- a/src/MNH/advection_uvw_cen.f90 +++ b/src/MNH/advection_uvw_cen.f90 @@ -380,7 +380,8 @@ ELSEIF (HUVW_ADV_SCHEME=='CEN4TH') THEN ! END IF ! 
-!$acc kernels present( ZRUS, ZRVS, ZRWS, ZMXM_RHODJ, ZMYM_RHODJ, ZMZM_RHODJ ) +!$acc kernels present(ZRUS,ZRVS,ZRWS,ZMXM_RHODJ,ZMYM_RHODJ,ZMZM_RHODJ) & +!$acc present_cr(PRUS,PRVS,PRWS,PDUM,PDWM) ZUS(:,:,:) = ZRUS(:,:,:)/ZMXM_RHODJ(:,:,:)*2.*PTSTEP ZVS(:,:,:) = ZRVS(:,:,:)/ZMYM_RHODJ(:,:,:)*2.*PTSTEP ZWS(:,:,:) = ZRWS(:,:,:)/ZMZM_RHODJ(:,:,:)*2.*PTSTEP diff --git a/src/MNH/advecuvw_2nd.f90 b/src/MNH/advecuvw_2nd.f90 index a8313c2f5e94de2a323694ca77fb9c364b3eab6f..5f79d3f6a9303de892144f69e71947a8fe37521d 100644 --- a/src/MNH/advecuvw_2nd.f90 +++ b/src/MNH/advecuvw_2nd.f90 @@ -117,6 +117,10 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS, PRVS, PRWS ! Sources of Momentu !* 1. COMPUTES THE ADVECTIVE TENDANCIES ! --------------------------------- ! +!PASCAL +print*," ATTENTION variable PRUS sur GPU " +STOP + PRUS(:,:,:) = PRUS(:,:,:) & -DXM( MXF(PRUCT(:,:,:))*MXF(PUT(:,:,:)) ) ! diff --git a/src/MNH/advecuvw_rk.f90 b/src/MNH/advecuvw_rk.f90 index fc1572e0ea75d6a15b710636d5e20281716edaf6..5119193677bf1ee85d79fbecc03c837818394865 100644 --- a/src/MNH/advecuvw_rk.f90 +++ b/src/MNH/advecuvw_rk.f90 @@ -213,6 +213,7 @@ TYPE(LIST_ll), POINTER :: TZFIELDS4_ll ! 
list of fields to exchange LOGICAL :: GIBM !Intermediate variable used to work around a Cray compiler bug (CCE 13.0.0) REAL :: ZIBM_EPSI !Intermediate variable used to work around a Cray compiler bug (CCE 13.0.0) REAL :: ZTIME1,ZTIME2 +INTEGER :: JJI,JJJ,JJK !------------------------------------------------------------------------------- IF (MPPDB_INITIALIZED) THEN @@ -314,7 +315,7 @@ CALL INIT_ON_HOST_AND_DEVICE(ZWT,6e99,'ADVECUVW_RK::ZWT') !$acc data present( PU, PV, PW, PUT, PVT, PWT, PMXM_RHODJ, PMYM_RHODJ, PMZM_RHODJ, & !$acc & PRUCT, PRVCT, PRWCT, PRUS_ADV, PRVS_ADV, PRWS_ADV, & !$acc & PRUS_OTHER, PRVS_OTHER, PRWS_OTHER, ZUT, ZVT, ZWT, & -!$acc & ZRUS, ZRVS, ZRWS, ZIBM, ZBUT, ZBUTS ) +!$acc & ZRUS, ZRVS, ZRWS, ZBUT, ZBUTS ) SELECT CASE (HTEMP_SCHEME) CASE('RK11') @@ -402,20 +403,20 @@ END SELECT !$acc update device(ZBUTS,ZBUT) ! IF ( GIBM ) THEN -!$acc kernels +!$acc kernels present(ZIBM) ZIBM(:,:,:,:) = 1. !$acc end kernels END IF ! IF (GIBM .AND. CIBM_ADV=='FREEZE') THEN -!$acc kernels +!$acc kernels present(ZIBM) WHERE (XIBM_LS(:,:,:,2).GT.-ZIBM_EPSI) ZIBM(:,:,:,1) = 0. WHERE (XIBM_LS(:,:,:,3).GT.-ZIBM_EPSI) ZIBM(:,:,:,2) = 0. WHERE (XIBM_LS(:,:,:,4).GT.-ZIBM_EPSI) ZIBM(:,:,:,3) = 0. !$acc end kernels ENDIF ! -!$acc kernels present(PRUS_ADV,PRVS_ADV,PRWS_ADV) present(ZUT,ZVT,ZWT) present(PU,PV,PW) +!$acc kernels present_cr(PU,PV,PW,PRUS_ADV,PRVS_ADV,PRWS_ADV,ZUT,ZVT,ZWT) PRUS_ADV = 0. PRVS_ADV = 0. PRWS_ADV = 0. @@ -447,7 +448,7 @@ CALL ADD3DFIELD_ll( TZFIELDMT_ll, ZWT, 'ADVECUVW_RK::ZWT' ) INBVAR = 3 CALL INIT_HALO2_ll(TZHALO2MT_ll,INBVAR,SIZE(PUT,1),SIZE(PUT,2),SIZE(PWT,3)) ! -!$acc kernels +!$acc kernels present_cr(ZRUS,ZRVS,ZRWS) ZRUS(:, :, :, : ) = 0. ZRVS(:, :, :, : ) = 0. ZRWS(:, :, :, : ) = 0. @@ -486,7 +487,7 @@ RKLOOP: DO JS = 1, ISPL ! -------------------------- ! IF (GIBM .AND. 
CIBM_ADV=='LOWORD') THEN -!$acc kernels +!$acc kernels present_cr(ZIBM,ZRUS,ZRVS,ZRWS) ZIBM(:,:,:,1)=ZRUS(:,:,:,JS) ZIBM(:,:,:,2)=ZRVS(:,:,:,JS) ZIBM(:,:,:,3)=ZRWS(:,:,:,JS) @@ -513,13 +514,25 @@ RKLOOP: DO JS = 1, ISPL TZHALO2MT_ll ) ENDIF IF (HUVW_ADV_SCHEME=='CEN4TH') THEN + !$acc update self(ZIBM) CALL ADVECUVW_2ND (ZUT, ZVT, ZWT, PRUCT, PRVCT, PRWCT, & ZIBM(:,:,:,1), ZIBM(:,:,:,2), ZIBM(:,:,:,3)) + !$acc update device(ZIBM) ENDIF - WHERE(XIBM_LS(:,:,:,2).GT.-ZIBM_EPSI) ZRUS(:,:,:,JS)=ZIBM(:,:,:,1) - WHERE(XIBM_LS(:,:,:,3).GT.-ZIBM_EPSI) ZRVS(:,:,:,JS)=ZIBM(:,:,:,2) - WHERE(XIBM_LS(:,:,:,4).GT.-ZIBM_EPSI) ZRWS(:,:,:,JS)=ZIBM(:,:,:,3) + !$acc kernels present_cr(ZIBM,ZRUS,ZRVS,ZRWS) + !$mnh_expand_where(JJI=1:IIU,JJJ=1:IJU,JJK=1:IKU) + WHERE(XIBM_LS(:,:,:,2).GT.-ZIBM_EPSI) + ZRUS(:,:,:,JS)=ZIBM(:,:,:,1) + END WHERE + WHERE(XIBM_LS(:,:,:,3).GT.-ZIBM_EPSI) + ZRVS(:,:,:,JS)=ZIBM(:,:,:,2) + END WHERE + WHERE(XIBM_LS(:,:,:,4).GT.-ZIBM_EPSI) + ZRWS(:,:,:,JS)=ZIBM(:,:,:,3) + END WHERE + !$mnh_end_expand_where() ZIBM(:,:,:,:)=1. + !$acc end kernels ENDIF ! write ( ynum, '( I3 )' ) js @@ -552,15 +565,15 @@ RKLOOP: DO JS = 1, ISPL ! ! Guesses at the end of the RK loop ! -!$acc kernels present(PRUS_ADV,PRVS_ADV,PRWS_ADV,ZBUTS) present(ZRUS,ZRVS,ZRWS,ZIBM) +!$acc kernels present_cr(PRUS_ADV,PRVS_ADV,PRWS_ADV,ZBUTS) present_cr(ZRUS,ZRVS,ZRWS,ZIBM) IF ( .NOT. 
GIBM ) THEN PRUS_ADV(:,:,:) = PRUS_ADV(:,:,:) + ZBUTS(JS) * ZRUS(:,:,:,JS) PRVS_ADV(:,:,:) = PRVS_ADV(:,:,:) + ZBUTS(JS) * ZRVS(:,:,:,JS) PRWS_ADV(:,:,:) = PRWS_ADV(:,:,:) + ZBUTS(JS) * ZRWS(:,:,:,JS) ELSE - PRUS_ADV(:,:,:) = PRUS_ADV(:,:,:) + ZBUTS(JI) * ZRUS(:,:,:,JI) * ZIBM(:,:,:,1) - PRVS_ADV(:,:,:) = PRVS_ADV(:,:,:) + ZBUTS(JI) * ZRVS(:,:,:,JI) * ZIBM(:,:,:,2) - PRWS_ADV(:,:,:) = PRWS_ADV(:,:,:) + ZBUTS(JI) * ZRWS(:,:,:,JI) * ZIBM(:,:,:,3) + PRUS_ADV(:,:,:) = PRUS_ADV(:,:,:) + ZBUTS(JS) * ZRUS(:,:,:,JS) * ZIBM(:,:,:,1) + PRVS_ADV(:,:,:) = PRVS_ADV(:,:,:) + ZBUTS(JS) * ZRVS(:,:,:,JS) * ZIBM(:,:,:,2) + PRWS_ADV(:,:,:) = PRWS_ADV(:,:,:) + ZBUTS(JS) * ZRWS(:,:,:,JS) * ZIBM(:,:,:,3) END IF !$acc end kernels ! @@ -570,7 +583,7 @@ RKLOOP: DO JS = 1, ISPL ! !$acc & present(ZRUS,ZRVS,ZRWS,ZIBM) present(PRUS_OTHER,PRVS_OTHER,PRWS_OTHER) & ! !$acc & present(PMXM_RHODJ,PMYM_RHODJ,PMZM_RHODJ) ! -!$acc kernels present( ZUT, ZVT, ZWT ) +!$acc kernels present_cr( ZUT, ZVT, ZWT ) ZUT(:,:,:) = PU(:,:,:) ZVT(:,:,:) = PV(:,:,:) ZWT(:,:,:) = PW(:,:,:) @@ -581,7 +594,7 @@ RKLOOP: DO JS = 1, ISPL ! Intermediate guesses inside the RK loop ! IF ( .NOT. 
GIBM ) THEN -!$acc kernels present( ZUT, ZVT, ZWT, ZRUS, ZRVS, ZRWS ) +!$acc kernels present_cr(ZUT,ZVT,ZWT,ZRUS,ZRVS,ZRWS ) ZUT(:,:,:) = ZUT(:,:,:) + ZBUT(JS,JI) * PTSTEP * & ( ZRUS(:,:,:,JI) + PRUS_OTHER(:,:,:) ) / PMXM_RHODJ(:,:,:) ZVT(:,:,:) = ZVT(:,:,:) + ZBUT(JS,JI) * PTSTEP * & @@ -590,7 +603,7 @@ RKLOOP: DO JS = 1, ISPL ( ZRWS(:,:,:,JI) + PRWS_OTHER(:,:,:) ) / PMZM_RHODJ(:,:,:) !$acc end kernels ELSE -!$acc kernels present( ZUT, ZVT, ZWT, ZRUS, ZRVS, ZRWS, ZIBM ) +!$acc kernels present_cr(ZUT,ZVT,ZWT,ZRUS,ZRVS,ZRWS,ZIBM ) ZUT(:,:,:) = ZUT(:,:,:) + ZBUT(JS,JI) * PTSTEP * & ( ZRUS(:,:,:,JI) + PRUS_OTHER(:,:,:) ) / PMXM_RHODJ(:,:,:) * ZIBM(:,:,:,1) ZVT(:,:,:) = ZVT(:,:,:) + ZBUT(JS,JI) * PTSTEP * & diff --git a/src/MNH/condensation.f90 b/src/MNH/condensation.f90 index a92e8dc74e71b26338cd3b7d2baee72750022920..d4ca85714eadd55b5c6057a7e8db749f2b015fcb 100644 --- a/src/MNH/condensation.f90 +++ b/src/MNH/condensation.f90 @@ -147,6 +147,10 @@ USE MODI_COMPUTE_FRAC_ICE USE MODI_BITREP #endif ! +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif ! IMPLICIT NONE ! @@ -399,7 +403,7 @@ IKTB=1+JPVEXT IKTE=KKU-JPVEXT ! ! -!$acc kernels +!$acc kernels present_cr(ZRT) PCLDFR(:,:,:) = 0. ! Initialize values PSIGRC(:,:,:) = 0. ! Initialize values ! @@ -412,13 +416,13 @@ PSIGRC(:,:,:) = 0. ! Initialize values ! latent heat of vaporisation/sublimation IF(GPRESENT_PLV .AND. GPRESENT_PLS) THEN !$acc data present( PLV, PLS ) -!$acc kernels +!$acc kernels present_cr(ZLV,ZLS) ZLV(:,:,:)=PLV(:,:,:) ZLS(:,:,:)=PLS(:,:,:) !$acc end kernels !$acc end data ELSE -!$acc kernels +!$acc kernels present_cr(ZLV, ZLS) ! 
latent heat of vaporisation/sublimation ZLV(KIB:KIE,KJB:KJE,IKTB:IKTE) = XLVTT + ( XCPV - XCL ) * ( PT(KIB:KIE,KJB:KJE,IKTB:IKTE) - XTT ) ZLS(KIB:KIE,KJB:KJE,IKTB:IKTE) = XLSTT + ( XCPV - XCI ) * ( PT(KIB:KIE,KJB:KJE,IKTB:IKTE) - XTT ) @@ -426,12 +430,12 @@ ELSE ENDIF IF(GPRESENT_PCPH) THEN !$acc data present( PCPH ) -!$acc kernels +!$acc kernels present_cr(ZCPD) ZCPD(:,:,:)=PCPH(:,:,:) !$acc end kernels !$acc end data ELSE -!$acc kernels +!$acc kernels present_cr(ZCPD) ZCPD(KIB:KIE,KJB:KJE,IKTB:IKTE) = XCPD + XCPV*PRV(KIB:KIE,KJB:KJE,IKTB:IKTE) & + XCL*PRC(KIB:KIE,KJB:KJE,IKTB:IKTE) + XCI*PRI(KIB:KIE,KJB:KJE,IKTB:IKTE) & + XCI*(PRS(KIB:KIE,KJB:KJE,IKTB:IKTE) + PRG(KIB:KIE,KJB:KJE,IKTB:IKTE) ) @@ -441,7 +445,7 @@ ENDIF !acc kernels ! Preliminary calculations needed for computing the "turbulent part" of Sigma_s IF ( .NOT. OSIGMAS ) THEN -!$acc kernels +!$acc kernels present_cr(ZTLK,ITPL,ZTMIN,ZZZP) ! store temperature at saturation ZTLK(KIB:KIE,KJB:KJE,IKTB:IKTE) = PT(KIB:KIE,KJB:KJE,IKTB:IKTE) & - ZLV(KIB:KIE,KJB:KJE,IKTB:IKTE)*PRC(KIB:KIE,KJB:KJE,IKTB:IKTE)/ZCPD(KIB:KIE,KJB:KJE,IKTB:IKTE) & @@ -501,9 +505,13 @@ DO CONCURRENT ( JK=IKTB:IKTE ) JKPK(JK)=MAX(MIN(JK+KKL,IKTE),IKTB) JKMK(JK)=MAX(MIN(JK-KKL,IKTE),IKTB) END DO -!$acc_nv loop independent collapse(3) -DO CONCURRENT (JI=KIB:KIE,JJ=KJB:KJE,JK=IKTB:IKTE) - ! latent heats +! Bypass CCE/14++ compile bug with index in the good order !!! +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +DO CONCURRENT(JI=KIB:KIE,JJ=KJB:KJE,JK=IKTB:IKTE) +#else +!$mnh_do_concurrent(JI=KIB:KIE,JJ=KJB:KJE,JK=IKTB:IKTE) +#endif + ! latent heats ! saturated water vapor mixing ratio over liquid water #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZPVP(JI,JJ,JK) = MIN(EXP( XALPW - XBETAW / PT(JI,JJ,JK) - XGAMW * LOG( PT(JI,JJ,JK) ) ), .99*PPABS(JI,JJ,JK)) @@ -652,7 +660,8 @@ DO CONCURRENT (JI=KIB:KIE,JJ=KJB:KJE,JK=IKTB:IKTE) ZCONDP(JI,JJ,JK) = BR_EXP( 1.2*ZQ1P(JI,JJ,JK)-1. 
) #endif END IF - + ZCONDP(JI,JJ,JK) = ZCONDP(JI,JJ,JK) * ZSIGMAP(JI,JJ,JK) + INQ1P(JI,JJ,JK) = MIN( MAX(-22,FLOOR(MIN(100., MAX(-100., 2*ZQ1P(JI,JJ,JK)))) ), 10) !inner min/max prevents sigfpe when 2*zq1 does not fit into an int ZINCP(JI,JJ,JK) = 2.*ZQ1P(JI,JJ,JK) - INQ1P(JI,JJ,JK) @@ -698,8 +707,11 @@ DO CONCURRENT (JI=KIB:KIE,JJ=KJB:KJE,JK=IKTB:IKTE) IF(YLAMBDA3=='CB')THEN PSIGRC(JI,JJ,JK) = PSIGRC(JI,JJ,JK)* MIN( 3. , MAX(1.,1.-ZQ1P(JI,JJ,JK)) ) ENDIF - -END DO +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +ENDDO ! CONCURRENT +#else +!$mnh_end_do() !CONCURRENT +#endif !$acc end kernels !$acc end data diff --git a/src/MNH/gradient_m.f90 b/src/MNH/gradient_m.f90 index 9353d3f20dc559bf2f5fb2712dec30f3b37aedea..15ced819881101e4ca0b3212c44407fe3bfa4137 100644 --- a/src/MNH/gradient_m.f90 +++ b/src/MNH/gradient_m.f90 @@ -866,7 +866,7 @@ INTEGER IIU,IJU,IKU,JI IIU=SIZE(PY,1) IJU=SIZE(PY,2) IKU=SIZE(PY,3) -!$acc kernels present_cr(PGX_M_U) +!$acc kernels !!!!present_cr(PGX_M_U) IF (.NOT. LFLAT) THEN PGX_M_U(1+JPHEXT:IIU,1:IJU,1+JPVEXT_TURB:IKU-JPVEXT_TURB) = & ( PY(1+JPHEXT:IIU,1:IJU,1+JPVEXT_TURB:IKU-JPVEXT_TURB)-PY(JPHEXT:IIU-1,1:IJU,1+JPVEXT_TURB:IKU-JPVEXT_TURB) & diff --git a/src/MNH/gravity_impl.f90 b/src/MNH/gravity_impl.f90 index fb02de4bdd1abef5893e64754de57f8d80b07079..6ae2c343055062b7b9cc20822c57311fe46c4469 100644 --- a/src/MNH/gravity_impl.f90 +++ b/src/MNH/gravity_impl.f90 @@ -176,9 +176,9 @@ if ( lbudget_w ) call Budget_store_init( tbudgets(NBUDGET_W), 'GRAV', prws(:, :, !$acc kernels ZTH(:,:,:) = (PRTHS(:,:,:) + PRTHS_CLD(:,:,:)) / PRHODJ(:,:,:) * PTSTEP ! -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU , JR = 1:KRR ) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JR=1:KRR ) ZR(JI,JJ,JK,JR) = (PRRS(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR)) / PRHODJ(JI,JJ,JK) * PTSTEP -END DO ! CONCURRENT +!$mnh_end_do() ! CONCURRENT !$acc end kernels ! 
#ifndef MNH_OPENACC @@ -200,9 +200,9 @@ END DO CALL GRAVITY ( KRR,KRRL, KRRI, ZTH, ZR, PRHODJ, PTHVREF, ZRWS_GRAV(:,:,:) ) ! !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK) + ZRWS_GRAV(JI,JJ,JK) -END DO +!$mnh_end_do() !$acc end kernels ! if ( lbudget_w ) then diff --git a/src/MNH/ice4_fast_rg.f90 b/src/MNH/ice4_fast_rg.f90 index 8a49ae40f56303340297e2e990e70552cca9e69d..0e2e92debfab853e29a55eedf2dd64f94959d066 100644 --- a/src/MNH/ice4_fast_rg.f90 +++ b/src/MNH/ice4_fast_rg.f90 @@ -404,7 +404,7 @@ ENDIF ! Wet and dry collection of rs on graupel (6.2.1) IGDRY = 0 !$acc end kernels -!$acc parallel loop private(idx) independent +!$acc parallel loop private(idx) copy(IGDRY) independent DO CONCURRENT( JJ = 1 : SIZE( GDRY ) ) ZDRY(JJ)=MAX(0., -SIGN(1., XRTMIN(5)-PRST(JJ))) * & ! WHERE(PRST(:)>XRTMIN(5)) &MAX(0., -SIGN(1., XRTMIN(6)-PRGT(JJ))) * & ! WHERE(PRGT(:)>XRTMIN(6)) @@ -518,7 +518,7 @@ ENDIF !* 6.2.6 accretion of raindrops on the graupeln ! IGDRY = 0 -!$acc parallel loop private(idx) independent +!$acc parallel loop private(idx) copy(IGDRY) independent DO CONCURRENT( JJ = 1 : SIZE( GDRY ) ) ZDRY(JJ)=MAX(0., -SIGN(1., XRTMIN(3)-PRRT(JJ))) * & ! WHERE(PRRT(:)>XRTMIN(3)) &MAX(0., -SIGN(1., XRTMIN(6)-PRGT(JJ))) * & ! WHERE(PRGT(:)>XRTMIN(6)) diff --git a/src/MNH/ice4_fast_rh.f90 b/src/MNH/ice4_fast_rh.f90 index 33ce69daa008c6c148e3f6558ba72c2f8a54075b..04c1d71eed20ff8b52631deaafe84e6a4e9d7590 100644 --- a/src/MNH/ice4_fast_rh.f90 +++ b/src/MNH/ice4_fast_rh.f90 @@ -318,11 +318,12 @@ ELSE #endif END WHERE ENDIF +!$acc end kernels ! !* 7.2.1 accretion of aggregates on the hailstones ! IGWET = 0 -!$acc loop private(IDX) independent +!$acc parallel loop private(IDX) copy(IGWET) independent DO JJ = 1, SIZE(GWET) ZWET(JJ) = MAX(0., -SIGN(1., XRTMIN(7)-PRHT(JJ))) * & ! WHERE(PRHT(:)>XRTMIN(7)) &MAX(0., -SIGN(1., XRTMIN(5)-PRST(JJ))) * & ! 
WHERE(PRST(:)>XRTMIN(5)) @@ -338,7 +339,7 @@ DO JJ = 1, SIZE(GWET) GWET(JJ) = .FALSE. END IF END DO -!$acc end kernels +! acc end parallel loop IF(LDSOFT) THEN !$acc kernels @@ -353,7 +354,7 @@ ELSE PRH_TEND(:, IRSDRYH)=0. !$acc end kernels IF(IGWET>0)THEN -!$acc kernels +!$acc kernels present_cr(ZVEC1,ZVEC2,ZVEC3,ZZW) ! !* 7.2.3 select the (PLBDAH,PLBDAS) couplet ! @@ -396,6 +397,7 @@ ELSE * (ZVEC1(JJ) - 1.0) END DO ZZW(:) = 0. + !$acc loop independent DO JJ = 1, IGWET ZZW(I1(JJ)) = ZVEC3(JJ) END DO @@ -425,9 +427,8 @@ ENDIF ! !* 7.2.6 accretion of graupeln on the hailstones ! -!$acc kernels IGWET = 0 -!$acc loop private(IDX) independent +!$acc parallel loop private(IDX) copy(IGWET) independent DO JJ = 1, SIZE(GWET) ZWET(JJ)=MAX(0., -SIGN(1., XRTMIN(7)-PRHT(JJ))) * & ! WHERE(PRHT(:)>XRTMIN(7)) &MAX(0., -SIGN(1., XRTMIN(6)-PRGT(JJ))) * & ! WHERE(PRGT(:)>XRTMIN(6)) @@ -443,7 +444,7 @@ DO JJ = 1, SIZE(GWET) GWET(JJ) = .FALSE. END IF END DO -!$acc end kernels +! acc end parallel loop IF(LDSOFT) THEN !$acc kernels @@ -458,7 +459,7 @@ ELSE PRH_TEND(:, IRGDRYH)=0. !$acc end kernels IF(IGWET>0)THEN -!$acc kernels +!$acc kernels present_cr(ZVEC1,ZVEC2,ZVEC3,ZZW) ! !* 7.2.8 select the (PLBDAH,PLBDAG) couplet ! @@ -501,6 +502,7 @@ ELSE * (ZVEC1(JJ) - 1.0) END DO ZZW(:) = 0. + !$acc loop independent DO JJ = 1, IGWET ZZW(I1(JJ)) = ZVEC3(JJ) END DO @@ -534,12 +536,11 @@ ELSE !$acc end kernels END IF ENDIF -!$acc kernels ! !* 7.2.11 accretion of raindrops on the hailstones ! IGWET = 0 -!$acc loop private(IDX) independent +!$acc parallel loop private(IDX) copy(IGWET) independent DO JJ = 1, SIZE(GWET) ZWET(JJ)=MAX(0., -SIGN(1., XRTMIN(7)-PRHT(JJ))) * & ! WHERE(PRHT(:)>XRTMIN(7)) &MAX(0., -SIGN(1., XRTMIN(3)-PRRT(JJ))) * & ! WHERE(PRRT(:)>XRTMIN(3)) @@ -555,7 +556,7 @@ DO JJ = 1, SIZE(GWET) GWET(JJ) = .FALSE. END IF END DO -!$acc end kernels +! acc end parallel loop IF(LDSOFT) THEN !$acc kernels @@ -568,7 +569,7 @@ ELSE PRH_TEND(:, IRRWETH)=0. 
!$acc end kernels IF(IGWET>0)THEN -!$acc kernels +!$acc kernels present_cr(ZVEC1,ZVEC2) ! !* 7.2.12 select the (PLBDAH,PLBDAR) couplet ! @@ -611,6 +612,7 @@ ELSE *(ZVEC1(JJ) - 1.0) END DO ZZW(:) = 0. + !$acc loop independent DO JJ = 1, IGWET ZZW(I1(JJ)) = ZVEC3(JJ) END DO @@ -683,6 +685,7 @@ ELSE ( PRHODREF(:)*(XLMTT-XCL*(XTT-PT(:))) ) END WHERE ENDIF +!$acc loop independent DO JL=1, ISIZE !We must agregate, at least, the cold species ZRWETH_INIT(JL)=ZHAIL(JL) * MAX(PRH_TEND(JL, IRIWETH)+PRH_TEND(JL, IRSWETH)+PRH_TEND(JL, IRGWETH), & @@ -694,16 +697,19 @@ ENDDO !* 7.4 Select Wet or Dry case ! !Wet case +!$acc loop independent DO JL=1, ISIZE ZWETH(JL) = ZHAIL(JL) * & & MAX(0., SIGN(1., MAX(0., ZRDRYH_INIT(JL)-PRH_TEND(JL, IRIDRYH)-PRH_TEND(JL, IRSDRYH)-PRH_TEND(JL, IRGDRYH)) - & &MAX(0., ZRWETH_INIT(JL)-PRH_TEND(JL, IRIWETH)-PRH_TEND(JL, IRSWETH)-PRH_TEND(JL, IRGWETH)))) ENDDO IF(LNULLWETH) THEN + !$acc loop independent DO JL=1, ISIZE ZWETH(JL) = ZWETH(JL) * MAX(0., -SIGN(1., -ZRDRYH_INIT(JL))) ! WHERE(ZRDRYH_INIT(:)>0.) ENDDO ELSE + !$acc loop independent DO JL=1, ISIZE ZWETH(JL) = ZWETH(JL) * MAX(0., -SIGN(1., -ZRWETH_INIT(JL))) ! WHERE(ZRWETH_INIT(:)>0.) ENDDO @@ -713,6 +719,7 @@ IF(.NOT. LWETHPOST) THEN ZWETH(JL) = ZWETH(JL) * MAX(0., -SIGN(1., PT(JL)-XTT)) ! WHERE(PT(:)<XTT) ENDDO ENDIF +!$acc loop independent DO JL=1, ISIZE ZDRYH(JL) = ZHAIL(JL) * & & MAX(0., -SIGN(1., PT(JL)-XTT)) * & ! WHERE(PT(:)<XTT) diff --git a/src/MNH/ice4_fast_rs.f90 b/src/MNH/ice4_fast_rs.f90 index 94a0ad92c338d299574c904da60c39c317555690..4fb4434d67b3fd23aada400b895ea39b0fbbc745 100644 --- a/src/MNH/ice4_fast_rs.f90 +++ b/src/MNH/ice4_fast_rs.f90 @@ -287,12 +287,11 @@ DO JL=1, ISIZE &PRS_TEND(JL, IFREEZ2) * PRIAGGS(JL)) - & PRIAGGS(JL)) ENDDO -! +!$acc end kernels !* 5.1 cloud droplet riming of the aggregates ! 
IGRIM = 0 -!$acc end kernels -!$acc parallel loop private(idx) independent present_cr( I1, GRIM, PCOMPUTE, PRCT, PRST, XRTMIN, ZRIM ) +!$acc parallel loop private(idx) copy(IGRIM) independent present_cr( I1, GRIM, PCOMPUTE, PRCT, PRST, XRTMIN, ZRIM ) DO CONCURRENT( JJ = 1 : SIZE( GRIM ) ) ZRIM(JJ)=MAX(0., -SIGN(1., XRTMIN(2)-PRCT(JJ))) * & !WHERE(PRCT(:)>XRTMIN(2)) &MAX(0., -SIGN(1., XRTMIN(5)-PRST(JJ))) * & !WHERE(PRST(:)>XRTMIN(5)) @@ -461,12 +460,12 @@ DO JL=1, ISIZE PA_RG(JL) = PA_RG(JL) + PRCRIMSG(JL)+PRSRIMCG(JL) PA_TH(JL) = PA_TH(JL) + PRCRIMSG(JL)*(PLSFACT(JL)-PLVFACT(JL)) ENDDO +!$acc end kernels ! !* 5.2 rain accretion onto the aggregates ! IGACC = 0 -!$acc end kernels -!$acc parallel loop private(idx) independent present_cr( I1, GACC, PCOMPUTE, PRRT, PRST, XRTMIN, ZACC ) +!$acc parallel loop private(idx) copy(IGACC) independent present_cr( I1, GACC, PCOMPUTE, PRRT, PRST, XRTMIN, ZACC ) DO CONCURRENT( JJ = 1 : SIZE( GACC ) ) ZACC(JJ)=MAX(0., -SIGN(1., XRTMIN(3)-PRRT(JJ))) * & !WHERE(PRRT(:)>XRTMIN(3)) &MAX(0., -SIGN(1., XRTMIN(5)-PRST(JJ))) * & !WHERE(PRST(:)>XRTMIN(5)) diff --git a/src/MNH/ice4_rsrimcg_old.f90 b/src/MNH/ice4_rsrimcg_old.f90 index 6d4fb2fd3e097a3dda66de2319448bbf32aba4c9..8abfaed13e9f8cfec25b2a06a31a53af3382f637 100644 --- a/src/MNH/ice4_rsrimcg_old.f90 +++ b/src/MNH/ice4_rsrimcg_old.f90 @@ -137,27 +137,32 @@ CALL MNH_MEM_GET( zzw, size( prhodref ) ) ! !$acc kernels PRSRIMCG_MR(:)=0. +!$acc end kernels ! IF(.NOT. ODSOFT) THEN - IGRIM = 0 - GRIM(:) = .FALSE. -!$acc loop private(IDX) independent +!$acc kernels + GRIM(:) = .FALSE. +!$acc end kernels + IGRIM = 0 +!$acc parallel loop private(IDX) copy(IGRIM) independent DO JL = 1, SIZE(GRIM) IF ( PRCT(JL)>XRTMIN(2) .AND. PRST(JL)>XRTMIN(5) .AND. ODCOMPUTE(JL) .AND. PT(JL)<XTT ) THEN !$acc atomic capture IGRIM = IGRIM + 1 IDX = IGRIM !$acc end atomic - IVEC1(IDX) = Jl + IVEC1(IDX) = JL GRIM(JL) = .TRUE. END IF END DO ! +! acc end parallel loop IF(IGRIM>0) THEN + !$acc kernels ! ! 
5.1.1 select the PLBDAS ! -!$acc loop independent + !$acc loop independent DO CONCURRENT( JL = 1 : IGRIM ) ZVEC1(JL) = PLBDAS(IVEC1(JL)) ! @@ -181,11 +186,11 @@ IF(.NOT. ODSOFT) THEN ZVEC1(JL) = XGAMINC_RIM2( IVEC2(JL)+1 )* ZVEC2(JL) & - XGAMINC_RIM2( IVEC2(JL) )*(ZVEC2(JL) - 1.0) END DO -!$acc loop independent + !$acc loop independent DO CONCURRENT( JL = 1 : size( prhodref ) ) ZZW(JL) = 0. END DO -!$acc loop independent + !$acc loop independent DO CONCURRENT( JL = 1 : IGRIM ) ZZW(IVEC1(JL)) = ZVEC1(JL) END DO @@ -193,7 +198,7 @@ IF(.NOT. ODSOFT) THEN ! 5.1.6 riming-conversion of the large sized aggregates into graupeln ! ! -!$acc loop independent + !$acc loop independent DO CONCURRENT( JL = 1 : size( prhodref ) ) IF ( GRIM(JL) ) THEN #ifndef MNH_BITREP @@ -205,8 +210,10 @@ IF(.NOT. ODSOFT) THEN PRSRIMCG_MR(:)=MIN(PRST(:), PRSRIMCG_MR(:)) END IF END DO - END IF -ENDIF + !$acc end kernels + END IF ! IGRIM +ENDIF ! ODSOFT +!$acc kernels PB_RS(:) = PB_RS(:) - PRSRIMCG_MR(:) PB_RG(:) = PB_RG(:) + PRSRIMCG_MR(:) !$acc end kernels diff --git a/src/MNH/ice4_sedimentation_split.f90 b/src/MNH/ice4_sedimentation_split.f90 index df9b7259447e37c126fdc1712cbb4728fc9663eb..255d6a6884e7ad396df21e9a2f39e88ab7cb68fb 100644 --- a/src/MNH/ice4_sedimentation_split.f90 +++ b/src/MNH/ice4_sedimentation_split.f90 @@ -92,6 +92,9 @@ USE MODE_MPPDB !Warning: intrinsics gamma does not give same results between CPU and GPU with NVHPC (tested with 22.2 version) USE MODI_GAMMA #endif +#if defined(TARGET_NV70) +USE MODI_GAMMA +#endif ! IMPLICIT NONE ! @@ -611,7 +614,7 @@ ZREMAINT(:,:) = PTSTEP ! 
DO WHILE (ANY(ZREMAINT>0.)) ISEDIM = 0 -!$acc parallel loop private(idx) independent +!$acc parallel loop private(idx) copy(ISEDIM) independent DO JK = KKTB,KKTE !$acc loop independent collapse(2) DO JJ = KJB,KJE diff --git a/src/MNH/ice_adjust.f90 b/src/MNH/ice_adjust.f90 index dc70a8b50853ed0b1c4cf7ee969df0e517b7a5d4..77b7e509c646f432c28d8566ebbf6205b1246645 100644 --- a/src/MNH/ice_adjust.f90 +++ b/src/MNH/ice_adjust.f90 @@ -410,25 +410,25 @@ CALL MNH_MEM_GET( ZTEMP_BUD , IIU, IJU, IKU ) #endif if ( lbudget_th ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pths(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), trim( hbuname ), ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rv ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prvs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), trim( hbuname ), ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rc ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prcs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RC), trim( hbuname ), ZTEMP_BUD(:,:,:) ) end if if ( lbudget_ri ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pris(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RI), trim( hbuname ), ZTEMP_BUD(:,:,:) ) @@ -524,10 +524,7 @@ ENDDO ! end of the iterative loop ! ------------------------------------------------- ! !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT (JI=1:IIU,JJ=1:IJU,JK=1:IKU) +!$mnh_do_concurrent (JI=1:IIU,JJ=1:IJU,JK=1:IKU) ! 
!* 5.0 compute the variation of mixing ratio @@ -562,7 +559,7 @@ PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) - ZW2(JI,JJ,JK) PRIS(JI,JJ,JK) = PRIS(JI,JJ,JK) + ZW2(JI,JJ,JK) PTHS(JI,JJ,JK) = PTHS(JI,JJ,JK) + & ZW2(JI,JJ,JK) * ZLS(JI,JJ,JK) / (ZCPH(JI,JJ,JK) * PEXNREF(JI,JJ,JK)) -ENDDO +!$mnh_end_do() !$acc end kernels ! @@ -571,14 +568,14 @@ ENDDO ! IF ( .NOT. OSUBG_COND ) THEN !$acc kernels present_cr(GTEMP) -DO CONCURRENT (JI=1:IIU,JJ=1:IJU,JK=1:IKU) +!$mnh_do_concurrent (JI=1:IIU,JJ=1:IJU,JK=1:IKU) GTEMP(JI,JJ,JK) = PRCS(JI,JJ,JK) + PRIS(JI,JJ,JK) > 1.E-12 / PTSTEP IF ( GTEMP(JI,JJ,JK) )THEN PCLDFR(JI,JJ,JK) = 1. ELSE PCLDFR(JI,JJ,JK) = 0. ENDIF -ENDDO +!$mnh_end_do() IF ( SIZE(PSRCS,3) /= 0 ) THEN PSRCS(:,:,:) = PCLDFR(:,:,:) END IF @@ -684,25 +681,25 @@ IF(GPOUT_TH) POUT_TH=ZT / PEXN(:,:,:) ! ---------------------- ! if ( lbudget_th ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pths(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), trim( hbuname ), ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rv ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prvs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), trim( hbuname ), ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rc ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prcs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RC), trim( hbuname ), ZTEMP_BUD(:,:,:) ) end if if ( lbudget_ri ) then - !$acc kernels + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pris(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RI), trim( hbuname ), ZTEMP_BUD(:,:,:) ) diff --git a/src/MNH/mode_mnh_zwork.f90 b/src/MNH/mode_mnh_zwork.f90 index 2c8514dc26c76d8c5ec632fa6332ccd88880ae28..38b5e1c721bd97668f198c6f18be58f1d0eeb505 100644 --- a/src/MNH/mode_mnh_zwork.f90 +++ b/src/MNH/mode_mnh_zwork.f90 @@ -149,7 +149,7 
@@ MODULE MODE_MNH_ZWORK !------ Real 1DFLAT pool - INTEGER, PARAMETER :: JPMAX_T1DFLAT_R = 120 !Used to determine max size of buffer ZT1DFLAT + INTEGER, PARAMETER :: JPMAX_T1DFLAT_R = 300 !Used to determine max size of buffer ZT1DFLAT !(3D size of the mesh * JPMAX_T1DFLAT_R) INTEGER, SAVE :: NPMAX_POOL_T1DFLAT_R = 250 !Maximum size of the pool (max number of arrays) INTEGER(KIND=MNHINT64), ALLOCATABLE, DIMENSION (:) :: NT1DFLAT_POOL_R !Position in ZT1DFLAT of the beginning of each array @@ -1539,6 +1539,7 @@ CONTAINS NTOT_GETSIZE_ZT1DFLAT = NTOT_GETSIZE_ZT1DFLAT + KSIZE IF ( NT1DFLAT_POS_R + KSIZE > NT1DFLAT_MAXSIZE_R ) THEN + print*,"MNH_GET_ZT1DFLAT ZT1DFLAT too small, JPMAX_T1DFLAT_R =" , JPMAX_T1DFLAT_R WRITE( YSIZE, '( I0 )' ) KSIZE WRITE( YAVAIL, '( I0 )' ) NT1DFLAT_MAXSIZE_R - NT1DFLAT_POS_R WRITE( YMAX, '( I0 )' ) NT1DFLAT_MAXSIZE_R diff --git a/src/MNH/mode_prandtl.f90 b/src/MNH/mode_prandtl.f90 index 50bf57bf14c697c5ee35b7c21b84c7055fe0249b..e8e8bb1cbafb99793e6025d2e8c44e1105982534 100644 --- a/src/MNH/mode_prandtl.f90 +++ b/src/MNH/mode_prandtl.f90 @@ -500,7 +500,7 @@ IF (LTURBDIM_3DIM) THEN D_PHI3DTDZ_O_DDTDZ(PPHI3,PREDTH1,PREDR1,PRED2TH3,PRED2THR3,HTURBDIM,OUSERV) ) #else CALL D_PHI3DTDZ_O_DDTDZ( PPHI3, PREDTH1, PREDR1, PRED2TH3, PRED2THR3, HTURBDIM, OUSERV, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PD_PHI3DTDZ2_O_DDTDZ) PD_PHI3DTDZ2_O_DDTDZ(:,:,:) = PDTDZ(:,:,:) * (PPHI3(:,:,:) + ZTMP1_DEVICE(:,:,:) ) !$acc end kernels #endif @@ -673,8 +673,10 @@ SUBROUTINE M3_WTH_W2TH(PREDTH1,PREDR1,PD,PKEFF,PTKE,PM3_WTH_W2TH) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_WTH_W2TH #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE +#else + REAL, DIMENSION(:,:,:), pointer , contiguous :: ZTMP1_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PKEFF, PTKE, PM3_WTH_W2TH ) @@ -683,11 +685,15 @@ SUBROUTINE M3_WTH_W2TH(PREDTH1,PREDR1,PD,PKEFF,PTKE,PM3_WTH_W2TH) 
call Print_msg( NVERB_WARNING, 'GEN', 'M3_WTH_W2TH', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory + CALL MNH_MEM_POSITION_PIN() + CALL MNH_MEM_GET(ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device ) +!$acc data present( ztmp1_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -709,6 +715,11 @@ PM3_WTH_W2TH(:,:,IKE+1)=PM3_WTH_W2TH(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC + !Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN + CALL MNH_MEM_RELEASE() +#endif + #ifndef MNH_OPENACC END FUNCTION M3_WTH_W2TH @@ -734,8 +745,10 @@ SUBROUTINE D_M3_WTH_W2TH_O_DDTDZ(PREDTH1,PREDR1,PD,PBLL_O_E,PETHETA,PKEFF,PTKE,P REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_WTH_W2TH_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PBLL_O_E, PETHETA, PKEFF, PTKE, PD_M3_WTH_W2TH_O_DDTDZ ) @@ -744,11 +757,15 @@ SUBROUTINE D_M3_WTH_W2TH_O_DDTDZ(PREDTH1,PREDR1,PD,PBLL_O_E,PETHETA,PKEFF,PTKE,P call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_WTH_W2TH_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET(ztmp1_device, size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device ) +!$acc data present( ztmp1_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -780,6 +797,11 @@ PD_M3_WTH_W2TH_O_DDTDZ(:,:,IKE+1)=PD_M3_WTH_W2TH_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET 
calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() + +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_WTH_W2TH_O_DDTDZ @@ -804,8 +826,10 @@ SUBROUTINE M3_WTH_W2R(PD,PKEFF,PTKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_WTH_W2R) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_WTH_W2R #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE +#else + REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE #endif !$acc data present( PD, PKEFF, PTKE, PBLL_O_E, PEMOIST, PDTDZ, PM3_WTH_W2R ) @@ -814,11 +838,15 @@ SUBROUTINE M3_WTH_W2R(PD,PKEFF,PTKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_WTH_W2R) call Print_msg( NVERB_WARNING, 'GEN', 'M3_WTH_W2R', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET(ztmp1_device, size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device ) +!$acc data present( ztmp1_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -838,6 +866,12 @@ PM3_WTH_W2R(:,:,IKE+1)=PM3_WTH_W2R(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC + +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() + +#endif #ifndef MNH_OPENACC END FUNCTION M3_WTH_W2R @@ -863,8 +897,10 @@ SUBROUTINE D_M3_WTH_W2R_O_DDTDZ(PREDTH1,PREDR1,PD,PKEFF,PTKE,PBLL_O_E,PEMOIST,PD REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_WTH_W2R_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE + #else + REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PKEFF, PTKE, PBLL_O_E, PEMOIST, PD_M3_WTH_W2R_O_DDTDZ ) @@ -873,11 +909,16 @@ SUBROUTINE D_M3_WTH_W2R_O_DDTDZ(PREDTH1,PREDR1,PD,PKEFF,PTKE,PBLL_O_E,PEMOIST,PD call 
Print_msg( NVERB_WARNING, 'GEN', 'D_M3_WTH_W2R_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET(ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device ) +!$acc data present( ztmp1_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -899,6 +940,10 @@ PD_M3_WTH_W2R_O_DDTDZ(:,:,IKE+1)=PD_M3_WTH_W2R_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_WTH_W2R_O_DDTDZ @@ -926,8 +971,10 @@ SUBROUTINE M3_WTH_WR2(PD,PKEFF,PTKE,PSQRT_TKE,PBLL_O_E,PBETA,PLEPS,PEMOIST,PDTDZ REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_WTH_WR2 #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE + #else + REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PD, PKEFF, PTKE, PSQRT_TKE, PBLL_O_E, PBETA, PLEPS, PEMOIST, PDTDZ, PM3_WTH_WR2 ) @@ -936,12 +983,18 @@ SUBROUTINE M3_WTH_WR2(PD,PKEFF,PTKE,PSQRT_TKE,PBLL_O_E,PBETA,PLEPS,PEMOIST,PDTDZ call Print_msg( NVERB_WARNING, 'GEN', 'M3_WTH_WR2', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET(ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 )) +CALL MNH_MEM_GET(ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 )) #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( 
ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -974,6 +1027,10 @@ PM3_WTH_WR2(:,:,IKE+1)=PM3_WTH_WR2(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_WTH_WR2 @@ -1004,8 +1061,10 @@ SUBROUTINE D_M3_WTH_WR2_O_DDTDZ(PREDTH1,PREDR1,PD,PKEFF,PTKE,PSQRT_TKE,PBLL_O_E, REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_WTH_WR2_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE + #else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PTKE, PSQRT_TKE, PBETA, PLEPS, PREDTH1, PREDR1, PD, PKEFF, PBLL_O_E, PEMOIST, PD_M3_WTH_WR2_O_DDTDZ ) @@ -1014,12 +1073,18 @@ SUBROUTINE D_M3_WTH_WR2_O_DDTDZ(PREDTH1,PREDR1,PD,PKEFF,PTKE,PSQRT_TKE,PBLL_O_E, call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_WTH_WR2_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1054,6 +1119,10 @@ PD_M3_WTH_WR2_O_DDTDZ(:,:,IKE+1)=PD_M3_WTH_WR2_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_WTH_WR2_O_DDTDZ @@ 
-1080,8 +1149,11 @@ SUBROUTINE M3_WTH_WTHR(PREDR1,PD,PKEFF,PTKE,PSQRT_TKE,PBETA,PLEPS,PEMOIST,PM3_WT REAL, DIMENSION(SIZE(PREDR1,1),SIZE(PREDR1,2),SIZE(PREDR1,3)),INTENT(OUT) :: PM3_WTH_WTHR #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE + #endif !$acc data present( PREDR1, PD, PKEFF, PTKE, PSQRT_TKE, PBETA, PLEPS, PEMOIST, PM3_WTH_WTHR ) @@ -1090,12 +1162,19 @@ SUBROUTINE M3_WTH_WTHR(PREDR1,PD,PKEFF,PTKE,PSQRT_TKE,PBETA,PLEPS,PEMOIST,PM3_WT call Print_msg( NVERB_WARNING, 'GEN', 'M3_WTH_WTHR', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1119,6 +1198,10 @@ PM3_WTH_WTHR(:,:,IKE+1)=PM3_WTH_WTHR(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_WTH_WTHR @@ -1185,8 +1268,11 @@ SUBROUTINE M3_TH2_W2TH(PREDTH1,PREDR1,PD,PDTDZ,PLM,PLEPS,PTKE,PM3_TH2_W2TH) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_TH2_W2TH #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE + #else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE + #endif !$acc data present( PREDTH1, 
PREDR1, PD, PDTDZ, PLM, PLEPS, PTKE, PM3_TH2_W2TH ) @@ -1195,12 +1281,18 @@ SUBROUTINE M3_TH2_W2TH(PREDTH1,PREDR1,PD,PDTDZ,PLM,PLEPS,PTKE,PM3_TH2_W2TH) call Print_msg( NVERB_WARNING, 'GEN', 'M3_TH2_W2TH', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1225,6 +1317,10 @@ PM3_TH2_W2TH(:,:,IKE+1)=PM3_TH2_W2TH(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_TH2_W2TH @@ -1250,8 +1346,10 @@ SUBROUTINE D_M3_TH2_W2TH_O_DDTDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,OUSERV,PD_M3_T REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_TH2_W2TH_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLM, PLEPS, PTKE, PD_M3_TH2_W2TH_O_DDTDZ ) @@ -1260,12 +1358,18 @@ SUBROUTINE D_M3_TH2_W2TH_O_DDTDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,OUSERV,PD_M3_T call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_TH2_W2TH_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else 
+!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1325,6 +1429,10 @@ PD_M3_TH2_W2TH_O_DDTDZ(:,:,IKE+1)=PD_M3_TH2_W2TH_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_TH2_W2TH_O_DDTDZ @@ -1348,8 +1456,10 @@ SUBROUTINE M3_TH2_WTH2(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PM3_TH2_WTH2) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_TH2_WTH2 #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE + #else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PM3_TH2_WTH2 ) @@ -1358,12 +1468,18 @@ SUBROUTINE M3_TH2_WTH2(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PM3_TH2_WTH2) call Print_msg( NVERB_WARNING, 'GEN', 'M3_TH2_WTH2', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1395,6 +1511,10 @@ 
PM3_TH2_WTH2(:,:,IKE+1)=PM3_TH2_WTH2(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_TH2_WTH2 @@ -1420,8 +1540,11 @@ SUBROUTINE D_M3_TH2_WTH2_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_TH2_WTH2_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE + #else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE + #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PETHETA, PD_M3_TH2_WTH2_O_DDTDZ ) @@ -1430,12 +1553,19 @@ SUBROUTINE D_M3_TH2_WTH2_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_TH2_WTH2_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1471,6 +1601,10 @@ PD_M3_TH2_WTH2_O_DDTDZ(:,:,IKE+1)=PD_M3_TH2_WTH2_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_TH2_WTH2_O_DDTDZ @@ -1496,8 +1630,11 @@ SUBROUTINE M3_TH2_W2R(PD,PLM,PLEPS,PTKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_TH2_W2R) REAL, 
DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_TH2_W2R #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE + #else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE + #endif !$acc data present( PD, PLM, PLEPS, PTKE, PBLL_O_E, PEMOIST, PDTDZ, PM3_TH2_W2R ) @@ -1506,12 +1643,19 @@ SUBROUTINE M3_TH2_W2R(PD,PLM,PLEPS,PTKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_TH2_W2R) call Print_msg( NVERB_WARNING, 'GEN', 'M3_TH2_W2R', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1546,6 +1690,10 @@ PM3_TH2_W2R(:,:,IKE+1)=PM3_TH2_W2R(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_TH2_W2R @@ -1573,8 +1721,10 @@ SUBROUTINE D_M3_TH2_W2R_O_DDTDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,PBLL_O_E,PEMOIS REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_TH2_W2R_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLM, PLEPS, PTKE, PBLL_O_E, PEMOIST, PDTDZ, PD_M3_TH2_W2R_O_DDTDZ ) @@ -1583,12 +1733,19 @@ SUBROUTINE 
D_M3_TH2_W2R_O_DDTDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,PBLL_O_E,PEMOIS call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_TH2_W2R_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1621,6 +1778,10 @@ PD_M3_TH2_W2R_O_DDTDZ(:,:,IKE+1)=PD_M3_TH2_W2R_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_TH2_W2R_O_DDTDZ @@ -1645,8 +1806,10 @@ SUBROUTINE M3_TH2_WR2(PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_TH2_WR2) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_TH2_WR2 #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PD, PLEPS, PSQRT_TKE, PBLL_O_E, PEMOIST, PDTDZ, PM3_TH2_WR2 ) @@ -1655,12 +1818,19 @@ SUBROUTINE M3_TH2_WR2(PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_TH2_WR2) call Print_msg( NVERB_WARNING, 'GEN', 'M3_TH2_WR2', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL 
MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1695,6 +1865,10 @@ PM3_TH2_WR2(:,:,IKE+1)=PM3_TH2_WR2(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_TH2_WR2 @@ -1721,8 +1895,10 @@ SUBROUTINE D_M3_TH2_WR2_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMOI REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_TH2_WR2_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PEMOIST, PDTDZ, PD_M3_TH2_WR2_O_DDTDZ ) @@ -1730,12 +1906,19 @@ SUBROUTINE D_M3_TH2_WR2_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMOI call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_TH2_WR2_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1773,6 +1956,10 @@ PD_M3_TH2_WR2_O_DDTDZ(:,:,IKE+1)=PD_M3_TH2_WR2_O_DDTDZ(:,:,IKE) !$acc end data 
!$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_TH2_WR2_O_DDTDZ @@ -1798,8 +1985,10 @@ SUBROUTINE M3_TH2_WTHR(PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_TH2_ REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_TH2_WTHR #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PEMOIST, PDTDZ, PM3_TH2_WTHR ) @@ -1808,12 +1997,19 @@ SUBROUTINE M3_TH2_WTHR(PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMOIST,PDTDZ,PM3_TH2_ call Print_msg( NVERB_WARNING, 'GEN', 'M3_TH2_WTHR', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1837,6 +2033,10 @@ PM3_TH2_WTHR(:,:,IKE+1)=PM3_TH2_WTHR(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_TH2_WTHR @@ -1863,8 +2063,10 @@ SUBROUTINE D_M3_TH2_WTHR_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMO REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_TH2_WTHR_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef 
MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PEMOIST, PDTDZ, PD_M3_TH2_WTHR_O_DDTDZ ) @@ -1873,12 +2075,19 @@ SUBROUTINE D_M3_TH2_WTHR_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PEMO call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_TH2_WTHR_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1903,6 +2112,10 @@ PD_M3_TH2_WTHR_O_DDTDZ(:,:,IKE+1)=PD_M3_TH2_WTHR_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_TH2_WTHR_O_DDTDZ @@ -1926,8 +2139,10 @@ SUBROUTINE M3_THR_WTHR(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PM3_THR_WTHR) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_THR_WTHR #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PM3_THR_WTHR ) @@ -1936,12 +2151,19 @@ SUBROUTINE M3_THR_WTHR(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PM3_THR_WTHR) call Print_msg( NVERB_WARNING, 'GEN', 
'M3_THR_WTHR', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -1965,6 +2187,10 @@ PM3_THR_WTHR(:,:,IKE+1)=PM3_THR_WTHR(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_THR_WTHR @@ -1990,8 +2216,10 @@ SUBROUTINE D_M3_THR_WTHR_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_THR_WTHR_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PETHETA, PD_M3_THR_WTHR_O_DDTDZ ) @@ -2000,12 +2228,19 @@ SUBROUTINE D_M3_THR_WTHR_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_THR_WTHR_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( 
ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2029,6 +2264,10 @@ PD_M3_THR_WTHR_O_DDTDZ(:,:,IKE+1)=PD_M3_THR_WTHR_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_THR_WTHR_O_DDTDZ @@ -2054,8 +2293,10 @@ SUBROUTINE M3_THR_WTH2(PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETHETA,PDRDZ,PM3_THR_ REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_THR_WTH2 #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PETHETA, PDRDZ, PM3_THR_WTH2 ) @@ -2064,12 +2305,19 @@ SUBROUTINE M3_THR_WTH2(PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETHETA,PDRDZ,PM3_THR_ call Print_msg( NVERB_WARNING, 'GEN', 'M3_THR_WTH2', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2093,6 +2341,10 @@ PM3_THR_WTH2(:,:,IKE+1)=PM3_THR_WTH2(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to 
MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_THR_WTH2 @@ -2119,8 +2371,10 @@ SUBROUTINE D_M3_THR_WTH2_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_THR_WTH2_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PETHETA, PDRDZ, PD_M3_THR_WTH2_O_DDTDZ ) @@ -2129,12 +2383,19 @@ SUBROUTINE D_M3_THR_WTH2_O_DDTDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_THR_WTH2_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2171,6 +2432,10 @@ PD_M3_THR_WTH2_O_DDTDZ(:,:,IKE+1)=PD_M3_THR_WTH2_O_DDTDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_THR_WTH2_O_DDTDZ @@ -2196,8 +2461,10 @@ SUBROUTINE D_M3_THR_WTH2_O_DDRDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_THR_WTH2_O_DDRDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, 
DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLEPS, PSQRT_TKE, PBLL_O_E, PETHETA, PD_M3_THR_WTH2_O_DDRDZ ) @@ -2206,12 +2473,19 @@ SUBROUTINE D_M3_THR_WTH2_O_DDRDZ(PREDTH1,PREDR1,PD,PLEPS,PSQRT_TKE,PBLL_O_E,PETH call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_THR_WTH2_O_DDRDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2238,6 +2512,10 @@ PD_M3_THR_WTH2_O_DDRDZ(:,:,IKE+1)=PD_M3_THR_WTH2_O_DDRDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_THR_WTH2_O_DDRDZ @@ -2262,8 +2540,10 @@ SUBROUTINE M3_THR_W2TH(PREDR1,PD,PLM,PLEPS,PTKE,PDRDZ,PM3_THR_W2TH) REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PM3_THR_W2TH #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDR1, PD, PLM, PLEPS, PTKE, PDRDZ, PM3_THR_W2TH ) @@ -2272,12 +2552,19 @@ SUBROUTINE M3_THR_W2TH(PREDR1,PD,PLM,PLEPS,PTKE,PDRDZ,PM3_THR_W2TH) call Print_msg( NVERB_WARNING, 'GEN', 'M3_THR_W2TH', 'OpenACC: not yet tested' ) #endif 
-#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2301,6 +2588,10 @@ PM3_THR_W2TH(:,:,IKE+1)=PM3_THR_W2TH(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION M3_THR_W2TH @@ -2328,8 +2619,10 @@ SUBROUTINE D_M3_THR_W2TH_O_DDTDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,PBLL_O_E,PDRDZ REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_THR_W2TH_O_DDTDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLM, PLEPS, PTKE, PBLL_O_E, PDRDZ, PETHETA, PD_M3_THR_W2TH_O_DDTDZ ) @@ -2338,12 +2631,19 @@ SUBROUTINE D_M3_THR_W2TH_O_DDTDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,PBLL_O_E,PDRDZ call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_THR_W2TH_O_DDTDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), 
size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2404,8 +2704,10 @@ SUBROUTINE D_M3_THR_W2TH_O_DDRDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,PD_M3_THR_W2TH REAL, DIMENSION(SIZE(PD,1),SIZE(PD,2),SIZE(PD,3)),INTENT(OUT) :: PD_M3_THR_W2TH_O_DDRDZ #endif INTEGER :: IKB, IKE -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC REAL, DIMENSION(:,:,:), allocatable :: ZTMP1_DEVICE, ZTMP2_DEVICE +#else +REAL, DIMENSION(:,:,:), pointer,contiguous :: ZTMP1_DEVICE, ZTMP2_DEVICE #endif !$acc data present( PREDTH1, PREDR1, PD, PLM, PLEPS, PTKE, PD_M3_THR_W2TH_O_DDRDZ ) @@ -2414,12 +2716,19 @@ SUBROUTINE D_M3_THR_W2TH_O_DDRDZ(PREDTH1,PREDR1,PD,PLM,PLEPS,PTKE,PD_M3_THR_W2TH call Print_msg( NVERB_WARNING, 'GEN', 'D_M3_THR_W2TH_O_DDRDZ', 'OpenACC: not yet tested' ) #endif -#ifdef MNH_OPENACC +#ifndef MNH_OPENACC allocate( ztmp1_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) allocate( ztmp2_device(size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) ) +#else +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ztmp1_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) +CALL MNH_MEM_GET( ztmp2_device,size( pd, 1 ), size( pd, 2 ), size( pd, 3 ) ) + #endif -!$acc data create( ztmp1_device, ztmp2_device ) +!$acc data present( ztmp1_device, ztmp2_device ) IKB = 1+JPVEXT_TURB IKE = SIZE(PD,3)-JPVEXT_TURB @@ -2454,6 +2763,10 @@ PD_M3_THR_W2TH_O_DDRDZ(:,:,IKE+1)=PD_M3_THR_W2TH_O_DDRDZ(:,:,IKE) !$acc end data !$acc end data +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif #ifndef MNH_OPENACC END FUNCTION D_M3_THR_W2TH_O_DDRDZ diff --git a/src/MNH/modeln.f90 b/src/MNH/modeln.f90 index effbaa577f42feb9b90f084fc635cbecf7767479..66d645709688f4df45f991d1d22453cf2cc6fd22 100644 --- a/src/MNH/modeln.f90 +++ b/src/MNH/modeln.f90 @@ -1676,7 +1676,7 @@ CALL 
GRAVITY_IMPL ( CLBCX, CLBCY, NRR, NRRL, NRRI,XTSTEP, & ! compensated by the pressure gradient ! IF (KTCOUNT == 1 .AND. CCONF=='START') THEN -!$acc kernels present( ZRWS ) +!$acc kernels present( ZRWS,XRWS_PRES ) XRWS_PRES(:,:,:) = ZRWS(:,:,:) - XRWS(:,:,:) !$acc end kernels END IF diff --git a/src/MNH/p_abs.f90 b/src/MNH/p_abs.f90 index 91f4f669fdf3ef21554e345d17d0e7dd0d215b3f..d615c4ccd96c426d1a0e85b126c50e1db0da2146 100644 --- a/src/MNH/p_abs.f90 +++ b/src/MNH/p_abs.f90 @@ -123,7 +123,7 @@ USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif USE MODE_REPRO_SUM ! -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif ! @@ -294,7 +294,7 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN !$acc_nv loop independent collapse(2) DO CONCURRENT (JI = IIB:IIE , JJ = IJB:IJE ) ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & @@ -312,7 +312,7 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN DO JJ = IJB,IJE DO JI = IIB,IIE ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & @@ -348,12 +348,12 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN !$acc end kernels IF ( CEQNSYS == 'DUR' ) THEN !$acc kernels - !$acc loop seq + !$acc loop seq DO JK = IKB,IKE !$acc_nv loop independent collapse(2) DO CONCURRENT (JI = IIB:IIE , JJ = IJB:IJE ) ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & @@ -367,7 +367,7 @@ IF ( CEQNSYS=='DUR' .OR. 
CEQNSYS=='MAE' ) THEN DO JJ = IJB,IJE DO JI = IIB,IIE ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & @@ -403,7 +403,7 @@ ELSEIF( CEQNSYS == 'LHE' ) THEN IF (LBOUSS) THEN ZRHOREF(:,:,:) = PRHODREF(:,:,:) ELSE -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZRHOREF(:,:,:) = PEXNREF(:,:,:) ** ZCVD_O_RD & #else ZRHOREF(:,:,:) = BR_POW( PEXNREF(:,:,:), ZCVD_O_RD )& diff --git a/src/MNH/prandtl.f90 b/src/MNH/prandtl.f90 index 9c1fe36274d6f7d0d55ca9150aa7c061bf511dc4..f2cc9f810f3a36a4769bae4ad7b07f094e0a44bb 100644 --- a/src/MNH/prandtl.f90 +++ b/src/MNH/prandtl.f90 @@ -209,7 +209,7 @@ use mode_msg #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif -#ifdef MNH_BITREP_OMP +#ifdef MNH_COMPILER_CCE !$mnh_undef(LOOP) !$mnh_undef(OPENACC) #endif @@ -759,8 +759,8 @@ ELSE ! 3D case in a 3D model + BR_P2( XCTV*PBLL_O_E(:,:,:)*PETHETA(:,:,:) ) & * ZTMP2_DEVICE(:,:,:) #endif - PRED2TH3(:,:,IKB)=PRED2TH3(:,:,IKB+KKL) !$mnh_end_expand_array() + PRED2TH3(:,:,IKB)=PRED2TH3(:,:,IKB+KKL) !$acc end kernels #endif ! @@ -985,7 +985,7 @@ ELSE IF (L2D) THEN ! 3D case in a 2D model !$acc kernels !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) - ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF * PLM * PLEPS / PTKEM)**2 + ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:))**2 #else ZTMP1_DEVICE(:,:,:) = BR_P2(XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:)) #endif @@ -1066,7 +1066,7 @@ ELSE ! 
3D case in a 3D model !$acc kernels !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) - ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF * PLM * PLEPS / PTKEM)**2 + ZTMP1_DEVICE(:,:,:) = (XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:))**2 #else ZTMP1_DEVICE(:,:,:) = BR_P2(XG / PTHVREF(:,:,:) * PLM(:,:,:) * PLEPS(:,:,:) / PTKEM(:,:,:)) #endif diff --git a/src/MNH/pressurez.f90 b/src/MNH/pressurez.f90 index a3716704e5a9d3d671542a4141c87d5c4ce6cf98..5d6a7a860bed9a8c9e8e869b07f170a708c9cdeb 100644 --- a/src/MNH/pressurez.f90 +++ b/src/MNH/pressurez.f90 @@ -250,7 +250,7 @@ USE MODE_MPPDB USE MODE_MSG USE MODE_SUM2_ll, ONLY: GMAXLOC_ll ! -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif USE MODI_CONJGRAD @@ -652,7 +652,7 @@ IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN ENDIF ! #ifndef MNH_OPENACC -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZPHIT(:,:,:)=(PPABST(:,:,:)/XP00)**(XRD/XCPD)-PEXNREF(:,:,:) #else ZPHIT(:,:,:)=BR_POW(PPABST(:,:,:)/XP00,XRD/XCPD)-PEXNREF(:,:,:) @@ -660,7 +660,7 @@ IF(CEQNSYS=='MAE' .OR. CEQNSYS=='DUR') THEN #else !$acc kernels DO CONCURRENT ( JI=1:IIU,JJ=1:IJU,JK=1:IKU ) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZPHIT(JI,JJ,JK)=(PPABST(JI,JJ,JK)/XP00)**(XRD/XCPD)-PEXNREF(JI,JJ,JK) #else ZPHIT(JI,JJ,JK)=BR_POW((PPABST(JI,JJ,JK)/XP00),(XRD/XCPD))-PEXNREF(JI,JJ,JK) @@ -1052,7 +1052,7 @@ IF ((ZMAX_ll > 1.E-12) .AND. KTCOUNT >0 ) THEN ! IF(CEQNSYS=='MAE' .OR. 
CEQNSYS=='DUR') THEN !$acc kernels -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PPABST(:,:,:)=XP00*(ZPHIT(:,:,:)+PEXNREF(:,:,:))**(XCPD/XRD) #else DO CONCURRENT(JI=1:IIU,JJ=1:IJU,JK=1:IKU) diff --git a/src/MNH/rain_ice.f90 b/src/MNH/rain_ice.f90 index ab373a19e69ec04a9aeaa6dd10b5bbb732390aee..2c9d9a8347ba274c57b44559115f7f6c7a365657 100644 --- a/src/MNH/rain_ice.f90 +++ b/src/MNH/rain_ice.f90 @@ -534,10 +534,12 @@ CALL RAIN_ICE_NUCLEATION(IIB, IIE, IJB, IJE, IKTB, IKTE,KRR,PTSTEP,& ! optimization by looking for locations where ! the microphysical fields are larger than a minimal value only !!! ! -!$acc kernels +!$acc kernels present_cr(GMICRO) GMICRO(:,:,:) = .FALSE. +!$acc end kernels IF ( KRR == 7 ) THEN +!$acc kernels present_cr(GMICRO) GMICRO(IIB:IIE,IJB:IJE,IKTB:IKTE) = & PRCT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(2) .OR. & PRRT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(3) .OR. & @@ -545,15 +547,17 @@ GMICRO(:,:,:) = .FALSE. PRST(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(5) .OR. & PRGT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(6) .OR. & PRHT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(7) +!$acc end kernels ELSE IF( KRR == 6 ) THEN +!$acc kernels present_cr(GMICRO) GMICRO(IIB:IIE,IJB:IJE,IKTB:IKTE) = & PRCT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(2) .OR. & PRRT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(3) .OR. & PRIT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(4) .OR. & PRST(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(5) .OR. & PRGT(IIB:IIE,IJB:IJE,IKTB:IKTE)>XRTMIN(6) - END IF !$acc end kernels + END IF #ifndef MNH_OPENACC IMICRO = COUNTJV( GMICRO(:,:,:),I1(:),I2(:),I3(:)) @@ -996,7 +1000,7 @@ IF( IMICRO >= 0 ) THEN ENDIF !Diagnostic of precipitation fraction -!$acc kernels +!$acc kernels present_cr(PRAINFR,ZRS_ZERO,ZRG_ZERO) PRAINFR(:,:,:) = 0. #ifdef MNH_OPENACC ZRS_ZERO(:,:,:) = 0. @@ -1082,7 +1086,7 @@ IF( IMICRO >= 0 ) THEN ! IF( OWARM ) THEN ! Check if the formation of the raindrops by the slow ! warm processes is allowed -!$acc kernels +!$acc kernels present_cr(PEVAP3D) PEVAP3D(:,:,:)= 0. 
!$acc end kernels CALL RAIN_ICE_WARM(GMICRO, IMICRO, I1, I2, I3, & @@ -1120,7 +1124,7 @@ IF( IMICRO >= 0 ) THEN ! ---------------------------------------------- ! IF ( KRR == 7 ) THEN -!$acc kernels +!$acc kernels present_cr(ZLBDAH) ZLBDAH(:) = 0. !$acc end kernels CALL RAIN_ICE_FAST_RH(GMICRO, ZRHODREF, ZRVT, ZRCT, ZRIT, ZRST, ZRGT, ZRHT, ZRHODJ, ZPRES, & @@ -1249,7 +1253,7 @@ ELSE END IF !sedimentation of rain fraction -!$acc kernels +!$acc kernels present_cr(ZRR,ZRS,ZRG) ZRR(:,:,:) = PRRS(:,:,:) * PTSTEP ZRS(:,:,:) = PRSS(:,:,:) * PTSTEP ZRG(:,:,:) = PRGS(:,:,:) * PTSTEP diff --git a/src/MNH/rain_ice_fast_rg.f90 b/src/MNH/rain_ice_fast_rg.f90 index 9d3fd8ae8ae4d9477b3c4ffceb943eb7617c1433..14101b80037ee6d9d656237d2199ad2e71120d3c 100644 --- a/src/MNH/rain_ice_fast_rg.f90 +++ b/src/MNH/rain_ice_fast_rg.f90 @@ -57,9 +57,9 @@ use mode_tools, only: Countjv_device #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif -#ifdef MNH_BITREP_OMP +#ifdef MNH_COMPILER_CCE !$mnh_undef(LOOP) -!$mnh_undef(OPENACC) +! mnh_undef(OPENACC) #endif IMPLICIT NONE @@ -199,10 +199,12 @@ CALL MNH_MEM_GET( ZZW1, SIZE(PRHODREF), 7 ) ! !* 6.1 rain contact freezing ! -!$acc kernels +!$acc kernels present_cr(ZZW1,GWORK) ZZW1(:,:) = 0.0 GWORK(:) = PRIT(:)>XRTMIN(4) .AND. PRRT(:)>XRTMIN(3) .AND. PRIS(:)>0.0 .AND. PRRS(:)>0.0 +!$acc end kernels #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$acc parallel present_cr(ZZW1,GWORK) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN @@ -218,7 +220,9 @@ CALL MNH_MEM_GET( ZZW1, SIZE(PRHODREF), 7 ) PTHS(JL) = PTHS(JL) + ZZW1(JL,4)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*RRCFRIG) END IF END DO ! CONCURRENT +!$acc end parallel #else +!$acc parallel present_cr(ZZW1,GWORK) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN @@ -234,8 +238,9 @@ CALL MNH_MEM_GET( ZZW1, SIZE(PRHODREF), 7 ) PTHS(JL) = PTHS(JL) + ZZW1(JL,4)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*RRCFRIG) END IF END DO ! 
CONCURRENT +!$acc end parallel #endif -!$acc end kernels + IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(PRRS,"RAIN_ICE_FAST_RG 6.1:PRRS") @@ -269,10 +274,12 @@ END IF ! !* 6.2 compute the Dry growth case ! -!$acc kernels +!$acc kernels present_cr(GWORK) ZZW1(:,:) = 0.0 - GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRCT(:)>XRTMIN(2) .AND. PRCS(:)>0.0 +!$acc end kernels +!$acc parallel present_cr(GWORK) !$mnh_expand_where(JL=1:JLU) + GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRCT(:)>XRTMIN(2) .AND. PRCS(:)>0.0 WHERE( GWORK(:) ) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(:) = PLBDAG(:)**(XCXG-XDG-2.0) * PRHODREF(:)**(-XCEXVT) @@ -282,8 +289,10 @@ END IF ZZW1(:,1) = MIN( PRCS(:),XFCDRYG * PRCT(:) * ZZW(:) ) ! RCDRYG END WHERE !$mnh_end_expand_where() - GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRIT(:)>XRTMIN(4) .AND. PRIS(:)>0.0 +!$acc end parallel +!$acc parallel present_cr(GWORK) !$mnh_expand_where(JL=1:JLU) + GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRIT(:)>XRTMIN(4) .AND. PRIS(:)>0.0 WHERE( GWORK(:) ) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(:) = PLBDAG(:)**(XCXG-XDG-2.0) * PRHODREF(:)**(-XCEXVT) @@ -296,14 +305,14 @@ END IF #endif END WHERE !$mnh_end_expand_where() -!$acc end kernels +!$acc end parallel IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(ZZW1,"RAIN_ICE_FAST_RG 6.2:ZZW1") END IF ! !* 6.2.1 accretion of aggregates on the graupeln ! -!$acc kernels +!$acc kernels present_cr(GWORK) GWORK(:) = PRST(:)>XRTMIN(5) .AND. PRGT(:)>XRTMIN(6) .AND. PRSS(:)>0.0 !$acc end kernels #ifndef MNH_OPENACC @@ -341,7 +350,8 @@ END IF ! !* 6.2.3 select the (PLBDAG,PLBDAS) couplet ! -!$acc kernels +!$acc parallel present_cr(ZVECLBDAG,ZVECLBDAS) + !$mnh_expand_where(JL=1:IGDRY) ZVECLBDAG(1:IGDRY) = PLBDAG(I1(1:IGDRY)) ZVECLBDAS(1:IGDRY) = PLBDAS(I1(1:IGDRY)) ! @@ -349,7 +359,6 @@ END IF ! in the geometrical set of (Lbda_g,Lbda_s) couplet use to ! tabulate the SDRYG-kernel ! 
- !$mnh_expand_where(JL=1:IGDRY) ZVEC1(1:IGDRY) = MAX( 1.00001, MIN( REAL(NDRYLBDAG)-0.00001, & #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XDRYINTP1G * LOG( ZVECLBDAG(1:IGDRY) ) + XDRYINTP2G ) ) @@ -368,6 +377,8 @@ END IF IVEC2(1:IGDRY) = INT( ZVEC2(1:IGDRY) ) ZVEC2(1:IGDRY) = ZVEC2(1:IGDRY) - REAL( IVEC2(1:IGDRY) ) !$mnh_end_expand_where() +!$acc end parallel +!$acc kernels ! present_cr(ZVECLBDAG,ZVECLBDAS) ! !* 6.2.5 perform the bilinear interpolation of the normalized ! SDRYG-kernel @@ -425,7 +436,7 @@ END IF ! !* 6.2.6 accretion of raindrops on the graupeln ! -!$acc kernels +!$acc kernels present_cr(GWORK) GWORK(:) = PRRT(:)>XRTMIN(3) .AND. PRGT(:)>XRTMIN(6) .AND. PRSS(:)>0.0 !$acc end kernels #ifndef MNH_OPENACC @@ -463,7 +474,8 @@ END IF ! !* 6.2.8 select the (PLBDAG,PLBDAR) couplet ! -!$acc kernels +!$acc parallel present_cr(ZVECLBDAG,ZVECLBDAR) + !$mnh_expand_where(JL=1:IGDRY) ZVECLBDAG(1:IGDRY) = PLBDAG(I1(1:IGDRY)) ZVECLBDAR(1:IGDRY) = PLBDAR(I1(1:IGDRY)) ! @@ -471,7 +483,6 @@ END IF ! in the geometrical set of (Lbda_g,Lbda_r) couplet use to ! tabulate the RDRYG-kernel ! - !$mnh_expand_where(JL=1:IGDRY) ZVEC1(1:IGDRY) = MAX( 1.00001, MIN( REAL(NDRYLBDAG)-0.00001, & #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) XDRYINTP1G * LOG( ZVECLBDAG(1:IGDRY) ) + XDRYINTP2G ) ) @@ -490,10 +501,12 @@ END IF IVEC2(1:IGDRY) = INT( ZVEC2(1:IGDRY) ) ZVEC2(1:IGDRY) = ZVEC2(1:IGDRY) - REAL( IVEC2(1:IGDRY) ) !$mnh_end_expand_where() +!$acc end parallel ! !* 6.2.10 perform the bilinear interpolation of the normalized ! RDRYG-kernel ! +!$acc kernels ! present_cr(ZVECLBDAG,ZVECLBDAR,ZZW1) !$acc loop independent DO CONCURRENT (JJ = 1:IGDRY ) ZVEC3(JJ) = ( XKER_RDRYG(IVEC1(JJ)+1,IVEC2(JJ)+1)* ZVEC2(JJ) & @@ -503,7 +516,6 @@ END IF - XKER_RDRYG(IVEC1(JJ) ,IVEC2(JJ) )*(ZVEC2(JJ) - 1.0) ) & * (ZVEC1(JJ) - 1.0) END DO ! CONCURRENT -! !$acc loop independent , private (JL) DO CONCURRENT (JJ=1:IGDRY) JL = I1(JJ) @@ -543,15 +555,17 @@ END IF #endif END IF ! 
-!$acc kernels +!$acc kernels present_cr(GWORK) PRDRYG(:) = ZZW1(:,1) + ZZW1(:,2) + ZZW1(:,3) + ZZW1(:,4) ! !* 6.3 compute the Wet growth case ! PRWETG(:) = 0.0 GWORK(:) = PRGT(:)>XRTMIN(6) +!$acc end kernels #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) -!$acc loop independent +!$acc parallel present_cr(GWORK) + !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW1(JL,5) = MIN( PRIS(JL), & @@ -574,7 +588,9 @@ END IF ( PRHODREF(JL)*(XLMTT-XCL*(XTT-PZT(JL))) ) ) END IF END DO ! CONCURRENT + !$acc end parallel #else +!$acc parallel present_cr(GWORK) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN @@ -598,10 +614,12 @@ END IF ( PRHODREF(JL)*(XLMTT-XCL*(XTT-PZT(JL))) ) ) END IF END DO ! CONCURRENT + !$acc end parallel #endif ! !* 6.4 Select Wet or Dry case ! +!$acc kernels present_cr(GWORK) IF ( KRR == 7 ) THEN !$acc loop independent DO CONCURRENT (JL=1:JLU) @@ -634,7 +652,8 @@ DO CONCURRENT (JL=1:JLU) ENDIF ENDDO ELSE IF( KRR == 6 ) THEN - GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PZT(:)<XTT .AND. PRDRYG(:)>=PRWETG(:) .AND. PRWETG(:)>0.0 ! Wet case + !$mnh_expand_where(JL=1:JLU) + GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PZT(:)<XTT .AND. PRDRYG(:)>=PRWETG(:) .AND. PRWETG(:)>0.0 ! Wet case WHERE( GWORK(:) ) PRCS(:) = PRCS(:) - ZZW1(:,1) PRIS(:) = PRIS(:) - ZZW1(:,5) @@ -645,7 +664,8 @@ ELSE IF( KRR == 6 ) THEN PTHS(:) = PTHS(:) + (PRWETG(:)-ZZW1(:,5)-ZZW1(:,6))*(PLSFACT(:)-PLVFACT(:)) ! f(L_f*(RCWETG+RRWETG)) END WHERE - END IF + !$mnh_end_expand_where() +END IF !$acc end kernels IF (MPPDB_INITIALIZED) THEN @@ -682,8 +702,10 @@ END IF if ( lbudget_rg ) call Budget_store_init( tbudgets(NBUDGET_RG), 'DRYG', Unpack ( prgs(:) * prhodj(:), & mask = omicro(:,:,:), field = 0. ) ) -!$acc kernels +!$acc kernels present_cr(GWORK) +!$mnh_expand_where(JL=1:JLU) GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PZT(:)<XTT .AND. PRDRYG(:)<PRWETG(:) .AND. PRDRYG(:)>0.0 ! 
Dry case + WHERE( GWORK(:) ) PRCS(:) = PRCS(:) - ZZW1(:,1) PRIS(:) = PRIS(:) - ZZW1(:,2) @@ -693,6 +715,7 @@ END IF PTHS(:) = PTHS(:) + (ZZW1(:,1)+ZZW1(:,4))*(PLSFACT(:)-PLVFACT(:)) ! ! f(L_f*(RCDRYG+RRDRYG)) END WHERE + !$mnh_end_expand_where() !$acc end kernels IF (MPPDB_INITIALIZED) THEN @@ -719,9 +742,11 @@ END IF ! !* 6.5 Melting of the graupeln ! -!$acc kernels +!$acc kernels present_cr(GWORK) GWORK(:) = PRGT(:)>XRTMIN(6) .AND. PRGS(:)>0.0 .AND. PZT(:)>XTT +!$acc end kernels #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$acc parallel present_cr(GWORK) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN @@ -743,7 +768,9 @@ END IF PTHS(JL) = PTHS(JL) - ZZW(JL)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*(-RGMLTR)) END IF END DO ! CONCURRENT +!$acc end parallel #else +!$acc parallel present_cr(GWORK) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) IF ( GWORK(JL) ) THEN @@ -765,8 +792,8 @@ END IF PTHS(JL) = PTHS(JL) - ZZW(JL)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*(-RGMLTR)) END IF END DO ! CONCURRENT +!$acc end parallel #endif -!$acc end kernels IF (MPPDB_INITIALIZED) THEN CALL MPPDB_CHECK(PRRS,"RAIN_ICE_FAST_RG 6.5:PRRS") diff --git a/src/MNH/rain_ice_fast_ri.f90 b/src/MNH/rain_ice_fast_ri.f90 index bf08fef051dfb9ca2910cc4f72a4d26b6c773037..65d9a1fcefe5a8e5680f47c2b2fdc1f29ebd8dc3 100644 --- a/src/MNH/rain_ice_fast_ri.f90 +++ b/src/MNH/rain_ice_fast_ri.f90 @@ -40,6 +40,10 @@ use mode_mppdb #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_COMPILER_CCE +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif IMPLICIT NONE ! @@ -158,6 +162,7 @@ CALL MNH_MEM_GET( ZLBEXI, SIZE(PRHODREF) ) zzw(:) = 0. GWORK(:) = PRCS(:)>0.0 .AND. PSSI(:)>0.0 .AND. PRIT(:)>XRTMIN(4) .AND. PCIT(:)>0.0 #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + !$mnh_expand_where(JL=1:JLU) WHERE( GWORK(:) ) ZZW(:) = MIN(1.E8,XLBI*( PRHODREF(:)*PRIT(:)/PCIT(:) )**XLBEXI) ! 
Lbda_i ZZW(:) = MIN( PRCS(:),( PSSI(:) / (PRHODREF(:)*PAI(:)) ) * PCIT(:) * & @@ -166,10 +171,11 @@ CALL MNH_MEM_GET( ZLBEXI, SIZE(PRHODREF) ) PRIS(:) = PRIS(:) + ZZW(:) PTHS(:) = PTHS(:) + ZZW(:)*(PLSFACT(:)-PLVFACT(:)) ! f(L_f*(RCBERI)) END WHERE + !$mnh_end_expand_where() #else !!$ Le DO concurrent n'est pas bit-reproductible BUG NVHPC 20.7 - DO CONCURRENT ( JL=1:JLU ) + !$mnh_do_concurrent( JL=1:JLU ) ZLBEXI(JL) = XLBEXI IF ( GWORK(JL) ) THEN ZZW(JL) = MIN(1.E8,XLBI*BR_POW( PRHODREF(JL)*PRIT(JL)/PCIT(JL), ZLBEXI(JL) ) ) ! Lbda_i @@ -179,7 +185,7 @@ CALL MNH_MEM_GET( ZLBEXI, SIZE(PRHODREF) ) PRIS(JL) = PRIS(JL) + ZZW(JL) PTHS(JL) = PTHS(JL) + ZZW(JL)*(PLSFACT(JL)-PLVFACT(JL)) ! f(L_f*(RCBERI)) END IF - END DO ! CONCURRENT + !$mnh_end_do() ! CONCURRENT !!! WHERE( GWORK(:) ) !!!! ZLBEXI(:) = XLBEXI diff --git a/src/MNH/rain_ice_fast_rs.f90 b/src/MNH/rain_ice_fast_rs.f90 index 88462be8600be46516244dc0e1d0020729bd1668..24c30713af7d7ba6280938b1fc7fbf69e0052061 100644 --- a/src/MNH/rain_ice_fast_rs.f90 +++ b/src/MNH/rain_ice_fast_rs.f90 @@ -177,7 +177,7 @@ JJU = size(PRHODREF) ! !* 5.1 cloud droplet riming of the aggregates ! -!$acc kernels +!$acc kernels present_cr(GWORK) GWORK(:) = PRCT(:)>XRTMIN(2) .AND. PRST(:)>XRTMIN(5) .AND. PRCS(:)>0.0 .AND. PZT(:)<XTT !$acc end kernels #ifndef MNH_OPENACC @@ -366,7 +366,7 @@ END IF ! !* 5.2 rain accretion onto the aggregates ! -!$acc kernels +!$acc kernels present_cr(GWORK) GWORK(:) = PRRT(:)>XRTMIN(3) .AND. PRST(:)>XRTMIN(5) .AND. PRRS(:)>0.0 .AND. PZT(:)<XTT !$acc end kernels #ifndef MNH_OPENACC @@ -586,9 +586,11 @@ END IF ! !* 5.3 Conversion-Melting of the aggregates ! -!$acc kernels +!$acc kernels present_cr(GWORK,zzw) zzw(:) = 0. GWORK(:) = PRST(:)>XRTMIN(5) .AND. PRSS(:)>0.0 .AND. PZT(:)>XTT +!$acc end kernels +!$acc parallel present_cr(GWORK,zzw) !$acc loop independent DO CONCURRENT (JJ=1:JJU) IF ( GWORK(JJ) ) THEN @@ -618,7 +620,7 @@ END IF PRGS(JJ) = PRGS(JJ) + ZZW(JJ) END IF END DO ! 
CONCURRENT -!$acc end kernels +!$acc end parallel if ( lbudget_rs ) call Budget_store_add( tbudgets(NBUDGET_RS), 'CMEL', & Unpack ( -zzw(:) * prhodj(:), mask = omicro(:,:,:), field = 0. ) ) diff --git a/src/MNH/rain_ice_nucleation.f90 b/src/MNH/rain_ice_nucleation.f90 index c44d51d60bdbf4eb4c34e1abbc66346917140739..dbac01ac7c272af6e32df903580505fd626d2c53 100644 --- a/src/MNH/rain_ice_nucleation.f90 +++ b/src/MNH/rain_ice_nucleation.f90 @@ -173,9 +173,11 @@ DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) END DO #endif ! +!$acc end kernels + ! optimization by looking for locations where ! the temperature is negative only !!! -! +!$acc kernels present_cr(GNEGT) GNEGT(:,:,:) = .FALSE. GNEGT(KIB:KIE,KJB:KJE,KKTB:KKTE) = PT(KIB:KIE,KJB:KJE,KKTB:KKTE)<XTT !$acc end kernels @@ -289,8 +291,19 @@ END DO DO CONCURRENT ( JL=1:INEGT ) ZZW(JL) = ZZW(JL) - ZCIT(JL) END DO - ZZWMAX = MAXVAL(ZZW(1:INEGT)) +#ifndef MNH_COMPILER_NVHPC + ZZWMAX = MAXVAL(ZZW(1:INEGT)) !$acc end kernels +#else +!$acc end kernels + ZZWMAX = 0.0 +!$acc parallel reduction(max:ZZWMAX) + !$mnh_do_concurrent( JL=1:INEGT) + ZZWMAX = MAX(ZZWMAX,ZZW(JL)) + !$mnh_end_do() +!$acc end parallel +#endif + IF( ZZWMAX > 0.0 ) THEN !$acc kernels diff --git a/src/MNH/rain_ice_red.f90 b/src/MNH/rain_ice_red.f90 index 4e10302337f10d7fded5072b817bb336bb08c0aa..c598b4c698398ba0fb4678006d8485f1133c9f23 100644 --- a/src/MNH/rain_ice_red.f90 +++ b/src/MNH/rain_ice_red.f90 @@ -359,6 +359,11 @@ REAL, DIMENSION(:,:,:), OPTIONAL, INTENT(INOUT) :: PRHS ! Hail m.r. source REAL, DIMENSION(:,:), OPTIONAL, INTENT(OUT) :: PINPRH! Hail instant precip REAL, DIMENSION(:,:,:,:), OPTIONAL, INTENT(OUT) :: PFPR ! upper-air precipitation fluxes ! +#ifdef MNH_COMPILER_CCE +STOP "RAIN_ICE_RED TROP LENT A COMPILER AVEC CRAY/CCE >> 30 Minutes " +STOP "ENLEVE LE ifdefMNH_COMPILER_CCE , SI VOUS EN AVEZ BESOIN sur GPU AMD " +#else +! !* 0.2 Declarations of local variables : ! INTEGER :: IIB ! 
Define the domain where is @@ -1400,8 +1405,8 @@ IF(HSUBG_AUCV_RC=='PDF ' .AND. CSUBG_PR_PDF=='SIGM') GTEST=.true. #endif IF(IMICRO>0) THEN -!$acc loop independent - DO JL=1, IMICRO +!acc loop independent + !$mnh_do_concurrent(JL=1:IMICRO) ZRVT(JL) = PRVT(I1(JL),I2(JL),I3(JL)) ZRCT(JL) = PRCT(I1(JL),I2(JL),I3(JL)) ZRRT(JL) = PRRT(I1(JL),I2(JL),I3(JL)) @@ -1430,7 +1435,7 @@ IF(IMICRO>0) THEN ELSE ZHLI_LCF(JL)=0. ENDIF - ENDDO + !$mnh_end_do() IF(GEXT_TEND) THEN !$acc loop independent DO JL=1, IMICRO @@ -2895,5 +2900,6 @@ CONTAINS END SUBROUTINE CORRECT_NEGATIVITIES ! +#endif END SUBROUTINE RAIN_ICE_RED diff --git a/src/MNH/rain_ice_slow.f90 b/src/MNH/rain_ice_slow.f90 index 5d5cf206840dbad012c02153fce1494aa5e6fa8d..690cbda36b5fbe985e99272d99da77b7d18b737f 100644 --- a/src/MNH/rain_ice_slow.f90 +++ b/src/MNH/rain_ice_slow.f90 @@ -42,6 +42,10 @@ use mode_mppdb #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_COMPILER_CCE +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif IMPLICIT NONE ! @@ -155,7 +159,7 @@ CALL MNH_MEM_GET( zz_diff, SIZE(PLSFACT) ) ZZW(:) = 0.0 GWORK(:) = PZT(:)<XTT-35.0 .AND. PRCT(:)>XRTMIN(2) .AND. PRCS(:)>0. - DO CONCURRENT ( JL=1:JLU ) + !$mnh_do_concurrent( JL=1:JLU ) IF ( GWORK(JL) ) THEN ZZW(JL) = MIN( PRCS(JL),XHON*PRHODREF(JL)*PRCT(JL) & #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) @@ -167,7 +171,7 @@ CALL MNH_MEM_GET( zz_diff, SIZE(PLSFACT) ) PRCS(JL) = PRCS(JL) - ZZW(JL) PTHS(JL) = PTHS(JL) + ZZW(JL) * zz_diff(JL) ! f(L_f*(RCHONI)) END IF -ENDDO + !$mnh_end_do() !$acc end kernels if ( lbudget_th ) call Budget_store_add( tbudgets(NBUDGET_TH), 'HON', & @@ -180,7 +184,7 @@ ENDDO !* 3.3 compute the spontaneous freezing source: RRHONG ! !$acc kernels -DO CONCURRENT (JL=1:JLU) +!$mnh_do_concurrent (JL=1:JLU) ZZW(JL) = 0.0 GWORK(JL) = PZT(JL)<XTT-35.0 .AND. PRRT(JL)>XRTMIN(3) .AND. PRRS(JL)>0. 
IF( GWORK(JL) )THEN @@ -189,7 +193,7 @@ DO CONCURRENT (JL=1:JLU) PRRS(JL) = PRRS(JL) - ZZW(JL) PTHS(JL) = PTHS(JL) + ZZW(JL) * zz_diff(JL) ! f(L_f*(RRHONG)) ENDIF -ENDDO +!$mnh_end_do() !$acc end kernels if ( lbudget_th ) call Budget_store_add( tbudgets(NBUDGET_TH), 'SFR', & @@ -242,8 +246,7 @@ END DO !* 3.4.3 compute the deposition on r_s: RVDEPS ! GWORK(:) = PRST(:)>0.0 - !$acc loop independent - DO CONCURRENT ( JL=1:JLU ) + !$mnh_do_concurrent( JL=1:JLU ) IF ( GWORK(JL) ) THEN PLBDAS(JL) = MIN( XLBDAS_MAX, & #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) @@ -254,7 +257,7 @@ END DO ELSE PLBDAS(JL) = 0. END IF - END DO ! CONCURRENT + !$mnh_end_do() ! CONCURRENT ZZW(:) = 0.0 GWORK(:) = (PRST(:)>XRTMIN(5)) .AND. (PRSS(:)>0.0) !$acc loop independent @@ -324,7 +327,7 @@ END DO #endif ZZW(:) = 0.0 GWORK(:) = PRIT(:)>XRTMIN(4) .AND. PRIS(:)>0.0 -DO CONCURRENT ( JL=1:JLU ) +!$mnh_do_concurrent( JL=1:JLU ) IF ( GWORK(JL) ) THEN #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZZW(JL) = MIN( PRIS(JL),XTIMAUTI * EXP( XTEXAUTI*(PZT(JL)-XTT) ) & @@ -337,7 +340,7 @@ DO CONCURRENT ( JL=1:JLU ) PRIS(JL) = PRIS(JL) - ZZW(JL) !!END WHERE END IF -END DO +!$mnh_end_do() !$acc end kernels if ( lbudget_ri ) call Budget_store_add( tbudgets(NBUDGET_RI), 'AUTS', & diff --git a/src/MNH/rain_ice_warm.f90 b/src/MNH/rain_ice_warm.f90 index a222b82310583cf6f2acf8c4f85ef0938235587d..48ec7a5d07dd5a376f40eccb205e846447495727 100644 --- a/src/MNH/rain_ice_warm.f90 +++ b/src/MNH/rain_ice_warm.f90 @@ -178,7 +178,7 @@ CALL MNH_MEM_GET( ZZW4, JLU ) !* 4.2 compute the autoconversion of r_c for r_r production: RCAUTR ! !$acc kernels -DO CONCURRENT (JL=1:JLU) +!$mnh_do_concurrent(JL=1:JLU) zzw(JL) = 0. GWORK(JL) = PRCS(JL)>0.0 .AND. PHLC_HCF(JL)>0.0 IF( GWORK(JL) )THEN @@ -187,7 +187,7 @@ zzw(JL) = 0. 
PRCS(JL) = PRCS(JL) - ZZW(JL) PRRS(JL) = PRRS(JL) + ZZW(JL) ENDIF -ENDDO +!$mnh_end_do() !$acc end kernels if ( lbudget_rc ) call Budget_store_add( tbudgets(NBUDGET_RC), 'AUTO', & @@ -291,6 +291,7 @@ ENDDO !Evaporation only when there's no cloud (RC must be 0) GWORK(:) = PRRT(:)>XRTMIN(3) .AND. PRCT(:)<=XRTMIN(2) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) + !$mnh_expand_where(JL=1:JLU) WHERE( GWORK(:) ) ZZW(:) = EXP( XALPW - XBETAW/PZT(:) - XGAMW*ALOG(PZT(:) ) ) ! es_w PUSW(:) = 1.0 - PRVT(:)*( PPRES(:)-ZZW(:) ) / ( (XMV/XMD) * ZZW(:) ) @@ -303,6 +304,7 @@ ENDDO PRVS(:) = PRVS(:) + ZZW(:) PTHS(:) = PTHS(:) - ZZW(:)*PLVFACT(:) END WHERE + !$mnh_end_expand_where() #else !$acc loop independent DO CONCURRENT ( JL=1:JLU ) @@ -358,10 +360,9 @@ IF (CSUBG_RR_EVAP=='CLFR') GCSUBG_RR_EVAP=.true. !Ces variables devraient être sorties de rain_ice_slow et on mettrait le calcul de T^u, T^s !et plusieurs versions (comme actuellement, en ciel clair, en ciel nuageux) de PKA, PDV, PCJ dans rain_ice !On utiliserait la bonne version suivant l'option NONE, CLFR... dans l'évaporation et ailleurs - - GWORK(:) = PRRT(:)>XRTMIN(3) .AND. ZZW4(:)>PCF(:) !$acc loop independent DO CONCURRENT ( JL=1:JLU ) + GWORK(JL) = PRRT(JL)>XRTMIN(3) .AND. ZZW4(JL)>PCF(JL) IF ( GWORK(JL) ) THEN ! outside the cloud (environment) the use of T^u (unsaturated) instead of T ! Bechtold et al. 1993 diff --git a/src/MNH/resolved_cloud.f90 b/src/MNH/resolved_cloud.f90 index 0870549d407b4d22330b48ed4d6b8d13cc42bb83..7af6ad2666f16e0712e397bd830b908f7dd711ad 100644 --- a/src/MNH/resolved_cloud.f90 +++ b/src/MNH/resolved_cloud.f90 @@ -342,6 +342,11 @@ USE MODI_SLOW_TERMS USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif ! +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif +! IMPLICIT NONE ! 
!* 0.1 Declarations of dummy arguments : @@ -705,20 +710,20 @@ END IF !$acc kernels present_cr(PTHS,PRS,PRHODJ,PSVS) PTHS(:,:,:) = PTHS(:,:,:) / PRHODJ(:,:,:) ! -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ,JRR = 1:KRR ) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JRR=1:KRR) PRS(JI,JJ,JK,JRR) = PRS(JI,JJ,JK,JRR) / PRHODJ(JI,JJ,JK) -END DO ! CONCURRENT +!$mnh_end_do() ! CONCURRENT ! IF (GCLOUD_C2R2_CLOUD_C3R5_CLOUD_KHKO_CLOUD_LIMA) THEN - DO CONCURRENT ( JI=1:JIU , JJ=1:JJU,JK=1:JKU , JSV=ISVBEG:ISVEND ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JSV=ISVBEG:ISVEND ) PSVS(JI,JJ,JK,JSV) = PSVS(JI,JJ,JK,JSV) / PRHODJ(JI,JJ,JK) - ENDDO + !$mnh_end_do() ENDIF ! ! complete the lateral boundaries to avoid possible problems ! -!dir$ concurrent -DO JI=1,JPHEXT +!dir concurrent +!$mnh_do_concurrent(JI=1:JPHEXT) PTHS(JI,:,:) = PTHS(IIB,:,:) PTHS(IIE+JI,:,:) = PTHS(IIE,:,:) PTHS(:,JI,:) = PTHS(:,IJB,:) @@ -728,7 +733,7 @@ DO JI=1,JPHEXT PRS(IIE+JI,:,:,:) = PRS(IIE,:,:,:) PRS(:,JI,:,:) = PRS(:,IJB,:,:) PRS(:,IJE+JI,:,:) = PRS(:,IJE,:,:) -END DO +!$mnh_end_do() ! ! complete the physical boundaries to avoid some computations ! @@ -932,9 +937,9 @@ CALL PRINT_MSG(NVERB_FATAL,'GEN','RESOLVED_CLOUD','C2R2//KHKO not yet implemente #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZEXN(:,:,:)= (PPABST(:,:,:)/XP00)**(XRD/XCPD) #else -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZEXN(JI,JJ,JK)= BR_POW( PPABST(JI,JJ,JK)/XP00, XRD/XCPD ) -ENDDO +!$mnh_end_do() #endif !$acc end kernels ! @@ -978,8 +983,7 @@ ENDDO ENDIF IF (LRED) THEN !$acc kernels -!$acc loop independent - DO CONCURRENT( JI = 1 : JIU, JJ = 1 : JJU, JK = 1 : JKU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) LLMICRO(JI,JJ,JK)=PRT(JI,JJ,JK,2)>XRTMIN(2) .OR. & PRT(JI,JJ,JK,3)>XRTMIN(3) .OR. & PRT(JI,JJ,JK,4)>XRTMIN(4) .OR. & @@ -990,7 +994,7 @@ ENDDO PRS(JI,JJ,JK,4)>ZRSMIN(4) .OR. & PRS(JI,JJ,JK,5)>ZRSMIN(5) .OR. 
& PRS(JI,JJ,JK,6)>ZRSMIN(6) - END DO + !$mnh_end_do() !$acc end kernels CALL RAIN_ICE_RED (SIZE(PTHT, 1), SIZE(PTHT, 2), SIZE(PTHT, 3), COUNT(LLMICRO), & OSEDIC, CSEDIM, HSUBG_AUCV, CSUBG_AUCV_RI, & @@ -1053,9 +1057,9 @@ CALL PRINT_MSG(NVERB_FATAL,'GEN','RESOLVED_CLOUD','ICE4 not yet implemented') #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZEXN(:,:,:)= (PPABST(:,:,:)/XP00)**(XRD/XCPD) #else - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZEXN(JI,JJ,JK)= BR_POW( PPABST(JI,JJ,JK)/XP00, XRD/XCPD ) - ENDDO + !$mnh_end_do() #endif !$acc end kernels ! @@ -1270,18 +1274,19 @@ call Sources_neg_correct( hcloud, 'NECON', krr, ptstep, ppabst, ptht, prt, pths, ! --------------------------------------- ! !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PTHS(JI,JJ,JK) = PTHS(JI,JJ,JK) * PRHODJ(JI,JJ,JK) -END DO ! CONCURRENT +!$mnh_end_do() ! CONCURRENT !$acc end kernels ! !$acc kernels -!$acc loop seq -DO JRR=1,KRR - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JRR=1:KRR) + !dir nextscalar + ! acc loop seq + !DO JRR=1,KRR PRS(JI,JJ,JK,JRR) = PRS(JI,JJ,JK,JRR) * PRHODJ(JI,JJ,JK) - END DO ! CONCURRENT -END DO + !END DO +!$mnh_end_do() ! CONCURRENT !$acc end kernels ! IF (HCLOUD=='C2R2' .OR. HCLOUD=='C3R5' .OR. HCLOUD=='KHKO' .OR. HCLOUD=='LIMA') THEN diff --git a/src/MNH/shuman_device.f90 b/src/MNH/shuman_device.f90 index e5675a066e0678eb24a66deefe24440b0b1c794e..10de3c8e9ad86a06e316a97d40b788e4354aa73e 100644 --- a/src/MNH/shuman_device.f90 +++ b/src/MNH/shuman_device.f90 @@ -415,11 +415,10 @@ IKU = SIZE(PA,3) ! !$acc kernels present_cr(PA,PMYF) #ifndef _OPT_LINEARIZED_LOOPS -!$acc_nv loop independent collapse(3) !TODO: remplacer le 1 par JPHEXT ? 
-DO CONCURRENT ( JI=1:IIU , JJ=1:IJU-1 , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=1:IJU-1,JK=1:IKU) PMYF(JI,JJ,JK) = 0.5*( PA(JI,JJ,JK)+PA(JI,JJ+1,JK) ) -END DO +!$mnh_end_do() #else JIJKOR = 1 + IIU JIJKEND = IIU*IJU*IKU @@ -431,10 +430,9 @@ DO JIJK=JIJKOR , JIJKEND END DO #endif ! -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( JI=1:IIU , JJ=1:JPHEXT , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=1:JPHEXT,JK=1:IKU) PMYF(JI,IJU-JPHEXT+JJ,JK) = PMYF(JI,JPHEXT+JJ,JK) ! for reprod JPHEXT <> 1 -END DO +!$mnh_end_do() !$acc end kernels !$acc end data @@ -530,11 +528,10 @@ IKU = SIZE(PA,3) ! #ifndef _OPT_LINEARIZED_LOOPS !$acc kernels present_cr(PA,PMYM) -!$acc_nv loop independent collapse(3) !TODO: remplacer le 1+1 par 1+JPHEXT ? -DO CONCURRENT ( JI=1:IIU , JJ=2:IJU , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=2:IJU,JK=1:IKU) PMYM(JI,JJ,JK) = 0.5*( PA(JI,JJ,JK)+PA(JI,JJ-1,JK) ) -END DO +!$mnh_end_do() #else JIJKOR = 1 + IIU JIJKEND = IIU*IJU*IKU @@ -546,10 +543,9 @@ DO JIJK=JIJKOR , JIJKEND END DO #endif ! -!$acc_nv loop independent collapse(3) -DO CONCURRENT ( JI=1:IIU , JJ=1:JPHEXT , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=1:JPHEXT,JK=1:IKU) PMYM(JI,JJ,JK) = PMYM(JI,IJU-2*JPHEXT+JJ,JK) ! for reprod JPHEXT <> 1 -END DO +!$mnh_end_do() !$acc end kernels !$acc end data @@ -1116,11 +1112,10 @@ IKU = SIZE(PA,3) ! !$acc kernels present_cr(PA,PDYF) #ifndef _OPT_LINEARIZED_LOOPS -!$acc_nv loop independent collapse(3) !TODO: remplacer le 1 par JPHEXT ? -DO CONCURRENT ( JI=1:IIU , JJ=1:IJU-1 , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=1:IJU-1,JK=1:IKU) PDYF(JI,JJ,JK) = PA(JI,JJ+1,JK) - PA(JI,JJ,JK) -END DO +!$mnh_end_do() #else JIJKOR = 1 + IIU JIJKEND = IIU*IJU*IKU @@ -1134,10 +1129,9 @@ END DO ! !$acc loop seq DO JJ=1,JPHEXT -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:IIU , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JK=1:IKU) PDYF(JI,IJU-JPHEXT+JJ,JK) = PDYF(JI,JPHEXT+JJ,JK) ! 
for reprod JPHEXT <> 1 -END DO +!$mnh_end_do() END DO !$acc end kernels diff --git a/src/MNH/sources_neg_correct.f90 b/src/MNH/sources_neg_correct.f90 index d27079dd00da2092e883ce0ba6d696d80beec495..f08c426fa5cbf672b38da78df30c33a1e0abb2e3 100644 --- a/src/MNH/sources_neg_correct.f90 +++ b/src/MNH/sources_neg_correct.f90 @@ -276,7 +276,7 @@ CLOUD: select case ( hcloud ) jrmax = Size( prrs, 4 ) do jr = 2, jrmax !PW: kernels directive inside do loop on jr because compiler bug... (NVHPC 21.7) -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zlv) where ( prrs(:, :, :, jr) < 0. ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + prrs(:, :, :, jr) prths(:, :, :) = prths(:, :, :) - prrs(:, :, :, jr) * zlv(:, :, :) / & @@ -286,7 +286,7 @@ CLOUD: select case ( hcloud ) !$acc end kernels end do -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zlv) where ( prrs(:, :, :, 1) < 0. .and. prrs(:, :, :, 2) > 0. ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + prrs(:, :, :, 2) prths(:, :, :) = prths(:, :, :) - prrs(:, :, :, 2) * zlv(:, :, :) / & @@ -368,7 +368,7 @@ CLOUD: select case ( hcloud ) end where !$acc end kernels do jsv = 2, 3 -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zlv) !PW: kernels directive inside do loop on jr because compiler bug... (NVHPC 21.7) where ( prrs(:, :, :, jsv) < 0. .or. prsvs(:, :, :, nsv_c2r2beg - 1 + jsv) < 0. ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + prrs(:, :, :, jsv) @@ -379,7 +379,7 @@ CLOUD: select case ( hcloud ) end where !$acc end kernels end do -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zlv) where ( prrs(:, :, :, 1) < 0. .and. prrs(:, :, :, 2) > 0. ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + prrs(:, :, :, 2) prths(:, :, :) = prths(:, :, :) - prrs(:, :, :, 2) * zlv(:, :, :) / & @@ -391,7 +391,7 @@ CLOUD: select case ( hcloud ) ! ! case( 'LIMA' ) -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zlv) ! Correction where rc<0 or Nc<0 if ( lwarm_lima ) then where ( prrs(:, :, :, 2) < xrtmin_lima(2) / ptstep .or. 
prsvs(:, :, :, nsv_lima_nc) < xctmin_lima(2) / ptstep ) @@ -422,7 +422,7 @@ CLOUD: select case ( hcloud ) !$acc end kernels ! Correction where ri<0 or Ni<0 if ( lcold_lima ) then -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zls) where ( prrs(:, :, :, 4) < xrtmin_lima(4) / ptstep .or. prsvs(:, :, :, nsv_lima_ni) < xctmin_lima(4) / ptstep ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + prrs(:, :, :, 4) prths(:, :, :) = prths(:, :, :) - prrs(:, :, :, 4) * zls(:, :, :) / & @@ -434,7 +434,7 @@ CLOUD: select case ( hcloud ) if ( hbudname /= 'NETUR' ) then do jr = 5, Size( prrs, 4 ) !PW: kernels directive inside do loop on jr because compiler bug... (NVHPC 21.7) -!$acc kernels +!$acc kernels present_cr(zexn,zcph,zls) where ( prrs(:, :, :, jr) < 0. ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + prrs(:, :, :, jr) prths(:, :, :) = prths(:, :, :) - prrs(:, :, :, jr) * zls(:, :, :) / & @@ -445,7 +445,7 @@ CLOUD: select case ( hcloud ) end do end if if(krr > 3) then -!$acc kernels +!$acc kernels present_cr(zcor,zexn,zcph,zls) where ( prrs(:, :, :, 1) < 0. .and. prrs(:, :, :, 4) > 0. ) zcor(:, :, :) = Min( -prrs(:, :, :, 1), prrs(:, :, :, 4) ) prrs(:, :, :, 1) = prrs(:, :, :, 1) + zcor(:, :, :) diff --git a/src/MNH/tke_eps_sources.f90 b/src/MNH/tke_eps_sources.f90 index 29ac9391eca93ea12df85436603ea27fab7fce30..0636775b90ea108da8c655e84deaa8faa1166e6f 100644 --- a/src/MNH/tke_eps_sources.f90 +++ b/src/MNH/tke_eps_sources.f90 @@ -423,7 +423,9 @@ CALL MZM_DEVICE(ZKEFF, ZTMP1_DEVICE) !Warning: re-used later CALL MZM_DEVICE(PRHODJ,ZTMP2_DEVICE) !Warning: re-used later !$acc kernels ! 
present(ZA) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZA(:,:,:) = - PTSTEP * XCET * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) / PDZZ(:,:,:)**2 +!$mnh_end_expand_array() #else !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) diff --git a/src/MNH/tools.f90 b/src/MNH/tools.f90 index 846462cbbcc4e3a6b61c4724bb643ba9874501a6..a811b054567e80016073f18b470e4c00af38510a 100644 --- a/src/MNH/tools.f90 +++ b/src/MNH/tools.f90 @@ -188,7 +188,7 @@ ic = 0 ! different runs of this subroutine BUT final result should be the same !Comment the following line + atomic directives to have consistent values for debugging !Warning: huge impact on performance -!$acc parallel loop private(idx) independent +!$acc parallel loop private(idx) copy(ic) independent do ji = 1, size( ltab, 1 ) if ( ltab(ji ) ) then !$acc atomic capture @@ -198,7 +198,7 @@ ic = 0 i1(idx) = ji end if end do -! acc end kernels +! acc end parallel else @@ -259,7 +259,7 @@ ic = 0 ! different runs of this subroutine BUT final result should be the same !Comment the following line + atomic directives to have consistent values for debugging !Warning: huge impact on performance -!$acc parallel loop collapse(2) private(idx) independent +!$acc parallel loop collapse(2) private(idx) copy(ic) independent do jj = 1, size( ltab, 2 ) do ji = 1, size( ltab, 1 ) if ( ltab(ji, jj ) ) then @@ -272,7 +272,7 @@ ic = 0 end if end do end do -! acc end kernels +! acc end parallel else @@ -337,7 +337,7 @@ ic = 0 ! 
different runs of this subroutine BUT final result should be the same !Comment the following line + atomic directives to have consistent values for debugging !Warning: huge impact on performance -!$acc parallel loop collapse(3) private(idx) independent +!$acc parallel loop collapse(3) private(idx) copy(ic) independent do jk = 1, size( ltab, 3 ) do jj = 1, size( ltab, 2 ) do ji = 1, size( ltab, 1 ) @@ -353,7 +353,7 @@ ic = 0 end do end do end do -! acc end kernels +! acc end parallel else diff --git a/src/MNH/tridiag_thermo.f90 b/src/MNH/tridiag_thermo.f90 index c96d99382707f1623ae9460b973256a7855ce63b..871581a0642cda548e118707ec8fdf0c861fb994 100644 --- a/src/MNH/tridiag_thermo.f90 +++ b/src/MNH/tridiag_thermo.f90 @@ -263,7 +263,7 @@ CALL MZM_DEVICE(PRHODJ,ZMZM_RHODJ) ZRHODJ_DFDDTDZ_O_DZ2(:,:,:) = ZMZM_RHODJ(:,:,:)*PDFDDTDZ(:,:,:)/PDZZ(:,:,:)**2 #else !$acc_nv loop independent collapse(3) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +DO CONCURRENT (JK=1:JKU,JJ=1:JJU,JI=1:JIU) ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) = ZMZM_RHODJ(JI,JJ,JK)*PDFDDTDZ(JI,JJ,JK)/BR_P2(PDZZ(JI,JJ,JK)) END DO !CONCURRENT #endif @@ -285,7 +285,7 @@ ZY=0.
#ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZY(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)*PVARM(JI,JJ,IKB)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKB+KKL) * PF(JI,JJ,IKB+KKL)/PDZZ(JI,JJ,IKB+KKL) & + ZMZM_RHODJ(JI,JJ,IKB ) * PF(JI,JJ,IKB )/PDZZ(JI,JJ,IKB ) & @@ -298,7 +298,7 @@ END DO !CONCURRENT #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) +DO CONCURRENT (JK=IKTB+1:IKTE-1,JJ=1:JJU,JI=1:JIU) ZY(JI,JJ,JK) = PRHODJ(JI,JJ,JK)*PVARM(JI,JJ,JK)/PTSTEP & - ZMZM_RHODJ(JI,JJ,JK+KKL) * PF(JI,JJ,JK+KKL)/PDZZ(JI,JJ,JK+KKL) & + ZMZM_RHODJ(JI,JJ,JK ) * PF(JI,JJ,JK )/PDZZ(JI,JJ,JK ) & @@ -313,7 +313,7 @@ END DO !CONCURRENT #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZY(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)*PVARM(JI,JJ,IKE)/PTSTEP & - ZMZM_RHODJ(JI,JJ,IKE+KKL) * PF(JI,JJ,IKE+KKL)/PDZZ(JI,JJ,IKE+KKL) & + ZMZM_RHODJ(JI,JJ,IKE ) * PF(JI,JJ,IKE )/PDZZ(JI,JJ,IKE ) & @@ -336,7 +336,7 @@ IF ( PIMPL > 1.E-10 ) THEN #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZB(JI,JJ,IKB) = PRHODJ(JI,JJ,IKB)/PTSTEP & - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL END DO !CONCURRENT @@ -346,7 +346,7 @@ END DO !CONCURRENT #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZC(JI,JJ,IKB) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKB+KKL) * PIMPL END DO !CONCURRENT !$acc end kernels @@ -355,7 +355,7 @@ END DO !CONCURRENT #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) +DO CONCURRENT (JK=IKTB+1:IKTE-1,JJ=1:JJU,JI=1:JIU) ZA(JI,JJ,JK) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK) * PIMPL ZB(JI,JJ,JK) = PRHODJ(JI,JJ,JK)/PTSTEP & - 
ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,JK+KKL) * PIMPL & @@ -368,7 +368,7 @@ END DO !CONCURRENT #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZA(JI,JJ,IKE) = ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL ZB(JI,JJ,IKE) = PRHODJ(JI,JJ,IKE)/PTSTEP & - ZRHODJ_DFDDTDZ_O_DZ2(JI,JJ,IKE ) * PIMPL @@ -385,16 +385,22 @@ END DO !CONCURRENT #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) END DO !CONCURRENT +!$acc end kernels ! +!$acc parallel !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL +#ifdef MNH_COMPILER_NVHPC ! gang+vector needed or parallisation vector only - !$acc_nv loop independent gang, vector collapse(2) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + !$acc loop independent gang, vector collapse(2) +#else + !$acc loop independent +#endif + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = ZB(JI,JJ,JK) - ZA(JI,JJ,JK) * ZGAM(JI,JJ,JK) @@ -403,11 +409,13 @@ DO JK = IKB+KKL,IKE-KKL,KKL ! res(k) = (y(k) -a(k)*res(k-1))/ bet END DO !CONCURRENT END DO +!$acc end parallel +!$acc kernels ! special treatment for the last level #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = ZB(JI,JJ,IKE) - ZA(JI,JJ,IKE) * ZGAM(JI,JJ,IKE) @@ -415,19 +423,25 @@ DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-KKL) ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet END DO !CONCURRENT +!$acc end kernels ! !* 3.3 going down ! ---------- ! +!$acc parallel !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL +#ifdef MNH_COMPILER_NVHPC ! 
gang+vector needed or parallisation vector only - !$acc_nv loop independent gang, vector collapse(2) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + !$acc loop independent gang, vector collapse(2) +#else + !$acc loop independent +#endif + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) END DO !CONCURRENT END DO -!$acc end kernels +!$acc end parallel ! ELSE ! @@ -435,7 +449,7 @@ ELSE #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) +DO CONCURRENT (JK=IKTB:IKTE,JJ=1:JJU,JI=1:JIU) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) * PTSTEP / PRHODJ(JI,JJ,JK) END DO !CONCURRENT !$acc end kernels @@ -450,7 +464,7 @@ END IF #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) END DO !CONCURRENT diff --git a/src/MNH/tridiag_tke.f90 b/src/MNH/tridiag_tke.f90 index 4cf3e15c4018fa0a4ee172a79300894d26694882..89840fb59c15629d93b8d70dc27acb5ed3473947 100644 --- a/src/MNH/tridiag_tke.f90 +++ b/src/MNH/tridiag_tke.f90 @@ -211,28 +211,34 @@ CALL MNH_MEM_GET( zbet, JIU, JJU ) !* 1. COMPUTE THE RIGHT HAND SIDE ! --------------------------- ! -!$acc kernels + IKT=SIZE(PVARM,3) IKTB=1+JPVEXT_TURB IKTE=IKT-JPVEXT_TURB IKB=KKA+JPVEXT_TURB*KKL IKE=KKU-JPVEXT_TURB*KKL +!$acc kernels ! ! #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - & PEXPL / PRHODJ(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB)) END DO !CONCURRENT +!$acc end kernels ! 
+!$acc parallel +!$acc loop seq DO JK=IKTB+1,IKTE-1 #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) +#else + !$acc loop independent #endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - & PEXPL / PRHODJ(JI,JJ,JK) * & ( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK) & @@ -241,42 +247,46 @@ DO JK=IKTB+1,IKTE-1 ) END DO !CONCURRENT END DO +!$acc end parallel ! +!$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + & PEXPL / PRHODJ(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL)) END DO !CONCURRENT +!$acc end kernels ! ! !* 2. INVERSION OF THE TRIDIAGONAL SYSTEM ! ----------------------------------- ! IF ( PIMPL > 1.E-10 ) THEN -! +!$acc kernels ! ! going up - ! + ! #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZBET(JI,JJ) = 1. + PIMPL * (PDIAG(JI,JJ,IKB)-PA(JI,JJ,IKB+KKL) / PRHODJ(JI,JJ,IKB)) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) END DO !CONCURRENT ! +!$acc end kernels +!$acc parallel !$acc loop seq DO JK = IKB+KKL,IKE-KKL,KKL #ifdef MNH_COMPILER_NVHPC !$acc loop gang, vector collapse(2) independent +#else + !$acc loop independent #endif - !dir$ concurrent ! collapse(JJ,JI) - DO JJ=1,JJU - !dir$ concurrent - DO JI=1,JIU + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJ(JI,JJ,JK-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = 1. + PIMPL * ( PDIAG(JI,JJ,JK) - & @@ -288,14 +298,15 @@ IF ( PIMPL > 1.E-10 ) THEN * PVARP(JI,JJ,JK-KKL) & ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet - END DO - END DO + END DO END DO +!$acc end parallel +!$acc kernels ! 
special treatment for the last level #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJ(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = 1. + PIMPL * ( PDIAG(JI,JJ,IKE) - & @@ -307,38 +318,46 @@ IF ( PIMPL > 1.E-10 ) THEN ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet END DO !CONCURRENT +!$acc end kernels ! ! going down ! +!$acc parallel !$acc loop seq DO JK = IKE-KKL,IKB,-1*KKL #ifdef MNH_COMPILER_NVHPC !$acc loop gang, vector collapse(2) +#else + !$acc loop independent #endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) END DO !CONCURRENT END DO -! +!$acc end parallel +! ELSE ! +!$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + DO CONCURRENT (JJ=1:JJU,JI=1:JIU) PVARP(JI,JJ,IKTB:IKTE) = ZY(JI,JJ,IKTB:IKTE) END DO !CONCURRENT ! -END IF +!$acc end kernels +END IF ! ! !* 3. FILL THE UPPER AND LOWER EXTERNAL VALUES ! ---------------------------------------- ! +!$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +DO CONCURRENT (JJ=1:JJU,JI=1:JIU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) END DO !CONCURRENT diff --git a/src/MNH/tridiag_w.f90 b/src/MNH/tridiag_w.f90 index a0647140e0d693da22771879b812dc6c03acf76f..62561c15f533a70575c9646ad790be965ad6851e 100644 --- a/src/MNH/tridiag_w.f90 +++ b/src/MNH/tridiag_w.f90 @@ -160,7 +160,7 @@ USE MODI_SHUMAN_DEVICE #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif -#ifdef MNH_BITREP_OMP +#ifdef MNH_COMPILER_CCE !$mnh_undef(LOOP) !$mnh_undef(OPENACC) #endif @@ -380,9 +380,14 @@ ZY=0. ZBET(JI,JJ) = ZB(JI,JJ,IKB) ! 
bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) !$mnh_end_do() +!$acc end kernels ! +!$acc parallel !$acc loop seq DO JK = IKB+1,IKE-1 +#ifdef MNH_COMPILER_CCE + !$acc loop independent +#endif !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,JK) = ZC(JI,JJ,JK-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -392,7 +397,9 @@ DO JK = IKB+1,IKE-1 ! res(k) = (y(k) -a(k)*res(k-1))/ bet !$mnh_end_do() END DO +!$acc end parallel ! special treatment for the last level +!$acc kernels !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = ZC(JI,JJ,IKE-1) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet @@ -401,21 +408,28 @@ END DO PVARP(JI,JJ,IKE)= ( ZY(JI,JJ,IKE) - ZA(JI,JJ,IKE) * PVARP(JI,JJ,IKE-1) ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet !$mnh_end_do() +!$acc end kernels ! !* 3.3 going down ! ---------- ! +!$acc parallel !$acc loop seq DO JK = IKE-1,IKB,-1 +#ifdef MNH_COMPILER_CCE + !$acc loop independent +#endif !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+1) * PVARP(JI,JJ,JK+1) !$mnh_end_do() END DO +!$acc end parallel ! ! !* 4. FILL THE UPPER AND LOWER EXTERNAL VALUES ! ---------------------------------------- ! +!$acc kernels !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,IKB-1)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,IKE+1)=0. diff --git a/src/MNH/tridiag_wind.f90 b/src/MNH/tridiag_wind.f90 index d0c16c3fb3b16ca54f69a215d3b08cb83a0d8997..58dd63f4d332bab728aabd3b79ea1c6838500d13 100644 --- a/src/MNH/tridiag_wind.f90 +++ b/src/MNH/tridiag_wind.f90 @@ -227,28 +227,28 @@ IKE=KKU-JPVEXT_TURB*KKL ! ! !$acc kernels ! async -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKB) = PVARM(JI,JJ,IKB) + PTSTEP*PSOURCE(JI,JJ,IKB) - & PEXPL / PRHODJA(JI,JJ,IKB) * PA(JI,JJ,IKB+KKL) * (PVARM(JI,JJ,IKB+KKL) - PVARM(JI,JJ,IKB)) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels ! !$acc kernels ! 
async -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=IKTB+1:IKTE-1) ZY(JI,JJ,JK)= PVARM(JI,JJ,JK) + PTSTEP*PSOURCE(JI,JJ,JK) - & PEXPL / PRHODJA(JI,JJ,JK) * & ( PVARM(JI,JJ,JK-KKL)*PA(JI,JJ,JK) & -PVARM(JI,JJ,JK)*(PA(JI,JJ,JK)+PA(JI,JJ,JK+KKL)) & +PVARM(JI,JJ,JK+KKL)*PA(JI,JJ,JK+KKL) & ) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels ! !$acc kernels ! async -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZY(JI,JJ,IKE)= PVARM(JI,JJ,IKE) + PTSTEP*PSOURCE(JI,JJ,IKE) + & PEXPL / PRHODJA(JI,JJ,IKE) * PA(JI,JJ,IKE) * (PVARM(JI,JJ,IKE)-PVARM(JI,JJ,IKE-KKL)) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels ! ! acc wait @@ -262,22 +262,18 @@ IF ( PIMPL > 1.E-10 ) THEN ! ! going up ! - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + !$acc kernels + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKB+KKL) / PRHODJA(JI,JJ,IKB) & + PCOEFS(JI,JJ) * PTSTEP ) ! bet = b(ikb) PVARP(JI,JJ,IKB) = ZY(JI,JJ,IKB) / ZBET(JI,JJ) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT + !$acc end kernels ! - !$acc loop seq - DO JK = IKB+KKL,IKE-KKL,KKL -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent gang, vector collapse(2) -#endif - DO CONCURRENT ( JJ=1:JJU , JI=1:JIU ) + !$acc parallel + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) + !$acc loop seq + DO JK = IKB+KKL,IKE-KKL,KKL ZGAM(JI,JJ,JK) = PIMPL * PA(JI,JJ,JK) / PRHODJA(JI,JJ,JK-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,JK) * (1. + ZGAM(JI,JJ,JK)) & @@ -288,13 +284,12 @@ IF ( PIMPL > 1.E-10 ) THEN * PVARP(JI,JJ,JK-KKL) & ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet - END DO ! CONCURRENT - END DO + END DO + !$mnh_end_do() ! CONCURRENT + !$acc end parallel + !$acc kernels ! 
special treatment for the last level -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent gang, vector collapse(2) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZGAM(JI,JJ,IKE) = PIMPL * PA(JI,JJ,IKE) / PRHODJA(JI,JJ,IKE-KKL) / ZBET(JI,JJ) ! gam(k) = c(k-1) / bet ZBET(JI,JJ) = 1. - PIMPL * ( PA(JI,JJ,IKE) * (1. + ZGAM(JI,JJ,IKE)) & @@ -304,27 +299,26 @@ IF ( PIMPL > 1.E-10 ) THEN * PVARP(JI,JJ,IKE-KKL) & ) / ZBET(JI,JJ) ! res(k) = (y(k) -a(k)*res(k-1))/ bet - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT + !$acc end kernels ! ! going down ! + !$acc parallel + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) !$acc loop seq - DO JK = IKE-KKL,IKB,-1*KKL -#ifdef MNH_COMPILER_NVHPC - !$acc loop gang, vector collapse(2) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) + DO JK = IKE-KKL,IKB,-1*KKL PVARP(JI,JJ,JK) = PVARP(JI,JJ,JK) - ZGAM(JI,JJ,JK+KKL) * PVARP(JI,JJ,JK+KKL) - END DO !CONCURRENT - END DO -!$acc end kernels + END DO + !$mnh_end_do() !CONCURRENT + !$acc end parallel ! ELSE ! !$acc kernels - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=IKTB:IKTE) PVARP(JI,JJ,JK) = ZY(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc end kernels ! END IF @@ -334,10 +328,10 @@ END IF ! ---------------------------------------- ! 
!$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) PVARP(JI,JJ,KKA)=PVARP(JI,JJ,IKB) PVARP(JI,JJ,KKU)=PVARP(JI,JJ,IKE) -END DO !CONCURRENT +!$mnh_end_do() !CONCURRENT !$acc end kernels if ( mppdb_initialized ) then diff --git a/src/MNH/turb.f90 b/src/MNH/turb.f90 index 7ff9e29c6ac83295cab74bcf501e001167e185f5..abfde1c07e497aa2ca8f533e93754fe493cf832f 100644 --- a/src/MNH/turb.f90 +++ b/src/MNH/turb.f90 @@ -14,7 +14,7 @@ module mode_turb #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) use modi_bitrep #endif -#ifdef MNH_BITREP_OMP +#ifdef MNH_COMPILER_CCE !$mnh_undef(LOOP) !$mnh_undef(OPENACC) #endif @@ -712,15 +712,21 @@ GTURBLEN_BL89_TURBLEN_RM17_TURBLEN_ADAP_ORMC01 = & HTURBLEN=='BL89' .OR. HTURBLEN=='RM17' .OR. HTURBLEN == 'ADAP' .OR. ORMC01 ! !$acc update device(PTHLT,PRT) -!$acc kernels present_cr(ZCOEF_DISS,ZTHLM,ZRM) +!PASCAL +!!$acc kernels present_cr(ZCOEF_DISS,ZTHLM,ZRM,zcp) !Copy data into ZTHLM and ZRM only if needed IF (HTURBLEN=='BL89' .OR. HTURBLEN=='RM17' .OR. HTURBLEN=='ADAP' .OR. ORMC01) THEN - DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTHLM(JI,JJ,JK) = PTHLT(JI,JJ,JK) - ZRM(JI,JJ,JK,:) = PRT(JI,JJ,JK,:) - END DO + !$acc kernels present_cr(ZTHLM,ZRM) + ZTHLM(:,:,:) = PTHLT(:,:,:) + ZRM(:,:,:,:) = PRT(:,:,:,:) + !DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !ZTHLM(JI,JJ,JK) = PTHLT(JI,JJ,JK) + !ZRM(JI,JJ,JK,:) = PRT(JI,JJ,JK,:) + !END DO + !$acc end kernels END IF ! +!$acc kernels present_cr(ZTRH,ZCOEF_DISS,ZTHLM,ZRM,zcp) ZTRH(:, :, : ) = XUNDEF ! !---------------------------------------------------------------------------- @@ -805,7 +811,7 @@ IF (KRRL >=1) THEN CALL COMPUTE_FUNCTION_THERMO(XALPI,XBETAI,XGAMI,XLSTT,XCI,ZT,ZEXN,ZCP, & ZLSOCPEXNM,ZAMOIST_ICE,ZATHETA_ICE) ! 
-!$acc kernels present( zamoist, zatheta, zlocpexnm, zlvocpexnm, zlsocpexnm, zamoist_ice, zatheta_ice ) +!$acc kernels present_cr( zamoist, zatheta, zlocpexnm, zlvocpexnm, zlsocpexnm, zamoist_ice, zatheta_ice ) DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) IF (PRT(JI,JJ,JK,2)+PRT(JI,JJ,JK,4)>0.0) THEN ZFRAC_ICE(JI,JJ,JK) = PRT(JI,JJ,JK,4) / ( PRT(JI,JJ,JK,2)+PRT(JI,JJ,JK,4) ) @@ -861,7 +867,7 @@ IF (KRRL >=1) THEN END IF ! ELSE -!$acc kernels present( zlocpexnm ) +!$acc kernels present_cr( zlocpexnm ) ZLOCPEXNM=0. !$acc end kernels END IF ! loop end on KRRL >= 1 @@ -870,7 +876,7 @@ END IF ! loop end on KRRL >= 1 ! !$acc update device(PRRS,PRTHLS) IF ( KRRL >= 1 ) THEN -!$acc kernels present( zlocpexnm ) +!$acc kernels present_cr( zlocpexnm ) IF ( KRRI >= 1 ) THEN DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) ! Rnp at t @@ -907,7 +913,7 @@ SELECT CASE (HTURBLEN) ! ------------------ CASE ('BL89') -!$acc kernels +!$acc kernels present_cr(ZSHEAR) ZSHEAR(:, :, : ) = 0. !$acc end kernels CALL BL89(KKA,KKU,KKL,PZZ,PDZZ,PTHVREF,ZTHLM,KRR,ZRM,PTKET,ZSHEAR,PLEM) @@ -998,7 +1004,7 @@ END IF !* 3.6 Dissipative length ! ------------------ ! -!$acc kernels +!$acc kernels present_cr(ZLEPS) ZLEPS(:,:,:)=PLEM(:,:,:) ! !* 3.7 Correction in the Surface Boundary Layer (Redelsperger 2001) @@ -1064,7 +1070,7 @@ ENDIF ! CALL UPDATE_ROTATE_WIND(ZUSLOPE,ZVSLOPE) ELSE -!$acc kernels +!$acc kernels present_cr(ZUSLOPE,ZVSLOPE) ZUSLOPE=PUT(:,:,KKA) ZVSLOPE=PVT(:,:,KKA) !$acc end kernels @@ -1127,7 +1133,7 @@ IF (HTOM=='TM06') THEN CALL GZ_W_M_DEVICE(ZMTH2,PDZZ,ZFTH2) ! -d(w'th'2 )/dz !CALL GZ_W_M_DEVICE(ZMR2, PDZZ,ZFR2) ! -d(w'r'2 )/dz !CALL GZ_W_M_DEVICE(ZMTHR,PDZZ,ZFTHR) ! -d(w'th'r')/dz -!$acc kernels +!$acc kernels present_cr(ZFWTH,ZFTH2,ZFWR,ZFR2,ZFTHR) ZFWTH = -ZFWTH !ZFWR = -ZFWR ZFTH2 = -ZFTH2 @@ -1150,7 +1156,7 @@ IF (HTOM=='TM06') THEN ZFTHR = 0. !$acc end kernels ELSE -!$acc kernels +!$acc kernels present_cr(ZFWTH,ZFWR,ZFTH2,ZFR2,ZFTHR) ZFWTH(:,:,:) = 0. ZFWR(:,:,:) = 0. ZFTH2(:,:,:) = 0. 
@@ -1170,13 +1176,13 @@ if ( lbudget_w ) call Budget_store_init( tbudgets(NBUDGET_W ), 'VTURB', prws ( if ( lbudget_th ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present( ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr( ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1187,12 +1193,12 @@ end if if ( lbudget_rv ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1236,13 +1242,13 @@ if ( lbudget_w ) call Budget_store_end( tbudgets(NBUDGET_W), 'VTURB', prws(:, :, if ( lbudget_th ) then if ( krri >= 1 .and. 
krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr(ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1253,12 +1259,12 @@ end if if ( lbudget_rv ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1283,13 +1289,13 @@ if ( hturbdim == '3DIM' ) then if (lbudget_th) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr(ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1300,12 +1306,12 @@ if ( hturbdim == '3DIM' ) then if ( lbudget_rv ) then if ( krri >= 1 .and. 
krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1345,13 +1351,13 @@ if ( hturbdim == '3DIM' ) then if ( lbudget_th ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr(ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1362,12 +1368,12 @@ if ( hturbdim == '3DIM' ) then if ( lbudget_rv ) then if ( krri >= 1 .and. 
krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1399,7 +1405,7 @@ end if PTHP = PTHP + XG / PTHVREF * MZF( PFLXZTHVMF ) #else CALL MZF_DEVICE( PFLXZTHVMF, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PTHP) PTHP(:,:,:) = PTHP(:,:,:) + XG / PTHVREF(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels #endif @@ -1481,7 +1487,7 @@ END IF ! IF ( KRRL >= 1 ) THEN IF ( KRRI >= 1 ) THEN -!$acc kernels +!$acc kernels present_cr(PRT,PRRS,PTHLT,PRTHLS) PRT(:,:,:,1) = PRT(:,:,:,1) - PRT(:,:,:,2) - PRT(:,:,:,4) PRRS(:,:,:,1) = PRRS(:,:,:,1) - PRRS(:,:,:,2) - PRRS(:,:,:,4) PTHLT(:,:,:) = PTHLT(:,:,:) + ZLVOCPEXNM(:,:,:) * PRT(:,:,:,2) & @@ -1499,7 +1505,7 @@ IF ( KRRL >= 1 ) THEN CALL MNH_MEM_RELEASE() #endif ELSE -!$acc kernels present( zlocpexnm ) +!$acc kernels present_cr(PRT,PRRS,PTHLT,PRTHLS, zlocpexnm ) PRT(:,:,:,1) = PRT(:,:,:,1) - PRT(:,:,:,2) PRRS(:,:,:,1) = PRRS(:,:,:,1) - PRRS(:,:,:,2) PTHLT(:,:,:) = PTHLT(:,:,:) + ZLOCPEXNM(:,:,:) * PRT(:,:,:,2) @@ -1533,7 +1539,7 @@ IF (LLES_CALL) THEN #ifndef MNH_OPENACC CALL LES_MEAN_SUBGRID((PSFU*PSFU+PSFV*PSFV)**0.25,X_LES_USTAR) #else -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,1) = (PSFU*PSFU+PSFV*PSFV)**0.25 !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP1_DEVICE(:,:,1),X_LES_USTAR) @@ -1579,17 +1585,17 @@ IF (LLES_CALL) THEN #else IF (HTURBDIM=="1DIM") THEN !$acc data copy(X_LES_SUBGRID_U2,X_LES_SUBGRID_V2,X_LES_SUBGRID_W2,X_LES_RES_ddz_Thl_SBG_W2) -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE = 2./3.*PTKET !$acc end kernels 
CALL LES_MEAN_SUBGRID(ZTMP1_DEVICE,X_LES_SUBGRID_U2) -!$acc kernels +!$acc kernels present_cr(X_LES_SUBGRID_V2,X_LES_SUBGRID_W2) X_LES_SUBGRID_V2(:,:,:) = X_LES_SUBGRID_U2(:,:,:) X_LES_SUBGRID_W2(:,:,:) = X_LES_SUBGRID_U2(:,:,:) !$acc end kernels CALL GZ_M_W_DEVICE(KKA,KKU,KKL,PTHLT,PDZZ,ZTMP2_DEVICE) CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*ZTMP3_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_RES_ddz_Thl_SBG_W2) @@ -1598,7 +1604,7 @@ IF (LLES_CALL) THEN !$acc data copy(X_LES_RES_ddz_Rt_SBG_W2) CALL GZ_M_W_DEVICE(KKA,KKU,KKL,PRT(:,:,:,1),PDZZ,ZTMP2_DEVICE) CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*PTKET*ZTMP3_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_RES_ddz_Rt_SBG_W2) @@ -1608,7 +1614,7 @@ IF (LLES_CALL) THEN DO JSV=1,NSV CALL GZ_M_W_DEVICE(KKA,KKU,KKL,PSVT(:,:,:,JSV),PDZZ,ZTMP2_DEVICE) CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*PTKET*ZTMP3_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) @@ -1627,7 +1633,7 @@ IF (LLES_CALL) THEN ! !* presso-correlations for subgrid Tke are equal to zero. ! -!$acc kernels +!$acc kernels present_cr(ZLEPS) ZLEPS = 0. !ZLEPS is used as a work array (not used anymore) !$acc end kernels CALL LES_MEAN_SUBGRID(ZLEPS,X_LES_SUBGRID_WP) @@ -1757,7 +1763,7 @@ geast = HLBCX(2) /= "CYCL" .AND. LEAST_ll() gsouth = HLBCY(1) /= "CYCL" .AND. LSOUTH_ll() gnorth = HLBCY(2) /= "CYCL" .AND. 
LNORTH_ll() -!$acc kernels +!$acc kernels present_cr(PUSLOPE) IF ( gwest ) THEN PUSLOPE(IIB-1,:)=PUSLOPE(IIB,:) PVSLOPE(IIB-1,:)=PVSLOPE(IIB,:) @@ -1847,13 +1853,13 @@ CALL MNH_MEM_GET( zrvsat , size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) #endif -!$acc data present( zrvsat, zdrvsatdt ) +!$acc data present_cr( zrvsat, zdrvsatdt ) ZEPS = XMV / XMD ! !* 1.1 Lv/Cph at t ! -!$acc kernels ! present(ZRVSAT,ZDRVSATDT) ! present(PLOCPEXN) ! present ZDRVSATDT) +!$acc kernels present_cr(PLOCPEXN) ! present(ZRVSAT,ZDRVSATDT) ! present(PLOCPEXN) ! present ZDRVSATDT) PLOCPEXN(:,:,:) = ( PLTT + (XCPV-PC) * (PT(:,:,:)-XTT) ) / PCP(:,:,:) ! !* 1.2 Saturation vapor pressure at t @@ -1885,9 +1891,9 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) PAMOIST(:,:,:)= 0.5 / ( 1.0 + ZDRVSATDT(:,:,:) * PLOCPEXN(:,:,:) ) ! !$acc end kernels -!$acc kernels !* 1.6 compute Atheta #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$acc kernels present_cr(PATHETA) PATHETA(:,:,:)= PAMOIST(:,:,:) * PEXN(:,:,:) * & ( ( ZRVSAT(:,:,:) - PRT(:,:,:,1) ) * PLOCPEXN(:,:,:) / & ( 1. + ZDRVSATDT(:,:,:) * PLOCPEXN(:,:,:) ) * & @@ -1899,7 +1905,9 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) ) & - ZDRVSATDT(:,:,:) & ) +!$acc end kernels #else +!$acc kernels !$acc_nv loop independent collapse(3) DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PATHETA(JI,JJ,JK)= PAMOIST(JI,JJ,JK) * PEXN(JI,JJ,JK) * & @@ -1914,11 +1922,12 @@ DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZDRVSATDT(JI,JJ,JK) & ) ENDDO -#endif !$acc end kernels +#endif !* 1.7 Lv/Cph/Exner at t-1 ! -!$acc kernels present(PLOCPEXN) +!!$acc kernels present(PLOCPEXN) +!$acc kernels !$acc_nv loop independent collapse(3) DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PLOCPEXN(JI,JJ,JK) = PLOCPEXN(JI,JJ,JK) / PEXN(JI,JJ,JK) @@ -2048,8 +2057,10 @@ ELSE !* 3.1 BL89 mixing length ! 
------------------ CASE ('BL89','RM17') -!$acc kernels present(ZSHEAR) +!$acc kernels present_cr(ZSHEAR) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSHEAR(:, :, : ) = 0. +!$mnh_end_expand_array() !$acc end kernels CALL BL89(KKA,KKU,KKL,PZZ,PDZZ,PTHVREF,ZTHLM,KRR,ZRM,PTKET,ZSHEAR,ZLM_CLOUD) ! @@ -2226,7 +2237,7 @@ GOCEAN = LOCEAN !$acc present( ztmp1_device, ztmp2_device ) IF (ODZ) THEN -!$acc kernels +!$acc kernels present_cr(PLM) ! Dz is take into account in the computation DO JK = KKTB,KKTE ! 1D turbulence scheme PLM(:,:,JK) = PZZ(:,:,JK+KKL) - PZZ(:,:,JK) @@ -2240,7 +2251,7 @@ IF (ODZ) THEN PLM(:,:,:) = SQRT( PLM(:,:,:)*MXF(PDXX(:,:,:)) ) #else CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:) = SQRT( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) ) !$acc end kernels #endif @@ -2254,21 +2265,23 @@ IF (ODZ) THEN #else CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE ) CALL MYF_DEVICE( PDYY, ZTMP2_DEVICE ) -!$acc kernels #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$acc kernels present_cr(PLM) PLM(:,:,:) = ( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) ) ** (1./3.) +!$acc end kernels #else +!$acc kernels DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK), 1./3. ) ENDDO -#endif !$acc end kernels +#endif #endif END IF END IF ELSE ! Dz not taken into account in computation to assure invariability with vertical grid mesh -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:)=1.E10 !$acc end kernels IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme @@ -2509,7 +2522,7 @@ CALL MNH_MEM_GET( ZTMP2_DEVICE, JIU, JJU, JKU ) ! ! initialize the mixing length with the mesh grid -!$acc kernels +!$acc kernels present_cr(PLM) ! 1D turbulence scheme PLM(:,:,KKTB:KKTE) = PZZ(:,:,KKTB+KKL:KKTE+KKL) - PZZ(:,:,KKTB:KKTE) PLM(:,:,KKU) = PLM(:,:,KKE) @@ -2521,7 +2534,7 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 
3D turbulence scheme PLM(:,:,:) = SQRT( PLM(:,:,:)*MXF(PDXX(:,:,:)) ) #else CALL MXF_DEVICE(PDXX,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:) = SQRT( PLM(:,:,:)*ZTMP1_DEVICE ) !$acc end kernels if ( mppdb_initialized ) then @@ -2540,7 +2553,7 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme #else CALL MXF_DEVICE(PDXX,ZTMP1_DEVICE) CALL MYF_DEVICE(PDYY,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:) = (PLM(:,:,:)*ZTMP1_DEVICE*ZTMP2_DEVICE ) ** (1./3.) !$acc end kernels if ( mppdb_initialized ) then diff --git a/src/MNH/turb_hor_dyn_corr.f90 b/src/MNH/turb_hor_dyn_corr.f90 index 3472c9da381ad65a7bc6a6e6d6d5a24db745c214..d504591e9f6bfcf6b684fa7f536dbb7ad55debaa 100644 --- a/src/MNH/turb_hor_dyn_corr.f90 +++ b/src/MNH/turb_hor_dyn_corr.f90 @@ -175,7 +175,7 @@ USE MODE_MPPDB #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif -#ifdef MNH_BITREP_OMP +#ifdef MNH_COMPILER_CCE !$mnh_undef(LOOP) !$mnh_undef(OPENACC) #endif @@ -395,7 +395,8 @@ IKU = SIZE(PUM,3) ! ! !$acc kernels async(1) -#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else !$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) @@ -619,7 +620,8 @@ CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) !$acc wait(1) ! !$acc kernels async(4) present_cr(ZFLX,ZDIRSINZW) -#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & @@ -779,7 +781,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !!! wait for the computation of ZWORK and PDP !$acc wait(2) ! 
- !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) @@ -844,7 +846,8 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) ! ! extrapolates this flux under the ground with the surface flux !$acc kernels async(3) present_cr(ZFLX) -#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & @@ -1068,7 +1071,8 @@ ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) ! (-2./3.) * PTP(:,:,IKB:IKB) ! extrapolates this flux under the ground with the surface flux !$acc kernels async(3) present_cr(ZFLX) -#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * ZDIRSINZW(:,:)**2 & + PTAU33M(:,:) * PDIRCOSZW(:,:)**2 & diff --git a/src/MNH/turb_hor_sv_flux.f90 b/src/MNH/turb_hor_sv_flux.f90 index bd352dcef38aa6f2317a283bfe9f001bdf6e6128..9cf51e163bbca854699e59e3eb926b34372c548c 100644 --- a/src/MNH/turb_hor_sv_flux.f90 +++ b/src/MNH/turb_hor_sv_flux.f90 @@ -310,7 +310,7 @@ DO JSV=1,ISV #else CALL MXM_DEVICE(PK,ZTMP1_DEVICE) CALL GX_M_U_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLXX) ZFLXX(:,:,:) = -ZCSV * ZTMP1_DEVICE * ZTMP2_DEVICE #endif ZFLXX(:,:,IKE+1) = ZFLXX(:,:,IKE) @@ -330,13 +330,13 @@ DO JSV=1,ISV #else CALL MXM_DEVICE( PK(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1) ) CALL DXM_DEVICE(PSVM(:,:,IKB:IKB,JSV),ZTMP2_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,1) = ZCOEFF(:,:,IKB+2)*PSVM(:,:,IKB+2,JSV) & +ZCOEFF(:,:,IKB+1)*PSVM(:,:,IKB+1,JSV) & +ZCOEFF(:,:,IKB)*PSVM(:,:,IKB,JSV) !$acc end kernels CALL MXM_DEVICE( ZTMP3_DEVICE(:,:,1:1), ZTMP4_DEVICE(:,:,1:1) ) -!$acc 
kernels +!$acc kernels present_cr(ZFLXX) ZFLXX(:,:,IKB) = -ZCSV * ZTMP1_DEVICE(:,:,1) * & ( ZTMP2_DEVICE(:,:,1) * PINV_PDXX(:,:,IKB) & - ZTMP4_DEVICE(:,:,1) * 0.5 * ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB) ) & @@ -346,14 +346,14 @@ DO JSV=1,ISV #endif ! extrapolates the flux under the ground so that the vertical average with ! the IKB flux gives the ground value -!$acc kernels +!$acc kernels present_cr(ZWORK2D) ZWORK2D(:,:,1)=PSFSVM(:,:,JSV) * PDIRCOSXW(:,:) !$acc end kernels #ifndef MNH_OPENACC ZFLXX(:,:,IKB-1:IKB-1) = 2. * MXM( ZWORK2D(:,:,1:1) ) - ZFLXX(:,:,IKB:IKB) #else CALL MXM_DEVICE( ZWORK2D(:,:,1:1),ZTMP1_DEVICE(:,:,1:1) ) -!$acc kernels +!$acc kernels present_cr(ZFLXX) ZFLXX(:,:,IKB-1) = 2. * ZTMP1_DEVICE(:,:,1) - ZFLXX(:,:,IKB) !$acc end kernels #endif @@ -391,7 +391,7 @@ DO JSV=1,ISV ! CALL GX_W_UW_DEVICE(PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MZM_DEVICE(ZFLXX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) @@ -400,7 +400,7 @@ DO JSV=1,ISV ! CALL GX_M_M_DEVICE(PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MXF_DEVICE(ZFLXX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Sv_SBG_UaSv(:,:,:,JSV), .TRUE. 
) @@ -422,7 +422,7 @@ DO JSV=1,ISV #else CALL MYM_DEVICE(PK,ZTMP1_DEVICE) CALL GY_M_V_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDYY,PDZZ,PDZY,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZFLXY) ZFLXY(:,:,:)=-ZCSV * ZTMP1_DEVICE * ZTMP2_DEVICE #endif ZFLXY(:,:,IKE+1) = ZFLXY(:,:,IKE) @@ -443,13 +443,13 @@ DO JSV=1,ISV #else CALL MYM_DEVICE( PK(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1) ) CALL DYM_DEVICE( PSVM(:,:,IKB:IKB,JSV), ZTMP2_DEVICE(:,:,1:1) ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,1) = ZCOEFF(:,:,IKB+2)*PSVM(:,:,IKB+2,JSV) & +ZCOEFF(:,:,IKB+1)*PSVM(:,:,IKB+1,JSV) & +ZCOEFF(:,:,IKB)*PSVM(:,:,IKB,JSV) !$acc end kernels CALL MYM_DEVICE( ZTMP3_DEVICE(:,:,1:1), ZTMP4_DEVICE(:,:,1:1) ) - !$acc kernels + !$acc kernels present_cr(ZFLXY) ZFLXY(:,:,IKB) = -ZCSV * ZTMP1_DEVICE(:,:,1) * & ( ZTMP2_DEVICE(:,:,1) * PINV_PDYY(:,:,IKB) & - ZTMP4_DEVICE(:,:,1) * 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB) ) & @@ -459,14 +459,14 @@ DO JSV=1,ISV #endif ! extrapolates the flux under the ground so that the vertical average with ! the IKB flux gives the ground value -!$acc kernels +!$acc kernels present_cr(ZWORK2D) ZWORK2D(:,:,1)=PSFSVM(:,:,JSV) * PDIRCOSYW(:,:) !$acc end kernels #ifndef MNH_OPENACC ZFLXY(:,:,IKB-1:IKB-1) = 2. * MYM( ZWORK2D(:,:,1:1) ) - ZFLXY(:,:,IKB:IKB) #else CALL MYM_DEVICE( ZWORK2D(:,:,1:1), ZTMP1_DEVICE(:,:,1:1) ) - !$acc kernels + !$acc kernels present_cr(ZFLXY) ZFLXY(:,:,IKB-1) = 2. * ZTMP1_DEVICE(:,:,1) - ZFLXY(:,:,IKB) !$acc end kernels #endif @@ -510,7 +510,7 @@ DO JSV=1,ISV ! CALL GY_W_VW_DEVICE(PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZM_DEVICE(ZFLXY,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) @@ -519,7 +519,7 @@ DO JSV=1,ISV ! 
CALL GY_M_M_DEVICE(PSVM(:,:,:,JSV),PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MYF_DEVICE(ZFLXY,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Sv_SBG_UaSv(:,:,:,JSV) , .TRUE. ) @@ -562,64 +562,64 @@ DO JSV=1,ISV END IF #else CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE * ZFLXX * PINV_PDXX !$acc end kernels CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE ) IF (.NOT. L2D) THEN CALL MYM_DEVICE(PRHODJ,ZTMP3_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP4_DEVICE) ZTMP4_DEVICE = ZTMP1_DEVICE * ZFLXY * PINV_PDYY !$acc end kernels CALL DYF_DEVICE( ZTMP4_DEVICE, ZTMP2_DEVICE ) IF (.NOT. LFLAT) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZFLXX * PINV_PDXX !$acc end kernels CALL MZM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP4_DEVICE * PDZX !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZFLXY * PINV_PDYY !$acc end kernels CALL MZM_DEVICE(ZTMP3_DEVICE,ZTMP5_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP5_DEVICE * PDZY !$acc end kernels CALL MYF_DEVICE( ZTMP3_DEVICE,ZTMP5_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = PMZM_PRHODJ * PINV_PDZZ * ( ZTMP4_DEVICE + ZTMP5_DEVICE ) !$acc end kernels CALL DZF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(PRSVS) PRSVS(:,:,:,JSV) = PRSVS(:,:,:,JSV) - ZTMP1_DEVICE - ZTMP2_DEVICE + ZTMP4_DEVICE !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRSVS) PRSVS(:,:,:,JSV) = PRSVS(:,:,:,JSV) - ZTMP1_DEVICE - ZTMP2_DEVICE !$acc end kernels END IF ELSE IF (.NOT. 
LFLAT) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZFLXX * PINV_PDXX !$acc end kernels CALL MZM_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP4_DEVICE * PDZX !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = PMZM_PRHODJ * PINV_PDZZ * ZTMP4_DEVICE !$acc end kernels CALL DZF_DEVICE( ZTMP3_DEVICE, ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(PRSVS) PRSVS(:,:,:,JSV) = PRSVS(:,:,:,JSV) - ZTMP1_DEVICE + ZTMP2_DEVICE !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRSVS) PRSVS(:,:,:,JSV) = PRSVS(:,:,:,JSV) - ZTMP1_DEVICE !$acc end kernels END IF diff --git a/src/MNH/turb_hor_thermo_corr.f90 b/src/MNH/turb_hor_thermo_corr.f90 index 3c4d4877355864790db10e39043841e4ddcf0806..26005448572dfd181b3586af97b106ff147b390f 100644 --- a/src/MNH/turb_hor_thermo_corr.f90 +++ b/src/MNH/turb_hor_thermo_corr.f90 @@ -301,7 +301,7 @@ IKU = NKMAX + 2 * JPVEXT ! ! compute the coefficients for the uncentred gradient computation near the ! ground -!$acc kernels +!$acc kernels present_cr(ZCOEFF) ZCOEFF(:,:,IKB+2)= - PDZZ(:,:,IKB+1) / & ( (PDZZ(:,:,IKB+2)+PDZZ(:,:,IKB+1)) * PDZZ(:,:,IKB+2) ) ZCOEFF(:,:,IKB+1)= (PDZZ(:,:,IKB+2)+PDZZ(:,:,IKB+1)) / & @@ -343,20 +343,24 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & IF (.NOT. 
L2D) THEN CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,:) = XCTV * PLM(:,:,:) * PLEPS(:,:,:) * ( ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 ) #else - ZFLX(:,:,:) = XCTV * PLM(:,:,:) * PLEPS(:,:,:) * ( BR_P2(ZTMP1_DEVICE) + BR_P2(ZTMP2_DEVICE) ) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:) = XCTV * PLM(:,:,:) * PLEPS(:,:,:) * ( BR_P2(ZTMP1_DEVICE(:,:,:)) + BR_P2(ZTMP2_DEVICE(:,:,:)) ) +!$mnh_end_expand_array() #endif !$acc end kernels ELSE CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,:) = XCTV * PLM(:,:,:) * PLEPS(:,:,:) * ZTMP1_DEVICE(:,:,:)**2 #else - ZFLX(:,:,:) = XCTV * PLM(:,:,:) * PLEPS(:,:,:) * BR_P2(ZTMP1_DEVICE) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:) = XCTV * PLM(:,:,:) * PLEPS(:,:,:) * BR_P2(ZTMP1_DEVICE(:,:,:)) +!$mnh_end_expand_array() #endif !$acc end kernels END IF @@ -384,20 +388,21 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ) ** 2 ) #else CALL DXM_DEVICE(PTHLM(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1) * PINV_PDXX(:,:,IKB) !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE(:,:,1:1), ZTMP3_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP4_DEVICE(:,:,1:1)) ! CALL DYM_DEVICE(PTHLM(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1) * PINV_PDYY(:,:,IKB) !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE(:,:,1:1), ZTMP1_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB), ZTMP2_DEVICE(:,:,1:1)) ! 
-!$acc kernels +!$acc kernels present_cr(ZFLX) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) ZFLX(:,:,IKB) = XCTV * PLM(:,:,IKB) & * PLEPS(:,:,IKB) * ( & ( ZTMP3_DEVICE(:,:,1) & @@ -414,12 +419,15 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ) * 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB) ) & / ZTMP2_DEVICE(:,:,1) & ) ** 2 ) +!$mnh_end_expand_array() #endif ! ZFLX(:,:,IKB-1) = ZFLX(:,:,IKB) ! IF ( KRRL > 0 ) THEN +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(:,:,:) = ZFLX(:,:,:) * PATHETA(:,:,:) * PATHETA(:,:,:) +!$mnh_end_expand_array() END IF !$acc end kernels ! @@ -457,23 +465,23 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_Thl2, .TRUE. ) ! CALL MZF_DEVICE( PWM, ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_W_SBG_Thl2, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -2.*XCTD*SQRT(PTKEM(:,:,:))*ZFLX(:,:,:)/PLEPS(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE ,X_LES_SUBGRID_DISS_Thl2, .TRUE. ) ! CALL ETHETA(KRR,KRRI,PTHLM,PRM,PLOCPEXNM,PATHETA,PSRCM, ZA(:,:,:)) -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_ThlThv, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XG/PTHVREF(:,:,:)/3.*ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_ThlPz, .TRUE. ) @@ -518,7 +526,7 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. 
tpfile%lopened ) & ELSE CALL GX_M_M_DEVICE(PTHLM ,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) ZFLX(:,:,:)=PLM(:,:,:) * PLEPS(:,:,:) * (ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:)) * (XCHT1+XCHT2) !$acc end kernels END IF @@ -559,32 +567,32 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ) ) #else CALL DXM_DEVICE(PTHLM(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1)* PINV_PDXX(:,:,IKB) !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE(:,:,1:1), ZTMP3_DEVICE(:,:,1:1) ) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) ! CALL DXM_DEVICE(PRM(:,:,IKB:IKB,1), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1)* PINV_PDXX(:,:,IKB) !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE(:,:,1:1), ZTMP5_DEVICE(:,:,1:1) ) ! CALL DYM_DEVICE(PTHLM(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1)* PINV_PDYY(:,:,IKB) !$acc end kernels CALL MYF_DEVICE( ZTMP2_DEVICE(:,:,1:1), ZTMP6_DEVICE(:,:,1:1) ) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP7_DEVICE(:,:,1:1)) ! CALL DYM_DEVICE(PRM(:,:,IKB:IKB,1), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1)* PINV_PDYY(:,:,IKB) !$acc end kernels CALL MYF_DEVICE( ZTMP2_DEVICE(:,:,1:1), ZTMP8_DEVICE(:,:,1:1) ) ! -!$acc kernels +!$acc kernels present_cr(ZFLX) ZFLX(:,:,IKB) = (XCHT1+XCHT2) * PLM(:,:,IKB) & * PLEPS(:,:,IKB) * ( & ( ZTMP3_DEVICE(:,:,1) & @@ -620,8 +628,10 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ZFLX(:,:,IKB-1) = ZFLX(:,:,IKB) ! IF ( KRRL > 0 ) THEN +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(:,:,:) = ZWORK(:,:,:) + & 2. 
* PATHETA(:,:,:) * PAMOIST(:,:,:) * ZFLX(:,:,:) +!$mnh_end_expand_array() END IF !$acc end kernels ! stores <THl Rnp> @@ -660,33 +670,33 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_ThlRt, .TRUE. ) ! CALL MZF_DEVICE( PWM, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_W_SBG_ThlRt, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XCTD*SQRT(PTKEM(:,:,:))*ZFLX(:,:,:)/PLEPS(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE ,X_LES_SUBGRID_DISS_ThlRt, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_RtThv, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XG/PTHVREF(:,:,:)/3.*ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_RtPz,.TRUE.) ! CALL EMOIST(KRR,KRRI,PTHLM,PRM,PLOCPEXNM,PAMOIST,PSRCM,ZA(:,:,:)) -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_ThlThv, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XG/PTHVREF(:,:,:)/3.*ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_ThlPz,.TRUE.) @@ -722,20 +732,24 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & IF (.NOT. 
L2D) THEN CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,:) = XCHV * PLM(:,:,:) * PLEPS(:,:,:) * ( ZTMP1_DEVICE(:,:,:)**2 + ZTMP2_DEVICE(:,:,:)**2 ) #else - ZFLX(:,:,:) = XCHV * PLM(:,:,:) * PLEPS(:,:,:) * ( BR_P2(ZTMP1_DEVICE) + BR_P2(ZTMP2_DEVICE) ) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:) = XCHV * PLM(:,:,:) * PLEPS(:,:,:) * ( BR_P2(ZTMP1_DEVICE(:,:,:)) + BR_P2(ZTMP2_DEVICE(:,:,:)) ) +!$mnh_end_expand_array() #endif !$acc end kernels ELSE CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) #ifndef MNH_BITREP ZFLX(:,:,:) = XCHV * PLM(:,:,:) * PLEPS(:,:,:) * ZTMP1_DEVICE(:,:,:)**2 #else - ZFLX(:,:,:) = XCHV * PLM(:,:,:) * PLEPS(:,:,:) * BR_P2(ZTMP1_DEVICE) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:) = XCHV * PLM(:,:,:) * PLEPS(:,:,:) * BR_P2(ZTMP1_DEVICE(:,:,:)) +!$mnh_end_expand_array() #endif !$acc end kernels END IF @@ -762,20 +776,21 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ) ** 2 ) #else CALL DXM_DEVICE(PRM(:,:,IKB:IKB,1), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1)* PINV_PDXX(:,:,IKB) !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE(:,:,1:1), ZTMP3_DEVICE(:,:,1:1) ) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) ! CALL DYM_DEVICE(PRM(:,:,IKB:IKB,1), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,1) = ZTMP1_DEVICE(:,:,1)* PINV_PDYY(:,:,IKB) !$acc end kernels CALL MYF_DEVICE( ZTMP2_DEVICE(:,:,1:1), ZTMP5_DEVICE(:,:,1:1) ) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP6_DEVICE(:,:,1:1)) ! 
-!$acc kernels +!$acc kernels present_cr(ZFLX) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) ZFLX(:,:,IKB) = XCHV * PLM(:,:,IKB) & * PLEPS(:,:,IKB) * ( & ( ZTMP3_DEVICE(:,:,1) & @@ -792,12 +807,15 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ) * 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB) ) & / ZTMP6_DEVICE(:,:,1) & ) ** 2 ) +!$mnh_end_expand_array() #endif ! ZFLX(:,:,IKB-1) = ZFLX(:,:,IKB) ! IF ( KRRL > 0 ) THEN + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZWORK(:,:,:) = ZWORK(:,:,:)+ PAMOIST(:,:,:) * PAMOIST(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() END IF !$acc end kernels ! stores <Rnp Rnp> @@ -833,22 +851,22 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & CALL LES_MEAN_SUBGRID( ZFLX, X_LES_SUBGRID_Rt2, .TRUE. ) ! CALL MZF_DEVICE( PWM, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_W_SBG_Rt2, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_RtThv, .TRUE. ) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XG/PTHVREF(:,:,:)/3.*ZA(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_RtPz,.TRUE.) ! -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -2.*XCTD*SQRT(PTKEM(:,:,:))*ZFLX(:,:,:)/PLEPS(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_DISS_Rt2, .TRUE. ) @@ -865,7 +883,7 @@ IF ( ( KRRL > 0 .AND. OSUBG_COND) .OR. ( OTURB_FLX .AND. tpfile%lopened ) & ! IF ( KRRL > 0 ) THEN ! - !$acc kernels + !$acc kernels present_cr(PSIGS) PSIGS(:,:,:)=PSIGS(:,:,:)*PSIGS(:,:,:) + ZWORK(:,:,:) ! 
Extrapolate PSIGS at the ground and at the top PSIGS(:,:,IKB-1) = PSIGS(:,:,IKB) diff --git a/src/MNH/turb_hor_thermo_flux.f90 b/src/MNH/turb_hor_thermo_flux.f90 index 52cff51886aedfc51234d2c944b6cf074f25af2b..e1fc823fd6ab8fada0b6a76e5ed174d9b379b91c 100644 --- a/src/MNH/turb_hor_thermo_flux.f90 +++ b/src/MNH/turb_hor_thermo_flux.f90 @@ -331,7 +331,7 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) #else CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PTHLM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) @@ -359,7 +359,7 @@ ZTMP3_DEVICE(:,:,1) = ZCOEFF(:,:,IKB+2)*PTHLM(:,:,IKB+2) & +ZCOEFF(:,:,IKB )*PTHLM(:,:,IKB ) !$acc end kernels CALL MXM_DEVICE( ZTMP3_DEVICE(:,:,1:1), ZTMP4_DEVICE(:,:,1:1)) -!$acc kernels present_cr(ZFLX,ZTMP1_DEVICE) +!$acc kernels present_cr(ZFLX,ZTMP1_DEVICE,ZTMP2_DEVICE) ZFLX(:,:,IKB) = -XCSHF * ZTMP1_DEVICE(:,:,1) * & ( ZTMP2_DEVICE(:,:,1) * PINV_PDXX(:,:,IKB) - ZTMP4_DEVICE(:,:,1) & *0.5* ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB)) & @@ -373,7 +373,7 @@ ZFLX(:,:,IKB) = -XCSHF * ZTMP1_DEVICE(:,:,1) * & ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( SPREAD( PSFTHM(:,:)* PDIRCOSXW(:,:), 3,1) ) & - ZFLX(:,:,IKB:IKB) #else -! acc kernels +! acc kernels present_cr(ZTMP1_DEVICE) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZTMP1_DEVICE(JI,JJ,1) = PSFTHM(JI,JJ)* PDIRCOSXW(JI,JJ) END DO @@ -397,44 +397,44 @@ END IF #else IF (.NOT. 
LFLAT) THEN CALL MXM_DEVICE(PRHODJ, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE, ZTMP4_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) END DO !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PRTHLS) PRTHLS(:,:,:) = PRTHLS(:,:,:) - ZTMP3_DEVICE(:,:,:) + ZTMP4_DEVICE(:,:,:) !$acc end kernels ELSE CALL MXM_DEVICE(PRHODJ, ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:) * PINV_PDXX(:,:,:) !$acc end kernels CALL DXF_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) -!$acc kernels +!$acc kernels present_cr(PRTHLS) PRTHLS(:,:,:) = PRTHLS(:,:,:) - ZTMP3_DEVICE(:,:,:) !$acc end kernels END IF @@ -486,7 +486,7 @@ END IF #else IF ( KRRL >= 1 ) THEN IF (.NOT. 
LFLAT) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PATHETA(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MZM_DEVICE( ZTMP1_DEVICE, ZTMP4_DEVICE ) @@ -511,7 +511,7 @@ IF ( KRRL >= 1 ) THEN ZFLXC(:,:,:) = 2.*( ZTMP2_DEVICE(:,:,:) +ZTMP7_DEVICE(:,:,:) ) !$acc end kernels IF ( KRRI >= 1 ) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PATHETA(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MXM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) @@ -528,15 +528,15 @@ IF ( KRRL >= 1 ) THEN PRRS(:,:,:,4) = PRRS(:,:,:,4) + 2. * (- ZTMP2_DEVICE(:,:,:) + ZTMP4_DEVICE(:,:,:) )*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PATHETA(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MXM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:) *ZFLX(:,:,:)*PINV_PDXX(:,:,:) !$acc end kernels CALL DXF_DEVICE( ZTMP6_DEVICE, ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:)*ZTMP5_DEVICE(:,:,:)*PINV_PDZZ(:,:,:) !$acc end kernels CALL DZF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) @@ -545,19 +545,19 @@ IF ( KRRL >= 1 ) THEN !$acc end kernels END IF ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PATHETA(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MXM_DEVICE( ZTMP1_DEVICE,ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZFLXC) ZFLXC(:,:,:) = 2.*ZTMP4_DEVICE(:,:,:) !$acc end kernels IF ( KRRI >= 1 ) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:)*PINV_PDXX(:,:,:) !$acc end kernels CALL DXF_DEVICE( 
ZTMP1_DEVICE, ZTMP2_DEVICE ) @@ -566,7 +566,7 @@ IF ( KRRL >= 1 ) THEN PRRS(:,:,:,4) = PRRS(:,:,:,4) - 2. * ZTMP2_DEVICE(:,:,:)*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:)*PINV_PDXX(:,:,:) !$acc end kernels CALL DXF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) @@ -618,7 +618,7 @@ IF (KSPLT==1 .AND. LLES_CALL) THEN ! CALL GX_W_UW_DEVICE(PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) @@ -627,14 +627,14 @@ IF (KSPLT==1 .AND. LLES_CALL) THEN ! CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MXF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE,X_LES_RES_ddxa_Thl_SBG_UaThl , .TRUE. ) ! IF (KRR>=1) THEN CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE,X_LES_RES_ddxa_Rt_SBG_UaThl , .TRUE. 
) @@ -764,7 +764,7 @@ END IF #else CALL MXM_DEVICE( PK, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLX) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) @@ -782,7 +782,7 @@ END DO +ZCOEFF(:,:,IKB )*PRM(:,:,IKB ,1) !$acc end kernels CALL MXM_DEVICE(ZTMP3_DEVICE(:,:,1:1),ZTMP4_DEVICE(:,:,1:1)) -!$acc kernels present_cr(ZFLX) +!$acc kernels present_cr(ZFLX,ZTMP1_DEVICE) ZFLX(:,:,IKB) = -XCHF * ZTMP1_DEVICE(:,:,1) * & ( ZTMP2_DEVICE(:,:,1) * PINV_PDXX(:,:,IKB) & -ZTMP4_DEVICE(:,:,1) & @@ -803,28 +803,28 @@ END DO ! IF (.NOT. LFLAT) THEN CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDXX(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK)*ZTMP4_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) @@ -839,7 +839,7 @@ END DO !$acc end kernels ELSE CALL MXM_DEVICE(PRHODJ,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = 
ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:) * PINV_PDXX(:,:,:) !$acc end kernels CALL DXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) @@ -851,7 +851,7 @@ END DO ! Compute the equivalent tendancy for Rc and Ri ! IF ( KRRL >= 1 ) THEN - !$acc kernels present_cr(ZTMP2_DEVICE) + !$acc kernels present_cr(ZTMP1_DEVICE,ZTMP2_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PAMOIST(:,:,:)*PSRCM(:,:,:) ZTMP2_DEVICE(:,:,:) = ZFLX(:,:,:)*PINV_PDXX(:,:,:) !$acc end kernels @@ -893,24 +893,24 @@ END DO !$acc end kernels END IF ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP4_DEVICE) ZTMP4_DEVICE(:,:,:) = ZTMP8_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL DXF_DEVICE(ZTMP4_DEVICE, ZTMP5_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP8_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZFLXC) ZFLXC(:,:,:) = ZFLXC(:,:,:) + 2.*ZTMP4_DEVICE(:,:,:) !$acc end kernels IF ( KRRI >= 1 ) THEN - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) - 2. * ZTMP5_DEVICE(:,:,:)*(1.0-PFRAC_ICE(:,:,:)) PRRS(:,:,:,4) = PRRS(:,:,:,4) - 2. * ZTMP5_DEVICE(:,:,:)*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) - 2. * ZTMP5_DEVICE(:,:,:) !$acc end kernels END IF @@ -944,7 +944,7 @@ END DO ! CALL GX_W_UW_DEVICE(PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) @@ -953,14 +953,14 @@ END DO ! CALL GX_M_M_DEVICE(PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MXF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaRt , .TRUE. ) ! 
CALL GX_M_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MXF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaRt , .TRUE. ) @@ -1139,7 +1139,7 @@ END IF #else CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PTHLM,PDYY,PDZZ,PDZY,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZFLX) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCSHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) @@ -1147,7 +1147,7 @@ END IF ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(ZFLX) ZFLX(:,:,:) = 0. !$acc end kernels END IF @@ -1173,7 +1173,7 @@ ZFLX(:,:,IKB) = -XCSHF * ZTMP1_DEVICE(:,:,1) * & ! extrapolates the flux under the ground so that the vertical average with ! the IKB flux gives the ground value ( warning the tangential surface ! flux has been set to 0 for the moment !! to be improved ) -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,1) = PSFTHM(:,:)* PDIRCOSYW(:,:) !$acc end kernels CALL MYM_DEVICE( ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1) ) @@ -1186,28 +1186,28 @@ ZFLX(:,:,IKB-1) = 2. * ZTMP2_DEVICE(:,:,1) - ZFLX(:,:,IKB) IF (.NOT. L2D) THEN IF (.NOT. 
LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE, ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) @@ -1222,7 +1222,7 @@ IF (.NOT. L2D) THEN !$acc end kernels ELSE CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:) * PINV_PDYY(:,:,:) !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) @@ -1237,7 +1237,7 @@ END IF !IF ( OSUBG_COND .AND. KRRL > 0 .AND. .NOT. L2D) THEN IF ( KRRL >= 1 .AND. .NOT. L2D) THEN IF (.NOT. LFLAT) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PATHETA(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MYM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) @@ -1275,11 +1275,11 @@ IF ( KRRL >= 1 .AND. .NOT. L2D) THEN PRRS(:,:,:,4) = PRRS(:,:,:,4) + 2. 
* (- ZTMP4_DEVICE(:,:,:) + ZTMP5_DEVICE(:,:,:) )*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:)*PINV_PDYY(:,:,:) !$acc end kernels CALL DYF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP6_DEVICE(:,:,:)*PINV_PDZZ(:,:,:) !$acc end kernels CALL DZF_DEVICE( ZTMP3_DEVICE, ZTMP5_DEVICE ) @@ -1288,29 +1288,29 @@ IF ( KRRL >= 1 .AND. .NOT. L2D) THEN !$acc end kernels END IF ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PATHETA(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MYM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL MYF_DEVICE( ZTMP1_DEVICE, ZTMP3_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZFLXC) ZFLXC(:,:,:) = 2.*ZTMP3_DEVICE(:,:,:) !$acc end kernels ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:)*PINV_PDYY(:,:,:) !$acc end kernels CALL DYF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) IF ( KRRI >= 1 ) THEN - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) - 2. * ZTMP2_DEVICE(:,:,:)*(1.0-PFRAC_ICE(:,:,:)) PRRS(:,:,:,4) = PRRS(:,:,:,4) - 2. * ZTMP2_DEVICE(:,:,:)*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) - 2. * ZTMP2_DEVICE(:,:,:) !$acc end kernels END IF @@ -1345,7 +1345,7 @@ IF (KSPLT==1 .AND. LLES_CALL) THEN ! CALL GY_W_VW_DEVICE(PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE, ZTMP4_DEVICE) @@ -1354,7 +1354,7 @@ IF (KSPLT==1 .AND. LLES_CALL) THEN ! 
CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MYF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaThl , .TRUE. ) @@ -1365,7 +1365,7 @@ IF (KSPLT==1 .AND. LLES_CALL) THEN !$acc data copy(X_LES_RES_ddxa_Rt_SBG_UaThl) CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MYF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE,X_LES_RES_ddxa_Rt_SBG_UaThl , .TRUE. ) @@ -1506,7 +1506,7 @@ IF (KRR/=0) THEN IF (.NOT. L2D) THEN CALL MYM_DEVICE( PK, ZTMP1_DEVICE ) CALL GY_M_V_DEVICE(1,IKU,1,PRM(:,:,:,1),PDYY,PDZZ,PDZY, ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZFLX) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZFLX(JI,JJ,JK) = -XCHF * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) @@ -1514,7 +1514,7 @@ IF (KRR/=0) THEN ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(ZFLX) ZFLX(:,:,:) = 0. !$acc end kernels END IF @@ -1539,7 +1539,7 @@ IF (KRR/=0) THEN ! extrapolates the flux under the ground so that the vertical average with ! the IKB flux gives the ground value ( warning the tangential surface ! flux has been set to 0 for the moment !! to be improved ) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,1) = PSFRM(:,:)* PDIRCOSYW(:,:) !$acc end kernels CALL MYM_DEVICE( ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1) ) @@ -1552,7 +1552,7 @@ IF (KRR/=0) THEN IF (.NOT. L2D) THEN IF (.NOT. 
LFLAT) THEN CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) @@ -1560,21 +1560,21 @@ IF (KRR/=0) THEN !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK) * PINV_PDYY(JI,JJ,JK) END DO !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO !$acc end kernels CALL MYF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) !$acc_nv loop independent collapse(3) DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZTMP1_DEVICE(JI,JJ,JK) = PMZM_PRHODJ(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) @@ -1590,7 +1590,7 @@ IF (KRR/=0) THEN !$acc end kernels ELSE CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:) * PINV_PDYY(:,:,:) !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) @@ -1604,7 +1604,7 @@ IF (KRR/=0) THEN ! IF ( KRRL >= 1 .AND. .NOT. L2D) THEN ! Sub-grid condensation IF (.NOT. 
LFLAT) THEN - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PAMOIST(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MYM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) @@ -1637,39 +1637,39 @@ IF (KRR/=0) THEN ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)* PINV_PDZZ(:,:,:) !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) + 2. * (- ZTMP3_DEVICE(:,:,:) + ZTMP4_DEVICE(:,:,:) )*(1.0-PFRAC_ICE(:,:,:)) PRRS(:,:,:,4) = PRRS(:,:,:,4) + 2. * (- ZTMP3_DEVICE(:,:,:) + ZTMP4_DEVICE(:,:,:) )*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) + 2. * (- ZTMP3_DEVICE(:,:,:) + ZTMP4_DEVICE(:,:,:) ) !$acc end kernels END IF ELSE - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PRHODJ(:,:,:)*PAMOIST(:,:,:)*PSRCM(:,:,:) !$acc end kernels CALL MYM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP2_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL MXF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZFLXC) ZFLXC(:,:,:) = ZFLXC(:,:,:) + 2.*ZTMP4_DEVICE(:,:,:) !$acc end kernels ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = ZTMP3_DEVICE(:,:,:)* PINV_PDYY(:,:,:) !$acc end kernels CALL DYF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) IF ( KRRI >= 1 ) THEN - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) - 2. * ZTMP2_DEVICE(:,:,:)*(1.0-PFRAC_ICE(:,:,:)) PRRS(:,:,:,4) = PRRS(:,:,:,4) - 2. * ZTMP2_DEVICE(:,:,:)*PFRAC_ICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRRS) PRRS(:,:,:,2) = PRRS(:,:,:,2) - 2. * ZTMP2_DEVICE(:,:,:) !$acc end kernels END IF @@ -1703,7 +1703,7 @@ IF (KRR/=0) THEN ! 
CALL GY_W_VW_DEVICE(PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZM_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE, ZTMP4_DEVICE) @@ -1712,14 +1712,14 @@ IF (KRR/=0) THEN ! CALL GY_M_M_DEVICE(PTHLM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MYF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaRt , .TRUE. ) ! CALL GY_M_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MYF_DEVICE(ZFLX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP3_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaRt , .TRUE. ) diff --git a/src/MNH/turb_hor_uv.f90 b/src/MNH/turb_hor_uv.f90 index 0a11a382961e68d936cd90c57dc7889e4237bf28..f8aff35b3f6bedd7cf6309c2ebab845e306ce294 100644 --- a/src/MNH/turb_hor_uv.f90 +++ b/src/MNH/turb_hor_uv.f90 @@ -364,13 +364,13 @@ IF (.NOT. L2D) THEN END DO !CONCURRENT !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(ZFLX) ZFLX(:,:,:)= - XCMFS * ZTMP2_DEVICE * (GX_V_UV_PVM) !$acc end kernels END IF #endif ! -!$acc kernels +!$acc kernels present_cr(ZFLX) ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) !$acc end kernels ! @@ -437,7 +437,7 @@ ZTMP6_DEVICE(:,:,1) = (PVM(:,:,IKB+1)-PVM(:,:,IKB))*(1./ZTMP3_DEVICE(:,:,1)+1./Z !$acc end kernels CALL MXM_DEVICE(ZTMP6_DEVICE(:,:,1:1),ZTMP4_DEVICE(:,:,1:1)) ! -!$acc kernels +!$acc kernels present_cr(ZTMP6_DEVICE) ZTMP6_DEVICE(:,:,1:1) = (PDZX(:,:,IKB+1:IKB+1)+PDZX(:,:,IKB:IKB)) !$acc end kernels CALL MYM_DEVICE(ZTMP6_DEVICE(:,:,1:1),ZTMP2_DEVICE(:,:,1:1)) @@ -465,13 +465,10 @@ ZFLX(:,:,IKB) = - XCMFS * ZTMP2_DEVICE(:,:,1) * ( ZTMP5_DEVICE(:,:,1) + ZTMP6_DE #endif ! ! 
extrapolates this flux under the ground with the surface flux -#ifdef MNH_COMPILER_CCE -!$acc kernels present(ZFLX) -#else -!$acc kernels -#endif +!$acc parallel present_cr(ZFLX) #ifndef MNH_BITREP -ZFLX(:,:,IKB-1) = & +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * PDIRCOSZW(:,:)**2 & +PTAU12M(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * & PDIRCOSZW(:,:)**2 & @@ -481,25 +478,23 @@ ZFLX(:,:,IKB-1) = & 2. * PUSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * & PDIRCOSZW(:,:) * ZDIRSINZW(:,:) & +PVSLOPEM(:,:) * (PCOSSLOPE(:,:)**2 - PSINSLOPE(:,:)**2) * ZDIRSINZW(:,:) ) +!$mnh_end_expand_array() #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -#ifdef MNH_COMPILER_NVHPC -!acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ)) & - +PTAU12M(JI,JJ) * (BR_P2(PCOSSLOPE(JI,JJ)) - BR_P2(PSINSLOPE(JI,JJ))) * & - BR_P2(PDIRCOSZW(JI,JJ)) & - -PTAU22M(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) & - +PTAU33M(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & - -PCDUEFF(JI,JJ) * ( & - 2. * PUSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * & - PDIRCOSZW(JI,JJ) * ZDIRSINZW(JI,JJ) & - +PVSLOPEM(JI,JJ) * (BR_P2(PCOSSLOPE(JI,JJ)) - BR_P2(PSINSLOPE(JI,JJ))) * ZDIRSINZW(JI,JJ) ) -END DO ! CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * BR_P2(PDIRCOSZW(:,:)) & + +PTAU12M(:,:) * (BR_P2(PCOSSLOPE(:,:)) - BR_P2(PSINSLOPE(:,:))) * & + BR_P2(PDIRCOSZW(:,:)) & + -PTAU22M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) & + +PTAU33M(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * BR_P2(ZDIRSINZW(:,:)) & + -PCDUEFF(:,:) * ( & + 2. 
* PUSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * & + PDIRCOSZW(:,:) * ZDIRSINZW(:,:) & + +PVSLOPEM(:,:) * (BR_P2(PCOSSLOPE(:,:)) - BR_P2(PSINSLOPE(:,:))) * ZDIRSINZW(:,:) ) +!$mnh_end_expand_array() #endif -!$acc end kernels +!$acc end parallel ! #ifndef MNH_OPENACC ZFLX(:,:,IKB-1:IKB-1) = 2. * MXM( MYM( ZFLX(:,:,IKB-1:IKB-1) ) ) & @@ -612,7 +607,7 @@ IF (.NOT. LFLAT) THEN END DO !CONCURRENT !$acc end kernels ELSE -!$acc kernels +!$acc kernels present_cr(PRUS) PRUS(:,:,:) = PRUS(:,:,:) - ZTMP1_DEVICE !$acc end kernels END IF @@ -725,7 +720,7 @@ IF (KSPLT==1) THEN END DO !CONCURRENT !$acc end kernels ELSE -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE = ZFLX * GX_V_UV_PVM !$acc end kernels ENDIF @@ -830,7 +825,7 @@ IF (KSPLT==1) THEN #endif ! ! dynamic production - !$acc kernels + !$acc kernels present_cr(PDP) PDP(:,:,:) = PDP(:,:,:) + ZWORK(:,:,:) !$acc end kernels ! @@ -851,7 +846,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_SUBGRID_UV) ! CALL GY_U_UV_DEVICE(PUM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZFLX !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE) @@ -859,7 +854,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL LES_MEAN_SUBGRID(ZTMP1_DEVICE, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) ! CALL GX_V_UV_DEVICE(PVM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZFLX !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP2_DEVICE) diff --git a/src/MNH/turb_hor_uw.f90 b/src/MNH/turb_hor_uw.f90 index 6a840a431d78330d426f944b3be4deed970628e1..186fb9de8477f20f67b5b993b3d8e2c2e244fa4e 100644 --- a/src/MNH/turb_hor_uw.f90 +++ b/src/MNH/turb_hor_uw.f90 @@ -533,14 +533,14 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WU , .TRUE. ) ! 
CALL GZ_U_UW_DEVICE(PUM,PDZZ,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE = GX_W_UW_PWM*ZFLX !$acc end kernels CALL MXF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) @@ -549,7 +549,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL MZF_DEVICE( ZFLX, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PTHLM,PDXX,PDZZ,PDZX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP2_DEVICE*ZTMP1_DEVICE !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) @@ -561,7 +561,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! CALL MZF_DEVICE( ZFLX, ZTMP1_DEVICE ) CALL GX_M_U_DEVICE(1,IKU,1,PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) @@ -574,7 +574,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL MZF_DEVICE( ZFLX, ZTMP1_DEVICE ) DO JSV=1,NSV CALL GX_M_U_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX,ZTMP2_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) diff --git a/src/MNH/turb_hor_vw.f90 b/src/MNH/turb_hor_vw.f90 index 3596ac8ce694996184701a1fa9dc63f48ed251e5..0237734124cc629ac97d07adb3ed742a9b1dcce3 100644 --- a/src/MNH/turb_hor_vw.f90 +++ b/src/MNH/turb_hor_vw.f90 @@ -428,15 +428,15 @@ IF (.NOT. 
L2D) THEN !$acc end kernels ELSE CALL MYM_DEVICE(PRHODJ, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE * PINV_PDYY !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE, ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE * ZFLX - !$acc end kernels + !$acc end kernels CALL DYF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE ) - !$acc kernels + !$acc kernels present_cr(PRWS) PRWS(:,:,:) = PRWS(:,:,:) - ZTMP1_DEVICE !$acc end kernels END IF @@ -555,7 +555,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WV , .TRUE. ) ! CALL GZ_V_VW_DEVICE(PVM,PDZZ,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*ZFLX !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) @@ -563,7 +563,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) ! CALL GY_W_VW_DEVICE(PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP1_DEVICE) @@ -574,7 +574,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL MZF_DEVICE( ZFLX, ZTMP2_DEVICE ) !PW: kernel removed to work around a PGI 19.10 bug !$acc update self(ZTMP1_DEVICE,ZTMP2_DEVICE) -! !$acc kernels +! !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) ! !$acc end kernels !$acc update device(ZTMP3_DEVICE) @@ -589,7 +589,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN CALL MZF_DEVICE( ZFLX, ZTMP2_DEVICE ) !PW: kernel removed to work around a PGI 19.10 bug !$acc update self(ZTMP1_DEVICE,ZTMP2_DEVICE) -! !$acc kernels +! !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) ! !$acc end kernels !$acc update device(ZTMP3_DEVICE) @@ -603,7 +603,7 @@ IF (LLES_CALL .AND. 
KSPLT==1) THEN CALL MZF_DEVICE( ZFLX, ZTMP2_DEVICE ) DO JSV=1,NSV CALL GY_M_V_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDYY,PDZZ,PDZY,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE = ZTMP1_DEVICE*ZTMP2_DEVICE !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP1_DEVICE) diff --git a/src/MNH/turb_ver_dyn_flux.f90 b/src/MNH/turb_ver_dyn_flux.f90 index a5ac03e889085a55c08b4261ea3712bdfd0a06cb..f608c5be745833416c0c2e2a8ac742480444da69 100644 --- a/src/MNH/turb_ver_dyn_flux.f90 +++ b/src/MNH/turb_ver_dyn_flux.f90 @@ -523,7 +523,7 @@ IKTE=IKT-JPVEXT_TURB GOCEAN = LOCEAN ! -!$acc kernels +!$acc kernels present_cr(ZA,PDP,ZSOURCE) ZA(:,:,:) = XUNDEF PDP(:,:,:) = XUNDEF ZSOURCE(:,:,:) = 0. @@ -536,18 +536,22 @@ DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) ZDIRSINZW(JI,JJ) = SQRT(1.-BR_P2(PDIRCOSZW(JI,JJ))) END DO #endif +!$acc end kernels + ! compute the coefficients for the uncentred gradient computation near the ! ground ! #ifndef MNH_OPENACC ZKEFF(:,:,:) = MZM( PLM(:,:,:) * SQRT(PTKEM(:,:,:)) ) #else +!PASCAL +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PLM(:,:,:) * SQRT(PTKEM(:,:,:)) !$acc end kernels CALL MZM_DEVICE(ZTMP1_DEVICE,ZKEFF) #endif ! -!$acc kernels +!$acc kernels present_cr(ZUSLOPEM,ZVSLOPEM) ZUSLOPEM(:,:,1)=PUSLOPEM(:,:) ZVSLOPEM(:,:,1)=PVSLOPEM(:,:) !$acc end kernels @@ -687,7 +691,7 @@ ELSE !ATMOS MODEL ONLY #endif ENDIF ! -!$acc kernels +!$acc kernels present_cr(ZSOURCE) ZSOURCE(:,:,IKTB+1:IKTE-1) = 0. ZSOURCE(:,:,IKE) = 0. !$acc end kernels @@ -709,7 +713,7 @@ CALL TRIDIAG_WIND(KKA,KKU,KKL,PUM,ZA,ZCOEFS(:,:,1),PTSTEP,PEXPL,PIMPL, & #ifndef MNH_OPENACC PRUS(:,:,:)=PRUS(:,:,:)+MXM(PRHODJ(:,:,:))*(ZRES(:,:,:)-PUM(:,:,:))/PTSTEP #else -!$acc kernels +!$acc kernels present_cr(PRUS) PRUS(:,:,:)=PRUS(:,:,:)+ZTMP1_DEVICE(:,:,:)*(ZRES(:,:,:)-PUM(:,:,:))/PTSTEP !$acc end kernels #endif @@ -785,7 +789,7 @@ END IF ! ! first part of total momentum flux ! 
-!$acc kernels +!$acc kernels present_cr(PWU) PWU(:,:,:) = ZFLXZ(:,:,:) !$acc end kernels ! @@ -804,7 +808,7 @@ END DO !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE ) CALL MZF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PDP) PDP(:,:,:) = - ZTMP4_DEVICE(:,:,:) !$acc end kernels #endif @@ -822,7 +826,7 @@ ZTMP2_DEVICE(:,:,IKB:IKB) = ZFLXZ(:,:,IKB+KKL:IKB+KKL) * (PUM(:,:,IKB+KKL:IKB+KK / ZTMP1_DEVICE(:,:,IKB+KKL:IKB+KKL) !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE(:,:,IKB:IKB), ZTMP3_DEVICE(:,:,IKB:IKB)) -!$acc kernels +!$acc kernels present_cr(PDP) PDP(:,:,IKB:IKB) = - ZTMP3_DEVICE(:,:,IKB:IKB) !$acc end kernels #endif @@ -852,14 +856,14 @@ IF (LLES_CALL) THEN CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WU ) ! CALL GZ_U_UW_DEVICE(PUM,PDZZ,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL MXF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) CALL MZF_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) CALL LES_MEAN_SUBGRID( ZTMP4_DEVICE, X_LES_RES_ddxa_U_SBG_UaU ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = XCMFS * ZKEFF(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_SUBGRID_Km ) @@ -876,12 +880,15 @@ END IF IF(HTURBDIM=='3DIM') THEN ! Compute the source for the W wind component ! used to compute the W source at the ground -!$acc kernels +!PASCAL +!$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,KKA) = 2 * ZFLXZ(:,:,IKB) - ZFLXZ(:,:,IKB+KKL) ! extrapolation +!$acc end kernels IF (GOCEAN) THEN +!$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,KKU) = 2 * ZFLXZ(:,:,IKE) - ZFLXZ(:,:,IKE-KKL) ! extrapolation - END IF !$acc end kernels + END IF ! #ifndef MNH_OPENACC IF (.NOT. 
LFLAT) THEN @@ -932,11 +939,11 @@ IF(HTURBDIM=='3DIM') THEN END DO !$acc end kernels CALL DZM_DEVICE( ZTMP3_DEVICE, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(PRWS) PRWS(:,:,:)= PRWS(:,:,:) - ZTMP1_DEVICE(:,:,:) + ZTMP2_DEVICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRWS) PRWS(:,:,:)= PRWS(:,:,:) -ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -1013,7 +1020,7 @@ IF (GOCEAN) THEN ) END IF ! -!$acc kernels +!$acc kernels present_cr(PDP) PDP(:,:,:)=PDP(:,:,:)+ZA(:,:,:) !$acc end kernels ! @@ -1039,7 +1046,7 @@ END IF !$acc & X_LES_RES_ddxa_Rt_SBG_UaW,X_LES_RES_ddxa_Sv_SBG_UaW) ! CALL GX_W_UW_DEVICE( PWM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL MXF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) @@ -1048,7 +1055,7 @@ END IF ! CALL GX_M_U_DEVICE(KKA,KKU,KKL,PTHLM,PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MZF_DEVICE( ZFLXZ, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) @@ -1057,7 +1064,7 @@ END IF IF (KRR>=1) THEN CALL GX_U_M_DEVICE(PRM(:,:,:,1),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MZF_DEVICE( ZFLXZ, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) @@ -1067,7 +1074,7 @@ END IF DO JSV=1,NSV CALL GX_U_M_DEVICE(PSVM(:,:,:,JSV),PDXX,PDZZ,PDZX,ZTMP1_DEVICE) CALL MZF_DEVICE( ZFLXZ, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MXF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) @@ -1206,11 +1213,11 @@ ELSE ! Atmos case * 0.5 * ( 1. 
+ MYM(PRHODJ(:,:,KKA:KKA)) / MYM(PRHODJ(:,:,IKB:IKB)) ) ENDIF !No flux at the atmosphere top -!$acc kernels +!$acc kernels present_cr(ZSOURCE) ZSOURCE(:,:,IKE) = 0. !$acc end kernels ENDIF ! End of Ocean or Atmospher Cases -!$acc kernels +!$acc kernels present_cr(ZSOURCE) ZSOURCE(:,:,IKTB+1:IKTE-1) = 0. !$acc end kernels ! @@ -1229,7 +1236,7 @@ CALL TRIDIAG_WIND(KKA,KKU,KKL,PVM,ZA,ZCOEFS(:,:,1),PTSTEP,PEXPL,PIMPL, & #ifndef MNH_OPENACC PRVS(:,:,:)=PRVS(:,:,:)+MYM(PRHODJ(:,:,:))*(ZRES(:,:,:)-PVM(:,:,:))/PTSTEP #else -!$acc kernels +!$acc kernels present_cr(PRVS) PRVS(:,:,:)=PRVS(:,:,:)+ZTMP1_DEVICE*(ZRES(:,:,:)-PVM(:,:,:))/PTSTEP !$acc end kernels #endif @@ -1302,7 +1309,7 @@ END IF ! ! second part of total momentum flux ! -!$acc kernels +!$acc kernels present_cr(PWV) PWV(:,:,:) = ZFLXZ(:,:,:) !$acc end kernels ! @@ -1355,7 +1362,7 @@ IF (GOCEAN) THEN ) END IF ! -!$acc kernels +!$acc kernels present_cr(PDP) PDP(:,:,:)=PDP(:,:,:)+ZA(:,:,:) !$acc end kernels ! @@ -1375,7 +1382,7 @@ IF (LLES_CALL) THEN CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_SUBGRID_WV ) ! CALL GZ_V_VW_DEVICE(PVM,PDZZ,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) @@ -1393,12 +1400,15 @@ END IF ! IF(HTURBDIM=='3DIM') THEN ! Compute the source for the W wind component -!$acc kernels + !PASCAL +!$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,KKA) = 2 * ZFLXZ(:,:,IKB) - ZFLXZ(:,:,IKB+KKL) ! extrapolation +!$acc end kernels IF (GOCEAN) THEN +!$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,KKU) = 2 * ZFLXZ(:,:,IKE) - ZFLXZ(:,:,IKE-KKL) ! extrapolation - END IF !$acc end kernels + END IF ! #ifndef MNH_OPENACC IF (.NOT. 
L2D) THEN @@ -1452,11 +1462,11 @@ IF(HTURBDIM=='3DIM') THEN END DO !$acc end kernels CALL DZM_DEVICE( ZTMP3_DEVICE, ZTMP4_DEVICE ) - !$acc kernels + !$acc kernels present_cr(PRWS) PRWS(:,:,:)= PRWS(:,:,:) - ZTMP1_DEVICE(:,:,:) + ZTMP4_DEVICE(:,:,:) !$acc end kernels ELSE - !$acc kernels + !$acc kernels present_cr(PRWS) PRWS(:,:,:)= PRWS(:,:,:) - ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -1532,7 +1542,7 @@ IF(HTURBDIM=='3DIM') THEN ) END IF ! -!$acc kernels +!$acc kernels present_cr(PDP) PDP(:,:,:)=PDP(:,:,:)+ZA(:,:,:) !$acc end kernels ! @@ -1555,7 +1565,7 @@ IF(HTURBDIM=='3DIM') THEN !$acc data copy(X_LES_RES_ddxa_W_SBG_UaW,X_LES_RES_ddxa_Thl_SBG_UaW,X_LES_RES_ddxa_Rt_SBG_UaW) ! CALL GY_W_VW_DEVICE( PWM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP2_DEVICE,ZTMP3_DEVICE) @@ -1564,7 +1574,7 @@ IF(HTURBDIM=='3DIM') THEN ! CALL GY_M_V_DEVICE(KKA,KKU,KKL,PTHLM,PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZF_DEVICE( ZFLXZ, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) @@ -1573,7 +1583,7 @@ IF(HTURBDIM=='3DIM') THEN IF (KRR>=1) THEN CALL GY_V_M_DEVICE(PRM(:,:,:,1),PDYY,PDZZ,PDZY,ZTMP1_DEVICE) CALL MZF_DEVICE( ZFLXZ, ZTMP2_DEVICE ) - !$acc kernels + !$acc kernels present_cr(ZTMP3_DEVICE) ZTMP3_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL MYF_DEVICE(ZTMP3_DEVICE,ZTMP4_DEVICE) @@ -1600,7 +1610,7 @@ IF ( OTURB_FLX .AND. tpfile%lopened .AND. HTURBDIM == '1DIM') THEN -XCMFS*PLM(:,:,:)*SQRT(PTKEM(:,:,:))*GZ_W_M(PWM,PDZZ) #else CALL GZ_W_M_DEVICE(PWM,PDZZ,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,:)= (2./3.) 
* PTKEM(:,:,:) & -XCMFS*PLM(:,:,:)*SQRT(PTKEM(:,:,:))*ZTMP1_DEVICE(:,:,:) !$acc end kernels diff --git a/src/MNH/turb_ver_thermo_corr.f90 b/src/MNH/turb_ver_thermo_corr.f90 index a97503d7cbd8d6eb37bdc193b8c8dad8318d1742..c4a2059561ba99795e8e59b96ab5619ff0305c8d 100644 --- a/src/MNH/turb_ver_thermo_corr.f90 +++ b/src/MNH/turb_ver_thermo_corr.f90 @@ -511,7 +511,7 @@ GUSERV = (KRR/=0) ! ! compute the coefficients for the uncentred gradient computation near the ! ground -!$acc kernels +!$acc kernels present_cr(ZCOEFF,ZKEFF,ZTMP1_DEVICE) ZCOEFF(:,:,IKB+2*KKL)= - PDZZ(:,:,IKB+KKL) / & ( (PDZZ(:,:,IKB+2*KKL)+PDZZ(:,:,IKB+KKL)) * PDZZ(:,:,IKB+2*KKL) ) ZCOEFF(:,:,IKB+KKL)= (PDZZ(:,:,IKB+2*KKL)+PDZZ(:,:,IKB+KKL)) / & @@ -561,7 +561,7 @@ END IF #endif ZDFDDTDZ(:,:,:) = 0. ! this term, because of discretization, is treated separately #else - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) #ifndef MNH_BITREP !dir$ concurrent ZTMP1_DEVICE(:,:,:) = PPHI3(:,:,:)*PDTH_DZ(:,:,:)**2 @@ -582,6 +582,8 @@ END IF DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZF (JI,JJ,JK) = XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) END DO + !$acc end kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = 0. ! 
this term, because of discretization, is treated separately !$acc end kernels #endif @@ -599,12 +601,12 @@ END IF #else IF (GFTH2) THEN CALL M3_TH2_WTH2( PREDTH1(:,:,:),PREDR1(:,:,:),PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels CALL D_M3_TH2_WTH2_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels END IF @@ -623,12 +625,12 @@ END IF CALL MZF_DEVICE( PFWTH(:,:,:), ZTMP1_DEVICE(:,:,:) ) CALL M3_TH2_W2TH( PREDTH1(:,:,:), PREDR1(:,:,:), PD(:,:,:), PDTH_DZ(:,:,:), PLM(:,:,:), & PLEPS(:,:,:), PTKEM(:,:,:), ZTMP2_DEVICE(:,:,:) ) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels CALL D_M3_TH2_W2TH_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),PD(:,:,:),& & PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),GUSERV,ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -647,12 +649,12 @@ END IF IF (GFR2) THEN CALL M3_TH2_WR2( PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),& & PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels CALL D_M3_TH2_WR2_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),PD(:,:,:),& & PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels END IF @@ -673,10 +675,10 @@ END IF & 
PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) CALL D_M3_TH2_W2R_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),PD(:,:,:),& & PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP3_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP3_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -694,12 +696,12 @@ END IF IF (GFTHR) THEN CALL M3_TH2_WTHR( PREDR1(:,:,:),PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),& & PBLL_O_E(:,:,:),PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels CALL D_M3_TH2_WTHR_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels END IF @@ -715,7 +717,7 @@ END IF ! *DZM(PTHLP(:,:,:) - PTHLM(:,:,:)) / PDZZ(:,:,:) ) & + PIMPL * ZDFDDTDZ(:,:,:) * MZF(DZM(PTHLP(:,:,:) - PTHLM(:,:,:)) / PDZZ(:,:,:) ) #else - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:) !$acc end kernels CALL DZM_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP2_DEVICE(:,:,:) ) @@ -729,7 +731,8 @@ END IF !$acc end kernels CALL MZF_DEVICE( ZTMP3_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) #ifdef MNH_COMPILER_CCE - !$acc kernels present(ZFLXZ) + !PASVSCAL$acc kernels present(ZFLXZ) + !$acc kernels #else !$acc kernels #endif @@ -740,10 +743,12 @@ END IF ZFLXZ(JI,JJ,JK) = ZF(JI,JJ,JK) & + PIMPL * ZDFDDTDZ(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) END DO +!$acc end kernels #endif ! ! 
special case near the ground ( uncentred gradient ) #ifndef MNH_BITREP + !$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,IKB) = XCTV * PPHI3(:,:,IKB+KKL) * PLM(:,:,IKB) & * PLEPS(:,:,IKB) & *( PEXPL * & @@ -755,7 +760,9 @@ END IF +ZCOEFF(:,:,IKB+KKL )*PTHLP(:,:,IKB+KKL ) & +ZCOEFF(:,:,IKB )*PTHLP(:,:,IKB ) )**2 & ) + !$acc end kernels #else + !$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif @@ -772,12 +779,13 @@ END IF +ZCOEFF(JI,JJ,IKB )*PTHLP(JI,JJ,IKB ) ) & ) END DO + !$acc end kernels #endif - ! + + !$acc kernels present_cr(ZFLXZ,PSIGS) ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) ! ZFLXZ(:,:,:) = MAX(0., ZFLXZ(:,:,:)) - ! IF (KRRL > 0) THEN #ifndef MNH_BITREP PSIGS(:,:,:) = ZFLXZ(:,:,:) * PATHETA(:,:,:)**2 @@ -820,22 +828,22 @@ END IF CALL LES_MEAN_SUBGRID( ZFLXZ(:,:,:), X_LES_SUBGRID_Thl2 ) ! CALL MZF_DEVICE( PWM(:,:,:), ZTMP1_DEVICE(:,:,:) ) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE(:,:,:), X_LES_RES_W_SBG_Thl2 ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -2.*XCTD*PSQRT_TKE(:,:,:)*ZFLXZ(:,:,:)/PLEPS(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_DISS_Thl2 ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PETHETA(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_ThlThv ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XA3*PBETA(:,:,:)*PETHETA(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_ThlPz, .TRUE. 
) @@ -855,12 +863,12 @@ END IF #ifndef MNH_OPENACC ZF (:,:,:) = XCTV*PLM(:,:,:)*PLEPS(:,:,:)*MZF(0.5*(PPHI3(:,:,:)+PPSI3(:,:,:))*PDTH_DZ(:,:,:)*PDR_DZ(:,:,:)) #else -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) !dir$ concurrent ZTMP1_DEVICE(:,:,:) = 0.5*(PPHI3(:,:,:)+PPSI3(:,:,:))*PDTH_DZ(:,:,:)*PDR_DZ(:,:,:) !$acc end kernels CALL MZF_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP2_DEVICE(:,:,:) ) -!$acc kernels +!$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) #endif @@ -869,7 +877,7 @@ END IF END DO !$acc end kernels #endif -!$acc kernels +!$acc kernels present_cr(ZDFDDTDZ,ZDFDDRDZ) ZDFDDTDZ(:,:,:) = 0. ! this term, because of discretization, is treated separately ZDFDDRDZ(:,:,:) = 0. ! this term, because of discretization, is treated separately !$acc end kernels @@ -890,17 +898,17 @@ END IF IF (GFTH2) THEN CALL M3_THR_WTH2( PREDR1(:,:,:),PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),& & PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels CALL D_M3_THR_WTH2_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels CALL D_M3_THR_WTH2_O_DDRDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels END IF @@ -921,17 +929,17 @@ END IF CALL M3_THR_W2TH( PREDR1(:,:,:),PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),& & PDR_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) CALL MZF_DEVICE( PFWTH(:,:,:), ZTMP2_DEVICE(:,:,:) ) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + 
ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL D_M3_THR_W2TH_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),PBLL_O_E(:,:,:),PDR_DZ(:,:,:),PETHETA(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels CALL D_M3_THR_W2TH_O_DDRDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) !$acc end kernels END IF @@ -951,17 +959,17 @@ END IF IF (GFR2) THEN CALL M3_THR_WR2( PREDTH1(:,:,:),PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),& & PBLL_O_E(:,:,:),PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels CALL D_M3_THR_WR2_O_DDTDZ( PREDR1(:,:,:),PREDTH1(:,:,:),PD(:,:,:),& & PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels CALL D_M3_THR_WR2_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),PD(:,:,:),& & PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),PDTH_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels END IF @@ -982,17 +990,17 @@ END IF CALL MZF_DEVICE( PFWR(:,:,:), ZTMP1_DEVICE(:,:,:) ) CALL M3_THR_W2R( PREDTH1(:,:,:),PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),& & PDTH_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels CALL D_M3_THR_W2R_O_DDTDZ( PREDR1(:,:,:),PREDTH1(:,:,:),PD(:,:,:),& & 
PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels CALL D_M3_THR_W2R_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),PD(:,:,:),& & PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),PBLL_O_E(:,:,:),PDTH_DZ(:,:,:),PEMOIST(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -1012,17 +1020,17 @@ END IF IF (GFTHR) THEN CALL M3_THR_WTHR( PREDTH1(:,:,:),PREDR1(:,:,:),PD(:,:,:),PLEPS(:,:,:),& & PSQRT_TKE(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels CALL D_M3_THR_WTHR_O_DDTDZ( PREDTH1(:,:,:),PREDR1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDTDZ) ZDFDDTDZ(:,:,:) = ZDFDDTDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels CALL D_M3_THR_WTHR_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels END IF @@ -1045,13 +1053,13 @@ END IF + PIMPL * ZDFDDTDZ(:,:,:) * MZF(DZM(PTHLP(:,:,:) - PTHLM(:,:,:)) / PDZZ(:,:,:) ) & + PIMPL * ZDFDDRDZ(:,:,:) * MZF(DZM(PRP(:,:,:) - PRM(:,:,:,1)) / PDZZ(:,:,:) ) #else - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE,ZTMP2_DEVICE) ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:) ZTMP2_DEVICE(:,:,:) = PRP(:,:,:) - PRM(:,:,:,1) !$acc end kernels CALL DZM_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) CALL DZM_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) - !$acc kernels + !$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop 
independent collapse(3) #endif @@ -1069,7 +1077,7 @@ END IF HTURBDIM,GUSERV,ZTMP5_DEVICE(:,:,:)) ! d(phi3*drdz )/ddrdz term CALL D_PSI3DRDZ_O_DDRDZ(PPSI3(:,:,:),PREDR1(:,:,:),PREDTH1(:,:,:),PRED2R3(:,:,:),PRED2THR3(:,:,:), & HTURBDIM,GUSERV,ZTMP6_DEVICE(:,:,:)) ! d(psi3*drdz )/ddrdz term - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE,ZTMP8_DEVICE) ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:) ZTMP8_DEVICE(:,:,:) = PRP(:,:,:) - PRM(:,:,:,1) !$acc end kernels @@ -1088,7 +1096,7 @@ END IF END DO !$acc end kernels !!! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE,ZTMP2_DEVICE) ZTMP1_DEVICE(:,:,:) = PTHLP(:,:,:) - PTHLM(:,:,:) ZTMP2_DEVICE(:,:,:) = PRP(:,:,:) - PRM(:,:,:,1) !$acc end kernels @@ -1107,7 +1115,7 @@ END IF CALL MZF_DEVICE( ZTMP7_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) CALL MZF_DEVICE( ZTMP1_DEVICE(:,:,:), ZTMP4_DEVICE(:,:,:) ) CALL MZF_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP5_DEVICE(:,:,:) ) - !$acc kernels + !$acc kernels present_cr(zflxz) #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(3) #endif @@ -1142,9 +1150,9 @@ END IF +ZCOEFF(JI,JJ,IKB )*PRP(JI,JJ,IKB )) & ) END DO - ! +!$acc end kernels + !$acc kernels present_cr(ZFLXZ,PSIGS) ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) - ! IF ( KRRL > 0 ) THEN PSIGS(:,:,:) = PSIGS(:,:,:) + & 2. * PATHETA(:,:,:) * PAMOIST(:,:,:) * ZFLXZ(:,:,:) @@ -1185,32 +1193,32 @@ END IF CALL LES_MEAN_SUBGRID( ZFLXZ(:,:,:), X_LES_SUBGRID_THlRt ) ! CALL MZF_DEVICE( PWM(:,:,:), ZTMP1_DEVICE(:,:,:) ) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE(:,:,:), X_LES_RES_W_SBG_ThlRt ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -2.*XCTD*PSQRT_TKE(:,:,:)*ZFLXZ(:,:,:)/PLEPS(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_DISS_ThlRt ) ! 
- !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PETHETA(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_RtThv ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XA3*PBETA(:,:,:)*PETHETA(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_RtPz, .TRUE. ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = PEMOIST(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_ThlThv , .TRUE. ) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -XA3*PBETA(:,:,:)*PEMOIST(:,:,:)*ZFLXZ(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE(:,:,:), X_LES_SUBGRID_ThlPz, .TRUE. ) @@ -1233,7 +1241,7 @@ END IF ZF (:,:,:) = XCTV*PLM(:,:,:)*PLEPS(:,:,:)*MZF(PPSI3(:,:,:)*BR_P2(PDR_DZ(:,:,:))) #endif #else - !$acc kernels + !$acc kernels !!present_cr(ZTMP1_DEVICE) #ifndef MNH_BITREP !dir$ concurrent ZTMP1_DEVICE(:,:,:) = PPSI3(:,:,:)*PDR_DZ(:,:,:)**2 @@ -1272,12 +1280,12 @@ END IF IF (GFR2) THEN CALL M3_R2_WR2( PREDR1(:,:,:),PREDTH1(:,:,:),PD(:,:,:),PLEPS(:,:,:),& & PSQRT_TKE(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels CALL D_M3_R2_WR2_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),& & PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PEMOIST(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFR2(:,:,:) !$acc end kernels END IF @@ -1296,12 +1304,12 @@ END IF CALL MZF_DEVICE( PFWR(:,:,:), ZTMP1_DEVICE(:,:,:) ) CALL M3_R2_W2R( PREDR1(:,:,:),PREDTH1(:,:,:),PD(:,:,:),PDR_DZ(:,:,:),& & PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP2_DEVICE(:,:,:) * 
ZTMP1_DEVICE(:,:,:) !$acc end kernels CALL D_M3_R2_W2R_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),& & PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),GUSERV,ZTMP3_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP3_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -1320,12 +1328,12 @@ END IF IF (GFTH2) THEN CALL M3_R2_WTH2( PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),& & PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels CALL D_M3_R2_WTH2_O_DDRDZ( PREDR1(:,:,:),& & PREDTH1(:,:,:),PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * PFTH2(:,:,:) !$acc end kernels END IF @@ -1345,12 +1353,12 @@ END IF CALL MZF_DEVICE( PFWTH(:,:,:), ZTMP1_DEVICE(:,:,:) ) CALL M3_R2_W2TH( PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),& & PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP2_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels CALL D_M3_R2_W2TH_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),& & PD(:,:,:),PLM(:,:,:),PLEPS(:,:,:),PTKEM(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP3_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP3_DEVICE(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels END IF @@ -1368,12 +1376,12 @@ END IF IF (GFTHR) THEN CALL M3_R2_WTHR( PREDTH1(:,:,:),PD(:,:,:),PLEPS(:,:,:),& & PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZF) ZF(:,:,:) = ZF(:,:,:) + ZTMP1_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels CALL D_M3_R2_WTHR_O_DDRDZ( PREDR1(:,:,:),PREDTH1(:,:,:),& & 
PD(:,:,:),PLEPS(:,:,:),PSQRT_TKE(:,:,:),PBLL_O_E(:,:,:),PETHETA(:,:,:),PDR_DZ(:,:,:),ZTMP2_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZDFDDRDZ) ZDFDDRDZ(:,:,:) = ZDFDDRDZ(:,:,:) + ZTMP2_DEVICE(:,:,:) * PFTHR(:,:,:) !$acc end kernels END IF @@ -1391,7 +1399,7 @@ END IF #else CALL D_PSI3DRDZ2_O_DDRDZ(PPSI3(:,:,:),PREDR1(:,:,:),PREDTH1(:,:,:),PRED2R3(:,:,:),PRED2THR3(:,:,:),PDR_DZ(:,:,:), & HTURBDIM,GUSERV,ZTMP1_DEVICE(:,:,:)) - !$acc kernels + !$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE(:,:,:) = PRP(:,:,:) - PRM(:,:,:,1) !$acc end kernels CALL DZM_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) @@ -1414,7 +1422,7 @@ END IF !$acc end kernels CALL MZF_DEVICE( ZTMP2_DEVICE(:,:,:), ZTMP3_DEVICE(:,:,:) ) #ifdef MNH_COMPILER_CCE - !$acc kernels present(ZFLXZ) + !$acc kernels #else !$acc kernels #endif @@ -1426,10 +1434,12 @@ END IF + PIMPL * XCTV*PLM(JI,JJ,JK)*PLEPS(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) & + PIMPL * ZDFDDRDZ(JI,JJ,JK) * ZTMP3_DEVICE(JI,JJ,JK) END DO + !$acc end kernels #endif ! ! special case near the ground ( uncentred gradient ) #ifndef MNH_BITREP + !$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,IKB) = XCHV * PPSI3(:,:,IKB+KKL) * PLM(:,:,IKB) & * PLEPS(:,:,IKB) & *( PEXPL * & @@ -1441,7 +1451,9 @@ END IF +ZCOEFF(:,:,IKB+KKL )*PRP(:,:,IKB+KKL ) & +ZCOEFF(:,:,IKB )*PRP(:,:,IKB ))**2 & ) + !$acc end kernels #else + !$acc kernels #ifdef MNH_COMPILER_NVHPC !$acc loop independent collapse(2) #endif @@ -1458,8 +1470,10 @@ END IF +ZCOEFF(JI,JJ,IKB )*PRP(JI,JJ,IKB )) & ) END DO + !$acc end kernels #endif ! + !$acc kernels present_cr(ZFLXZ) ZFLXZ(:,:,KKA) = ZFLXZ(:,:,IKB) ! IF ( KRRL > 0 ) THEN @@ -1528,7 +1542,7 @@ END IF ! IF ( KRRL > 0 ) THEN ! 
Extrapolate PSIGS at the ground and at the top -!$acc kernels +!$acc kernels present_cr(PSIGS) PSIGS(:,:,KKA) = PSIGS(:,:,IKB) PSIGS(:,:,KKU) = PSIGS(:,:,IKE) PSIGS(:,:,:) = SQRT( MAX (PSIGS(:,:,:) , 1.E-12) ) diff --git a/src/MNH/turb_ver_thermo_flux.f90 b/src/MNH/turb_ver_thermo_flux.f90 index 62bff9543488fe3c66f1293995e814969a17bead..bf4f25c3eebe12f619637c6f5343249d176d59e2 100644 --- a/src/MNH/turb_ver_thermo_flux.f90 +++ b/src/MNH/turb_ver_thermo_flux.f90 @@ -703,7 +703,7 @@ IF(LHGRAD) THEN #ifdef MNH_OPENACC call Print_msg( NVERB_FATAL, 'GEN', 'TURB_VER_THERMO_FLUX', 'OpenACC: LHGRAD=T not yet implemented' ) #endif -!$acc kernels +!$acc kernels present_cr(zcld_thold) IF ( KRRL >= 1 ) THEN IF ( KRRI >= 1 ) THEN ZCLD_THOLD(:,:,:) = PRM(:,:,:,2) + PRM(:,:,:,4) @@ -929,7 +929,7 @@ ELSE ! No coupling O and A cases END IF ! IF (GOCEAN) THEN -!$acc kernels +!$acc kernels present_cr(zf) ZF(:,:,IKE) = XSSTFL(:,:) *0.5*(1. + PRHODJ(:,:,KKU) / PRHODJ(:,:,IKE)) !$acc end kernels ELSE !end ocean case (in nocoupled case) @@ -1427,13 +1427,13 @@ IF (KRR /= 0) THEN ! is taken into account in the vertical part ! IF (HTURBDIM=='3DIM') THEN -!$acc kernels present_cr(ZF) +!$acc kernels present_cr(zf) ZF(:,:,IKB) = ( PIMPL*PSFRP(:,:) + PEXPL*PSFRM(:,:) ) & * PDIRCOSZW(:,:) & * 0.5 * (1. + PRHODJ(:,:,KKA) / PRHODJ(:,:,IKB)) !$acc end kernels ELSE -!$acc kernels +!$acc kernels present_cr(zf) ZF(:,:,IKB) = ( PIMPL*PSFRP(:,:) + PEXPL*PSFRM(:,:) ) & / PDIRCOSZW(:,:) & * 0.5 * (1. + PRHODJ(:,:,KKA) / PRHODJ(:,:,IKB)) @@ -1503,7 +1503,7 @@ END DO ! ! replace the flux by the Leonard terms above ZALT and ZCLD_THOLD IF (LHGRAD) THEN -!$acc kernels +!$acc kernels present_cr(zf_new,zflxz) WHERE ( (ZCLD_THOLD(:,:,:) >= ZCLDTHOLD ) .AND. ( ZALT(:,:,:) >= ZALTHGRAD ) ) ZFLXZ(:,:,:) = ZF_NEW(:,:,:) END WHERE @@ -1736,7 +1736,7 @@ IF ( ((OTURB_FLX .AND. tpfile%lopened) .OR. LLES_CALL) .AND. 
(KRRL > 0) ) THEN ZA(:,:,:) = DZM(PIMPL * PTHLP(:,:,:) + PEXPL * PTHLM(:,:,:)) / PDZZ(:,:,:) * & (-PPHI3(:,:,:)*MZM(PLM(:,:,:)*PSQRT_TKE(:,:,:))) * XCSHF #else -!$acc kernels +!$acc kernels present_cr(ztmp1_device) ZTMP1_DEVICE(:,:,:) = PIMPL * PTHLP(:,:,:) + PEXPL * PTHLM(:,:,:) !$acc end kernels CALL DZM_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) diff --git a/src/Makefile b/src/Makefile index e45b99a3129f4486b8972c18b545cadb389251a5..ef8e3f15572713f0af1aa4f09e74053e70f58b58 100644 --- a/src/Makefile +++ b/src/Makefile @@ -324,7 +324,7 @@ $(ECCODES_MOD) : AEC_PATH=$(CDF_PATH) cmake ${DIR_ECCODES_SRC} -DCMAKE_INSTALL_PREFIX=${DIR_ECCODES_INSTALL} -DBUILD_SHARED_LIBS=OFF \ -DENABLE_NETCDF=OFF -DENABLE_JPG=OFF -DENABLE_PYTHON=OFF -DENABLE_EXAMPLES=OFF \ -DCMAKE_Fortran_COMPILER=$(FC) -DCMAKE_C_COMPILER=$(CC) \ - -DCMAKE_Fortran_FLAGS=$(ECCODES_FFLAGS) -DCMAKE_C_FLAGS=$(ECCODES_CFLAGS) && \ + -DCMAKE_Fortran_FLAGS=$(ECCODES_FFLAGS) -DCMAKE_C_FLAGS=$(ECCODES_CFLAGS) ${EC_CONF} && \ $(MAKE) && $(MAKE) install && $(MAKE) clean eccodes_lib_clean : @@ -524,45 +524,45 @@ ARFLAGS=r # # GENERATION OF DEPENDANCE FILES : *.D # - +SPLL ?= spll %.D:%.F90 $(CPP) $(INC) $(CPPFLAGS) $< > $(OBJDIR)/$(*F).f90 - spll $(NSOURCE) $(OBJDIR)/$(*F).f90 + $(SPLL) $(NSOURCE) $(OBJDIR)/$(*F).f90 rm -f $(OBJDIR)/$(*F).f90 %.D:%.f90 $(CPP) $(INC) $(CPPFLAGS) $< > $(OBJDIR)/$(*F).f90 - spll $(NSOURCE) $(OBJDIR)/$(*F).f90 + $(SPLL) $(NSOURCE) $(OBJDIR)/$(*F).f90 rm -f $(OBJDIR)/$(*F).f90 %.D:%.fx90 $(CPP) $(INC) $(CPPFLAGS) $< > $(OBJDIR)/$(*F).fx90 - spll $(NSOURCE) $(OBJDIR)/$(*F).fx90 + $(SPLL) $(NSOURCE) $(OBJDIR)/$(*F).fx90 rm -f $(OBJDIR)/$(*F).fx90 %.D:%.f $(CPP) $(INC) $(CPPFLAGS) $< > $(OBJDIR)/$(*F).f - spll $(NSOURCE) $(OBJDIR)/$(*F).f + $(SPLL) $(NSOURCE) $(OBJDIR)/$(*F).f rm -f $(OBJDIR)/$(*F).f # # GENERATION OF OBJET FILES : *.o # -%.o:%.f90 - echo "inc=$(INC)" - $(F90) -I$(OBJDIR) $(INC) -c $(F90FLAGS) $< - -mv $(*F).o $(OBJDIR)/. 
|| echo OK $(*F).o +#%.o:%.f90 +# echo "inc=$(INC)" +# $(F90) -I$(OBJDIR) $(INC) -c $(F90FLAGS) $< +# -mv $(*F).o $(OBJDIR)/. || echo OK $(*F).o %.o:%.f echo "inc=$(INC)" $(F77) -I$(OBJDIR) $(INC) -c $(F77FLAGS) $< -mv $(*F).o $(OBJDIR)/. || echo OK $(*F).o -%.o:%.fx90 - cp $< $(OBJDIR)/$(*F).f - $(FX90) -I$(OBJDIR) $(INC) -c $(FX90FLAGS) $(OBJDIR)/$(*F).f - -mv $(*F).o $(OBJDIR)/. || echo OK $(*F).o +#%.o:%.fx90 +# cp $< $(OBJDIR)/$(*F).f +# $(FX90) -I$(OBJDIR) $(INC) -c $(FX90FLAGS) $(OBJDIR)/$(*F).f +# -mv $(*F).o $(OBJDIR)/. || echo OK $(*F).o %.o : %.c $(CC) $(INC) $(CFLAGS) $(CPPFLAGS_C) -c $< -o $(OBJDIR)/$(*F).o diff --git a/src/Makefile.MESONH.mk b/src/Makefile.MESONH.mk index 6ad235831de52676df58510c69e06282150f02c8..4bb42a92c93c8888e05a60633e9d031d1013450c 100644 --- a/src/Makefile.MESONH.mk +++ b/src/Makefile.MESONH.mk @@ -337,7 +337,7 @@ INC_MPI = -I$(B)$(DIR_MPI) DIR_MASTER += $(DIR_MPI) OBJS_LISTE_MASTER += mpivide.o INC += $(INC_MPI) -mpivide.o : CPPFLAGS += -DMNH_INT=$(MNH_INT) -DMNH_REAL=$(MNH_REAL) \ +mpivide.o : CPPFLAGS_C += -DMNH_INT=$(MNH_INT) -DMNH_REAL=$(MNH_REAL) \ -I$(DIR_MPI)/include VPATH += $(DIR_MPI) endif diff --git a/src/Rules.LXcray.mk b/src/Rules.LXcray.mk index cd2b5204569c4eb87a710bb4c7ebe4c1a59b5e10..2fb71c0a0aa5643e5ad788a7ab8aede026bfc13a 100644 --- a/src/Rules.LXcray.mk +++ b/src/Rules.LXcray.mk @@ -7,12 +7,16 @@ # Compiler Options # # # ########################################################## +# +# craytfn version +CFV=$(shell crayftn --version | grep -E -m1 -o ' [[:digit:]\.]{2,}( |$$)' | sed 's/\.//g' ) +# #OBJDIR_PATH=/home/escj/azertyuiopqsdfghjklm/wxcvbn/azertyuiopqsdfghjklmwxcvbn # -OPT_BASE = -hpic -emf -h acc_model=auto_async_none:no_fast_addr:no_deep_copy +OPT_BASE = -hpic -emf -h acc_model=auto_async_none:no_fast_addr:no_deep_copy -halias=none OPT_PERF0 = -O0 -g -OPT_PERF1 = -O1 -hflex_mp=intolerant -Ofp0 -hnofma -hnoomp -K trap=fp -OPT_PERF2 = -O2 -hflex_mp=intolerant -Ofp0 -hnofma -hnoomp +OPT_PERF1 = -G2 -O1 
-hflex_mp=intolerant -Ofp0 -hnofma -hnoomp -K trap=fp -hipa0 +OPT_PERF2 = -G2 -O2 -hflex_mp=intolerant -Ofp0 -hnofma -hnoomp -hipa0 # OPT_NOOPENACC = -hnoacc OPT_OPENACC = -hacc @@ -64,9 +68,48 @@ OPT_NOCB = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) #OBJS_REPROD= spll_mode_sum_ll.o #$(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) -Mvect=nosimd -Minfo=all -g #OBJS_O1_OPENACC= spll_ice4_tendencies.o spll_turb_ver_thermo_flux.o -#$(OBJS_O1_OPENACC) : OPT = $(OPT_BASE) $(OPT_PERF1) $(OPT_OPENACC) +#OBJS_O1_OPENACC= spll_rain_ice_red.o +#$(OBJS_O1_OPENACC) : OPT = $(OPT_BASE) $(OPT_PERF0) $(OPT_OPENACC) + +#OBJS_ASYNAll=spll_turb_ver_thermo_flux.o spll_turb_ver_thermo_corr.o spll_turb_ver_dyn_flux.o \ +# spll_turb_hor_uv.o spll_turb_hor_uw.o spll_turb_hor_vw.o spll_mode_turb.o\ +# spll_turb_hor_dyn_corr.o spll_turb_hor_thermo_flux.o spll_turb_hor_sv_corr.o +# spll_mode_turb.o spll_p_abs.o spll_advection_metsv.o \ +# spll_advection_uvw.o spll_resolved_cloud.o +#$(OBJS_ASYNAll) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) -h acc_model=auto_async_all + +OBJS_CONCURRENT=spll_multigrid.o spll_turb_ver_thermo_flux.o +#spll_resolved_cloud.o +$(OBJS_CONCURRENT) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) -h concurrent -rdimo + +endif +ifeq "$(OPTLEVEL)" "OPENACCNOBITREP" +#MNH_BITREP_OMP=YES +CPPFLAGS += -DMNH_OPENACC -DMNH_GPUDIRECT +OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) +OPT0 = $(OPT_BASE) $(OPT_PERF0) $(OPT_OPENACC) +OPT_NOCB = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) +#CXXFLAGS = -fopenmp +#OBJS_REPROD= spll_mode_sum_ll.o +#$(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) -Mvect=nosimd -Minfo=all -g +#OBJS_O1_OPENACC= spll_ice4_tendencies.o spll_turb_ver_thermo_flux.o +#OBJS_O1_OPENACC= spll_rain_ice_red.o +#$(OBJS_O1_OPENACC) : OPT = $(OPT_BASE) $(OPT_PERF0) $(OPT_OPENACC) + +#OBJS_ASYNAll=spll_turb_ver_thermo_flux.o spll_turb_ver_thermo_corr.o spll_turb_ver_dyn_flux.o \ +# spll_turb_hor_uv.o spll_turb_hor_uw.o spll_turb_hor_vw.o 
spll_mode_turb.o\ +# spll_turb_hor_dyn_corr.o spll_turb_hor_thermo_flux.o spll_turb_hor_sv_corr.o +# spll_mode_turb.o spll_p_abs.o spll_advection_metsv.o \ +# spll_advection_uvw.o spll_resolved_cloud.o +#$(OBJS_ASYNAll) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) -h acc_model=auto_async_all + +OBJS_CONCURRENT=spll_multigrid.o spll_turb_ver_thermo_flux.o +# spll_resolved_cloud.o +$(OBJS_CONCURRENT) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_OPENACC) -h concurrent -rdimo + endif # + ifeq "$(OPTLEVEL)" "OPENACCDEFONLY" MNH_BITREP_OMP=YES CPPFLAGS += -DMNH_OPENACC -D_FAKEOPENACC @@ -137,7 +180,10 @@ CPPFLAGS_SURFEX = CPPFLAGS_SURCOUCHE += -DDEV_NULL -DMNH_COMPILER_CCE CPPFLAGS_RAD = CPPFLAGS_NEWLFI = -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} -CPPFLAGS_MNH = -DMNH -DSFX_MNH -DMNH_NO_MPI_LOGICAL48 -DMNH_COMPILER_CCE +CPPFLAGS_MNH = -DMNH=MNH -DSFX_MNH -DMNH_NO_MPI_LOGICAL48 -DMNH_COMPILER_CCE +ifeq ($(shell test $(CFV) -ge 1402 ; echo $$?),0) +CPPFLAGS_MNH += -DMNH_COMPILER_CCE_1403 +endif CPPFLAGS_MNH += -DMNH_EXPAND -DMNH_EXPAND_LOOP -DMNH_EXPAND_OPENACC CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP ifdef VER_GA @@ -203,7 +249,16 @@ MNH_COMPRESS=yes MNH_GRIBAPI=no # NETCDF_SUPPFLAGS='-emf' -ECCODES_FFLAGS='-emf' +ECCODES_FFLAGS='-emf -hsystem_alloc' +ECCODES_CFLAGS=' -gdwarf-4 ' +EC_CONF='-DIEEE_LE=1' +CDF_CONF= CPP=cpp +HDF_CONF= CPP=cpp +# +# Bypat nvidia70 problème +ifeq "$(CRAY_ACCEL_TARGET)" "nvidia70" +CPPFLAGS_MNH += -DTARGET_NV70 +endif ########################################################## # # # Source of MESONH PACKAGE Distribution # @@ -226,10 +281,11 @@ include Makefile.MESONH.mk ########################################################## # Juan & Maud 20/03/2008 --> Ifort 10.1.008 Bug O2 optimization #OPT_PERF1 = -O1 -OBJS_O1= spll_schu.o spll_ps2str.o spll_ini_one_way_n.o spll_urban_solar_abs.o spll_mode_ekf.o +OBJS_O1= spll_schu.o spll_ps2str.o spll_ini_one_way_n.o spll_urban_solar_abs.o spll_mode_ekf.o mode_ekf.mod #spll_p_abs.o $(OBJS_O1) : OPT = 
$(OPT_BASE) $(OPT_PERF1) -OBJS_O0= spll_mode_gridproj.o spll_ini_dynamics.o spll_sunpos_n.o spll_average_diag.o +OBJS_O0= mode_gridproj.mod spll_mode_gridproj.o spll_ini_dynamics.o spll_sunpos_n.o spll_average_diag.o spll_write_lfifm1_for_diag.o \ + spll_write_aircraft_balloon.o spll_mode_write_les_n.o mode_write_les_n.mod modd_mnh_surfex_n.mod # spll_ground_param_n.o $(OBJS_O0) : OPT = $(OPT_BASE) $(OPT_PERF0) @@ -240,18 +296,19 @@ $(OBJS_I8) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_I8) endif ifeq "$(MNH_INT)" "8" -OBJS_I4=spll_modd_netcdf.o +OBJS_I4=spll_modd_netcdf.o modd_netcdf.mod $(OBJS_I4) : OPT = $(OPT_BASE_I4) endif -OBJS_LST = spll_condensation.o spll_turb_ver_thermo_flux.o spll_rotate_wind.o spll_mode_mnh_zwork.o spll_contrav_device.o \ +OBJS_LST = spll_condensation.o spll_turb_ver_thermo_flux.o spll_rotate_wind.o spll_mode_mnh_zwork.o mode_mnh_zwork.mod spll_contrav_device.o \ spll_advection_metsv.o \ spll_mym_device.o spll_myf_device.o spll_dyf_device.o \ spll_gx_m_u_device.o spll_gy_m_v_device.o \ spll_gravity.o spll_gravity_impl.o \ - spll_advection_uvw.o spll_resolved_cloud.o spll_mode_tools.o spll_mode_rain_ice_sedimentation_split.o spll_ice_adjust.o \ + spll_advection_uvw.o spll_resolved_cloud.o spll_mode_tools.o mode_tools.mod \ + spll_mode_rain_ice_sedimentation_split.o mode_rain_ice_sedimentation_split.mod spll_ice_adjust.o \ spll_countjv1d_device.o spll_countjv2d_device.o spll_countjv3d_device.o \ - spll_mode_turb.o spll_phys_param_n.o spll_model_n.o spll_tridiag_tke.o \ + spll_mode_turb.o mode_turb.mod spll_phys_param_n.o spll_model_n.o spll_tridiag_tke.o \ spll_turb_ver_thermo_corr.o spll_tke_eps_sources.o spll_turb_ver_dyn_flux.o \ spll_turb_hor_uv.o spll_turb_hor_uw.o spll_turb_hor_vw.o \ spll_turb_hor_dyn_corr.o spll_turb_hor_thermo_flux.o \ @@ -260,11 +317,20 @@ OBJS_LST = spll_condensation.o spll_turb_ver_thermo_flux.o spll_rotate_wind.o sp spll_get_halo_start_d.o spll_get_halo_stop_d.o spll_mass_leak.o \ spll_ppm_01_x.o spll_ppm_01_y.o 
spll_ppm_01_z.o \ spll_ppm_s0_x.o spll_ppm_s0_y.o spll_ppm_s0_z.o \ - spll_mode_rain_ice_slow.o spll_mode_rain_ice_fast_rs.o \ - spll_prandtl.o spll_mode_rain_ice_fast_rg.o spll_mode_rain_ice_fast_ri.o \ - spll_mode_rain_ice.o spll_mode_rain_warm.o \ - spll_rotate_wind.o spll_ground_param_n.o spll_advec_ppm_algo.o \ - spll_turb_hor_sv_corr.o + spll_mode_rain_ice_slow.o mode_rain_ice_slow.mod spll_mode_rain_ice_fast_rs.o mode_rain_ice_fast_rs.mod \ + spll_prandtl.o spll_mode_rain_ice_fast_rg.o mode_rain_ice_fast_rg.mod spll_mode_rain_ice_fast_ri.o mode_rain_ice_fast_ri.mod \ + spll_mode_rain_ice.o mode_rain_ice.mod spll_mode_rain_ice_warm.o mode_rain_ice_warm.mod \ + spll_ground_param_n.o spll_advec_ppm_algo.o \ + spll_turb_hor_sv_corr.o spll_rain_ice.o spll_mode_rain_ice_nucleation.o mode_rain_ice_nucleation.mod \ + spll_mode_prandtl.o mode_prandtl.mod spll_turb_ver.o spll_mode_repro_sum.o mode_repro_sum.mod \ + spll_tridiag_thermo.o spll_tridiag_wind.o spll_tridiag_w.o \ + spll_tridiag_tke.o spll_advec_weno_k_2_my.o spll_advec_weno_k_2_mx.o \ + spll_advec_weno_k_2_vy.o spll_advec_weno_k_2_ux.o spll_mxm_device.o \ + spll_mym_device.o spll_mzm_device.o -$(OBJS_LST) : OPT += -rdimo +#$(OBJS_LST) : OPT += -rdimo + +OPT += -rimo + +SPLL = spll_lst diff --git a/src/Rules.LXgfortran.mk b/src/Rules.LXgfortran.mk index 5ec39bca92b635e1da8988dd66665dfa6b9cbe03..c82bc7b12d5cab36c48444dc662e201fe7853a8b 100644 --- a/src/Rules.LXgfortran.mk +++ b/src/Rules.LXgfortran.mk @@ -97,15 +97,17 @@ LDFLAGS = $(OPT) # # preprocessing flags # -CPP = cpp -P -traditional -Wcomment +#CPP = cpp -P -traditional -Wcomment +CPP = mnh_expand # -CPPFLAGS_C = -DLITTLE_endian +CPPFLAGS_C += -DLITTLE_endian CPPFLAGS_SURFEX = CPPFLAGS_SURCOUCHE += -DDEV_NULL CPPFLAGS_RAD = CPPFLAGS_NEWLFI = -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} -CPPFLAGS_MNH = -DMNH -DSFX_MNH +CPPFLAGS_MNH = -DMNH=MNH -DSFX_MNH CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP +CPPFLAGS_MNH += -DMNH_EXPAND ifdef VER_GA 
CPPFLAGS_SURCOUCHE += -DMNH_GA INC += -I${GA_ROOT}/include @@ -198,3 +200,5 @@ OBJS_O0= spll_lima_phillips_integ.o $(OBJS_O0) : OPT = $(OPT_BASE) $(OPT_PERF0) endif endif + +SPLL = spll_new diff --git a/src/Rules.LXifort.mk b/src/Rules.LXifort.mk index b667f42703360d2ee7e0d9ae062470c379bb9e05..46c66c03ae64231f28fb5e2e96776eb40e39f5ef 100644 --- a/src/Rules.LXifort.mk +++ b/src/Rules.LXifort.mk @@ -177,13 +177,17 @@ LDFLAGS = -Wl,--allow-multiple-definition -Wl,-warn-once $(PAR) -Wl,-rpath= # # preprocessing flags # -CPP = cpp -P -traditional -Wcomment +#CPP = cpp -P -traditional -Wcomment +CPP = mnh_expand # +CPPFLAGS_C += -DLITTLE_endian CPPFLAGS_SURFEX = CPPFLAGS_SURCOUCHE += -DDEV_NULL CPPFLAGS_RAD = CPPFLAGS_NEWLFI = -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} -CPPFLAGS_MNH = -DMNH -DSFX_MNH +CPPFLAGS_MNH = -DMNH=MNH -DSFX_MNH +CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP +CPPFLAGS_MNH += -DMNH_EXPAND ifdef VER_GA CPPFLAGS_SURCOUCHE += -DMNH_GA INC += -I${GA_ROOT}/include @@ -266,3 +270,4 @@ OBJS_I4=spll_modd_netcdf.o $(OBJS_I4) : OPT = $(OPT_BASE_I4) endif +SPLL = spll_new diff --git a/src/Rules.LXnvhpc.mk b/src/Rules.LXnvhpc.mk index eb4bb1c62dc6784c2a136153c94dbf091433ca9b..d6052e0a964971986239c36eb343c2b3f556e178 100644 --- a/src/Rules.LXnvhpc.mk +++ b/src/Rules.LXnvhpc.mk @@ -200,7 +200,7 @@ CPPFLAGS_SURFEX = CPPFLAGS_SURCOUCHE += -DMNH_COMPILER_NVHPC CPPFLAGS_RAD = CPPFLAGS_NEWLFI = -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} -CPPFLAGS_MNH = -DMNH -DMNH_PGI -DSFX_MNH +CPPFLAGS_MNH = -DMNH=MNH -DMNH_PGI -DSFX_MNH CPPFLAGS_MNH += -Uvector -Upixel CPPFLAGS_MNH += -DMNH_EXPAND -DMNH_EXPAND_OPENACC #CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP @@ -304,3 +304,5 @@ ifeq "$(MNH_INT)" "8" OBJS_I4=spll_modd_netcdf.o $(OBJS_I4) : OPT = $(OPT_BASE_I4) endif + +SPLL = spll_new diff --git a/src/Rules.LXnvhpc2202.mk b/src/Rules.LXnvhpc2202.mk index 698193af6b3245fe6d918295c3d5882b490b7708..15ef6ced160560b3f1ca44055813aef81adb69cc 100644 --- a/src/Rules.LXnvhpc2202.mk +++ 
b/src/Rules.LXnvhpc2202.mk @@ -22,7 +22,7 @@ TP= -tp=px # #Version of CUDA #(8.0 at least if compute capability >= 6.0) -CUDALEVEL=cuda11.4 +CUDALEVEL=cuda11.0 # #Compute capability of GPU # @@ -87,6 +87,15 @@ OPT = $(OPT_BASE) $(OPT_PERF2) OPT0 = $(OPT_BASE) $(OPT_PERF0) OPT_NOCB = $(OPT_BASE) $(OPT_PERF2) # +# +# List of Files with compilation problem in O2 +# +OBJS_REPROD= spll_mode_sum_ll.o mode_sum_ll.mod + +OBJS_O1_OPENACC= spll_ice4_tendencies.o spll_turb_ver_thermo_flux.o spll_mode_device.o mode_device.mod spll_mppdb_check3d_real_mg.o \ + spll_mppdb_check0d_real_mg.o spll_mode_turb.o mode_turb.mod spll_modd_les_n.o modd_les_n.mod +# +# ifeq "$(OPTLEVEL)" "DEBUG" OPT = -g $(OPT_BASE) $(OPT_PERF0) $(OPT_CHECK) OPT0 = -g $(OPT_BASE) $(OPT_PERF0) $(OPT_CHECK) @@ -99,7 +108,6 @@ OPT = -g $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2) OPT0 = -g $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF0) OPT_NOCB = -g $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2) CXXFLAGS = -g -acc -Kieee -Mnofma $(OPT_MANAGED) -OBJS_REPROD= spll_mode_sum_ll.o $(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2) -Mvect=nosimd -Minfo=all -g endif # @@ -109,12 +117,10 @@ OPT = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2) OPT0 = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF0) OPT_NOCB = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2) CXXFLAGS = -acc -Kieee -Mnofma $(OPT_MANAGED) -gpu=nofma -OBJS_REPROD= spll_mode_sum_ll.o $(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2) -Mvect=nosimd -Minfo=all -OBJS_OPENACC = spll_modd_halo_d.o +OBJS_OPENACC = spll_modd_halo_d.o modd_halo_d.mod $(OBJS_OPENACC) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) -OBJS_O1_MANAGED= spll_ice4_tendencies.o spll_turb_ver_thermo_flux.o spll_turb_hor_dyn_corr.o spll_mode_turb.o -$(OBJS_O1_MANAGED) : OPT = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF1) +$(OBJS_O1_OPENACC) : OPT = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF1) endif # ifeq "$(OPTLEVEL)" "MULTICORE" @@ -124,16 +130,16 @@ OPT0 = $(OPT_BASE) $(OPT_MULTICORE) $(OPT_PERF0) OPT_NOCB = 
$(OPT_BASE) $(OPT_MULTICORE) CXXFLAGS = -acc -Kieee -Mnofma $(OPT_MULTICORE) endif -# + ifeq "$(OPTLEVEL)" "OPENACC" CPPFLAGS += -DMNH_OPENACC -DMNH_GPUDIRECT # -imacros MNH_OPENACC_SHUMAN_MACRO.CPP -OPT = -g $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0) -OPT0 = -g $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0) -OPT_NOCB = -g $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0) -CXXFLAGS = -g -acc -Kieee -Mnofma $(OPT_OPENACC) -OBJS_REPROD= spll_mode_sum_ll.o -$(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) -Mvect=nosimd -Minfo=all -g +OPT = -gopt $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) +OPT0 = -gopt $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0) +OPT_NOCB = -gopt $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) +CXXFLAGS = -gopt -acc -Kieee -Mnofma $(OPT_OPENACC) +$(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) -Mvect=nosimd -Minfo=all -gopt +$(OBJS_O1_OPENACC) : OPT = -gopt $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF1) endif ifeq "$(OPTLEVEL)" "OPENACCO2" CPPFLAGS += -DMNH_OPENACC -DMNH_GPUDIRECT @@ -141,10 +147,17 @@ OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) OPT0 = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0) OPT_NOCB = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) CXXFLAGS = -acc -Kieee -Mnofma $(OPT_OPENACC) -OBJS_REPROD= spll_mode_sum_ll.o $(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) -Mvect=nosimd -Minfo=all -OBJS_O1_OPENACC= spll_ice4_tendencies.o spll_turb_ver_thermo_flux.o spll_mode_device.o spll_mppdb_check3d_real_mg.o \ - spll_mppdb_check0d_real_mg.o spll_mode_turb.o +$(OBJS_O1_OPENACC) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF1) +endif +ifeq "$(OPTLEVEL)" "OPENACCO2NOBITREP" +MNH_BITREP = NO +CPPFLAGS += -DMNH_OPENACC -DMNH_GPUDIRECT +OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) +OPT0 = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0) +OPT_NOCB = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) +CXXFLAGS = -acc -Kieee -Mnofma $(OPT_OPENACC) +$(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) -Mvect=nosimd -Minfo=all $(OBJS_O1_OPENACC) : OPT = $(OPT_BASE) 
$(OPT_OPENACC) $(OPT_PERF1) endif # @@ -154,7 +167,6 @@ OPT = -g $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF0) OPT0 = -g $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF0) OPT_NOCB = -g $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF0) CXXFLAGS = -Kieee -Mnofma $(OPT_NOOPENACC) -OBJS_REPROD= spll_mode_sum_ll.o $(OBJS_REPROD) : OPT = -g $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2) -Mvect=nosimd -Minfo=all endif # @@ -164,7 +176,6 @@ OPT = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2) OPT0 = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF0) OPT_NOCB = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2) CXXFLAGS = -Kieee -Mnofma $(OPT_NOOPENACC) -OBJS_REPROD= spll_mode_sum_ll.o $(OBJS_REPROD) : OPT = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2) -Mvect=nosimd -Minfo=all -g endif # @@ -200,7 +211,7 @@ CPPFLAGS_SURFEX = CPPFLAGS_SURCOUCHE += -DMNH_COMPILER_NVHPC CPPFLAGS_RAD = CPPFLAGS_NEWLFI = -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} -CPPFLAGS_MNH = -DMNH -DMNH_PGI -DSFX_MNH +CPPFLAGS_MNH = -DMNH=MNH -DMNH_PGI -DSFX_MNH CPPFLAGS_MNH += -Uvector -Upixel CPPFLAGS_MNH += -DMNH_EXPAND -DMNH_EXPAND_OPENACC #CPPFLAGS_MNH += -imacros MNH_OPENACC_NV_CR.CPP @@ -208,7 +219,7 @@ CPPFLAGS_MNH += -DMNH_EXPAND -DMNH_EXPAND_OPENACC # BITREP flags # #if MNH_BITREP exists => compile with the BITREP library -MNH_BITREP = YES +MNH_BITREP ?= YES ifeq "$(MNH_BITREP)" "YES" CPPFLAGS_MNH += -DMNH_BITREP endif @@ -263,11 +274,12 @@ include Makefile.MESONH.mk # etc ... 
# # # ########################################################## -OBJS_O1 += spll_modd_isba_n.o spll_mode_construct_ll.o \ - spll_init_surf_atm_n.o spll_mode_scatter_ll.o spll_convert_patch_teb.o \ - spll_define_mask_n.o spll_del1dfield_ll.o spll_mode_fm.o spll_mode_gather_ll.o \ +ifneq "$(OPTLEVEL)" "DEBUG" +OBJS_O1 += spll_modd_isba_n.o modd_isba_n.mod spll_mode_construct_ll.o mode_construct_ll.mod \ + spll_init_surf_atm_n.o spll_mode_scatter_ll.o mode_scatter_ll.mod spll_convert_patch_teb.o \ + spll_define_mask_n.o spll_del1dfield_ll.o spll_mode_fm.o mode_fm.mod spll_mode_gather_ll.o mode_gather_ll.mod \ spll_convect_updraft.o spll_convect_updraft_shal.o \ - spll_mode_dustopt.o spll_mode_saltopt.o \ + spll_mode_dustopt.o mode_dustopt.mod spll_mode_saltopt.o mode_saltopt.mod \ spll_aeroopt_get.o spll_write_lfifm1_for_diag_supp.o spll_write_lfifm1_for_diag.o spll_write_lfifm_n.o \ #spll_unpack_1d_2d_from2d.o @@ -275,22 +287,24 @@ OBJS_O1 += spll_modd_isba_n.o spll_mode_construct_ll.o \ #spll_phys_param_n.o $(OBJS_O1) : OPT = $(OPT_BASE) $(OPT_PERF1) -OBJS_O0= spll_mode_mppdb.o \ +OBJS_O0= spll_mode_mppdb.o mode_mppdb.mod \ spll_fft55.o spll_fft.o spll_flat_invz.o \ - spll_mode_repro_sum.o \ - spll_modd_les_n.o \ - spll_default_desfm_n.o -#default_desfmn: workaround a nvhpc 21.9 bug (run stall in default_desfmn) - + spll_mode_repro_sum.o mode_repro_sum.mod \ + spll_modd_les_n.o modd_les_n.mod \ + spll_default_desfm_n.o \ + spll_modd_pack_gr_field_n.o modd_pack_gr_field_n.mod +$(OBJS_O0) : OPT = -g $(OPT_BASE) $(OPT_PERF0) +#default_desfmn: workaround a nvhpc 21.9 bug (run stall in default_desfmn) # spll_fast_terms.o # spll_modd_ch_solver_n.o \ # spll_modd_dummy_gr_field_n.o spll_modd_dyn_n.o # spll_mode_sum_ll.o -$(OBJS_O0) : OPT = -g $(OPT_BASE) $(OPT_PERF0) -OBJS_O2= spll_mode_device.o + +OBJS_O2= spll_mode_device.o mode_device.mod $(OBJS_O2) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2) +endif ifneq "$(findstring 8,$(LFI_INT))" "" OBJS_I8=spll_NEWLFI_ALL.o @@ 
-298,6 +312,8 @@ $(OBJS_I8) : OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_I8) endif ifeq "$(MNH_INT)" "8" -OBJS_I4=spll_modd_netcdf.o +OBJS_I4=spll_modd_netcdf.o modd_netcdf.mod $(OBJS_I4) : OPT = $(OPT_BASE_I4) endif + +SPLL = spll_new diff --git a/src/ZSOLVER/advec_4th_order_aux.f90 b/src/ZSOLVER/advec_4th_order_aux.f90 new file mode 100644 index 0000000000000000000000000000000000000000..9838c4762cf61ca7e0ba265ebd579076e5cc3352 --- /dev/null +++ b/src/ZSOLVER/advec_4th_order_aux.f90 @@ -0,0 +1,728 @@ +!MNH_LIC Copyright 2005-2022 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence +!MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +!MNH_LIC for details. version 1. +!----------------------------------------------------------------- +! ############################### + MODULE MODI_ADVEC_4TH_ORDER_AUX +! ############################### +! +INTERFACE +! + SUBROUTINE ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PFIELDT, KGRID, & + PMEANX, PMEANY,TPHALO2 ) +! +USE MODD_ARGSLIST_ll, ONLY : HALO2_ll +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! X direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! Y direction LBC type +! +REAL, DIMENSION(:,:,:), INTENT(OUT) :: PMEANX, PMEANY ! fluxes +REAL, DIMENSION(:,:,:), INTENT(IN) :: PFIELDT ! variable at t +INTEGER, INTENT(IN) :: KGRID ! C grid localisation +! +TYPE(HALO2_ll), POINTER :: TPHALO2 ! halo2 for the field at t +! +END SUBROUTINE ADVEC_4TH_ORDER_ALGO +! +!------------------------------------------------------------------------------- +! +#ifndef MNH_OPENACC + FUNCTION MZF4(PA) RESULT(PMZF4) +#else + SUBROUTINE MZF4(PA,PMZF4) +#endif +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PA ! variable at flux + ! side +REAL, DIMENSION(SIZE(PA,1),SIZE(PA,2),SIZE(PA,3)) :: PMZF4 ! result at mass + ! localization +! +#ifndef MNH_OPENACC + END FUNCTION MZF4 +#else + END SUBROUTINE MZF4 +#endif +! 
+!------------------------------------------------------------------------------- +! +#ifndef MNH_OPENACC + FUNCTION MZM4(PA) RESULT(PMZM4) +#else + SUBROUTINE MZM4(PA,PMZM4) +#endif +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PA ! variable at mass + ! localization +REAL, DIMENSION(SIZE(PA,1),SIZE(PA,2),SIZE(PA,3)) :: PMZM4 ! result at flux + ! localization +#ifndef MNH_OPENACC + END FUNCTION MZM4 +#else + END SUBROUTINE MZM4 +#endif +! +!------------------------------------------------------------------------------- +! +END INTERFACE +! +END MODULE MODI_ADVEC_4TH_ORDER_AUX +! +!------------------------------------------------------------------------------- +! +! ######################################################################## + SUBROUTINE ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PFIELDT, KGRID, & + PMEANX, PMEANY,TPHALO2 ) +! ######################################################################## +!! +!!**** *ADVEC_4TH_ORDER_ALGO * - routine used to compute 4th order horizontal +!! advection fluxes of 3D prognostic variables +!! +!! PURPOSE +!! ------- +!! The purpose of this routine is to compute 2sd or 4th order horizontal +!! advection fluxes of a prognostic variable. +!! +!!** METHOD +!! ------ +!! In case of cyclic LBCs, the routine returns the scalar component of the +!! advection fluxes by applying a 4th order horizontal averaging operator to +!! the prognostic variable on each grid level. In the case of open LBCs, the +!! averaging operator degenerates to a 2nd order one on the first ring +!! inside the computationnal domain. +!! The "halo2" (or the second layer of the halo) of the prognostic +!! variable is passed as argument. +!! +!! IMPLICIT ARGUMENTS +!! ------------------ +!! +!! MODULE MODD_ARGSLIST +!! HALO2LIST_ll : type for a list of "HALO2_lls" +!! +!! REFERENCE +!! --------- +!! Book2 of documentation ( routine ADVEC_4TH_ORDER ) +!! User Interface for the MesoNH Parallel Package +!! +!! AUTHOR +!! ------ +!! J.-P. 
Pinty * Laboratoire d'Aerologie* +!! +!! MODIFICATIONS +!! ------------- +!! Original 25/10/05 +! J. Escobar 21/03/2013: for HALOK comment all NHALO=1 test +! P. Wautelet 21/11/2019: TPHALO2 dummy argument is no longer optional +! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +USE MODD_ARGSLIST_ll, ONLY: HALO2_ll +USE MODD_CONF +! +#ifdef MNH_OPENACC +USE MODE_DEVICE +#endif +use mode_ll, only: GET_INDICE_ll, LWEST_ll, LEAST_ll, LNORTH_ll, LSOUTH_ll +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +use mode_mppdb +#ifdef MNH_OPENACC +use mode_msg +#endif +! +IMPLICIT NONE +! +!* 0.1 Declarations of dummy arguments : +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! X direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! Y direction LBC type +! +REAL, DIMENSION(:,:,:), INTENT(OUT) :: PMEANX, PMEANY ! fluxes +REAL, DIMENSION(:,:,:), INTENT(IN) :: PFIELDT ! variable at t +INTEGER, INTENT(IN) :: KGRID ! C grid localisation +! +TYPE(HALO2_ll), POINTER :: TPHALO2 ! halo2 for the field at t +! +!* 0.2 Declarations of local variables : +! +INTEGER:: IW,IE,IS,IN,IT,IB,IWF,IEF,ISF,INF ! Coordinate of forth order diffusion area +! +INTEGER:: IIB,IJB ! Begining useful area in x,y directions +INTEGER:: IIE,IJE ! End useful area in x,y directions +! +INTEGER:: ILUOUT,IRESP ! for prints +! +! JUAN ACC +LOGICAL :: GWEST , GEAST +LOGICAL :: GSOUTH , GNORTH +REAL, DIMENSION(:,:), pointer, contiguous :: ZHALO2_WEST, ZHALO2_EAST +REAL, DIMENSION(:,:), pointer, contiguous :: ZHALO2_SOUTH, ZHALO2_NORTH +! + +!$acc data present( PMEANX, PMEANY, PFIELDT ) + +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PFIELDT,"ADVEC_4TH_ORDER_ALGO beg:PFIELDT") +END IF + +!------------------------------------------------------------------------------- +! +!* 0.3. COMPUTES THE DOMAIN DIMENSIONS +! 
------------------------------ +! +! +CALL GET_INDICE_ll(IIB,IJB,IIE,IJE) +! +GWEST = LWEST_ll() +GEAST = LEAST_ll() +GSOUTH = LSOUTH_ll() +GNORTH = LNORTH_ll() +! +!------------------------------------------------------------------------------- +! +!* 0.4. INITIALIZE THE FIELDS +! --------------------- +! +!$acc kernels present(PMEANX,PMEANY) +PMEANX(:,:,:) = 0.0 +PMEANY(:,:,:) = 0.0 +!$acc end kernels +! +!------------------------------------------------------------------------------- +! +! +!* 1. CALCULATE THE NUMERICAL MEAN IN THE X DIRECTION +! ----------------------------------------------- +! +SELECT CASE ( HLBCX(1) ) ! X direction LBC type: (1) for left side +! +!* 1.1 CYCLIC CASE IN THE X DIRECTION: +! +CASE ('CYCL') ! In that case one must have HLBCX(1) == HLBCX(2) +! +#ifdef MNH_OPENACC +call Print_msg( NVERB_WARNING, 'GEN', 'ADVEC_4TH_ORDER_ALGO', 'OpenACC: HLBCX(1) AND CYCL not yet tested' ) +#endif +ZHALO2_WEST => TPHALO2%WEST +ZHALO2_EAST => TPHALO2%EAST +! +!$acc kernels present(PMEANX,ZHALO2_WEST,ZHALO2_EAST) + IW=IIB+1 + IE=IIE +! + IF(KGRID == 2) THEN + IWF=IW-1 + IEF=IE-1 + ELSE + IWF=IW + IEF=IE + END IF +! +!* lateral boundary conditions + PMEANX(IWF-1,:,:) = (7.0*( PFIELDT(IW-1,:,:)+PFIELDT(IW-2,:,:) ) - & + ( PFIELDT(IW,:,:)+ZHALO2_WEST(:,:) ) )/12.0 +! + PMEANX(IEF+1,:,:) = (7.0*( PFIELDT(IE+1,:,:)+PFIELDT(IE,:,:) ) - & + ( ZHALO2_EAST(:,:)+PFIELDT(IE-1,:,:) ) )/12.0 +! +!* inner domain + PMEANX(IWF:IEF,:,:) = (7.0*( PFIELDT(IW:IE,:,:)+PFIELDT(IW-1:IE-1,:,:) ) - & + ( PFIELDT(IW+1:IE+1,:,:)+PFIELDT(IW-2:IE-2,:,:) ) )/12.0 +!$acc end kernels +! +!!$! +!!$ +!!$ IF(NHALO == 1) THEN +!!$ PMEANX(IWF-1,:,:) = (7.0*( PFIELDT(IW-1,:,:)+PFIELDT(IW-2,:,:) ) - & +!!$ ( PFIELDT(IW,:,:)+ZPHALO2_WEST(:,:) ) )/12.0 +!!$! +!!$ PMEANX(IEF+1,:,:) = (7.0*( PFIELDT(IE+1,:,:)+PFIELDT(IE,:,:) ) - & +!!$ ( ZPHALO2_EAST(:,:)+PFIELDT(IE-1,:,:) ) )/12.0 +!!$ ENDIF +!!$! 
+!!$ PMEANX(IWF:IEF,:,:) = (7.0*( PFIELDT(IW:IE,:,:)+PFIELDT(IW-1:IE-1,:,:) ) - & +!!$ ( PFIELDT(IW+1:IE+1,:,:)+PFIELDT(IW-2:IE-2,:,:) ) )/12.0 +!!$! +!* 1.2 NON CYCLIC CASE IN THE X DIRECTION +! +CASE ('OPEN','WALL','NEST') +! +ZHALO2_WEST => TPHALO2%WEST +ZHALO2_EAST => TPHALO2%EAST +! +!$acc kernels present(PMEANX,ZHALO2_WEST,ZHALO2_EAST) + IF (GWEST) THEN + IF(KGRID == 2) THEN + IW=IIB+2 ! special case of C grid + ELSE + IW=IIB+1 + END IF + ELSE +!!$ IF(NHALO == 1) THEN + IW=IIB+1 +!!$ ELSE +!!$ IW=IIB +!!$ ENDIF + ENDIF +!!$ IF (GEAST .OR. NHALO == 1) THEN + IF (GEAST) THEN +! T. Maric +! IE=IIE-1 ! original + IE=IIE + ELSE + IE=IIE + END IF +! + IF(KGRID == 2) THEN + IWF=IW-1 + IEF=IE-1 + ELSE + IWF=IW + IEF=IE + END IF +! +! T. Maric. 16.1.2006. +! write(*,*)' IW, IE, IWF, IEF = ',IW, IE, IWF, IEF +! stop 'Stopping in advec_4th_order_aux.f90' +! +!* Use a second order scheme at the physical border +! + IF (GWEST) THEN + PMEANX(IWF-1,:,:) = 0.5*( PFIELDT(IW-1,:,:)+PFIELDT(IW-2,:,:) ) + ! T. Maric + ! PMEANX(1,:,:) = PMEANX(IWF-1,:,:) + ! extrapolate + !PMEANX(1,:,:) = 0.5*(3.0*PFIELDT(1,:,:) - PFIELDT(2,:,:)) +!!$ ELSE IF (NHALO == 1) THEN + ELSE + PMEANX(IWF-1,:,:) = (7.0*( PFIELDT(IW-1,:,:)+PFIELDT(IW-2,:,:) ) - & + ( PFIELDT(IW,:,:)+ZHALO2_WEST(:,:) ) )/12.0 + ENDIF +! + IF (GEAST) THEN + PMEANX(IEF+1,:,:) = 0.5*( PFIELDT(IE+1,:,:)+PFIELDT(IE,:,:) ) +!!$ ELSEIF (NHALO == 1) THEN + ELSE + PMEANX(IEF+1,:,:) = (7.0*( PFIELDT(IE+1,:,:)+PFIELDT(IE,:,:) ) - & + ( ZHALO2_EAST(:,:)+PFIELDT(IE-1,:,:) ) )/12.0 + ENDIF +! +!* Use a fourth order scheme elsewhere +! + PMEANX(IWF:IEF,:,:) = (7.0*( PFIELDT(IW:IE,:,:)+PFIELDT(IW-1:IE-1,:,:) ) - & + ( PFIELDT(IW+1:IE+1,:,:)+PFIELDT(IW-2:IE-2,:,:) ) )/12.0 +!$acc end kernels +END SELECT +! +!------------------------------------------------------------------------------- +! +!* 2. COMPUTES THE 4TH ORDER MEAN IN THE Y DIRECTION +! ---------------------------------------------- +! +IF ( .NOT. 
L2D ) THEN + SELECT CASE ( HLBCY(1) ) ! Y direction LBC type: (1) for left side +! +!* 2.1 CYCLIC CASE IN THE Y DIRECTION: +! + CASE ('CYCL') ! In that case one must have HLBCY(1) == HLBCY(2) +! +#ifdef MNH_OPENACC +call Print_msg( NVERB_WARNING, 'GEN', 'ADVEC_4TH_ORDER_ALGO', 'OpenACC: HLBCX(2) AND CYCL not yet tested' ) +#endif +ZHALO2_SOUTH => TPHALO2%SOUTH +ZHALO2_NORTH => TPHALO2%NORTH +! +!$acc kernels present(PMEANY,ZHALO2_SOUTH,ZHALO2_NORTH) +! +! + IS=IJB+1 + IN=IJE +! + IF(KGRID == 3) THEN + ISF=IS-1 + INF=IN-1 + ELSE + ISF=IS + INF=IN + END IF +! +!* lateral boundary conditions + PMEANY(:,ISF-1,:) = (7.0*( PFIELDT(:,IS-1,:)+PFIELDT(:,IS-2,:) ) - & + ( PFIELDT(:,IS,:)+ZHALO2_SOUTH(:,:) ) )/12.0 +! + PMEANY(:,INF+1,:) = (7.0*( PFIELDT(:,IN+1,:)+PFIELDT(:,IN,:) ) - & + ( ZHALO2_NORTH(:,:)+PFIELDT(:,IN-1,:) ) )/12.0 +! +!* inner domain + PMEANY(:,ISF:INF,:) = (7.0*( PFIELDT(:,IS:IN,:)+PFIELDT(:,IS-1:IN-1,:)) - & + ( PFIELDT(:,IS+1:IN+1,:)+PFIELDT(:,IS-2:IN-2,:) ))/12.0 +!$acc end kernels +!!$! +!!$ IF(NHALO == 1) THEN +!!$ PMEANY(:,ISF-1,:) = (7.0*( PFIELDT(:,IS,:)+PFIELDT(:,IS-1,:) ) - & +!!$ ( PFIELDT(:,IS+1,:)+ZPHALO2_SOUTH(:,:) ) )/12.0 +!!$! +!!$ PMEANY(:,ISF+1,:) = (7.0*( PFIELDT(:,IS,:)+PFIELDT(:,IS-1,:) ) - & +!!$ ( ZPHALO2_NORTH(:,:)+PFIELDT(:,IS-2,:) ) )/12.0 +!!$ ENDIF +!!$! +!!$ PMEANY(:,ISF:INF,:) = (7.0*( PFIELDT(:,IS:IN,:)+PFIELDT(:,IS-1:IN-1,:)) - & +!!$ ( PFIELDT(:,IS+1:IN+1,:)+PFIELDT(:,IS-2:IN-2,:) ))/12.0 +!!$! +!* 2.2 NON CYCLIC CASE IN THE Y DIRECTION +! + CASE ('OPEN','WALL','NEST') +! +ZHALO2_SOUTH => TPHALO2%SOUTH +ZHALO2_NORTH => TPHALO2%NORTH +! +!$acc kernels present(PMEANY,ZHALO2_SOUTH,ZHALO2_NORTH) + IF (GSOUTH) THEN + IF(KGRID == 3) THEN + IS=IJB+2 ! special case of C grid + ELSE + IS=IJB+1 + END IF + ELSE +!!$ IF(NHALO == 1) THEN + IS=IJB+1 +!!$ ELSE +!!$ IS=IJB +!!$ ENDIF + ENDIF +!!$ IF (GNORTH .OR. NHALO == 1) THEN + IF (GNORTH) THEN +! T. Maric +! IN=IJE-1 ! original + IN=IJE + ELSE + IN=IJE + END IF +! 
+ IF(KGRID == 3) THEN + ISF=IS-1 + INF=IN-1 + ELSE + ISF=IS + INF=IN + END IF +! +!* Use a second order scheme at the physical border +! + IF (GSOUTH) THEN + PMEANY(:,ISF-1,:) = 0.5*( PFIELDT(:,IS-1,:)+PFIELDT(:,IS-2,:) ) + ! T. Maric + ! PMEANY(:,1,:) = PMEANY(:,ISF-1,:) + ! extrapolate + !PMEANY(:,1,:) = 0.5*(3.0*PFIELDT(:,1,:) - PFIELDT(:,2,:)) +!!$ ELSEIF (NHALO == 1) THEN + ELSE +!!$ PMEANY(:,ISF-1,:) = (7.0*( PFIELDT(:,IS,:)+PFIELDT(:,IS-1,:)) - & +!!$ ( PFIELDT(:,IS+1,:)+TPHALO2%SOUTH(:,:) ))/12.0 + PMEANY(:,ISF-1,:) = (7.0*( PFIELDT(:,IS-1,:)+PFIELDT(:,IS-2,:)) - & + ( PFIELDT(:,IS,:)+ZHALO2_SOUTH(:,:) ))/12.0 + ENDIF +! + IF (GNORTH) THEN + PMEANY(:,INF+1,:) = 0.5*( PFIELDT(:,IN+1,:)+PFIELDT(:,IN,:) ) +!!$ ELSEIF (NHALO == 1) THEN + ELSE +!!$ PMEANY(:,INF+1,:) = (7.0*( PFIELDT(:,IN,:)+PFIELDT(:,IN-1,:)) - & +!!$ ( TPHALO2%NORTH(:,:)+PFIELDT(:,IN-2,:) ))/12.0 + PMEANY(:,INF+1,:) = (7.0*( PFIELDT(:,IN+1,:)+PFIELDT(:,IN,:)) - & + ( ZHALO2_NORTH(:,:)+PFIELDT(:,IN-1,:) ))/12.0 + ENDIF +! +!* Use a fourth order scheme elsewhere +! + PMEANY(:,ISF:INF,:) = (7.0*( PFIELDT(:,IS:IN,:)+PFIELDT(:,IS-1:IN-1,:)) - & + ( PFIELDT(:,IS+1:IN+1,:)+PFIELDT(:,IS-2:IN-2,:) ))/12.0 +!$acc end kernels +! + END SELECT +ELSE +!$acc kernels present(PMEANY) + PMEANY(:,:,:) = 0.0 +!$acc end kernels +ENDIF +! +IF (MPPDB_INITIALIZED) THEN + !Check all OUT arrays + CALL MPPDB_CHECK(PMEANX,"ADVEC_4TH_ORDER_ALGO end:PMEANX") + CALL MPPDB_CHECK(PMEANY,"ADVEC_4TH_ORDER_ALGO end:PMEANY") +END IF + +!$acc end data + +!------------------------------------------------------------------------------- +! +END SUBROUTINE ADVEC_4TH_ORDER_ALGO +! +!------------------------------------------------------------------------------- +! +! ################################ +#ifndef MNH_OPENACC + FUNCTION MZF4(PA) RESULT(PMZF4) +#else + SUBROUTINE MZF4(PA,PMZF4) +#endif +! ################################ +! +!!**** *MZF4* - 4th order Shuman operator : mean operator in z direction for a +!! 
variable at a flux side +!! +!! PURPOSE +!! ------- +!! The purpose of this function is to compute a 4th order mean value +!! along the z direction (K index) for a field PA localized at a z-flux +!! point (w point). The result is localized at a mass point. +! +!!** METHOD +!! ------ +!! The result PMZF4(:,:,k) is defined by +!! PMZF4(:,:,k)=0.5*(PA(:,:,k)+PA(:,:,k+1)) at k=1 and size(PA,3)-1 +!! PMZF4(:,:,k)=-999. at k=size(PA,3) +!! PMZF4(:,:,k)=7/12*(PA(:,:,k)+PA(:,:,k+1)) +!! -1/12*(PA(:,:,k-1)+PA(:,:,k+2)) elsewhere +!! +!! EXTERNAL +!! -------- +!! NONE +!! +!! IMPLICIT ARGUMENTS +!! ------------------ +!! NONE +!! +!! REFERENCE +!! --------- +!! Book2 of documentation of Meso-NH (SHUMAN operators) +!! Technical specifications Report of The Meso-NH (chapters 3) +!! +!! AUTHOR +!! ------ +!! J.-P. Pinty * Lab Aerologie * +!! +!! MODIFICATIONS +!! ------------- +!! Original 25/10/05 +!! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +IMPLICIT NONE +! +!* 0.1 Declarations of argument and result +! +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PA ! variable at flux + ! side +REAL, DIMENSION(SIZE(PA,1),SIZE(PA,2),SIZE(PA,3)) :: PMZF4 ! result at mass + ! localization +! +!* 0.2 Declarations of local variables +! +! +INTEGER :: JK ! loop index in z direction +INTEGER :: IKU ! upper bound in z direction of PA +! +INTEGER :: IIU,IJU,IIJU ! upper bounds in the x and y directions of PA +INTEGER :: JIJ,JIJK ! running loop indexes after linearisation +INTEGER :: JIJKOR1,JIJKEND1 ! loop boundaries +INTEGER :: JIJKOR2,JIJKEND2 ! loop boundaries +INTEGER :: JIJKOR3,JIJKEND3 ! loop boundaries +! +!------------------------------------------------------------------------------- + +!$acc data present( PA, PMZF4 ) +! +!* 1. DEFINITION OF MZF4 +! ------------------ +! +IIU = SIZE(PA,1) +IJU = SIZE(PA,2) +IKU = SIZE(PA,3) +! +IIJU = IIU*IJU +! +JIJKOR1 = 1 + IIJU +JIJKEND1 = 2*IIJU +! 
+!$acc kernels +!CDIR NODEP +!OCL NOVREC +DO JIJK=JIJKOR1 , JIJKEND1 + PMZF4(JIJK-IIJU,1,1) = 0.5*( PA(JIJK-IIJU,1,1)+PA(JIJK,1,1) ) +END DO +! +JIJKOR2 = 1 + JIJKEND1 +JIJKEND2 = IIJU*IKU - IIJU +! +!CDIR NODEP +!OCL NOVREC +DO JIJK=JIJKOR2 , JIJKEND2 + PMZF4(JIJK-IIJU,1,1) = (7.0*( PA(JIJK,1,1)+PA(JIJK-IIJU,1,1) ) - & + ( PA(JIJK+IIJU,1,1)+PA(JIJK-2*IIJU,1,1) ) )/12.0 +END DO +! +JIJKOR3 = 1 + JIJKEND2 +JIJKEND3 = IIJU*IKU +! +!CDIR NODEP +!OCL NOVREC +DO JIJK=JIJKOR3 , JIJKEND3 + PMZF4(JIJK-IIJU,1,1) = 0.5*( PA(JIJK-IIJU,1,1)+PA(JIJK,1,1) ) +END DO +! +!CDIR NODEP +!OCL NOVREC +DO JIJ=1,IIJU + PMZF4(JIJ,1,IKU) = -999. +END DO +!$acc end kernels + +!$acc end data + +!------------------------------------------------------------------------------- +! +#ifndef MNH_OPENACC + END FUNCTION MZF4 +#else + END SUBROUTINE MZF4 +#endif +! +!------------------------------------------------------------------------------- +! +! ################################ +#ifndef MNH_OPENACC + FUNCTION MZM4(PA) RESULT(PMZM4) +#else + SUBROUTINE MZM4(PA,PMZM4) +#endif +! ################################ +! +!!**** *MZM4* - 4th order Shuman operator : mean operator in z direction for a +!! mass variable +!! +!! PURPOSE +!! ------- +!! The purpose of this function is to compute a 4th order mean value +!! along the z direction (K index) for a field PA localized at a mass +!! point. The result is localized at a z-flux point (w point). +!! +!!** METHOD +!! ------ +!! The result PMZM4(:,:,k) is defined by +!! PMZM4(:,:,k)=0.5*(PA(:,:,k)+PA(:,:,k+1)) at k=2 and size(PA,3) +!! PMZM4(:,:,k)=-999. at k=1 +!! PMZM4(:,:,k)=7/12*(PA(:,:,k)+PA(:,:,k+1)) +!! -1/12*(PA(:,:,k-1)+PA(:,:,k+2)) elsewhere +!! +!! EXTERNAL +!! -------- +!! NONE +!! +!! IMPLICIT ARGUMENTS(PMEANX,PMEANY) +!! ------------------ +!! NONE +!! +!! REFERENCE +!! --------- +!! Book2 of documentation of Meso-NH (SHUMAN operators) +!! Technical specifications Report of The Meso-NH (chapters 3) +!! +!! AUTHOR +!! ------ +!! J.-P. 
Pinty * Lab Aerologie * +!! +!! MODIFICATIONS +!! ------------- +!! Original 25/10/05 +!! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +IMPLICIT NONE +! +!* 0.1 Declarations of argument and result +! +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PA ! variable at mass + ! localization +REAL, DIMENSION(SIZE(PA,1),SIZE(PA,2),SIZE(PA,3)) :: PMZM4 ! result at flux + ! localization +! +!* 0.2 Declarations of local variables +! +! +INTEGER :: JK ! loop index in z direction +INTEGER :: IKU ! upper bound in z direction of PA +! +INTEGER :: IIU,IJU,IIJU ! upper bounds in the x and y directions of PA +INTEGER :: JIJ,JIJK ! running loop indexes after linearisation +INTEGER :: JIJKOR1,JIJKEND1 ! loop boundaries +INTEGER :: JIJKOR2,JIJKEND2 ! loop boundaries +! +!------------------------------------------------------------------------------- + +!$acc data present( PA, PMZM4 ) +! +!* 1. DEFINITION OF MZM4 +! ------------------ +! +IIU = SIZE(PA,1) +IJU = SIZE(PA,2) +IKU = SIZE(PA,3) +! +IIJU = IIU*IJU +! +JIJKOR1 = 1 + IIJU +JIJKEND1 = JIJKOR1 + IIJU +! +!$acc kernels +!CDIR NODEP +!OCL NOVREC +DO JIJK=JIJKOR1 , JIJKEND1 + PMZM4(JIJK,1,1) = 0.5*( PA(JIJK,1,1)+PA(JIJK-IIJU,1,1) ) +END DO +! +JIJKOR2 = 1 + JIJKEND1 +JIJKEND2 = IIJU*IKU - IIJU +! +!CDIR NODEP +!OCL NOVREC +DO JIJK=JIJKOR2 , JIJKEND2 + PMZM4(JIJK,1,1) = (7.0*( PA(JIJK,1,1)+PA(JIJK-IIJU,1,1) ) - & + ( PA(JIJK+IIJU,1,1)+PA(JIJK-2*IIJU,1,1) ) )/12.0 +END DO +! +!CDIR NODEP +!OCL NOVREC +DO JIJ=1,IIJU + PMZM4(JIJ,1,IKU) = 0.5*( PA(JIJ,1,IKU)+PA(JIJ-IIJU,1,IKU) ) +END DO +! +!CDIR NODEP +!OCL NOVREC +DO JIJ=1,IIJU + PMZM4(JIJ,1,1) = -999. +END DO +!$acc end kernels + +!$acc end data + +!------------------------------------------------------------------------------- +! 
+#ifndef MNH_OPENACC + END FUNCTION MZM4 +#else + END SUBROUTINE MZM4 +#endif diff --git a/src/ZSOLVER/advection_metsv.f90 b/src/ZSOLVER/advection_metsv.f90 index c9cbce0aeec1af60169da227853743a30d66a052..410aece0ad7e61db207c731085a59788b524f19b 100644 --- a/src/ZSOLVER/advection_metsv.f90 +++ b/src/ZSOLVER/advection_metsv.f90 @@ -188,9 +188,14 @@ use mode_sum_ll, only: MAX_ll use mode_tools_ll, only: GET_INDICE_ll, lnorth_ll, lsouth_ll, least_ll, lwest_ll ! USE MODI_ADV_BOUNDARIES -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +! mnh_undef(LOOP) +! mnh_undef(OPENACC) +#endif + USE MODI_CONTRAV USE MODI_GET_HALO USE MODI_PPM_RHODJ @@ -534,27 +539,40 @@ IF (.NOT. L1D) THEN !$acc end kernels IF (LIBM) THEN !$acc kernels -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +!$mnh_expand_array(JI=IIB:IIE,JJ=IJB:IJE,JK=1:JKU) ZCFLU(IIB:IIE,IJB:IJE,:) = ZCFLU(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,2)/& (XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:))**(1./3.))**2.)) ZCFLV(IIB:IIE,IJB:IJE,:) = ZCFLV(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,3)/& (XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:))**(1./3.))**2.)) ZCFLW(IIB:IIE,IJB:IJE,:) = ZCFLW(IIB:IIE,IJB:IJE,:)*(1.-exp(-(XIBM_LS(IIB:IIE,IJB:IJE,:,4)/& (XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:))**(1./3.))**2.)) +!$mnh_end_expand_array() +#else +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +DO CONCURRENT (JK=1:JKU,JJ=IJB:IJE,JI=IIB:IIE) #else +!$mnh_expand_array(JI=IIB:IIE,JJ=IJB:IJE,JK=1:JKU) +#endif ZCFLU(IIB:IIE,IJB:IJE,:) = ZCFLU(IIB:IIE,IJB:IJE,:)*(1.-Br_exp(-Br_pow(XIBM_LS(IIB:IIE,IJB:IJE,:,2)/& Br_pow(XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:),1./3.),2.))) ZCFLV(IIB:IIE,IJB:IJE,:) = ZCFLV(IIB:IIE,IJB:IJE,:)*(1.-Br_exp(-Br_pow(XIBM_LS(IIB:IIE,IJB:IJE,:,3)/& 
Br_pow(XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:),1./3.),2.))) ZCFLW(IIB:IIE,IJB:IJE,:) = ZCFLW(IIB:IIE,IJB:IJE,:)*(1.-Br_exp(-Br_pow(XIBM_LS(IIB:IIE,IJB:IJE,:,4)/& Br_pow(XRHODJ(IIB:IIE,IJB:IJE,:)/XRHODREF(IIB:IIE,IJB:IJE,:),1./3.),2.))) +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +END DO ! CONCURRENT +#else +!$mnh_end_expand_array() +#endif #endif WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,2).GT.(-ZIBM_EPSI)) ZCFLU(IIB:IIE,IJB:IJE,:)=0. WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,3).GT.(-ZIBM_EPSI)) ZCFLV(IIB:IIE,IJB:IJE,:)=0. WHERE (XIBM_LS(IIB:IIE,IJB:IJE,:,4).GT.(-ZIBM_EPSI)) ZCFLW(IIB:IIE,IJB:IJE,:)=0. !$acc end kernels ENDIF -#ifndef MNH_BITREP +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) IF (.NOT. L2D) THEN !$acc kernels present_cr(ZCFL) ZCFL(:,:,:) = SQRT(ZCFLU(:,:,:)**2+ZCFLV(:,:,:)**2+ZCFLW(:,:,:)**2) @@ -567,17 +585,15 @@ IF (.NOT. L1D) THEN #else IF (.NOT. L2D) THEN !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLV(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) - END DO + !$mnh_end_do() !$acc end kernels ELSE !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) + !$mnh_do_concurrent( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = SQRT(BR_P2(ZCFLU(JI,JJ,JK))+BR_P2(ZCFLW(JI,JJ,JK))) - END DO + !$mnh_end_do() !$acc end kernels END IF #endif @@ -585,13 +601,13 @@ ELSE !$acc kernels ZCFLU(:,:,:) = 0.0 ; ZCFLV(:,:,:) = 0.0 ; ZCFLW(:,:,:) = 0.0 ZCFLW(IIB:IIE,IJB:IJE,:) = ABS(ZRWCPPM(IIB:IIE,IJB:IJE,:) * PTSTEP) -#ifndef MNH_BITREP +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZCFL(:,:,:) = SQRT(ZCFLW(:,:,:)**2) -#else - !$acc_nv loop independent collapse(3) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU ) +#else + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU ) ZCFL(JI,JJ,JK) = 
SQRT(BR_P2(ZCFLW(JI,JJ,JK))) - END DO + !$mnh_end_do() #endif !$acc end kernels END IF @@ -661,12 +677,24 @@ ZCFLW_MAX = MAX_ll(ZCFLW,IINFO_ll) ZCFL_MAX = MAX_ll(ZCFL,IINFO_ll) #else ! +#ifndef MNH_COMPILER_NVHPC !$acc kernels ZCFLU_MAX = MAXVAL(ZCFLU(IIB:IIE,IJB:IJE,IKB:IKE)) ZCFLV_MAX = MAXVAL(ZCFLV(IIB:IIE,IJB:IJE,IKB:IKE)) ZCFLW_MAX = MAXVAL(ZCFLW(IIB:IIE,IJB:IJE,IKB:IKE)) ZCFL_MAX = MAXVAL(ZCFL (IIB:IIE,IJB:IJE,IKB:IKE)) !$acc end kernels +#else +ZCFLU_MAX = 0.0 ; ZCFLV_MAX = 0.0 ; ZCFLW_MAX = 0.0 ; ZCFL_MAX = 0.0 +!$acc parallel reduction(max:ZCFLU_MAX,ZCFLV_MAX,ZCFLW_MAX,ZCFL_MAX) +!$mnh_do_concurrent(JI=IIB:IIE,JJ=IJB:IJE,JK=IKB:IKE) + ZCFLU_MAX = MAX(ZCFLU_MAX,ZCFLU(JI,JJ,JK)) + ZCFLV_MAX = MAX(ZCFLV_MAX,ZCFLV(JI,JJ,JK)) + ZCFLW_MAX = MAX(ZCFLW_MAX,ZCFLW(JI,JJ,JK)) + ZCFL_MAX = MAX(ZCFL_MAX,ZCFL (JI,JJ,JK)) +!$mnh_end_do() +!$acc end parallel +#endif ! CALL MPI_ALLREDUCE(MPI_IN_PLACE,ZCFLU_MAX,1,MNHREAL_MPI,MPI_MAX,NMNH_COMM_WORLD,IINFO_ll) CALL MPI_ALLREDUCE(MPI_IN_PLACE,ZCFLV_MAX,1,MNHREAL_MPI,MPI_MAX,NMNH_COMM_WORLD,IINFO_ll) @@ -749,7 +777,9 @@ ZRWCPPM(:,:,:) = ZRWCPPM(:,:,:)*ZTSTEP_PPM !dir$ concurrent ZRTHS_OTHER(:,:,:) = PRTHS(:,:,:) - PTHT(:,:,:) * PRHODJ(:,:,:) / PTSTEP !dir$ concurrent -IF (GTKE) ZRTKES_OTHER(:,:,:) = PRTKES(:,:,:) - PTKET(:,:,:) * PRHODJ(:,:,:) / PTSTEP +IF (GTKE) THEN + ZRTKES_OTHER(:,:,:) = PRTKES(:,:,:) - PTKET(:,:,:) * PRHODJ(:,:,:) / PTSTEP +END IF DO JR = 1, KRR !dir$ concurrent ZRRS_OTHER(:,:,:,JR) = PRRS(:,:,:,JR) - PRT(:,:,:,JR) * PRHODJ(:,:,:) / PTSTEP @@ -845,10 +875,18 @@ CALL PPM_RHODJ(HLBCX,HLBCY, ZRUCPPM, ZRVCPPM, ZRWCPPM, & !$acc kernels !dir$ concurrent ZTH(:,:,:) = PTHT(:,:,:) -!dir$ concurrent -IF (KRR /=0 ) ZR(:,:,:,:) = PRT(:,:,:,:) -!dir$ concurrent -IF (KSV /=0 ) ZSV(:,:,:,:) = PSVT(:,:,:,:) +!dir concurrent +IF (KRR /=0 ) THEN + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JR=1:KRR ) + ZR(:,:,:,:) = PRT(:,:,:,:) + !$mnh_end_expand_array() +END IF +!dir concurrent +IF (KSV /=0 ) THEN + 
!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JSV=1:KSV) + ZSV(:,:,:,:) = PSVT(:,:,:,:) + !$mnh_end_expand_array() +END IF ! IF (GTKE) THEN PRTKES_ADV(:,:,:) = 0. @@ -895,15 +933,21 @@ DO JSPL=1,KSPLIT ! Tendencies of PPM ! ! acc kernels - !$acc kernels - !dir$ concurrent + !$acc kernels present_cr(PRTHS,ZRTHS_PPM) PRTHS(:,:,:) = PRTHS (:,:,:) + ZRTHS_PPM (:,:,:) / KSPLIT - !dir$ concurrent - IF (GTKE) PRTKES_ADV(:,:,:) = PRTKES_ADV(:,:,:) + ZRTKES_PPM(:,:,:) / KSPLIT - !dir$ concurrent - IF (KRR /=0) PRRS (:,:,:,:) = PRRS (:,:,:,:) + ZRRS_PPM (:,:,:,:) / KSPLIT - !dir$ concurrent - IF (KSV /=0 ) PRSVS (:,:,:,:) = PRSVS (:,:,:,:) + ZRSVS_PPM (:,:,:,:) / KSPLIT + IF (GTKE) THEN + PRTKES_ADV(:,:,:) = PRTKES_ADV(:,:,:) + ZRTKES_PPM(:,:,:) / KSPLIT + END IF + IF (KRR /=0) THEN + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JR=1:KRR) + PRRS (:,:,:,:) = PRRS (:,:,:,:) + ZRRS_PPM (:,:,:,:) / KSPLIT + !$mnh_end_expand_array() + END IF + IF (KSV /=0 ) THEN + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU,JSV=1:KSV) + PRSVS (:,:,:,:) = PRSVS (:,:,:,:) + ZRSVS_PPM (:,:,:,:) / KSPLIT + !$mnh_end_expand_array() + END IF !$acc end kernels ! 
IF (JSPL<KSPLIT) THEN @@ -920,18 +964,16 @@ DO JSPL=1,KSPLIT !$acc end kernels END IF !$acc kernels - !$acc_nv loop independent collapse(4) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) + !$mnh_do_concurrent( JI=1:JIU,JJ=1:JJU,JK=1:JKU, JR=1:KRR ) ZR(JI,JJ,JK,JR) = ZR(JI,JJ,JK,JR) + ( ZRRS_PPM(JI,JJ,JK,JR) + ZRRS_OTHER(JI,JJ,JK,JR) + PRRS_CLD(JI,JJ,JK,JR) ) & * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT !$acc loop seq DO JSV = 1, KSV - !$acc_nv loop independent collapse(3) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSV(JI,JJ,JK,JSV) = ZSV(JI,JJ,JK,JSV) + ( ZRSVS_PPM(JI,JJ,JK,JSV) + ZRSVS_OTHER(JI,JJ,JK,JSV) + & PRSVS_CLD(JI,JJ,JK,JSV) ) * ZTSTEP_PPM / PRHODJ(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_end_do() !CONCURRENT END DO !$acc end kernels END IF diff --git a/src/ZSOLVER/advection_uvw.f90 b/src/ZSOLVER/advection_uvw.f90 index 6885bca91dfdc68586b310582e8c704d970621ed..58ba42fe5460a68a9b0c80cb3f7f3ec2d15dc89e 100644 --- a/src/ZSOLVER/advection_uvw.f90 +++ b/src/ZSOLVER/advection_uvw.f90 @@ -437,30 +437,24 @@ DO JSPL=1,ISPLIT ! Tendencies on wind ! acc update device(ZRUS_ADV,ZRVS_ADV,ZRWS_ADV) !$acc kernels -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT (JI=1:IIU , JJ=1:IJU , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=1:IJU,JK=1:IKU ) PRUS(JI,JJ,JK) = PRUS(JI,JJ,JK) + ZRUS_ADV(JI,JJ,JK) / ISPLIT PRVS(JI,JJ,JK) = PRVS(JI,JJ,JK) + ZRVS_ADV(JI,JJ,JK) / ISPLIT PRWS(JI,JJ,JK) = PRWS(JI,JJ,JK) + ZRWS_ADV(JI,JJ,JK) / ISPLIT -END DO +!$mnh_end_do() IF (JSPL<ISPLIT) THEN ! ! Guesses for next time splitting loop ! ! 
-#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT (JI=1:IIU , JJ=1:IJU , JK=1:IKU ) +!$mnh_do_concurrent(JI=1:IIU,JJ=1:IJU,JK=1:IKU) ZU(JI,JJ,JK) = ZU(JI,JJ,JK) + ZTSTEP / ZMXM_RHODJ(JI,JJ,JK) * & (ZRUS_OTHER(JI,JJ,JK) + ZRUS_ADV(JI,JJ,JK)) ZV(JI,JJ,JK) = ZV(JI,JJ,JK) + ZTSTEP / ZMYM_RHODJ(JI,JJ,JK) * & (ZRVS_OTHER(JI,JJ,JK) + ZRVS_ADV(JI,JJ,JK)) ZW(JI,JJ,JK) = ZW(JI,JJ,JK) + ZTSTEP / ZMZM_RHODJ(JI,JJ,JK) * & (ZRWS_OTHER(JI,JJ,JK) + ZRWS_ADV(JI,JJ,JK)) -END DO +!$mnh_end_do() END IF !$acc end kernels ! diff --git a/src/ZSOLVER/advection_uvw_cen.f90 b/src/ZSOLVER/advection_uvw_cen.f90 index 289737f14f2e05774aa9ce3acba37a52c2e1f3d7..a4055eda197f8a47a2aa2ab075f9a3c9336cf74c 100644 --- a/src/ZSOLVER/advection_uvw_cen.f90 +++ b/src/ZSOLVER/advection_uvw_cen.f90 @@ -16,7 +16,11 @@ INTERFACE PUT, PVT, PWT, & PRHODJ, PDXX, PDYY, PDZZ, PDZX, PDZY, & PRUS,PRVS, PRWS, & +#ifndef MNH_OPENACC TPHALO2MLIST ) +#else + TPHALO2_UT,TPHALO2_VT,TPHALO2_WT ) +#endif ! USE MODD_ARGSLIST_ll, ONLY : HALO2LIST_ll ! @@ -37,7 +41,11 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS , PRVS , PRWS ! Sources terms ! ! halo lists for 4th order advection +#ifndef MNH_OPENACC TYPE(HALO2LIST_ll), POINTER :: TPHALO2MLIST ! momentum variables +#else +TYPE(HALO2LIST_ll), POINTER :: TPHALO2_UT,TPHALO2_VT,TPHALO2_WT +#endif ! END SUBROUTINE ADVECTION_UVW_CEN ! @@ -53,7 +61,11 @@ END MODULE MODI_ADVECTION_UVW_CEN PUT, PVT, PWT, & PRHODJ, PDXX, PDYY, PDZZ, PDZX, PDZY, & PRUS,PRVS, PRWS, & +#ifndef MNH_OPENACC TPHALO2MLIST ) +#else + TPHALO2_UT,TPHALO2_VT,TPHALO2_WT ) +#endif ! ########################################################################## ! !!**** *ADVECTION * - routine to call the specialized advection routines @@ -141,7 +153,11 @@ REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS , PRVS , PRWS ! Sources terms ! ! halo lists for 4th order advection +#ifndef MNH_OPENACC TYPE(HALO2LIST_ll), POINTER :: TPHALO2MLIST ! 
momentum variables +#else +TYPE(HALO2LIST_ll), POINTER :: TPHALO2_UT,TPHALO2_VT,TPHALO2_WT +#endif ! ! !* 0.2 declarations of local variables @@ -378,11 +394,17 @@ IF (HUVW_ADV_SCHEME=='CEN2ND' ) THEN ELSEIF (HUVW_ADV_SCHEME=='CEN4TH') THEN ! CALL ADVECUVW_4TH ( HLBCX, HLBCY, ZRUCT, ZRVCT, ZRWCT, & - PUT, PVT, PWT, ZRUS, ZRVS, ZRWS, TPHALO2MLIST ) + PUT, PVT, PWT, ZRUS, ZRVS, ZRWS, & +#ifndef MNH_OPENACC + TPHALO2MLIST ) +#else + TPHALO2_UT,TPHALO2_VT,TPHALO2_WT ) +#endif ! END IF ! -!$acc kernels present( ZRUS, ZRVS, ZRWS, ZMXM_RHODJ, ZMYM_RHODJ, ZMZM_RHODJ ) +!$acc kernels present(ZRUS,ZRVS,ZRWS,ZMXM_RHODJ,ZMYM_RHODJ,ZMZM_RHODJ) & +!$acc present_cr(PRUS,PRVS,PRWS,PDUM,PDWM) ZUS(:,:,:) = ZRUS(:,:,:)/ZMXM_RHODJ(:,:,:)*2.*PTSTEP ZVS(:,:,:) = ZRVS(:,:,:)/ZMYM_RHODJ(:,:,:)*2.*PTSTEP ZWS(:,:,:) = ZRWS(:,:,:)/ZMZM_RHODJ(:,:,:)*2.*PTSTEP diff --git a/src/ZSOLVER/advecuvw_4th.f90 b/src/ZSOLVER/advecuvw_4th.f90 new file mode 100644 index 0000000000000000000000000000000000000000..5705d338cd931d3d5e538cc72088273bcbcfbb92 --- /dev/null +++ b/src/ZSOLVER/advecuvw_4th.f90 @@ -0,0 +1,412 @@ +!MNH_LIC Copyright 2005-2022 CNRS, Meteo-France and Universite Paul Sabatier +!MNH_LIC This is part of the Meso-NH software governed by the CeCILL-C licence +!MNH_LIC version 1. See LICENSE, CeCILL-C_V1-en.txt and CeCILL-C_V1-fr.txt +!MNH_LIC for details. version 1. +!----------------------------------------------------------------- +! ########################### + MODULE MODI_ADVECUVW_4TH +! ########################### +! +INTERFACE +! + SUBROUTINE ADVECUVW_4TH ( HLBCX, HLBCY, PRUCT, PRVCT, PRWCT, & + PUT, PVT, PWT, PRUS, PRVS, PRWS, & +#ifndef MNH_OPENACC + TPHALO2LIST ) +#else + TPHALO2_UT,TPHALO2_VT,TPHALO2_WT ) +#endif +! +USE MODD_ARGSLIST_ll, ONLY : HALO2LIST_ll +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! X direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! Y direction LBC type +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRUCT ! 
contravariant +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRVCT ! components +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRWCT ! of momentum +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PUT, PVT, PWT ! U,V,W at t +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS, PRVS, PRWS ! Source terms +! +#ifndef MNH_OPENACC +TYPE(HALO2LIST_ll), POINTER :: TPHALO2LIST ! list for diffusion +#else +TYPE(HALO2LIST_ll), POINTER :: TPHALO2_UT,TPHALO2_VT,TPHALO2_WT +#endif +! +END SUBROUTINE ADVECUVW_4TH +! +END INTERFACE +! +END MODULE MODI_ADVECUVW_4TH +! +! +! ###################################################################### + SUBROUTINE ADVECUVW_4TH ( HLBCX, HLBCY, PRUCT, PRVCT, PRWCT, & + PUT, PVT, PWT, PRUS, PRVS, PRWS, & +#ifndef MNH_OPENACC + TPHALO2LIST ) +#else + TPHALO2_UT,TPHALO2_VT,TPHALO2_WT ) +#endif +! ###################################################################### +! +!!**** *ADVECUVW_4TH * - routine to compute the 4th order centered +!! advection tendency of momentum (U,V,W) +!! +!! PURPOSE +!! ------- +!! The purpose of this routine is to call the ADVEC_4TH_ORDER_ALGO +!! routine for the horizontal advection and the MZM4 and MZF4 functions for +!! the vertical advection of momentum. The code is +!! parallelized and works for various boundary conditions. +!! +!!** METHOD +!! ------ +!! For each wind component the ADVECUVW_4TH routine calls +!! the ADVEC_4TH_ORDER_ALGO routine which computes the numerical advection +!! of any 3D field. +!! The following variables are passed as argument to ADVEC_4TH_ORDER_ALGO : +!! +!! -- The variable at t +!! -- The second layer of the halo of the field at t +!! -- The horizontal advection fluxes +!! -- The localisation on the model grid : +!! +!! IGRID = 1 for mass grid point +!! IGRID = 2 for U grid point +!! IGRID = 3 for V grid point +!! IGRID = 4 for W grid point +!! +!! EXTERNAL +!! -------- +!! BUDGET : Stores the different budget components +!! (not used in current version) +!! 
ADVEC_4TH_ORDER_ALGO : computes the horizontal advection fluxes +!! MZF4 and MZM4 : computes the vertical advection fluxes +!! +!! IMPLICIT ARGUMENTS +!! ------------------ +!! MODULE MODD_BUDGET: +!! NBUMOD : model in which budget is calculated +!! CBUTYPE : type of desired budget +!! 'CART' for cartesian box configuration +!! 'MASK' for budget zone defined by a mask +!! 'NONE' ' for no budget +!! NBUPROCCTR : process counter used for each budget variable +!! Switches for budgets activations: +!! +!! MODULE MODD_ARGSLIST +!! HALO2LIST_ll : type for a list of "HALO2_lls" +!! +!! REFERENCE +!! --------- +!! Book2 of documentation ( routine ADVECUVW_4TH ) +!! +!! AUTHOR +!! ------ +!! J.-P. Pinty * Laboratoire d'Aerologie* +!! +!! MODIFICATIONS +!! ------------- +!! Original 25/10/05 +!! Modif +!! J.Escobar 21/03/2013: for HALOK comment all NHALO=1 test +!! +!------------------------------------------------------------------------------- +! +!* 0. DECLARATIONS +! ------------ +! +USE MODD_ARGSLIST_ll, ONLY : HALO2LIST_ll +USE MODD_CONF +USE MODD_GRID_n +USE MODD_PARAMETERS + +USE MODE_ll +#ifdef MNH_OPENACC +USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE +#endif +use mode_mppdb + +USE MODI_ADVEC_4TH_ORDER_AUX +#ifndef MNH_OPENACC +USE MODI_SHUMAN +#else +USE MODI_SHUMAN_DEVICE +#endif +! +IMPLICIT NONE +! +!* 0.1 Declarations of dummy arguments : +! +! +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCX ! X direction LBC type +CHARACTER (LEN=4), DIMENSION(2), INTENT(IN) :: HLBCY ! Y direction LBC type +! +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRUCT ! contravariant +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRVCT ! components +REAL, DIMENSION(:,:,:), INTENT(IN) :: PRWCT ! of momentum +! +REAL, DIMENSION(:,:,:), INTENT(IN) :: PUT, PVT, PWT ! Variables at t +! +REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PRUS, PRVS, PRWS ! Source terms +! +#ifndef MNH_OPENACC +TYPE(HALO2LIST_ll), POINTER :: TPHALO2LIST ! 
list for diffusion +#else +TYPE(HALO2LIST_ll), POINTER :: TPHALO2_UT,TPHALO2_VT,TPHALO2_WT +#endif +! +!* 0.2 Declarations of local variables : +! +TYPE(HALO2LIST_ll), POINTER :: TZHALO2LIST +! +INTEGER :: IGRID ! localisation on the model grid +#ifndef MNH_OPENACC +REAL, DIMENSION(SIZE(PUT,1),SIZE(PUT,2),SIZE(PUT,3)) :: ZMEANX, ZMEANY ! fluxes +#else +INTEGER :: IIU, IJU, IKU +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZMEANX, ZMEANY ! fluxes +! +REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZTEMP1, ZTEMP2, ZTEMP3, ZTEMP4 +#endif +! +#if 0 +#define dxm(PDXM,PA) PDXM(2:IIU,:,:) = PA(2:IIU,:,:) - PA(1:IIU-1,:,:) ; PDXM(1,:,:) = PDXM(IIU-2*JPHEXT+1,:,:) ! DXM(PDXM,PA) +#define mxf(PMXF,PA) PMXF(1:IIU-1,:,:) = 0.5*( PA(2:IIU,:,:)+PA(1:IIU-1,:,:) ) ; PMXF(IIU,:,:) = PMXF(2*JPHEXT,:,:) ! MXF(PMXF,PA) +#define mxm(PMXM,PA) PMXM(2:IIU,:,:) = 0.5*( PA(2:IIU,:,:)+PA(1:IIU-1,:,:) ) ; PMXM(1,:,:) = PMXM(IIU-2*JPHEXT+1,:,:) ! MXM(PMXM,PA) +#define dyf(PDYF,PA) PDYF(:,1:IJU-1,:) = PA(:,2:IJU,:) - PA(:,1:IJU-1,:) ; PDYF(:,IJU,:) = PDYF(:,2*JPHEXT,:) ! DYF(PDYF,PA) +#define dzf(PDZF,PA) PDZF(:,:,1:IKU-1) = PA(:,:,2:IKU) - PA(:,:,1:IKU-1) ; PDZF(:,:,IKU) = -999. ! DZF(PDZF,PA) +#define mzm4(PMZM4,PA) PMZM4(:,:,3:IKU-1) = (7.0*( PA(:,:,3:IKU-1)+PA(:,:,2:IKU-2) ) - (PA(:,:,4:IKU)+PA(:,:,1:IKU-3) ) )/12.0 ; \ + PMZM4(:,:,2) = 0.5*( PA(:,:,2)+PA(:,:,1) ) ; PMZM4(:,:,IKU) = 0.5*( PA(:,:,IKU)+PA(:,:,IKU-1) ) ; PMZM4(:,:,1) = -999. +#define mym(PMYM,PA) PMYM(:,2:IJU,:) = 0.5*( PA(:,2:IJU,:)+PA(:,1:IJU-1,:) ) ; PMYM(:,1,:) = PMYM(:,IJU-2*JPHEXT+1,:) ! MYM(PMYM,PA) +#define dxf(PDXF,PA) PDXF(1:IIU-1,:,:) = PA(2:IIU,:,:) - PA(1:IIU-1,:,:) ; PDXF(IIU,:,:) = PDXF(2*JPHEXT,:,:) ! DXF(PDXF,PA) +#define myf(PMYF,PA) PMYF(:,1:IJU-1,:) = 0.5*( PA(:,1:IJU-1,:)+PA(:,2:IJU,:) ) ; PMYF(:,IJU,:) = PMYF(:,2*JPHEXT,:) ! MYF(PMYF,PA) +#define dym(PDYM,PA) PDYM(:,2:IJU,:) = PA(:,2:IJU,:) - PA(:,1:IJU-1,:) ; PDYM(:,1,:) = PDYM(:,IJU-2*JPHEXT+1,:) ! 
DYM(PDYM,PA) +#define mzm(PMZM,PA) PMZM(:,:,2:IKU) = 0.5*( PA(:,:,2:IKU)+PA(:,:,1:IKU-1) ) ; PMZM(:,:,1) = -999. ! MZM(PMZM,PA) +#define mzf(PMZF,PA) PMZF(:,:,1:IKU-1) = 0.5*( PA(:,:,1:IKU-1)+PA(:,:,2:IKU) ) ; PMZF(:,:,IKU) = -999. ! MZF(PMZF,PA) +#define dzm(PDZM,PA) PDZM(:,:,2:IKU) = PA(:,:,2:IKU) - PA(:,:,1:IKU-1) ; PDZM(:,:,1) = -999. ! DZM(PDZM,PA) +#define mzf4(PMZF4,PA) PMZF4(:,:,2:IKU-2) = (7.0*( PA(:,:,3:IKU-1)+PA(:,:,2:IKU-2) ) - (PA(:,:,4:IKU)+PA(:,:,1:IKU-3) ) )/12.0 ; \ + PMZF4(:,:,1) = 0.5*( PA(:,:,2)+PA(:,:,1) ) ; PMZF4(:,:,IKU-1) = 0.5*( PA(:,:,IKU)+PA(:,:,IKU-1) ) ; PMZF4(:,:,IKU) = -999. +#endif +! +IF (MPPDB_INITIALIZED) THEN + !Check all IN arrays + CALL MPPDB_CHECK(PRUCT,"ADVECUVW_4TH beg:PRUCT") + CALL MPPDB_CHECK(PRVCT,"ADVECUVW_4TH beg:PRVCT") + CALL MPPDB_CHECK(PRWCT,"ADVECUVW_4TH beg:PRWCT") + CALL MPPDB_CHECK(PUT,"ADVECUVW_4TH beg:PUT") + CALL MPPDB_CHECK(PVT,"ADVECUVW_4TH beg:PVT") + CALL MPPDB_CHECK(PWT,"ADVECUVW_4TH beg:PWT") + !Check all INOUT arrays + CALL MPPDB_CHECK(PRUS,"ADVECUVW_4TH beg:PRUS") + CALL MPPDB_CHECK(PRVS,"ADVECUVW_4TH beg:PRVS") + CALL MPPDB_CHECK(PRWS,"ADVECUVW_4TH beg:PRWS") +END IF + +#ifdef MNH_OPENACC +IIU = SIZE( PUT, 1 ) +IJU = SIZE( PUT, 2 ) +IKU = SIZE( PUT, 3 ) + +!Pin positions in the pools of MNH memory +CALL MNH_MEM_POSITION_PIN() + +CALL MNH_MEM_GET( ZMEANX, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZMEANY, IIU, IJU, IKU ) + +CALL MNH_MEM_GET( ZTEMP1, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZTEMP2, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZTEMP3, IIU, IJU, IKU ) +CALL MNH_MEM_GET( ZTEMP4, IIU, IJU, IKU ) +#endif + +!$acc data present( PRUCT, PRVCT, PRWCT, PUT, PVT, PWT, PRUS, PRVS, PRWS, ZMEANX, ZMEANY, ZTEMP1, ZTEMP2, ZTEMP3, ZTEMP4 ) + +!------------------------------------------------------------------------------- +! +!* 2. CALL THE ADVEC_4TH_ORDER_ALGO ROUTINE FOR MOMENTUM +! -------------------------------------------------- +! 
+IGRID = 2 +!!$IF(NHALO == 1) THEN +#ifndef MNH_OPENACC + TZHALO2LIST => TPHALO2LIST +#else + TZHALO2LIST => TPHALO2_UT +#endif + CALL ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PUT, IGRID, ZMEANX, ZMEANY, & + TZHALO2LIST%HALO2 ) +!!$ELSE +!!$ CALL ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PUT, IGRID, ZMEANX, ZMEANY) +!!$ENDIF +! +#ifndef MNH_OPENACC +PRUS(:,:,:) = PRUS(:,:,:) & + -DXM( MXF(PRUCT(:,:,:))*ZMEANX(:,:,:) ) +! +PRUS(:,:,:) = PRUS(:,:,:) & + -DYF( MXM(PRVCT(:,:,:))*ZMEANY(:,:,:) ) +! +PRUS(:,:,:) = PRUS(:,:,:) & + -DZF( MXM(PRWCT(:,:,:))*MZM4(PUT(:,:,:)) ) +#else +call mxf_device(PRUCT,ZTEMP1) +!$acc kernels +ZTEMP2 = ZTEMP1 * ZMEANX +!$acc end kernels +call dxm_device(ZTEMP2,ZTEMP3) +!$acc kernels +PRUS(:,:,:) = PRUS(:,:,:) - ZTEMP3 +!$acc end kernels +! +call mxm_device(PRVCT,ZTEMP1) +!$acc kernels +ZTEMP2 = ZTEMP1 * ZMEANY +!$acc end kernels +call dyf_device(ZTEMP2,ZTEMP3) +!$acc kernels +PRUS(:,:,:) = PRUS(:,:,:) - ZTEMP3 +!$acc end kernels +! +call MZM4( PUT , ZTEMP1 ) +call mxm_device(PRWCT,ZTEMP2) +!$acc kernels +ZTEMP3 = ZTEMP1 * ZTEMP2 +!$acc end kernels +call dzf_device( ZTEMP3, ZTEMP4 ) +!$acc kernels +PRUS(:,:,:) = PRUS(:,:,:) - ZTEMP4 +!$acc end kernels +#endif +! +! +IGRID = 3 +!!$IF(NHALO == 1) THEN +#ifndef MNH_OPENACC + TZHALO2LIST => TZHALO2LIST%NEXT +#else + TZHALO2LIST => TPHALO2_VT +#endif + CALL ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PVT, IGRID, ZMEANX, ZMEANY, & + TZHALO2LIST%HALO2 ) +!!$ELSE +!!$ CALL ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PVT, IGRID, ZMEANX, ZMEANY) +!!$ENDIF +! +#ifndef MNH_OPENACC +PRVS(:,:,:) = PRVS(:,:,:) & + -DXF( MYM(PRUCT(:,:,:))*ZMEANX(:,:,:) ) +! +PRVS(:,:,:) = PRVS(:,:,:) & + -DYM( MYF(PRVCT(:,:,:))*ZMEANY(:,:,:) ) +! +PRVS(:,:,:) = PRVS(:,:,:) & + -DZF( MYM(PRWCT(:,:,:))*MZM4(PVT(:,:,:)) ) +#else +call mym_device(PRUCT,ZTEMP1) +!$acc kernels +ZTEMP2 = ZTEMP1 * ZMEANX +!$acc end kernels +call dxf_device(ZTEMP2,ZTEMP3) +!$acc kernels +PRVS(:,:,:) = PRVS(:,:,:) - ZTEMP3 +!$acc end kernels +! 
+call myf_device(PRVCT,ZTEMP1) +!$acc kernels +ZTEMP2 = ZTEMP1 * ZMEANY +!$acc end kernels +call dym_device(ZTEMP2,ZTEMP3) +!$acc kernels +PRVS(:,:,:) = PRVS(:,:,:) - ZTEMP3 +!$acc end kernels +! +call mym_device(PRWCT,ZTEMP1) +CALL MZM4( PVT , ZTEMP2) +!$acc kernels +ZTEMP3 = ZTEMP1 * ZTEMP2 +!$acc end kernels +call dzf_device( ZTEMP3, ZTEMP4 ) +!$acc kernels +PRVS(:,:,:) = PRVS(:,:,:) - ZTEMP4 +!$acc end kernels +#endif +CALL MPPDB_CHECK(PRUCT,"ADVECUVW_4TH 02: PRUCT") +! +! +IGRID = 4 +! +!!$IF(NHALO == 1) THEN +#ifndef MNH_OPENACC + TZHALO2LIST => TZHALO2LIST%NEXT +#else + TZHALO2LIST => TPHALO2_WT +#endif + CALL ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PWT, IGRID, ZMEANX, ZMEANY, & + TZHALO2LIST%HALO2 ) +!!$ELSE +!!$ CALL ADVEC_4TH_ORDER_ALGO(HLBCX, HLBCY, PWT, IGRID, ZMEANX, ZMEANY) +!!$ENDIF +! +#ifndef MNH_OPENACC +PRWS(:,:,:) = PRWS(:,:,:) & + -DXF( MZM(PRUCT(:,:,:))*ZMEANX(:,:,:) ) +! +PRWS(:,:,:) = PRWS(:,:,:) & + -DYF( MZM(PRVCT(:,:,:))*ZMEANY(:,:,:) ) +! +PRWS(:,:,:) = PRWS(:,:,:) & + -DZM( MZF(PRWCT(:,:,:))*MZF4(PWT(:,:,:)) ) +#else +call mzm_device(PRUCT,ZTEMP1) +!$acc kernels +ZTEMP2 = ZTEMP1 * ZMEANX +!$acc end kernels +call dxf_device(ZTEMP2,ZTEMP3) +!$acc kernels +PRWS(:,:,:) = PRWS(:,:,:) - ZTEMP3 +!$acc end kernels +! +call mzm_device(PRVCT,ZTEMP1) +!$acc kernels +ZTEMP2 = ZTEMP1 * ZMEANY +!$acc end kernels +call dyf_device(ZTEMP2,ZTEMP3) +!$acc kernels +PRWS(:,:,:) = PRWS(:,:,:) - ZTEMP3 +!$acc end kernels +! 
+call mzf_device( PRWCT, ZTEMP1 ) +CALL MZF4( PWT , ZTEMP2 ) +!$acc kernels +ZTEMP1 = ZTEMP1 * ZTEMP2 +!$acc end kernels +call dzm_device( ZTEMP1, ZTEMP4 ) +!$acc kernels +PRWS(:,:,:) = PRWS(:,:,:) - ZTEMP4 +!$acc end kernels +#endif + +!$acc end data + +#ifdef MNH_OPENACC +!Release all memory allocated with MNH_MEM_GET calls since last call to MNH_MEM_POSITION_PIN +CALL MNH_MEM_RELEASE() +#endif + +IF (MPPDB_INITIALIZED) THEN + !Check all INOUT arrays + CALL MPPDB_CHECK(PRUS,"ADVECUVW_4TH end:PRUS") + CALL MPPDB_CHECK(PRVS,"ADVECUVW_4TH end:PRVS") + CALL MPPDB_CHECK(PRWS,"ADVECUVW_4TH end:PRWS") +END IF + +!------------------------------------------------------------------------------- +! +! +END SUBROUTINE ADVECUVW_4TH diff --git a/src/ZSOLVER/advecuvw_rk.f90 b/src/ZSOLVER/advecuvw_rk.f90 index 831fb4385b589548272dcb90ca3f64f6a72a5238..e702792924df7aca7e38368376c30c4c3ffb0b34 100644 --- a/src/ZSOLVER/advecuvw_rk.f90 +++ b/src/ZSOLVER/advecuvw_rk.f90 @@ -463,7 +463,7 @@ INBVAR = 3 CALL INIT_HALO2_ll(TZHALO2MT_ll,INBVAR,SIZE(PUT,1),SIZE(PUT,2),SIZE(PWT,3)) #endif ! -!$acc kernels +!$acc kernels present_cr(ZRUS,ZRVS,ZRWS) ZRUS(:, :, :, : ) = 0. ZRVS(:, :, :, : ) = 0. ZRWS(:, :, :, : ) = 0. @@ -529,12 +529,16 @@ ENDIF #endif ELSE IF ((HUVW_ADV_SCHEME=='CEN4TH') .AND. (HTEMP_SCHEME=='RKC4')) THEN #ifdef MNH_OPENACC - STOP "HUVW_ADV_SCHEME=='CEN4TH') .AND. (HTEMP_SCHEME=='RKC4') NOT TESTED WITH OPENACC" + !STOP "HUVW_ADV_SCHEME=='CEN4TH') .AND. (HTEMP_SCHEME=='RKC4') NOT TESTED WITH OPENACC" #endif CALL ADVECUVW_4TH (HLBCX, HLBCY, PRUCT, PRVCT, PRWCT, & ZUT, ZVT, ZWT, & ZRUS(:,:,:,JS), ZRVS(:,:,:,JS), ZRWS(:,:,:,JS), & - TZHALO2MT_ll ) +#ifndef MNH_OPENACC + TZHALO2MT_ll ) +#else + TZHALO2_UT,TZHALO2_VT,TZHALO2_WT ) +#endif ENDIF ! 
IF (GIBM_LOWORD) THEN @@ -671,7 +675,7 @@ CALL CLEANLIST_ll(TZFIELDMT_ll) #ifndef MNH_OPENACC CALL DEL_HALO2_ll(TZHALO2MT_ll) #endif -!$acc update self(PRUS_ADV,PRVS_ADV,PRWS_ADV) +!!!!!!!!!!$acc update self(PRUS_ADV,PRVS_ADV,PRWS_ADV) !------------------------------------------------------------------------------- !$acc end data diff --git a/src/ZSOLVER/contrav.f90 b/src/ZSOLVER/contrav.f90 index 78b9f656f428dbb7487a4b5e0de9080ed5a77dff..2976ea75498e15d3ed7c362ee51d87121228775e 100644 --- a/src/ZSOLVER/contrav.f90 +++ b/src/ZSOLVER/contrav.f90 @@ -708,23 +708,20 @@ IF (KADV_ORDER == 2 ) THEN #endif !$acc kernels ! -!$acc_nv loop independent collapse(3) - do concurrent (ji=iib:iie,jj=1:iju,jk=ikb:ike+1) + !$mnh_do_concurrent(ji=iib:iie,jj=1:iju,jk=ikb:ike+1) Z1(ji, jj, jk ) = ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk - 1 ) ) * PDZX (ji, jj, jk ) * 0.25 & + ( PRUCT(ji + 1, jj, jk ) + PRUCT(ji + 1, jj, jk - 1 ) ) * PDZX (ji + 1, jj, jk ) * 0.25 - end do -!$acc_nv loop independent collapse(3) - do concurrent (ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1) + !$mnh_end_do() + !$mnh_do_concurrent(ji=1:iiu,jj=ijb:ije,jk=ikb:ike+1) Z2(ji, jj, jk ) = ( PRVCT(ji, jj, jk) + PRVCT( ji, jj, jk - 1) ) * PDZY(ji, jj, jk) * 0.25 & + ( PRVCT(ji, jj + 1, jk) + PRVCT( ji, jj + 1,jk - 1) ) * PDZY(ji, jj + 1, jk) * 0.25 - end do + !$mnh_end_do() PRWCT(:,:,:)=0. -!$acc_nv loop independent collapse(3) - do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) + !$mnh_do_concurrent(ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) - end do + !$mnh_end_do() ! !$acc end kernels ELSE IF (KADV_ORDER == 4 ) THEN @@ -777,25 +774,23 @@ ELSE IF (KADV_ORDER == 4 ) THEN !PW: OpenACC remarks: *computing only ztmp2 and reusing it at next iteration works ! but ji loop can not be collapsed -> 10x slower on GPU ! 
*ztmp1 and ztmp2 are not necessary but improve readability (no impact on performance) -!$acc_nv loop independent collapse(3) - do concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1) + !$mnh_do_concurrent(ji=IW:IE,jj=1:iju,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZX(ji, jj, jk ) - ( PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) + PDZX(ji-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(ji+1, jj, jk ) - ( PDZX(ji+2, jj, jk ) + PDZX(ji+1, jj, jk ) + PDZX(ji, jj, jk ) ) / 3.0 ) / 16.0 Z1(ji, jj, jk ) = 7.0 * ( ( PRUCT(ji, jj, jk ) + PRUCT(ji, jj, jk-1 ) ) * ztmp1 & + ( PRUCT(ji+1, jj, jk ) + PRUCT(ji+1, jj, jk-1 ) ) * ztmp2 ) / 12.0 & - 0.5 * ( ( PRUCT(ji-1, jj, jk ) + PRUCT(ji-1, jj, jk-1 ) ) * PDZX(ji-1, jj, jk) & + ( PRUCT(ji+2, jj, jk ) + PRUCT(ji+2, jj, jk-1 ) ) * PDZX(ji+2, jj, jk) ) / 12.0 - end do + !$mnh_end_do() ! -!$acc_nv loop independent collapse(3) - do concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1) + !$mnh_do_concurrent(ji=1:iiu,jj=is:in,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZY(ji, jj, jk ) - ( PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) + PDZY(ji, jj-1, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, jj+1, jk ) - ( PDZY(ji, jj+2, jk ) + PDZY(ji, jj+1, jk ) + PDZY(ji, jj, jk ) ) / 3.0 ) / 16.0 Z2(ji, jj, jk ) = 7.0 * ( ( PRVCT(ji, jj, jk ) + PRVCT(ji, jj, jk-1 ) ) * ztmp1 & + ( PRVCT(ji, jj+1, jk ) + PRVCT(ji, jj+1, jk-1 ) ) * ztmp2 ) / 12.0 & - 0.5 * ( ( PRVCT(ji, jj-1, jk ) + PRVCT(ji, jj-1, jk-1 ) ) * PDZY(ji, jj-1, jk ) & + ( PRVCT(ji, jj+2, jk ) + PRVCT(ji, jj+2, jk-1 ) ) * PDZY(ji, jj+2, jk ) ) / 12.0 - end do + !$mnh_end_do() !$acc end kernels ! !!$CALL MPPDB_CHECK3DM("contrav_device :: dom Z1/Z2",PRECISION,Z1,Z2) @@ -804,27 +799,25 @@ ELSE IF (KADV_ORDER == 4 ) THEN ! 
!!$ IF (NHALO==1) THEN !$acc kernels async -!$acc_nv loop independent collapse(2) - do concurrent(jj=1:iju,jk=IKB:IKE+1) + !$mnh_do_concurrent(jj=1:iju,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZX(IIE, jj, jk ) - ( PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) + PDZX(IIE-1, jj, jk ) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZX(IIE+1, jj, jk ) - ( ZDZX_EAST(jj, jk ) + PDZX(IIE+1, jj, jk ) + PDZX(IIE, jj, jk ) ) / 3.0 ) / 16.0 Z1(IIE, jj, jk ) = 7.0 * ( ( PRUCT(IIE, jj, jk ) + PRUCT(IIE, jj, jk-1 ) ) * ztmp1 & + ( PRUCT(IIE+1, jj, jk ) + PRUCT(IIE+1, jj, jk-1 ) ) * ztmp2 ) / 12.0 & - 0.5 * ( ( PRUCT(IIE-1, jj, jk ) + PRUCT(IIE-1, jj, jk-1 ) ) * PDZX(IIE-1, jj, jk) & + ( ZU_EAST (jj, jk ) + ZU_EAST (jj, jk-1 ) ) * ZDZX_EAST (jj, jk) ) / 12.0 - end do + !$mnh_end_do() !$acc end kernels ! !$acc kernels async -!$acc_nv loop independent collapse(2) - do concurrent(ji=1:iiu,jk=IKB:IKE+1) + !$mnh_do_concurrent(ji=1:iiu,jk=IKB:IKE+1) ztmp1 = ( 9.0 * PDZY(ji, IJE, jk) - ( PDZY (ji, IJE+1, jk) + PDZY(ji, IJE, jk) + PDZY(ji, IJE-1, jk) ) / 3.0 ) / 16.0 ztmp2 = ( 9.0 * PDZY(ji, IJE+1, jk) - ( ZDZY_NORTH(ji, jk) + PDZY(ji, IJE+1, jk) + PDZY(ji, IJE, jk) ) / 3.0 ) / 16.0 Z2(ji, IJE, jk ) = 7.0 * ( ( PRVCT (ji, IJE, jk ) + PRVCT (ji, IJE, jk-1 ) ) * ztmp1 & + ( PRVCT (ji, IJE+1, jk ) + PRVCT (ji, IJE+1, jk-1 ) ) * ztmp2 ) / 12.0 & - 0.5 * ( ( PRVCT (ji, IJE-1, jk ) + PRVCT (ji, IJE-1, jk-1 ) ) * PDZY (ji, IJE-1, jk ) & + ( ZV_NORTH(ji, jk ) + ZV_NORTH(ji, jk-1 ) ) * ZDZY_NORTH(ji, jk ) ) / 12.0 - end do + !$mnh_end_do() !$acc end kernels !$acc wait !!$ END IF @@ -871,10 +864,9 @@ ELSE IF (KADV_ORDER == 4 ) THEN !!$ !!$ CALL MPPDB_CHECK3DM("contrav_device ::Z1/Z2/ PDZZ",PRECISION,Z1,Z2,PDZZ) PRWCT(:,:,:)=0. -!$acc_nv loop independent collapse(3) - do concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) + !$mnh_do_concurrent (ji=iib:iie,jj=ijb:ije,jk=ikb:ike+1) PRWCT(ji ,jj, jk ) = ( PRWT(ji ,jj, jk ) - Z1(ji ,jj, jk ) - Z2(ji ,jj, jk ) ) / PDZZ(ji ,jj, jk ) - end do + !$mnh_end_do() !$acc end kernels ! 
CALL MPPDB_CHECK3DM("contrav_device :: PRWCT/Z1/Z2",PRECISION,PRWCT,Z1,Z2) diff --git a/src/ZSOLVER/dotprod.f90 b/src/ZSOLVER/dotprod.f90 index e473801b82143326b3323affb757e60545074909..dab2e6a88597dd0afb3e5672aa3b144383795c4c 100644 --- a/src/ZSOLVER/dotprod.f90 +++ b/src/ZSOLVER/dotprod.f90 @@ -185,16 +185,16 @@ CALL MNH_MEM_GET(ZDOTPROD, ILBXB,ILBXE ,ILBYB,ILBYE ) #endif !$acc kernels present(ZDOTPROD) ZDOTPROD(:,:) = 0. -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(2) -#endif - DO CONCURRENT (JI=ILBXB:ILBXE,JJ=ILBYB:ILBYE) - !$acc loop seq - DO JK = IKB-1,IKE+1 - ZDOTPROD(JI,JJ) = ZDOTPROD(JI,JJ) + PA(JI,JJ,JK) * PB(JI,JJ,JK) - END DO - END DO !$acc end kernels +!$acc parallel +!$mnh_do_concurrent(JI=ILBXB:ILBXE,JJ=ILBYB:ILBYE) + !dir$ nextscalar + !$acc loop seq + DO JK = IKB-1,IKE+1 + ZDOTPROD(JI,JJ) = ZDOTPROD(JI,JJ) + PA(JI,JJ,JK) * PB(JI,JJ,JK) + END DO +!$mnh_end_do() +!$acc end parallel !$acc update host(ZDOTPROD) PDOTPROD = SUM_DD_R2_ll(ZDOTPROD) !JUAN16 diff --git a/src/ZSOLVER/get_halo.f90 b/src/ZSOLVER/get_halo.f90 index 719fa0a19ddc3316164ab1969bc4b1423bcd2743..e53338d1f9e3db6a9798375bf8c17f4289b2ab20 100644 --- a/src/ZSOLVER/get_halo.f90 +++ b/src/ZSOLVER/get_halo.f90 @@ -464,6 +464,11 @@ INTEGER,PARAMETER :: IS_WEST=1 , IS_EAST=2, IS_SOUTH=3, IS_NORTH=4 LOGICAL :: LX , LY INTEGER :: NB_REQ, IERR ! +INTEGER :: JI,JJ,JK, JIU,JJU,JKU + +JIU = SIZE(PSRC,1) +JJU = SIZE(PSRC,2) +JKU = SIZE(PSRC,3) CALL INIT_HALO_D() @@ -553,12 +558,16 @@ END IF IF (LX) THEN IF (.NOT. 
GWEST) THEN !$acc kernels async(IS_WEST) - ZWEST_IN ( IIB:IIB+IHALO_1 , IJB:IJE , : ) = PSRC( IIB:IIB+IHALO_1 , IJB:IJE , : ) + !$mnh_expand_array(JI=IIB:IIB+IHALO_1 , JJ=IJB:IJE , JK=1:JKU ) + ZWEST_IN ( IIB:IIB+IHALO_1 , IJB:IJE , : ) = PSRC( IIB:IIB+IHALO_1 , IJB:IJE , : ) + !$mnh_end_expand_array() !$acc end kernels END IF IF (.NOT.GEAST) THEN !$acc kernels async(IS_EAST) - ZEAST_IN ( IIE-IHALO_1:IIE , IJB:IJE , : ) = PSRC( IIE-IHALO_1:IIE , IJB:IJE , : ) + !$mnh_expand_array(JI=IIE-IHALO_1:IIE , JJ=IJB:IJE , JK=1:JKU) + ZEAST_IN ( IIE-IHALO_1:IIE , IJB:IJE , : ) = PSRC( IIE-IHALO_1:IIE , IJB:IJE , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF END IF @@ -566,12 +575,16 @@ END IF IF (LY) THEN IF (.NOT.GSOUTH) THEN !$acc kernels async(IS_SOUTH) - ZSOUTH_IN ( IIB:IIE , IJB:IJB+IHALO_1 , : ) = PSRC( IIB:IIE , IJB:IJB+IHALO_1 , : ) + !$mnh_expand_array(JI=IIB:IIE , JJ=IJB:IJB+IHALO_1 , JK=1:JKU ) + ZSOUTH_IN ( IIB:IIE , IJB:IJB+IHALO_1 , : ) = PSRC( IIB:IIE , IJB:IJB+IHALO_1 , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF IF (.NOT.GNORTH) THEN !$acc kernels async(IS_NORTH) - ZNORTH_IN ( IIB:IIE , IJE-IHALO_1:IJE , : ) = PSRC( IIB:IIE , IJE-IHALO_1:IJE , : ) + !$mnh_expand_array(JI=IIB:IIE , JJ=IJE-IHALO_1:IJE , JK=1:JKU ) + ZNORTH_IN ( IIB:IIE , IJE-IHALO_1:IJE , : ) = PSRC( IIB:IIE , IJE-IHALO_1:IJE , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF ENDIF @@ -676,6 +689,11 @@ INTEGER,PARAMETER :: IS_WEST=1 , IS_EAST=2, IS_SOUTH=3, IS_NORTH=4 LOGICAL :: LX , LY INTEGER :: NB_REQ, IERR ! 
+INTEGER :: JI,JJ,JK, JIU,JJU,JKU + +JIU = SIZE(PSRC,1) +JJU = SIZE(PSRC,2) +JKU = SIZE(PSRC,3) CALL INIT_HALO_D() @@ -715,7 +733,9 @@ IF (LX) THEN !$acc update device(ZWEST_OUT) async(IS_WEST) #endif !$acc kernels async(IS_WEST) - PSRC( 1:IIB-1 , IJB:IJE , : ) = ZWEST_OUT( 1:IIB-1 , IJB:IJE , : ) + !$mnh_expand_array(JI=1:IIB-1 , JJ=IJB:IJE , JK=1:JKU ) + PSRC( 1:IIB-1 , IJB:IJE , : ) = ZWEST_OUT( 1:IIB-1 , IJB:IJE , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF IF (.NOT.GEAST) THEN @@ -723,7 +743,9 @@ IF (LX) THEN !$acc update device(ZEAST_OUT) async(IS_EAST) #endif !$acc kernels async(IS_EAST) - PSRC( IIE+1:IIU , IJB:IJE , : ) = ZEAST_OUT( IIE+1:IIU , IJB:IJE , : ) + !$mnh_expand_array(JI=IIE+1:IIU , JJ=IJB:IJE , JK=1:JKU ) + PSRC( IIE+1:IIU , IJB:IJE , : ) = ZEAST_OUT( IIE+1:IIU , IJB:IJE , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF END IF @@ -733,7 +755,9 @@ IF (LY) THEN !$acc update device(ZSOUTH_OUT) async(IS_SOUTH) #endif !$acc kernels async(IS_SOUTH) - PSRC( IIB:IIE , 1:IJB-1 , : ) = ZSOUTH_OUT( IIB:IIE , 1:IJB-1 , : ) + !$mnh_expand_array(JI=IIB:IIE , JJ=1:IJB-1 , JK=1:JKU ) + PSRC( IIB:IIE , 1:IJB-1 , : ) = ZSOUTH_OUT( IIB:IIE , 1:IJB-1 , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF IF (.NOT.GNORTH) THEN @@ -741,7 +765,9 @@ IF (LY) THEN !$acc update device(ZNORTH_OUT) async(IS_NORTH) #endif !$acc kernels async(IS_NORTH) - PSRC( IIB:IIE , IJE+1:IJU , : ) = ZNORTH_OUT ( IIB:IIE , IJE+1:IJU , : ) + !$mnh_expand_array(JI=IIB:IIE , JJ=IJE+1:IJU , JK=1:JKU ) + PSRC( IIB:IIE , IJE+1:IJU , : ) = ZNORTH_OUT ( IIB:IIE , IJE+1:IJU , : ) + !$mnh_end_expand_array() !$acc end kernels ENDIF END IF diff --git a/src/ZSOLVER/modeln.f90 b/src/ZSOLVER/modeln.f90 index c36c99582f7efc80ed9e44bfffa215911304c774..4f635b6ff3a2f6bceebe945be752984869292266 100644 --- a/src/ZSOLVER/modeln.f90 +++ b/src/ZSOLVER/modeln.f90 @@ -459,6 +459,9 @@ USE MODI_WRITE_LFIFM_n USE MODI_WRITE_SERIES_n USE MODI_WRITE_STATION_n USE MODI_WRITE_SURF_ATM_N +#ifdef 
MNH_BITREP_OMP +USE MODI_BITREPZ +#endif ! IMPLICIT NONE ! @@ -560,6 +563,10 @@ REAL, DIMENSION(:,:,:), ALLOCATABLE :: ZJ ! TYPE(LIST_ll), POINTER :: TZFIELDC_ll ! list of fields to exchange TYPE(HALO2LIST_ll), POINTER :: TZHALO2C_ll ! list of fields to exchange +#ifdef MNH_OPENACC +TYPE(HALO2LIST_ll), SAVE , POINTER :: TZHALO2_UT,TZHALO2_VT,TZHALO2_WT +LOGICAL , SAVE :: GFIRST_CALL_MODELN = .TRUE. +#endif LOGICAL :: GCLD ! conditionnal call for dust wet deposition LOGICAL :: GCLOUD_ONLY ! conditionnal radiation computations for ! the only cloudy columns @@ -1645,6 +1652,10 @@ XTIME_LES_BU_PROCESS = 0. !$acc data copyin (XTKET, XRSVS_CLD) & !$acc & copy (XRTKES, XRSVS) & !$acc & copyout(XRTKEMS) +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(XRRS_CLD) +CALL SBR_FZ(XRT) +#endif CALL ADVECTION_METSV ( TZBAKFILE, CUVW_ADV_SCHEME, & CMET_ADV_SCHEME, CSV_ADV_SCHEME, CCLOUD, NSPLIT, & LSPLIT_CFL, XSPLIT_CFL, LCFL_WRIT, & @@ -1679,7 +1690,7 @@ CALL GRAVITY_IMPL ( CLBCX, CLBCY, NRR, NRRL, NRRI,XTSTEP, & ! compensated by the pressure gradient ! IF (KTCOUNT == 1 .AND. CCONF=='START') THEN -!$acc kernels present( ZRWS ) +!$acc kernels present( ZRWS,XRWS_PRES) XRWS_PRES(:,:,:) = ZRWS(:,:,:) - XRWS(:,:,:) !$acc end kernels END IF @@ -1711,9 +1722,23 @@ IF ((CUVW_ADV_SCHEME(1:3)=='CEN') .AND. (CTEMP_SCHEME == 'LEFR')) THEN CALL ADD3DFIELD_ll( TZFIELDC_ll, XUT, 'MODEL_n::XUT' ) CALL ADD3DFIELD_ll( TZFIELDC_ll, XVT, 'MODEL_n::XVT' ) CALL ADD3DFIELD_ll( TZFIELDC_ll, XWT, 'MODEL_n::XWT' ) +#ifndef MNH_OPENACC CALL INIT_HALO2_ll(TZHALO2C_ll,3,IIU,IJU,IKU) CALL UPDATE_HALO_ll(TZFIELDC_ll,IINFO_ll) CALL UPDATE_HALO2_ll(TZFIELDC_ll, TZHALO2C_ll, IINFO_ll) +#else + IF (GFIRST_CALL_MODELN) THEN + GFIRST_CALL_MODELN = .FALSE. 
+ NULLIFY(TZHALO2_UT,TZHALO2_VT,TZHALO2_WT) + CALL INIT_HALO2_ll(TZHALO2_UT,1,IIU,IJU,IKU) + CALL INIT_HALO2_ll(TZHALO2_VT,1,IIU,IJU,IKU) + CALL INIT_HALO2_ll(TZHALO2_WT,1,IIU,IJU,IKU) + END IF + + CALL GET_HALO2_DF(XUT,TZHALO2_UT,HNAME='XUT') + CALL GET_HALO2_DF(XVT,TZHALO2_VT,HNAME='XVT') + CALL GET_HALO2_DF(XWT,TZHALO2_WT,HNAME='XWT') +#endif !$acc update device(XUT, XVT, XWT) END IF !$acc data copyin(XUM, XVM, XWM) & @@ -1725,13 +1750,19 @@ IF ((CUVW_ADV_SCHEME(1:3)=='CEN') .AND. (CTEMP_SCHEME == 'LEFR')) THEN XUT, XVT, XWT, & XRHODJ, XDXX, XDYY, XDZZ, XDZX, XDZY, & XRUS,XRVS, XRWS, & +#ifndef MNH_OPENACC TZHALO2C_ll ) +#else + TZHALO2_UT,TZHALO2_VT,TZHALO2_WT ) +#endif !$acc end data IF (CUVW_ADV_SCHEME=='CEN4TH') THEN CALL CLEANLIST_ll(TZFIELDC_ll) NULLIFY(TZFIELDC_ll) +#ifndef MNH_OPENACC CALL DEL_HALO2_ll(TZHALO2C_ll) NULLIFY(TZHALO2C_ll) +#endif END IF ELSE @@ -1821,7 +1852,7 @@ ZPABST(:,:,:) = XPABST(:,:,:) ! IF(.NOT. L1D) THEN ! - !$acc kernels ! present(XRUS_PRES,XRVS_PRES,XRWS_PRES) + !$acc kernels present(XRUS,XRVS,XRWS) !present(XRUS_PRES,XRVS_PRES,XRWS_PRES) XRUS_PRES(:,:,:) = XRUS(:,:,:) XRVS_PRES(:,:,:) = XRVS(:,:,:) XRWS_PRES(:,:,:) = XRWS(:,:,:) @@ -1840,7 +1871,7 @@ IF(.NOT. L1D) THEN XA_K,XB_K,XC_K,XD_K) !JUAN FULL ZSOLVER ! !$acc update host(XPABST) - !$acc kernels ! 
present(XRUS_PRES,XRVS_PRES,XRWS_PRES) + !$acc kernels present(XRUS,XRVS,XRWS) !present(XRUS_PRES,XRVS_PRES,XRWS_PRES) XRUS_PRES(:,:,:) = XRUS(:,:,:) - XRUS_PRES(:,:,:) + ZRUS(:,:,:) XRVS_PRES(:,:,:) = XRVS(:,:,:) - XRVS_PRES(:,:,:) + ZRVS(:,:,:) XRWS_PRES(:,:,:) = XRWS(:,:,:) - XRWS_PRES(:,:,:) + ZRWS(:,:,:) diff --git a/src/ZSOLVER/p_abs.f90 b/src/ZSOLVER/p_abs.f90 index 3ea1219e40ef239168cdb6e2a901fc637f64e7e9..3b41b15e68fb88d7c47a282f49c6b36a9a1cdb7f 100644 --- a/src/ZSOLVER/p_abs.f90 +++ b/src/ZSOLVER/p_abs.f90 @@ -123,6 +123,10 @@ USE MODE_REPRO_SUM #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_COMPILER_CCE +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif #ifdef MNH_OPENACC USE MODE_MNH_ZWORK, ONLY: MNH_MEM_GET, MNH_MEM_POSITION_PIN, MNH_MEM_RELEASE #endif @@ -155,6 +159,9 @@ REAL, DIMENSION(:,:,:), INTENT(IN) :: PRVREF ! vapor mixing ratio ! for the reference state REAL, DIMENSION(:,:,:), INTENT(IN) :: PEXNREF! Exner function of the ! reference state +#ifdef MNH_COMPILER_CCE_1403 +REAL, DIMENSION(:,:,:), POINTER , CONTIGUOUS :: PEXNREF_BR +#endif ! REAL, INTENT(INOUT) :: PPHI0 ! PHI0 at t REAL, DIMENSION(:,:,:), INTENT(INOUT) :: PPHIT ! Perturbation of @@ -241,6 +248,9 @@ ALLOCATE (ZRTOT(IIU,IJU,IKU), ZRHOREF(IIU,IJU,IKU), ZWORK(IIU,IJU,IKU)) !Pin positions in the pools of MNH memory CALL MNH_MEM_POSITION_PIN() +#ifdef MNH_COMPILER_CCE_1403 +CALL MNH_MEM_GET(PEXNREF_BR , IIB,IIE , IJB,IJE, IKB,IKE) +#endif CALL MNH_MEM_GET(ZMASS_O_PI_2D , IIB,IIE , IJB,IJE) CALL MNH_MEM_GET(ZMASSGUESS_2D , IIB,IIE , IJB,IJE) CALL MNH_MEM_GET(ZWATERMASST_2D , IIB,IIE , IJB,IJE) @@ -286,24 +296,35 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN ZWORK(:,:,:)=PRHODJ * XTH00 & / ( PRHODREF * PTHVREF * (1. + PRVREF) ) END IF - ! 
+#if defined(MNH_COMPILER_CCE_1403) && defined(MNH_BITREP_OMP) + !$acc loop + !$mnh_do_concurrent(JI=IIB:IIE,JJ=IJB:IJE,JK=IKB:IKE ) + PEXNREF_BR(JI,JJ,JK)=BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) + !$mnh_end_do() +#endif + !$acc end kernels + !$acc parallel !$acc loop seq DO JK = IKB,IKE - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI = IIB:IIE , JJ = IJB:IJE ) + !$acc loop independent + DO CONCURRENT ( JJ = IJB:IJE , JI = IIB:IIE ) ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else +#ifndef MNH_COMPILER_CCE_1403 BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & +#else + PEXNREF_BR(JI,JJ,JK) & +#endif #endif * ZWORK(JI,JJ,JK) / PTHETAV(JI,JJ,JK) ZMASS_O_PI_2D(JI,JJ) = ZMASS_O_PI_2D(JI,JJ) + ZWORK(JI,JJ,JK) / PTHETAV(JI,JJ,JK) ZWATERMASST_2D(JI,JJ) = ZWATERMASST_2D(JI,JJ) + & ZRTOT(JI,JJ,JK) * ZWORK(JI,JJ,JK) * PRHODREF(JI,JJ,JK) - END DO + END DO END DO - !$acc end kernels + !$acc end parallel ! ELSE DO JK = IKB,IKE @@ -327,9 +348,9 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN ! ! !$acc update host(ZMASSGUESS_2D,ZMASS_O_PI_2D,ZWATERMASST_2D) - ZMASSGUESS = SUM_DD_R2_ll(ZMASSGUESS_2D) - ZMASS_O_PI = SUM_DD_R2_ll(ZMASS_O_PI_2D) - ZWATERMASST = SUM_DD_R2_ll(ZWATERMASST_2D) + ZMASSGUESS = SUM_DD_R2_ll_DEVICE(ZMASSGUESS_2D) + ZMASS_O_PI = SUM_DD_R2_ll_DEVICE(ZMASS_O_PI_2D) + ZWATERMASST = SUM_DD_R2_ll_DEVICE(ZWATERMASST_2D) ! ZMASS_O_PI = ZMASS_O_PI*ZP00_O_RD*ZCVD_O_RD ZPI0 = (PDRYMASST + ZWATERMASST - ZP00_O_RD*ZMASSGUESS ) / ZMASS_O_PI @@ -345,21 +366,33 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN ZMASSGUESS_2D = 0. 
!$acc end kernels IF ( CEQNSYS == 'DUR' ) THEN + #if defined(MNH_COMPILER_CCE_1403) && defined(MNH_BITREP_OMP) !$acc kernels + !$acc loop + !$mnh_do_concurrent(JI=IIB:IIE,JJ=IJB:IJE,JK=IKB:IKE ) + PEXNREF_BR(JI,JJ,JK)=BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) + !$mnh_end_do() + !$acc end kernels +#endif + !$acc parallel !$acc loop seq DO JK = IKB,IKE - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI = IIB:IIE , JJ = IJB:IJE ) + !$acc loop independent + DO CONCURRENT ( JJ = IJB:IJE , JI = IIB:IIE ) ZMASSGUESS_2D(JI,JJ) = ZMASSGUESS_2D(JI,JJ) + & #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) (PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK))**ZCVD_O_RD & #else +#ifndef MNH_COMPILER_CCE_1403 BR_POW((PEXNREF(JI,JJ,JK)+PPHIT(JI,JJ,JK)),ZCVD_O_RD) & +#else + PEXNREF_BR(JI,JJ,JK) & +#endif #endif * ZWORK(JI,JJ,JK) / PTHETAV(JI,JJ,JK) - END DO + END DO END DO - !$acc end kernels + !$acc end parallel ELSE DO JK = IKB,IKE DO JJ = IJB,IJE @@ -377,7 +410,7 @@ IF ( CEQNSYS=='DUR' .OR. CEQNSYS=='MAE' ) THEN END IF ! !$acc update host(ZMASSGUESS_2D) - ZMASSGUESS = SUM_DD_R2_ll(ZMASSGUESS_2D) + ZMASSGUESS = SUM_DD_R2_ll_DEVICE(ZMASSGUESS_2D) ! ZPI0 = (PDRYMASST + ZWATERMASST - ZP00_O_RD*ZMASSGUESS ) / ZMASS_O_PI !$acc kernels diff --git a/src/ZSOLVER/ppm.f90 b/src/ZSOLVER/ppm.f90 index 83f0849e7424d7c723d17c9f989e793359f142eb..51bf1a52a6a656822aced4e815e24d06350ec5b6 100644 --- a/src/ZSOLVER/ppm.f90 +++ b/src/ZSOLVER/ppm.f90 @@ -397,9 +397,12 @@ use mode_mppdb use mode_msg #endif -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif USE MODI_GET_HALO #ifndef MNH_OPENACC USE MODI_SHUMAN @@ -470,7 +473,11 @@ INTEGER :: IJS,IJN #endif LOGICAL :: GWEST , GEAST !------------------------------------------------------------------------------- - +! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! 
!$acc data present( PSRC, PCR, PRHO, PR , & !$acc & ZQL, ZQR, ZDQ, ZQ6, ZDMQ, ZQL0, ZQR0, ZQ60, ZFPOS, ZFNEG ) @@ -518,10 +525,7 @@ ZFNEG(:,:,:) = PSRC(:,:,:) CALL GET_HALO_D(PSRC,HDIR="01_X", HNAME='PSRC') ! !$acc kernels -!$acc loop independent collapse(3) - do jk = 1, iku - do jj = 1, iju - do ji = 1, iiu +!$mnh_do_concurrent (ji=1:iiu,jj=1:iju,jk=1:iku) PR (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQL (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR (ji, jj, jk ) = PSRC(ji, jj, jk ) @@ -531,9 +535,7 @@ CALL GET_HALO_D(PSRC,HDIR="01_X", HNAME='PSRC') ZQL0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQR0 (ji, jj, jk ) = PSRC(ji, jj, jk ) ZQ60 (ji, jj, jk ) = PSRC(ji, jj, jk ) - end do - end do -end do +!$mnh_end_do() ! #if 0 ZFPOS(:,1:IJS,:)=PSRC(:,1:IJS,:) @@ -632,7 +634,7 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCX(1) == HLBCX(2) ZQL(:,IJS:IJN,:) = PSRC(:,IJS:IJN,:) ZQR(:,IJS:IJN,:) = PSRC(:,IJS:IJN,:) ZQ6(:,IJS:IJN,:) = 0.0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) < -(ZDQ(:,IJS:IJN,:))**2 ) #else ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) < -BR_P2(ZDQ(:,IJS:IJN,:)) ) @@ -640,7 +642,7 @@ CASE ('CYCL','WALL') ! In that case one must have HLBCX(1) == HLBCX(2) ZQ6(:,IJS:IJN,:) = 3.0*(ZQL0(:,IJS:IJN,:) - PSRC(:,IJS:IJN,:)) ZQR(:,IJS:IJN,:) = ZQL0(:,IJS:IJN,:) - ZQ6(:,IJS:IJN,:) ZQL(:,IJS:IJN,:) = ZQL0(:,IJS:IJN,:) -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) > (ZDQ(:,IJS:IJN,:))**2 ) #else ELSEWHERE ( ZQ60(:,IJS:IJN,:)*ZDQ(:,IJS:IJN,:) > BR_P2(ZDQ(:,IJS:IJN,:)) ) @@ -970,6 +972,10 @@ ENDDO ; ENDDO ; ENDDO ! END SELECT ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PR(:,:,:)) +#endif +! 
IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PSRC,"PPM_01_X end:PSRC") @@ -1197,9 +1203,12 @@ use mode_msg #endif use mode_mppdb -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif USE MODI_GET_HALO #ifndef MNH_OPENACC USE MODI_SHUMAN @@ -1272,7 +1281,11 @@ INTEGER :: IJN,IJS #endif integer :: ji, jj, jk !------------------------------------------------------------------------------- - +! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! !$acc data present( PSRC, PCR, PRHO, PR, & !$acc & ZQL, ZQR, ZDQ, ZQ6, ZDMQ, ZQL0, ZQR0, ZQ60, ZFPOS, ZFNEG ) @@ -1850,6 +1863,10 @@ CALL GET_HALO_D(ZQL0,HDIR="01_Y", HNAME='ZQL0') ! END SELECT ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PR(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PSRC,"PPM_01_Y end:PSRC") @@ -2076,9 +2093,12 @@ USE MODI_SHUMAN USE MODI_SHUMAN_DEVICE #endif USE MODI_GET_HALO -#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_BITREP_OMP +USE MODI_BITREPZ +#endif ! USE MODD_CONF USE MODD_PARAMETERS @@ -2147,7 +2167,11 @@ INTEGER :: I,J,K integer :: ji, jj, jk ! !------------------------------------------------------------------------------- - +! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PSRC(:,:,:)) +#endif +! 
!$acc data present( PSRC, PCR, PRHO, PR, & !$acc & ZQL, ZQR, ZDQ, ZQ6, ZDMQ, ZQL0, ZQR0, ZQ60, ZFPOS, ZFNEG ) IF (MPPDB_INITIALIZED) THEN @@ -2290,7 +2314,7 @@ WHERE ( ZDMQ == 0.0 ) ZQL = PSRC ZQR = PSRC ZQ6 = 0.0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60*ZDQ < -(ZDQ)**2 ) #else ELSEWHERE ( ZQ60*ZDQ < -BR_P2(ZDQ) ) @@ -2298,7 +2322,7 @@ ELSEWHERE ( ZQ60*ZDQ < -BR_P2(ZDQ) ) ZQ6 = 3.0*(ZQL0 - PSRC) ZQR = ZQL0 - ZQ6 ZQL = ZQL0 -#ifndef MNH_BITREP +#if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ELSEWHERE ( ZQ60*ZDQ > (ZDQ)**2 ) #else ELSEWHERE ( ZQ60*ZDQ > BR_P2(ZDQ) ) @@ -2419,6 +2443,10 @@ end do !Unnecessary CALL GET_HALO_D(PR) #endif ! +#ifdef MNH_BITREP_OMP +CALL SBR_FZ(PR(:,:,:)) +#endif +! IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PSRC,"PPM_01_Z end:PSRC") diff --git a/src/ZSOLVER/pressurez.f90 b/src/ZSOLVER/pressurez.f90 index 16e5cded61f9ab76926de0ce89f6b4912e50c49f..b91bb89c2dfe822b9629183f617ef126a68c9922 100644 --- a/src/ZSOLVER/pressurez.f90 +++ b/src/ZSOLVER/pressurez.f90 @@ -537,9 +537,18 @@ END IF ! -------------------------------------------------- ! IF (LIBM) THEN - WHERE(XIBM_LS(:,:,:,2).GT.-XIBM_EPSI) PRUS(:,:,:) = 0. - WHERE(XIBM_LS(:,:,:,3).GT.-XIBM_EPSI) PRVS(:,:,:) = 0. - WHERE(XIBM_LS(:,:,:,4).GT.-XIBM_EPSI) PRWS(:,:,:) = 0. + WHERE(XIBM_LS(:,:,:,2).GT.-XIBM_EPSI) + !dir$ safe_address + PRUS(:,:,:) = 0. + ENDWHERE + WHERE(XIBM_LS(:,:,:,3).GT.-XIBM_EPSI) + !dir$ safe_address + PRVS(:,:,:) = 0. + ENDWHERE + WHERE(XIBM_LS(:,:,:,4).GT.-XIBM_EPSI) + !dir$ safe_address + PRWS(:,:,:) = 0. + ENDWHERE ENDIF ! IF (MPPDB_INITIALIZED) THEN diff --git a/src/ZSOLVER/rain_ice_red.f90 b/src/ZSOLVER/rain_ice_red.f90 index 0c7cc40178808fd0e88b3a9069bbcb9d262c3ba6..3aeb5a1335d50b469af71689f3e57fb2b2e8e8a8 100644 --- a/src/ZSOLVER/rain_ice_red.f90 +++ b/src/ZSOLVER/rain_ice_red.f90 @@ -359,6 +359,11 @@ REAL, DIMENSION(:,:,:), OPTIONAL, INTENT(INOUT) :: PRHS ! Hail m.r. 
source REAL, DIMENSION(:,:), OPTIONAL, INTENT(OUT) :: PINPRH! Hail instant precip REAL, DIMENSION(:,:,:,:), OPTIONAL, INTENT(OUT) :: PFPR ! upper-air precipitation fluxes ! +#ifdef MNH_COMPILER_CCE +STOP "RAIN_ICE_RED TROP LENT A COMPILER AVEC CRAY/CCE >> 30 Minutes " +STOP "ENLEVE LE ifdefMNH_COMPILER_CCE , SI VOUS EN AVEZ BESOIN sur GPU AMD " +#else +! !* 0.2 Declarations of local variables : ! INTEGER :: IIB ! Define the domain where is @@ -734,9 +739,9 @@ END IF imicro = count(odmicro) !$acc end kernels -JIU = size(ptht, 1 ) -JJU = size(ptht, 2 ) -JKU = size(ptht, 3 ) +JIU = SIZE( ptht, 1 ) +JJU = SIZE( ptht, 2 ) +JKU = SIZE( ptht, 3 ) #ifndef MNH_OPENACC allocate( i1(imicro ) ) @@ -746,22 +751,22 @@ allocate( i3(imicro ) ) allocate( zw(size( pexnref, 1 ), size( pexnref, 2 ), size( pexnref, 3 ) ) ) allocate( zt(size( pexnref, 1 ), size( pexnref, 2 ), size( pexnref, 3 ) ) ) -allocate( zz_rvheni_mr(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zz_rvheni (size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zz_lvfact (size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zz_lsfact (size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zlsfact3d (size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) +allocate( zz_rvheni_mr(jiu, jju, jku ) ) +allocate( zz_rvheni (jiu, jju, jku ) ) +allocate( zz_lvfact (jiu, jju, jku ) ) +allocate( zz_lsfact (jiu, jju, jku ) ) +allocate( zlsfact3d (jiu, jju, jku ) ) -allocate( ZHLC_HCF3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( ZHLC_LCF3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( ZHLC_HRC3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( ZHLC_LRC3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( ZHLI_HCF3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( ZHLI_LCF3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( ZHLI_HRI3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 
) ) ) -allocate( ZHLI_LRI3D(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) +allocate( ZHLC_HCF3D(jiu, jju, jku ) ) +allocate( ZHLC_LCF3D(jiu, jju, jku ) ) +allocate( ZHLC_HRC3D(jiu, jju, jku ) ) +allocate( ZHLC_LRC3D(jiu, jju, jku ) ) +allocate( ZHLI_HCF3D(jiu, jju, jku ) ) +allocate( ZHLI_LCF3D(jiu, jju, jku ) ) +allocate( ZHLI_HRI3D(jiu, jju, jku ) ) +allocate( ZHLI_LRI3D(jiu, jju, jku ) ) -allocate( zinpri(size( ptht, 1 ), size( ptht, 2 ) ) ) +allocate( zinpri(jiu, jju ) ) allocate( zrvt (imicro ) ) allocate( zrct (imicro ) ) @@ -934,15 +939,14 @@ allocate( zrh_tend(imicro, 10 ) ) allocate( zssi(imicro ) ) -allocate( zw_rvs(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_rcs(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_rrs(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_ris(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_rss(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_rgs(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_rhs(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) -allocate( zw_ths(size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) ) - +allocate( zw_rvs(jiu, jju, jku ) ) +allocate( zw_rcs(jiu, jju, jku ) ) +allocate( zw_rrs(jiu, jju, jku ) ) +allocate( zw_ris(jiu, jju, jku ) ) +allocate( zw_rss(jiu, jju, jku ) ) +allocate( zw_rgs(jiu, jju, jku ) ) +allocate( zw_rhs(jiu, jju, jku ) ) +allocate( zw_ths(jiu, jju, jku ) ) allocate( ZTEMP_BUD(JIU,JJU,JKU) ) #else !Pin positions in the pools of MNH memory @@ -955,22 +959,22 @@ CALL MNH_MEM_GET( i3, imicro ) CALL MNH_MEM_GET( zw, size( pexnref, 1 ), size( pexnref, 2 ), size( pexnref, 3 ) ) CALL MNH_MEM_GET( zt, size( pexnref, 1 ), size( pexnref, 2 ), size( pexnref, 3 ) ) -CALL MNH_MEM_GET( zz_rvheni_mr, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zz_rvheni, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zz_lvfact, size( 
ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zz_lsfact, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zlsfact3d, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) +CALL MNH_MEM_GET( zz_rvheni_mr, jiu, jju, jku ) +CALL MNH_MEM_GET( zz_rvheni, jiu, jju, jku ) +CALL MNH_MEM_GET( zz_lvfact, jiu, jju, jku ) +CALL MNH_MEM_GET( zz_lsfact, jiu, jju, jku ) +CALL MNH_MEM_GET( zlsfact3d, jiu, jju, jku ) -CALL MNH_MEM_GET( ZHLC_HCF3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLC_LCF3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLC_HRC3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLC_LRC3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLI_HCF3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLI_LCF3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLI_HRI3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( ZHLI_LRI3D, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) +CALL MNH_MEM_GET( ZHLC_HCF3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLC_LCF3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLC_HRC3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLC_LRC3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLI_HCF3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLI_LCF3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLI_HRI3D, jiu, jju, jku ) +CALL MNH_MEM_GET( ZHLI_LRI3D, jiu, jju, jku ) -CALL MNH_MEM_GET( zinpri, size( ptht, 1 ), size( ptht, 2 ) ) +CALL MNH_MEM_GET( zinpri, jiu, jju ) CALL MNH_MEM_GET( zrvt , imicro ) CALL MNH_MEM_GET( zrct , imicro ) @@ -1143,17 +1147,15 @@ CALL MNH_MEM_GET( zrh_tend, imicro, 10 ) CALL MNH_MEM_GET( zssi, imicro ) -CALL MNH_MEM_GET( zw_rvs, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zw_rcs, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zw_rrs, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL 
MNH_MEM_GET( zw_ris, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zw_rss, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zw_rgs, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zw_rhs, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) -CALL MNH_MEM_GET( zw_ths, size( ptht, 1 ), size( ptht, 2 ), size( ptht, 3 ) ) - +CALL MNH_MEM_GET( zw_rvs, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_rcs, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_rrs, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_ris, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_rss, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_rgs, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_rhs, jiu, jju, jku ) +CALL MNH_MEM_GET( zw_ths, jiu, jju, jku ) CALL MNH_MEM_GET( ZTEMP_BUD, JIU, JJU, JKU ) - !$acc data present( I1, I2, I3, & !$acc & ZW, ZT, ZZ_RVHENI_MR, ZZ_RVHENI, ZZ_LVFACT, ZZ_LSFACT, ZLSFACT3D, ZINPRI, & !$acc & ZRVT, ZRCT, ZRRT, ZRIT, ZRST, ZRGT, ZRHT, ZCIT, ZTHT, ZRHODREF, ZZT, ZPRES, ZEXN, & @@ -1184,13 +1186,13 @@ CALL MNH_MEM_GET( ZTEMP_BUD, JIU, JJU, JKU ) !------------------------------------------------------------------------------- if ( lbu_enable ) then if ( lbudget_th ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pths(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'HENU', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rv ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prvs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'HENU', ZTEMP_BUD(:,:,:) ) @@ -1202,7 +1204,13 @@ end if ! ----------------------- ! 
CALL GET_INDICE_ll (IIB,IJB,IIE,IJE) +#ifdef MNH_COMPILER_CCE +!$acc kernels present(ZRS_TEND,ZRG_TEND,ZRH_TEND,ZRCHONI,ZRVDEPS,ZRIAGGS,ZRIAUTS, & +!$acc & ZRVDEPG,ZRCAUTR,ZRCACCR,ZRREVAV,ZRSMLTG,ZRCMLTSR,ZRICFRRG, & +!$acc & ZRRCFRIG,ZRICFRR,ZRGMLTR,ZRHMLTR,ZRCBERI) +#else !$acc kernels +#endif IKB=KKA+JPVEXT*KKL IKE=KKU-JPVEXT*KKL IKTB=1+JPVEXT @@ -1277,37 +1285,37 @@ IF(.NOT. LSEDIM_AFTER) THEN !* 2.1 sedimentation ! if ( lbudget_rc .and. osedic ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prcs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RC), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rr ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RR), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_ri ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pris(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RI), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rs ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prss(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RS), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rg ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prgs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RG), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rh ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prhs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RH), 'SEDI', ZTEMP_BUD(:,:,:) ) @@ -1317,7 +1325,7 @@ IF(.NOT. 
LSEDIM_AFTER) THEN !Remark: the 2 source terms SEDI and DEPO could be mixed and stored in the same source term (SEDI) ! if osedic=T and ldeposc=T (a warning is printed in ini_budget in that case) if ( lbudget_rc .and. ldeposc .and. .not.osedic ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prcs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RC), 'DEPO', ZTEMP_BUD(:,:,:) ) @@ -1394,37 +1402,37 @@ IF(.NOT. LSEDIM_AFTER) THEN !* 2.2 budget storage ! if ( lbudget_rc .and. osedic ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prcs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RC), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rr ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RR), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_ri ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pris(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RI), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rs ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prss(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RS), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rg ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prgs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RG), 'SEDI', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rh ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prhs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RH), 'SEDI', 
ZTEMP_BUD(:,:,:) ) @@ -1433,7 +1441,7 @@ IF(.NOT. LSEDIM_AFTER) THEN !If osedic=T and ldeposc=T, DEPO is in fact mixed and stored with the SEDI source term !(a warning is printed in ini_budget in that case) if ( lbudget_rc .and. ldeposc .and. .not.osedic) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prcs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RC), 'DEPO', ZTEMP_BUD(:,:,:) ) @@ -1455,10 +1463,21 @@ CALL COUNTJV_DEVICE(ODMICRO(:,:,:),I1(:),I2(:),I3(:),IMICRO) !Packing GTEST=.false. IF(HSUBG_AUCV_RC=='PDF ' .AND. CSUBG_PR_PDF=='SIGM') GTEST=.true. +#ifdef MNH_COMPILER_CCE +!$acc kernels present(ZSIGMA_RC,ZRHT,ZEXT_RH, & +!$acc & ZTOT_RVHENI, ZTOT_RCHONI, ZTOT_RRHONG, ZTOT_RVDEPS, ZTOT_RIAGGS, ZTOT_RIAUTS, ZTOT_RVDEPG, ZTOT_RCAUTR, & +!$acc & ZTOT_RCACCR, ZTOT_RREVAV, ZTOT_RCRIMSS, ZTOT_RCRIMSG, ZTOT_RSRIMCG, ZTOT_RIMLTC, ZTOT_RCBERI, ZTOT_RHMLTR, & +!$acc & ZTOT_RSMLTG, ZTOT_RCMLTSR, ZTOT_RRACCSS, ZTOT_RRACCSG, ZTOT_RSACCRG, ZTOT_RICFRRG, ZTOT_RRCFRIG, & +!$acc & ZTOT_RICFRR, ZTOT_RCWETG, ZTOT_RIWETG, ZTOT_RRWETG, ZTOT_RSWETG, ZTOT_RCDRYG, ZTOT_RIDRYG, ZTOT_RRDRYG, & +!$acc & ZTOT_RSDRYG, ZTOT_RWETGH, ZTOT_RGMLTR, ZTOT_RCWETH, ZTOT_RIWETH, ZTOT_RSWETH, ZTOT_RGWETH, ZTOT_RRWETH, & +!$acc & ZTOT_RCDRYH, ZTOT_RIDRYH, ZTOT_RSDRYH, ZTOT_RRDRYH, ZTOT_RGDRYH, ZTOT_RDRYHG) +#else !$acc kernels +#endif + IF(IMICRO>0) THEN -!$acc loop independent - DO JL=1, IMICRO +!acc loop independent + !$mnh_do_concurrent(JL=1:IMICRO) ZRVT(JL) = PRVT(I1(JL),I2(JL),I3(JL)) ZRCT(JL) = PRCT(I1(JL),I2(JL),I3(JL)) ZRRT(JL) = PRRT(I1(JL),I2(JL),I3(JL)) @@ -1487,7 +1506,7 @@ IF(IMICRO>0) THEN ELSE ZHLI_LCF(JL)=0. ENDIF - ENDDO + !$mnh_end_do() IF(GEXT_TEND) THEN !$acc loop independent DO JL=1, IMICRO @@ -1589,7 +1608,7 @@ IF(XTSTEP_TS/=0.)THEN INB_ITER_MAX=MAX(NMAXITER, INB_ITER_MAX) !For the case XMRSTEP/=0. 
at the same time ENDIF !acc end kernels -!$acc kernels +!$acc kernels present_cr(IITER,ZTIME) IITER(:)=0 ZTIME(:)=0. ! Current integration time (all points may have a different integration time) !$acc end kernels @@ -1977,7 +1996,7 @@ ENDDO ! ! !$acc kernels IF(IMICRO>0) THEN -!$acc kernels +!$acc kernels present_cr(ZHLC_HCF3D,ZHLC_LCF3D,ZHLC_HRC3D,ZHLC_LRC3D,ZHLI_HCF3D,ZHLI_LCF3D,ZHLI_HRI3D,ZHLI_LRI3D) ZHLC_HCF3D(:,:,:)=0. ZHLC_LCF3D(:,:,:)=0. ZHLC_HRC3D(:,:,:)=0. @@ -2000,7 +2019,7 @@ IF(IMICRO>0) THEN END DO !$acc end kernels ELSE -!$acc kernels +!$acc kernels present_cr(PRAINFR,ZHLC_HCF3D,ZHLC_LCF3D,ZHLC_HRC3D,ZHLC_LRC3D,ZHLI_HCF3D,ZHLI_LCF3D,ZHLI_HRI3D,ZHLI_LRI3D,PCIT) PRAINFR(:,:,:)=0. ZHLC_HCF3D(:,:,:)=0. ZHLC_LCF3D(:,:,:)=0. @@ -2013,7 +2032,7 @@ ELSE PCIT(:,:,:) = 0. !$acc end kernels ENDIF -!$acc kernels +!$acc kernels present_cr(PEVAP3D) IF(OWARM) THEN PEVAP3D(:,:,:) = 0. !$acc loop independent @@ -2053,19 +2072,19 @@ ENDDO if ( lbu_enable ) then !Note: there is an other contribution for HENU later if ( lbudget_th ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = pths(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'HENU', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_rv ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prvs(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'HENU', ZTEMP_BUD(:,:,:) ) end if if ( lbudget_ri ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = zz_rvheni(:, :, :) * prhodj(:, :, :) !$acc end kernels call Budget_store_add( tbudgets(NBUDGET_RI), 'HENU', ZTEMP_BUD(:,:,:) ) @@ -2227,7 +2246,7 @@ ELSE END DO !$acc end kernels ! 
-!$acc kernels +!$acc kernels present_cr(ZW_RHS) ZW_RVS(:,:,:) = ( ZW_RVS(:,:,:) - PRVT(:,:,:) ) * ZINV_TSTEP ZW_RCS(:,:,:) = ( ZW_RCS(:,:,:) - PRCT(:,:,:) ) * ZINV_TSTEP ZW_RRS(:,:,:) = ( ZW_RRS(:,:,:) - PRRT(:,:,:) ) * ZINV_TSTEP @@ -2801,7 +2820,6 @@ CONTAINS REAL, DIMENSION(:,:,:), POINTER, CONTIGUOUS :: ZW #endif ! -!$acc data present( PRV, PRC, PRR, PRI, PRS, PRG, PTH, PLVFACT, PLSFACT, PRH ) ! IF (MPPDB_INITIALIZED) THEN !Check all IN arrays @@ -2818,6 +2836,8 @@ CONTAINS CALL MPPDB_CHECK(PTH,"CORRECT_NEGATIVITIES beg:PTH") END IF +!$acc data present( PRV, PRC, PRR, PRI, PRS, PRG, PTH, PLVFACT, PLSFACT ) + #ifndef MNH_OPENACC allocate( gw(size( prv, 1 ), size( prv, 2 ), size( prv, 3 ) ) ) allocate( zw(size( prv, 1 ), size( prv, 2 ), size( prv, 3 ) ) ) @@ -2831,6 +2851,7 @@ CONTAINS !$acc data present( GW, ZW ) #endif +!$acc data present( PRH ) if ( present( PRH ) ) !$acc kernels !We correct negativities with conservation ! 1) deal with negative values for mixing ratio, except for vapor @@ -2926,6 +2947,7 @@ CONTAINS ENDDO ENDIF !$acc end kernels +!$acc end data !$acc end data @@ -2934,6 +2956,8 @@ CONTAINS CALL MNH_MEM_RELEASE() #endif +!$acc end data + IF (MPPDB_INITIALIZED) THEN !Check all INOUT arrays CALL MPPDB_CHECK(PRV,"CORRECT_NEGATIVITIES end:PRV") @@ -2946,9 +2970,8 @@ CONTAINS CALL MPPDB_CHECK(PTH,"CORRECT_NEGATIVITIES end:PTH") END IF -!$acc end data - END SUBROUTINE CORRECT_NEGATIVITIES ! +#endif END SUBROUTINE RAIN_ICE_RED diff --git a/src/ZSOLVER/turb.f90 b/src/ZSOLVER/turb.f90 index b4a4fff2c118bea73d1f0c2b74556ab5007ce139..c72f7d21d069a9b13a49a2e85337988ac7d0226d 100644 --- a/src/ZSOLVER/turb.f90 +++ b/src/ZSOLVER/turb.f90 @@ -14,6 +14,10 @@ module mode_turb #if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) use modi_bitrep #endif +#if defined(MNH_COMPILER_CCE) && defined(MNH_BITREP_OMP) +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif implicit none @@ -710,15 +714,18 @@ GTURBLEN_BL89_TURBLEN_RM17_TURBLEN_ADAP_ORMC01 = & HTURBLEN=='BL89' .OR. 
HTURBLEN=='RM17' .OR. HTURBLEN == 'ADAP' .OR. ORMC01 ! !$acc update device(PTHLT,PRT) -!$acc kernels present_cr(ZCOEF_DISS,ZTHLM,ZRM) !Copy data into ZTHLM and ZRM only if needed IF (GTURBLEN_BL89_TURBLEN_RM17_TURBLEN_ADAP_ORMC01) THEN - DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTHLM(JI,JJ,JK) = PTHLT(JI,JJ,JK) - ZRM(JI,JJ,JK,:) = PRT(JI,JJ,JK,:) - END DO +!$acc kernels present_cr(ZTHLM,ZRM) + ZTHLM(:,:,:) = PTHLT(:,:,:) + ZRM(:,:,:,:) = PRT(:,:,:,:) + !DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !ZTHLM(JI,JJ,JK) = PTHLT(JI,JJ,JK) + !ZRM(JI,JJ,JK,:) = PRT(JI,JJ,JK,:) + !END DO +!$acc end kernels END IF -! +!$acc kernels present_cr(ZTRH,ZCOEF_DISS,ZTHLM,ZRM,zcp) ZTRH(:, :, : ) = XUNDEF ! !---------------------------------------------------------------------------- @@ -753,9 +760,9 @@ ELSE #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZEXN(:,:,:) = (PPABST(:,:,:)/XP00) ** (XRD/XCPD) #else -DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZEXN(JI,JJ,JK) = BR_POW(PPABST(JI,JJ,JK)/XP00,XRD/XCPD) -END DO +!$mnh_end_do() #endif END IF ! @@ -803,19 +810,21 @@ IF (KRRL >=1) THEN CALL COMPUTE_FUNCTION_THERMO(XALPI,XBETAI,XGAMI,XLSTT,XCI,ZT,ZEXN,ZCP, & ZLSOCPEXNM,ZAMOIST_ICE,ZATHETA_ICE) ! -!$acc kernels present( zamoist, zatheta, zlocpexnm, zlvocpexnm, zlsocpexnm, zamoist_ice, zatheta_ice ) - DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$acc kernels present_cr( zamoist, zatheta, zlocpexnm, zlvocpexnm, zlsocpexnm, zamoist_ice, zatheta_ice ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) IF (PRT(JI,JJ,JK,2)+PRT(JI,JJ,JK,4)>0.0) THEN ZFRAC_ICE(JI,JJ,JK) = PRT(JI,JJ,JK,4) / ( PRT(JI,JJ,JK,2)+PRT(JI,JJ,JK,4) ) END IF - END DO + !$mnh_end_do() ! 
+ !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZLOCPEXNM(:,:,:) = (1.0-ZFRAC_ICE(:,:,:))*ZLVOCPEXNM(:,:,:) & +ZFRAC_ICE(:,:,:) *ZLSOCPEXNM(:,:,:) ZAMOIST(:,:,:) = (1.0-ZFRAC_ICE(:,:,:))*ZAMOIST(:,:,:) & +ZFRAC_ICE(:,:,:) *ZAMOIST_ICE(:,:,:) ZATHETA(:,:,:) = (1.0-ZFRAC_ICE(:,:,:))*ZATHETA(:,:,:) & +ZFRAC_ICE(:,:,:) *ZATHETA_ICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels !$acc end data @@ -859,7 +868,7 @@ IF (KRRL >=1) THEN END IF ! ELSE -!$acc kernels present( zlocpexnm ) +!$acc kernels present_cr( zlocpexnm ) ZLOCPEXNM=0. !$acc end kernels END IF ! loop end on KRRL >= 1 @@ -868,9 +877,9 @@ END IF ! loop end on KRRL >= 1 ! !$acc update device(PRRS,PRTHLS) IF ( KRRL >= 1 ) THEN -!$acc kernels present( zlocpexnm ) +!$acc kernels present_cr( zlocpexnm ) IF ( KRRI >= 1 ) THEN - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ! Rnp at t PRT(JI,JJ,JK,1) = PRT(JI,JJ,JK,1) + PRT(JI,JJ,JK,2) + PRT(JI,JJ,JK,4) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) + PRRS(JI,JJ,JK,2) + PRRS(JI,JJ,JK,4) @@ -879,16 +888,16 @@ IF ( KRRL >= 1 ) THEN - ZLSOCPEXNM(JI,JJ,JK) * PRT(JI,JJ,JK,4) PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZLVOCPEXNM(JI,JJ,JK) * PRRS(JI,JJ,JK,2) & - ZLSOCPEXNM(JI,JJ,JK) * PRRS(JI,JJ,JK,4) - ENDDO + !$mnh_end_do() ELSE - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ! Rnp at t PRT(JI,JJ,JK,1) = PRT(JI,JJ,JK,1) + PRT(JI,JJ,JK,2) PRRS(JI,JJ,JK,1) = PRRS(JI,JJ,JK,1) + PRRS(JI,JJ,JK,2) ! Theta_l at t PTHLT(JI,JJ,JK) = PTHLT(JI,JJ,JK) - ZLOCPEXNM(JI,JJ,JK) * PRT(JI,JJ,JK,2) PRTHLS(JI,JJ,JK) = PRTHLS(JI,JJ,JK) - ZLOCPEXNM(JI,JJ,JK) * PRRS(JI,JJ,JK,2) - ENDDO + !$mnh_end_do() END IF !$acc end kernels END IF @@ -905,7 +914,7 @@ SELECT CASE (HTURBLEN) ! ------------------ CASE ('BL89') -!$acc kernels +!$acc kernels present_cr(ZSHEAR) ZSHEAR(:, :, : ) = 0. 
!$acc end kernels CALL BL89(KKA,KKU,KKL,PZZ,PDZZ,PTHVREF,ZTHLM,KRR,ZRM,PTKET,ZSHEAR,PLEM) @@ -996,7 +1005,7 @@ END IF !* 3.6 Dissipative length ! ------------------ ! -!$acc kernels +!$acc kernels present_cr(ZLEPS) ZLEPS(:,:,:)=PLEM(:,:,:) ! !* 3.7 Correction in the Surface Boundary Layer (Redelsperger 2001) @@ -1009,11 +1018,13 @@ IF (ORMC01) THEN #ifdef MNH_OPENACC call Print_msg( NVERB_FATAL, 'GEN', 'TURB', 'OpenACC: ORMC01 not yet implemented' ) #endif +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZUSTAR(:,:) = (PSFU(:,:)**2+PSFV(:,:)**2)**(0.25) #else ZUSTAR(:,:) = BR_POW( BR_P2( PSFU(:,:) ) + BR_P2( PSFV(:,:) ), 0.25 ) #endif +!$mnh_end_expand_array() IF (KRR>0) THEN ZLMO(:,:) = LMO( ZUSTAR(:,:), ZTHLM(:,:,IKB), ZRM(:,:,IKB,1), PSFTH(:,:), PSFRV(:,:) ) ELSE @@ -1060,7 +1071,7 @@ ENDIF ! CALL UPDATE_ROTATE_WIND(ZUSLOPE,ZVSLOPE) ELSE -!$acc kernels +!$acc kernels present_cr(ZUSLOPE,ZVSLOPE) ZUSLOPE=PUT(:,:,KKA) ZVSLOPE=PVT(:,:,KKA) !$acc end kernels @@ -1074,11 +1085,10 @@ ENDIF ZCDUEFF(:,:) =-SQRT ( (PSFU(:,:)**2 + PSFV(:,:)**2) / & (XMNH_TINY + ZUSLOPE(:,:)**2 + ZVSLOPE(:,:)**2 ) ) #else - !$acc_nv loop independent collapse(2) - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZCDUEFF(JI,JJ) =-SQRT ( (BR_P2(PSFU(JI,JJ)) + BR_P2(PSFV(JI,JJ))) / & (XMNH_TINY + BR_P2(ZUSLOPE(JI,JJ)) + BR_P2(ZVSLOPE(JI,JJ)) ) ) - END DO + !$mnh_end_do() #endif !$acc end kernels ! @@ -1123,7 +1133,7 @@ IF (HTOM=='TM06') THEN CALL GZ_W_M_DEVICE(ZMTH2,PDZZ,ZFTH2) ! -d(w'th'2 )/dz !CALL GZ_W_M_DEVICE(ZMR2, PDZZ,ZFR2) ! -d(w'r'2 )/dz !CALL GZ_W_M_DEVICE(ZMTHR,PDZZ,ZFTHR) ! -d(w'th'r')/dz -!$acc kernels +!$acc kernels present_cr(ZFWTH,ZFTH2,ZFWR,ZFR2,ZFTHR) ZFWTH = -ZFWTH !ZFWR = -ZFWR ZFTH2 = -ZFTH2 @@ -1146,7 +1156,7 @@ IF (HTOM=='TM06') THEN ZFTHR = 0. !$acc end kernels ELSE -!$acc kernels +!$acc kernels present_cr(ZFWTH,ZFWR,ZFTH2,ZFR2,ZFTHR) ZFWTH(:,:,:) = 0. ZFWR(:,:,:) = 0. ZFTH2(:,:,:) = 0. 
@@ -1166,13 +1176,13 @@ if ( lbudget_w ) call Budget_store_init( tbudgets(NBUDGET_W ), 'VTURB', prws ( if ( lbudget_th ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present( ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr( ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1183,12 +1193,12 @@ end if if ( lbudget_rv ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1232,13 +1242,13 @@ if ( lbudget_w ) call Budget_store_end( tbudgets(NBUDGET_W), 'VTURB', prws(:, :, if ( lbudget_th ) then if ( krri >= 1 .and. 
krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr(ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1249,12 +1259,12 @@ end if if ( lbudget_rv ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'VTURB', ZTEMP_BUD(:,:,:) ) @@ -1279,13 +1289,13 @@ if ( hturbdim == '3DIM' ) then if (lbudget_th) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr(ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1296,12 +1306,12 @@ if ( hturbdim == '3DIM' ) then if ( lbudget_rv ) then if ( krri >= 1 .and. 
krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_init( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1341,13 +1351,13 @@ if ( hturbdim == '3DIM' ) then if ( lbudget_th ) then if ( krri >= 1 .and. krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlvocpexnm(:, :, :) * prrs(:, :, :, 2) & + zlsocpexnm(:, :, :) * prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD, zlocpexnm ) + !$acc kernels present_cr(ZTEMP_BUD, zlocpexnm ) ZTEMP_BUD(:,:,:) = prthls(:, :, :) + zlocpexnm(:, :, :) * prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_TH), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1358,12 +1368,12 @@ if ( hturbdim == '3DIM' ) then if ( lbudget_rv ) then if ( krri >= 1 .and. 
krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) - prrs(:, :, :, 4) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) else if ( krrl >= 1 ) then - !$acc kernels present(ZTEMP_BUD) + !$acc kernels present_cr(ZTEMP_BUD) ZTEMP_BUD(:,:,:) = prrs(:, :, :, 1) - prrs(:, :, :, 2) !$acc end kernels call Budget_store_end( tbudgets(NBUDGET_RV), 'HTURB', ZTEMP_BUD(:,:,:) ) @@ -1395,7 +1405,7 @@ end if PTHP = PTHP + XG / PTHVREF * MZF( PFLXZTHVMF ) #else CALL MZF_DEVICE( PFLXZTHVMF, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PTHP) PTHP(:,:,:) = PTHP(:,:,:) + XG / PTHVREF(:,:,:) * ZTMP1_DEVICE(:,:,:) !$acc end kernels #endif @@ -1477,7 +1487,7 @@ END IF ! IF ( KRRL >= 1 ) THEN IF ( KRRI >= 1 ) THEN -!$acc kernels +!$acc kernels present_cr(PRT,PRRS,PTHLT,PRTHLS) PRT(:,:,:,1) = PRT(:,:,:,1) - PRT(:,:,:,2) - PRT(:,:,:,4) PRRS(:,:,:,1) = PRRS(:,:,:,1) - PRRS(:,:,:,2) - PRRS(:,:,:,4) PTHLT(:,:,:) = PTHLT(:,:,:) + ZLVOCPEXNM(:,:,:) * PRT(:,:,:,2) & @@ -1495,7 +1505,7 @@ IF ( KRRL >= 1 ) THEN CALL MNH_MEM_RELEASE() #endif ELSE -!$acc kernels present( zlocpexnm ) +!$acc kernels present_cr(PRT,PRRS,PTHLT,PRTHLS, zlocpexnm ) PRT(:,:,:,1) = PRT(:,:,:,1) - PRT(:,:,:,2) PRRS(:,:,:,1) = PRRS(:,:,:,1) - PRRS(:,:,:,2) PTHLT(:,:,:) = PTHLT(:,:,:) + ZLOCPEXNM(:,:,:) * PRT(:,:,:,2) @@ -1529,7 +1539,7 @@ IF (LLES_CALL) THEN #ifndef MNH_OPENACC CALL LES_MEAN_SUBGRID((PSFU*PSFU+PSFV*PSFV)**0.25,X_LES_USTAR) #else -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,1) = (PSFU*PSFU+PSFV*PSFV)**0.25 !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP1_DEVICE(:,:,1),X_LES_USTAR) @@ -1575,17 +1585,17 @@ IF (LLES_CALL) THEN #else IF (HTURBDIM=="1DIM") THEN !$acc data copy(X_LES_SUBGRID_U2,X_LES_SUBGRID_V2,X_LES_SUBGRID_W2,X_LES_RES_ddz_Thl_SBG_W2) -!$acc kernels +!$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE = 2./3.*PTKET !$acc end kernels 
CALL LES_MEAN_SUBGRID(ZTMP1_DEVICE,X_LES_SUBGRID_U2) -!$acc kernels +!$acc kernels present_cr(X_LES_SUBGRID_V2,X_LES_SUBGRID_W2) X_LES_SUBGRID_V2(:,:,:) = X_LES_SUBGRID_U2(:,:,:) X_LES_SUBGRID_W2(:,:,:) = X_LES_SUBGRID_U2(:,:,:) !$acc end kernels CALL GZ_M_W_DEVICE(KKA,KKU,KKL,PTHLT,PDZZ,ZTMP2_DEVICE) CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*ZTMP3_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_RES_ddz_Thl_SBG_W2) @@ -1594,7 +1604,7 @@ IF (LLES_CALL) THEN !$acc data copy(X_LES_RES_ddz_Rt_SBG_W2) CALL GZ_M_W_DEVICE(KKA,KKU,KKL,PRT(:,:,:,1),PDZZ,ZTMP2_DEVICE) CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*PTKET*ZTMP3_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_RES_ddz_Rt_SBG_W2) @@ -1604,7 +1614,7 @@ IF (LLES_CALL) THEN DO JSV=1,NSV CALL GZ_M_W_DEVICE(KKA,KKU,KKL,PSVT(:,:,:,JSV),PDZZ,ZTMP2_DEVICE) CALL MZF_DEVICE( ZTMP2_DEVICE, ZTMP3_DEVICE ) -!$acc kernels +!$acc kernels present_cr(ZTMP2_DEVICE) ZTMP2_DEVICE = ZTMP1_DEVICE*PTKET*ZTMP3_DEVICE !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP2_DEVICE,X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) @@ -1623,7 +1633,7 @@ IF (LLES_CALL) THEN ! !* presso-correlations for subgrid Tke are equal to zero. ! -!$acc kernels +!$acc kernels present_cr(ZLEPS) ZLEPS = 0. !ZLEPS is used as a work array (not used anymore) !$acc end kernels CALL LES_MEAN_SUBGRID(ZLEPS,X_LES_SUBGRID_WP) @@ -1763,7 +1773,7 @@ geast = HLBCX(2) /= "CYCL" .AND. LEAST_ll() gsouth = HLBCY(1) /= "CYCL" .AND. LSOUTH_ll() gnorth = HLBCY(2) /= "CYCL" .AND. LNORTH_ll() -!$acc kernels +!$acc kernels present_cr(PUSLOPE,PVSLOPE) IF ( gwest ) THEN PUSLOPE(IIB-1,:)=PUSLOPE(IIB,:) PVSLOPE(IIB-1,:)=PVSLOPE(IIB,:) @@ -1859,7 +1869,7 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) ! !* 1.1 Lv/Cph at t ! -!$acc kernels ! present(ZRVSAT,ZDRVSATDT) ! 
present(PLOCPEXN) ! present ZDRVSATDT) +!$acc kernels present_cr(PLOCPEXN) ! present(ZRVSAT,ZDRVSATDT) ! present(PLOCPEXN) ! present ZDRVSATDT) PLOCPEXN(:,:,:) = ( PLTT + (XCPV-PC) * (PT(:,:,:)-XTT) ) / PCP(:,:,:) ! !* 1.2 Saturation vapor pressure at t @@ -1870,9 +1880,9 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) ZRVSAT(:,:,:) = EXP( PALP - PBETA/PT(:,:,:) - PGAM*ALOG( PT(:,:,:) ) ) #else - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZRVSAT(JI,JJ,JK) = BR_EXP( PALP - PBETA/PT(JI,JJ,JK) - PGAM*BR_LOG( PT(JI,JJ,JK) ) ) - END DO + !$mnh_end_do() #endif !$acc end kernels !$acc kernels present_cr(ZRVSAT,ZDRVSATDT) @@ -1906,8 +1916,7 @@ CALL MNH_MEM_GET( zdrvsatdt, size( pexn, 1 ), size( pexn, 2 ), size( pexn, 3 ) ) - ZDRVSATDT(:,:,:) & ) #else -!$acc_nv loop independent collapse(3) -DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PATHETA(JI,JJ,JK)= PAMOIST(JI,JJ,JK) * PEXN(JI,JJ,JK) * & ( ( ZRVSAT(JI,JJ,JK) - PRT(JI,JJ,JK,1) ) * PLOCPEXN(JI,JJ,JK) / & ( 1. + ZDRVSATDT(JI,JJ,JK) * PLOCPEXN(JI,JJ,JK) ) * & @@ -1919,16 +1928,15 @@ DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ) & - ZDRVSATDT(JI,JJ,JK) & ) -ENDDO +!$mnh_end_do() #endif !$acc end kernels !* 1.7 Lv/Cph/Exner at t-1 ! -!$acc kernels present(PLOCPEXN) -!$acc_nv loop independent collapse(3) -DO CONCURRENT(JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$acc kernels present_cr(PLOCPEXN) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PLOCPEXN(JI,JJ,JK) = PLOCPEXN(JI,JJ,JK) / PEXN(JI,JJ,JK) -END DO +!$mnh_end_do() !$acc end kernels if ( mppdb_initialized ) then @@ -2054,8 +2062,10 @@ ELSE !* 3.1 BL89 mixing length ! ------------------ CASE ('BL89','RM17') -!$acc kernels present(ZSHEAR) +!$acc kernels present_cr(ZSHEAR) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) ZSHEAR(:, :, : ) = 0. 
+!$mnh_end_expand_array() !$acc end kernels CALL BL89(KKA,KKU,KKL,PZZ,PDZZ,PTHVREF,ZTHLM,KRR,ZRM,PTKET,ZSHEAR,ZLM_CLOUD) ! @@ -2232,7 +2242,7 @@ GOCEAN = LOCEAN !$acc present( ztmp1_device, ztmp2_device ) IF (ODZ) THEN -!$acc kernels +!$acc kernels present_cr(PLM) ! Dz is take into account in the computation DO JK = KKTB,KKTE ! 1D turbulence scheme PLM(:,:,JK) = PZZ(:,:,JK+KKL) - PZZ(:,:,JK) @@ -2246,7 +2256,7 @@ IF (ODZ) THEN PLM(:,:,:) = SQRT( PLM(:,:,:)*MXF(PDXX(:,:,:)) ) #else CALL MXF_DEVICE( PDXX, ZTMP1_DEVICE ) -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:) = SQRT( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) ) !$acc end kernels #endif @@ -2264,9 +2274,9 @@ IF (ODZ) THEN #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PLM(:,:,:) = ( PLM(:,:,:) * ZTMP1_DEVICE(:,:,:) * ZTMP2_DEVICE(:,:,:) ) ** (1./3.) #else -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK) * ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK), 1./3. ) -ENDDO +!$mnh_end_do() #endif !$acc end kernels #endif @@ -2274,7 +2284,7 @@ ENDDO END IF ELSE ! Dz not taken into account in computation to assure invariability with vertical grid mesh -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:)=1.E10 !$acc end kernels IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme @@ -2298,9 +2308,9 @@ ELSE #if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) PLM(:,:,:) = ( ZTMP1_DEVICE * ZTMP2_DEVICE ) ** (1./2.) #else - DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK=1:JKU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PLM(JI,JJ,JK) = BR_POW( ZTMP1_DEVICE(JI,JJ,JK) * ZTMP2_DEVICE(JI,JJ,JK), 1. / 2. ) - END DO + !$mnh_end_do() #endif !$acc end kernels #endif @@ -2323,6 +2333,7 @@ IF (.NOT. ORMC01) THEN DO JJ=1,JJU DO JI=1,JIU IF (GOCEAN) THEN + !$acc loop seq DO JK=KKTE,KKTB,-1 ZD=ZALPHA*(PZZ(JI,JJ,KKTE+1)-PZZ(JI,JJ,JK)) IF ( PLM(JI,JJ,JK)>ZD) THEN @@ -2332,6 +2343,7 @@ IF (.NOT. 
ORMC01) THEN ENDIF END DO ELSE + !$acc loop seq DO JK=KKTB,KKTE ZD = ZALPHA * ( 0.5 * ( PZZ(JI, JJ, JK) + PZZ(JI, JJ, JK+KKL) ) - PZZ(JI, JJ, KKB) ) * PDIRCOSZW(JI, JJ) IF ( PLM(JI,JJ,JK) > ZD ) THEN @@ -2347,12 +2359,12 @@ IF (.NOT. ORMC01) THEN END IF ! !$acc kernels -DO CONCURRENT(JI=1:JIU , JJ=1:JJU ) - PLM(JI,JJ,KKA) = PLM(JI,JJ,KKB ) -END DO -DO CONCURRENT(JI=1:JIU , JJ=1:JJU ) - PLM(JI,JJ,KKU ) = PLM(JI,JJ,KKE) -END DO +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) + PLM(JI,JJ,KKA) = PLM(JI,JJ,KKB) +! mnh_end_do() +! mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) + PLM(JI,JJ,KKU) = PLM(JI,JJ,KKE) +!$mnh_end_do() !$acc end kernels !$acc end data @@ -2513,7 +2525,7 @@ CALL MNH_MEM_GET( ZTMP2_DEVICE, JIU, JJU, JKU ) ! ! initialize the mixing length with the mesh grid -!$acc kernels +!$acc kernels present_cr(PLM) ! 1D turbulence scheme PLM(:,:,KKTB:KKTE) = PZZ(:,:,KKTB+KKL:KKTE+KKL) - PZZ(:,:,KKTB:KKTE) PLM(:,:,KKU) = PLM(:,:,KKE) @@ -2525,7 +2537,7 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme PLM(:,:,:) = SQRT( PLM(:,:,:)*MXF(PDXX(:,:,:)) ) #else CALL MXF_DEVICE(PDXX,ZTMP1_DEVICE) -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:) = SQRT( PLM(:,:,:)*ZTMP1_DEVICE ) !$acc end kernels if ( mppdb_initialized ) then @@ -2544,7 +2556,7 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme #else CALL MXF_DEVICE(PDXX,ZTMP1_DEVICE) CALL MYF_DEVICE(PDYY,ZTMP2_DEVICE) -!$acc kernels +!$acc kernels present_cr(PLM) PLM(:,:,:) = (PLM(:,:,:)*ZTMP1_DEVICE*ZTMP2_DEVICE ) ** (1./3.) !$acc end kernels if ( mppdb_initialized ) then @@ -2567,9 +2579,9 @@ IF ( HTURBDIM /= '1DIM' ) THEN ! 3D turbulence scheme call Mppdb_check( plm, "Dear mid1:plm" ) end if !$acc kernels -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=1:JKU) PLM(JI,JJ,JK) = BR_POW( PLM(JI,JJ,JK)*ZTMP1_DEVICE(JI,JJ,JK) *ZTMP2_DEVICE(JI,JJ,JK) , 1./3. 
) -ENDDO +!$mnh_end_do() !$acc end kernels if ( mppdb_initialized ) then call Mppdb_check( plm, "Dear mid2:plm" ) @@ -2590,8 +2602,8 @@ CALL EMOIST(KRR,KRRI,PTHLT,PRT,PLOCPEXNM,PAMOIST,PSRCT,ZEMOIST) ! !$acc kernels present(ZWORK2D,PLM) IF (KRR>0) THEN - !$acc_nv loop independent collapse(3) private(ZVAR) - DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK = KKTB+1:KKTE-1) + ! acc loop private(ZVAR) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=KKTB+1:KKTE-1) ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK ))/PDZZ(JI,JJ,JK+KKL)+ & (PTHLT(JI,JJ,JK )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK )) ZDRTDZ(JI,JJ,JK) = 0.5*((PRT(JI,JJ,JK+KKL,1)-PRT(JI,JJ,JK ,1))/PDZZ(JI,JJ,JK+KKL)+ & @@ -2607,10 +2619,10 @@ IF (KRR>0) THEN PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), & 0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR))) END IF - END DO + !$mnh_end_do() ELSE! For dry atmos or unsalted ocean runs - !$acc_nv loop independent collapse(3) private(ZVAR) - DO CONCURRENT( JI=1:JIU, JJ=1:JJU, JK = KKTB+1:KKTE-1) + ! acc loop private(ZVAR) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU,JK=KKTB+1:KKTE-1) ZDTHLDZ(JI,JJ,JK)= 0.5*((PTHLT(JI,JJ,JK+KKL)-PTHLT(JI,JJ,JK ))/PDZZ(JI,JJ,JK+KKL)+ & (PTHLT(JI,JJ,JK )-PTHLT(JI,JJ,JK-KKL))/PDZZ(JI,JJ,JK )) IF (GOCEAN) THEN @@ -2623,7 +2635,7 @@ ELSE! For dry atmos or unsalted ocean runs PLM(JI,JJ,JK)=MAX(XMNH_EPSILON,MIN(PLM(JI,JJ,JK), & 0.76* SQRT(PTKET(JI,JJ,JK)/ZVAR))) END IF - END DO + !$mnh_end_do() END IF ! special case near the surface ZDTHLDZ(:,:,KKB)=(PTHLT(:,:,KKB+KKL)-PTHLT(:,:,KKB))/PDZZ(:,:,KKB+KKL) @@ -2640,12 +2652,9 @@ IF (GOCEAN) THEN ZWORK2D(:,:)=XG*(XALPHAOC*ZDTHLDZ(:,:,KKB)-XBETAOC*ZDRTDZ(:,:,KKB)) #else !PW: bug: nvhpc 21.11 does not parallelize this loop even with loop independent directive! 
-#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif - DO CONCURRENT( JI = 1 : JIU, JJ = 1 : JJU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZWORK2D(JI,JJ)=XG*(XALPHAOC*ZDTHLDZ(JI,JJ,KKB)-XBETAOC*ZDRTDZ(JI,JJ,KKB)) - END DO + !$mnh_end_do() #endif ELSE #if 0 @@ -2654,21 +2663,18 @@ ELSE (ZETHETA(:,:,KKB)*ZDTHLDZ(:,:,KKB)+ZEMOIST(:,:,KKB)*ZDRTDZ(:,:,KKB)) #else !PW: bug: nvhpc 21.11 does not parallelize this loop even with loop independent directive! -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif - DO CONCURRENT( JI = 1 : JIU, JJ = 1 : JJU ) + !$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) ZWORK2D(JI,JJ)=XG/PTHVREF(JI,JJ,KKB)* & (ZETHETA(JI,JJ,KKB)*ZDTHLDZ(JI,JJ,KKB)+ZEMOIST(JI,JJ,KKB)*ZDRTDZ(JI,JJ,KKB)) - END DO + !$mnh_end_do() #endif END IF -DO CONCURRENT(JI=1:JIU,JJ=1:JJU) +!$mnh_do_concurrent(JI=1:JIU,JJ=1:JJU) IF (ZWORK2D(JI,JJ)>0.) THEN PLM(JI,JJ,KKB)=MAX(XMNH_EPSILON,MIN( PLM(JI,JJ,KKB), & 0.76* SQRT(PTKET(JI,JJ,KKB)/ZWORK2D(JI,JJ)))) END IF -END DO +!$mnh_end_do() ! ! mixing length limited by the distance normal to the surface (with the same factor as for BL89) ! @@ -2678,6 +2684,7 @@ IF (.NOT. ORMC01) THEN DO JJ=1,SIZE(PLM,2) DO JI=1,SIZE(PLM,1) IF (GOCEAN) THEN + !$acc loop seq DO JK=KKTE,KKTB,-1 ZD=ZALPHA*(PZZ(JI,JJ,KKTE+1)-PZZ(JI,JJ,JK)) IF ( PLM(JI,JJ,JK)>ZD) THEN @@ -2687,6 +2694,7 @@ IF (.NOT. ORMC01) THEN ENDIF END DO ELSE + !$acc loop seq DO JK=KKTB,KKTE ZD=ZALPHA*(0.5*(PZZ(JI,JJ,JK)+PZZ(JI,JJ,JK+KKL))-PZZ(JI,JJ,KKB)) & *PDIRCOSZW(JI,JJ) diff --git a/src/ZSOLVER/turb_hor_dyn_corr.f90 b/src/ZSOLVER/turb_hor_dyn_corr.f90 index 850e010d2323a0b7673c250f90808939bfe408fd..17291f66ca1fa3eb874b5fd563e96b0521b7a80e 100644 --- a/src/ZSOLVER/turb_hor_dyn_corr.f90 +++ b/src/ZSOLVER/turb_hor_dyn_corr.f90 @@ -174,9 +174,13 @@ USE MODI_SHUMAN_DEVICE #endif USE MODI_TRIDIAG_W ! 
-#ifdef MNH_BITREP +#if defined(MNH_BITREP) || defined(MNH_BITREP_OMP) USE MODI_BITREP #endif +#ifdef MNH_COMPILER_CCE +!$mnh_undef(LOOP) +!$mnh_undef(OPENACC) +#endif ! IMPLICIT NONE ! @@ -395,15 +399,13 @@ IKU = SIZE(PUM,3) ! ! !$acc kernels async(1) -#ifndef MNH_BITREP +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZDIRSINZW(:,:) = SQRT( 1. - PDIRCOSZW(:,:)**2 ) #else -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(2) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) - ZDIRSINZW(JI,JJ) = SQRT( 1. - BR_P2(PDIRCOSZW(JI,JJ)) ) -END DO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) + ZDIRSINZW(:,:) = SQRT( 1. - BR_P2(PDIRCOSZW(:,:)) ) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -437,16 +439,13 @@ CALL ADD3DFIELD_ll( TZFIELDS_ll, ZFLX, 'TURB_HOR_DYN_CORR::ZFLX' ) ! ! Computes the U variance IF (.NOT. L2D) THEN - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GX_U_M_PUM(JI,JJ,JK) & - -(2./3.) * ( GY_V_M_PVM(JI,JJ,JK) & - +GZ_W_M_PWM(JI,JJ,JK) ) ) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(zflx,gz_w_m_pwm) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GX_U_M_PUM(:,:,:) & + -(2./3.) * ( GY_V_M_PVM(:,:,:) & + +GZ_W_M_PWM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested later !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP @@ -461,10 +460,9 @@ ELSE END IF ! !$acc kernels async(2) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKE+1) = ZFLX(JI,JJ,IKE) -ENDDO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) +!$mnh_end_expand_array() !$acc end kernels ! 
!* prescription of du/dz and dv/dz with uncentered gradient at the surface @@ -477,16 +475,15 @@ ZDZZ(:,:,:) = MXM(PDZZ(:,:,IKB:IKB+2)) #else CALL MXM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB+1)= (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) / & - ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) -ENDDO +!$acc kernels async(3) present_cr(zdzz,zcoeff) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & + ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$mnh_end_expand_array() !$acc end kernels ! #ifndef MNH_OPENACC @@ -498,14 +495,13 @@ ZDU_DZ_DZS_DX(:,:,:)=MXF ((ZCOEFF(:,:,IKB+2:IKB+2)*PUM(:,:,IKB+2:IKB+2) & ! ZDZZ(:,:,:) = MYM(PDZZ(:,:,IKB:IKB+2)) #else -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZTMP1_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PUM(JI,JJ,IKB+2) & - +ZCOEFF(JI,JJ,IKB+1)*PUM(JI,JJ,IKB+1) & - +ZCOEFF(JI,JJ,IKB)*PUM(JI,JJ,IKB) & - )* 0.5 * ( PDZX(JI,JJ,IKB+1)+PDZX(JI,JJ,IKB)) -ENDDO +!$acc kernels async(3) present_cr(pum,ztmp1_device) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZTMP1_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PUM(:,:,IKB+2) & + +ZCOEFF(:,:,IKB+1)*PUM(:,:,IKB+1) & + +ZCOEFF(:,:,IKB)*PUM(:,:,IKB) & + )* 0.5 * ( PDZX(:,:,IKB+1)+PDZX(:,:,IKB)) +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZCOEFF and ZTMP1_DEVICE @@ -513,25 +509,23 @@ ENDDO ! 
CALL MXF_DEVICE(ZTMP1_DEVICE(:,:,1:1), ZTMP2_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB), ZTMP1_DEVICE(:,:,1:1)) -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDU_DZ_DZS_DX(JI,JJ,1) = ZTMP2_DEVICE(JI,JJ,1) / ZTMP1_DEVICE(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(ztmp1_device,zdu_dz_dzs_dx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDU_DZ_DZS_DX(:,:,1) = ZTMP2_DEVICE(:,:,1) / ZTMP1_DEVICE(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! CALL MYM_DEVICE(PDZZ(:,:,IKB:IKB+2),ZDZZ(:,:,:)) #endif -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZCOEFF(JI,JJ,IKB+2)= - ZDZZ(JI,JJ,2) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB+1)= (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) / & - ( ZDZZ(JI,JJ,2) * ZDZZ(JI,JJ,3) ) - ZCOEFF(JI,JJ,IKB)= - (ZDZZ(JI,JJ,3)+2.*ZDZZ(JI,JJ,2)) / & - ( (ZDZZ(JI,JJ,3)+ZDZZ(JI,JJ,2)) * ZDZZ(JI,JJ,2) ) -ENDDO +!$acc kernels async(4) present_cr(zdzz,zcoeff) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZCOEFF(:,:,IKB+2)= - ZDZZ(:,:,2) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB+1)= (ZDZZ(:,:,3)+ZDZZ(:,:,2)) / & + ( ZDZZ(:,:,2) * ZDZZ(:,:,3) ) + ZCOEFF(:,:,IKB)= - (ZDZZ(:,:,3)+2.*ZDZZ(:,:,2)) / & + ( (ZDZZ(:,:,3)+ZDZZ(:,:,2)) * ZDZZ(:,:,2) ) +!$mnh_end_expand_array() !$acc end kernels ! 
#ifndef MNH_OPENACC @@ -541,14 +535,13 @@ ZDV_DZ_DZS_DY(:,:,:)=MYF ((ZCOEFF(:,:,IKB+2:IKB+2)*PVM(:,:,IKB+2:IKB+2) & )* 0.5 * ( PDZY(:,:,IKB+1:IKB+1)+PDZY(:,:,IKB:IKB)) & )/ MYF(PDYY(:,:,IKB:IKB)) #else -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZTMP3_DEVICE(JI,JJ,1) = (ZCOEFF(JI,JJ,IKB+2)*PVM(JI,JJ,IKB+2) & - +ZCOEFF(JI,JJ,IKB+1)*PVM(JI,JJ,IKB+1) & - +ZCOEFF(JI,JJ,IKB)*PVM(JI,JJ,IKB) & - )* 0.5 * ( PDZY(JI,JJ,IKB+1)+PDZY(JI,JJ,IKB)) -ENDDO +!$acc kernels async(4) present_cr(pvm,ztmp3_device) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZTMP3_DEVICE(:,:,1) = (ZCOEFF(:,:,IKB+2)*PVM(:,:,IKB+2) & + +ZCOEFF(:,:,IKB+1)*PVM(:,:,IKB+1) & + +ZCOEFF(:,:,IKB)*PVM(:,:,IKB) & + )* 0.5 * ( PDZY(:,:,IKB+1)+PDZY(:,:,IKB)) +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZCOEFF and ZTMP3_DEVICE @@ -574,11 +567,10 @@ ZDV_DZ_DZS_DY(:,:,1)= ZTMP4_DEVICE(:,:,1) / ZTMP3_DEVICE(:,:,1) ! CALL DXF_DEVICE(PUM(:,:,IKB:IKB),ZTMP1_DEVICE(:,:,1:1)) CALL MXF_DEVICE(PDXX(:,:,IKB:IKB),ZTMP2_DEVICE(:,:,1:1)) -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDU_DX(JI,JJ,1)= ZTMP1_DEVICE(JI,JJ,1) / ZTMP2_DEVICE(JI,JJ,1) - ZDU_DZ_DZS_DX(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(zdu_dz_dzs_dx,zdu_dx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDU_DX(:,:,1)= ZTMP1_DEVICE(:,:,1) / ZTMP2_DEVICE(:,:,1) - ZDU_DZ_DZS_DX(:,:,1) +!$mnh_end_expand_array() !$acc end kernels !!! wait for the computation of ZDV_DZ_DZS_DY @@ -586,11 +578,10 @@ ENDDO ! 
CALL DYF_DEVICE(PVM(:,:,IKB:IKB),ZTMP3_DEVICE(:,:,1:1)) CALL MYF_DEVICE(PDYY(:,:,IKB:IKB),ZTMP4_DEVICE(:,:,1:1)) -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDV_DY(JI,JJ,1)= ZTMP3_DEVICE(JI,JJ,1) / ZTMP4_DEVICE(JI,JJ,1) - ZDV_DZ_DZS_DY(JI,JJ,1) -ENDDO +!$acc kernels async(4) present_cr(zdv_dz_dzs_dy,zdv_dy) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDV_DY(:,:,1)= ZTMP3_DEVICE(:,:,1) / ZTMP4_DEVICE(:,:,1) - ZDV_DZ_DZS_DY(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! ! @@ -598,11 +589,10 @@ ENDDO !$acc wait(3) async(4) #endif ! -!$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZDW_DZ(JI,JJ,1)=-ZDU_DX(JI,JJ,1)-ZDV_DY(JI,JJ,1) -ENDDO +!$acc kernels async(4) present_cr(zdv_dy,zdw_dz) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZDW_DZ(:,:,1)=-ZDU_DX(:,:,1)-ZDV_DY(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! !* computation @@ -617,12 +607,11 @@ ENDDO !attention !!!!! je ne comprends pas pourquoi mais ce update plante à l'execution... ! du coup je ne peux pas faire de update self asynchrone... ! -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - - XCMFS * PK(JI,JJ,IKB) * 2. * ZDU_DX(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(zdu_dx,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDU_DX(:,:,1) +!$mnh_end_expand_array() !$acc end kernels !! & to be tested later @@ -635,8 +624,10 @@ ENDDO !!! wait for the computation of ZDIRSINZW !$acc wait(1) ! -!$acc kernels async(4) present_cr(ZFLX) -#ifndef MNH_BITREP +!$acc kernels async(4) present_cr(ZFLX,ZDIRSINZW) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PCOSSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & -2. 
* PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & @@ -647,28 +638,25 @@ ZFLX(:,:,IKB-1) = & - PUSLOPEM(:,:) * PCOSSLOPE(:,:)**2 * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & - -2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & - + PTAU22M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) & - + PTAU33M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ)) & - +2. * PCDUEFF(JI,JJ) * ( & - PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ) & - - PUSLOPEM(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) ) -END DO ! CONCURRENT +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:)) & + -2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & + + PTAU22M(:,:) * BR_P2(PSINSLOPE(:,:)) & + + PTAU33M(:,:) * BR_P2(PCOSSLOPE(:,:)) * BR_P2(ZDIRSINZW(:,:)) & + +2. * PCDUEFF(:,:) * ( & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) & + - PUSLOPEM(:,:) * BR_P2(PCOSSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) ) #endif +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZFLX(:,:,IKB) and ZFLX(:,:,IKB-1) !$acc wait(3) async(4) ! !$acc kernels async(4) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB-1) = 2. * ZFLX(JI,JJ,IKB-1) - ZFLX(JI,JJ,IKB) -ENDDO +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB-1) = 2. * ZFLX(:,:,IKB-1) - ZFLX(:,:,IKB) +!$mnh_end_expand_array() !$acc end kernels ! ! 
@@ -724,13 +712,10 @@ ELSE END IF #else CALL MXF_DEVICE(PDXX, ZTMP1_DEVICE) -!$acc kernels async(10) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) -END DO !CONCURRENT +!$acc kernels async(10) present_cr(ztmp1_device,ztmp2_device) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) / ZTMP1_DEVICE(:,:,:) +!$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX @@ -739,31 +724,22 @@ END DO !CONCURRENT CALL DXM_DEVICE(ZTMP2_DEVICE, ZTMP3_DEVICE) IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(PDXX,ZTMP1_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:) * PINV_PDZZ(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MXM_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PDZX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) 
+ !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PDZX(:,:,:) / ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP1_DEVICE ) !$acc kernels async(1) @@ -780,22 +756,18 @@ END IF ! IF (KSPLT==1) THEN ! Contribution to the dynamic production of TKE: - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GX_U_M_PUM(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gx_u_m_pum,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(:,:,:) = - ZFLX(:,:,:) * GX_U_M_PUM(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDU_DX(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) - ENDDO + !$acc kernels async(2) present_cr(zdu_dx,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDU_DX(:,:,1) + ZWORK(:,:,IKB+1) ) + !$mnh_end_expand_array() !$acc end kernels ! !$acc kernels async(2) @@ -816,7 +788,7 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !!! wait for the computation of ZWORK and PDP !$acc wait(2) ! - !$acc kernels + !$acc kernels present_cr(ZTMP1_DEVICE) ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_U_SBG_UaU , .TRUE.) @@ -836,28 +808,24 @@ END IF ! ! Computes the V variance IF (.NOT. L2D) THEN - !$acc kernels async(3) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GY_V_M_PVM(JI,JJ,JK) & - -(2./3.) 
* ( GX_U_M_PUM(JI,JJ,JK) & - +GZ_W_M_PWM(JI,JJ,JK) ) ) - END DO !CONCURRENT + !$acc kernels async(3) present_cr(gz_w_m_pwm,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GY_V_M_PVM(:,:,:) & + -(2./3.) * ( GX_U_M_PUM(:,:,:) & + +GZ_W_M_PWM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ! ELSE - !$acc kernels async(3) - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *(-(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & - +GZ_W_M_PWM(JI,JJ,JK) ) ) - ENDDO + !$acc kernels async(3) present_cr(gz_w_m_pwm,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *(-(2./3.) * ( GX_U_M_PUM(:,:,:) & + +GZ_W_M_PWM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! + XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP @@ -872,12 +840,11 @@ ZFLX(:,:,IKE+1) = ZFLX(:,:,IKE) ! ! !$acc wait(3) ! ! !$acc update self(ZFLX(:,:,IKB+1:)) async(10) ! -!$acc kernels async(3) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - - XCMFS * PK(JI,JJ,IKB) * 2. * ZDV_DY(JI,JJ,1) -ENDDO +!$acc kernels async(3) present_cr(zdv_dy,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDV_DY(:,:,1) +!$mnh_end_expand_array() !$acc end kernels !! & to be tested @@ -886,7 +853,8 @@ ENDDO ! ! extrapolates this flux under the ground with the surface flux !$acc kernels async(3) present_cr(ZFLX) -#ifndef MNH_BITREP +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * PSINSLOPE(:,:)**2 * PDIRCOSZW(:,:)**2 & +2. 
* PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & @@ -897,17 +865,16 @@ ZFLX(:,:,IKB-1) = & + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(PDIRCOSZW(JI,JJ)) & - +2. * PTAU12M(JI,JJ) * PCOSSLOPE(JI,JJ)* PSINSLOPE(JI,JJ) * PDIRCOSZW(JI,JJ) & - + PTAU22M(JI,JJ) * BR_P2(PCOSSLOPE(JI,JJ)) & - + PTAU33M(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * BR_P2(ZDIRSINZW(JI,JJ)) & - -2. * PCDUEFF(JI,JJ)* ( & - PUSLOPEM(JI,JJ) * BR_P2(PSINSLOPE(JI,JJ)) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) & - + PVSLOPEM(JI,JJ) * PCOSSLOPE(JI,JJ) * PSINSLOPE(JI,JJ) * ZDIRSINZW(JI,JJ) ) -END DO ! CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * BR_P2(PSINSLOPE(:,:)) * BR_P2(PDIRCOSZW(:,:)) & + +2. * PTAU12M(:,:) * PCOSSLOPE(:,:)* PSINSLOPE(:,:) * PDIRCOSZW(:,:) & + + PTAU22M(:,:) * BR_P2(PCOSSLOPE(:,:)) & + + PTAU33M(:,:) * BR_P2(PSINSLOPE(:,:)) * BR_P2(ZDIRSINZW(:,:)) & + -2. * PCDUEFF(:,:)* ( & + PUSLOPEM(:,:) * BR_P2(PSINSLOPE(:,:)) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) & + + PVSLOPEM(:,:) * PCOSSLOPE(:,:) * PSINSLOPE(:,:) * ZDIRSINZW(:,:) ) +!$mnh_end_expand_array() #endif !$acc end kernels ! @@ -966,13 +933,10 @@ IF (.NOT. 
L2D) THEN IF (KSPLT==1) ZWORK(:,:,:) = - ZFLX(:,:,:) * GY_V_M_PVM #else CALL MYF_DEVICE(PDYY, ZTMP1_DEVICE) - !$acc kernels async(10) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(10) present_cr(ztmp1_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) / ZTMP1_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ! !!! wait for the computation of ZTMP2_DEVICE and the update of ZFLX @@ -981,31 +945,22 @@ IF (.NOT. L2D) THEN CALL DYM_DEVICE( ZTMP2_DEVICE,ZTMP3_DEVICE ) IF (.NOT. LFLAT) THEN CALL MZM_DEVICE(PDYY,ZTMP1_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PRHODJ(JI,JJ,JK) * ZFLX(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PRHODJ(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MZM_DEVICE(ZTMP2_DEVICE,ZTMP4_DEVICE) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP4_DEVICE(JI,JJ,JK) * PINV_PDZZ(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP4_DEVICE(:,:,:) * PINV_PDZZ(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL MYM_DEVICE( ZTMP2_DEVICE,ZTMP4_DEVICE ) - !$acc kernels -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = PDZY(JI,JJ,JK) / ZTMP1_DEVICE(JI,JJ,JK) * ZTMP4_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$acc 
kernels present_cr(ztmp4_device,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = PDZY(:,:,:) / ZTMP1_DEVICE(:,:,:) * ZTMP4_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL DZF_DEVICE( ZTMP2_DEVICE, ZTMP4_DEVICE ) !$acc kernels async(1) @@ -1015,23 +970,17 @@ IF (.NOT. L2D) THEN !$acc end kernels ELSE !$acc kernels async(1) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - PRVS(JI,JJ,JK)=PRVS(JI,JJ,JK) - ZTMP3_DEVICE(JI,JJ,JK) - END DO !CONCURRENT + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + PRVS(:,:,:)=PRVS(:,:,:) - ZTMP3_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels END IF ! Contribution to the dynamic production of TKE: IF (KSPLT==1) THEN - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GY_V_M_PVM(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gy_v_m_pvm,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(:,:,:) = - ZFLX(:,:,:) * GY_V_M_PVM(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ENDIF #endif @@ -1045,11 +994,10 @@ IF (KSPLT==1) THEN ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDV_DY(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) - ENDDO + !$acc kernels async(2) present_cr(zdv_dy,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDV_DY(:,:,1) + ZWORK(:,:,IKB+1) ) + !$mnh_end_expand_array() !$acc end kernels ! !$acc kernels async(2) @@ -1070,11 +1018,10 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !!! wait for the computation of ZWORK and PDP !$acc wait(2) ! 
- !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zwork,ztmp1_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_V_SBG_UaV , .TRUE.) ! @@ -1089,27 +1036,23 @@ END IF ! ! Computes the W variance IF (.NOT. L2D) THEN - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK) = (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & - -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) & - +GY_V_M_PVM(JI,JJ,JK) ) ) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gy_v_m_pvm,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:) = (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GZ_W_M_PWM(:,:,:) & + -(2./3.) * ( GX_U_M_PUM(:,:,:) & + +GY_V_M_PVM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP ELSE - !$acc kernels async(2) - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZFLX(JI,JJ,JK)= (2./3.) * PTKEM(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) *( (4./3.) * GZ_W_M_PWM(JI,JJ,JK) & - -(2./3.) * ( GX_U_M_PUM(JI,JJ,JK) ) ) - ENDDO + !$acc kernels async(2) present_cr(gx_u_m_pum,zflx) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZFLX(:,:,:)= (2./3.) * PTKEM(:,:,:) & + - XCMFS * PK(:,:,:) *( (4./3.) * GZ_W_M_PWM(:,:,:) & + -(2./3.) * ( GX_U_M_PUM(:,:,:) ) ) + !$mnh_end_expand_array() !$acc end kernels !! & to be tested !! -2.* XCMFB * PLM / SQRT(PTKEM) * (-2./3.) * PTP @@ -1123,12 +1066,11 @@ ZFLX(:,:,IKE+1)= ZFLX(:,:,IKE) !$acc wait(2) ! ! 
-!$acc kernels async(2) -!$acc_nv loop independent collapse(2) -DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZFLX(JI,JJ,IKB) = (2./3.) * PTKEM(JI,JJ,IKB) & - - XCMFS * PK(JI,JJ,IKB) * 2. * ZDW_DZ(JI,JJ,1) -ENDDO +!$acc kernels async(2) present_cr(zdw_dz,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZFLX(:,:,IKB) = (2./3.) * PTKEM(:,:,IKB) & + - XCMFS * PK(:,:,IKB) * 2. * ZDW_DZ(:,:,1) +!$mnh_end_expand_array() !$acc end kernels ! @@ -1137,24 +1079,24 @@ ENDDO ! - 2.* XCMFB * PLM(:,:,IKB:IKB) /SQRT(PTKEM(:,:,IKB:IKB)) * & ! (-2./3.) * PTP(:,:,IKB:IKB) ! extrapolates this flux under the ground with the surface flux -!$acc kernels async(3) present_cr(ZFLX) -#ifndef MNH_BITREP +!$acc kernels async(3) present_cr(ZFLX) +!if !defined(MNH_BITREP) && !defined(MNH_BITREP_OMP) +#if !defined(MNH_BITREP) ZFLX(:,:,IKB-1) = & PTAU11M(:,:) * ZDIRSINZW(:,:)**2 & + PTAU33M(:,:) * PDIRCOSZW(:,:)**2 & +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) #else !PW: BUG: commented 'acc loop independent collapse(2)' to workaround compiler bug (NVHPC 21.1) -!$acc_nv loop independent collapse(2) -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU ) -ZFLX(JI,JJ,IKB-1) = & - PTAU11M(JI,JJ) * BR_P2(ZDIRSINZW(JI,JJ)) & - + PTAU33M(JI,JJ) * BR_P2(PDIRCOSZW(JI,JJ)) & - +2. * PCDUEFF(JI,JJ)* PUSLOPEM(JI,JJ) * ZDIRSINZW(JI,JJ) * PDIRCOSZW(JI,JJ) -END DO ! CONCURRENT +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU ) +ZFLX(:,:,IKB-1) = & + PTAU11M(:,:) * BR_P2(ZDIRSINZW(:,:)) & + + PTAU33M(:,:) * BR_P2(PDIRCOSZW(:,:)) & + +2. * PCDUEFF(:,:)* PUSLOPEM(:,:) * ZDIRSINZW(:,:) * PDIRCOSZW(:,:) +!$mnh_end_expand_array() #endif !$acc end kernels - ! +! ! !!! 
wait for the computation of ZFLX(:,:,IKB-1) and ZFLX(:,:,IKB) !$acc wait(2) async(3) @@ -1218,34 +1160,27 @@ GZ_W_M_ZWP = GZ_W_M(ZWP,PDZZ) #else CALL GZ_W_M_DEVICE(ZWP,PDZZ,GZ_W_M_ZWP) #endif -!$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC -!$acc loop independent collapse(3) -#endif -DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) - ZFLX(JI,JJ,JK)=ZFLX(JI,JJ,JK) & - - XCMFS * PK(JI,JJ,JK) * (4./3.) * (GZ_W_M_ZWP(JI,JJ,JK) - GZ_W_M_PWM(JI,JJ,JK)) -END DO !CONCURRENT +!$acc kernels async(2) present_cr(gz_w_m_pwm,zflx) +!$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=IKB+1:JKU) + ZFLX(:,:,:)=ZFLX(:,:,:) & + - XCMFS * PK(:,:,:) * (4./3.) * (GZ_W_M_ZWP(:,:,:) - GZ_W_M_PWM(:,:,:)) +!$mnh_end_expand_array() !$acc end kernels ! IF (KSPLT==1) THEN !Contribution to the dynamic production of TKE: - !$acc kernels async(2) -#ifdef MNH_COMPILER_NVHPC - !$acc loop independent collapse(3) -#endif - DO CONCURRENT ( JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZWORK(JI,JJ,JK) = - ZFLX(JI,JJ,JK) * GZ_W_M_ZWP(JI,JJ,JK) - END DO !CONCURRENT + !$acc kernels async(2) present_cr(gz_w_m_zwp,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZWORK(:,:,:) = - ZFLX(:,:,:) * GZ_W_M_ZWP(:,:,:) + !$mnh_end_expand_array() !$acc end kernels ! ! evaluate the dynamic production at w(IKB+1) in PDP(IKB) ! - !$acc kernels async(2) - !$acc_nv loop independent collapse(2) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU) - ZWORK(JI,JJ,IKB) = 0.5* ( -ZFLX(JI,JJ,IKB)*ZDW_DZ(JI,JJ,1) + ZWORK(JI,JJ,IKB+1) ) - ENDDO + !$acc kernels async(2) present_cr(zdw_dz,zwork) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU) + ZWORK(:,:,IKB) = 0.5* ( -ZFLX(:,:,IKB)*ZDW_DZ(:,:,1) + ZWORK(:,:,IKB+1) ) + !$mnh_end_expand_array() !$acc end kernels ! !$acc kernels async(2) @@ -1284,30 +1219,27 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !!! wait for the computation of ZFLX, ZDP and ZWORK !$acc wait(2) ! 
- !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP1_DEVICE(JI,JJ,JK) = -ZWORK(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zwork,ztmp1_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP1_DEVICE(:,:,:) = -ZWORK(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP1_DEVICE, X_LES_RES_ddxa_W_SBG_UaW , .TRUE.) ! CALL GZ_M_M_DEVICE(PTHLM,PDZZ,ZTMP1_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK) * ZFLX(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:) * ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Thl_SBG_UaW , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PTHLM,PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(ztmp2_device,ztmp3_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE,X_LES_RES_ddz_Thl_SBG_W2) ! @@ -1317,21 +1249,19 @@ IF (LLES_CALL .AND. KSPLT==1) THEN !$acc data copy(X_LES_RES_ddxa_Rt_SBG_UaW,X_LES_RES_ddz_Rt_SBG_W2) ! 
CALL GZ_M_M_DEVICE(PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, X_LES_RES_ddxa_Rt_SBG_UaW , .TRUE.) ! CALL GZ_M_W_DEVICE(1,IKU,1,PRM(:,:,:,1),PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE( ZTMP1_DEVICE, ZTMP2_DEVICE ) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(ztmp2_device,ztmp3_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Rt_SBG_W2) ! @@ -1342,22 +1272,20 @@ IF (LLES_CALL .AND. KSPLT==1) THEN ! ! CALL GZ_M_M_DEVICE(PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP2_DEVICE(JI,JJ,JK) = ZTMP1_DEVICE(JI,JJ,JK)*ZFLX(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(zflx,ztmp2_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP2_DEVICE(:,:,:) = ZTMP1_DEVICE(:,:,:)*ZFLX(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID( ZTMP2_DEVICE, & X_LES_RES_ddxa_Sv_SBG_UaW(:,:,:,JSV) , .TRUE.) ! 
CALL GZ_M_W_DEVICE(1,IKU,1,PSVM(:,:,:,JSV),PDZZ,ZTMP1_DEVICE) CALL MZF_DEVICE(ZTMP1_DEVICE,ZTMP2_DEVICE) - !$acc kernels - !$acc_nv loop independent collapse(3) - DO CONCURRENT (JI=1:JIU,JJ=1:JJU,JK=1:JKU) - ZTMP3_DEVICE(JI,JJ,JK) = ZFLX(JI,JJ,JK)*ZTMP2_DEVICE(JI,JJ,JK) - ENDDO + !$acc kernels present_cr(ztmp2_device,ztmp3_device) + !$mnh_expand_array(JI=1:JIU,JJ=1:JJU,JK=1:JKU) + ZTMP3_DEVICE(:,:,:) = ZFLX(:,:,:)*ZTMP2_DEVICE(:,:,:) + !$mnh_end_expand_array() !$acc end kernels CALL LES_MEAN_SUBGRID(ZTMP3_DEVICE, X_LES_RES_ddz_Sv_SBG_W2(:,:,:,JSV)) ! diff --git a/src/configure b/src/configure index 8c46c4f948cfbdd89111ee558c1f04d2a71ff54c..4f1afc4dd362d188e4c8500d2c47a9834c3969bb 100755 --- a/src/configure +++ b/src/configure @@ -52,6 +52,83 @@ cd ${LOCAL}/conf TARG=$(uname -s -n) # case "$TARG" in +'Linux login6'*) + export ARCH=${ARCH:-LXcray} + export VER_MPI=${VER_MPI:-MPICRAY} + export OPTLEVEL=${OPTLEVEL:-OPENACC} + export MVWORK=${MVWORK:-NO} + export VER_CDF=${VER_CDF:-CDFAUTO} +if [ "${VER_MPI}" == "MPICRAY" ] +then +export MNHENV=${MNHENV:-" +module purge +module load craype-x86-trento +module load craype-network-ofi +module load PrgEnv-cray/8.3.3 +module load craype/2.7.19 +module load cray-mpich/8.1.21 +module load libfabric/1.15.2.0 +module load craype-accel-amd-gfx90a +export version=5.2.3 +module load rocm/\${version} + +export CCE_ALIAS_NONE=:on +export CCE_NO_PTR_TO_PTR_ALIAS=:on + +module list +"} +else +export MNHENV=${MNHENV:-" +module purge +module use /lus/home/NAT/gda2203/SHARED/escobar/my_modules >& /dev/null +module load craype-x86-trento +module load PrgEnv-cray/8.3.3 +module load craype/2.7.19 +module load craype-accel-amd-gfx90a +export version=5.2.3 +module load rocm/\${version} +module rm craype-network-ofi cray-mpich cray-libsci cray-dsmml +module load ompij/4.1.2-cray-15.0.0-ucx_rocm + +export CCE_ALIAS_NONE=:on +export CCE_NO_PTR_TO_PTR_ALIAS=:on + +module list +"} +fi + + ;; + +'Linux login2'*) + export ARCH=${ARCH:-LXcray} + export
VER_MPI=${VER_MPI:-MPICRAY} + #export VER_MPI=${VER_MPI:-MPIAUTO} + #export OPTLEVEL=${OPTLEVEL:-MANAGED02} + export OPTLEVEL=${OPTLEVEL:-OPENACC} + # export OPTLEVEL=${OPTLEVEL:-OPENACCDEFONLY} + export MVWORK=${MVWORK:-NO} + export VER_CDF=${VER_CDF:-CDFAUTO} + #export VER_USER=${VER_USER:-ZSOLVER} + export MNHENV=${MNHENV:-" +module purge +module load craype-x86-trento +module load craype-network-ofi +module load PrgEnv-cray/8.3.3 + +module load libfabric/1.15.0.0 +module load xpmem/2.4.4-2.3_2.12__gff0e1d9.shasta +module load cray-pmi/6.1.2 +module load craype-accel-amd-gfx90a +module load rocm + + +ulimit -s unlimited + + +#export CRAY_ACC_DEBUG=2 +"} + ;; + 'Linux jean-zay'*|'Linux idrsrv'*) export ARCH=${ARCH:-LXifort} export VER_MPI=${VER_MPI:-MPIINTEL}