# Patch for src/Rules.LXpgi.mk (build rules for the PGI compilers).
# Everything above the "diff --git" line is commentary for the reader.
#
# What the patch does
#   * adds the knobs CUDALEVEL, OPT_CPTCAP, OPT_INFO and OPT_PROF;
#   * replaces OPT_CUDA / OPT_NOCUDA by OPT_OPENACC / OPT_NOOPENACC and adds
#     OPT_MANAGED;
#   * OPTLEVEL values: removes O2PROF, CUDA, NOCUDA and CUDA_DB; adds MANAGED,
#     OPENACC and NOOPENACC;
#   * takes the -O level out of the accelerator flag sets, so every OPTLEVEL
#     branch has to append $(OPT_PERF0) / $(OPT_PERF2) itself;
#   * updates the per-object overrides OBJS_O1, OBJS_O0 and OBJS_O2.
#
# Differences from the patch as it was received
#   * MULTICORE branch: OPT and OPT_NOCB now append $(OPT_PERF2).
#     OPT_MULTICORE used to carry -O2 itself; this patch strips it down to
#     -ta=multicore, and the branch was the only one left without an
#     explicit optimisation level.
#   * OBJS_O0: the last list entry no longer ends with a backslash that
#     continued into the commented-out entries below it (same value, no trap).
#   * Two added comments were reworded (OPT_CPTCAP usage, "KO" -> "NOT OK").
#   * The "index" line still shows the blob hashes of the original patch.
#
# NOTE(review): this text was rebuilt from a copy whose line breaks and
# intra-line spacing had been collapsed. Hunk sizes were re-checked against
# the @@ headers, but runs of blanks inside lines are reduced to one blank and
# the two blank context lines in the fourth hunk are placed by best guess.
# Apply with a whitespace-tolerant option and review the result before
# committing.
#
diff --git a/src/Rules.LXpgi.mk b/src/Rules.LXpgi.mk
index 618e8bde38a9432f75b0f275a600707cf9cd6319..787bcdf3b7d50ecb8c095383629f4fff240f0852 100644
--- a/src/Rules.LXpgi.mk
+++ b/src/Rules.LXpgi.mk
@@ -9,26 +9,41 @@
 ##########################################################
 #OBJDIR_PATH=${WORKDIR}
 #
+#Version of CUDA
+#(8.0 at least if compute capability >= 6.0)
+# CUDA 7.5 OK on aeropc45 with PGI 16.10
+# CUDA 8.0 NOT OK on aeropc45 with PGI 16.10
+CUDALEVEL=cuda7.5
+#
+#Compute capability of GPU (-ta/-Mcuda sub-option, e.g. cc35; empty by default, set it)
+#
+#Aeropc45: 50
+#Nuwa: 35
+#Ouessant Firestone K80: 35
+#Ouessant Minsky P100: 60
+OPT_CPTCAP=
+#
+#Compiler info level
+#OPT_INFO = -Minfo=ftn,accel,inline,ipa,loop,lre,mp,opt,par,unified,vect,ccff
+OPT_INFO = -Minfo=accel,ccff
+#
+#Compiler profiling options
+OPT_PROF = -Mprof=ccff
+#
 #PW: if -Ktrap=fp: nvprof/pgprof do not work OPT_BASE = -Ktrap=fp ...
-#PW: warning: -Mvect=nosimd is necessary to prevent non reproductibility (i.e. for SUM intrinsic in SUM_1DFIELD_ll)
-OPT_BASE = -Mbackslash -Kieee -nofma -Mvect=nosimd
-# -Munixlogical
-# -Mrecursive -mcmodel=medium
-OPT_PERF0 = -O0 -g -Minfo=ccff,accel -Mprof=ccff
+#PW: -g: big impact on performance
+OPT_BASE = -Mbackslash -Mextend -Kieee -nofma
+#
+OPT_PERF0 = -O0
+OPT_PERF1 = -O1
 OPT_PERF2 = -O2
-#OPT_CUDA = -O2 -Mcuda=keepgpu -ta=nvidia,cc20,cuda3.1,host,time -Minfo=accel,intensity,all,ccff
-#OPT_CUDA = -O3 -fast -ta=nvidia,cc20,cuda4.2,keepgpu,host -Minfo=accel,all,intensity,ccff
-OPT_MULTICORE = -g -O2 -ta=multicore -Minfo=ccff,accel
-OPT_CUDA = -O2 -Mcuda=cuda7.5,nofma -ta=host,tesla,nofma,cc35,cuda7.5 -Minfo=ccff,accel -Mprof=ccff
-OPT_NOCUDA = -g -O2 -ta=host -Minfo=ccff -Mprof=ccff
-#OPT_CUDA = -O2 -Kieee -nofma -Mcuda=nordc -ta=host,tesla,nofma,cc35,cuda6.5,nordc,managed -Minfo=ccff,accel -Mprof=ccff
-#OPT_CUDA = -O2 -Kieee -nofma -ta=host,tesla,nofma,cc35,cuda6.5,keepgpu,managed -Mcuda -Minfo=ccff,accel -Mprof=ccff
-#OPT_CUDA = -O2 -Kieee -nofma -ta=host,nvidia,nofma,cc20,cc35,cuda5.5,keepgpu -Minfo=ccff,accel -Mprof=ccff
-#OPT_CUDA = -O2 -Kieee -nofma -ta=host,nvidia,nofma,cc20,cc35,cuda5.0 -Minfo=ccff,all,intensity -Mprof=ccff
-#OPT_CUDA = -O2 -Kieee -ta=host,nvidia,cc20,cuda4.2 -Minfo=ccff,all,intensity
-
+#
+OPT_MANAGED = -ta=host,tesla,nofma,$(OPT_CPTCAP),$(CUDALEVEL),managed -Mcuda=nofma,$(OPT_CPTCAP),$(CUDALEVEL)
+OPT_MULTICORE = -ta=multicore
+OPT_NOOPENACC = -ta=host $(OPT_INFO) $(OPT_PROF)
+OPT_OPENACC = -ta=host,tesla,nofma,$(OPT_CPTCAP),$(CUDALEVEL) -Mcuda=nofma,$(OPT_CPTCAP),$(CUDALEVEL)
+#
 OPT_CHECK = -C #-Mchkfpstk -Mchkptr
-OPT_PROF = -Mprof=time,ccff
 OPT_I8 = -i8
 OPT_R8 = -r8
 #
@@ -60,57 +75,51 @@ OPT = $(OPT_BASE) $(OPT_PERF2)
 OPT0 = $(OPT_BASE) $(OPT_PERF0)
 OPT_NOCB = $(OPT_BASE) $(OPT_PERF2)
 #
-ifeq "$(OPTLEVEL)" "O2PROF"
-OPT = $(OPT_BASE) $(OPT_PERF2) $(OPT_PROF)
-OPT0 = $(OPT_BASE) $(OPT_PERF0) $(OPT_PROF)
-OPT_NOCB = $(OPT_BASE) $(OPT_PERF2) $(OPT_PROF)
-endif
 ifeq "$(OPTLEVEL)" "DEBUG"
 OPT = $(OPT_BASE) $(OPT_PERF0) $(OPT_CHECK)
 OPT0 = $(OPT_BASE) $(OPT_PERF0) $(OPT_CHECK)
 OPT_NOCB = $(OPT_BASE) $(OPT_PERF0)
 endif
-
+#
+ifeq "$(OPTLEVEL)" "MANAGED"
+CPPFLAGS += -D_OPENACC
+OPT = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2)
+OPT0 = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF0)
+OPT_NOCB = $(OPT_BASE) $(OPT_MANAGED) $(OPT_PERF2)
+CXXFLAGS = -acc -Kieee -Mnofma $(OPT_MANAGED)
+endif
+#
 ifeq "$(OPTLEVEL)" "MULTICORE"
+CPPFLAGS += -D_OPENACC
-OPT = $(OPT_BASE) $(OPT_MULTICORE)
+OPT = $(OPT_BASE) $(OPT_MULTICORE) $(OPT_PERF2)
 OPT0 = $(OPT_BASE) $(OPT_MULTICORE) $(OPT_PERF0)
-OPT_NOCB = $(OPT_BASE) $(OPT_MULTICORE)
+OPT_NOCB = $(OPT_BASE) $(OPT_MULTICORE) $(OPT_PERF2)
+CXXFLAGS = -acc -Kieee -Mnofma $(OPT_MULTICORE)
 endif
-
-ifeq "$(OPTLEVEL)" "CUDA"
+#
+ifeq "$(OPTLEVEL)" "OPENACC"
 CPPFLAGS += -D_OPENACC
-OPT = $(OPT_BASE) $(OPT_CUDA)
-OPT0 = $(OPT_BASE) $(OPT_CUDA) $(OPT_PERF0)
-OPT_NOCB = $(OPT_BASE) $(OPT_CUDA)
-#ifdef DO_COMP_USER
-#OPT = $(OPT_BASE) $(OPT_CUDA) -ta:tesla:managed
-CXXFLAGS = -acc -Kieee -Mnofma -Mvect=nosimd $(OPT_CUDA)
+OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2)
+OPT0 = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF0)
+OPT_NOCB = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2)
+CXXFLAGS = -acc -Kieee -Mnofma $(OPT_OPENACC)
 endif
-
+#
 ifeq "$(OPTLEVEL)" "OPENACCDEFONLY"
 CPPFLAGS += -D_OPENACC -D_FAKEOPENACC
-OPT = $(OPT_BASE) $(OPT_NOCUDA)
-OPT0 = $(OPT_BASE) $(OPT_NOCUDA) $(OPT_PERF0)
-OPT_NOCB = $(OPT_BASE) $(OPT_NOCUDA)
-CXXFLAGS = -Kieee -Mnofma -Mvect=nosimd $(OPT_NOCUDA)
-endif
-
-ifeq "$(OPTLEVEL)" "NOCUDA"
-OPT = $(OPT_BASE) $(OPT_NOCUDA)
-OPT0 = $(OPT_BASE) $(OPT_NOCUDA) $(OPT_PERF0)
-OPT_NOCB = $(OPT_BASE) $(OPT_NOCUDA)
-CXXFLAGS = -Kieee -Mnofma -Mvect=nosimd $(OPT_NOCUDA)
+OPT = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2)
+OPT0 = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF0)
+OPT_NOCB = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2)
+CXXFLAGS = -Kieee -Mnofma $(OPT_NOOPENACC)
 endif
-
-ifeq "$(OPTLEVEL)" "CUDA_DB"
-CPPFLAGS += -D_OPENACC
-OPT_CUDA = -g -O0 -Kieee -nofma -ta=host,nvidia,nofma,cc35,cuda6.5 -Minfo=ccff,all,intensity
-OPT = $(OPT_BASE) $(OPT_CUDA)
-OPT0 = $(OPT_BASE) $(OPT_CUDA)
-OPT_NOCB = $(OPT_BASE) $(OPT_CUDA)
+#
+ifeq "$(OPTLEVEL)" "NOOPENACC"
+#CPPFLAGS += -D_OPT_LINEARIZED_LOOPS
+OPT = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2)
+OPT0 = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF0)
+OPT_NOCB = $(OPT_BASE) $(OPT_NOOPENACC) $(OPT_PERF2)
+CXXFLAGS = -Kieee -Mnofma $(OPT_NOOPENACC)
 endif
-
-#-Mcuda -ta=nvidia,host,time -Minfo=accel,intensity
 #
 FC = pgf90
 ifeq "$(VER_MPI)" "MPIAUTO"
@@ -135,14 +144,12 @@ LDFLAGS = -Wl,-warn-once $(OPT)
 #
 CPP = cpp -P -traditional -Wcomment
 #
-
 CPPFLAGS_SURFEX =
 CPPFLAGS_SURCOUCHE += -DMNH_LINUX -DMNH_MPI_RANK_KIND=$(MNH_MPI_RANK_KIND)
 CPPFLAGS_RAD =
 CPPFLAGS_NEWLFI = -DSWAPIO -DLINUX -DLFI_INT=${LFI_INT} -DLFI_RECL=${LFI_RECL}
 CPPFLAGS_MNH = -DMNH -DMNH_PGI -DSFX_MNH
 CPPFLAGS_MNH += -Uvector -Upixel
-
 #
 # Gribex flags
 #
@@ -183,23 +190,24 @@ include Makefile.MESONH.mk
 # etc ... #
 # #
 ##########################################################
-OPT_PERF1 = -O1 -Kieee -g
-OBJS_O1= spll_modd_isba_n.o spll_pack_isba_patch_n.o spll_mode_construct_ll.o \
+OBJS_O1= spll_modd_isba_n.o spll_mode_construct_ll.o \
          spll_init_surf_atm_n.o spll_mode_scatter_ll.o spll_convert_patch_teb.o \
          spll_define_mask_n.o spll_del1dfield_ll.o spll_mode_fm.o spll_mode_gather_ll.o \
-         spll_phys_param_n.o \
-         spll_convect_updraft.o spll_convect_updraft_shal.o
+         spll_convect_updraft.o spll_convect_updraft_shal.o \
+         spll_mode_dustopt.o spll_mode_saltopt.o
+#spll_pack_isba_patch_n.o
+#spll_phys_param_n.o
 $(OBJS_O1) : OPT = $(OPT_BASE) $(OPT_PERF1)
-OBJS_O0= spll_mode_mppdb.o
+
+OBJS_O0= spll_mode_mppdb.o \
+         spll_fft55.o spll_fft.o spll_flat_invz.o \
+         spll_mode_repro_sum.o
+# spll_fast_terms.o
+# spll_mode_sum_ll.o
 $(OBJS_O0) : OPT = $(OPT_BASE) $(OPT_PERF0)

 OBJS_O2= spll_mode_device.o
-$(OBJS_O2) : OPT = $(OPT_BASE) $(OPT_CUDA)
-
-#
-#MODULE_SYSTEM = /opt/F95_42/lib/
-#VPATH += $(MODULE_SYSTEM)
-#
+$(OBJS_O2) : OPT = $(OPT_BASE) $(OPT_OPENACC) $(OPT_PERF2)

 ifneq "$(findstring 8,$(LFI_INT))" ""
 OBJS_I8=spll_NEWLFI_ALL.o
@@ -210,4 +218,3 @@ ifeq "$(MNH_INT)" "8"
 OBJS_I4=spll_modd_netcdf.o
 $(OBJS_I4) : OPT = $(OPT_BASE_I4)
 endif
-