# Page 1 of 3

# Build file for the WSM5 drivers: a host-only build ("vanilla"), a GPU build
# ("chocolate") and the compare_snaps utility.
#
# Recipes are run with csh; the \cp and \rm spellings used below presumably
# bypass interactive shell aliases -- confirm before switching SHELL.
SHELL = csh

##################### intel compiler #############
# CC = icc
# FC = ifort
# CFLAGS = -w -O3 -ip -c
# FCFLAGS = -align all -FR -convert big_endian -fno-alias -fno-fnalias
# OPT = -O3
# LD = $(FC)
# LDOPT =

# XXX and YYY are passed to nvcc as -DXXX / -DYYY (see NVOPT below).
# settings for GT 240M
XXX = 32
YYY = 16
# settings for GTX 280
# XXX = 32
# YYY = 8
# settings for 5600 Quadro and GTX 8800
# XXX = 8
# YYY = 8

##################### gcc/gfortran ###############
# NOTE: CFLAGS is only set in the (commented-out) intel section above, so with
# this toolchain $(CFLAGS) expands to nothing unless supplied by the caller.
CC = gcc
FC = gfortran
FCFLAGS = -fconvert=big-endian -frecord-marker=4 -ffree-form
OPT = -O3
LD = $(FC)
# -rpath /usr/local/cuda/lib64/
LDOPT =

##################################################
# do not change this definition, change the one further down
FLOAT = float
RWORDSIZE = 4

##################### cuda location ##############
# eces-shell
#CUDALIBPATH = ~/emu/cuda/lib
# ncsa
CUDALIBPATH = /usr/local/cuda/lib64
LIBCUBLAS = $(CUDALIBPATH)/libcublas.so
LIBCUDART = $(CUDALIBPATH)/libcudart.so.3
LIBCUBLASEMU = $(CUDALIBPATH)/libcublasemu.so

######################## THIS SECTION YOU CAN CHANGE ##################
#
# Hard coded number of levels (35 for conus, 28 for jan00)
MKX = 28

# uncomment this to use FLOAT4 data type (optimization)
#FLOAT4 = -DFLOAT_4=4

# this must always be defined but they do not do anything unless
# other settings are set
DEBUG_I = 59
DEBUG_J = 45
DEBUG_K = 1
DEBUGOUTPUT = -DDEBUG_I=$(DEBUG_I) -DDEBUG_J=$(DEBUG_J) -DDEBUG_K=$(DEBUG_K) $(FLOAT4)

# uncomment to run on emulator instead of the device
#DEVICEEMU = -DDEVICEEMU
#DEVICEEMU_NVCC = -deviceemu $(DEVICEEMU)
#LIBCUBLAS = $(LIBCUBLASEMU)

# uncomment to output detailed debug data output
# must have DEVICEEMU settings uncommented above
#DEBUGOUTPUT = -DDEBUGOUTPUT -DDEBUG_I=$(DEBUG_I) -DDEBUG_J=$(DEBUG_J) -DDEBUG_K=$(DEBUG_K) $(FLOAT4)

# uncomment to allow settings to force closer agreement
#DEBUGDEBUG = -DDEBUGDEBUG

# uncomment to promote to 8 byte floats
# note, if you do this without DEVICEEMU above, compiler will complain it does
# not have enough shared mem
#PROMOTE = -DPROMOTE
#FLOAT = double
#FCFLAGS = -fconvert=big-endian -frecord-marker=4 -fdefault-real-8
#RWORDSIZE = 8

######################## END OF SECTION YOU CAN CHANGE ################

# NOTE(review): the five assignments below were reconstructed from a
# column-garbled listing; confirm name/value pairing against a clean copy.
#NVCC = nvcc -DCUDA
NVCC = /usr/local/cuda/bin/nvcc -DCUDA -ccbin /usr/bin
#PHASE = --ptxas-options=-v -ptx
# The .cu recipes below expect "$(NVCC) $(PHASE)" to leave a <name>.cu.cpp file.
PHASE = -cuda
NVOPT = $(DEVICEEMU_NVCC) $(PROMOTE) $(DEBUGDEBUG) $(DEBUGOUTPUT) \
        -DXXX=$(XXX) -DYYY=$(YYY) -DMKX=$(MKX) --host-compilation 'C++' --use_fast_math

# Clear the suffix list so no legacy suffix rules apply.
.SUFFIXES :

# These names are commands, not files that the recipes create.
.PHONY : all vanilla chocolate clean clena tar

# vanilla and chocolate both regenerate module_mp_wsm5.f90 / module_mp_wsm5.o
# with different preprocessor flags, so they must never run concurrently.
.NOTPARALLEL :

all : vanilla chocolate compare_snaps

# m4-expand the CUDA source, substitute the configured real type for "float",
# run nvcc, then compile the generated C++ with the host compiler.
wsm5.cu.o : wsm5.cu
	m4 wsm5.cu | sed "s/float/$(FLOAT)/g" > y.cu
	$(NVCC) $(PHASE) $(NVOPT) y.cu
	/bin/mv y.cu.cpp wsm5.cu.cpp
	$(CC) $(CFLAGS) -c wsm5.cu.cpp

# Same pipeline as wsm5.cu.o, with an extra spt.pl filter stage after m4.
wsm5_gpu.cu.o : wsm5_gpu.cu spt.h
	m4 wsm5_gpu.cu | spt.pl | sed "s/float/$(FLOAT)/g" > x.cu
	$(NVCC) $(PHASE) $(NVOPT) x.cu
	/bin/mv x.cu.cpp wsm5_gpu.cu.cpp
	$(CC) $(CFLAGS) -c wsm5_gpu.cu.cpp

# Preprocess the fixed .F source with cpp, then compile the result.
libmassv.o : libmassv.F
	/lib/cpp -C -P libmassv.F > libmassv.f90
	$(FC) -c $(OPT) $(FCFLAGS) libmassv.f90

microclock.o : microclock.c
	$(CC) -c $(CFLAGS) -DMKX=$(MKX) microclock.c

# Previous variant of the vanilla rule (emulator defines and emulator cuBLAS):
# vanilla : module_mp_wsm5.F libmassv.o microclock.o
#	/lib/cpp -C -P $(DEBUGDEBUG) $(DEVICEEMU) $(DEBUGOUTPUT) -DRWORDSIZE=$(RWORDSIZE) module_mp_wsm5.F > module_mp_wsm5.f90
#	$(FC) -c $(OPT) $(FCFLAGS) module_mp_wsm5.f90
#	$(LD) -o wsm5_driver_vanilla $(LDOPT) module_mp_wsm5.o libmassv.o microclock.o $(LIBCUBLASEMU)

# Host-only driver: builds wsm5_driver_vanilla (no -DRUN_ON_GPU, no .cu objects).
# NOTE(review): $(DEVICE) is not defined anywhere in this file, so it expands
# to nothing; chocolate and the old rule above use $(DEVICEEMU). Confirm
# whether this is intentional before changing it.
vanilla : module_mp_wsm5.F libmassv.o microclock.o
	/lib/cpp -C -P $(DEBUGDEBUG) $(DEVICE) $(DEBUGOUTPUT) -DRWORDSIZE=$(RWORDSIZE) module_mp_wsm5.F > module_mp_wsm5.f90
	$(FC) -c $(OPT) $(FCFLAGS) module_mp_wsm5.f90
	$(LD) -o wsm5_driver_vanilla $(LDOPT) module_mp_wsm5.o libmassv.o microclock.o $(LIBCUBLAS)

# GPU driver: builds wsm5_driver_chocolate with -DRUN_ON_GPU and the CUDA objects.
chocolate : module_mp_wsm5.F wsm5.cu.o wsm5_gpu.cu.o libmassv.o microclock.o
	/lib/cpp -C -P $(DEBUGDEBUG) $(DEVICEEMU) $(DEBUGOUTPUT) -DRUN_ON_GPU -DRWORDSIZE=$(RWORDSIZE) module_mp_wsm5.F > module_mp_wsm5.f90
	$(FC) -c $(OPT) $(FCFLAGS) module_mp_wsm5.f90
	$(LD) -o wsm5_driver_chocolate $(LDOPT) module_mp_wsm5.o wsm5.cu.o wsm5_gpu.cu.o libmassv.o microclock.o $(LIBCUBLAS)

# Compile via a temporary .f90 copy of the .F source, removed afterwards.
compare_snaps : compare_snaps.F
	\cp compare_snaps.F compare_snaps.f90
	$(FC) -o compare_snaps $(FCFLAGS) compare_snaps.f90
	\rm compare_snaps.f90

# "clena" is kept as an alias for the common typo.
# *.cu.cpp is included because the .cu recipes above generate wsm5.cu.cpp and
# wsm5_gpu.cu.cpp, which the original *.cu.c pattern did not match.
clean clena :
	\rm -f *.o *.cu.c *.cu.cpp x.cu y.cu wsm5_driver_* *.mod *.f90 x.ptx

# Archive the sources into a tarball stamped with today's date.
tar :
	tar cvf wsm5gpu_`date +"%Y%m%d"`.tar *.cu *.F *.h *.m4 *.pl makefile