########################################################################
#
# Build rules for the Fortran benchmark programs stream, stream_tuned
# and speed.
#
# The environment variable CPU is used to set platform-dependent make
# variables (FC and FFLAGS).
#
# If CPU is undefined, use the test_os script to figure it out:
ifeq ($(CPU),)
CPU := $(shell ./test_os)
endif

# Old DEC Alpha compiler options:
# FC = f77
# FFLAGS = -O -fast
# FFLAGS = -O -fast -r8

# Sun compiler options which Bowman found to be fastest, particularly
# for inlining square roots:
# FFLAGS = -fast -O4 -Bstatic

# DEC Alpha Linux options:
ifeq ($(CPU),LINUX_alpha)
FC = fort
FFLAGS = -extend_source -r8 -fast -assume no2underscores
endif

# Lahey lf95 on Linux.
ifeq ($(CPU),LINUX_lahey)
FC = lf95
FFLAGS = --dbl -O --tpp --wide --nap --nchk --npca --nsav --ntrace \
         --prefetch 2 --staticlink --info
# Other flag combinations, kept for reference (disabled):
#	--mldefault cdecl
#	--prefetch 2 --staticlink -X9
#	--prefetch 2 --staticlink -L/usr/local/lff95/lib -lblas
# FFLAGS = -O2
endif

# Don't want inlining with interprocedural optimization turned on for
# these benchmarking codes, because the compiler may figure out that the
# external loops only need to be done once...
ifeq ($(CPU),LINUX_INTEL)
FC = ifc
FFLAGS = -r8 -O3 -tpp7 -axiMKW -pad -unroll
# In some cases -pc32 can speed up / or sqrt ops
# -Zp16 or -Zp8 had no effect
# FFLAGS = -r8 -O3 -tpp7 -axiMKW -pad -unroll -fno-alias -fno-fnalias
# alias switches had no effect
# FFLAGS = -r8 -O3 -tpp7 -axiMKW -pad -unroll -parallel
endif

# Static BLAS archive linked into `speed`.  The default is the path the
# recipe used to hard-code; override it with `make BLAS_LIB=/path/libblas.a`
# when the library lives elsewhere.
BLAS_LIB ?= /usr/local/lff95/lib/libblas.a

# `all` names a command, not a file, so keep a stray file called "all"
# from masking it.
.PHONY: all

all: stream stream_tuned

stream: stream_d.f mysecond.f
	$(FC) $(FFLAGS) -o $@ $^

stream_tuned: stream_tuned.f second.f
	$(FC) $(FFLAGS) -o $@ $^

# saxpy_g.f is compiled by the first recipe line, so it is listed as a
# prerequisite; otherwise editing it would not trigger a rebuild.
speed: speed.f speedsub.f saxpy_g.f
	$(FC) $(FFLAGS) -c speedsub.f saxpy_g.f
	$(FC) $(FFLAGS) -o $@ speed.f speedsub.o saxpy_g.o \
		$(BLAS_LIB)
# Link line without an explicit BLAS archive (disabled):
#	$(FC) $(FFLAGS) -o speed speed.f speedsub.o saxpy_g.o
# Alternative archive location (disabled):
#	$(F90HOME)/lib/libblas.a   # Doesn't exist yet
#
# Have to force link to blas??
#
# Alternative recipe for `speed` using second.f and the BLAS in the
# blas subdirectory (disabled):
#	$(FC) $(FFLAGS) -c speedsub.f second.f
#	$(FC) $(FFLAGS) -o speed speed.f speedsub.o second.o blas/libblas1.a
# The free Pentium-optimized assembly-language BLAS from 1996 in
# subdirectory blas are
#	15% faster for small vectors but
#	30% slower for large vectors.  So don't bother with them.

# `clean` names a command, not a file, so keep a stray file called
# "clean" from masking it.
.PHONY: clean

# The linpack and whetstone programs are compiled with plain -O rather
# than $(FFLAGS).
linpacks: linpacks.f second.f
	$(FC) -O -o $@ $^

linpackd: linpackd.f second.f
	$(FC) -O -o $@ $^

whetstoned: whetstoned.f
	$(FC) -O -o $@ $^

# $(CC) defaults to cc in GNU Make, so the default behaviour is unchanged
# while the C compiler can now be overridden from the command line.
clock.o: clock.c
	$(CC) -c $<

# Remove objects, editor backups and every executable this Makefile can
# build (including linpacks, linpackd and whetstoned).
# $(RM) expands to `rm -f` by default.
clean:
	$(RM) *.o *~ stream speed stream_tuned linpacks linpackd whetstoned