c--measure cpu floating point speed
      program speed6
c
c To compile and run this on a unix machine, do:
c
c    SR01% make
c    SR01% time speed6
c     just did 10 million multiplications
c    6.3u 0.5s 0:09 68% 82+27847k 1+0io 13pf+0w
c
c or on the cray, make sure the proper cputime routine at the end is
c selected, and perhaps increase nsize and nops, and then type:
c
c    cf77 -Zp -o speed6 speed6.f
c
c or on a Sun:
c
c    cc -c clock.c
c    f77 -O -o speed6 speed6.f clock.o
c
c The above info from node SR01 (a DECstation 5000/200) says that the
c cpu time was 6.3 seconds for the user code, 0.5 seconds for system
c calls, elapsed time of 9 seconds, 68% of which was used by this
c process,
c    82 kBytes of memory for the program, 27 MBytes for data
c    1 disk read, no disk writes
c    13 page faults, no swaps.
c
      implicit none
      integer, parameter :: million=1000000

c     Run the whole benchmark suite twice: once with a short vector
c     length and once with a long one, separated by a blank line.
      call speedn(100)
      write(6,*) " "
      call speedn(10*million)
      end program speed6

c*********************************************************
      subroutine speedn(nsize)
c
c Time a series of floating point kernels on work vectors of length
c nsize and print one summary line per kernel through wrsum.
c
c   nsize  (in)  length of the work vectors a, b, c, d; must be >= 1.
c
c The kernels themselves (mult, sweep, sweepf2, div, addmult, dotprod,
c logme, lpsqrt, daxpy, daxpy0, daxpy4, matmult) are external routines
c that are not defined in this part of the file.
c
c Each kernel is called imax times (or a fixed multiple of imax) so
c that roughly nops2 = 40 million element operations are performed
c regardless of nsize.  The operation counts handed to wrsum are the
c per-kernel multiples used below.
c
c Historical note: nsize used to be a compile-time parameter
c (10000000 or 100) instead of an argument.
c
      implicit none
      integer, intent(in) :: nsize

      integer, parameter :: nx=33
      integer, parameter :: nv=15
c     target number of element operations per timed kernel
      integer, parameter :: nops2=40000000

c     The work vectors are allocatable rather than automatic: at
c     nsize = 10 million, four automatic real arrays need about
c     160 MBytes, which overflows typical stack limits.
      real, allocatable :: a(:), b(:), c(:), d(:)
      real g(nx,nv), gnew(nx,nv), r(nx,nv), s(nx,nv)
      real gt(nv,nx), rt(nv,nx), st(nv,nx)
      real time1, time2, pi
      integer i, ix, nmat, nops, imax, imax_s, ierr
!     ml_external daxpy

c     nsize is used as a divisor and as an array extent below.
      if (nsize .lt. 1) then
         write(6,*) 'speedn: nsize must be >= 1, got', nsize
         return
      endif

      allocate(a(nsize), b(nsize), c(nsize), d(nsize), stat=ierr)
      if (ierr .ne. 0) then
         write(6,*) 'speedn: cannot allocate work arrays, nsize =',
     &        nsize
         return
      endif

!initialize values:
      pi=3.14159
      nops=nops2

      write(6,*) 'Vector length =', nsize

      do i=1,nsize
         a(i)=pi*i
         b(i)=pi/i
         c(i)=pi*3.4*(i-nsize/2)
         d(i)=1/b(i)
      enddo

      s=1
      r=0.5
c     rt and st are handed to sweepf2 below, so they must be defined.
c     They are given the same values as r and s, on the assumption
c     that they are the (nv,nx)-shaped counterparts of those arrays,
c     as gt is for g.
      st=1
      rt=0.5
      do ix=1,nx
         g(ix,:) = 1.0/ix
         gt(:,ix) = 1.0/ix
      enddo
      gnew=g

c     Number of repetitions needed to reach about nops operations.
c     For vectors longer than nops, do one pass and count nsize ops.
      imax=nops/nsize
      if(imax .lt. 1) then
         imax=1
         nops=nsize
      endif

c     repetition count for the fixed-size (nx,nv) sweep kernels
      imax_s=max(nops2/(nx*nv),1)

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax*2
         call mult(c,a,b,nsize)
      enddo
      call cputime(time2)
      call wrsum(nops*2,time1,time2,'*')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax_s
         call sweep(gnew,g,r,s,nx,nv)
      enddo
      call cputime(time2)
      call wrsum(2*imax_s*nx*nv,time1,time2,'sweep')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax_s
!        call sweepf(gnew,g,r,s,nx,nv)
         call sweepf2(gt,rt,st,nx,nv)
      enddo
      call cputime(time2)
      call wrsum(2*imax_s*nx*nv,time1,time2,'fast sweep')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax
         call div(c,a,b,nsize)
      enddo
      call cputime(time2)
      call wrsum(nops,time1,time2,'/')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax
         call addmult(c,a,b,d,nsize)
      enddo
      call cputime(time2)
      call wrsum(7*nops,time1,time2,'multiple *+')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax
         call dotprod(c,a,b,nsize)
      enddo
      call cputime(time2)
      call wrsum(2*nops,time1,time2,'Dot *+')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax
         call logme(c,a,nsize)
      enddo
      call cputime(time2)
      call wrsum(nops,time1,time2,'logs')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax
         call lpsqrt(c,a,nsize)
      enddo
      call cputime(time2)
      call wrsum(nops,time1,time2,'sqrts')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax*4
         call daxpy(nsize,3.1459, c, 1, b, 1)
      enddo
      call cputime(time2)
      call wrsum(4*2*nops,time1,time2,'daxpy *+')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax*4
         call daxpy0(nsize,3.1459, c, 1, b, 1)
      enddo
      call cputime(time2)
      call wrsum(4*2*nops,time1,time2,'daxpy0 *+')

cccccccccccccccccccccccccccccccccccccccccccccc
      call cputime(time1)
      do i=1,imax*4
         call daxpy4(nsize,3.1459, c, 1, b, 1)
      enddo
      call cputime(time2)
      call wrsum(4*2*nops,time1,time2,'daxpy4 *+')

cccccccccccccccccccccccccccccccccccccccccccccc
c     nmat is the largest matrix order with nmat**3 <= nsize.  The
c     single precision estimate can be off by one in either direction
c     (1./3 is not exact), and an estimate that is too large would let
c     nmat**3 exceed the length of the work vectors, so it is corrected
c     with integer arithmetic.  The comparisons are written with a
c     division so that no intermediate product can overflow.
      nmat=max(1,int(nsize**(1./3)))
      do while (nmat .gt. 1 .and. nmat**2 .gt. nsize/nmat)
         nmat=nmat-1
      enddo
      do while ((nmat+1)**2 .le. nsize/(nmat+1))
         nmat=nmat+1
      enddo

      call cputime(time1)
      do i=1,imax
         call matmult(c,a,b,nmat)
      enddo
      call cputime(time2)
      call wrsum(2*nmat**3*imax,time1,time2,'MatMult')

c*********************************************************
c disabled test:
c      call cputime(time1)
c      do i=1,imax
c         call move(c,a,b,nsize)
c      enddo
c      call cputime(time2)
c      call wrsum(nops,time1,time2,'moves')

      deallocate(a, b, c, d)
      return
      end

c*********************************************************
      subroutine wrsum(nops,time1,time2,optype)
c
c Print a one-line summary for a timed kernel.
c
c   nops    (in)  number of operations performed
c   time1   (in)  cpu time in seconds before the kernel ran
c   time2   (in)  cpu time in seconds after the kernel ran
c   optype  (in)  label for the kind of operation, printed verbatim
c
c The operation count is printed in whole millions (truncated, in an
c i4 field); the rate is computed from the untruncated count.
c
      implicit none
      integer, intent(in) :: nops
      real, intent(in) :: time1,time2
      character(len=*), intent(in) :: optype
      integer, parameter :: million=1000000
      real elapsed, mflops

      elapsed=time2-time1

c     A timer that does not advance (for example the do-nothing
c     cputime variant described at the end of this file) gives zero
c     elapsed time; report a rate of 0 rather than dividing by zero.
      if (elapsed .gt. 0.0) then
         mflops=(real(nops)/real(million))/elapsed
      else
         mflops=0.0
      endif

      write(6,101) nops/million, optype, elapsed, mflops
 101  format(' did',i4,' million ',a,' ops in ',f10.4,
     &     ' cpu secs =',f10.4,' MFLOPS')
      return
      end

c*********************************************************
      subroutine cputime(time)
c
c Return the processor time in seconds, as reported by the standard
c CPU_TIME intrinsic.
c
c   time  (out)  cpu time in seconds
c
      implicit none
      real, intent(out) :: time

      call cpu_time(time)

c alternative:
c      time = second()
c second() already exists on cray, otherwise, use
c my version in second.f which calls etime
      return
      end

c unix version relying on C-library call to clock():
c      subroutine cputime(time)
c      implicit none
c      integer ticks,clock
c      real time
c      ticks=clock()
c      time=1.0e-6*ticks
c      return
c      end

c vax version:
c      subroutine cputime(time)
c      call clock(itime)
c      itime=0
c      time=itime
c      time=time/1.e8
c      return
c      end
c
c alternatively, there may be a usable VAX version in
c towner's [nbeam]slib.

c do nothing dummy version, which may be okay for unix systems with
c the "time" command:
c      subroutine cputime(time)
c      time=0
c      return
c      end