*************************************************************************
**       TEST PROGRAMME FOR THE various types of sparse solvers
*************************************************************************

      program validation
*
      implicit none
      integer lda, ldstrt, lwork
      parameter (lda = 1000, ldstrt = 60)
      parameter (lwork = ldstrt**2 + ldstrt*(lda+5) + 5*lda + 1)
*
      integer i, j, n, m
      integer revcom, colx, coly, colz, nbscal
      integer irc(5), icntl(8), info(3)
*
      integer matvec, precondLeft, precondRight, dotProd
      parameter (matvec=1, precondLeft=2, precondRight=3, dotProd=4)
*
      integer nout
*
      complex*16  a(lda,lda), work(lwork), xtrue(lda)
      real*8  cntl(5), rinfo(2)
      real*8 dx, x, pi
*
      complex*16 ZERO, ONE
      parameter (ZERO = (0.0d0, 0.0d0), ONE = (1.0d0, 0.0d0))
*
      logical :: problem_delta = .false.

      pi = abs(acos(-1.0))

*
***************************************************************
** Generate the test matrix A and set the right-hand side
** in positions (n+1) to 2n of the array work.
** The right-hand side is chosen such that the exact solution
** is the vector of all ones.
***************************************************************
*
      write(*,*) '***********************************************'
      write(*,*) 'This code is an example of use of'
      write(*,*) 'optimized dynamic relaxation, a.k.a.'
      write(*,*) 'optimized 2cd Order Richardson iteration, or'
      write(*,*) 'a modified form of 2cd order Chebyshev iteration, or'
      write(*,*) 'a type of recycled GMRES(2) instead of the'
      write(*,*) 'standard restarted GMRES(2).'
      write(*,*) 'Results are written in output files'
      write(*,*) 'fort... and sol_zTestgmres.'
      write(*,*) '***********************************************'
      write(*,*)
      write(*,*) 'Matrix size ( < ', lda, ") ?"
      read(*,*) n
!     n = 19
      write(*,*) "Matrix size =", n
      if (n.gt.lda) then
        write(*,*) 'You are asking for a too large matrix'
        goto 100
      endif

!-------------------------------------
! For Jin Chen's test problem, grid points evenly spaced from 0 to 1,
! n interior grid points plus two grid points on the boundaries at 0 and 1:
      dx = 1.0/(n+1)
!     dx = 1.0

      do j = 1,n

! desired solution for x for Jin Chen's test problem
! (will later calculate the RHS needed to get this solution):
!        xtrue(j) = ONE
         x=j*dx
         xtrue(j) = sin(2*pi*x)
      enddo
! slightly perturb the desired solution so it isn't an exact eigenvector:
!      xtrue(n/2)=xtrue(n/2)+1.e-4
!-------------------------------------

      if(problem_delta) then
         ! The exact answer for 1 delta function on RHS should be:
         do j = 1,n/2+1
            xtrue(j) = 0.5*j
         enddo
         do j = n/2+2, n
            xtrue(j) = xtrue(j-1)-0.5
         enddo
      endif

*
! Solves A x = b, where A = grad^2 Poisson operator:
      do j = 1,n
        do i = 1,n
          a(i,j) = ZERO
        enddo
      enddo
*
      do i = 1,n
        a(i,i) = (-2.d0, 0.d0)/dx**2
      enddo
      do i = 1,n-1
        a(i,i+1) = (1.d0, 0.d0)/dx**2
        a(i+1,i) = (1.d0, 0.d0)/dx**2
      enddo
*
! compute b needed to give the desired solution for x:
       call ZGEMV('N',n,n,ONE,A,lda, xtrue, 1,ZERO,work(n+1),1)
!
! initial guess:       
      do j = 1,n
        work(j) = ZERO
      enddo

!----------------------------------------
      if(problem_delta) then
!     ! or give an explicit b and ignore xtrue:
         do j = 1,n
            work(n+j) = ZERO
         enddo
!     one or two delta functions:
!     
         work(n+n/2+1) = -1/dx**2
         work(n+n*0.75+1) = -1/dx**2
      endif
!!      
!-----------------------------------

*
*********************************
** Choose the restart parameter
*********************************
*
!      write(*,*) 'Restart  <', ldstrt
!      read(*,*) m
*
*******************************************************
** Initialize the control parameters to default value
*******************************************************
*
*     call init_zgmres(icntl,cntl)
*
*************************
*c Tune some parameters
*************************
*
** Tolerance
*      cntl(1) = 1.d-10
** Save the convergence history in file fort.40
*      icntl(3) = 40
** No preconditioning
*      icntl(4) = 0
** ICGS orthogonalization
*      icntl(5) = 3
** Maximum number of iterations
*      icntl(7) = 1000
*! use recurrence formula to calculate residual:
*      icntl(8) = 0
*
*****************************************
** 1st or 2cd order Richardson iterative solution of A x = b:
*****************************************
*
      call riter2gmres(n, A, lda, work(1), work(n+1), info, rinfo)
*
*******************************
* dump the solution on a file
*******************************
*
      nout = 11
      open(nout,FILE='sol_zTestgmres',STATUS='unknown')
!     write(nout,*) 'Restart : ', m
      write(nout,*) 'info(1) = ',info(1),'  info(2) = ',info(2)
      write(nout,*) 'rinfo(1) = ',rinfo(1),'  rinfo(2) = ',rinfo(2)
      write(nout,*) 'Optimal workspace = ', info(3)
      write(nout,*) 'Solution : '
      do j=1,n
        write(nout,*) work(j)
      enddo
      write(nout,*) '   '
*

      write(12,*) "# x Phi(x), Phi_true(x)"
      write(12,*) "0   0.0     0.0"
      do j=1,n
         write(12,*) j, real(work(j)), real(xtrue(j))
      enddo
      write(12,*) n+1," 0.0    0.0"

      write(*,*) "# iterations =", info(2), "relative residual err=", 
     &     rinfo(2)
      write(*,*) "average convergence rate /iteration", 
     &     rinfo(2)**(1./info(2))
100    continue
*
      stop
      end

!-----------------------------------------------------------------------
      subroutine riter1(n,A,lda,x,b, info, rinfo)
      ! solve A x = b using 1st order Richardson iteration
      ! on input, x contains the initial guess.
      implicit none
      integer n, lda
      complex*16 A(lda,lda)
      complex*16 x(n), b(n)
      integer info(2)
      real*8 rinfo(2)

      complex*16 Ax(n), xp(n), xn(n), res(n)
      complex*16, parameter :: ONE = (1.0d0, 0.0d0)
      complex*16, parameter :: ZERO = (0.0d0, 0.0d0)
      real*8 dt, err
      integer iter, niter, maxiter

      dt=0.4
      maxiter = 100

      write(*,*) "b=", b

      do iter = 1, maxiter

!     perform the matrix vector product Ax = A * x
         call zgemv('N',n,n,ONE,A,lda,x,1,ZERO,Ax,1)
         res = Ax - b
!        write(*,*) "x =", x
!        write(*,*) "res =", res
         xn = x + dt*res
         x = xn
         err=maxval(abs(res))
         write(*,*) "err =", err
         if(err .le. 1.e-10) goto 20
      enddo

 20   niter = iter
      info(2)=niter
      rinfo(2)=err

      return
      end subroutine riter1

!-----------------------------------------------------------------------
      subroutine riter2(n,A,lda,x,b, info, rinfo)
      ! solve A x = b using 2cd order Richardson iteration
      ! on input, x contains the initial guess.
      implicit none
      integer n, lda
      complex*16 A(lda,lda)
      complex*16 x(n), b(n)
      integer info(2)
      real*8 rinfo(2)

      complex*16 Ax(n), xp(n), xn(n), res(n)
      complex*16, parameter :: ONE = (1.0d0, 0.0d0)
      complex*16, parameter :: ZERO = (0.0d0, 0.0d0)
      real*8 dt, nu, err, pi, kmin, alpha, beta1, nudt_half
      integer iter, niter, maxiter

      dt=0.5*2.0
      nu=2*2*3.14159/n * 1.1
      maxiter = 1000
      pi = 2*abs(acos(0.0))

      ! Calculated optimal iteration parameters:
      kmin=2*pi/(n+1)
      alpha = 1-2*kmin
      beta1 = 1/4.+alpha/4+sqrt(alpha)/2
      
      nudt_half=(1-alpha)/(1+alpha)
      dt = sqrt(beta1*(1+nudt_half))
!     dt = (1+2*kmin)/(1+kmin)/2*(1-kmin+sqrt(1-2*kmin))
!     dt = sqrt(dt)
      nu=nudt_half*2/dt*0.33

! must be an error in the above calculation of the optimal parameters, because 
! I empirically find a better fit is obtained if I multiply this nu by 0.33...

      write(*,*) "alpha, beta1 =", alpha, beta1
      write(*,*) "dt, nu =", dt, nu
      write(*,*) "predicted error reduction / iteration =", 1/(1+nu*dt)
      write(*,*) "b=", b
      write(15,*) "# iteration #,   err"
      do iter = 1, maxiter

!     perform the matrix vector product Ax = A * x
         call zgemv('N',n,n,ONE,A,lda,x,1,ZERO,Ax,1)
         res = Ax - b
!        write(*,*) "x =", x
!        write(*,*) "res =", res
         xn = x + (x-xp)*(1-nu*dt/2)/(1+nu*dt/2) + dt**2*res/(1+nu*dt/2)
         xp = x
         x = xn
         err=maxval(abs(res))
         write(15,*) iter, err
         if(err .le. 1.e-10) goto 20
      enddo

 20   niter = iter
      info(2)=niter
      rinfo(2)=err

      return
      end subroutine riter2

!-----------------------------------------------------------------------
      subroutine riter2gmres(n,A,lda,x,b, info, rinfo)
      !
      ! solve A x = b using optimized 2cd order Richardson iteration,
      ! which is like GMRES(2), but with a modified restarting procedure.
      !
      ! on output, x will contain the calculated solution
      !
      implicit none
      integer n, lda
      complex*16 A(lda,lda)
      complex*16 b(n)
      integer info(2)
      real*8 rinfo(2)

      complex*16 x(n)     ! the current estimate  x_{p}

      complex*16 x_pm1(n) ! the previous estimate x_{p-1}
      complex*16 x_pp1(n) ! the next estimate     x_{p+1}
      complex*16 r_p(n)   ! the residual error = Ax - b
      complex*16 r_pm1(n) ! the residual error for x_{p-1}
      complex*16 Ar(n)    ! A.r

      complex*16 a1, b1, c1, d1, det, alpha, beta
      complex*16, parameter :: ONE = (1.0d0, 0.0d0)
      complex*16, parameter :: ZERO = (0.0d0, 0.0d0)
      complex*16 zdot, x1, x2
      real*8 err, sin2theta, bnorm
      integer iter, niter, maxiter, j

      ! n_colin = # of times that search directions were co-linear:
      integer, save :: n_colin = 0 

      logical, save :: first_time=.true.
      real*8, save :: epsilon, sqrt_epsilon
      real*8 dlamch ! lapack routine to give machine precision parameters

      if(first_time) then
         epsilon = dlamch("e")
         sqrt_epsilon = sqrt(epsilon)
         first_time = .false.
      endif

      ! initialization
      x_pm1 = 0  ! initial guess
      x = b    ! the initial residual gives the initial search direction

      ! find the initial search direction s = r = Ax-b:
      ! (note that we don't need the initial best fit here,
      ! just a vector in the direction of the best fit.)
      call zgemv('N',n,n,ONE,A,lda,x,1,ZERO,r_p,1)
      r_p = b - r_p
      r_pm1 = b

      write(15,*) "# iteration #,   relative error in residual"

      bnorm = sqrt(zdot(b,b,n))

      err = sqrt(zdot(r_pm1,r_pm1,n))/bnorm
!     print *, "err_pm1=", err
      write(15,*) "0 ", err
!     err = sqrt(zdot(r_p,r_p,n))/bnorm
!     print *, "err_p  =", err

      maxiter = 1000
      do iter = 1, maxiter

         call zgemv('N',n,n,ONE,A,lda,r_p,1,ZERO,Ar,1)

         a1 = zdot(r_pm1 - r_p, r_pm1 - r_p, n)
         b1 = zdot(r_pm1 - r_p, Ar, n)
         c1 = conjg(b1)
         d1 = zdot(Ar, Ar, n)

         det = a1*d1 - b1*c1
!        print *, "iter =", iter
!        print *, "det =", det
         sin2theta = abs(det/(a1*d1))
!        print *, "|sin(theta)|^2 =", sin2theta

         ! If the two search directions are nearly parallel, then the
         ! determinant is very small, and alpha and beta are not 
         ! independently well determined, though the sum alpha+beta 
         ! is well determined, as is the improved solution x_{p+1}.
         ! To handle this special case, set alpha=0 and solve for
         ! beta alone (this is equivalent to reverting to simpler
         ! 1st order Richardson iteration = steepest descents).
         ! (This special case will occur only when the residual is nearly
         ! and eigenvector of A and the solution is just about to converge.
         ! This problem was first discovered for Jin Chen's test case.)

         if(sin2theta .gt. sqrt_epsilon) then

            x1 = zdot(r_pm1 -r_p, r_p, n) 
            x2 = zdot(Ar,r_p,n, n)
            alpha = ( d1*x1 - b1*x2 )/det
            beta  = (-c1*x1 + a1*x2 )/det
         
         else

            alpha = 0.0
            beta  = zdot(Ar,r_p,n)/zdot(Ar,Ar,n)
            n_colin = n_colin + 1

         endif

         ! next estimate:
         x_pp1 = x + alpha*(x-x_pm1) + beta*r_p

         ! find the residual error of the new estimate:
         r_pm1=r_p
         call zgemv('N',n,n,ONE,A,lda,x_pp1,1,ZERO,r_p,1)
         r_p = b - r_p

         err = sqrt(zdot(r_p,r_p,n))/bnorm
         write(15,*) iter, err
!        print *, "iter #, relative residual error=", iter, err

         x_pm1 = x
         x = x_pp1

         if(err .le. 1.e-10) goto 20

         ! the algorithm above might not be fully optimized,
         ! perhaps some steps could be reordered and merged.

      enddo

 20   niter = iter
      ! after finishing a do loop, the index is 1 higher than the max:
      if(niter .gt. maxiter) niter=maxiter
      info(2)=niter
      rinfo(2)=err

      write (15, *) "# "
      write (15, *) "# search directions were co-linear", 
     &     n_colin, "times"

      write (17, *) "# j    x(j)    b(j)      residual = b - A x"
      do j=1, n
         write (17,*) j, real(x(j)), real(b(j)), real(r_p(j))
      enddo

      return
      end subroutine riter2gmres

!-----------------------------------------------------------------------
      complex*16 function zdot(x,y,n)
      !
      ! calculate complex inner product (x,y) = x* . y

      integer i,n
      complex*16 x(n), y(n)

      zdot=0.0
      do i=1,n
         zdot=zdot+conjg(x(i))*y(i)
      enddo
      return
      end