module linear
!
!     (c) Copyright 1991 to 1998 by Michael A. Beer, William D. Dorland, 
!     P. B. Snyder, Q. P. Liu, and Gregory W. Hammett. ALL RIGHTS RESERVED.
!     
!
! This module contains variables and routines related to taking derivatives
! and evaluating wave-particle resonance approximation terms.
!  
  implicit none

!
! FLR corrections to Phi
!
  complex, dimension(:,:,:,:), allocatable :: phi_n, phi_u, phi_tpar, phi_tperp, &
       phi_qperp, phi_rperp, phi_flr1, phi_flr2, phi_flr3, phi_nd, phi_tpard, &
       phi_tperpd, phi_qperpb

!
! FLR corrections to A_par
!
!     J0apar is the base FLR term, Gamma_0 A_par (or other model for J0)
!     apar_flr1 is 1/2 gradperphat^2
!     apar_flr2 is gradperphathat^2
!     apar_u is the FLR term from the upar eqn, same for apar_qpar &apar_qperp
!             
  complex, dimension(:,:,:,:), allocatable :: J0apar, apar_flr1, &
       apar_flr2, apar_u, apar_qpar, apar_qperp
  
! Weights related to FLR effects
  
  real, dimension(:,:,:,:), allocatable :: gnwgt, gtpwgt, gtwgt, gnwgtd, gtpwgtd, gtwgtd, &       
       gqwgtb, grwgt, g0wgt, g0wgtp, flrwgt, flrwgt2, flrwgt3, gewgt, twgt, nwgt, &
       pfilter3, guawgt, gqawgt
  
  real, dimension(:,:,:), allocatable :: pfilter, pfilter2
  real, dimension(:,:), allocatable :: denom0   ! field-line-average term

! Arrays for k_parallel FFT's
  real, dimension(:,:,:), allocatable :: kpar, tablekp

contains

  subroutine flrinit(init_crit)
!     
!     initializes some arrays used in flr, poisson, timestep, and nlpsc
!     
!     (c) Copyright 1991 to 1995 by Michael A. Beer, William D. Dorland, 
!     and Gregory W. Hammett. ALL RIGHTS RESERVED.
!

    use mp, only: max_allreduce, proc0, iproc
    use itg_data, only: nspecies, beta_e, etak, etak_par, &
         rho, iflr, tau, charge, n_I, s_perp, tiovte, a0, &
         semi_imp, iphi00, lin, shr, jacobian
    use gryffin_grid, only: ld, l_left, l_right, md, ldb, nd, x0, &
         rkperp2, rky2, rkx2
    use gryffin_layouts
    use constants, only: ii

    real rkp2, rgammas, tmu, wgt, zfilter
    real s18ceE, s18cfE
    integer i, j, l, m, n, ifail, lmax, init_crit
    
    if(.not.allocated(phi_n)) then

       allocate(phi_n(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_u(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_tpar(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_tperp(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_qperp(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_rperp(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_flr1(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_flr2(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
!            phi_flr3(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_nd(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_tpard(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_tperpd(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            phi_qperpb(ld, m_low:m_alloc, n_low:n_alloc, nspecies))

       phi_n = 0.; phi_u = 0.; phi_tpar = 0.; phi_tperp = 0.; phi_qperp = 0.
       phi_rperp = 0.; phi_flr1 = 0.; phi_flr2 = 0.; phi_nd = 0.; phi_tpard = 0.
       phi_tperpd = 0.; phi_qperpb = 0.

       if (beta_e > 0.) then
          allocate (J0apar(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          allocate (apar_flr1(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          allocate (apar_flr2(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          allocate (apar_u(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          allocate (apar_qpar(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          allocate (apar_qperp(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          
          J0apar = 0.;  apar_flr1 = 0.;  apar_flr2 = 0.
          apar_u = 0.;  apar_qpar = 0.;  apar_qperp = 0.

       endif

       allocate(gnwgt(ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            gtpwgt   (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            gtwgt    (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            gnwgtd   (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            gtpwgtd  (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            gtwgtd   (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &       
            gqwgtb   (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            grwgt    (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            g0wgt    (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            g0wgtp   (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            flrwgt   (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            flrwgt2  (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            flrwgt3  (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            gewgt    (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            twgt     (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            nwgt     (ld, m_low:m_alloc, n_low:n_alloc, nspecies), &
            pfilter3 (ld, m_low:m_alloc, n_low:n_alloc, nspecies))

       gnwgt = 0.;  gtpwgt = 0.;  gtwgt = 0.;  gnwgtd = 0.;  gtpwgtd = 0.
       gtwgtd = 0.; gqwgtb = 0.;  grwgt = 0.;  g0wgt = 0.;  g0wgtp = 0.
       flrwgt = 0.;  flrwgt2 = 0.;  flrwgt3 = 0.;  gewgt = 0.;  twgt = 0.
       nwgt = 0.;  pfilter3 = 0.

       if (beta_e > 0.) then
          allocate (guawgt(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          allocate (gqawgt(ld, m_low:m_alloc, n_low:n_alloc, nspecies))
          
          guawgt = 0.
          gqawgt = 0.          
       endif

       allocate (pfilter(ld, m_low:m_alloc, n_low:n_alloc), &
            pfilter2(ld, m_low:m_alloc, n_low:n_alloc), &
            denom0(m_low:m_alloc, n_low:n_alloc))

       pfilter = 0.;  pfilter2 = 0.;  denom0 = 0.

    endif
!     
!     beginning of flr and poisson initializations
!     flr
!********************************************************************
!
    if(init_crit == 0) then
       do i=1,nspecies
          do n = n_low, n_high
             do m = m_low, m_high
                do l=1,ld-1
                   gnwgt(l,m,n,i)=(g0wgt(l,m,n,i)+etak(m,n,i)*flrwgt(l,m,n,i))
                   gtpwgt(l,m,n,i)=etak_par(m,n,i)*g0wgt(l,m,n,i)
                   gtwgt(l,m,n,i)=etak(m,n,i)*(g0wgt(l,m,n,i)+flrwgt2(l,m,n,i))+flrwgt(l,m,n,i)  
!                   grwgt(l,m,n,i)=flrwgt(l,m,n,i)+etak(m,n,i)*(g0wgt(l,m,n,i) &
!                        +flrwgt2(l,m,n,i)+flrwgt(l,m,n,i))
                enddo
             enddo
          enddo
       enddo

       if (beta_e > 0.) then
          do i = 1, nspecies
             do n = n_low, n_high
                do m = m_low, m_high
                   do l=1,ld-1
                      guawgt(l,m,n,i) = g0wgt(l,m,n,i)*(1.+etak_par(m,n,i)) &
                           +etak(m,n,i)*flrwgt(l,m,n,i)
                     gqawgt(l,m,n,i)= &
                          (g0wgt(l,m,n,i)+flrwgt2(l,m,n,i))*etak(m,n,i) &
                          +(1.+etak_par(m,n,i))*flrwgt(l,m,n,i)
                     enddo
                  enddo
               enddo
            enddo
         else
            guawgt = 0.
            gqawgt = 0.
         endif
         return
      endif

!********************************************************************
!     initialize some arrays only once per run:
!     
    ifail=0
    
    do i=1,nspecies
       do n = n_low, n_high
          do m = m_low, m_high
             do l=1,ld-1
                rkp2=rkperp2(l,m,n)*rho(i)**2
                rgammas=s18cfE(rkp2,ifail)/s18ceE(rkp2,ifail)
!
!     Use "new" Pade approximation
!
                if (iflr == 1) then
                   g0wgt(l,m,n,i)=1./(1.+rkp2/2.)
                   flrwgt(l,m,n,i)=-rkp2/(2.+rkp2)
                   flrwgt2(l,m,n,i)=-rkp2/(1.+rkp2/2.)**2
!     
!     Use "new" FLR model
!     
                else if(iflr == 2.or.iflr == 3.or.iflr == 8.or.iflr == 9) then                 
                   g0wgt(l,m,n,i)=sqrt(s18ceE(rkp2,ifail))
                   flrwgt(l,m,n,i)=-rkp2/2.*(1.-rgammas)
                   flrwgt2(l,m,n,i)=-rkp2/2.*(1.+(1.-rgammas)*(1.-rkp2/2.*(3.+rgammas)))
!     
!     Use exp(-b/2) model
!     
                else if(iflr == 4) then
                   g0wgt(l,m,n,i)=exp(-rkp2/2.)
                   flrwgt(l,m,n,i)=-rkp2/2.
                   flrwgt2(l,m,n,i)=-rkp2*(1.-rkp2/4.)
!
!     Try Taylor series
!     
                else if(iflr == 5) then
                   flrwgt(l,m,n,i)=-rkp2/2.
                   flrwgt2(l,m,n,i)=-rkp2/2.
                   g0wgt(l,m,n,i)=1.0
!
!     Like Ron, but Linsker treatment different
!     
                else if(iflr == 6) then
                   g0wgt(l,m,n,i)=s18ceE(rkp2,ifail)
                   flrwgt(l,m,n,i)=-rkp2*(1.-rgammas)
                   flrwgt2(l,m,n,i)=-rkp2*(2.-rgammas)+2.*rkp2**2*(1-rgammas)
!     
!     Full FLR, but no Linsker effect
!     
                else if(iflr == 7) then
                   g0wgt(l,m,n,i)=s18ceE(rkp2,ifail)
                   flrwgt(l,m,n,i)=-rkp2*(1.-rgammas)
                   flrwgt2(l,m,n,i)=-rkp2*(1.-rgammas)
                endif

                flrwgt3(l,m,n,i)=flrwgt(l,m,n,i)**2
                gnwgt(l,m,n,i)=(1.+etak(m,n,i)*flrwgt(l,m,n,i))*g0wgt(l,m,n,i)
                gtpwgt(l,m,n,i)=etak_par(m,n,i)*g0wgt(l,m,n,i)
                gtwgt(l,m,n,i)=etak(m,n,i)*(1.+flrwgt2(l,m,n,i))+flrwgt(l,m,n,i)
                gtwgt(l,m,n,i)=gtwgt(l,m,n,i)*g0wgt(l,m,n,i)
!                grwgt(l,m,n,i)=(flrwgt(l,m,n,i)+etak(m,n,i)*(1.+flrwgt2(l,m,n,i) &
!                     +flrwgt(l,m,n,i)))*g0wgt(l,m,n,i)
!
!     toroidal FLR terms, keep old forms for backward compatability
!
!     Pade in Poisson with new toroidal FLR terms,
!     suggested forms in M.A.Beer Ph.D. thesis
!
                if (iflr == 8) then
                   gnwgtd(l,m,n,i)=(2.+flrwgt(l,m,n,i))*g0wgt(l,m,n,i)/tau(i)
                   gtpwgtd(l,m,n,i)=2.*g0wgt(l,m,n,i)/tau(i)
                   gtwgtd(l,m,n,i)=(1.+2*flrwgt(l,m,n,i) &
                        +flrwgt2(l,m,n,i))*g0wgt(l,m,n,i)/tau(i)
                   gqwgtb(l,m,n,i)=(flrwgt2(l,m,n,i)-flrwgt(l,m,n,i)) &
                        *g0wgt(l,m,n,i)
!
!     Non-O(b) accurate closure in thesis
!
                else if(iflr == 9) then   
                   gnwgtd(l,m,n,i)=(2.+3.*flrwgt(l,m,n,i) &
                        -flrwgt2(l,m,n,i))*g0wgt(l,m,n,i)/tau(i)
                   gtpwgtd(l,m,n,i)=2.*g0wgt(l,m,n,i)/tau(i)
                   gtwgtd(l,m,n,i)=(1.+flrwgt(l,m,n,i) &
                        +4.*flrwgt2(l,m,n,i))*g0wgt(l,m,n,i)/tau(i)  
                   gqwgtb(l,m,n,i)=0.0    ! only tested this model with gpar B=0
!
!     Old forms of toroidal terms, for backward comp. only
!
                else
                   gnwgtd(l,m,n,i)=2.*g0wgt(l,m,n,i)/tau(i)
                   gtpwgtd(l,m,n,i)=2.*g0wgt(l,m,n,i)/tau(i)
                   gtwgtd(l,m,n,i)=(1.+flrwgt(l,m,n,i)*3.)*g0wgt(l,m,n,i)/tau(i)
                   gqwgtb(l,m,n,i)=0.0
                endif
                gnwgtd(l,m,n,i)=gnwgtd(l,m,n,i)*charge(i)
                gtpwgtd(l,m,n,i)=gtpwgtd(l,m,n,i)*charge(i)
                gtwgtd(l,m,n,i)=gtwgtd(l,m,n,i)*charge(i)
                flrwgt(l,m,n,i)=flrwgt(l,m,n,i)*g0wgt(l,m,n,i)
                flrwgt2(l,m,n,i)=flrwgt2(l,m,n,i)*g0wgt(l,m,n,i)
                flrwgt3(l,m,n,i)=flrwgt3(l,m,n,i)*g0wgt(l,m,n,i)
             enddo
          enddo
       enddo
    enddo
!
! end of initialization section
!********************************************************************
!     
! poisson
!********************************************************************
! initialize some arrays only once per run:
!
! precalculating the g0wgt and g2wgt arrays in FLR, and the
! pfilter array in POISSON, reduces the CPU time spent in these
! subroutines by about 25%.
!     
    do i=1,nspecies
       do n = n_low, n_high
          do m = m_low, m_high
             do l=1,ld-1
                rkp2=rkperp2(l,m,n)*rho(i)**2
!     
!     Use "new" Pade approximation if iflr=1:
!     
                if (iflr == 1) then
                   wgt=(1.-1./(1.+rkp2))
                   g0wgtp(l,m,n,i)=2./(2.+rkp2)
                   gewgt(l,m,n,i)=2./(2.+rkp2)
                   twgt(l,m,n,i)=-rkp2/(2.+rkp2)
                   nwgt(l,m,n,i)=1.
!     
!     or use "new" FLR model for iflr=2 or 3:
!     
                else if(iflr == 2.or.iflr == 3) then
                   wgt=(1.-s18ceE(rkp2,ifail))
                   rgammas=s18cfE(rkp2,ifail)/s18ceE(rkp2,ifail)     
                   tmu=1.-rkp2*(1.-rkp2/2.)+rkp2/2.*rgammas*(1.-rkp2*rgammas)
                   twgt(l,m,n,i)=-rkp2/2.*(1.-rgammas)
                   nwgt(l,m,n,i)=1.-rkp2*(1.-rkp2/4.)+rkp2*rgammas/2.*(1.+rkp2*(1.-3.*rgammas/2.))
                   gewgt(l,m,n,i)=sqrt(s18ceE(rkp2,ifail))
                   g0wgtp(l,m,n,i)=gewgt(l,m,n,i)/tmu
!     
!     <J_0> = exp(-b/2) for iflr=4:
!     
                else if(iflr == 4) then
                   wgt=(1.-s18ceE(rkp2,ifail))
                   g0wgtp(l,m,n,i)=exp(-rkp2/2.)
                   gewgt(l,m,n,i)=exp(-rkp2/2.)
                   twgt(l,m,n,i)=-rkp2/2.
                   nwgt(l,m,n,i)=1.
!     
!     or use Taylor series:
!     
                else if(iflr == 5) then
                   wgt=rkp2
                   g0wgtp(l,m,n,i)=1.
                   gewgt(l,m,n,i)=1.
                   twgt(l,m,n,i)=-rkp2/2.
                   nwgt(l,m,n,i)=1.
!     
!     Like Ron, but Linsker treatment different
!     
                else if(iflr == 6) then
                   wgt=(1.-s18ceE(rkp2,ifail))
                   twgt(l,m,n,i)=0.0
                   nwgt(l,m,n,i)=1.
                   g0wgtp(l,m,n,i)=1.
!
!      Full FLR, but no Linsker effect
!     
                else if(iflr == 7) then
                   wgt=(1.-s18ceE(rkp2,ifail))
                   twgt(l,m,n,i)=0.0
                   nwgt(l,m,n,i)=1.
                   g0wgtp(l,m,n,i)=1.
!
!     Pade in Poisson with new toroidal FLR terms
!
                else if(iflr == 8.or.iflr == 9) then
                   wgt=(1.-s18ceE(rkp2,ifail))
                   g0wgtp(l,m,n,i)=1.
                   gewgt(l,m,n,i)=1.
                   twgt(l,m,n,i)=-rkp2/2./(1.+rkp2/2.)**2
                   nwgt(l,m,n,i)=1./(1.+rkp2/2.)
                endif
                
                g0wgtp(l,m,n,i)=g0wgtp(l,m,n,i)*charge(i)*n_I(i)
                pfilter2(l,m,n)=pfilter2(l,m,n)+wgt*n_I(i)*charge(i)**2/tau(i)
                pfilter3(l,m,n,i)=wgt*n_I(i)*charge(i)**2/tau(i)
!
! GWH:
! Handle finite-size particle shapes/filtering. [Should check exactly 
! at what stage the particle simulations put in their phi filters, i.e.,
! before or after calculating <phi>?  Also, do they filter in real space
! or in field-line coordinates?  (Doesn't matter in the shearless 
! limit). Do different species have different sizes?]
!
! Use Dimit's style of filter (the parallel filter is elsewhere):
! Does he have 1/B factors or not?  Assume not:
!                zfilter=exp(-s_perp**4*rky2(m,n)**2/b_mag(l)**4) &
!                     /(1+s_perp**4*rkx2(l,m,n)**2/b_mag(l)**4)
                zfilter=exp(-s_perp**4*rky2(m,n)**2) &
                     /(1+s_perp**4*rkx2(l,m,n)**2)
!
! Standard filter style:
!               rkperp2=rky2(m,n)+rkx2(l,m,n)         
!               zfilter=exp(-s_perp**2*rkperp2)
                twgt(l,m,n,i)=twgt(l,m,n,i)*zfilter
                nwgt(l,m,n,i)=nwgt(l,m,n,i)*zfilter

! Put particle shapes/filtering in the coefficients used to find n_i_bar:

             enddo
          enddo
       enddo
    enddo

    pfilter = 1.0/(tiovte+pfilter2)
   
!     
!     determine constant a0 for use in semi-implicit method
!
    a0=0.
    if (beta_e > 0.) then
       if (semi_imp == 2) a0 = sqrt(1./(tiovte*beta_e))
       if (semi_imp == 2) then
          do n=n_low, n_high
             do m=m_low, m_high
                do l=1,ld-1
                   if (pfilter2(l,m,n) > 0.) then
                      a0=max(a0,1.01*sqrt(rkperp2(l,m,n)*(1./pfilter2(l,m,n)+ &
                           1./tiovte)/(2.0*tiovte*beta_e)))
                   endif
                enddo
             enddo
          enddo
          call max_allreduce(a0)
       endif
       if (semi_imp == 4) a0=a0*2.0
       if(proc0) write(*,*) 'a0= ',a0,'  1/(tau*beta_e)^(1/2)= ',sqrt(1./(tiovte*beta_e))
    endif

!cfpp$ skip R  
    if(iphi00 == 2 .and. (lin == 1 .or. shr == 0.)) then
       m = 1
       do n = n_low, n_high
          if(idx_local(m_lo, m, n)) then
             denom0(m,n)=0.               
             if (n /= 1) then
                do l=l_left,l_right-1
                   denom0(m,n)=denom0(m,n)+pfilter(l,m,n)*pfilter2(l,m,n)*jacobian(l)
!   added dl/B factor for flux surface average
                enddo
             endif
          endif
       enddo
    else
       m = 1
       do n=n_low, n_high
          if(idx_local(m_lo, m, n)) then
             if (n == 1) cycle
             denom0(m,n)=0.
             do l=l_left,l_right-1
                denom0(m,n)=denom0(m,n)+pfilter(l,m,n)*pfilter2(l,m,n)*jacobian(l)
             enddo
          endif
       enddo
    endif
!cfpp$ noskip R
      
  end subroutine flrinit

  subroutine init_kparfft

    use constants, only: pi
    use gryffin_grid, only: md, ldb, x0, ld
    use itg_data, only: shr, qsf, igeo, iperiod
    use bounds, only: ndom, scon, nconz, ndomz
    use gryffin_layouts
    integer :: j, l, lmax, m

    allocate (kpar(ld*nconz, ndomz, md))
    kpar = 0.

    if (iperiod == 3) allocate (tablekp(2*ldb*(nconz+1), &
         ndomz, m_low:m_alloc))

!     
! These arrays are replicated on each processor: 
    do m=1,md
       do j=1,ndom(m)
          lmax=ldb*scon(m,j)
          do l=1,lmax
             if (l <= lmax/2) then
                kpar(l,j,m)=(l-1)*2*pi/(x0*scon(m,j)*2.) 
             else
                kpar(l,j,m)=(l-1-lmax)*2*pi/(x0*scon(m,j)*2.) 
             endif
!     
!     local if qsf<0
!     
             if (qsf < 0 .and. igeo == 0) kpar(l,j,m)=shr
          enddo
       enddo
    enddo

  end subroutine init_kparfft

  subroutine flr(phi)
!
! calculate the proper FLR-averages of the potential
!
!     (c) Copyright 1991 to 1995 by Michael A. Beer, William D. Dorland, 
!     and Gregory W. Hammett. ALL RIGHTS RESERVED.
!
    use gryffin_layouts

! define the arguments:
    complex, dimension(:,m_low:,n_low:) :: phi

! declare other local variables:
    integer :: m

    m = size(gnwgt, 4)
! start executable code:
  
    phi_n      = spread(phi,4,m) * gnwgt
    phi_u      = spread(phi,4,m) * g0wgt
    phi_tpar   = spread(phi,4,m) * gtpwgt
    phi_tperp  = spread(phi,4,m) * gtwgt
    phi_qperp  = spread(phi,4,m) * flrwgt
    phi_flr1   = spread(phi,4,m) * flrwgt
    phi_flr2   = spread(phi,4,m) * flrwgt2
    phi_nd     = spread(phi,4,m) * gnwgtd
    phi_tpard  = spread(phi,4,m) * gtpwgtd
    phi_tperpd = spread(phi,4,m) * gtwgtd
    phi_qperpb = spread(phi,4,m) * gqwgtb
!!    if(nperpmom == 3) phi_flr3 = spread(phi,4,m) * grwgt
  
  end subroutine flr

  subroutine emflr(ap)
!
! calculate the proper FLR-averages of the vector potential
!
!     (c) Copyright 1991 to 1995 by Michael A. Beer, William D. Dorland, 
!     and Gregory W. Hammett. ALL RIGHTS RESERVED.
!

    use gryffin_layouts

! define the arguments:
    complex, dimension(:,m_low:,n_low:) :: ap

! declare other local variables:
    integer :: m

    m = size(gnwgt, 4)
  
    J0apar =     spread(ap,4,m) * g0wgt
    apar_u =     spread(ap,4,m) * guawgt
    apar_qpar =  spread(ap,4,m) * gtpwgt
    apar_qperp = spread(ap,4,m) * gqawgt
    apar_flr1 =  spread(ap,4,m) * flrwgt
    apar_flr2 =  spread(ap,4,m) * flrwgt2

  end subroutine emflr


  subroutine kparfft(f, iabs, nu)
!
!     (c) Copyright 1991 to 1998 by Michael A. Beer, William D. Dorland, 
!     P. B. Snyder, Q. P. Liu, and Gregory W. Hammett. ALL RIGHTS RESERVED.
!     
!     
!     Changing code to default complex notation.  bdorland
!
!     calculates    i * kpar * f     if iabs=-1
!     calculates        kpar * f     if iabs=0
!     calculates       |kpar|* f     if iabs=1
!     calculates  -i * |kpar|* f     if iabs=2
!     calculates   a/(1+nu*kpar**2)  if iabs=3
!     calculates   a/(1+nu*kpar**4)  if iabs=4
!     calculates   (1+nu*kpar**2)*a  if iabs=5
!     calculates   nu*kpar**2 *a     if iabs=6
!     calculates   a/kpar (for kpar.ne.0.) if iabs=7
!     and  f/(1+s_par**4*kpar**4)    if iabs=13
!
    use gryffin_layouts
    use redistribute
    use itg_data, only: nspecies, gpar, s_par, iperiod
    use bounds, only: ndom, scon, mmcon, nncon, &
                      mcon, ncon, conpos, nconz
    use fft_work
    use constants, only: ii
    use gryffin_grid, only: ld, ldb, nd
    use gryffin_redist, only: kpar_redist_type, init_kpar_redist

    type (kpar_redist_type), dimension(:), allocatable, save :: kmap

    integer i, j, k, l, m, n, iabs, m0, lmax, kidx

    real, intent (in) :: nu

    complex, dimension(:,m_low:,n_low:,:) :: f
!
! Change next line only if you have checked both cases: iperiod = 2 or 3
! since a and b are used differently in those two cases.
! (Each is allocated only once for iperiod = 2, many times for iperiod = 3)

    complex, dimension(:,:), allocatable, save :: a, b
    real, dimension(:), allocatable, save :: work, ttablekp

    real :: scale, tol_loc, sgpar
    integer :: isign, nwork, ntablekp, mdloc, ndloc
    integer :: k_low, k_high, k_alloc

    logical :: first = .true.

    tol_loc = 1.e-12

    mdloc = m_high-m_low+1
    ndloc = n_high-n_low+1

    if(iabs <= 0) then
       sgpar=gpar
    else if(iabs < 3 .or. iabs == 13) then
       sgpar=abs(gpar)
    else
       sgpar = 1.
    endif

    !
    !     easiest to use switch for iperiod=3
    !
    if (iperiod == 3) then

       ! FFT work array: 
       nwork=ccfft_work0 + ccfft_work1*ldb*(nconz+1)

       ! trig table: 
       ntablekp = ccfft_table0 + ccfft_table1*ldb*(nconz+1)

       if(first) then
          allocate (work(nwork))
          allocate (kmap(m_low:m_alloc))
       endif

! Load balancing is main concern in this loop.
       do m0=m_low, m_high

! start parallel section here
          if (first) call init_kpar_redist(nspecies, ldb, m0, &
                  ncon(m0,:), ndom(m0), scon(m0,:), mmcon(m0,:,:), &
                  nncon(m0,:,:), mcon, conpos, kmap(m0))
             
          k_low = kmap(m0)%k_lo%llim_proc
          k_high = kmap(m0)%k_lo%ulim_proc
          k_alloc = kmap(m0)%k_lo%ulim_alloc

          allocate (a(ld*nconz-1, k_low:k_alloc))
          allocate (b(ld*nconz-1, k_low:k_alloc))          

          if (first) then
             b = 0.
             do j = 1, kmap(m0)%k_lo%ndom   
                lmax = ldb*scon(m0,j)
                scale=1.0/sqrt(real(lmax))
                call ccfft(0, lmax, scale, b(1:lmax,k_low), &
                     b(1:lmax,k_low), tablekp(1:ntablekp,j,m0), work, 0) 
             enddo
          endif
         
          call gather (kmap(m0)%connect, f, a)

          isign=-1
          do kidx=k_low, k_high
             if (kmap(m0)%k_lo%skip(kidx)) cycle
             j = kmap(m0)%k_lo%j(kidx)
             lmax = kmap(m0)%k_lo%lmax(kidx)
             scale=kmap(m0)%k_lo%scale(kidx)
             call ccfft(isign, lmax, scale, a(1:lmax,kidx), &
                  b(1:lmax,kidx), tablekp(1:ntablekp,j,m0), work, 0)
          enddo

          do kidx=k_low, k_high
             if (kmap(m0)%k_lo%skip(kidx)) cycle
             j = kmap(m0)%k_lo%j(kidx)
             lmax = kmap(m0)%k_lo%lmax(kidx)
             select case (iabs)
                case (-1)
                   do l=1,lmax
                      a(l,kidx)=ii*kpar(l,j,m0)*b(l,kidx)
                   enddo
                case (0)
                   do l=1,lmax
                      a(l,kidx)=kpar(l,j,m0)*b(l,kidx)
                   enddo
                case (1)
                   do l=1,lmax
                      a(l,kidx)=abs(kpar(l,j,m0))*b(l,kidx)
                   enddo
                case (2)
                   do l=1,lmax
                      a(l,kidx)=-ii*abs(kpar(l,j,m0))*b(l,kidx)
                   enddo
                case (3)
                   do l=1,lmax
                      a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,j,m0)**2*gpar**2)
                   enddo
                case (4)
                   do l=1,lmax
                      a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,j,m0)**4*gpar**4)
                   enddo
                case (5)
                   do l=1,lmax
                      a(l,kidx)=(1.+nu*kpar(l,j,m0)**2*gpar**2)*b(l,kidx)
                   enddo
                case (6)
                   do l=1,lmax
                      a(l,kidx)=(nu*kpar(l,j,m0)**2*gpar**2)*b(l,kidx)
                   enddo
                case (7)
                   do l=1,lmax
                      if (abs(kpar(l,j,m0)*gpar) > tol_loc) then
                         a(l,kidx)=b(l,kidx)/(kpar(l,j,m0)*gpar)
                      else 
                         a(l,kidx)=0.
                      endif
                   enddo
                case (13)
                   do l=1,lmax
                      a(l,kidx)=b(l,kidx)/(1+(sgpar*kpar(l,j,m0)*s_par)**4)
                   enddo
             end select
          enddo

          isign=1
          do kidx=k_low, k_high
             if (kmap(m0)%k_lo%skip(kidx)) cycle
             j = kmap(m0)%k_lo%j(kidx)
             lmax = kmap(m0)%k_lo%lmax(kidx)
             scale = kmap(m0)%k_lo%scale(kidx)
             call ccfft(isign, lmax, scale, a(1:lmax,kidx), &
                  b(1:lmax,kidx), tablekp(1:ntablekp,j,m0), work, 0)
          enddo

          call scatter (kmap(m0)%connect, b, f)

          if(iabs < 3) then
             do j = 1, kmap(m0)%k_lo%ndom
                do k=1,ncon(m0,j)*scon(m0,j)
                   do i=1,nspecies
                      m=mmcon(m0,j,k)
                      n=nncon(m0,j,k)
                      if (idx_local(m_lo, m, n)) then
                         do l=1,ldb
                            f(l,m,n,i)=f(l,m,n,i)*sgpar
                         enddo
                      endif
                   enddo
                enddo
             enddo
          endif

          deallocate (a, b)
       enddo

    else
!
!     non-connected way for iperiod=0,1,2
!
       scale=1.0/sqrt(float(ldb))

       ! FFT work array: 
       nwork=ccfft_work0 + ccfft_work1*ldb

       ! trig table: 
       ntablekp = ccfft_table0 + ccfft_table1*ldb

       if(first) then
          allocate(work(nwork), ttablekp(ntablekp))
          allocate (a(ld, mdloc*ndloc*nspecies))
          allocate (b(ld, mdloc*ndloc*nspecies))
          b = 0.
          call ccfft(0, ldb, scale, b(1:ldb,1), b(1:ldb,1), &
               ttablekp, work, 0)
       endif

       do i=1,nspecies
          do n=n_low, n_high
             do m=m_low, m_high
                kidx=(m-m_low+1)+(n-n_low)*mdloc+(i-1)*mdloc*ndloc
                do l=1,ldb
                   a(l,kidx)=f(l,m,n,i)
                enddo
             enddo
          enddo
       enddo

       isign=-1
       do kidx=1,mdloc*ndloc*nspecies
          call ccfft(isign, ldb, scale, a(1:ldb,kidx), b(1:ldb,kidx), ttablekp, work, 0)
       enddo

       do kidx=1,mdloc*ndloc*nspecies
          select case (iabs)
             case (-1)                
                do l=1,ldb
                   a(l,kidx)=ii*kpar(l,1,1)*b(l,kidx)
                enddo
             case (0)
                do l=1,ldb
                   a(l,kidx)=kpar(l,1,1)*b(l,kidx)
                enddo
             case (1)
                do l=1,ldb
                   a(l,kidx)=abs(kpar(l,1,1))*b(l,kidx)
                enddo
             case (2)
                do l=1,ldb
                   a(l,kidx)=-ii*abs(kpar(l,1,1))*b(l,kidx)
                enddo
             case (3) 
                do l=1,ldb
                   a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,1,1)**2*gpar**2)
                enddo
             case (4) 
                do l=1,ldb
                   a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,1,1)**4*gpar**4)
                enddo
             case (5)
                do l=1,ldb
                   a(l,kidx)=(1.+nu*kpar(l,1,1)**2*gpar**2)*b(l,kidx)
                enddo
             case (6)
                do l=1,ldb
                   a(l,kidx)=(nu*kpar(l,1,1)**2*gpar**2)*b(l,kidx)
                enddo
             case (7)
                do l=1,ldb
                   if (abs(kpar(l,1,1)*gpar) > tol_loc) then
                      a(l,kidx)=b(l,kidx)/(kpar(l,1,1)*gpar)
                   else 
                      a(l,kidx)=0.
                   endif
                enddo
             case (13)
                do l=1,ldb
                   a(l,kidx)=b(l,kidx)/(1+(sgpar*kpar(l,1,1)*s_par)**4)
                enddo
          end select
       enddo

       isign=1

       do kidx=1,mdloc*ndloc*nspecies
          call ccfft(isign, ldb, scale, a(1:ldb,kidx), b(1:ldb,kidx), ttablekp, work, 0)
       enddo

       if(iabs < 10) then
          do i=1,nspecies
             do n=n_low, n_high
                do m=m_low, m_high
                   kidx=(m-m_low+1)+(n-n_low)*mdloc+(i-1)*mdloc*ndloc
                   do l=1,ldb
                      f(l,m,n,i)=b(l,kidx)*sgpar
                   enddo
                enddo
             enddo
          enddo
       else
          do i=1,nspecies
             do n=n_low, n_high
                do m=m_low, m_high
                   kidx=(m-m_low+1)+(n-n_low)*mdloc+(i-1)*mdloc*ndloc
                   do l=1,ldb
                      f(l,m,n,i)=b(l,kidx)
                   enddo
                enddo
             enddo
          enddo
       endif
    endif

    first = .false.

  end subroutine kparfft

  subroutine kparffte(f, iabs, nu)
    !     
    !     Changing code to default complex notation.  bdorland
    !
    !     calculates    i * kpar * f     if iabs=-1
    !     calculates        kpar * f     if iabs=0
    !     calculates       |kpar|* f     if iabs=1
    !     calculates  -i * |kpar|* f     if iabs=2
    !     calculates   a/(1+nu*kpar**2)  if iabs=3
    !     calculates   a/(1+nu*kpar**4)  if iabs=4
    !     calculates   (1+nu*kpar**2)*a  if iabs=5
    !     calculates   nu*kpar**2 *a     if iabs=6
    !     calculates   a/kpar (for kpar.ne.0.) if iabs=7
    !     and  f/(1+s_par**4*kpar**4)    if iabs=13
    !
    use gryffin_layouts
    use redistribute
    use itg_data, only: gpar, iperiod, s_par
    use bounds, only: ndom, scon, mmcon, nncon, &
                      mcon, ncon, conpos, nconz
    use fft_work
    use constants, only: ii
    use gryffin_grid, only: ld, ldb, nd
    use gryffin_redist, only: kpar_redist_type, init_kpar_redist

    type (kpar_redist_type), dimension(:), allocatable, save :: kmap

    integer i, j, k, l, m, n, iabs, kidx, m0, lmax

    real, intent (in) :: nu

    complex, dimension(:,m_low:,n_low:) :: f
!
! Change next line only if you have checked both cases: iperiod = 2 or 3
! since a and b are used differently in those two cases.
! (Each is allocated only once for iperiod = 2, many times for iperiod = 3)

    complex, dimension(:,:), allocatable, save :: a, b
    real, dimension(:), allocatable, save :: work, ttablekp

    real :: scale, tol_loc, sgpar
    integer :: isign, nwork, ntablekp, mdloc, ndloc
    integer :: k_low, k_high, k_alloc

    logical :: first = .true.

    tol_loc = 1.e-12

    mdloc = m_high-m_low+1
    ndloc = n_high-n_low+1

    if(iabs <= 0) then
       sgpar=gpar
    else if(iabs < 3 .or. iabs == 13) then
       sgpar=abs(gpar)
    else
       sgpar = 1.
    endif

    !
    !     easiest to use switch for iperiod=3
    !
    if (iperiod == 3) then

       ! FFT work array: 
       nwork=ccfft_work0 + ccfft_work1*ldb*(nconz+1)

       ! trig table: 
       ntablekp = ccfft_table0 + ccfft_table1*ldb*(nconz+1)

       if(first) then
          allocate (work(nwork))
          allocate (kmap(m_low:m_alloc))
       endif

       do m0=m_low, m_high

! start parallel section here
          if (first) call init_kpar_redist(0, ldb, m0, &
               ncon(m0,:), ndom(m0), scon(m0,:), mmcon(m0,:,:), &
               nncon(m0,:,:), mcon, conpos, kmap(m0))
          
          k_low = kmap(m0)%k_lo%llim_proc
          k_high = kmap(m0)%k_lo%ulim_proc
          k_alloc = kmap(m0)%k_lo%ulim_alloc
          
          allocate (a(ld*nconz-1, k_low:k_alloc))
          allocate (b(ld*nconz-1, k_low:k_alloc))          

          if (first) then
             b = 0.
             do j = 1, kmap(m0)%k_lo%ndom   
                lmax = ldb*scon(m0,j)
                scale=1.0/sqrt(real(lmax))
                call ccfft(0, lmax, scale, b(1:lmax,k_low), &
                     b(1:lmax,k_low), tablekp(1:ntablekp,j,m0), work, 0) 
             enddo
          endif
         
          call gather (kmap(m0)%connect, f, a)

          isign=-1
          do kidx=k_low, k_high
             if (kmap(m0)%k_lo%skip(kidx)) cycle
             j = kmap(m0)%k_lo%j(kidx)
             lmax = kmap(m0)%k_lo%lmax(kidx)
             scale=kmap(m0)%k_lo%scale(kidx)
             call ccfft(isign, lmax, scale, a(1:lmax,kidx), &
                  b(1:lmax,kidx), tablekp(1:ntablekp,j,m0), work, 0)
          enddo

          do kidx=k_low, k_high
             if (kmap(m0)%k_lo%skip(kidx)) cycle
             j = kmap(m0)%k_lo%j(kidx)
             lmax = kmap(m0)%k_lo%lmax(kidx)
             select case (iabs)
             case (-1) 
                do l=1,lmax
                   a(l,kidx)=ii*kpar(l,j,m0)*b(l,kidx)
                enddo
             case (0)
                do l=1,lmax
                   a(l,kidx)=kpar(l,j,m0)*b(l,kidx)
                enddo
             case (1)
                do l=1,lmax
                   a(l,kidx)=abs(kpar(l,j,m0))*b(l,kidx)
                enddo
             case (2)
                do l=1,lmax
                   a(l,kidx)=-ii*abs(kpar(l,j,m0))*b(l,kidx)
                enddo
             case (3)
                do l=1,lmax
                   a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,j,m0)**2*gpar**2)
                enddo
             case (4)
                do l=1,lmax
                   a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,j,m0)**4*gpar**4)
                enddo
             case (5) 
                do l=1,lmax
                   a(l,kidx)=(1.+nu*kpar(l,j,m0)**2*gpar**2)*b(l,kidx)
                enddo
             case (6)
                do l=1,lmax
                   a(l,kidx)=(nu*kpar(l,j,m0)**2*gpar**2)*b(l,kidx)
                enddo
             case (7)
                do l=1,lmax
                   if (abs(kpar(l,j,m0)*gpar) > tol_loc) then
                      a(l,kidx)=b(l,kidx)/(kpar(l,j,m0)*gpar)
                   else 
                      a(l,kidx)=0.
                   endif
                enddo
             case (13)
                do l=1,lmax
                   a(l,kidx)=b(l,kidx)/(1+(sgpar*kpar(l,j,m0)*s_par)**4)
                enddo
             end select
          enddo
          
          isign=1
          do kidx=k_low, k_high
             if (kmap(m0)%k_lo%skip(kidx)) cycle
             j = kmap(m0)%k_lo%j(kidx)
             lmax = kmap(m0)%k_lo%lmax(kidx)
             scale = kmap(m0)%k_lo%scale(kidx)
             call ccfft(isign, lmax, scale, a(1:lmax,kidx), &
                  b(1:lmax,kidx), tablekp(1:ntablekp,j,m0), work, 0)
          enddo

          call scatter (kmap(m0)%connect, b, f)

          if(iabs < 3) then
             do j = 1, kmap(m0)%k_lo%ndom
                do k=1,ncon(m0,j)*scon(m0,j)
                   m=mmcon(m0,j,k)
                   n=nncon(m0,j,k)
                   if (idx_local(m_lo, m, n)) then
                      do l=1,ldb
                         f(l,m,n)=f(l,m,n)*sgpar
                      enddo
                   endif
                enddo
             enddo
          endif

          deallocate (a, b)
       enddo
    else
!
!     non-connected way for iperiod=0,1,2
!
       scale=1.0/sqrt(float(ldb))

       ! FFT work array: 
       nwork=ccfft_work0 + ccfft_work1*ldb

       ! trig table: 
       ntablekp = ccfft_table0 + ccfft_table1*ldb

       if(first) then
          allocate (work(nwork), ttablekp(ntablekp))
          allocate (a(ld, mdloc*ndloc))
          allocate (b(ld, mdloc*ndloc))
          b = 0.
          call ccfft(0, ldb, scale, b(1:ldb,1), b(1:ldb,1), &
               ttablekp, work, 0)
       endif

       do n=n_low, n_high
          do m=m_low, m_high
             kidx=(m-m_low+1)+(n-n_low)*mdloc
             do l=1,ldb
                a(l,kidx)=f(l,m,n)
             enddo
          enddo
       enddo

       isign=-1
       do kidx=1,mdloc*ndloc
          call ccfft(isign, ldb, scale, a(1:ldb,kidx), b(1:ldb,kidx), ttablekp, work, 0)
       enddo

       do kidx=1,mdloc*ndloc
          select case (iabs)
             case (-1)
                do l=1,ldb
                   a(l,kidx)=ii*kpar(l,1,1)*b(l,kidx)
                enddo
             case (0)
                do l=1,ldb
                   a(l,kidx)=kpar(l,1,1)*b(l,kidx)
                enddo
             case (1)
                do l=1,ldb
                   a(l,kidx)=abs(kpar(l,1,1))*b(l,kidx)
                enddo
             case (2)
                do l=1,ldb
                   a(l,kidx)=-ii*abs(kpar(l,1,1))*b(l,kidx)
                enddo
             case (3)
                do l=1,ldb
                   a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,1,1)**2*gpar**2)
                enddo
             case (4)
                do l=1,ldb
                   a(l,kidx)=b(l,kidx)/(1.+nu*kpar(l,1,1)**4*gpar**4)
                enddo
             case (5)
                do l=1,ldb
                   a(l,kidx)=(1.+nu*kpar(l,1,1)**2*gpar**2)*b(l,kidx)
                enddo
             case (6)
                do l=1,ldb
                   a(l,kidx)=(nu*kpar(l,1,1)**2*gpar**2)*b(l,kidx)
                enddo
             case (7)
                do l=1,ldb
                   if (abs(kpar(l,1,1)*gpar) > tol_loc) then
                      a(l,kidx)=b(l,kidx)/(kpar(l,1,1)*gpar)
                   else 
                      a(l,kidx)=0.
                   endif
                enddo
             case (13)
                do l=1,ldb
                   a(l,kidx)=b(l,kidx)/(1+(sgpar*kpar(l,1,1)*s_par)**4)
                enddo
          end select
       enddo

       isign=1

       do kidx=1,mdloc*ndloc
          call ccfft(isign, ldb, scale, a(1:ldb,kidx), b(1:ldb,kidx), ttablekp, work, 0)
       enddo

       if(iabs < 10) then
          do n=n_low, n_high
             do m=m_low, m_high
                kidx=(m-m_low+1)+(n-n_low)*mdloc
                do l=1,ldb
                   f(l,m,n)=b(l,kidx)*sgpar
                enddo
             enddo
          enddo
       else
          do n=n_low, n_high
             do m=m_low, m_high
                kidx=(m-m_low+1)+(n-n_low)*mdloc
                do l=1,ldb
                   f(l,m,n)=b(l,kidx)
                enddo
             enddo
          enddo
       endif

    endif

    first = .false.

  end subroutine kparffte

  subroutine filter(density1, u_par1, t_par1, q_par1, t_perp1, q_perp1, dt_loc)
!
!     (c) Copyright 1991 to 1998 by Michael A. Beer, William D. Dorland, 
!     P. B. Snyder, Q. P. Liu, and Gregory W. Hammett. ALL RIGHTS RESERVED.
!     
    use itg_data, only: iflr, icrit, nspecies, rho, rmu1, iexp
    use itg_data, only: nparmom, nperpmom, rdiff
    use gryffin_layouts
    use gryffin_grid, only: ldb, rkx2, rkperp2
    
    complex, dimension(:,m_low:,n_low:,:) &
         :: density1, u_par1, t_par1, q_par1, t_perp1, q_perp1
    real, dimension(m_low:) :: dt_loc
    
    real, dimension(ldb, m_low:m_alloc, &
         n_low:n_alloc, nspecies) :: pfilterf, pfiltern
    
! declare other local variables
    integer n,m,l,i
    real rkp2
!
!       If iflr=6, use k^2 damping on all moments, like Ron
!
    if (iflr == 6) then          
       if(icrit /= 0) write(*,*) 'update the filter subroutine'
       do i=1,nspecies
          do n = n_low, n_high
             do m = m_low, m_high
                do l=1,ldb
!                 pfiltern(l,m,n,i)=1./(1.+dt_loc(m)*rmu1*rkx2(l,m,n)*rho(i)**2)
                   pfilterf(l,m,n,i)=1./(1.+dt_loc(m)*rmu1*rkx2(l,m,n)*rho(i)**2)
                enddo
             enddo
          enddo
       enddo
    else 
       do i=1,nspecies
          do n = n_low, n_high
             do m = m_low, m_high
                do l=1,ldb
                   rkp2=rkperp2(l,m,n)*rho(i)**2
                   pfiltern(l,m,n,i)=1./(1.+dt_loc(m)*rdiff(m,n)*rkp2**iexp)
                   pfilterf(l,m,n,i)=1./(1.+dt_loc(m)*rdiff(m,n)*rkp2)
                enddo
             enddo
          enddo
       enddo
    endif
    
    do n = n_low, n_high
       do m = m_low, m_high
          do l=1,ldb
             density1(l,m,n,:)=density1(l,m,n,:)*pfiltern(l,m,n,:)
!           density1(l,m,n,:)=density1(l,m,n,:)*pfilterf(l,m,n,:)
             u_par1(l,m,n,:)=u_par1(l,m,n,:)*pfilterf(l,m,n,:)
             t_par1(l,m,n,:)=t_par1(l,m,n,:)*pfilterf(l,m,n,:)
             t_perp1(l,m,n,:)=t_perp1(l,m,n,:)*pfilterf(l,m,n,:)
          enddo
       enddo
    enddo
    
    if(nparmom >= 4) then
       do n = n_low, n_high
          do m = m_low, m_high
             do l=1,ldb
                q_par1(l,m,n,:)=q_par1(l,m,n,:)*pfilterf(l,m,n,:)
             enddo
          enddo
       enddo
    endif
    
    if(nperpmom >= 2) then
       do n = n_low, n_high
          do m = m_low, m_high        
             do l=1,ldb
                q_perp1(l,m,n,:)=q_perp1(l,m,n,:)*pfilterf(l,m,n,:)
             enddo
          enddo
       enddo
    endif
    
  end subroutine filter

end module linear