module gryffin_redist ! ! (c) Copyright 1991 to 1998 by Michael A. Beer, William D. Dorland, ! P. B. Snyder, Q. P. Liu, and Gregory W. Hammett. ALL RIGHTS RESERVED. ! use redistribute, only: redist_type implicit none private ! nlps redistribution: public :: init_gryffin_redist, delete_gryffin_redist public :: x_layout, y_layout public :: gryffin_redist_type ! iperiod=2 kpar redistribution public :: ky_is_zero public :: init_fill_ends, gryffin_fill_type ! iperiod=3 kpar redistribution public :: init_kpar_redist, delete_kpar_redist public :: kpar_layout public :: kpar_redist_type type :: y_layout integer :: iproc integer :: lmax, md, nd, malias, nalias integer :: llim_world, ulim_world, llim_proc, ulim_proc, blocksize, ulim_alloc end type y_layout type :: x_layout integer :: iproc integer :: lmax, md, nd, malias, nalias, nddp, nadd integer :: llim_world, ulim_world, llim_proc, ulim_proc, blocksize, ulim_alloc end type x_layout type :: gryffin_redist_type type (x_layout) :: x_lo type (y_layout) :: y_lo type (redist_type) :: kx2x, x2y end type gryffin_redist_type type :: gryffin_fill_type type (redist_type) :: fill ! could put more stuff in here for debugging, etc. end type gryffin_fill_type type :: kpar_layout integer :: iproc, iproc_group, mproc integer :: ndom, ncontot, nspecies integer :: llim_group, ulim_group, llim_proc, ulim_proc, blocksize, ulim_alloc integer, dimension(:), pointer :: j, lmax, ip_list logical, dimension(:), pointer :: skip real, dimension(:), pointer :: scale end type kpar_layout type :: kpar_redist_type type (kpar_layout) :: k_lo type (redist_type) :: connect end type kpar_redist_type contains subroutine init_gryffin_redist (lmax, md, nd, malias, nalias, nddp, nadd, r) use mp, only: iproc, nproc, proc0 use redistribute use gryffin_layouts, only: m_lo, proc_id, idx_local implicit none integer, intent (in) :: lmax, md, nd, malias, nalias, nddp, nadd type (gryffin_redist_type), intent (out) :: r type (index_list_type), dimension(0:nproc-1) :: to_list, from_list integer, dimension(0:nproc-1) :: nn_to, nn_from integer :: l, m, n, xidx, yidx integer :: nn, ip, to_low integer, dimension(3) :: from_low ! Set up and save data layout for y transforms r%y_lo%iproc = iproc r%y_lo%lmax = lmax r%y_lo%md = md r%y_lo%malias = malias r%y_lo%nd = nd r%y_lo%nalias = nalias r%y_lo%llim_world = 0 r%y_lo%ulim_world = lmax*nalias - 1 r%y_lo%blocksize = r%y_lo%ulim_world/nproc + 1 r%y_lo%llim_proc = r%y_lo%blocksize*iproc r%y_lo%ulim_proc = min(r%y_lo%ulim_world, & r%y_lo%llim_proc + r%y_lo%blocksize - 1) r%y_lo%ulim_alloc = max(r%y_lo%llim_proc, r%y_lo%ulim_proc) ! Set up and save data layout for x transforms r%x_lo%iproc = iproc r%x_lo%lmax = lmax r%x_lo%md = md r%x_lo%malias = malias r%x_lo%nd = nd r%x_lo%nalias = nalias r%x_lo%nddp = nddp r%x_lo%nadd = nadd r%x_lo%llim_world = 0 r%x_lo%ulim_world = lmax*md - 1 r%x_lo%blocksize = r%x_lo%ulim_world/nproc + 1 r%x_lo%llim_proc = r%x_lo%blocksize*iproc r%x_lo%ulim_proc = min(r%x_lo%ulim_world, & r%x_lo%llim_proc + r%x_lo%blocksize - 1) r%x_lo%ulim_alloc = max(r%x_lo%llim_proc, r%x_lo%ulim_proc) ! count number of elements to be redistributed to/from each processor ! in first redistribution (for x transforms). nn_to = 0 nn_from = 0 do n = m_lo%nl_world, m_lo%nu_world do m = m_lo%ml_world, m_lo%mu_world do l = 1, lmax xidx = l-1 + (m-1)*lmax if (idx_local(m_lo,m,n)) then ip = xproc_id(r, xidx) nn_from(ip) = nn_from(ip) + 1 end if if (xidx_local(r,xidx)) then ip = proc_id(m_lo, m, n) nn_to(ip) = nn_to(ip) + 1 end if end do end do end do ! Allocate arrays for lists of indices that will be passed do ip = 0, nproc-1 if (nn_from(ip) > 0) then allocate (from_list(ip)%first(nn_from(ip))) allocate (from_list(ip)%second(nn_from(ip))) allocate (from_list(ip)%third(nn_from(ip))) end if if (nn_to(ip) > 0) then allocate (to_list(ip)%first(nn_to(ip))) allocate (to_list(ip)%second(nn_to(ip))) end if end do ! get local indices of elements redistributed to/from other processors nn_to = 0 nn_from = 0 do n = m_lo%nl_world, m_lo%nu_world do m = m_lo%ml_world, m_lo%mu_world do l = 1, lmax xidx = l-1 + (m-1)*lmax if (idx_local(m_lo,m,n)) then ip = xproc_id(r,xidx) nn = nn_from(ip) + 1 nn_from(ip) = nn from_list(ip)%first(nn) = l from_list(ip)%second(nn) = m from_list(ip)%third(nn) = n end if if (xidx_local(r,xidx)) then ip = proc_id(m_lo,m,n) nn = nn_to(ip) + 1 nn_to(ip) = nn ! complication is related to dealiasing if (n <= nddp) then to_list(ip)%first(nn) = n else to_list(ip)%first(nn) = n + nadd endif to_list(ip)%second(nn) = xidx end if end do end do end do from_low(1) = 1 from_low(2) = m_lo%ml_proc from_low(3) = m_lo%nl_proc to_low = r%x_lo%llim_proc call init_redist (r%kx2x, 'c', to_low, to_list, from_low, from_list) call delete_list(to_list) call delete_list(from_list) ! Now do it again for the redistribution for the y transforms ! count number of elements to be redistributed to/from each processor ! for second redistribution nn_to = 0 nn_from = 0 do m = m_lo%ml_world, m_lo%mu_world do n = 1, nalias do l = 1, lmax yidx = l-1 + (n-1)*lmax xidx = l-1 + (m-1)*lmax if (xidx_local(r,xidx)) then ip = yproc_id(r, yidx) nn_from(ip) = nn_from(ip) + 1 end if if (yidx_local(r,yidx)) then ip = xproc_id(r, xidx) nn_to(ip) = nn_to(ip) + 1 end if end do end do end do do ip = 0, nproc-1 if (nn_from(ip) > 0) then allocate (from_list(ip)%first(nn_from(ip))) allocate (from_list(ip)%second(nn_from(ip))) end if if (nn_to(ip) > 0) then allocate (to_list(ip)%first(nn_to(ip))) allocate (to_list(ip)%second(nn_to(ip))) end if end do ! get local indices of elements redistributed to/from other processors nn_to = 0 nn_from = 0 do m = m_lo%ml_world, m_lo%mu_world do n = 1, nalias do l = 1, lmax yidx = l-1 + (n-1)*lmax xidx = l-1 + (m-1)*lmax if (xidx_local(r,xidx)) then ip = yproc_id(r,yidx) nn = nn_from(ip) + 1 nn_from(ip) = nn from_list(ip)%first(nn) = n from_list(ip)%second(nn) = xidx end if if (yidx_local(r,yidx)) then ip = xproc_id(r,xidx) nn = nn_to(ip) + 1 nn_to(ip) = nn to_list(ip)%first(nn) = m to_list(ip)%second(nn) = yidx end if end do end do end do from_low(1) = 1 from_low(2) = r%x_lo%llim_proc to_low = r%y_lo%llim_proc call init_redist (r%x2y, 'c', to_low, to_list, from_low(1:2), from_list) call delete_list(to_list) call delete_list(from_list) end subroutine init_gryffin_redist subroutine delete_gryffin_redist (r) use mp, only: nproc use redistribute, only: delete_redist implicit none type (gryffin_redist_type), intent (in out) :: r integer :: i r%y_lo%iproc = -1 r%y_lo%lmax = -1 r%x_lo%iproc = -1 r%x_lo%lmax = -1 call delete_redist(r%kx2x) call delete_redist(r%x2y) end subroutine delete_gryffin_redist function yproc_id (r, idx) implicit none integer :: yproc_id type (gryffin_redist_type), intent (in) :: r integer, intent (in) :: idx yproc_id = idx/r%y_lo%blocksize end function yproc_id function yidx_local (r, idx) implicit none logical :: yidx_local type (gryffin_redist_type), intent (in) :: r integer, intent (in) :: idx yidx_local = r%y_lo%iproc == yproc_id(r,idx) end function yidx_local function xproc_id (r, idx) implicit none integer :: xproc_id type (gryffin_redist_type), intent (in) :: r integer, intent (in) :: idx xproc_id = idx/r%x_lo%blocksize end function xproc_id function xidx_local (r, idx) implicit none logical :: xidx_local type (gryffin_redist_type), intent (in) :: r integer, intent (in) :: idx xidx_local = r%x_lo%iproc == xproc_id(r,idx) end function xidx_local function ky_is_zero (r, idx) implicit none logical :: ky_is_zero type (gryffin_redist_type), intent (in) :: r integer, intent (in) :: idx ky_is_zero = idx < r%x_lo%lmax end function ky_is_zero subroutine init_fill_ends (e, mrr, nrr, ninv, b1, znshift, & l_left, l_right, ld, n_max, nd) use constants, only: ii use mp, only: iproc, nproc use redistribute use gryffin_layouts, only: m_lo, proc_id, idx_local type (gryffin_fill_type), intent (out) :: e integer, dimension(:,:), intent (in) :: mrr integer, dimension(:), intent (in) :: nrr integer, intent (in) :: l_left, l_right, ld, n_max, nd integer, dimension(-nd:), intent (in) :: ninv real, intent (in) :: b1, znshift type (index_list_type), dimension(0:nproc-1) :: to_list, from_list integer, dimension(0:nproc-1) :: nn_to, nn_from integer, dimension(3) :: from_low, to_low real :: beta1 integer :: l, m, n, mr, nr, nshift, ipfrom, ipto, ip, nn ! Figure out where data will be going to/from: nn_to = 0 nn_from = 0 do n = m_lo%nl_world, m_lo%nu_world do m = m_lo%ml_world, m_lo%mu_world nr=nrr(n) mr=mrr(m,n) beta1=mr*b1 nshift=mr*znshift ! If there is data to be shifted, continue if ((mr == 0).or.((abs(nr+nshift) <= n_max) .and. (nshift > 0))) then ! This data will be coming from processor ipfrom: ipfrom = proc_id(m_lo, m, ninv(nr+nshift)) ! and going to processor ipto: ipto = proc_id(m_lo, m, n) ! if (iproc == ipto), then guard cell data will be coming to here from processor ipfrom if (iproc == ipto) then do l = 1, l_left - 1 nn_to(ipfrom) = nn_to(ipfrom) + 1 enddo endif ! if (iproc == ipfrom) then physical data will going from here to processor ipto if (iproc == ipfrom) then do l = 1, l_left - 1 nn_from(ipto) = nn_from(ipto) + 1 enddo endif endif ! ditto if ((mr == 0).or.((abs(nr-nshift) <= n_max) .and. (nshift > 0))) then ipfrom = proc_id(m_lo, m, ninv(nr-nshift)) ipto = proc_id(m_lo, m, n) if (iproc == ipto) then do l = l_right, ld nn_to(ipfrom) = nn_to(ipfrom) + 1 enddo endif if (iproc == ipfrom) then do l = l_right, ld nn_from(ipto) = nn_from(ipto) + 1 enddo endif endif enddo enddo do ip = 0, nproc-1 if (nn_to(ip) > 0) then allocate (to_list(ip)%first(nn_to(ip))) allocate (to_list(ip)%second(nn_to(ip))) allocate (to_list(ip)%third(nn_to(ip))) end if if (nn_from(ip) > 0) then allocate (from_list(ip)%first(nn_from(ip))) allocate (from_list(ip)%second(nn_from(ip))) allocate (from_list(ip)%third(nn_from(ip))) end if end do nn_to = 0 nn_from = 0 do n=m_lo%nl_world, m_lo%nu_world do m=m_lo%ml_world, m_lo%mu_world nr=nrr(n) mr=mrr(m,n) beta1=mr*b1 nshift=mr*znshift if ((mr == 0).or.((abs(nr+nshift) <= n_max) .and. (nshift > 0))) then ipfrom = proc_id(m_lo, m, ninv(nr+nshift)) ipto = proc_id(m_lo, m, n) if (iproc == ipto) then do l = 1, l_left - 1 nn = nn_to(ipfrom) + 1 nn_to(ipfrom) = nn to_list(ipfrom)%first(nn) = l to_list(ipfrom)%second(nn) = m to_list(ipfrom)%third(nn) = n enddo endif if (iproc == ipfrom) then do l = 1, l_left - 1 nn = nn_from(ipto) + 1 nn_from(ipto) = nn from_list(ipto)%first(nn) = l + l_right - l_left from_list(ipto)%second(nn) = m from_list(ipto)%third(nn) = ninv(nr+nshift) enddo endif endif if ((mr == 0).or.((abs(nr-nshift) <= n_max) .and. (nshift > 0))) then ipfrom = proc_id(m_lo, m, ninv(nr-nshift)) ipto = proc_id(m_lo, m, n) if (iproc == ipto) then do l = l_right, ld nn = nn_to(ipfrom) + 1 nn_to(ipfrom) = nn to_list(ipfrom)%first(nn) = l to_list(ipfrom)%second(nn) = m to_list(ipfrom)%third(nn) = n enddo endif if (iproc == ipfrom) then do l = l_right, ld nn = nn_from(ipto) + 1 nn_from(ipto) = nn from_list(ipto)%first(nn) = l - l_right + l_left from_list(ipto)%second(nn) = m from_list(ipto)%third(nn) = ninv(nr-nshift) enddo endif endif enddo enddo from_low(1) = 1 from_low(2) = m_lo%ml_proc from_low(3) = m_lo%nl_proc to_low(1) = 1 to_low(2) = m_lo%ml_proc to_low(3) = m_lo%nl_proc call init_fill (e%fill, 'c', to_low, to_list, from_low, from_list) call delete_list(to_list) call delete_list(from_list) end subroutine init_fill_ends subroutine init_kpar_redist (nspecies, lmax, m0, ncon, ndom, scon, & mmcon, nncon, mcon, conpos, kp) use mp, only: iproc, nproc, proc0 use redistribute use gryffin_layouts, only: m_lo, proc_id, idx_local implicit none integer, intent (in) :: nspecies, lmax, m0, ndom integer, dimension(:), intent (in) :: ncon, scon integer, dimension(:,:), intent (in) :: mmcon, nncon, mcon, conpos type (kpar_redist_type), intent (out) :: kp type (index_list_type), dimension(0:nproc-1) :: to_list, from_list integer, dimension(0:nproc-1) :: nn_to, nn_from, ip_list integer :: i, j, k, l, m, n, kidx, mproc, iproc_group integer :: nn, ip, to_low, ncontot, ipfrom, ipto integer, dimension(:), allocatable :: from_low integer :: nsp ! ! No guarantee that there will not be communication in the limit of ! each proc coming in with a different n and no connections, ! particularly with multiple ion species. ! ! Some coding to guarantee better alignment between incoming ! and outgoing procs will eventually be needed. ! if (nspecies > 0) then allocate (from_low(4)) nsp = nspecies else allocate (from_low(3)) nsp = 1 endif ncontot = 0 do j = 1, ndom ncontot = ncontot + ncon(j) enddo ! Get a list of all procs in this group ! (i.e., those that have the same value of m0). ip_list = -1 mproc = 0 do n = m_lo%nl_world, m_lo%nu_world ip = proc_id (m_lo, m0, n) if (any(ip_list == ip)) cycle ip_list(mproc) = ip mproc = mproc + 1 enddo ! ! Find the position of the present processor in that list ! do ip = 0, mproc - 1 if (ip_list(ip) == iproc) iproc_group = ip enddo ! Set up and save data layout for connected kpar transforms kp%k_lo%ndom = ndom kp%k_lo%ncontot = ncontot kp%k_lo%iproc = iproc kp%k_lo%iproc_group = iproc_group kp%k_lo%mproc = mproc kp%k_lo%nspecies = nsp kp%k_lo%llim_group = 0 kp%k_lo%ulim_group = ndom*ncontot*nsp - 1 kp%k_lo%blocksize = kp%k_lo%ulim_group/mproc + 1 kp%k_lo%llim_proc = kp%k_lo%blocksize*iproc_group kp%k_lo%ulim_proc = min(kp%k_lo%ulim_group, & kp%k_lo%llim_proc + kp%k_lo%blocksize - 1) kp%k_lo%ulim_alloc = max(kp%k_lo%llim_proc, kp%k_lo%ulim_proc) allocate (kp%k_lo%skip (kp%k_lo%llim_proc:kp%k_lo%ulim_alloc)) allocate (kp%k_lo%lmax (kp%k_lo%llim_proc:kp%k_lo%ulim_alloc)) allocate (kp%k_lo%j (kp%k_lo%llim_proc:kp%k_lo%ulim_alloc)) allocate (kp%k_lo%scale(kp%k_lo%llim_proc:kp%k_lo%ulim_alloc)) allocate (kp%k_lo%ip_list(0:mproc-1)) kp%k_lo%ip_list = ip_list(0:mproc-1) ! count number of elements to be redistributed to/from each processor nn_to = 0 nn_from = 0 ! Load balancing could be a problem as a result of this initialization. do j = 1, kp%k_lo%ndom do k=1,ncon(j)*scon(j) do i=1, nsp m=mmcon(j,k) n=nncon(j,k) kidx=(j-1)+(mcon(m,n)-1)*ndom+(i-1)*ncontot*ndom ipfrom = proc_id (m_lo, m, n) ipto = kproc_id (kp, kidx) if (iproc == ipfrom) then do l=1,lmax nn_from(ipto) = nn_from(ipto) + 1 enddo endif if (iproc == ipto) then do l=1,lmax nn_to(ipfrom) = nn_to(ipfrom) + 1 enddo endif enddo enddo enddo ! Allocate arrays for lists of indices that will be passed do ip = 0, nproc-1 if (nn_from(ip) > 0) then allocate (from_list(ip)%first(nn_from(ip))) allocate (from_list(ip)%second(nn_from(ip))) allocate (from_list(ip)%third(nn_from(ip))) if (nspecies > 0) allocate (from_list(ip)%fourth(nn_from(ip))) end if if (nn_to(ip) > 0) then allocate (to_list(ip)%first(nn_to(ip))) allocate (to_list(ip)%second(nn_to(ip))) end if end do ! get local indices of elements redistributed to/from other processors nn_to = 0 nn_from = 0 kp%k_lo%skip = .true. do j = 1, kp%k_lo%ndom do k=1,ncon(j)*scon(j) do i=1, nsp m=mmcon(j,k) n=nncon(j,k) kidx=(j-1)+(mcon(m,n)-1)*ndom+(i-1)*ncontot*ndom ipfrom = proc_id (m_lo, m, n) ipto = kproc_id (kp, kidx) if (iproc == ipfrom) then do l=1,lmax nn = nn_from(ipto) + 1 nn_from(ipto) = nn from_list(ipto)%first(nn) = l from_list(ipto)%second(nn) = m from_list(ipto)%third(nn) = n if(nspecies > 0) from_list(ipto)%fourth(nn) = i enddo endif if (iproc == ipto) then do l=1,lmax nn = nn_to(ipfrom) + 1 nn_to(ipfrom) = nn to_list(ipfrom)%first(nn) = l+lmax*(conpos(m,n)-1) to_list(ipfrom)%second(nn) = kidx enddo ! putting these in arrays should ease later optimizations for load balancing kp%k_lo%skip(kidx) = .false. kp%k_lo%j(kidx) = j kp%k_lo%scale(kidx) = 1.0/sqrt(real(lmax*scon(j))) kp%k_lo%lmax(kidx) = lmax*scon(j) endif enddo enddo enddo from_low(1) = 1 from_low(2) = m_lo%ml_proc from_low(3) = m_lo%nl_proc if(nspecies > 0) from_low(4) = 1 to_low = kp%k_lo%llim_proc call init_redist (kp%connect, 'c', to_low, to_list, from_low, from_list) call delete_list(to_list) call delete_list(from_list) end subroutine init_kpar_redist subroutine delete_kpar_redist (k) use redistribute, only: delete_redist type (kpar_redist_type), intent (in out) :: k call delete_redist(k%connect) end subroutine delete_kpar_redist function kproc_id (kp, idx) integer :: kproc_id type (kpar_redist_type), intent (in) :: kp integer, intent (in) :: idx integer :: mp mp = idx/kp%k_lo%blocksize kproc_id = kp%k_lo%ip_list(mp) end function kproc_id function kidx_local (kp, idx) logical :: kidx_local type (kpar_redist_type), intent (in) :: kp integer, intent (in) :: idx kidx_local = kp%k_lo%iproc == kproc_id(kp, idx) end function kidx_local end module gryffin_redist