!{\src2tex{textfont=tt}}
!!****f* ABINIT/cexch
!! NAME
!!  cexch
!!
!! FUNCTION
!!  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
!!  in random mode) for subsequent treatment in the Bethe-Salpeter code.
!!
!! COPYRIGHT
!! Copyright (C) 1992-2009 EXC group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida)
!! Copyright (C) 2009-2010 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  BSp<excparam>=The parameters for the Bethe-Salpeter calculation. 
!!  Dtfil<datafiles_type>=File names and unit numbers.
!!  BS_files<excfiles>=File names internally used in the BS code.
!!  usepaw=1 if PAW is used, 0 otherwise.
!!  nspinor=Number of spinorial components.
!!  nsppol=Number of independent spin polarizations.
!!  Cryst<crystal_structure>=Info on the crystalline structure.
!!  Kmesh<BZ_mesh_type>=The list of k-points in the BZ, IBZ and symmetry tables.
!!  Qmesh<BZ_mesh_type>=The list of q-points for epsilon^{-1} and related symmetry tables. 
!!  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains 
!!    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
!!  Gsph_Max<gvectors_type>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).  
!!  igfft(Bsp%npwvec,2*BSp%mG0(1)+1,2*BSp%mG0(2)+1,BSp%mG0(3)+1)=Index of G-G0 in the FFT grid. The first 
!!    dimension refers to G, the remainig three dimensions define G0.
!!  Vcp<coulombian_type>=The Coulomb interaction in reciprocal space. A cutoff can be used
!!  Er<epsilonm1_results>=Data type gathering info and data for the symmetrized inverse dielectric matrix.
!!  trans<transition>=For each transition, it gives the index of the k-point, valence and conduction index
!!    as well as the transition energy.
!!  transtab(BSp%nkbz,BSp%nbndv,BSp%nbndc)=Correspondence between the tupla (k,v,c) and the transition index in trans.
!!  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
!!  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
!!  MPI_enreg<MPI_type>=Info on the parallelism.
!!  Psps<Pseudopotential_type>=Variables related to pseudopotentials
!!  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
!!  Pawang<pawang_type>=PAW angular mesh and related data.
!!  Paw_pwff(Cryst%ntypat*usepaw)<Paw_pwff_type>=Form factor used to calculate the onsite mat. elements of a plane wave.
!!  Wfd<wfs_descriptor>=Handler for the wavefunctions.
!!  spaceComm=MPI communicator.
!!  prtvol=Verbosity level.
!!
!! OUTPUT
!!  The excitonic Hamiltonian is saved on an external binary file (see below).
!!
!! SIDE EFFECTS 
!!  When the routine returns Er%epsm1 contains W (calculation of W is done in-place without having to allocated extra memory) 
!!
!! NOTES
!!  1) Version for K_V = K_C (q=0), thus KP_V = KP_C
!!  2) normal fcc supercell in bulk
!!  3) Omega = 0 only
!!  No exchange limit: use LDA energies in case 
!!  4) Hermiticity is exploited
!!  5) Symmetry of H-k-k' = H*kk' not used
!!  6) Coulomb term can be approssimateed as diagonal in G
!!  7) Valence bands treated from lomo on
!!
!! spin structure
!! ----------------------------------- for qpol=0 v-+ can be obtained by v+-so:
!! | [diag-W+v]++     |      v+-     |  qpol = 0 nsppol = 1 -> nspinblock=1 
!! -----------------------------------  qpol = 0 nsppol = 2 -> nspinblock=3
!! |     v-+          | [diag-W+v]-- |  qpol \=0 nsppol = 2 -> nspinblock=4
!! -----------------------------------   
!!
!!
!! for each spin block  count the number of subblocks of the exciton hamiltonian to calculate
!!
!!          ! cblock=1  | cblock=2 |
!! ---------------------------------
!! rblock=1 | RESONANT  | COUPLING |  
!! ---------------------------------
!! rblock=2 | ANTICOUP  | ANTIRESO |
!! ---------------------------------
!! note: anticoup block is not used
!!
!!  The following files are used:
!!
!!  Input:
!!  Output (access direct):
!!    unit hreso_unt : h excitonic resonant Hamiltonian
!!    hcoup_unt: h excitonic coupling Hamiltonian
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine cexch(BSp,Dtfil,BS_files,usepaw,nspinor,nsppol,Cryst,Kmesh,Qmesh,ktabr,Gsph_Max,igfft,Vcp,&
& Wfd,Er,trans,transtab,nfftot_osc,ngfft_osc,MPI_enreg,Psps,Pawtab,Pawang,Paw_pwff,spaceComm,prtvol)

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use defs_gw
 use m_bs_defs
 use m_xmpi
 use m_errors

 use m_gwdefs,       only : czero_gw
 use m_io_tools,     only : get_unit
 use m_crystal,      only : crystal_structure
 use m_gsphere,      only : gvectors_type
 use m_coulombian,   only : coulombian_type
 use m_bz_mesh,      only : bz_mesh_type, get_BZ_item 
 use m_screening,    only : make_W
 use m_paw_pwij,     only : paw_pwff_type, paw_pwij_type, init_paw_pwij, destroy_paw_pwij, paw_rho_tw_g
 use m_wfs,          only : wfs_descriptor, wfd_get_ur, wfd_get_cprj, wfd_change_ngfft
 use m_oscillators,  only : rho_tw_g

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
 use interfaces_51_manage_mpi
 use interfaces_53_abiutil
 use interfaces_69_bse, except_this_one => cexch
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: usepaw,nspinor,nsppol,nfftot_osc,spaceComm,prtvol
 type(excparam),intent(in) :: BSp
 type(excfiles),intent(in) :: BS_files
 type(MPI_type),intent(inout) :: MPI_enreg
 type(Epsilonm1_results),intent(inout) :: Er
 type(BZ_mesh_type),intent(in) :: Kmesh,Qmesh
 type(crystal_structure),intent(in) :: Cryst
 type(Coulombian_type),intent(in) :: Vcp
 type(Gvectors_type),intent(in) :: Gsph_Max
 type(datafiles_type),intent(in) :: Dtfil
 type(Pseudopotential_type),intent(in) :: Psps
 type(pawang_type),intent(in) :: Pawang
 type(wfs_descriptor),intent(inout) :: Wfd
!arrays
 integer,intent(in) :: ngfft_osc(18)
 integer,intent(in) :: transtab(BSp%nkbz,BSp%nbndv,BSp%nbndc)
 integer,intent(in) :: igfft(Bsp%npwvec,2*BSp%mG0(1)+1,2*BSp%mG0(2)+1,2*BSp%mG0(3)+1)
 integer,intent(in) :: ktabr(nfftot_osc,BSp%nkbz)
 type(transition),intent(in) :: trans(BSp%nh)
 type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat)
 type(Paw_pwff_type),intent(in) :: Paw_pwff(Psps%ntypat*usepaw)

!Local variables ------------------------------
!scalars
 integer,parameter :: tim_fourdp=0,map2sphere=1
 integer :: spad,i1,i2,nprocs
 integer :: paral_kgb,dim_rtwg,use_padfft
 integer :: my_rank,master,mgfft_osc
! integer :: hreso_unt,hcoup_unt,itemp
 integer :: nh,fftalga_osc
 integer :: ik_ibz,itim_k,isym_k
 integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0 
 integer :: ig01,ig02,ig03,ierr
 integer :: iv,ic
 integer :: istat,hsize,isppol
!integer :: kstart=1
!integer :: kcstart=1
 integer :: recl4dpc
 !real(dp) :: q0vol,fcc_const
 complex(dpc) :: ph_mkt
 logical :: do_resonant,do_coupling
 logical :: hreso_exists
!logical :: hcoup_exists
 character(len=fnlen) :: fname
! character(len=500) :: msg
!arrays
 integer,allocatable :: igfftg0(:),task_distrib(:,:,:,:)
 integer,allocatable :: gbound(:,:)
 real(dp) :: qbz(3),spinrot_k(4)
 complex(gwpc),allocatable :: rhotwg1(:),rhxtwg_q0(:,:,:,:,:) 
 complex(gwpc),allocatable :: wfr1(:),wfr2(:)
 !type(Cprj_type),allocatable :: Cprj_k(:,:)
 type(Cprj_type),allocatable :: Cp1(:,:),Cp2(:,:)
 type(Paw_pwij_type),allocatable :: Pwij_q0(:)

!************************************************************************

 DBG_ENTER("COLL")

 ABI_CHECK(nsppol==1,"nsppol==2 not coded")
 ABI_CHECK(nspinor==1,"nspinor==2 not coded")
 ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")

 master=0
 my_rank = xcomm_rank(spaceComm)
 nprocs  = xcomm_size(spaceComm)

 if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) call wfd_change_ngfft(Wfd,Cryst,Psps,ngfft_osc) 

! Basic constants  
 dim_rtwg=1; paral_kgb=0
 recl4dpc = get_reclen("dpc")

! Analytic integration of 4pi/q^2 over the volume element:
! $4pi/V \int_V d^3q 1/q^2 =4pi bz_geometric_factor V^(-2/3)$
! i_sz=4*pi*bz_geometry_factor*q0_vol**(-two_thirds) where q0_vol= V_BZ/N_k
! bz_geometry_factor: sphere=7.79, fcc=7.44, sc=6.188, bcc=6.946, wz=5.255
! (see gwa.pdf, appendix A.4)

! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
! analytic integration of q**-2 over the volume element:
! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)

! q0vol = (8.0*pi**3) / (Cryst%ucvol*BSp%nkbz)
! fcc_const = SQRT(7.44*q0vol**(-2.0/3.0))
! rtw = (6.0*pi**2/(Cryst%ucvol*BSp%nkbz))**(1./3.)
! Average of (q+q')**-2 integration for head of Coulomb matrix 
! INTRTW(QL) = (2*pi*rtw + pi*(rtw**2/QL-QL)*LOG((QL+rtw)/(QL-rtw)))
! &              * (Cryst%ucvol*BSp%nkbz)/(2*pi)**3. * QL*QL

! Only the upper triangle is stored in memory.
 nh=BSp%nh; hsize = (nh*nh + nh)/2

 do_resonant=.TRUE.
 inquire(file=BS_files%exh,exist=hreso_exists)

#if 0
 if (hreso_exists) then  ! if already exists

  hreso_unt = get_unit()
  open(unit=hreso_unt,file=BS_files%exh,access='direct',recl=recl4dpc)
  read(hreso_unt,rec=hsize+1) itemp

  if (itemp == nh) then
   msg='reading resonant exciton Hamiltonian from '//TRIM(BS_files%exh)
   call wrtout(std_out,msg,"COLL")
   read(hreso_unt,rec=hsize+2) kstart
   if (kstart < BSp%nkbz) then
    kstart = kstart + 1
    print *, 'starting from k ', kstart
    MSG_ERROR("Not coded")
   else
    do_resonant=.FALSE.
   end if
  else
   MSG_ERROR("wrong in.exh")
  end if

  close(hreso_unt)

 else
  call wrtout(std_out,' Calculating resonant term from scratch ',"COLL")
  do_resonant=.TRUE.
  kstart=1
 end if
#endif
      
 do_coupling=.TRUE.

#if 0
 if (BSp%COUPLING) then

  inquire(file='in.exc',exist=hcoup_exists)

  if (hcoup_exists) then
   hcoup_unt = get_unit()
   open(unit=hcoup_unt,file='in.exc',access='direct',recl=recl4dpc)
   read(hcoup_unt,rec=hsize+1) itemp

   if (itemp == nh) then
    call wrtout(std_out,' Reading coupling exciton Hamiltonian from in.exc',"COLL")
    read(hcoup_unt,rec=hsize+2) kcstart
    if (kcstart < BSp%nkbz) then
     do_coupling=.TRUE.
     kcstart = kcstart + 1
     print *, 'starting from k ', kcstart
    else
     do_coupling=.FALSE.
    end if
   else
    MSG_ERROR('ERROR: wrong in.exc')
   end if
   close(hcoup_unt)

  else ! not existent, save final results
   write(std_out,' Calculating coupling term',"COLL")
   do_coupling=.TRUE.
   kcstart = 1
  end if
 else
  do_coupling=.FALSE.
 end if
      
 if ((.not.do_resonant).and.(.not.do_coupling)) then 
  call wrtout(std_out," Hamiltonian is already on disk, nothing to do. Returning","COLL")
  RETURN
 end if
#endif

 if (usepaw==1) then
   !allocate(Cprj_k(Cryst%natom,nspinor*Bsp%nbnds)); call cprj_alloc(Cprj_k, 0,Wfd%nlmn_atm)
   allocate(Cp1(Wfd%natom,Wfd%nspinor)); call cprj_alloc(Cp1,0,Wfd%nlmn_atm)
   allocate(Cp2(Wfd%natom,Wfd%nspinor)); call cprj_alloc(Cp2,0,Wfd%nlmn_atm)
 end if

! ================================================================
!  Construct the screened interaction W in the irreducible wedge.
! * W(q,G1,G2) = vc^{1/2} (q,G1) e^{-1}(q,G1,G2) vc^{1/2) (q,G2)
! * Use Coulomb term for q-->0, 
! * Only the first small Q is used, shall we average if nqlwl>1?
! ================================================================
 if (BSp%COULOMBTERM) then 
   call make_W(Er,Vcp) !TODO mqmem==0, change info ER%, and update Hscr, clean treatment of epsm1 and different IDs
 end if

 call wrtout(std_out," Calculating all matrix elements for q=0 to save CPU time","COLL")

 allocate(wfr1(nfftot_osc*Wfd%nspinor))
 allocate(wfr2(nfftot_osc*Wfd%nspinor))

 ! Identify q==0
 iqbz0=0
 do iq_bz=1,Qmesh%nbz
   if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
 end do
 ABI_CHECK(iqbz0/=0,"q=0 not found in q-point list!")

 ! * Get iq_ibz, and symmetries from iqbz0.
 call get_BZ_item(Qmesh,iqbz0,qbz,iq_ibz,isym_q,itim_q)

 if (usepaw==1) then ! Prepare onsite contributions at q==0
   allocate(Pwij_q0(Cryst%ntypat))
   call init_paw_pwij(Pwij_q0,BSp%npweps,Qmesh%bz(:,iqbz0),Gsph_Max%gvec,Cryst%rprimd,Dtfil,Psps,Pawtab,Paw_pwff)
 end if

 ig01 = BSp%mG0(1)+1 ! The index of G in the FFT mesh.
 ig02 = BSp%mG0(2)+1
 ig03 = BSp%mG0(3)+1

 allocate(igfftg0(BSp%npweps))
 igfftg0(1:BSp%npweps) = igfft(1:BSp%npweps,ig01,ig02,ig03)

 ! Evaluate the tables needed for the padded FFT performed in rhotwg.
 ! There is no need to shift the G-sphere as we only have vertical transitions.
 mgfft_osc   = MAXVAL(ngfft_osc(1:3))
 fftalga_osc = ngfft_osc(7)/100 !; fftalgc_osc=MOD(ngfft_osc(7),10)
 use_padfft=0; if (fftalga_osc==3) use_padfft=1  ! Padded FFT with FFTW3 is OK!

 allocate(gbound(2*mgfft_osc+8,2*use_padfft))
 if (use_padfft==1) call sphereboundary(gbound,1,Gsph_Max%gvec,mgfft_osc,BSp%npweps)

 allocate(rhotwg1(BSp%npweps))

 allocate(rhxtwg_q0(BSp%npweps,BSp%nbnds,BSp%nbnds,BSp%nkibz,nsppol), STAT=istat)
 ABI_CHECK(istat==0,"out-of-memory in rhxtwg_q0")
 rhxtwg_q0 = czero

 ! Distribute the calculation of the matrix elements.
 allocate(task_distrib(BSp%nbnds,BSp%nbnds,BSp%nkibz,nsppol))
 call fill_task_distrib_4D(nprocs,task_distrib)

 do isppol=1,nsppol
   do ik_ibz=1,BSp%nkibz ! loop over the k-points in IBZ

    if ( ALL(task_distrib(:,:,ik_ibz,isppol)/= my_rank) ) CYCLE

    itim_k=1; isym_k=1; ph_mkt=cone; spinrot_k=Cryst%spinrot(:,isym_k)

    !if (usepaw==1) then
    !  indx_kibz=nspinor*BSp%nbnds*(ik_ibz-1)+nspinor*BSp%nbnds*Kmesh%nibz*(isppol-1)
    !  ibsp=0
    !  do ib=1,BSp%nbnds
    !    call wfd_get_cprj(Wfd,ib,ik_ibz,isppol,Cryst,Cp1,sorted=.FALSE.)
    !    ibsp = ibsp + nspinor
    !    !call cprj_copy(Cp1,Cprj_k(:,ibsp:ibsp+(nspinor-1)))
    !  end do
    !end if

    do iv=1,BSp%nbnds ! Loop over band V
      if ( ALL(task_distrib(:,iv,ik_ibz,isppol)/=my_rank) ) CYCLE

      call wfd_get_ur(Wfd,iv,ik_ibz,isppol,wfr1)

      if (usepaw==1) call wfd_get_cprj(Wfd,iv,ik_ibz,isppol,Cryst,Cp1,sorted=.FALSE.)

      do ic=1,BSp%nbnds ! Loop over band C 
        if ( task_distrib(ic,iv,ik_ibz,isppol)/=my_rank ) CYCLE

        call wfd_get_ur(Wfd,ic,ik_ibz,isppol,wfr2)

        if (usepaw==1) call wfd_get_cprj(Wfd,ic,ik_ibz,isppol,Cryst,Cp2,sorted=.FALSE.)

        call rho_tw_g(paral_kgb,nspinor,BSp%npweps,nfftot_osc,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
&         wfr1,1,ktabr(:,1),ph_mkt,spinrot_k,&
&         wfr2,1,ktabr(:,1),ph_mkt,spinrot_k,&
&         dim_rtwg,rhotwg1,tim_fourdp,MPI_enreg)

        if (usepaw==1) then ! Add PAW onsite contribution.
          spad=(nspinor-1)
          i1=iv; if (nspinor==2) i1=(2*iv-1)
          i2=ic; if (nspinor==2) i2=(2*ic-1)
          call paw_rho_tw_g(Bsp%npweps,dim_rtwg,nspinor,Cryst%natom,Cryst%ntypat,Cryst%typat,Cryst%xred,Gsph_Max%gvec,&
&           Cp1,Cp2,Pwij_q0,rhotwg1)
!&           Cprj_k(:,i1:i1+spad),Cprj_k(:,i2:i2+spad),Pwij_q0,rhotwg1)
        end if

        ! If q=0 treat Exchange and Coulomb-term independently
        if (iv <= BSp%homo .and. ic <= BSp%homo .or. &
&           iv >  BSp%homo .and. ic >  BSp%homo) then

          if (iv/=ic) then !COULOMB term: C/=V: ignore them
            rhotwg1(1) = czero_gw
          else
            ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
            ! analytic integration of q**-2 over the volume element:
            ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
            !rhotwg1(1) = fcc_const * qpg(1,iqbz0)
            rhotwg1(1) = SQRT(Vcp%i_sz) / Vcp%vcqlwl_sqrt(1,1)
            !if (vcut) rhotwg1(1) = 1.0
          end if

        else
!         At present this term is set to zero
!         EXCHANGE term: limit value. 
!         Set up rho-twiddle(G=0) using small vector q instead of zero and k.BSp perturbation theory (see notes)
          rhotwg1(1) = czero_gw
        end if

        rhxtwg_q0(:,iv,ic,ik_ibz,isppol) = rhotwg1(:)
      end do ! ic
    end do ! iv
   end do ! ik_ibz
 end do ! isppol

 ! Gather matrix elements on each node.
 call xsum_mpi(rhxtwg_q0,spaceComm,ierr)

 deallocate(task_distrib)
 deallocate(rhotwg1)
 deallocate(igfftg0)
 deallocate(gbound, STAT=istat)
 deallocate(wfr1,wfr2)

 if (usepaw==1) then ! Optional deallocation for PAW.
   call destroy_paw_pwij(Pwij_q0); deallocate(Pwij_q0)
   !call cprj_free(Cprj_k); deallocate(Cprj_k)
   call cprj_free(Cp1); deallocate(Cp1)
   call cprj_free(Cp2); deallocate(Cp2)
 end if

! =============================
! === Resonant Hamiltonian ====
! =============================
 if (do_resonant) then   !TODO spin support is missing.
   fname=BS_files%exh
   call calc_exch(BSp,Dtfil,usepaw,nspinor,nsppol,Cryst,Kmesh,Qmesh,ktabr,Gsph_Max,igfft,Vcp,&
&    Wfd,Er,trans,transtab,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,rhxtwg_q0(:,:,:,:,1),.TRUE.,&
&    fname,spaceComm,prtvol)
 end if 
      
! ============================
! === Coupling Hamiltonian ===
! ============================                                         
 if (do_coupling.and.BSp%COUPLING) then ! TODO spin support is missing. 
   fname='out.exc'
   call calc_exch(BSp,Dtfil,usepaw,nspinor,nsppol,Cryst,Kmesh,Qmesh,ktabr,Gsph_Max,igfft,Vcp,&
&    Wfd,Er,trans,transtab,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,rhxtwg_q0(:,:,:,:,1),.FALSE.,&
&    fname,spaceComm,prtvol)
 end if 
      
 ! * Free memory.
 deallocate(rhxtwg_q0)

 DBG_EXIT("COLL")

end subroutine cexch
!!***
