!{\src2tex{textfont=tt}}
!!****f* ABINIT/bethe_salpeter
!! NAME
!!  bethe_salpeter
!!
!! FUNCTION
!!  Main routine to calculate dielectric properties by solving the Bethe-Salpeter equation in
!!  Frequency-Reciprocal space on a transition (electron-hole) basis set.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2010 ABINIT group (M.Giantomassi, L. Reining, V. Olevano, F. Sottile, S. Albrecht, G. Onida)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! acell(3)=Length scales of primitive translations (bohr)
!! codvsn=Code version
!! Dtfil<datafiles_type>=Variables related to files.
!! Dtset<dataset_type>=All input variables for this dataset.
!! iexit=Exit flag
!! Pawang<pawang_type)>=PAW angular mesh and related data.
!! Pawrad(ntypat*usepaw)<pawrad_type>=Paw radial mesh and related data.
!! Pawtab(ntypat*usepaw)<pawtab_type>=Paw tabulated starting data.
!! Psps<pseudopotential_type>=Variables related to pseudopotentials.
!!   Before entering the first time in the routine, a significant part of Psps has been initialized :
!!   the integers dimekb,lmnmax,lnmax,mpssang,mpssoang,mpsso,mgrid,ntypat,n1xccc,usepaw,useylm,
!!   and the arrays dimensioned to npsp. All the remaining components of Psps are to be initialized in
!!   the call to pspini. The next time the code enters bethe_salpeter, Psps might be identical to the
!!   one of the previous Dtset, in which case, no reinitialisation is scheduled in pspini.F90.
!! rprim(3,3)=Dimensionless real space primitive translations.
!! xred(3,natom)=Reduced atomic coordinates.
!!
!! Input files used during the calculation.
!!  KSS        : Kohn Sham electronic structure file.
!!  SCR (SUSC) : Files containing the symmetrized epsilon^-1 or the irreducible RPA polarizability,
!!               respectively. Used to construct the screening W.
!!  GW file    : Optional file with the GW QP corrections.
!!
!! OUTPUT
!!  Output is written on the main output file and on the following external files:
!!   * _RPA_NLF_MDF: macroscopic RPA dielectric function without non-local field effects.
!!   * _GW_NLF_MDF: macroscopic RPA dielectric function without non-local field effects calculated
!!                 with GW energies or the scissors operator.
!!   * _EXC_MDF: macroscopic dielectric function with excitonic effects obtained by solving the
!!              Bethe-Salpeter problem at different level of sophistication.
!!
!! PARENTS
!!      driver
!!
!! NOTES
!!
!! ON THE USE OF FFT GRIDS:
!! =================
!! In case of PAW:
!! ---------------
!!    Two FFT grids are used:
!!    - A "coarse" FFT grid (defined by ecut) for the application of the Hamiltonian on the plane waves basis.
!!      It is defined by nfft, ngfft, mgfft, ...
!!      Hamiltonian, wave-functions, density related to WFs (rhor here), ... are expressed on this grid.
!!    - A "fine" FFT grid (defined) by ecutdg) for the computation of the density inside PAW spheres.
!!      It is defined by nfftf, ngfftf, mgfftf, ... Total density, potentials, ... are expressed on this grid.
!! In case of norm-conserving:
!! ---------------------------
!!    - Only the usual FFT grid (defined by ecut) is used. It is defined by nfft, ngfft, mgfft, ...
!!      For compatibility reasons, (nfftf,ngfftf,mgfftf) are set equal to (nfft,ngfft,mgfft) in that case.
!!
!! CHILDREN
!!      bstruct_clean,build_spectra,cexch,cexch_haydock,chkpawovlp,cigfft
!!      destroy_bs_parameters,destroy_bz_mesh_type,destroy_coulombian
!!      destroy_crystal,destroy_epsilonm1_results,destroy_gvectors,destroy_hur
!!      destroy_paw_an,destroy_paw_ij,destroy_paw_pwff,energies_init
!!      exc_iterative_diago,exccoupl,excden,exceig,excoverlap,fourdp,get_gftt
!!      haydock_diago,hdr_clean,init_paw_an,init_paw_ij,init_paw_pwff
!!      init_pawfgr,initmpi_seq,make_hur_commutator,metric,mkdump_er,mkrdim
!!      nhatgrid,nullify_hur,nullify_paw_an,nullify_paw_ij,pawdenpot
!!      pawfgrtab_free,pawfgrtab_init,pawinit,pawmknhat,pawnabla_init
!!      pawpuxinit,print_ngfft,print_pawtab,print_psps,printexcevl,prtrhomxmn
!!      pspini,rdkss,rhoij_alloc,rhoij_copy,rhoij_free,rotate_fft_mesh
!!      setsymrhoij,setup_bse,status,test_charge,timab,wfd_destroy,wfd_init
!!      wfd_mkrho,wfd_print,wrtout
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine bethe_salpeter(acell,codvsn,Dtfil,Dtset,iexit,Pawang,Pawrad,Pawtab,Psps,rprim,xred)

 use defs_basis
 use m_bs_defs
 use defs_datatypes
 use defs_abitypes
 use defs_gw
 use m_xmpi
 use m_errors

 use m_header,          only : hdr_clean
 use m_fft_mesh,        only : setmesh, rotate_FFT_mesh, cigfft, get_gftt, print_ngfft
 use m_crystal,         only : crystal_structure, destroy_crystal
 use m_bz_mesh,         only : bz_mesh_type, destroy_bz_mesh_type
 use m_ebands,          only : update_occ, reportgap, get_valence_idx, get_bandenergy, bstruct_clean
 use m_gsphere,         only : gvectors_type, destroy_Gvectors
 use m_io_kss,          only : rdkss
 use m_coulombian,      only : coulombian_type, destroy_Coulombian, cutoff_density
 use m_screening,       only : destroy_Epsilonm1_results, mkdump_Er
 use m_paw_dmft,        only : paw_dmft_type
 use m_paw_toolbox,     only : nullify_paw_ij, init_paw_ij, destroy_paw_ij, init_pawfgr, pawfgrtab_free, pawfgrtab_init,&
&                              nullify_paw_an, init_paw_an, destroy_paw_an, print_pawtab, print_paw_ij
 use m_paw_commutator,  only : HUr_commutator, destroy_Hur, nullify_Hur, make_Hur_commutator
 use m_paw_pwij,        only : paw_pwff_type, init_paw_pwff, destroy_paw_pwff
 use m_wfs,             only : wfd_init, wfd_destroy, wfd_print, wfs_descriptor

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
 use interfaces_18_timing
 use interfaces_32_util
 use interfaces_42_geometry
 use interfaces_51_manage_mpi
 use interfaces_53_abiutil
 use interfaces_53_ffts
 use interfaces_65_psp
 use interfaces_66_paw
 use interfaces_67_common
 use interfaces_68_gw
 use interfaces_69_bse
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(inout) :: iexit
 character(len=6),intent(in) :: codvsn
 type(datafiles_type),intent(in) :: Dtfil
 type(dataset_type),intent(inout) :: Dtset
 type(pawang_type),intent(inout) :: Pawang
 type(pseudopotential_type),intent(inout) :: Psps
!arrays
 real(dp),intent(in) :: acell(3),rprim(3,3),xred(3,Dtset%natom)
 type(pawrad_type),intent(inout) :: Pawrad(Psps%ntypat*Psps%usepaw)
 type(pawtab_type),intent(inout) :: Pawtab(Psps%ntypat*Psps%usepaw)

!Local variables ------------------------------
!scalars
 integer,parameter :: level=90,tim_fourdp=0
 integer,save :: nsym_old=-1
 integer :: comm_world
 integer :: dim_kxcg,has_dijU,has_dijso,bs_nstates
 integer :: ik_bz,ikxc,ipert,mband
 integer :: accessfil,approx_type,choice,cplex,cplex_dij
 integer :: iat,id_required,ider,idir,ierr
 integer :: usexcnhat,nfft_osc,mgfft_osc
 integer :: istat,isym,itypat,izero
 integer :: optcut,optgr0,optgr1,optgr2,option,option_test,optrad,optrhoij,psp_gencond
 integer :: nhatgrdim,nkxc1,nprocs,nspden_rhoij,nzlmopt,ifft
 integer :: my_rank,rhoxsp_method,spaceComm,master
 integer :: nG01d,nG02d,nG03d
 integer :: mgfftf !,mkmem_
 integer :: spin,my_spin
 integer :: nbvw,ndij,nfftf,nfftf_tot,nfftot_osc
 integer :: my_minb,my_maxb
 real(dp) :: nelect_kss !,renormdielconst
 real(dp) :: ucvol,drude_plsmf,ecore,ecut_eff,ecutdg_eff
 real(dp) :: gsqcutc_eff,gsqcutf_eff
 real(dp) :: compch_fft,compch_sph,diecut_eff_dum,gsq_osc
 complex(dpc) :: gw_gap,excgap
 logical :: do_full_diago = .TRUE.
 logical :: iscompatibleFFT,ltemp,use_const_screen
 character(len=500) :: msg
 type(Pawfgr_type) :: Pawfgr
 type(excfiles) :: BS_files
 type(excparam) :: BSp
 type(paw_dmft_type) :: Paw_dmft
 type(MPI_type) :: MPI_enreg_seq
 type(Crystal_structure) :: Cryst
 type(BZ_mesh_type) :: Kmesh,Qmesh
 type(Epsilonm1_results) :: Er
 type(Gvectors_type) :: Gsph_Max
 type(Hdr_type) :: Hdr_kss,Hdr_bse
 type(Bandstructure_type) :: KS_BSt
 type(Energies_type) :: KS_energies
 type(Coulombian_type) :: Vcp
 type(wfs_descriptor) :: Wfs
!arrays
 integer,save :: paw_gencond(6)=(/-1,-1,-1,-1,-1,-1/)
 integer :: ngfft_osc(18),ngfftc(18),ngfftf(18)
 integer,allocatable :: ktabr(:,:),l_size_atm(:),nlmn_type(:)
 integer,allocatable :: nband(:,:)
 integer,allocatable :: igfft(:,:,:,:),nq_spl(:)
 integer,allocatable :: irottb(:,:)
 integer,pointer :: transtab(:,:,:)
 integer,allocatable :: gfft_osc(:,:)
 real(dp),parameter :: k0(3)=zero
 real(dp) :: tsec(2)
 real(dp) :: gmet(3,3),gprimd(3,3),qphon(3),rmet(3,3),rprimd(3,3)
 real(dp),allocatable :: qmax(:)
 real(dp),allocatable :: ks_nhat(:,:),ks_nhatgr(:,:,:),ks_rhog(:,:)
 real(dp),allocatable :: ks_rhor(:,:)
 complex(gwpc),allocatable :: kxcg(:,:)
 complex(dpc),pointer :: gwenergy(:,:,:)
 logical,allocatable :: bks_mask(:,:,:),keep_ur(:,:,:)
 type(transition),pointer :: Trans(:)
 type(Pawrhoij_type),allocatable :: KS_Pawrhoij(:)
 type(Paw_pwff_type),allocatable :: Paw_pwff(:)
 type(Pawfgrtab_type),allocatable :: Pawfgrtab(:)
 type(HUr_commutator),allocatable :: Hur(:)
 type(Paw_ij_type),allocatable :: KS_paw_ij(:)
 type(Paw_an_type),allocatable :: KS_paw_an(:)

!************************************************************************

 DBG_ENTER('COLL')

 call timab(650,1,tsec) ! bse(Total)
 call timab(651,1,tsec) ! bse(Init1)

 write(msg,'(8a)')&
& ' Exciton: Calculation of dielectric properties by solving the Bethe-Salpeter equation ',ch10,&
& ' in frequency domain and reciprocal space on a transitions basis set. ',ch10,&
& ' Based on a program developed by L. Reining, V. Olevano, F. Sottile, ',ch10,&
& ' S. Albrecht, and G. Onida. Incorporated in ABINIT by M. Giantomassi. ',ch10
 call wrtout(std_out,msg,'COLL')
 call wrtout(ab_out,msg,'COLL')

#if defined HAVE_GW_DPC
 if (gwpc/=8) then
   write(msg,'(6a)')ch10,&
&   ' Number of bytes for double precision complex /=8 ',ch10,&
&   ' Cannot continue due to kind mismatch in BLAS library ',ch10,&
&   ' Some BLAS interfaces are not generated by abilint '
   MSG_ERROR(msg)
 end if
 write(msg,'(a,i2,a)')'.Using double precision arithmetic ; gwpc = ',gwpc,ch10
#else
 write(msg,'(a,i2,a)')'.Using single precision arithmetic ; gwpc = ',gwpc,ch10
#endif
 call wrtout(std_out,msg,'COLL')
 call wrtout(ab_out,msg,'COLL')

 comm_world = xmpi_world
 spaceComm = comm_world

 nprocs  = xcomm_size(spaceComm)
 my_rank = xcomm_rank(spaceComm)
 master=0

!* Fake MPI_type for the sequential part.
 call initmpi_seq(MPI_enreg_seq)
!
!* localrdwf defines the IO locality
!1--> each node has access to files (default).
!0--> only master has access.
!
!* accesswff defines the format of the output.
!1--> Plain Fortran file
!2--> Set all outputs to netcdf format (not implemented)
!3--> Set all outputs to ETSF format

 accessfil=0
 if (Dtset%accesswff==IO_MODE_NETCDF) accessfil=1
 if (Dtset%accesswff==IO_MODE_ETSF  ) accessfil=3
 if (Dtset%accesswff==IO_MODE_MPI   ) accessfil=4

!===================================================
!=== Initialize names for input and output files ===
!===================================================

!=== Some variables need to be initialized/nullify at start ===
 call energies_init(KS_energies)
 usexcnhat=0
 call mkrdim(acell,rprim,rprimd)
 call metric(gmet,gprimd,ab_out,rmet,rprimd,ucvol)
!
!=== Define FFT grid(s) sizes ===
!* Be careful! This mesh is only used for densities, potentials and the matrix elements of v_Hxc. It is NOT the
!(usually coarser) GW FFT mesh employed for the oscillator matrix elements that is defined in setmesh.F90.
!See also NOTES in the comments at the beginning of this file.
!NOTE: This mesh is defined in invars2m using ecutwfn, in GW Dtset%ecut is forced to be equal to Dtset%ecutwfn.

!TODO Recheck getng, should use same trick as that used in screening and sigma.

 call init_pawfgr(Dtset,Pawfgr,mgfftf,nfftf,ecut_eff,ecutdg_eff,ngfftc,ngfftf,&
& gsqcutc_eff=gsqcutc_eff,gsqcutf_eff=gsqcutf_eff,gmet=gmet,k0=k0)

 call print_ngfft(ngfftf,header='Dense FFT mesh used for densities and potentials')
 nfftf_tot=PRODUCT(ngfftf(1:3))
!
!=== Open and read pseudopotential files ===
 call status(0,Dtfil%filstat,iexit,level,'call pspini   ')

 call pspini(Dtset,ecore,psp_gencond,gsqcutc_eff,gsqcutf_eff,level,Pawrad,Pawtab,Psps,rprimd)
 if (psp_gencond==1) call print_psps(Psps,std_out,0,'COLL')

!=== Initialization of basic objects including the BSp structure that defines the parameters of the run ===
 call timab(652,1,tsec) ! setup_bse

 call setup_bse(codvsn,acell,rprim,ngfftf,ngfft_osc,Dtset,Dtfil,BS_files,Psps,Pawtab,BSp,&
& Cryst,Kmesh,Qmesh,KS_BSt,gwenergy,Hdr_kss,Gsph_Max,Vcp,Hdr_bse,Er,Trans,transtab,gw_gap,spaceComm)

 call timab(652,2,tsec) ! setup_bse

 nfftot_osc=PRODUCT(ngfft_osc(1:3))
 nfft_osc  =nfftot_osc  !no FFT //
 mgfft_osc =MAXVAL(ngfft_osc(1:3))

!XG090617 Please, do not remove this write, unless you have checked
!that the code executes correctly on max+g95 (especially, Tv5#70).
!It is one more a silly write, perhaps needed because the compiler does not treat correctly
!non-nullified pointers.
 write(std_out,*)' screening after setup_sigma : Er%Hscr%headform=',Er%Hscr%headform
!END XG090617

 call print_ngfft(ngfft_osc,header='FFT mesh used for oscillator strengths')

!TRYING TO RECREATE AN "ABINIT ENVIRONMENT"
 KS_energies%e_corepsp=ecore/Cryst%ucvol

!============================
!==== PAW initialization ====
!============================
 if (Dtset%usepaw==1) then

   call chkpawovlp(Cryst%natom,Cryst%ntypat,Dtset%pawovlp,Pawtab,Cryst%rmet,Cryst%typat,xred)

   allocate(nlmn_type(Cryst%ntypat))
   do itypat=1,Cryst%ntypat
     nlmn_type(itypat)=Pawtab(itypat)%lmn_size
   end do

   cplex_dij=Dtset%nspinor; cplex=1; ndij=1

   allocate(KS_Pawrhoij(Cryst%natom))
   nspden_rhoij=Dtset%nspden ; if (Dtset%pawspnorb>0.and.Dtset%nspinor==2) nspden_rhoij=4
   call rhoij_alloc(Dtset%pawcpxocc,nlmn_type,nspden_rhoij,Dtset%nspinor,Dtset%nsppol,KS_Pawrhoij,Cryst%typat)

   deallocate(nlmn_type)

!  === Initialize values for several basic arrays ===
!  TODO Check pawxcdev>2 since gaunt coefficients are allocated with different size
   if (psp_gencond==1.or.&
&   paw_gencond(1)/=Dtset%pawlcutd .or.paw_gencond(2)/=Dtset%pawlmix  .or.&
&   paw_gencond(3)/=Dtset%pawnphi  .or.paw_gencond(4)/=Dtset%pawntheta.or.&
&   paw_gencond(5)/=Dtset%pawspnorb.or.paw_gencond(6)/=Dtset%pawxcdev) then

     call timab(553,1,tsec)
     diecut_eff_dum=ABS(Dtset%diecut)*Dtset%dilatmx**2

     call pawinit(diecut_eff_dum,Psps%indlmn,Dtset%pawlcutd,Dtset%pawlmix,Psps%lmnmax,Psps%mpsang,&
&     Dtset%pawnphi,Cryst%nsym,Dtset%pawntheta,Cryst%ntypat,Pawang,Pawrad,Dtset%pawspnorb,Pawtab,Dtset%pawxcdev)

     paw_gencond(1)=Dtset%pawlcutd  ; paw_gencond(2)=Dtset%pawlmix
     paw_gencond(3)=Dtset%pawnphi   ; paw_gencond(4)=Dtset%pawntheta
     paw_gencond(5)=Dtset%pawspnorb ; paw_gencond(6)=Dtset%pawxcdev
     call timab(553,2,tsec)
   else
     if (Pawtab(1)%has_kij  ==1) Pawtab(1:Cryst%ntypat)%has_kij  =2
     if (Pawtab(1)%has_nabla==1) Pawtab(1:Cryst%ntypat)%has_nabla=2
   end if
   Psps%n1xccc=MAXVAL(Pawtab(1:Cryst%ntypat)%usetcore)

!  Initialize optional flags in Pawtab to zero
!  (Cannot be done in Pawinit since the routine is called only if some parameters are changed)
   Pawtab(:)%has_nabla = 0
   Pawtab(:)%usepawu   = 0
   Pawtab(:)%useexexch = 0
   Pawtab(:)%exchmix   =zero

!  * Evaluate <phi_i|nabla|phi_j>-<tphi_i|nabla|tphi_j> for the long wavelength limit.
!  TODO solve problem with memory leak and clean this part as well as the associated flag
   call pawnabla_init(Psps%mpsang,Psps%lmnmax,Cryst%ntypat,Psps%indlmn,Pawrad,Pawtab)

!  if (psp_gencond==1) then !.or. nsym_old/=Cryst%nsym) then
   call setsymrhoij(gprimd,Pawang%l_max-1,Cryst%nsym,Dtset%pawprtvol,Cryst%rprimd,Cryst%symrec,Pawang%zarot)
   nsym_old=Cryst%nsym
!  end if

!  === Initialize and compute data for LDA+U ===
   if (Dtset%usepawu>0.or.Dtset%useexexch>0) then
     Paw_dmft%use_dmft=dtset%usedmft
     call pawpuxinit(Dtset%dmatpuopt,Dtset%exchmix,Dtset%jpawu,Dtset%lexexch,Dtset%lpawu,&
&     Psps%indlmn,Psps%lmnmax,Cryst%ntypat,Pawang,Dtset%pawprtvol,Pawrad,Pawtab,Dtset%upawu,&
&     Dtset%usedmft,Dtset%useexexch,Dtset%usepawu)
     MSG_ERROR("BS equation with LDA+U not completely coded")
   end if
!  Paw_dmft%use_dmft=0
!  DMFT not available
   ABI_CHECK(Dtset%usedmft==0,"DMFT + BSE not allowed")
   ABI_CHECK(Dtset%useexexch==0,"EXX + BSE not allowed")

   if (my_rank==master) call print_pawtab(Pawtab)
!  3-
!  if (mkmem_==0) then
!  open(Dtfil%unpaw,file=dtfil%fnametmp_paw,form='unformatted',status='unknown')
!  rewind(unit=Dtfil%unpaw)
!  end if

!  === Get Pawrhoij from the header of the KSS file ===
   call rhoij_copy(Hdr_kss%pawrhoij,KS_Pawrhoij)

!  === Re-symmetrize symrhoij ===
!  this call leads to a SIGFAULT, likely some pointer is not initialized correctly
   choice=1; optrhoij=1; ipert=0; idir=0
!  call symrhoij(choice,Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%natom,Cryst%nsym,&
!  &  Cryst%ntypat,optrhoij,Pawang,Dtset%pawprtvol,KS_Pawrhoij,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)

!  === Evaluate form factor of radial part of phi.phj-tphi.tphj ===
   rhoxsp_method=1 ! Arnaud-Alouani
!  if (Dtset%userie==1) rhoxsp_method=1 ! Arnaud-Alouani
!  if (Dtset%userie==2) rhoxsp_method=2 ! Shiskin-Kresse

   allocate(gfft_osc(3,nfftot_osc))
   call get_gftt(ngfft_osc,k0,gmet,gsq_osc,gfft_osc)
   deallocate(gfft_osc)

!  * Set up q grids, make qmax 20% larger than largest expected:
   allocate(nq_spl(Psps%ntypat),qmax(Psps%ntypat))
   nq_spl = Psps%mqgrid_ff
   qmax = SQRT(gsq_osc)*1.2d0 ! qmax=Psps%qgrid_ff(Psps%mqgrid_ff)
   allocate(Paw_pwff(Psps%ntypat))

   call init_paw_pwff(Paw_pwff,rhoxsp_method,nq_spl,qmax,gmet,Pawrad,Pawtab,Psps)

   deallocate(nq_spl,qmax)
!  
!  === Variables/arrays related to the fine FFT grid ===
   allocate(ks_nhat(nfftf,Dtset%nspden)); ks_nhat=zero
   allocate(Pawfgrtab(Cryst%natom),l_size_atm(Cryst%natom))
   do iat=1,Cryst%natom
     l_size_atm(iat)=Pawtab(Cryst%typat(iat))%l_size
   end do
   cplex=1
   call pawfgrtab_init(Pawfgrtab,cplex,l_size_atm,Dtset%nspden)
   deallocate(l_size_atm)
   compch_fft=greatest_real
   usexcnhat=MAXVAL(Pawtab(:)%usexcnhat)
!  * 0 if Vloc in atomic data is Vbare    (Blochl s formulation)
!  * 1 if Vloc in atomic data is VH(tnzc) (Kresse s formulation)
   write(msg,'(a,i2)')' bethe_salpeter : using usexcnhat = ',usexcnhat
   call wrtout(std_out,msg,'COLL')
!  
!  === Identify parts of the rectangular grid where the density has to be calculated ===

   optcut=0;optgr0=Dtset%pawstgylm; optgr1=0; optgr2=0; optrad=1-Dtset%pawstgylm
   if (Dtset%xclevel==2.and.usexcnhat>0) optgr1=Dtset%pawstgylm

   call nhatgrid(Cryst%atindx1,gmet,MPI_enreg_seq,Cryst%natom,Cryst%natom,Cryst%nattyp,ngfftf,Cryst%ntypat,&
   optcut,optgr0,optgr1,optgr2,optrad,Pawfgrtab,Pawtab,Cryst%rprimd,Cryst%ucvol,Cryst%xred)

 end if !End of PAW Initialization

!Allocate these arrays anyway, since they are passed to subroutines.
 if (.not.allocated(ks_nhat)) allocate(ks_nhat(nfftf,0))

!==================================================
!==== Read KS band structure from the KSS file ====
!==================================================

!* Initialize wave function handler, allocate wavefunctions.
 my_minb=1; my_maxb=BSp%nbnds; my_spin=0 ! nsppol==2 not coded anyway.
 mband=BSp%nbnds; allocate(nband(Kmesh%nibz,Dtset%nsppol)); nband=mband

!At present, no memory distribution, each node has the full set of states.
 allocate(bks_mask(mband,Kmesh%nibz,Dtset%nsppol)); bks_mask=.TRUE.

 allocate(keep_ur(mband,Kmesh%nibz,Dtset%nsppol)); keep_ur=.FALSE.
 if (MODULO(Dtset%gwmem,10)==1) then
   do spin=1,Dtset%nsppol
     if (spin==my_spin.or.my_spin==0) keep_ur(:,:,spin)=.TRUE.
   end do
 end if

 call wfd_init(Wfs,Cryst,Pawtab,Psps,keep_ur,Dtset%paral_kgb,BSp%npwwfn,mband,nband,Kmesh%nibz,&
& Dtset%nsppol,bks_mask,Dtset%nspden,Dtset%nspinor,Hdr_kss%istwfk,Kmesh%ibz,ngfft_osc,BSp%mg0,Gsph_Max%gvec,Dtset%nloalg,comm_world)

 deallocate(bks_mask,nband,keep_ur)

 call wfd_print(Wfs,mode_paral='PERS')

 call timab(651,2,tsec) ! bse(Init1)
 call timab(653,1,tsec) ! bse(rdkss)

 my_minb=1; my_maxb=BSp%nbnds; nbvw=0 ! Do not divide states into occupied and empty

 call rdkss(Dtfil%fnameabi_kss,Cryst,Pawtab,Bsp%nbnds,BSp%npwwfn,Psps%mpsang,Dtset%accesswff,nelect_kss,Wfs)

 call timab(653,2,tsec) ! bse(rdkss)
 call timab(654,1,tsec) ! bse(rdmkeps^-1)

!read em1 file (unit 12) for q-points and parameters

 if (Er%fform==1002) then
!  TODO clean this part and add an option to retrieve a single frequency to save memory.
   id_required=4; ikxc=0; approx_type=0; option_test=0
   dim_kxcg=0
   allocate(kxcg(Er%npwe,dim_kxcg))
   call mkdump_Er(Er,Vcp,Er%npwe,Gsph_Max%gvec,dim_kxcg,kxcg,id_required,approx_type,ikxc,option_test,&
&   dtfil%fnameabo_sgr,Dtset%accesswff,Dtset%localrdwf,nfftf_tot,ngfftf,spaceComm)
   deallocate(kxcg)
   ABI_CHECK(ABS(Er%omega(1))<tol4,'need frequency 0')

 else if (Er%fform==1003) then
   MSG_ERROR("fform==1003 is not supported")
 else
   inquire(file='rdc.in',exist=use_const_screen)
   if (use_const_screen) then
!    ?!  uses renormalized dielectric constant screening
!    ?   open(19,file='rdc.in')
!    ?   read(19,*) renormdielconst
!    ?   close(19)
!    ?   epsm1q(:,:,:) = 1.0 / renormdielconst
!    ?   write(msg,'(3a,f5.2)')&
!    ?&   ' Using constant static dielectric screening',ch10,&
!    ?&   ' renormalized dielectric constant = ',renormdielconst
!    ?   call wrtout(std_out,msg,"COLL")
     MSG_ERROR("not coded")
   else
     MSG_ERROR("file rdc.in not found")
   end if
 end if

 call timab(654,2,tsec) ! bse(rdmkeps^-1)
 call timab(655,1,tsec) ! bse(mkrho)

 nG01d = 2*Bsp%mg0(1)+1
 nG02d = 2*Bsp%mg0(2)+1
 nG03d = 2*Bsp%mg0(3)+1

 allocate(igfft(BSp%npwvec,nG01d,nG02d,nG03d))
 call cigfft(BSp%mg0,BSp%npwvec,ngfft_osc,Gsph_Max%gvec,igfft,ierr)

 if (ierr/=0) then
   write(msg,'(a,i4,3a)')&
&   ' Found ',ierr,' G-G0 vectors falling outside the FFT box. ',ch10,&
&   ' This is not allowed. Check mG0 and the setup of the GW FFT mesh. '
   MSG_ERROR(msg)
 end if

!=== Calculate the FFT index of $(R^{-1}(r-\tau))$ ===
!* S=\transpose R^{-1} and k_BZ = S k_IBZ
!* irottb is the FFT index of $R^{-1} (r-\tau)$ used to symmetrize u_Sk.

 allocate(irottb(nfftot_osc,Cryst%nsym))

 call rotate_FFT_mesh(Cryst%nsym,Cryst%symrel,Cryst%tnons,ngfft_osc,irottb,iscompatibleFFT)

 allocate(ktabr(nfftot_osc,Kmesh%nbz))
 do ik_bz=1,Kmesh%nbz
   isym=Kmesh%tabo(ik_bz)
   do ifft=1,nfftot_osc
     ktabr(ifft,ik_bz)=irottb(ifft,isym)
   end do
 end do

!===========================
!=== COMPUTE THE DENSITY ===
!===========================
!* Evaluate Planewave part (complete charge in case of NC pseudos).

 allocate(ks_rhor(nfftf,Dtset%nspden))
 call wfd_mkrho(Wfs,Cryst,Psps,Kmesh,KS_BSt,ngfftf,nfftf,ks_rhor)

!TODO this has to be done in a better way, moreover wont work for PAW
!Check Vcp!
!$ call cutoff_density(ngfftf,Dtset%nspden,Dtset%nsppol,Vcp,ks_rhor,MPI_enreg)
!
!=== Additional computation for PAW ===
 if (Dtset%usepaw==1) then

!  * Calculate the compensation charge nhat.
   nhatgrdim=0; if (Dtset%xclevel==2) nhatgrdim=usexcnhat*Dtset%pawnhatxc
   cplex=1; ider=2*nhatgrdim; izero=0; ipert=0; idir=0; qphon(:)=zero
   if (nhatgrdim>0) allocate(ks_nhatgr(nfftf,Dtset%nspden,3))

   call pawmknhat(compch_fft,cplex,ider,idir,ipert,izero,Cryst%gprimd,MPI_enreg_seq,&
&   Cryst%natom,Cryst%natom,nfftf,ngfftf,nhatgrdim,Dtset%nspden,Cryst%ntypat,Dtset%paral_kgb,Pawang,&
&   Pawfgrtab,ks_nhatgr,ks_nhat,KS_Pawrhoij,KS_Pawrhoij,Pawtab,qphon,Cryst%rprimd,Cryst%ucvol,Cryst%xred)

!  === Evaluate onsite energies, potentials, densities ===
!  * Initialize variables/arrays related to the PAW spheres.
!  * Initialize also lmselect (index of non-zero LM-moments of densities).
!  TODO call init_paw_ij in scfcv and respfn, fix small issues
   allocate(KS_paw_ij(Cryst%natom))
   call nullify_paw_ij(KS_paw_ij)

   cplex=1; cplex_dij=Dtset%nspinor; has_dijso=Dtset%pawspnorb; has_dijU=Dtset%usepawu
   has_dijso=Dtset%pawspnorb ; has_dijU=Dtset%usepawu

   call init_paw_ij(KS_paw_ij,cplex,cplex_dij,Dtset%nspinor,Dtset%nsppol,&
&   Dtset%nspden,Dtset%pawspnorb,Cryst%natom,Cryst%ntypat,Cryst%typat,Pawtab,&
&   has_dij=1,has_dijhartree=1,has_dijhat=1,has_dijxc=0,has_dijxc_val=0,&
&   has_dijso=has_dijso,has_dijU=has_dijU,has_exexch_pot=1,has_pawu_occ=1)

   allocate(KS_paw_an(Cryst%natom))
   call nullify_paw_an(KS_paw_an)

   nkxc1=0
   call init_paw_an(Cryst%natom,Cryst%ntypat,nkxc1,Dtset%nspden,cplex,Dtset%pawxcdev,&
&   Cryst%typat,Pawang,Pawtab,KS_paw_an,has_vxc=1,has_vxcval=0)
!  
!  === Calculate onsite vxc with and without core charge ===
   nzlmopt=-1; option=0; ipert=0; compch_sph=greatest_real

   call pawdenpot(compch_sph,KS_energies%e_paw,KS_energies%e_pawdc,ipert,&
&   Dtset%ixc,MPI_enreg_seq,Cryst%natom,Cryst%natom,Dtset%nspden,&
&   Cryst%ntypat,nzlmopt,option,Dtset%paral_kgb,KS_Paw_an,KS_Paw_an,KS_paw_ij,&
&   Pawang,Dtset%pawprtvol,Pawrad,KS_Pawrhoij,Dtset%pawspnorb,&
&   Pawtab,Dtset%pawxcdev,Dtset%spnorbscl,Dtset%xclevel,Psps%znuclpsp)
 end if !PAW

 if (.not.allocated(ks_nhatgr)) allocate(ks_nhatgr(nfftf,Dtset%nspden,0))

 call test_charge(nfftf,KS_BSt%nelect,Dtset%nspden,ks_rhor,Cryst%ucvol,&
& Dtset%usepaw,usexcnhat,Pawfgr%usefinegrid,compch_sph,compch_fft,drude_plsmf)
!
!=== For PAW, add the compensation charge on the FFT mesh, then get rho(G) ===
 if (Dtset%usepaw==1) ks_rhor(:,:)=ks_rhor(:,:)+ks_nhat(:,:)
 call prtrhomxmn(std_out,MPI_enreg_seq,nfftf,ngfftf,Dtset%nspden,1,ks_rhor)

 allocate(ks_rhog(2,nfftf))

 call fourdp(1,ks_rhog,ks_rhor(:,1),-1,MPI_enreg_seq,nfftf,ngfftf,Dtset%paral_kgb,tim_fourdp)

 deallocate(ks_rhog,ks_rhor)

 call timab(655,2,tsec) ! bse(mkrho)
 call timab(656,1,tsec) ! bse(mkexcham)

!Set up reduced BZ for symmetry reduction of Hamiltonian
!CALL IDENTSK (Kmesh%bz, NKBZ, NKBZMX, SOP, NSOPMX,NSOP, NKS, KRBZSTAB, KBZ_RBZ,b1,b2,b3)

!=============================================
!==== Calculate the excitonic Hamiltonian ====
!=============================================
 if (BSp%use_haydock) then
   call cexch_haydock(BSp,Dtfil,Dtset%usepaw,Dtset%nspinor,Dtset%nsppol,Cryst,Kmesh,Qmesh,ktabr,Gsph_Max,igfft,Vcp,&
&   Wfs,Er,Trans,transtab,nfftot_osc,ngfft_osc,MPI_enreg_seq,Psps,Pawtab,Pawang,Paw_pwff)
 else
   call cexch(BSp,Dtfil,BS_files,Dtset%usepaw,Dtset%nspinor,Dtset%nsppol,Cryst,Kmesh,Qmesh,ktabr,Gsph_Max,igfft,Vcp,&
&   Wfs,Er,Trans,transtab,nfftot_osc,ngfft_osc,MPI_enreg_seq,Psps,Pawtab,Pawang,Paw_pwff,spaceComm,Dtset%prtvol)
 end if

!NOTE: Now Er contains W but it useless at this point. Free the object.
 call destroy_Epsilonm1_results(Er)

 allocate(HUr(Cryst%natom*Dtset%usepaw))
 call nullify_Hur(HUr)
 if (Dtset%usepawu/=0) then !FIXME here I need Paw_ij
   MSG_ERROR("not tested")
   call make_Hur_commutator(Dtset%nsppol,Dtset%pawprtvol,Cryst,Psps,Pawtab,Pawang,Pawrad,KS_Paw_ij,Hur)
 end if

 call timab(656,2,tsec) ! bse(mkexcham)
 call timab(657,1,tsec) ! bse(mkexceps)

!======================================================
!=== Calculation of macroscopic dielectric function ===
!======================================================

 select case (BSp%coupling)

   case (.FALSE.) ! Only Resonant.

     if (.not.BS_files%in_eig_exists) then
       if (BSp%use_haydock) do_full_diago=.FALSE.

       if (do_full_diago) then ! Direct diagonalization of the excitonic hamiltonian
         call exceig(BSp%nh,BS_files,BS_files%out_eig,Dtset%prtvol,spaceComm)

         if (.FALSE.) then ! Cannot use nband, have to introduce bs_nstates
           bs_nstates=16
           call exc_iterative_diago(Bsp%nh,bs_nstates,Dtset%nstep,Dtset%nline,Dtset%nbdbuf,&
&           Dtset%tolwfr,BS_files,Dtset%prtvol,spaceComm)
         end if

       else if (BSp%use_haydock) then
         call haydock_diago(BSp,Dtset%nspinor,Dtset%nsppol,Dtset%usepaw,dtfil%fnameabo_exc_mdf,&
&         Cryst,Kmesh,KS_BSt%eig,Dtset%inclvkb,Wfs,Psps,Pawtab,Hur,transtab,spaceComm)

       else
         MSG_ERROR("check Bsp%")
       end if

     else
       call wrtout(std_out," Reading exciton eigenvalues/vectors from file: "//TRIM(BS_files%in_eig),"COLL")
     end if ! exist in_eig

   case (.TRUE.) ! Hamiltonian with Coupling

     if (.not.BS_files%in_eig_exists) then ! calculate eigenvectors considering coupling
       call exccoupl(BSp%nh,BS_files%out_eig,BS_files,Dtset%prtvol,spaceComm)
     else
       call wrtout(std_out," Reading exciton coupling eigenvectors from file: "//TRIM(BS_files%in_eig),"COLL")
     end if

     inquire(file='in.exovl',exist=ltemp)
     if (.not.ltemp) then ! Calculate overlap matrix between excitonic eigenvectors.
       call excoverlap(BSp%nh,BS_files%out_eig,BS_files,spaceComm)
     else
       call wrtout(std_out,' Reading exciton eigenvectors overlap from in.exovl',"COLL")
     end if

 end select

 if (.not.BSp%use_haydock) then
   call printexcevl(BSp%nh,BS_files%out_eig,gw_gap,excgap)

   call build_spectra(BSp,Dtset%nspinor,Dtset%nsppol,Dtset%usepaw,Dtfil,BS_files,Cryst,Kmesh,KS_BSt%eig,gwenergy,&
&   Psps,Pawtab,BSp%nh,Trans,Dtset%inclvkb,Wfs,Hur,drude_plsmf,spaceComm)

!  * calculate electron, hole, excited state density
   if (.FALSE.) then ! Not tested (at all)
     call excden(BSp,BS_files%out_eig,BSp%nh,trans,ngfft_osc,nfftot_osc,Kmesh,ktabr,Wfs)
   end if
 end if

 call timab(657,2,tsec) ! bse(mkexceps)

!=== Free memory ===
 deallocate(ktabr)
 deallocate(igfft)
 deallocate(irottb)
 deallocate(gwenergy)
 deallocate(Trans,transtab)

 deallocate(Pawfgr%fintocoa,Pawfgr%coatofin, STAT=istat)
 deallocate(ks_nhat,   STAT=istat)
 deallocate(ks_nhatgr, STAT=istat)

!* Destroy the dinamic arrays in the local data structures.
 call destroy_crystal(Cryst)
 call destroy_Gvectors(Gsph_Max)
 call destroy_bz_mesh_type(Kmesh)
 call destroy_bz_mesh_type(Qmesh)
 call wfd_destroy(Wfs) 

 call hdr_clean(Hdr_kss)
 call hdr_clean(Hdr_bse)
 call bstruct_clean(KS_BSt)
 call destroy_Coulombian(Vcp)
 call destroy_Hur(Hur)
 call destroy_bs_parameters(BSp)

 if (Dtset%usepaw==1) then ! Optional deallocation for PAW.
   call rhoij_free(KS_Pawrhoij);    deallocate(KS_Pawrhoij)
   call pawfgrtab_free(Pawfgrtab);  deallocate(Pawfgrtab)
   call destroy_paw_ij(KS_paw_ij);  deallocate(KS_paw_ij)
   call destroy_paw_an(KS_paw_an);  deallocate(KS_paw_an)
   call destroy_paw_pwff(Paw_pwff); deallocate(Paw_pwff)
 end if

 call timab(650,2,tsec) ! bse(Total)

 DBG_EXIT('COLL')

end subroutine bethe_salpeter
!!***
