!{\src2tex{textfont=tt}}
!!****f* ABINIT/screening
!! NAME
!! screening
!!
!! FUNCTION
!! Calculate screening and dielectric functions
!!
!! COPYRIGHT
!! Copyright (C) 2001-2009 ABINIT group (GMR, VO, LR, RWG, MT, MG, RShaltaf)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! acell(3)=length scales of primitive translations (bohr)
!! codvsn=code version
!! Dtfil<datafiles_type)>=variables related to file names and unit numbers.
!! iexit=exit flag
!! Pawang<pawang_type)>=paw angular mesh and related data
!! Pawrad(ntypat*usepaw) <type(pawrad_type)>=paw radial mesh and related data
!! Pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
!! Psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!  Before entering the first time in screening, a significant part of Psps has been initialized:
!!  the integers dimekb,lmnmax,lnmax,mpssang,mpssoang,mpsso,mgrid, ntypat,n1xccc,usepaw,useylm,
!!  and the arrays dimensioned to npsp. All the remaining components of Psps are to be initialized in
!!  the call to pspini. The next time the code enters screening, Psps might be identical to the
!!  one of the previous Dtset, in which case, no reinitialisation is scheduled in pspini.F90.
!! rprim(3,3)=dimensionless real space primitive translations
!! xred(3,natom) = reduced atomic coordinates
!!
!! OUTPUT
!! Output is written on the main output file.
!! The symmetrical inverse dielectric matrix is stored in the _SCR file
!!
!! SIDE EFFECTS
!!  Dtset<type(dataset_type)>=all input variables for this dataset
!!
!! NOTES
!! USE OF FFT GRIDS:
!! =================
!! In case of PAW:
!! ---------------
!!    Two FFT grids are used:
!!    - A "coarse" FFT grid (defined by ecut) for the application of the Hamiltonian on the plane waves basis.
!!      It is defined by nfft, ngfft, mgfft, ...
!!      Hamiltonian, wave-functions, density related to WFs (rhor here), ... are expressed on this grid.
!!    - A "fine" FFT grid (defined) by ecutdg) for the computation of the density inside PAW spheres.
!!      It is defined by nfftf, ngfftf, mgfftf, ...Total density, potentials, ... are expressed on this grid.
!! In case of norm-conserving:
!! ---------------------------
!!    - Only the usual FFT grid (defined by ecut) is used. It is defined by nfft, ngfft, mgfft, ...
!!      For compatibility reasons, (nfftf,ngfftf,mgfftf) are set equal to (nfft,ngfft,mgfft) in that case.
!!
!! PARENTS
!!      driver
!!
!! CHILDREN
!!      calc_wf_qp,ccgradvnl,cchi0,cchi0q0,cigfft,ckxcldag,cvc,density,distrb2
!!      fftwfn,findnq,findq,findshells,hdr_clean,hermitianize,identk
!!      lattice,leave_new,matcginv,matrginv,metric,mkrdim,pclock,printcm,printv
!!      rdgw,rdkss,rdlda,rdldaabinit,rdqps,setmesh,setshells,surot,testlda
!!      timab,wrscr,wrtout
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine screening(acell,codvsn,Dtfil,Dtset,iexit,MPI_enreg,Pawang,Pawrad,Pawtab,Psps,rprim,xred)

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use m_errors
 use m_paw_toolbox
 use m_electrons

 use m_gwdefs,        only : GW_TOLQ0, GW_TOLQ, czero_gw
 use m_numeric_tools, only : print_arr
 use m_geometry,      only : normv
 use m_crystal,       only : DestroyCrystal
 use m_bz_mesh,       only : destroy_bz_mesh_type
 use m_little_group,  only : destroy_little_group
 use m_gsphere,       only : destroy_Gvectors, nullify_Gpairs_type, destroy_Gpairs_type, init_Gpairs_type
 use m_coulombian,    only : destroy_Coulombian, cutoff_density
 use m_qparticles,    only : rdqps, rdgw, show_QP
 use m_io_screening,  only : init_ScrHdr, scr_hdr_io, write_screening, free_scrhdr
 use m_spectra,       only : dump_spectra, repr_dielconst, destroy_spectra, W_EM_LF, W_EM_NLF, W_EELF
 use m_fft_mesh,      only : rotate_FFT_mesh, cigfft
 use m_io_kss,        only : rdkss
 use m_wfs,           only : init_Wfs, destroy_Wfs, reinit_Wfs, nullify_Wfs, print_Wfs, calc_wf_qp, calc_wf_qp_Wfval

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_12_hide_mpi
 use interfaces_14_hidewrite
 use interfaces_18_timing
 use interfaces_27_toolbox_oop
 use interfaces_28_numeric_noabirule
 use interfaces_32_util
 use interfaces_42_geometry
 use interfaces_51_manage_mpi
 use interfaces_53_ffts
 use interfaces_56_recipspace
 use interfaces_62_iowfdenpot
 use interfaces_65_nonlocal
 use interfaces_65_psp
 use interfaces_66_paw
 use interfaces_67_common
 use interfaces_68_gw
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(inout) :: iexit
 character(len=6),intent(in) :: codvsn
 type(Datafiles_type),intent(in) :: Dtfil
 type(Dataset_type),intent(inout) :: Dtset
 type(MPI_type),intent(inout) :: MPI_enreg
 type(Pawang_type),intent(inout) :: Pawang
 type(Pseudopotential_type),intent(inout) :: Psps
!arrays
 real(dp),intent(in) :: acell(3),rprim(3,3),xred(3,Dtset%natom)
 type(Pawrad_type),intent(inout) :: Pawrad(Psps%ntypat*Dtset%usepaw)
 type(Pawtab_type),intent(inout) :: Pawtab(Psps%ntypat*Dtset%usepaw)

!Local variables ------------------------------
 character(len=4) :: ctype='RPA ',tag
!scalars
 integer,parameter :: level=23,tim_fourdp=4,NOMEGA_PRINTED=15
 integer,save :: nsym_old=-1
 integer :: choice,cplex,cplex_dij,dim1_rhox=0,dim2_rhox=0,dim_kxcg,dim_wing
 integer :: fileID,fform_chi0,fform_em1,iat,iband,ider,idir,ierr
 integer :: ifft,ii,ik,ikbz,ikq,ikxc,ilmn,initialized,iomega,ios,ipert
 integer :: iqibz,iqcalc,irank,ispden,isppol,is_qeq0,istat,isym,itypat,itype,izero,jj
 integer :: label,lm_size,lmn2_size,localrdwf_,master,mgfftf,mgfftgw
 integer :: mkmem_,moved_atm_inside,moved_rhor,my_maxb,my_minb
 integer :: my_nbnds,nG01d,nG02d,nG03d,nbcw,nbsc,nbvw,nkxc,nkxc1,n3xccc,optene,istep
 integer :: nfftf,nfftf_tot,nfftgw,nfftgw_tot,nhatgrdim,nprocs,nspden_rhoij
 integer :: nscf,ntasks,nzlmopt,ks_iv,qp_iv
 integer :: optgr0,optgr1,optgr2,option,approx_type,option_test,optrad,optrhoij,psp_gencond,rank,rdwr
 integer :: rhoxsp_method,spaceComm,spaceComm_q,test_type,tordering,unem1ggp=886,unt_em1,unt_susc,usexcnhat,vkb_dim
 real(dp) :: boxcut,boxcutc,compch_fft,compch_sph,diecut_eff_dum,domegareal,e0
 real(dp) :: ecore,ecut_eff,ecutdg_eff,efermi,nelect
 real(dp) :: gsqcutc_eff,gsqcutf_eff,omegaplasma,ucvol,vxcavg
 logical :: found,has_qeq0,iscompatibleFFT,ltest,have_valence,is_first_qcalc,use_symmetries
 logical :: update_energies=.FALSE.
 character(len=10) :: string
 character(len=500) :: msg
 character(len=80) :: bar
 character(len=fnlen) :: filnam,fname_scr,fname_susc
 type(Bandstructure_type) :: KS_BSt,QP_BSt
 type(BZ_mesh_type) :: Kmesh,Qmesh
 type(Coulombian_type) :: Vcp
 type(Crystal_structure) :: Cryst
 type(Epsilonm1_parameters) :: Ep
 type(Energies_type) :: KS_energies
 type(Gpairs_type) :: Gpairs_q
 type(Gvectors_type) :: Gsph_epsG0,Gsph_wfn
 type(Hdr_type) :: Hdr_kss,Hdr_local
 type(MPI_type) :: MPI_enreg_seq
 type(Pawfgr_type) :: Pawfgr
 type(ScrHdr_type) :: Hem1,Hchi0
 type(Wavefunctions_information) :: Wf,Wf_val
 type(Spectra_type) :: Spectra
!arrays
 integer,save :: paw_gencond(6)=(/-1,-1,-1,-1,-1,-1/)
 integer :: ibocc(Dtset%nsppol),ngfft_gw(18),ngfftc(18),ngfftf(18)
 integer,allocatable :: dimlmn(:),gvec(:,:),gw_distrb(:,:,:),igfftf(:),irottb(:,:),irottb_test(:,:)
 integer,allocatable :: istart(:),istop(:),ktabr(:,:),l_size_atm(:)
 integer,allocatable :: ks_vbik(:,:),ks_occ_idx(:,:),qp_vbik(:,:),qp_occ_idx(:,:),nlmn(:)
 integer,allocatable :: igfft(:,:,:,:)
 real(dp) :: gmet(3,3),gprimd(3,3),k0(3),qtmp(3),rmet(3,3),rprimd(3,3),tsec(2),strsxc(6)
 real(dp),allocatable :: chi0sumrule(:),en_qp(:,:,:)
 real(dp),allocatable :: nhat(:,:),nhatgr(:,:,:)
 real(dp),allocatable :: dummy_ene(:,:,:),dummy_occ(:,:,:)
 real(dp),allocatable :: pawrhox_spl(:,:,:,:,:)
 real(dp),allocatable :: ph1d(:,:),ph1df(:,:)
 real(dp),allocatable :: rhog(:,:),rhor(:,:),rhor_p(:,:)
 real(dp),allocatable :: vkb(:,:,:,:),vkbd(:,:,:,:),vkbsign(:,:),z(:),zw(:)
 real(dp),allocatable :: grewtn(:,:),xred_dummy(:,:),kxc(:,:)
 real(dp),allocatable :: ks_vhartr(:),vpsp(:),ks_vtrial(:,:),ks_vxc(:,:),xccc3d(:)
 complex(gwpc),allocatable :: kxcg(:,:)
 complex(dpc),allocatable :: m_lda_to_qp(:,:,:,:)
 complex(dpc),allocatable :: lwing(:,:,:),uwing(:,:,:)
 complex(gwpc),allocatable,target :: chi0(:,:,:)
 complex(gwpc),pointer :: epsm1(:,:,:),vc_sqrt(:)
 logical,allocatable :: mask(:)
 character(len=80) :: title(2)
 character(len=fnlen) :: tmpfil(7)
 character(len=fnlen) :: fname
 type(Cprj_type),allocatable :: Cprj_bz(:,:),Cprj_ibz(:,:)
 type(Little_group),pointer :: Ltg_q(:)
 type(Paw_an_type),allocatable :: Paw_an(:)
 type(Paw_ij_type),allocatable :: Paw_ij(:)
 type(Pawfgrtab_type),allocatable :: Pawfgrtab(:)
 type(Pawrhoij_type),allocatable :: Pawrhoij(:)

#undef HAVE_GW_CUTOFF
#if defined HAVE_GW_CUTOFF
 ! * Variables added for cutoffed matrix elements
 integer :: direction
 real(dp) :: width,z0
#endif

!************************************************************************

 DBG_ENTER("COLL")

 call timab(301,1,tsec) ! overall time
 call timab(302,1,tsec) ! screening(1)

 write(msg,'(6a)')&
& ' SCREENING: Calculation of the susceptibility and dielectric matrices ',ch10,ch10,&
& ' Based on a program developped by R.W. Godby, V. Olevano, G. Onida, and L. Reining.',ch10,&
& ' Incorporated in ABINIT by V. Olevano, G.-M. Rignanese, and M. Torrent.'
 call wrtout(ab_out, msg,'COLL')
 call wrtout(std_out,msg,'COLL')

#if defined HAVE_GW_DPC
 if (gwpc/=8) then
  write(msg,'(6a)')ch10,&
&  ' Number of bytes for double precision complex /=8 ',ch10,&
&  ' Cannot continue due to kind mismatch in BLAS library ',ch10,&
&  ' Some BLAS interfaces are not generated by abilint '
  MSG_ERROR(msg)
 end if
 write(msg,'(a,i2,a)')'.Using double precision arithmetic ; gwpc = ',gwpc,ch10
#else
 write(msg,'(a,i2,a)')'.Using single precision arithmetic ; gwpc = ',gwpc,ch10
#endif
 call wrtout(std_out,msg,'COLL')
 call wrtout(ab_out, msg,'COLL')
!
!=== Initialize MPI variables, and parallelization level ===
!* gwpara: 0--> sequential run, 1--> parallelism over k-points, 2--> parallelism over bands.
!* If gwpara==2, each node has both fully and partially occupied states while conduction bands are divided
 call xmpi_nproc(nprocs,ierr)
 if (nprocs==1) Dtset%gwpara=0
 MPI_enreg%gwpara     =Dtset%gwpara
 MPI_enreg%parareel   =0
 MPI_enreg%paralbd    =0
 MPI_enreg%paral_level=2 ! This means k-points but it is not used
 MPI_enreg%me_fft     =0
 MPI_enreg%nproc_fft  =1

 call xcomm_init  (MPI_enreg,spaceComm)
 call xmaster_init(MPI_enreg,master   )
 call xme_init    (MPI_enreg,rank     )

!* Fake MPI_type for the sequential part.
 call initmpi_seq(MPI_enreg_seq) 
!MPI_enreg_seq%nproc_fft=1 

!=== Create names for the temporary files based on Dtfil%filnam_ds(5) ===
!* localrdwf defines the IO locality:
!1--> Each node has access to files (default).
!0--> Only master has access.
!* accesswff defines the format of the output:
!1--> Plain Fortran file
!2--> Set all outputs to netcdf format (not implemented)
!3--> Set all outputs to ETSF format

 tmpfil(1)=TRIM(Dtfil%filnam_ds(5))//'_WF1'
 tmpfil(2)=TRIM(Dtfil%filnam_ds(5))//'_WF2'
 tmpfil(3)=TRIM(Dtfil%filnam_ds(5))//'_KG'
 tmpfil(4)=TRIM(Dtfil%filnam_ds(5))//'_DUM' !(real dummy name)
 tmpfil(6)=TRIM(Dtfil%filnam_ds(5))//'_YLM'
 tmpfil(7)=TRIM(Dtfil%filnam_ds(5))//'_PAW'
!For parallel case: the index of the processor must be appended.
 if (MPI_enreg%paral_compil_kpt==1) then
  call int2char4(MPI_enreg%me,tag)
  jj=1 ; if (MPI_enreg%paral_compil_mpio==1 .and. Dtset%accesswff==1) jj=3
  do ii=jj,7
   tmpfil(ii)=TRIM(tmpfil(ii))//'_P-'//tag
  end do
 end if

!=== Some variables need to be initialized/nullify at start ===
 call energies_init(KS_energies)
 usexcnhat=0

!Nullify the pointers in the data types.
 call nullify_Gpairs_type(Gpairs_q)

 call mkrdim(acell,rprim,rprimd)
 call metric(gmet,gprimd,ab_out,rmet,rprimd,ucvol)
!
!=== Define FFT grid(s) sizes ===
!* Be careful! This mesh is only used for densities and potentials. It is NOT the (usually coarser)
!GW FFT mesh employed for the oscillator matrix elements that is defined in setmesh.F90.
!See also NOTES in the comments at the beginning of this file.
!* NOTE: The mesh is defined in invars2m using ecutwfn, in GW Dtset%ecut is forced to be equal to Dtset%ecutwfn.

 k0(:)=zero
 call init_pawfgr(Dtset,k0,gmet,Pawfgr,mgfftf,nfftf,ecut_eff,ecutdg_eff,gsqcutc_eff,gsqcutf_eff,ngfftc,ngfftf)

 call print_ngfft(ngfftf,'FFT mesh for densities and potentials')
 nfftf_tot=PRODUCT(ngfftf(1:3))
!
!=== Open and read pseudopotential files ===
 call status(0,Dtfil%filstat,iexit,level,'call pspini   ')

 call pspini(Dtset,ecore,psp_gencond,gsqcutc_eff,gsqcutf_eff,level,Pawrad,Pawtab,Psps,rprimd)
 if (psp_gencond==1) call print_psps(Psps,std_out,0,'COLL')

!=== Initialize dimensions and basic objects ===
 call setup_screening(codvsn,acell,rprim,ngfftf,Dtset,Dtfil,Psps,Pawtab,MPI_enreg,&
& ngfft_gw,Hdr_kss,Hdr_local,Cryst,Kmesh,Qmesh,KS_BSt,Ltg_q,Gsph_epsG0,Gsph_wfn,Vcp,Ep)

 call print_ngfft(ngfft_gw,'FFT mesh for oscillator strengths')

 nfftgw_tot=PRODUCT(ngfft_gw(1:3))
 mgfftgw   =MAXVAL (ngfft_gw(1:3))
 nfftgw    =nfftgw_tot ! no FFT //

!TRYING TO RECREATE AN "ABINIT ENVIRONMENT"
 KS_energies%e_corepsp=ecore/Cryst%ucvol

!=====================================================
!=== Prepare the distribution of the wavefunctions ===
!=====================================================
!* If have_valence==true, valence and partially occupied are stored in Wf_val 
!while conduction bands are divided on each node and stored in Wf.
!This method is mandatory if gwpara==2 and/or we are using awtr==1 or the spectral method.
!* If awtr==1, we evaluate chi0 taking advantage of time-reversal (speed-up~2)
!* Useful indeces: 
!nbvw = Max. number of fully/partially occupied states over spin 
!nbcw = Max. number of unoccupied states considering the spin 
!TODO: Here for semiconducting systems we have to be sure that each processor has all the 
!states considered in the SCGW, moreover nbsc<nbvw
!TODO in case of SCGW vale and conduction has to be recalculated to avoid errors 
!if a metal becomes semiconductor or viceversa.
!TODO: Ideally nbvw should include only the states v such that the transition
!c-->v is taken into account in cchi0 (see GW_TOLDOCC). In the present implementation
!Wf_val contains all the states whose occupation is less than tol8. Due the the long
!tail of the smearing function it happens that a large number of states are allocated
!on each node. This facilitate the calculation of the density but it is a waste of memory
!and CPU time in cchi0. 
!Solution: change calc_density to perform the calculation in parallel also if gwpara==2
!wf_val should contain only the occupied states contributing to chi0

 allocate(ks_occ_idx(KS_BSt%nkpt,KS_BSt%nsppol))
 allocate(ks_vbik   (KS_BSt%nkpt,KS_BSt%nsppol))
 allocate(qp_vbik   (KS_BSt%nkpt,KS_BSt%nsppol))

 call update_occ(KS_BSt,Dtset%fixmom,prtvol=0)
 ks_occ_idx = get_occupied(KS_BSt,tol8) ! tol8 to be consistent when the density 
 ks_vbik    = get_valence_idx(KS_BSt)   ! is calculated locally using Wf_val without MPI.

!* Get ibocc, i.e Max occupied band index for each spin.
 ibocc(:)=MAXVAL(ks_occ_idx(:,:),DIM=1)
 deallocate(ks_occ_idx)

 have_valence=.FALSE. ; nbvw=0
!if (Ep%awtr==1.or.Dtset%spmeth/=0) then
 if (MPI_enreg%gwpara==2.or.Ep%awtr==1.or.Dtset%spmeth>0) then
  have_valence=.TRUE. 
  nbvw=MAXVAL(ibocc) 
  nbcw=Ep%nbnds-nbvw
  write(msg,'(4a,i5,2a,i5,2a,i5)')ch10,&
&  ' screening : taking advantage of time-reversal symmetry ',ch10,&
&  ' Maximum band index for partially occupied states nbvw = ',nbvw,ch10,&
&  ' Remaining bands to be divided among processors   nbcw = ',nbcw,ch10,&
&  ' Number of bands treated by each node ~',nbcw/nprocs
  call wrtout(ab_out,msg,'COLL')
  call wrtout(std_out,msg,'PERS')
  if (Cryst%timrev/=2) then 
   write(msg,'(a)')'Time-reversal cannot be used since Cryst%timrev/=2'
   MSG_ERROR(msg)
  end if
  if (MPI_enreg%gwpara/=0.and.MPI_enreg%gwpara/=2) then
   msg='Only sequential version or band parallelism are compatible with time-reversal trick or spectral method'
   MSG_ERROR(msg)
  end if
 end if

 my_minb=1 
 my_maxb=Ep%nbnds ; my_nbnds=my_maxb-my_minb+1
 allocate(MPI_enreg%proc_distrb(Ep%nkibz,Ep%nbnds,Ep%nsppol))
 MPI_enreg%proc_distrb(:,:,:)=rank

 if (MPI_enreg%gwpara==2) then
  write(msg,'(2a)')ch10,' loop over bands done in parallel (assuming time reversal!)'
  call wrtout(std_out,msg,'COLL')

  MPI_enreg%proc_distrb = -999
  allocate(istart(nprocs),istop(nprocs))

  if (Dtset%gwcomp==0) then
!  * No completeness trick, each proc has fully and partially occupied states.
   MPI_enreg%proc_distrb(:,1:nbvw,:)=rank
   call split_work2(nbcw,nprocs,istart,istop)
   my_minb=nbvw+istart(rank+1) 
   my_maxb=nbvw+istop (rank+1)
   do irank=0,nprocs-1
    MPI_enreg%proc_distrb(:,nbvw+istart(irank+1):nbvw+istop(irank+1),:)=irank
   end do
  else
!  * Use completeness trick, divide entire set of states.
   call split_work2(Ep%nbnds,nprocs,istart,istop)
   my_minb=istart(rank+1) 
   my_maxb=istop (rank+1)
   do irank=0,nprocs-1
    MPI_enreg%proc_distrb(:,istart(irank+1):istop(irank+1),:)=irank
   end do
  end if

  my_nbnds=my_maxb-my_minb+1
  if (my_nbnds<=0) then
   write(msg,'(3a,2(i4,a),a)')&
&   ' One or more processors has zero number of bands ',ch10,&
&   ' my_minb= ',my_minb,' my_maxb= ',my_maxb,ch10,&
&   ' This is a waste, decrease the number of processors. '
   MSG_ERROR(msg)
  end if
  deallocate(istart,istop)
! * Announce the treatment of bands by each node.
  do irank=0,nprocs-1
   if (irank==rank) then
    write(msg,'(4(a,i4))')&
&    ' treating ',my_nbnds,' bands from ',my_minb,' up to ',my_maxb,' by node ',irank
    call wrtout(std_out,msg,'PERS')
   end if
  end do
 end if

!==========================
!=== PAW initialization ===
!==========================
 allocate(Cprj_ibz(Cryst%natom,Dtset%nspinor*Ep%nbnds*Ep%nkibz*Ep%nsppol*Dtset%usepaw), STAT=istat)
 if (istat/=0) then
  MSG_ERROR('out-of-memory in Cprj_ibz')
 end if

 if (Dtset%usepaw==1) then

  call chkpawovlp(Cryst%natom,Cryst%ntypat,Dtset%pawovlp,Pawtab,Cryst%rmet,Cryst%typat,Cryst%xred)
  allocate(dimlmn(Cryst%natom))
  do iat=1,Cryst%natom
   dimlmn(iat)=Pawtab(Cryst%typat(iat))%lmn_size
  end do

  call cprj_alloc(Cprj_ibz,0,dimlmn)
  allocate(Pawrhoij(Cryst%natom),nlmn(Cryst%ntypat))
  do itypat=1,Cryst%ntypat
   nlmn(itypat)=Pawtab(itypat)%lmn_size
  end do
  nspden_rhoij=Dtset%nspden ; if (Dtset%pawspnorb>0.and.Dtset%nspinor==2) nspden_rhoij=4
  call rhoij_alloc(Dtset%pawcpxocc,nlmn,nspden_rhoij,Dtset%nsppol,Pawrhoij,Cryst%typat)
  deallocate(nlmn)
! 
! === Initialize values for several basic arrays stored in Pawinit ===
! TODO Check pawxcdev>2 since gaunt coefficients are allocated with different sizes
  if (psp_gencond==1.or.&
&  paw_gencond(1)/=Dtset%pawlcutd .or.paw_gencond(2)/=Dtset%pawlmix  .or.&
&  paw_gencond(3)/=Dtset%pawnphi  .or.paw_gencond(4)/=Dtset%pawntheta.or.&
&  paw_gencond(5)/=Dtset%pawspnorb.or.paw_gencond(6)/=Dtset%pawxcdev) then

   call timab(553,1,tsec)
   diecut_eff_dum=ABS(Dtset%diecut)*Dtset%dilatmx**2

   call pawinit(diecut_eff_dum,Psps%indlmn,Dtset%pawlcutd,Dtset%pawlmix,Psps%lmnmax,Psps%mpsang,&
&   Dtset%pawnphi,Cryst%nsym,Dtset%pawntheta,Cryst%ntypat,Pawang,Pawrad,Dtset%pawspnorb,Pawtab,Dtset%pawxcdev)

   paw_gencond(1)=Dtset%pawlcutd  ; paw_gencond(2)=Dtset%pawlmix
   paw_gencond(3)=Dtset%pawnphi   ; paw_gencond(4)=Dtset%pawntheta
   paw_gencond(5)=Dtset%pawspnorb ; paw_gencond(6)=Dtset%pawxcdev
   call timab(553,2,tsec)
  end if
  Psps%n1xccc=MAXVAL(Pawtab(1:Cryst%ntypat)%usetcore)

! Initialize optional flags in Pawtab to zero 
! (Cannot be done in Pawinit since the routine is called only if some pars. are changed)
  Pawtab(:)%has_nabla = 0 
  Pawtab(:)%usepawu   = 0
  Pawtab(:)%useexexch = 0
  Pawtab(:)%exchmix   =zero

! * Evaluate <phi_i|nabla|phi_j>-<tphi_i|nabla|tphi_j> for the long wavelength limit.
! TODO solve problem with memory leak
  call pawnabla_init(Psps%mpsang,Psps%lmnmax,Cryst%ntypat,Psps%indlmn,Pawrad,Pawtab)

! FIXME see above. Note that here nsym_old comes from KSS.
! if (psp_gencond==1.or.nsym_old/=Cryst%nsym) then
  call setsymrhoij(gprimd,Pawang%l_max-1,Cryst%nsym,Dtset%pawprtvol,rprimd,Cryst%symafm,Cryst%symrec,Pawang%zarot)
  nsym_old=Cryst%nsym
! end if

! * Initialize and compute data for LDA+U.
  if (Dtset%usepawu>0.or.Dtset%useexexch>0) then
   call pawpuxinit(Dtset%dmatpuopt,Dtset%exchmix,Dtset%jpawu,Dtset%lexexch,Dtset%lpawu,&
&   Psps%indlmn,Psps%lmnmax,Cryst%ntypat,Pawang,Dtset%pawprtvol,Pawrad,Pawtab,Dtset%upawu,&
&   Dtset%useexexch,Dtset%usepawu)

   if (Dtset%useexexch>0) then 
    msg='Local exact exchanged not yet implemented in GW'
    MSG_ERROR(msg)
   end if
  end if

  if (rank==master) call print_pawtab(Pawtab)

! === Eventually open temporary storage file ===
! FIXME also mkmem_ not yet defined
! if (mkmem_==0) then
! open(Dtfil%unpaw,file=tmpfil(7),form='unformatted',status='unknown')
! rewind(unit=Dtfil%unpaw)
! end if

! === Get Pawrhoij from the header of the KSS file ===
  call rhoij_copy(Hdr_kss%Pawrhoij,Pawrhoij)

! === Re-symmetrize symrhoij ===
! this call leads to a SIGFAULT, likely some pointer is not initialized correctly
  choice=1 ; optrhoij=1 ; ipert=0 ; idir=0
! call symrhoij(choice,Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%natom,Cryst%nsym,&
! &  Cryst%ntypat,optrhoij,Pawang,Dtset%pawprtvol,Pawrhoij,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)
! 
! === Evaluate form factors for the radial part of phi.phj-tphi.tphj ===
! rhoxsp_method=1
  rhoxsp_method=2
  if (Dtset%userie==1) rhoxsp_method=1
  if (Dtset%userie==2) rhoxsp_method=2
  if (rhoxsp_method==1) then ! Arnaud-Alouani
   dim1_rhox=2*(Psps%mpsang-1)
   dim2_rhox=Psps%lnmax*(Psps%lnmax+1)/2
  else if (rhoxsp_method==2) then  ! Shiskin-Kresse
   dim1_rhox=MAXVAL(Pawtab(:)%l_size)**2
   dim2_rhox=MAXVAL(pawtab(:)%lmn2_size)
  end if
  allocate(pawrhox_spl(Psps%mqgrid_ff,2,0:dim1_rhox,dim2_rhox,Cryst%ntypat))

  call paw_mkrhox_spl(Cryst%ntypat,Psps,Pawrad,Pawtab,pawrhox_spl,rhoxsp_method,dim1_rhox,dim2_rhox)
! 
! === Variables/arrays related to the fine FFT grid ===
  allocate(nhat(nfftf,Dtset%nspden))
  nhat(:,:)=zero ; cplex=1
  allocate(Pawfgrtab(Cryst%natom),l_size_atm(Cryst%natom))
  do iat=1,Cryst%natom
   l_size_atm(iat)=Pawtab(Cryst%typat(iat))%l_size
  end do
  call pawfgrtab_init(Pawfgrtab,cplex,l_size_atm,Dtset%nspden)
  deallocate(l_size_atm)
  compch_fft=greatest_real
  usexcnhat=MAXVAL(Pawtab(:)%vlocopt)
! * 0 --> Vloc in atomic data is Vbare    (Blochl s formulation)
! * 1 --> Vloc in atomic data is VH(tnzc) (Kresse s formulation)
  write(msg,'(a,i3)')' screening : using usexcnhat = ',usexcnhat
  call wrtout(std_out,msg,'COLL')
! 
! === Identify parts of the rectangular grid where the density has to be calculated ===
  call status(0,Dtfil%filstat,iexit,level,'call nhatgrid ')

  optgr0=Dtset%pawstgylm ; optgr1=0 ; optgr2=0 ; optrad=1-Dtset%pawstgylm
  if (Dtset%xclevel==2.and.usexcnhat>0) optgr1=Dtset%pawstgylm

  call nhatgrid(Cryst%atindx1,gmet,MPI_enreg_seq,Cryst%natom,Cryst%nattyp,nfftf,ngfftf,Cryst%ntypat,&
&  optgr0,optgr1,optgr2,optrad,Pawfgrtab,Pawtab,Cryst%rprimd,Cryst%typat,Cryst%ucvol,Cryst%xred)

 end if ! End of PAW initialization.

!Allocate these arrays anyway, since they are passed to subroutines.
 if (.not.allocated(pawrhox_spl)) allocate(pawrhox_spl(1,1,1,1,0))  
 if (.not.allocated(nhat       )) allocate(nhat(nfftf,0)) 

!==================================================
!==== Read KS band structure from the KSS file ====
!==================================================

!=== Initialize the Wf_info object ===
!* Allocate %wfg and if required, also %wfr.
 call nullify_Wfs(Wf_val)

 call init_Wfs(Wf,Dtset%gwmem,Dtset%paral_kgb,Ep%npwwfn,my_minb,my_maxb,Ep%nkibz,&
& Dtset%nsppol,Dtset%nspden,Dtset%nspinor,ngfft_gw,Gsph_wfn%gvec,MPI_enreg)
!
!=== Allocate Kleynmann-Bylander form factors, derivatives and sign ===
!* PAW does not need vkb 
!* In case of partial calculation, allocate stuff only if gamma is in the list.
!TODO all this stuff should be encapsulated in an object.
 has_qeq0=.TRUE.
 if (Ep%nqcalc/=Ep%nqibz) then
  do iqcalc=1,Ep%nqcalc
   has_qeq0=(normv(Ep%qcalc(:,iqcalc),gmet,'G')<GW_TOLQ0)
   if (has_qeq0) EXIT
  end do
 end if

 vkb_dim=0 ; if (Dtset%usepaw==0.and.Ep%inclvkb/=0.and.has_qeq0) vkb_dim=1
 allocate(vkb (Ep%npwwfn,Cryst%ntypat,Psps%mpsang,Ep%nkibz*vkb_dim),STAT=istat)
 allocate(vkbd(Ep%npwwfn,Cryst%ntypat,Psps%mpsang,Ep%nkibz*vkb_dim),STAT=istat)
 allocate(vkbsign(Psps%mpsang,Cryst%ntypat*vkb_dim))

 allocate(gvec(3,Ep%npwvec))
 allocate(dummy_ene(Ep%nkibz,Ep%nbnds,Ep%nsppol)) ; dummy_ene(:,:,:)=zero
 allocate(dummy_occ(Ep%nkibz,Ep%nbnds,Ep%nsppol)) ; dummy_occ(:,:,:)=zero

 if (have_valence) then 
! * Store occupied and empty states in two different arrays to use faster equation based 
! on time reversal symmetry or spectral method. Note that if nsppol==2 than wfg_val 
! might also contain unoccupied states, this case is treated inside cchi0 and cchi0q0
! TODO gwpara==2 should be the default.
! * accesswf to use plain Fortran-IO or ETSF-IO.
  call init_Wfs(Wf_val,Dtset%gwmem,Dtset%paral_kgb,Ep%npwwfn,1,nbvw,Ep%nkibz,&
&  Ep%nsppol,Dtset%nspden,Dtset%nspinor,ngfft_gw,Gsph_wfn%gvec,MPI_enreg)

  call rdkss(Dtfil,Dtset%usepaw,Pawtab,Cryst%nsym,Ep%nbnds,nbvw,Ep%nkibz,Ep%npwvec,Dtset%nspinor,Ep%nsppol,Ep%npwwfn,&
&  gvec,dummy_ene,dummy_occ,Wf%wfg,Cprj_ibz,Cryst%ntypat,Cryst%natom,Psps%mpsang,vkb_dim,vkbsign,vkb,vkbd,nelect,&
&  MPI_enreg,my_minb,my_maxb,Dtset%accesswff,Dtset%localrdwf,Dtset%prtvol,wfgval_out=Wf_val%wfg)
 else 
! * Old implementation: store all the wavefunctions in a unique arrays
! * It can be used if the system does not present time reversal symmetry 
  call rdkss(Dtfil,Dtset%usepaw,Pawtab,Cryst%nsym,Ep%nbnds,nbvw,Ep%nkibz,Ep%npwvec,Dtset%nspinor,Ep%nsppol,Ep%npwwfn,&
&  gvec,dummy_ene,dummy_occ,Wf%wfg,Cprj_ibz,Cryst%ntypat,Cryst%natom,Psps%mpsang,vkb_dim,vkbsign,vkb,vkbd,nelect,&
&  MPI_enreg,my_minb,my_maxb,Dtset%accesswff,Dtset%localrdwf,Dtset%prtvol) 
 end if 

 deallocate(dummy_ene,dummy_occ)
 if (Cryst%nsym/=Dtset%nsym .and. Dtset%usepaw==1) stop 'Cryst%nsym/=Dtset%nsym, check pawinit and symrhoij'

!=== Define the G-G0 shift for the FFT of the oscillators ===
!* Ep%mG0 gives the MAX G0 component to account for umklapps.
!* Note that we use Ep%npwepG0 to dimension.
!TODO Should take into account possible FFT //. 
 nG01d = 2*Ep%mG0(1)+1 
 nG02d = 2*Ep%mG0(2)+1 
 nG03d = 2*Ep%mG0(3)+1

 allocate(igfft(Ep%npwepG0,nG01d,nG02d,nG03d))
 call cigfft(Ep%mG0,Ep%npwepG0,ngfft_gw,gvec,igfft,ierr)

 if (ierr/=0) then
  write(msg,'(a,i4,3a)')&
&  ' Found ',ierr,' G-G0 vectors falling outside the FFT box. ',ch10,&
&  ' This is not allowed. Check mG0 and the setup of the FFT mesh. '
  MSG_ERROR(msg)
 end if
!
!=== Get the FFT index of $ (R^{-1}(r-\tau)) $ === 
!* S= $\transpose R^{-1}$ and k_BZ = S k_IBZ
!* irottb is the FFT index of $ R^{-1} (r-\tau) $ used to symmetrize u_Sk.
 allocate(irottb(nfftgw,Cryst%nsym))
 call rotate_FFT_mesh(Cryst,ngfft_gw,irottb,iscompatibleFFT)

 allocate(ktabr(nfftgw,Kmesh%nbz))
 do ikbz=1,Kmesh%nbz
  isym=Kmesh%tabo(ikbz)
  do ifft=1,nfftgw
   ktabr(ifft,ikbz)=irottb(ifft,isym)
  end do
 end do
 deallocate(irottb)

!=== Compute structure factor phases and large sphere cut-off ===
!WARNING cannot use Dtset%mgfft, this has to be checked better
!mgfft=MAXVAL(ngfftc(:))
!allocate(ph1d(2,3*(2*mgfft+1)*Cryst%natom),ph1df(2,3*(2*mgfftf+1)*Cryst%natom))
 write(std_out,*)' CHECK ',Dtset%mgfftdg,mgfftf
 if (Dtset%mgfftdg/=mgfftf) then
  write(std_out,*)"WARNING Dtset%mgfftf /= mgfftf"
! write(std_out,*)'HACKING Dtset%mgfftf'
! Dtset%mgfftdg=mgfftf
 end if
 allocate(ph1d(2,3*(2*Dtset%mgfft+1)*Cryst%natom),ph1df(2,3*(2*mgfftf+1)*Cryst%natom))
 call status(0,Dtfil%filstat,iexit,level,'call getph    ')
 call getph(Cryst%atindx,Cryst%natom,ngfftc(1),ngfftc(2),ngfftc(3),ph1d,Cryst%xred)

 if (Psps%usepaw==1.and.Pawfgr%usefinegrid==1) then
  call getph(Cryst%atindx,Cryst%natom,ngfftf(1),ngfftf(2),ngfftf(3),ph1df,Cryst%xred)
 else
  ph1df(:,:)=ph1d(:,:)
 end if

!=== Symmetrize <Proj_i|Cnk> to have them in the Full Brillouin zone ===
 allocate(Cprj_bz(Cryst%natom,Dtset%nspinor*Ep%nbnds*Kmesh%nbz*Ep%nsppol*Dtset%usepaw))
 if (Dtset%usepaw==1) then
  call cprj_alloc(Cprj_bz,0,dimlmn)
! TODO add rotation in spinor space
  call paw_symcprj(Pawtab,Cryst,Dtset%nspinor,Ep%nbnds,KS_BSt%nband,Dtset%nsppol,Psps,Kmesh,Cprj_ibz,Pawang,dimlmn,Cprj_bz)
 end if

 call timab(302,2,tsec) ! screening(1)

 call timab(303,1,tsec) ! screening(fftwfn
 call timab(303,2,tsec)

!=== Initialize QP_BSt using KS bands === 
!* In case of SCGW, update QP_BSt using the QPS file.
 call copy_bandstructure(KS_BSt,QP_BSt)

!=== Self-consistent GW === 
!* Read Unitary transformation and QP energies.
!* Calculate new density.
 if (Ep%gwcalctyp>=10) then
  call timab(304,1,tsec) ! KS => QP; [wfrg]

! * Initialize with KS eigenvalues and eigenfunctions.
  allocate(m_lda_to_qp(Ep%nbnds,Ep%nbnds,Kmesh%nibz,Ep%nsppol))
  m_lda_to_qp = czero
  do iband=1,Ep%nbnds
   m_lda_to_qp(iband,iband,:,:) = cone
  end do

! TODO switch on the renormalization of n in screening
! QPS should report bdgw
  allocate(rhor_p(nfftf,Dtset%nspden))
  call rdqps(QP_BSt,Dtfil%filqps,Dtset%usepaw,Dtset%nspden,1,nscf,Dtset%prtvol,&
&  nfftf,ngfftf,Cryst%ucvol,Dtset%paral_kgb,MPI_enreg_seq,nbsc,m_lda_to_qp,rhor_p)
  deallocate(rhor_p)

#if 0
! === Report the unitary transformation on ab_out for automatic testing ===
! * Note the different tolerance used for ab_out and std_out aiming at 
! improving the portability of the automatic tests.
! * Only a subset of k-points is reported on ab_out.
  if (rank==master) then
   call show_QP(QP_BSt,m_lda_to_qp,fromb=Sp%minbdgw,tob=Sp%maxbdgw,unit=std_out,tolmat=0.001_dp)
!  if (prtvol>0) 
!  allocate(kmask(BSt%nkpt)) ; kmask=.FALSE. ; kmask(1:MIN(BSt%nkpt,2))=.TRUE.
!  kmask=.TRUE.
!  call show_QP(BSt,m_lda_to_qp,fromb=Sp%minbdgw,tob=Sp%maxbdgw,unit=ab_out,tolmat=0.01_dp)
!  deallocate(kmask)
!  end if
  end if
#endif

! FIXME this is to preserve the old implementation for the head and the wings in ccchi0q0
! But has to be rationalized
  KS_BSt%eig=QP_BSt%eig

! * Get new occ. factors and fermi level.
  call update_occ(QP_BSt,Dtset%fixmom)
  qp_vbik(:,:) = get_valence_idx(QP_BSt)
! 
! === Update only the wfg treated with GW ===
! * If iteration no. 0 skip tranformation.
! TODO rewrite this part, using nbsc should be faster
  if (nscf/=0) then
   if (.not.have_valence) then 
!   All bands on each processor
    call calc_wf_qp(MPI_enreg,Kmesh%nibz,Ep%nbnds,Wf%npwwfn,Ep%nsppol,Dtset%nspinor,&
&    m_lda_to_qp,my_minb,my_maxb,1,nbsc,Wf%wfg)
    call reinit_Wfs(Wf)
   else
    call calc_wf_qp_Wfval(MPI_enreg,Kmesh%nibz,Ep%nbnds,Wf%npwwfn,Ep%nsppol,Dtset%nspinor,&
&    m_lda_to_qp,my_minb,my_maxb,1,nbsc,Wf%wfg,nbvw,Wf_val%wfg)
    call reinit_Wfs(Wf    )
    call reinit_Wfs(Wf_val)
   end if

!  === For PAW update and re-symmetrize cprj in the full BZ ===
   if (Dtset%usepaw==1) then
    call update_cprj(Cryst%natom,Kmesh%nibz,Ep%nbnds,Ep%nsppol,Dtset%nspinor,m_lda_to_qp,dimlmn,Cprj_ibz)
!   TODO add rotation in spinor space
    call paw_symcprj(Pawtab,Cryst,Dtset%nspinor,Ep%nbnds,KS_BSt%nband,&
&    Dtset%nsppol,Psps,Kmesh,Cprj_ibz,Pawang,dimlmn,Cprj_bz)
   end if
  end if ! nscf/=0

  deallocate(m_lda_to_qp)
  call timab(304,2,tsec)
 end if ! gwcalctyp>=10

 call timab(305,1,tsec) ! screening(2)
!
!=== In case update the eigenvalues ===
!* Either use a scissor operator or an external file.
 inquire(file='__in.gw__',exist=update_energies)
 if (Ep%soenergy>0.1d-4) then
  write(msg,'(5a,f7.3,a)')&
&  ' screening : performing a first self-consistency',ch10,&
&  ' update of the energies in W by a scissor operator',ch10,&
&  ' applying a scissor operator of [eV] : ',Ep%soenergy*Ha_eV,ch10
  call wrtout(std_out,msg,'COLL') ; call wrtout(ab_out,msg,'COLL')
  call apply_scissor(QP_BSt,Ep%soenergy)
 else if (update_energies) then
  write(msg,'(4a)')&
&  ' screening : performing a first self-consistency',ch10,&
&  ' update of the energies in W by a previous GW calculation',ch10
  call wrtout(std_out,msg,'COLL') ; call wrtout(ab_out,msg,'COLL')
  call rdgw(QP_BSt,'__in.gw__',QP_BSt%eig)
  call update_occ(QP_BSt,Dtset%fixmom)
 end if

#if defined HAVE_GW_CUTOFF
!Here we should check that 0<= z0 and d<= 1
!MG It wont work in case of gwpara==2
 if (.FALSE.) then
  z0 = dtset%userra
  width  = dtset%userrb
  direction = dtset%useria
  call cutoff_m_elem(Ep,Kmesh,gvec,Cryst%symrec,Wf,KS_BSt%eig,QP_BSt%eig,z0,width,KS_BSt%occ,direction,gprimd)
  return
 end if
#endif
!
!========================
!=== COMPUTE DENSITY ====
!========================
!* Evaluate PW part (complete charge in case of NC pseudos)
!TODO this part has to be rewritten. If I decrease the tol on the occupations 
!I have to code some MPI stuff also if have_valence==.TRUE.

!*Tables for the dense FFT mesh used for rhor ===
 allocate(igfftf(Wf%npwwfn),mask(Wf%npwwfn))
 call kgindex(igfftf,gvec,mask,MPI_enreg_seq,ngfftf,Wf%npwwfn)
 if (.not.ALL(mask)) then 
  MSG_ERROR("FFT para not yet implemented")
 end if
 deallocate(mask)
!TODO deallocate gvec, everything should be passed through objects

 allocate(rhor(nfftf,Dtset%nspden))
 if (have_valence) then ! Valence states are stored on each proc thus use_MPI==.FALSE.
  call calc_density(Wf_val,ngfftf,nfftf,igfftf,Cryst,Kmesh,QP_BSt,MPI_enreg,.FALSE.,rhor)
 else                   ! Each proc has the full set of WFR.
  call calc_density(Wf,ngfftf,nfftf,igfftf,Cryst,Kmesh,QP_BSt,MPI_enreg,.FALSE.,rhor)
 end if

 deallocate(igfftf)

!TODO this has to be done in a better way, moreover wont work for PAW
 call cutoff_density(ngfftf,Dtset%nspden,Dtset%nsppol,Vcp,rhor,MPI_enreg)
!
!=== Additional computation for PAW ===
 if (Dtset%usepaw==1) then
! * Add the compensation charge to the PW density.
  nhatgrdim=0 ; if (Dtset%xclevel==2) nhatgrdim=usexcnhat*Dtset%pawnhatxc
  cplex=1 ; ider=2*nhatgrdim ; izero=0
  if (nhatgrdim>0) allocate(nhatgr(nfftf,Dtset%nspden,3))
  call pawmknhat(compch_fft,cplex,ider,idir,ipert,izero,Cryst%gprimd,MPI_enreg_seq,&
&  Cryst%natom,nfftf,ngfftf,nhatgrdim,Dtset%nspden,Cryst%ntypat,Dtset%paral_kgb,Pawang,&
&  Pawfgrtab,nhatgr,nhat,Pawrhoij,Pawrhoij,Pawtab,k0,Cryst%rprimd,Cryst%typat,Cryst%ucvol)
! 
! === Evaluate onsite energies, potentials, densities ===
! * Initialize variables/arrays related to the PAW spheres.
! * Initialize also lmselect (index of non-zero LM-moments of densities).
! TODO call init_paw_ij in scfcv and respfn, fix small issues
  cplex=1 ; cplex_dij=Dtset%nspinor
  allocate(Paw_ij(Cryst%natom))
  call nullify_paw_ij(Paw_ij)
  call init_paw_ij(Paw_ij,cplex,cplex_dij,Dtset%nspinor,Dtset%nsppol,&
&  Dtset%nspden,Dtset%pawspnorb,Cryst%natom,Cryst%ntypat,Cryst%typat,Pawtab,has_dijhartree=1)

  allocate(Paw_an(Cryst%natom))
  call nullify_paw_an(Paw_an)
  nkxc1=0
  call init_paw_an(Cryst%natom,Cryst%ntypat,nkxc1,Dtset%nspden,cplex,Dtset%pawxcdev,&
&  Cryst%typat,Pawang,Pawtab,Paw_an,has_vxc=1,has_vxcval=0)

  call status(0,Dtfil%filstat,iexit,level,'call pawdenpot')

  nzlmopt=-1 ; option=0 ; compch_sph=greatest_real
  call pawdenpot(compch_sph,KS_energies%e_paw,KS_energies%e_pawdc,idir,ipert,Dtset%ixc,Cryst%natom,Dtset%nspden,&
&  Cryst%ntypat,nzlmopt,option,Paw_an,Paw_an,Paw_ij,Pawang,Dtset%pawprtvol,Pawrad,Pawrhoij,Dtset%pawspnorb,&
&  Pawtab,Dtset%pawxcdev,dtset%spnorbscl,Cryst%typat,Dtset%xclevel,Psps%znuclpsp)
 end if

 if (.not.allocated(nhatgr)) allocate(nhatgr(nfftf,Dtset%nspden,0))

 call test_charge(nfftf,KS_BSt%nelect,Dtset%nspden,rhor,ucvol,nhat,Dtset%usepaw,&
& usexcnhat,Pawfgr%usefinegrid,compch_sph,compch_fft,omegaplasma)

!* For PAW, add the compensation charge the FFT mesh, then get rho(G).
 if (Dtset%usepaw==1) rhor(:,:)=rhor(:,:)+nhat(:,:)
 call prtrhomxmn(std_out,MPI_enreg,nfftf,ngfftf,Dtset%nspden,1,rhor)

 allocate(rhog(2,nfftf))
 call fourdp(1,rhog,rhor(:,1),-1,MPI_enreg,nfftf,ngfftf,Dtset%paral_kgb,tim_fourdp)

!The following steps have been gathered in the setvtr routine:
!- get Ewald energy and Ewald forces
!- compute local ionic pseudopotential vpsp
!- eventually compute 3D core electron density xccc3d
!- eventually compute vxc and vhartr
!- set up ks_vtrial
!**************************************************************
!**** NOTE THAT Vxc CONTAINS THE CORE-DENSITY CONTRIBUTION ****
!**************************************************************

 call status(0,Dtfil%filstat,iexit,level,'call setvtr   ')

 allocate(grewtn(3,Cryst%natom),xred_dummy(3,Cryst%natom)) ; xred_dummy=Cryst%xred
 nkxc=0
 if (Dtset%nspden==1) nkxc=2
 if (Dtset%nspden>=2) nkxc=3 ! check GGA and spinor that is messy !!!
 allocate(kxc(nfftf,nkxc))

 n3xccc=0 ; if (Psps%n1xccc/=0) n3xccc=nfftf
 allocate(xccc3d(n3xccc))
 allocate(ks_vhartr(nfftf),ks_vtrial(nfftf,Dtset%nspden),vpsp(nfftf),ks_vxc(nfftf,Dtset%nspden))

 optene=4 ; moved_atm_inside=0 ; moved_rhor=0 ; initialized=1 ; istep=1
 call setvtr(Cryst%atindx1,Dtset,KS_energies,Cryst%gmet,Cryst%gprimd,grewtn,gsqcutf_eff,initialized,istep,kxc,mgfftf,&
& moved_atm_inside,moved_rhor,MPI_enreg_seq,Cryst%nattyp,nfftf,ngfftf,nhat,nhatgr,nhatgrdim,nkxc,Cryst%ntypat,&
& Psps%n1xccc,n3xccc,optene,Pawtab,ph1df,Psps,rhog,rhor,Cryst%rmet,Cryst%rprimd,strsxc,Cryst%ucvol,usexcnhat,&
& ks_vhartr,vpsp,ks_vtrial,ks_vxc,vxcavg,xccc3d,xred_dummy,Cryst%xred)
!FIXME here xred is INOUT due to ionion_realSpace and xredcart!

 deallocate(grewtn,xred_dummy)
 deallocate(xccc3d,STAT=istat)

!TODO this has to be checked
!call prtene(Dtset,KS_energies,std_out,Dtset%usepaw)

!=== For PAW, compute pseudopotential strengths Dij (unsymmetrized quantities!) ===
 if (Dtset%usepaw==1) then
  call status(0,Dtfil%filstat,iexit,level,'call pawdij   ')

  cplex=1 ; ipert=0 ; idir=0
  call pawdij(cplex,Dtset,Dtset%enunit,idir,ipert,MPI_enreg_seq,Cryst%natom,nfftf,ngfftf,Dtset%nspden,Cryst%ntypat,&
&  Paw_an,Paw_ij,Pawang,Pawfgrtab,Dtset%pawprtvol,Pawrad,Dtset%pawspnorb,Pawtab,Dtset%pawxcdev,&
&  Cryst%typat,Cryst%ucvol,ks_vtrial,ks_vxc)

  call status(0,Dtfil%filstat,iexit,level,'call symdij   ')

  call symdij(Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%nsym,Cryst%ntypat,0,Paw_ij,Pawang,&
&  Dtset%pawprtvol,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)

! * Output of the pseudopotential strengths Dij and the augmentation occupancies Rhoij.
  call pawprt(Dtset,Psps%indlmn,Psps%lmnmax,Paw_ij,Pawrhoij,Pawtab)
 end if !PAW
!
!=== Calculate the frequency mesh ===
!* First omega is always zero without broadening.
!FIXME what about metals? I think we should add eta, this means we need to know if
!the system is metallic, for example using occopt
 allocate(Ep%omega(Ep%nomega))
 domegareal=Ep%omegaermax/(Ep%nomegaer-1) !step along real axis, might be 0
 Ep%omega(1)=czero
 do iomega=2,Ep%nomegaer
  Ep%omega(iomega)=CMPLX((iomega-1)*domegareal,zero,kind=dpc)
 end do
 if (Ep%plasmon_pole_model.and.Ep%nomega==2) then
  e0=Dtset%ppmfrq ; if (e0<0.1d-4) e0=omegaplasma
  Ep%omega(2)=CMPLX(zero,e0,kind=dpc)
 end if

!=== For AC, use Gauss-Legendre quadrature method === 
!* Replace $ \int_0^\infty dx f(x) $ with $ \int_0^1 dz f(1/z - 1)/z^2 $.
!* Note that the grid is not log as required by CD, so we cannot use the same SCR file.
 if (Ep%analytic_continuation) then
  allocate(z(Ep%nomegaei),zw(Ep%nomegaei)) ! knots and weights for AC
  call coeffs_gausslegint(zero,one,z,zw,Ep%nomegaei)
  do iomega=1,Ep%nomegaei
   Ep%omega(Ep%nomegaer+iomega)=CMPLX(zero,one/z(iomega)-one,kind=dpc)
  end do
  deallocate(z,zw)
 else if (Ep%contour_deformation) then
  e0=Dtset%ppmfrq ; if (e0<0.1d-4) e0=omegaplasma
  do iomega=1,Ep%nomegaei
   Ep%omega(Ep%nomegaer+iomega)=CMPLX(zero,e0/three*(EXP(two/(Ep%nomegaei+1)*LOG(four)*iomega)-one),kind=dpc)
  end do
 end if
!
!* Report frequency mesh for chi0.
 write(msg,'(2a)')ch10,' calculating chi0 at frequencies [eV] :'
 call wrtout(std_out,msg,'COLL')
 call wrtout(ab_out,msg,'COLL')
 do iomega=1,Ep%nomega
  write(msg,'(i3,2es16.6)')iomega,Ep%omega(iomega)*Ha_eV
  call wrtout(std_out,msg,'COLL')
  call wrtout(ab_out,msg,'COLL')
 end do

!* Allocate chi0, wings and array for chi0sumrule check.
 allocate(chi0sumrule(Ep%npwe))

 
 

 allocate(chi0(Ep%npwe*Ep%nI,Ep%npwe*Ep%nJ,Ep%nomega),STAT=istat)
 if (istat/=0) then 
  write(msg,'(a,f16.1)')'out of memory in chi0, size= ',Ep%npwe**2*Ep%nI*Ep%nJ*Ep%nomega
  MSG_ERROR(msg)
 end if

!=== Open file to write independent matrix elements of \epsilon^-1 ===
!if (Dtset%prtvol==10) then
 if (rank==master) then
  filnam=TRIM(Dtfil%filnam_ds(4))//'_EM1'
  call isfile(filnam,'new')
  open(unem1ggp,file=filnam,status='unknown',form='formatted')
 end if
!end if
 call timab(305,2,tsec) ! screening(2)
!
!>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END OF THE INITIALIZATION PART <<<<<<<<<<<<<<<<<<<<<<<<<<<
!
 call print_Wfs(Wf,mode_paral='PERS')
 if (have_valence) then 
  call print_Wfs(Wf_val,mode_paral='PERS')
 end if
!
!=== Loop over q-points. Calculate \epsilon^{-1} and save on disc ===
 do iqibz=1,Qmesh%nibz
  call timab(306,1,tsec) !loop1
  is_first_qcalc=(iqibz==1)

! * Selective q-point calculation.
  found=.FALSE. ; label=iqibz
  if (Ep%nqcalc/=Ep%nqibz) then
   do iqcalc=1,Ep%nqcalc
    qtmp(:)=Qmesh%ibz(:,iqibz)-Ep%qcalc(:,iqcalc)
    found=(normv(qtmp,gmet,'G')<GW_TOLQ)
    if (found) then
     label=iqcalc ; EXIT !iqcalc
    end if
   end do
   if (.not.found) CYCLE !iqibz
   qtmp(:)=Ep%qcalc(:,1)-Qmesh%ibz(:,iqibz)
   is_first_qcalc=(normv(qtmp,gmet,'G')<GW_TOLQ)
  end if

  bar=REPEAT('-',80)
  write(msg,'(4a,1x,a,i2,a,f9.6,2(",",f9.6),3a)')ch10,ch10,bar,ch10,&
&  ' q-point number ',label,'        q = (',(Qmesh%ibz(ii,iqibz),ii=1,3),') [r.l.u.]',ch10,bar
  call wrtout(std_out,msg,'COLL')
  call wrtout(ab_out,msg,'COLL')
  is_qeq0=0
  if (normv(Qmesh%ibz(:,iqibz),gmet,'G')<GW_TOLQ0) is_qeq0=1

#if 0
! === Define the distribution of the tasks inside cchi0|cchi0q0 according to gwpara and symchi ===
! * For the moment spaceComm_q==MPI_COMM_WORLD but, it would be possible to create pools of processors for each q.
! The only thing that has to be solved is the output of the SUSC and SCR file.
  allocate(gw_distrb(Kmesh%nbz,Ep%nbnds,Ep%nsppol))
  use_symmetries=(Ep%symchi/=0) ; spaceComm_q=MPI_enreg%world_comm

  call screening_gwdistrb(MPI_enreg%gwpara,Dtset%gwcomp,spaceComm_q,Kmesh%nbz,Ep%nbnds,nbvw,nbcw,Ep%nsppol,&
&  use_symmetries,Kmesh,Qmesh,Ltg_q(iqibz),MPI_enreg,my_minb,my_maxb,gw_distrb)

  deallocate(gw_distrb)
#endif

#if 0
! if (Dtset%prtvol==10) then
! === Find the independent set of G-Gp pairs for this q-point. ===
! Useful to write the independent matrix elements of epsilon^-1 or to speed-up the KK transform
! In the long wavelength limit we set q==0, because we still can use symmetryes for the Body.
  qtmp(:)=Qmesh%ibz(:,iqibz) ; if (normv(qtmp,gmet,'G')<GW_TOLQ0) qtmp(:)=zero
  call init_Gpairs_type(Gpairs_q,qtmp,Gsph_epsG0,Cryst)
! end if
#endif 

  if (MPI_enreg%gwpara==1) then
!  === Parallelization over k-points, redefine the distribution of k-points ===
!  IMPORTANT: Note that Kmesh%nbz is used in the first dimension of proc_distrb. This implies that
!  proc_distrb *MUST* be redefined if it is passed to routines employing the IBZ indexing.
   deallocate(MPI_enreg%proc_distrb) ; allocate(MPI_enreg%proc_distrb(Kmesh%nbz,Ep%nbnds,Ep%nsppol))
!  If nprocs>ntasks, proc_distrb==-999 for rank>ntasks-1 and no harm should be done
   MPI_enreg%proc_distrb(:,:,:)=-999
   allocate(istart(nprocs),istop(nprocs))

   if (Ep%symchi==0) then ! No symmetries, divide the full BZ among procs.
    ntasks=Kmesh%nbz
    call split_work2(ntasks,nprocs,istart,istop)
    do irank=0,nprocs-1
     if (istart(irank+1)==ntasks+1) CYCLE
     ii=istart(irank+1) ; jj=istop(irank+1)
     MPI_enreg%proc_distrb(ii:jj,:,:)=irank
    end do
   else if (Ep%symchi/=0) then ! Divide IBZ_q among procs, 
!   * Distribution might be unefficient for particular qs.
!   * Here proc_distrb is -999 for all the k-points not in the IBZ_q.
    ntasks=SUM(Ltg_q(iqibz)%ibzq(:)) 
    call split_work2(ntasks,nprocs,istart,istop)
    do irank=0,nprocs-1
     do ikbz=1,Kmesh%nbz
      do ikq=istart(irank+1),istop(irank+1)
       if (Ltg_q(iqibz)%bz2ibz(ikbz)==ikq) MPI_enreg%proc_distrb(ikbz,:,:)=irank
      end do
     end do
    end do
   end if
   deallocate(istart,istop)
!  * Announce the treatment of k-points by each proc.
   do ikbz=1,Kmesh%nbz 
    do isppol=1,Ep%nsppol
     if (MPI_enreg%proc_distrb(ikbz,Ep%nbnds,isppol)==rank) then
      write(msg,'(3(a,i4))')'P : treating k-point ',ikbz,' and spin ',isppol,' by node ',rank
      call wrtout(std_out,msg,'PERS')
     end if
    end do
   end do
  end if !gwpara==1
  
  if (is_qeq0==1) then  ! Special treatment of the long wavelenght limit.
   call timab(307,1,tsec)
   call status(iqibz,Dtfil%filstat,iexit,level,'call cchi0q0  ')

   allocate(lwing(Ep%npwe*Ep%nI,Ep%nomega,Ep%nqlwl))
   allocate(uwing(Ep%npwe*Ep%nJ,Ep%nomega,Ep%nqlwl))

   call cchi0q0(have_valence,Dtset,Cryst,Dtfil,Ep,Psps,Kmesh,QP_BSt,KS_BSt,&
&   Gsph_epsG0,Gsph_wfn,rhoxsp_method,dim1_rhox,dim2_rhox,gvec,Pawang,Pawrad,&
&   Pawtab,Paw_ij,ktabr,nbvw,ngfft_gw,igfft,nfftgw,chi0,lwing,uwing,&
&   MPI_enreg,Ltg_q(iqibz),pawrhox_spl,Cprj_ibz,Cprj_bz,vkb_dim,vkbsign,vkb,vkbd,chi0sumrule,Wf,Wf_val)

   deallocate(vkb,vkbd,vkbsign) ! Free the K-B form factors.
   call timab(307,2,tsec)

  else ! Calculate cchi0 for q/=0.
   call timab(308,1,tsec) 
   call status(iqibz,Dtfil%filstat,iexit,level,'call cchi0    ')

   call cchi0(have_valence,Dtset,Cryst,Dtfil,Qmesh%ibz(:,iqibz),Ep,Psps,Kmesh,&
&   QP_BSt,Gsph_epsG0,Gsph_wfn,rhoxsp_method,dim1_rhox,dim2_rhox,gvec,&
&   Pawang,Pawtab,nbvw,ngfft_gw,igfft,nfftgw,chi0,MPI_enreg,ktabr,Ltg_q(iqibz),&
&   pawrhox_spl,Cprj_bz,chi0sumrule,Wf,Wf_val)

   call timab(308,2,tsec)
  end if
! 
! === Print chi0(q,G,Gp,omega), then calculate epsilon and epsilon^-1 for this q ===
! * Only master works but this part could be parallelized over frequencies
  do iomega=1,MIN(Ep%nomega,NOMEGA_PRINTED)
   write(msg,'(1x,a,i4,a,2f9.4,a)')&
&   ' chi0(G,G'') at the ',iomega,' th omega',Ep%omega(iomega)*Ha_eV,' [eV]'
   call wrtout(std_out,msg,'COLL')
   call wrtout(ab_out,msg,'COLL')
   write(msg,'(1x,a,i3,a,i4,a)')' chi0(q =',iqibz, ', omega =',iomega,', G,G'')'
   if (Ep%nqcalc/=Ep%nqibz) write(msg,'(a,i3,a,i4,a)')'  chi0(q=',iqcalc,', omega=',iomega,', G,G'')'
   call wrtout(std_out,msg,'COLL')
   call print_arr(chi0(:,:,iomega))
   call print_arr(chi0(:,:,iomega),max_r=2,unit=ab_out)
  end do

  if (Ep%nomega>NOMEGA_PRINTED) then 
   write(msg,'(a,i3,a)')' No. of calculated frequencies > ',NOMEGA_PRINTED,', stop printing '
   call wrtout(std_out,msg,'COLL')
   call wrtout(ab_out,msg,'COLL')
  end if

! Divide by the volume 
! TODO this should be done in cchi0, but I have to update all the test, sigh!
  chi0(:,:,:)=chi0(:,:,:)/ucvol
! 
! === Write chi0 on _SUSC file ===
! * Master creates and write the header if this is the first q-point calculated.
  if (rank==master) then
   title(1)='CHI0 file: chi0'
   title(2)=' ' 
   if (is_qeq0==1) then
    string='0' ; if (Dtset%usepaw==0.and.Ep%inclvkb/=0) call int2char(Ep%inclvkb,string)
    title(1)=title(1)(1:21)//', calculated using inclvkb = '//string
   end if
   unt_susc=Dtfil%unchi0
   fname_susc=TRIM(Dtfil%filnam_ds(4))//'_SUS'

!  * Open file and write header for polarizability files.
   if (is_first_qcalc) then
    open(unit=unt_susc,file=fname_susc,status='unknown',form='unformatted',iostat=ios)
    if (ios/=0) then 
     write(msg,'(3a)')' Opening file ',TRIM(fname_susc),' as new-unformatted'
     MSG_ERROR(msg)
    end if  
    fileID=1 ; ikxc=0 ; test_type=0 ; tordering=1
    call init_ScrHdr(fileID,ikxc,test_type,tordering,title,Ep%npwe,gvec,Ep,Hdr_local,Hchi0)
    rdwr=2 ; localrdwf_=0 ; fform_chi0=1102 ! Use the new format
    call scr_hdr_io(fform_chi0,rdwr,unt_susc,spaceComm,master,Dtset%accesswff,localrdwf_,Hchi0)
    call free_scrhdr(Hchi0)
   end if
   call write_screening(unt_susc,Dtset%accesswff,Ep%npwe,Ep%nomega,Ep%omega,chi0)
  end if
! 
! Quick and dirty coding of the RPA functional, development stage!
! MG: here be careful since kxcg is not calculated!
! the computation of the kernel should be rewritten from scratch!
  dim_kxcg=0
  allocate(kxcg(Ep%npwe,Ep%npwe*dim_kxcg))
  if(.false.) call calc_rpa_functional(iqibz,Ep,Vcp,Qmesh,Dtfil,gmet,kxcg,MPI_enreg,chi0)
! 
! ==========================================================
! === Calculate RPA \tilde\epsilon^{-1} overwriting chi0 ===
! ==========================================================
  approx_type=0 !RPA
  option_test=0 !TESTPARTICLE
  dim_wing = 0 ; if (is_qeq0==1) dim_wing=Ep%nqlwl

  if (dim_wing==0) then
   if (.not.allocated(lwing)) allocate(lwing(Ep%npwe*Ep%nI,Ep%nomega,Ep%nqlwl*dim_wing))
   if (.not.allocated(uwing)) allocate(uwing(Ep%npwe*Ep%nJ,Ep%nomega,Ep%nqlwl*dim_wing))
  end if

  call make_epsm1_driver(iqibz,dim_wing,Ep%npwe,Ep%nI,Ep%nJ,Ep%nomega,Ep%omega,&
&  approx_type,option_test,Vcp,dim_kxcg,kxcg,MPI_enreg,lwing,uwing,chi0,Spectra)

  deallocate(lwing,uwing)

  if ( rank==master .and. is_qeq0==1 ) then 
   call repr_dielconst(Spectra,msg)
   call wrtout(std_out,msg,'COLL') 
   call wrtout(ab_out,msg,'COLL')

   if (Ep%nomegaer>2) then
    fname=TRIM(Dtfil%filnam_ds(4))//'_ELF' 
    call dump_spectra(Spectra,W_EELF,fname)

    fname=TRIM(Dtfil%filnam_ds(4))//'_EM1_LF' 
    call dump_spectra(Spectra,W_EM_LF,fname)

    fname=TRIM(Dtfil%filnam_ds(4))//'_EM1_NLF' 
    call dump_spectra(Spectra,W_EM_NLF,fname)
   end if
  end if ! master and is_qeq0==1

  call destroy_spectra(Spectra)
  deallocate(kxcg)

  epsm1   => chi0
  vc_sqrt => Vcp%vc_sqrt(:,iqibz)  ! Contains vc^{1/2}(q,G), complex-valued due to a possible cutoff
! 
! === Output the sum rule evaluation ===
  chi0sumrule(:)=chi0sumrule(:)/ucvol
  call output_chi0sumrule((is_qeq0==1),iqibz,Ep%npwe,omegaplasma,chi0sumrule,epsm1(:,:,1),vc_sqrt)

! === Write heads and wings on the main output ===
  if (is_qeq0==1) then
   write(msg,'(1x,2a)')' Heads and wings of the symmetrical epsilon^-1(G,G'') ',ch10
   call wrtout(ab_out,msg,'COLL')
   do iomega=1,Ep%nomega
    write(msg,'(2x,a,i4,a,2f9.4,a)')&
&    ' Upper and lower wings at the ',iomega,' th omega',Ep%omega(iomega)*Ha_eV,' [eV]'
    call wrtout(ab_out,msg,'COLL')
    call print_arr(epsm1(1,:,iomega),max_r=9,unit=ab_out)
    call print_arr(epsm1(:,1,iomega),max_r=9,unit=ab_out)
    write(msg,'(a)')ch10
    call wrtout(ab_out,msg,'COLL')
   end do
  end if

  if (rank==master) then
   call timab(310,1,tsec) ! wrscr

#if 0
!  if (Dtset%prtvol==10) then 
!  Write the independent matrix elements of \tilde epsilon^-1.
   write(msg,'(a,3(f10.6),a)')' Symmetrical epsilon^-1(G,G'') at q = ( ',Qmesh%ibz(:,iqibz),' ) [r.l.u.]'
   call outeps(Ep%npwe,Ep%nomega,Ep%omega,epsm1,Gsph_epsG0,Gpairs_q,msg,unem1ggp,Dtset%prtvol) 
!  end if 
#endif 

!  === Write the symmetrical epsilon^-1 on file ===
!  This might be parallelized but I have to use xsum_mpi in cchi0 and cchi0q0
   title(1)='SCR file: epsilon^-1'
   if (is_qeq0==1) then
    string='0' ; if (Dtset%usepaw==0.and.Ep%inclvkb/=0) call int2char(Ep%inclvkb,string)
    title(1)=title(1)(1:21)//', calculated using inclvkb = '//string
   end if
   title(2)='TESTPARTICLE'
   ctype='RPA'
   title(2)(14:17)=ctype !this has to be modified
   unt_em1=Dtfil%unscr ; fname_scr=TRIM(Dtfil%filnam_ds(4))//'_SCR'

   if (is_first_qcalc) then
!   === Open file and write the header for the SCR file ===
!   * Here we write the RPA approximation for \tilde\epsilon^{-1}
    open(unit=unt_em1,file=fname_scr,status='unknown',form='unformatted',iostat=ios)
    if (ios/=0) then 
     write(msg,'(3a)')' Opening file ',TRIM(fname_scr),' as new-unformatted '
     MSG_ERROR(msg)
    end if  
    fileID=4 ; ikxc=0 ; test_type=0 ; tordering=1
    call init_ScrHdr(fileID,ikxc,test_type,tordering,title,Ep%npwe,gvec,Ep,Hdr_local,Hem1)
!   here we still use the old fform
    rdwr=2 ; fform_em1=1002 ; localrdwf_=0
    call scr_hdr_io(fform_em1,rdwr,unt_em1,spaceComm,master,Dtset%accesswff,localrdwf_,Hem1)
    call free_scrhdr(Hem1)
   end if

   call write_screening(unt_em1,Dtset%accesswff,Ep%npwe,Ep%nomega,Ep%omega,epsm1)

  end if ! rank==master

  call timab(310,2,tsec)
  call timab(306,2,tsec)
 end do ! Loop over q-points
!
!----------------------------- END OF THE CALCULATION ------------------------
!
!=== Close Files ===
!if (Dtset%prtvol==10 .and. rank==0) close(unem1ggp)
 if (rank==master) then 
  close(unem1ggp)
  close(unt_em1)
  close(unt_susc)
 end if
!
!=== Free memory ===
 call status(0,Dtfil%filstat,iexit,level,'deallocate    ')

 deallocate(chi0sumrule)
 deallocate(chi0)

 deallocate(rhor,rhog,ks_vbik,qp_vbik,ktabr)
 deallocate(ks_vhartr,ks_vtrial,vpsp,ks_vxc)
 deallocate(gvec,igfft)
 deallocate(ph1d,ph1df)

 if (allocated(dimlmn)) deallocate(dimlmn )
 if (associated(MPI_enreg%proc_distrb)) deallocate(MPI_enreg%proc_distrb)
!
!=== Destroy the dinamic arrays in the local data structures ===
!
!* Optional deallocation for PAW.
 deallocate(Pawfgr%fintocoa,Pawfgr%coatofin)
 deallocate(nhatgr,     STAT=istat)
 deallocate(nhat,       STAT=istat)
 deallocate(pawrhox_spl,STAT=istat)

 if (Dtset%usepaw==1) then
  call cprj_free(Cprj_ibz)       ; deallocate(Cprj_ibz)
  call cprj_free(Cprj_bz )       ; deallocate(Cprj_bz )
  call rhoij_free(Pawrhoij)      ; deallocate(Pawrhoij)
  call pawfgrtab_free(Pawfgrtab) ; deallocate(Pawfgrtab)
  call destroy_paw_ij(Paw_ij)    ; deallocate(Paw_ij)
  call destroy_paw_an(Paw_an)    ; deallocate(Paw_an)
 end if

 call destroy_Wfs(Wf)
 if (have_valence) then 
  call destroy_Wfs(Wf_val) 
 end if
 do iqibz=1,Qmesh%nibz
  call destroy_little_group(Ltg_q(iqibz))
 end do
 deallocate(Ltg_q)
 call destroy_BZ_mesh_type(Kmesh)
 call destroy_BZ_mesh_type(Qmesh)
 call DestroyCrystal(Cryst)
 call destroy_Gvectors(Gsph_epsG0)
 call destroy_Gvectors(Gsph_wfn)
 call destroy_Gpairs_type(Gpairs_q)
 call destroy_Coulombian(Vcp)
 call destroy_Epsilonm1_parameters(Ep)
 call hdr_clean(Hdr_kss)
 call hdr_clean(Hdr_local)
 call bstruct_clean(KS_BSt)
 call bstruct_clean(QP_BSt)

 call timab(301,2,tsec)
 call status(0,Dtfil%filstat,iexit,level,'exit          ')

 DBG_EXIT("COLL")

end subroutine screening
!!***
