!{\src2tex{textfont=tt}}
!!****f* ABINIT/sigma
!! NAME
!! sigma
!!
!! FUNCTION
!! Calculate the matrix elements self-energy operator.
!!
!! COPYRIGHT
!! Copyright (C) 1999-2009 ABINIT group (GMR, VO, LR, RWG, MT, MG, RShaltaf)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! acell(3)=length scales of primitive translations (bohr)
!! codvsn=code version
!! Dtfil<type(datafiles_type)>=variables related to files
!! Dtset<type(dataset_type)>=all input variables for this dataset
!! iexit=exit flag
!! MPI_enreg<MPI_type>=information about MPI parallelization
!! Pawang<type(pawang_type)>=paw angular mesh and related data
!! Pawrad(ntypat*usepaw)<type(pawrad_type)>=paw radial mesh and related data
!! Pawtab(ntypat*usepaw)<type(pawtab_type)>=paw tabulated starting data
!! Psps<type(pseudopotential_type)>=variables related to pseudopotentials
!!   Before entering the first time in sigma, a significant part of Psps has been initialized :
!!   the integers dimekb,lmnmax,lnmax,mpssang,mpssoang,mpsso,mgrid,ntypat,n1xccc,usepaw,useylm,
!!   and the arrays dimensioned to npsp. All the remaining components of Psps are to be initialized in
!!   the call to pspini. The next time the code enters screening, Psps might be identical to the
!!   one of the previous Dtset, in which case, no reinitialisation is scheduled in pspini.F90.
!! rprim(3,3)=dimensionless real space primitive translations
!! xred(3,natom) = reduced atomic coordinates
!!
!! OUTPUT
!!  Output is written on the main abinit output file. Some results are stored in external files
!!
!! PARENTS
!!      driver
!!
!! NOTES
!!
!! ON THE USE OF FFT GRIDS:
!! =================
!! In case of PAW:
!! ---------------
!!    Two FFT grids are used:
!!    - A "coarse" FFT grid (defined by ecut) for the application of the Hamiltonian on the plane waves basis.
!!      It is defined by nfft, ngfft, mgfft, ...
!!      Hamiltonian, wave-functions, density related to WFs (rhor here), ... are expressed on this grid.
!!    - A "fine" FFT grid (defined) by ecutdg) for the computation of the density inside PAW spheres.
!!      It is defined by nfftf, ngfftf, mgfftf, ... Total density, potentials, ... are expressed on this grid.
!! In case of norm-conserving:
!! ---------------------------
!!    - Only the usual FFT grid (defined by ecut) is used. It is defined by nfft, ngfft, mgfft, ...
!!      For compatibility reasons, (nfftf,ngfftf,mgfftf) are set equal to (nfft,ngfft,mgfft) in that case.
!!
!! CHILDREN
!!      abi_etsf_electrons_put,abi_etsf_geo_put,abi_etsf_init,bstruct_clean
!!      calc_density,calc_vhxc_braket,calc_wf_qp,chkpawovlp,cigfft
!!      copy_bandstructure,cprj_alloc,cprj_free,csigme,cutoff_density
!!      destroy_bands_symmetries,destroy_bz_mesh_type,destroy_coulombian
!!      destroy_epsilonm1_results,destroy_gvectors,destroy_little_group
!!      destroy_paw_an,destroy_paw_ij,destroy_ppmodel,destroy_sigma_parameters
!!      destroy_sigma_results,destroy_wfs,destroycrystal,duplicate_wfs
!!      energies_init,eps1_tc,etsf_dump_qp,fappnd,findqg0,fourdp
!!      get_bands_sym_gw,get_eneocc_vect,getph,hdr_clean,init_paw_an
!!      init_paw_ij,init_pawfgr,init_ppmodel,init_sigma_results,init_wfs
!!      initmpi_seq,int2char4,ioarr,kgindex,metric,mkdump_er,mkrdim,nhatgrid
!!      nullify_little_group,nullify_paw_an,nullify_paw_ij,paw_mkrhox_spl
!!      paw_symcprj,pawdenpot,pawdij,pawfgrtab_free,pawfgrtab_init,pawinit
!!      pawmknhat,pawmkrhoij,pawprt,pawpuxinit,print_ngfft,print_pawtab
!!      print_psps,print_wfs,prtrhomxmn,pspini,rdgw,rdkss,rdqps,reinit_wfs
!!      reportgap,rhoij_alloc,rhoij_copy,rhoij_free,rotate_fft_mesh,setsymrhoij
!!      setup_little_group,setup_ppmodel,setup_sigma,setvtr,show_qp
!!      sigma_gwdistrb,split_work2,status,symdij,symrhoij,test_charge,timab
!!      update_cprj,update_occ,write_sigma_results,write_sigma_results_header
!!      wrqps,wrtout,xcomm_init,xmaster_init,xme_init,xmpi_nproc
!!
!! SOURCE
#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine sigma(acell,codvsn,Dtfil,Dtset,iexit,MPI_enreg,Pawang,Pawrad,Pawtab,Psps,rprim,xred)

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use defs_wvltypes
 use m_errors
 use m_paw_toolbox
 use m_sigma_results
 use m_electrons
 use m_ppmodel
 use m_gwdefs,        only : GW_TOLQ0, unt_gw, unt_sig, unt_sgr, unt_sgm
 use m_numeric_tools, only : imax_loc
 use m_geometry,      only : normv
 use m_crystal,       only : DestroyCrystal
 use m_bz_mesh,       only : findqg0, destroy_bz_mesh_type
 use m_little_group,  only : setup_little_group, nullify_little_group, destroy_little_group
 use m_gsphere,       only : destroy_Gvectors
 use m_coulombian,    only : destroy_Coulombian, cutoff_density
 use m_qparticles,    only : wrqps, rdqps, rdgw, show_QP
 use m_fft_mesh,      only : rotate_FFT_mesh, cigfft
 use m_io_kss,        only : rdkss
 use m_screening,     only : mkdump_er, destroy_epsilonm1_results
 use m_wfs,           only : init_Wfs, destroy_Wfs, reinit_Wfs, print_Wfs, duplicate_Wfs, calc_wf_qp

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_12_hide_mpi
 use interfaces_14_hidewrite
 use interfaces_18_timing
 use interfaces_27_toolbox_oop
 use interfaces_32_util
 use interfaces_42_geometry
 use interfaces_51_manage_mpi
 use interfaces_53_ffts
 use interfaces_56_recipspace
 use interfaces_61_ionetcdf
 use interfaces_62_iowfdenpot
 use interfaces_65_nonlocal
 use interfaces_65_psp
 use interfaces_66_paw
 use interfaces_67_common
 use interfaces_68_gw
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(inout) :: iexit
 character(len=6),intent(in) :: codvsn
 type(Datafiles_type),intent(in) :: Dtfil
 type(Dataset_type),intent(inout) :: Dtset
 type(MPI_type),intent(inout) :: MPI_enreg
 type(Pawang_type),intent(inout) :: Pawang
 type(Pseudopotential_type),intent(inout) :: Psps
!arrays
 real(dp),intent(in) :: acell(3),rprim(3,3),xred(3,Dtset%natom)
 type(Pawrad_type),intent(inout) :: Pawrad(Psps%ntypat*Psps%usepaw)
 type(Pawtab_type),intent(inout) :: Pawtab(Psps%ntypat*Psps%usepaw)

!Local variables-------------------------------
!scalars
 integer,parameter :: level=25,tim_fourdp=5
 integer,save :: nsym_old=-1
 integer :: accessfil,approx_type,b1gw,b2gw,bantot,choice,cplex,cplex_dij
 integer :: dim1_rhox=0,dim2_rhox=0,dim_kxcg,fformr,has_dijU,has_dijso,iab,iapp
 integer :: iat,ib,ib1,ib2,id_required,ider,idir,idx_dwn,idx_up,ierr,ifft,ii,ik
 integer :: ik_bz,ikbz,ikcalc,ikibz,ikq,ikxc,ilmn,initialized,iorder,ipert,iq
 integer :: iqm,irank,is,is_idx,ispden,isppol,istat,istep,isym,itypat,izero,jj
 integer :: jlmn,k0lmn,klmn,ks_iv,lm_size,lmn2_size,lpawumax,make_newh0,master
 integer :: mband_,method,mgfft,mgfftf,mgfftgw,mkmem_,mod10,moved_atm_inside
 integer :: moved_rhor,my_maxb,my_minb,my_nbnds,n3xccc,nG01d,nG02d,nG03d
 integer :: nband_k,nbcw,nbsc,nbvw,nbw,ndij,nfftf,nfftf_tot,nfftgw,nfftgw_tot
 integer :: nhatgrdim,nkxc,nkxc1,nprocs,nscf,nspden_rhoij,ntasks,nzlmopt,optene
 integer :: optgr0,optgr1,optgr2,option,option_test,optrad,optrhoij,psp_gencond
 integer :: qp_iv,rank,rdwr,rdwrpaw,rhoxsp_method,shift,spaceComm,spaceComm_kgw
 integer :: use_umklp,usexcnhat,vkb_dim
 real(dp) :: band_ene,boxcut,boxcutc,compch_fft,compch_sph,diecut_eff_dum
 real(dp) :: drude_plsmf,dummy,ecore,ecut_eff,ecutdg_eff,ehartree
 real(dp) :: exchange_energy,gsqcutc_eff,gsqcutf_eff,nelect,norm,oldefermi
 real(dp) :: ucvol,vxcavg,vxcavg_qp
 complex(dpc) :: max_degw
 logical :: iscompatibleFFT,ltest,only_trace,use_symmetries
 character(len=4) :: tag
 character(len=500) :: header,msg
 character(len=fnlen) :: filapp,fildos,fname,fname_dump
 type(BZ_mesh_type) :: Kmesh,Qmesh
 type(Bandstructure_type) :: KS_BSt,QP_BSt
 type(Coulombian_type) :: Vcp
 type(Crystal_structure) :: Cryst
 type(Energies_type) :: KS_energies,QP_energies
 type(Epsilonm1_results) :: Er
 type(Gvectors_type) :: Gsph_Max
 type(Hdr_type) :: Hdr_kss,Hdr_sigma
 type(MPI_type) :: MPI_enreg_seq
 type(PPmodel_type) :: PPm
 type(Pawfgr_type) :: Pawfgr
 type(Sigma_parameters) :: Sp
 type(Sigma_results) :: Sr
 type(Wavefunctions_information) :: Wf_info,Wf_info_braket
 type(Wvl_wf_type) :: Dummy_wvl
!arrays
 integer,save :: paw_gencond(6)=(/-1,-1,-1,-1,-1,-1/)
 integer :: g0(3),ibocc(Dtset%nsppol),ngfft_gw(18),ngfftc(18),ngfftf(18)
 integer,allocatable :: dimlmn(:),gvec(:,:),gw_distrb(:,:,:),igfft(:,:,:,:)
 integer,allocatable :: igfftf(:),irottb(:,:),istart(:),istop(:),ks_vbik(:,:)
 integer,allocatable :: ktabr(:,:),l_size_atm(:),nlmn(:),qp_vbik(:,:)
 real(dp) :: gmet(3,3),gprimd(3,3),k0(3),kgwmk(3),rmet(3,3),rprimd(3,3)
 real(dp) :: strsxc(6),tsec(2)
 real(dp),allocatable :: dummy_ene(:,:,:),dummy_occ(:,:,:),grewtn(:,:)
 real(dp),allocatable :: ks_nhat(:,:),ks_nhatgr(:,:,:),ks_rhog(:,:)
 real(dp),allocatable :: ks_rhor(:,:),ks_vhartr(:),ks_vtrial(:,:),ks_vxc(:,:)
 real(dp),allocatable :: kxc(:,:),kxc_qp(:,:),pawrhox_spl(:,:,:,:,:),ph1d(:,:)
 real(dp),allocatable :: ph1df(:,:),prev_rhor(:,:),qp_nhat(:,:)
 real(dp),allocatable :: qp_nhatgr(:,:,:),qp_occpack(:),qp_rhog(:,:)
 real(dp),allocatable :: qp_rhor(:,:),qp_vhartr(:),qp_vtrial(:,:),qp_vxc(:,:)
 real(dp),allocatable :: sr_gwenergy(:,:,:),vkb(:,:,:,:),vkbd(:,:,:,:)
 real(dp),allocatable :: vkbsign(:,:),vpsp(:),xccc3d(:),xred_dummy(:,:)
 complex(dpc) :: ovlp(2)
 complex(dpc),allocatable :: ctmp(:,:),h0_me(:,:,:,:),hbare(:,:,:,:)
 complex(dpc),allocatable :: hlda(:,:,:,:),htmp(:,:,:,:),ks_vUme(:,:,:,:)
 complex(dpc),allocatable :: ks_vhme(:,:,:,:),ks_vxcme(:,:,:,:)
 complex(dpc),allocatable :: ks_vxcvalme(:,:,:,:),m_lda_to_qp(:,:,:,:)
 complex(dpc),allocatable :: qp_vUme(:,:,:,:),qp_vhme(:,:,:,:)
 complex(dpc),allocatable :: qp_vxcme(:,:,:,:),qp_vxcvalme(:,:,:,:),uks2qp(:,:)
 complex(gwpc),allocatable :: kxcg(:,:)
 complex(gwpc),pointer :: cgdwn(:),cgup(:)
 logical,allocatable :: kmask(:),mask(:)
 character(len=fnlen) :: tmpfil(7)
 type(Bands_Symmetries),allocatable :: BSym(:)
 type(Cprj_type),allocatable :: Cprj_bz(:,:),Cprj_ibz(:,:)
 type(Little_group),allocatable :: Ltg_k(:)
 type(Paw_an_type),allocatable :: KS_paw_an(:),QP_paw_an(:)
 type(Paw_ij_type),allocatable :: KS_paw_ij(:),QP_paw_ij(:)
 type(Pawfgrtab_type),allocatable :: Pawfgrtab(:)
 type(Pawrhoij_type),allocatable :: KS_Pawrhoij(:),Pawrhoij_dum(:)
 type(Pawrhoij_type),allocatable :: QP_pawrhoij(:)

!************************************************************************

 DBG_ENTER('COLL')

#if defined FC_PGI6
 write(msg,'(3a)')&
& ' Due to a bug in PGI v6, the compilation of sigma.F90 has been skipped ',ch10,&
& ' To perform GW calculations use a more recent version of PGI, alternatively you might try a different compiler '
 MSG_ERROR(msg)
#else

 call timab(401,1,tsec) ! overall time
 call timab(402,1,tsec) ! sigma(1)

 write(msg,'(7a)')&
& ' SIGMA: Calculation of the GW corrections ',ch10,ch10,&
& ' Based on a program developped by R.W. Godby, V. Olevano, G. Onida, and L. Reining.',ch10,&
& ' Incorporated in ABINIT by V. Olevano, G.-M. Rignanese, and M. Torrent.'
 call wrtout(std_out,msg,'COLL')
 call wrtout(ab_out,msg,'COLL')
#if defined HAVE_GW_DPC
 if (gwpc/=8) then
  write(msg,'(6a)')ch10,&
&  ' Number of bytes for double precision complex /=8 ',ch10,&
&  ' Cannot continue due to kind mismatch in BLAS library ',ch10,&
&  ' Some BLAS interfaces are not generated by abilint '
  MSG_ERROR(msg)
 end if
 write(msg,'(a,i2,a)')'.Using double precision arithmetic ; gwpc = ',gwpc,ch10
#else
 write(msg,'(a,i2,a)')'.Using single precision arithmetic ; gwpc = ',gwpc,ch10
#endif
 call wrtout(std_out,msg,'COLL')
 call wrtout(ab_out,msg,'COLL')
!
!=== Initialize MPI variables, and parallelization level ===
!* gwpara 0--> sequential run, 1--> parallelism over k-points, 2--> parallelism over bands.
!* In case of gwpara==1 memory is not parallelized.
!* If gwpara==2, bands are divided among processors but each proc has all the states where GW corrections are required.
 call xmpi_nproc(nprocs,ierr)
 if (nprocs==1) Dtset%gwpara=0
 MPI_enreg%gwpara      = Dtset%gwpara
 MPI_enreg%parareel    = 0
 MPI_enreg%paralbd     = 0
 MPI_enreg%paral_level = 2   ! This means k-points but it is not used
 MPI_enreg%me_fft      = 0
 MPI_enreg%nproc_fft   = 1

 call xcomm_init  (MPI_enreg,spaceComm)
 call xme_init    (MPI_enreg,rank     )
 call xmaster_init(MPI_enreg,master   )

!* Fake MPI_type for the sequential part.
 call initmpi_seq(MPI_enreg_seq) 
!
!=== Create names for the temporary files based on Dtfil%filnam_ds(5) ===
!* localrdwf defines the IO locality
!1--> each node has access to files (default).
!0--> only master has access.
!
!* accesswff defines the format of the output.
!1--> Plain Fortran file
!2--> Set all outputs to netcdf format (not implemented)
!3--> Set all outputs to ETSF format

 accessfil=0
 if (Dtset%accesswff==2) accessfil=1
 if (Dtset%accesswff==3) accessfil=3
 if (Dtset%accesswff==1) accessfil=4

!Prepare the name of the auxiliary files DEN, DOS, EIG...
 iapp=0
 call fappnd(filapp,dtfil%filnam_ds(4),iapp)

 tmpfil(1)=TRIM(Dtfil%filnam_ds(5))//'_WF1'
 tmpfil(2)=TRIM(Dtfil%filnam_ds(5))//'_WF2'
 tmpfil(3)=TRIM(Dtfil%filnam_ds(5))//'_KG'
 tmpfil(4)=TRIM(Dtfil%filnam_ds(5))//'_DUM'
 tmpfil(6)=TRIM(Dtfil%filnam_ds(5))//'_YLM'
 tmpfil(7)=TRIM(Dtfil%filnam_ds(5))//'_PAW'
!
!* Parallel case: the index of the processor must be appended
 if (MPI_enreg%paral_compil_kpt==1) then
  call int2char4(MPI_enreg%me,tag)
  jj=1 ; if (MPI_enreg%paral_compil_mpio==1.and.Dtset%accesswff==1) jj=3
  do ii=jj,7
   tmpfil(ii)=TRIM(tmpfil(ii))//'_P-'//tag
  end do
 end if
!
!=== Some variables need to be initialized/nullify at start ===
 call energies_init(KS_energies)
 usexcnhat=0
 call mkrdim(acell,rprim,rprimd)
 call metric(gmet,gprimd,ab_out,rmet,rprimd,ucvol)
!
!=== Define FFT grid(s) sizes ===
!* Be careful! This mesh is only used for densities, potentials and the matrix elements of v_Hxc. It is NOT the
!(usually coarser) GW FFT mesh employed for the oscillator matrix elements that is defined in setmesh.F90.
!See also NOTES in the comments at the beginning of this file.
!NOTE: This mesh is defined in invars2m using ecutwfn, in GW Dtset%ecut is forced to be equal to Dtset%ecutwfn.

 k0(:)=zero
 call init_pawfgr(Dtset,k0,gmet,Pawfgr,mgfftf,nfftf,ecut_eff,ecutdg_eff,gsqcutc_eff,gsqcutf_eff,ngfftc,ngfftf)

 call print_ngfft(ngfftf,header='FFT mesh for density and matrix elements of V_Hxc')
 nfftf_tot=PRODUCT(ngfftf(1:3))
!
!=== Open and read pseudopotential files ===
 call status(0,Dtfil%filstat,iexit,level,'call pspini   ')

 call pspini(Dtset,ecore,psp_gencond,gsqcutc_eff,gsqcutf_eff,level,Pawrad,Pawtab,Psps,rprimd)
 if (psp_gencond==1) call print_psps(Psps,std_out,0,'COLL')

!=== Initialize Sp, Er and basic objects ===
!* Sp is completetly initialized here.  
!* Er is only initialized with dimensions, (SCR|SUSC) file is read in mkdump_Er
 call setup_sigma(codvsn,acell,rprim,ngfftf,Dtset,Dtfil,Psps,Pawtab,MPI_enreg,&
& ngfft_gw,Hdr_kss,Hdr_sigma,Cryst,Kmesh,Qmesh,KS_BSt,Gsph_Max,Vcp,Er,Sp)

 call print_ngfft(ngfft_gw,header='FFT mesh for oscillator strengths')

 mod10=MOD(Sp%gwcalctyp,10)
 b1gw=Sp%minbdgw
 b2gw=Sp%maxbdgw

 nfftgw_tot=PRODUCT(ngfft_gw(1:3))
 nfftgw    =nfftgw_tot  !no FFT //
 mgfftgw   =MAXVAL(ngfft_gw(1:3))
!
!TRYING TO RECREATE AN "ABINIT ENVIRONMENT"
 KS_energies%e_corepsp=ecore/Cryst%ucvol

!=== Calculate KS occupation numbers and ks_vbk(nkibz,nsppol) ====
!* ks_vbk gives the (valence|last Fermi band) index for each k and spin.
!* fixmom is passed to fermi.F90 to fix the problem with newocc in case of magnetic metals
 allocate(ks_vbik(KS_BSt%nkpt,KS_BSt%nsppol))
 allocate(qp_vbik(KS_BSt%nkpt,KS_BSt%nsppol))

 call update_occ(KS_BSt,Dtset%fixmom,prtvol=0)
 ks_vbik(:,:) = get_valence_idx(KS_BSt)

!=== Initialize MPI_enreg%proc_distrb according to gwpara ===
!* If parallelism over k-points (gwpara==1) proc_distrb is redefined on-the-fly for each q-point.
 my_minb=1 
 my_maxb=Sp%nbnds ; my_nbnds=my_maxb-my_minb+1
 allocate(MPI_enreg%proc_distrb(Kmesh%nibz,Sp%nbnds,Sp%nsppol)) ; MPI_enreg%proc_distrb(:,:,:)=rank
 if (MPI_enreg%gwpara==2) then
! * Setup distrb in case of band parallelism.
  write(msg,'(2a)')ch10,' sigma : loop over bands done in parallel '
  call wrtout(std_out,msg,'PERS')
  allocate(istart(nprocs),istop(nprocs))
  MPI_enreg%proc_distrb(:,:,:)=-999
  call split_work2(Sp%nbnds,nprocs,istart,istop)
  my_minb=istart(rank+1) ; my_maxb=istop(rank+1)
  if (my_minb>my_maxb) then
   write(msg,'(3a,2(i6,a),a)')&
&   ' One or more processors has zero number of bands ',ch10,&
&   ' my_minb = ',my_minb,' my_maxb = ',my_maxb,ch10,&
&   ' This is a waste, decrease the number of processors '
   MSG_ERROR(msg)
  end if
  do irank=0,nprocs-1
   MPI_enreg%proc_distrb(:,istart(irank+1):istop(irank+1),:)=irank
  end do
  deallocate(istart,istop)
  my_nbnds=my_maxb-my_minb+1
! * Announce the treatment of bands by each proc.
  do irank=0,nprocs-1
   if (irank==rank) then
    write(msg,'(4(a,i4))')' treating ',my_nbnds,' bands from ',my_minb,' up to ',my_maxb,' by node ',irank
    call wrtout(std_out,msg,'PERS')
   end if
  end do
 end if
!
!============================
!==== PAW initialization ====
!============================
 allocate(Cprj_ibz(Cryst%natom,Dtset%nspinor*Kmesh%nibz*Sp%nbnds*Sp%nsppol*Dtset%usepaw))
 if (Dtset%usepaw==1) then
  call chkpawovlp(Cryst%natom,Cryst%ntypat,Dtset%pawovlp,Pawtab,Cryst%rmet,Cryst%typat,Cryst%xred)
  allocate(dimlmn(Cryst%natom),nlmn(Cryst%ntypat))
  do iat=1,Cryst%natom
   dimlmn(iat)=Pawtab(Cryst%typat(iat))%lmn_size
  end do
  do itypat=1,Cryst%ntypat
   nlmn(itypat)=Pawtab(itypat)%lmn_size
  end do

  call cprj_alloc(Cprj_ibz,0,dimlmn)
  allocate(KS_Pawrhoij(Cryst%natom))
  nspden_rhoij=Dtset%nspden ; if (Dtset%pawspnorb>0.and.Dtset%nspinor==2) nspden_rhoij=4
  call rhoij_alloc(Dtset%pawcpxocc,nlmn,nspden_rhoij,Dtset%nsppol,KS_Pawrhoij,Cryst%typat)

! === Initialize values for several basic arrays ===
! TODO Check pawxcdev>2 since gaunt coefficients are allocated with different size
  if (psp_gencond==1.or.&
&  paw_gencond(1)/=Dtset%pawlcutd .or.paw_gencond(2)/=Dtset%pawlmix  .or.&
&  paw_gencond(3)/=Dtset%pawnphi  .or.paw_gencond(4)/=Dtset%pawntheta.or.&
&  paw_gencond(5)/=Dtset%pawspnorb.or.paw_gencond(6)/=Dtset%pawxcdev) then

   call timab(553,1,tsec)
   diecut_eff_dum=ABS(Dtset%diecut)*Dtset%dilatmx**2

   call pawinit(diecut_eff_dum,Psps%indlmn,Dtset%pawlcutd,Dtset%pawlmix,Psps%lmnmax,Psps%mpsang,&
&   Dtset%pawnphi,Cryst%nsym,Dtset%pawntheta,Cryst%ntypat,Pawang,Pawrad,Dtset%pawspnorb,Pawtab,Dtset%pawxcdev)

   paw_gencond(1)=Dtset%pawlcutd  ; paw_gencond(2)=Dtset%pawlmix
   paw_gencond(3)=Dtset%pawnphi   ; paw_gencond(4)=Dtset%pawntheta
   paw_gencond(5)=Dtset%pawspnorb ; paw_gencond(6)=Dtset%pawxcdev
   call timab(553,2,tsec)
  end if
  Psps%n1xccc=MAXVAL(Pawtab(1:Cryst%ntypat)%usetcore)

! Initialize optional flags in Pawtab to zero 
! (Cannot be done in Pawinit since the routine is called only if some pars. are changed)
  Pawtab(:)%has_nabla = 0 
  Pawtab(:)%usepawu   = 0
  Pawtab(:)%useexexch = 0
  Pawtab(:)%exchmix   =zero

! if (psp_gencond==1) then !.or. nsym_old/=Cryst%nsym) then
  call setsymrhoij(gprimd,Pawang%l_max-1,Cryst%nsym,Dtset%pawprtvol,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Pawang%zarot)
  nsym_old=Cryst%nsym
! end if

! === Initialize and compute data for LDA+U ===
  if (Dtset%usepawu>0.or.Dtset%useexexch>0) then
   call pawpuxinit(Dtset%dmatpuopt,Dtset%exchmix,Dtset%jpawu,Dtset%lexexch,Dtset%lpawu,&
&   Psps%indlmn,Psps%lmnmax,Cryst%ntypat,Pawang,Dtset%pawprtvol,Pawrad,Pawtab,Dtset%upawu,&
&   Dtset%useexexch,Dtset%usepawu)
  end if

  if (rank==master) call print_pawtab(Pawtab)
! 3-
! if (mkmem_==0) then
! open(Dtfil%unpaw,file=tmpfil(7),form='unformatted',status='unknown')
! rewind(unit=Dtfil%unpaw)
! end if

! === Get Pawrhoij from the header of the KSS file ===
  call rhoij_copy(Hdr_kss%pawrhoij,KS_Pawrhoij)

! === Re-symmetrize symrhoij ===
! this call leads to a SIGFAULT, likely some pointer is not initialized correctly
  choice=1 ; optrhoij=1 ; ipert=0 ; idir=0
! call symrhoij(choice,Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%natom,Cryst%nsym,&
! &  Cryst%ntypat,optrhoij,Pawang,Dtset%pawprtvol,KS_Pawrhoij,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)

! === Evaluate form factor of radial part of phi.phj-tphi.tphj ===
  rhoxsp_method=1
! rhoxsp_method=2 
  if (Dtset%userie==1) rhoxsp_method=1
  if (Dtset%userie==2) rhoxsp_method=2
  if (rhoxsp_method==1) then ! Arnaud-Alouani
   dim1_rhox=2*(Psps%mpsang-1)
   dim2_rhox=Psps%lnmax*(Psps%lnmax+1)/2
  else if (rhoxsp_method==2) then ! Shiskin-Kresse
   dim1_rhox=MAXVAL(Pawtab(:)%l_size)**2
   dim2_rhox=MAXVAL(Pawtab(:)%lmn2_size)
  end if
  allocate(pawrhox_spl(Psps%mqgrid_ff,2,0:dim1_rhox,dim2_rhox,Cryst%ntypat))

  call paw_mkrhox_spl(Cryst%ntypat,Psps,Pawrad,Pawtab,pawrhox_spl,rhoxsp_method,dim1_rhox,dim2_rhox)
! 
! === Variables/arrays related to the fine FFT grid ===
  allocate(ks_nhat(nfftf,Dtset%nspden)) ; ks_nhat(:,:)=zero
  allocate(Pawfgrtab(Cryst%natom),l_size_atm(Cryst%natom))
  do iat=1,Cryst%natom
   l_size_atm(iat)=Pawtab(Cryst%typat(iat))%l_size
  end do
  cplex=1
  call pawfgrtab_init(Pawfgrtab,cplex,l_size_atm,Dtset%nspden)
  deallocate(l_size_atm)
  compch_fft=greatest_real
  usexcnhat=MAXVAL(Pawtab(:)%vlocopt)
! * 0 if Vloc in atomic data is Vbare    (Blochl s formulation)
! * 1 if Vloc in atomic data is VH(tnzc) (Kresse s formulation)
  write(msg,'(a,i2)')' sigma : using usexcnhat = ',usexcnhat
  call wrtout(std_out,msg,'COLL')
! 
! === Identify parts of the rectangular grid where the density has to be calculated ===
  call status(0,Dtfil%filstat,iexit,level,'call nhatgrid ')

  optgr0=Dtset%pawstgylm ; optgr1=0 ; optgr2=0 ; optrad=1-Dtset%pawstgylm
  if (Dtset%xclevel==2.and.usexcnhat>0) optgr1=Dtset%pawstgylm

  call nhatgrid(Cryst%atindx1,gmet,MPI_enreg_seq,Cryst%natom,Cryst%nattyp,nfftf,ngfftf,Cryst%ntypat,&
  optgr0,optgr1,optgr2,optrad,Pawfgrtab,Pawtab,Cryst%rprimd,Cryst%typat,Cryst%ucvol,Cryst%xred)

 end if !End of PAW Initialization

!Allocate these arrays anyway, since they are passed to subroutines.
 if (.not.allocated(ks_nhat))     allocate(ks_nhat(nfftf,0))
 if (.not.allocated(pawrhox_spl)) allocate(pawrhox_spl(1,1,1,1,0))  

!==================================================
!==== Read KS band structure from the KSS file ====
!==================================================
 call timab(403,1,tsec) !rdkss

!* Initialize Wf_info, allocate wavefunctions.
 call init_Wfs(Wf_info,Dtset%gwmem,Dtset%paral_kgb,Sp%npwwfn,my_minb,my_maxb,&
& Kmesh%nibz,Sp%nsppol,Dtset%nspden,Dtset%nspinor,ngfft_gw,Gsph_Max%gvec,MPI_enreg)

 vkb_dim=0 ! No need of vkb.
 allocate(vkbsign(Psps%mpsang,Cryst%ntypat*vkb_dim), STAT=istat)
 allocate(vkb (Sp%npwwfn,Cryst%ntypat,Psps%mpsang,Kmesh%nibz*vkb_dim), STAT=istat)
 allocate(vkbd(Sp%npwwfn,Cryst%ntypat,Psps%mpsang,Kmesh%nibz*vkb_dim), STAT=istat)

!TODO all this stuff has been already read, it is a waste! Note the different ordering of indeces.
 allocate(gvec(3,Sp%npwvec))
 allocate(dummy_ene(Kmesh%nibz,Sp%nbnds,Sp%nsppol)) ; dummy_ene(:,:,:)=zero
 allocate(dummy_occ(Kmesh%nibz,Sp%nbnds,Sp%nsppol)) ; dummy_occ(:,:,:)=zero

 nbvw=0 ! Do not divide states into occupied and empty 
 call rdkss(Dtfil,Dtset%usepaw,Pawtab,Cryst%nsym,Sp%nbnds,nbvw,Kmesh%nibz,Sp%npwvec,Dtset%nspinor,Sp%nsppol,Sp%npwwfn,&
& gvec,dummy_ene,dummy_occ,Wf_info%wfg,Cprj_ibz,Cryst%ntypat,Cryst%natom,Psps%mpsang,vkb_dim,vkbsign,vkb,vkbd,&
& nelect,MPI_enreg,my_minb,my_maxb,Dtset%accesswff,Dtset%localrdwf,Dtset%prtvol)

 deallocate(gvec)
 deallocate(dummy_ene,dummy_occ)
 deallocate(vkbsign,vkb,vkbd)
 call timab(403,2,tsec) ! rdkss
!
!=== Find little group of the k-points for GW corrections ===
!* The little group is calculated only if sys_sigma.
!* If use_umklp==1 then also symmetries requiring an umklapp to preserve k_gw are included.
 allocate(Ltg_k(Sp%nkcalc))
 use_umklp=1
 do ikcalc=1,Sp%nkcalc
  call nullify_little_group(Ltg_k(ikcalc))
  if (Sp%symsigma/=0) then
   call setup_little_group(Sp%xkcalc(:,ikcalc),Qmesh,Cryst,Sp%npwvec,Gsph_Max%gvec,&
&   0,use_umklp,Dtset%prtvol,Ltg_k(ikcalc))
  end if
 end do

!=== For PAW: deal the with onsite representation of the wavefunctions ===
!* Symmetrize <Proj_i|Cnk> to have them in the Full Brillouin zone.
!* Evaluate $<phj|e^{-i(q+G)}|phi>-<tphj|e^{-i(q+G)}|tphi>$.
!TODO memory is not distributed.
!TODO add rotation in spinor space

 call timab(404,1,tsec) ! mkrhox
 allocate(Cprj_bz(Cryst%natom,Dtset%nspinor*Sp%nbnds*Kmesh%nbz*Sp%nsppol*Dtset%usepaw),STAT=istat)

 if (Dtset%usepaw==1) then
  call cprj_alloc(Cprj_bz,0,dimlmn)
  call paw_symcprj(Pawtab,Cryst,Dtset%nspinor,Sp%nbnds,KS_BSt%nband,&
&  Dtset%nsppol,Psps,Kmesh,Cprj_ibz,Pawang,dimlmn,Cprj_bz)
 end if

 call timab(404,2,tsec) ! mkrhox
 call timab(402,2,tsec) ! sigma(1)
!
!=== Define the G-G0 shifts for the FFT of the oscillators ===
!* Sp%mG0 gives the MAX G0 component to account for umklapp.
!* Note the size MAX(Sp%npwx,Sp%npwc).
!TODO Should take into account possible FFT //. 
 call timab(405,1,tsec) ! sigma(2)
 nG01d = 2*Sp%mG0(1)+1 
 nG02d = 2*Sp%mG0(2)+1 
 nG03d = 2*Sp%mG0(3)+1

!allocate(igfft(Sp%npwvec,nG01d,nG02d,nG03d))
!call cigfft(Sp%mG0,Sp%npwvec,ngfft_gw,Gsph_Max%gvec,igfft,ierr)

 allocate(igfft(MAX(Sp%npwx,Sp%npwc),nG01d,nG02d,nG03d))
 call cigfft(Sp%mG0,MAX(Sp%npwx,Sp%npwc),ngfft_gw,Gsph_Max%gvec,igfft,ierr)

 if (ierr/=0) then
  write(msg,'(a,i4,3a)')&
&  ' Found ',ierr,' G-G0 vectors falling outside the FFT box. ',ch10,&
&  ' This is not allowed. Check mG0 and the setup of the GW FFT mesh. '
  MSG_ERROR(msg)
 end if
!
!=== Get FFT index of $(R^{-1}(r-\tau))$ ===
!* S=\transpose R^{-1} and k_BZ = S k_IBZ
!* irottb is the FFT index of $R^{-1} (r-\tau)$ used to symmetrize u_Sk.
 allocate(irottb(nfftgw,Cryst%nsym))
 call rotate_FFT_mesh(Cryst,ngfft_gw,irottb,iscompatibleFFT)

 allocate(ktabr(nfftgw_tot,Kmesh%nbz))
 do ik_bz=1,Kmesh%nbz
  isym=Kmesh%tabo(ik_bz)
  do ifft=1,nfftgw_tot
   ktabr(ifft,ik_bz)=irottb(ifft,isym)
  end do
 end do
 deallocate(irottb)
!
!=== Compute structure factor phases and large sphere cut-off ===
!WARNING cannot use Dtset%mgfft, this has to be checked better
!mgfft=MAXVAL(ngfftc(:))
!allocate(ph1d(2,3*(2*mgfft+1)*Cryst%natom),ph1df(2,3*(2*mgfftf+1)*Cryst%natom))
 write(std_out,*)' CHECK ',Dtset%mgfftdg,mgfftf
 if (Dtset%mgfftdg/=mgfftf) then
  write(std_out,*)"WARNING Dtset%mgfftf /= mgfftf"
! write(std_out,*)'HACKING Dtset%mgfftf'
! Dtset%mgfftdg=mgfftf
 end if
 allocate(ph1d(2,3*(2*Dtset%mgfft+1)*Cryst%natom),ph1df(2,3*(2*mgfftf+1)*Cryst%natom))

 call status(0,Dtfil%filstat,iexit,level,'call getph    ')

 call getph(Cryst%atindx,Cryst%natom,ngfftc(1),ngfftc(2),ngfftc(3),ph1d,Cryst%xred)

 if (Psps%usepaw==1.and.Pawfgr%usefinegrid==1) then
  call getph(Cryst%atindx,Cryst%natom,ngfftf(1),ngfftf(2),ngfftf(3),ph1df,Cryst%xred)
 else
  ph1df(:,:)=ph1d(:,:)
 end if
!
!=== Initialize the object Wf_info_braket (wavefunctions for GW corrections) ===
!* Be careful here since duplicate_Wfs uses MPI_enreg%proc_distrb
 call init_Wfs(Wf_info_braket,Dtset%gwmem,Dtset%paral_kgb,Sp%npwwfn,b1gw,b2gw,Sp%nkcalc,&
& Sp%nsppol,Dtset%nspden,Dtset%nspinor,ngfft_gw,Gsph_Max%gvec,MPI_enreg)

 call duplicate_Wfs(MPI_enreg,Wf_info,Wf_info_braket,Sp%kcalc,Kmesh)

!=== Get irreducible representation for GW wavefunction ===
!* Warning still under development.
 if (Dtset%useria==1) then
! call check_zarot(Sp%npwvec,Cryst,Gsph_Max%gvec,Psps,Pawang,Gsph_Max%rottb,Gsph_Max%rottbm1)

  only_trace=.FALSE.
  allocate(BSym(Kmesh%nibz))
  call get_Bands_Sym_GW(Dtset%nspinor,Sp%nbnds,Kmesh%nibz,Dtset%nsppol,Dtset%usepaw,only_trace,&
&  Cryst,Kmesh,KS_BSt,Wf_info_braket%ngfft,Wf_info_braket,Pawtab,Pawang,Psps,Cprj_ibz,MPI_enreg,BSym)

  do ikibz=1,Kmesh%nibz
   call destroy_Bands_Symmetries(BSym(ikibz))
  end do
  deallocate(BSym)
 end if

 call timab(405,2,tsec) ! sigma(2)
!
!===========================
!=== COMPUTE THE DENSITY ===
!===========================
!* Evaluate Planewave part (complete charge in case of NC pseudos).
!* If gwpara=2 do the calculation in parallel inside calc_density.
 call timab(406,1,tsec) ! ks_rho

!=== Tables for the dense FFT mesh used for ks_rhor ===
 allocate(igfftf(Wf_info%npwwfn),mask(Wf_info%npwwfn))
 call kgindex(igfftf,Gsph_Max%gvec,mask,MPI_enreg_seq,ngfftf,Wf_info%npwwfn)
 if (.not.ALL(mask)) then 
  MSG_ERROR("FFT para not yet implemented")
 end if
 deallocate(mask)

 allocate(ks_rhor(nfftf,Dtset%nspden))
 call calc_density(Wf_info,ngfftf,nfftf,igfftf,Cryst,Kmesh,KS_BSt,MPI_enreg,.TRUE.,ks_rhor)

!TODO this has to be done in a better way, moreover wont work for PAW
 call cutoff_density(ngfftf,Dtset%nspden,Dtset%nsppol,Vcp,ks_rhor,MPI_enreg)
!
!=== Additional computation for PAW ===
 if (Dtset%usepaw==1) then

! * Calculate the compensation charge nhat.
  nhatgrdim=0 ; if (Dtset%xclevel==2) nhatgrdim=usexcnhat*Dtset%pawnhatxc
  cplex=1 ; ider=2*nhatgrdim ; izero=0
  if (nhatgrdim>0) allocate(ks_nhatgr(nfftf,Dtset%nspden,3))

  call pawmknhat(compch_fft,cplex,ider,idir,ipert,izero,Cryst%gprimd,MPI_enreg_seq,&
&  Cryst%natom,nfftf,ngfftf,nhatgrdim,Dtset%nspden,Cryst%ntypat,Dtset%paral_kgb,Pawang,&
&  Pawfgrtab,ks_nhatgr,ks_nhat,KS_Pawrhoij,KS_Pawrhoij,Pawtab,k0,Cryst%rprimd,Cryst%typat,Cryst%ucvol)

! === Evaluate onsite energies, potentials, densities ===
! * Initialize variables/arrays related to the PAW spheres.
! * Initialize also lmselect (index of non-zero LM-moments of densities).
! TODO call init_paw_ij in scfcv and respfn, fix small issues
  allocate(KS_paw_ij(Cryst%natom))
  call nullify_paw_ij(KS_paw_ij)
  cplex=1 ; cplex_dij=Dtset%nspinor ; has_dijso=Dtset%pawspnorb ; has_dijU=Dtset%usepawu

  call init_paw_ij(KS_paw_ij,cplex,cplex_dij,Dtset%nspinor,Dtset%nsppol,&
&  Dtset%nspden,Dtset%pawspnorb,Cryst%natom,Cryst%ntypat,Cryst%typat,Pawtab,&
&  has_dijhartree=1,has_dijhat=1,has_dijxc=1,has_dijxc_val=1,has_dijso=has_dijso,has_dijU=has_dijU)

  allocate(KS_paw_an(Cryst%natom))
  call nullify_paw_an(KS_paw_an)
  nkxc1=0
  call init_paw_an(Cryst%natom,Cryst%ntypat,nkxc1,Dtset%nspden,cplex,Dtset%pawxcdev,&
&  Cryst%typat,Pawang,Pawtab,KS_paw_an,has_vxc=1,has_vxcval=1)
! 
! === Calculate onsite vxc with and without core charge ===
  call status(0,Dtfil%filstat,iexit,level,'call pawdenpot')

  nzlmopt=-1 ; option=0 ; compch_sph=greatest_real
  call pawdenpot(compch_sph,KS_energies%e_paw,KS_energies%e_pawdc,idir,ipert,Dtset%ixc,Cryst%natom,Dtset%nspden,&
&  Cryst%ntypat,nzlmopt,option,KS_Paw_an,KS_Paw_an,KS_paw_ij,Pawang,Dtset%pawprtvol,Pawrad,KS_Pawrhoij,Dtset%pawspnorb,&
&  Pawtab,Dtset%pawxcdev,dtset%spnorbscl,Cryst%typat,Dtset%xclevel,Psps%znuclpsp)

 end if !PAW

 if (.not.allocated(ks_nhatgr)) allocate(ks_nhatgr(nfftf,Dtset%nspden,0))

 call test_charge(nfftf,KS_BSt%nelect,Dtset%nspden,ks_rhor,Cryst%ucvol,ks_nhat,Dtset%usepaw,&
& usexcnhat,Pawfgr%usefinegrid,compch_sph,compch_fft,drude_plsmf)
!
!=== For PAW, add the compensation charge on the FFT mesh, then get rho(G) ===
 if (Dtset%usepaw==1) ks_rhor(:,:)=ks_rhor(:,:)+ks_nhat(:,:)
 call prtrhomxmn(std_out,MPI_enreg,nfftf,ngfftf,Dtset%nspden,1,ks_rhor)

 allocate(ks_rhog(2,nfftf))
 call fourdp(1,ks_rhog,ks_rhor(:,1),-1,MPI_enreg,nfftf,ngfftf,Dtset%paral_kgb,tim_fourdp)
!
!The following steps have been gathered in the setvtr routine:
!- get Ewald energy and Ewald forces
!- compute local ionic pseudopotential vpsp
!- eventually compute 3D core electron density xccc3d
!- eventually compute vxc and vhartr
!- set up ks_vtrial
!
!*******************************************************************
!**** NOTE THAT HERE Vxc CONTAINS THE CORE-DENSITY CONTRIBUTION ****
!*******************************************************************
 call status(0,Dtfil%filstat,iexit,level,'call setvtr   ')

 allocate(grewtn(3,Cryst%natom),xred_dummy(3,Cryst%natom)) ; xred_dummy(:,:)=xred(:,:)
 nkxc=0
 if (Dtset%nspden==1) nkxc=2
 if (Dtset%nspden>=2) nkxc=3 ! check GGA and spinor which is quite messy !!!
 allocate(kxc(nfftf,nkxc))

 n3xccc=0 ; if (Psps%n1xccc/=0) n3xccc=nfftf
 allocate(xccc3d(n3xccc))
 allocate(ks_vhartr(nfftf),ks_vtrial(nfftf,Dtset%nspden),vpsp(nfftf),ks_vxc(nfftf,Dtset%nspden))

 optene=4 ; moved_atm_inside=0 ; moved_rhor=0 ; initialized=1 ; istep=1
 call setvtr(Cryst%atindx1,Dtset,KS_energies,gmet,gprimd,grewtn,gsqcutf_eff,initialized,istep,kxc,mgfftf,&
& moved_atm_inside,moved_rhor,MPI_enreg_seq,Cryst%nattyp,nfftf,ngfftf,ks_nhat,ks_nhatgr,nhatgrdim,nkxc,Cryst%ntypat,&
& Psps%n1xccc,n3xccc,optene,Pawtab,ph1df,Psps,ks_rhog,ks_rhor,Cryst%rmet,Cryst%rprimd,strsxc,Cryst%ucvol,usexcnhat,&
& ks_vhartr,vpsp,ks_vtrial,ks_vxc,vxcavg,xccc3d,xred_dummy,Cryst%xred)
!FIXME here xred is INOUT due to ionion_realSpace and xredcart!

!TODO this has to be checked
!call prtene(Dtset,KS_energies,std_out,Dtset%usepaw)

!=== For PAW, compute Dij (unsymmetrized quantities!) ===
 if (Dtset%usepaw==1) then
  call status(0,Dtfil%filstat,iexit,level,'call pawdij   ')

  cplex=1 ; ipert=0 ; idir=0
  call pawdij(cplex,Dtset,Dtset%enunit,idir,ipert,MPI_enreg_seq,Cryst%natom,nfftf,ngfftf,Dtset%nspden,Cryst%ntypat,&
&  KS_paw_an,KS_paw_ij,Pawang,Pawfgrtab,Dtset%pawprtvol,Pawrad,Dtset%pawspnorb,Pawtab,Dtset%pawxcdev,&
&  Cryst%typat,Cryst%ucvol,ks_vtrial,ks_vxc)

  call status(0,Dtfil%filstat,iexit,level,'call symdij   ')

  call symdij(Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%nsym,Cryst%ntypat,0,KS_paw_ij,Pawang,&
&  Dtset%pawprtvol,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)

! * Output the pseudopotential strengths Dij and the augmentation occupancies Rhoij.
  call pawprt(Dtset,Psps%indlmn,Psps%lmnmax,KS_paw_ij,KS_Pawrhoij,Pawtab)
 end if
 call timab(406,2,tsec) ! ks_rho
!
!=== Calculate Vxc(b1,b2,k,s)=<b1,k,s|v_{xc}|b2,k,s>  for all the states included in GW ===
!* ks_vxcvalme is calculated without NLCC, ks_vxcme contains NLCC (if any)
!* This part is parallelized within MPI_COMM_WORD since each node has all GW wavefunctions.
 call timab(407,1,tsec) ! vHxc_me
 make_newh0=0
 allocate(ks_vxcme   (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
 allocate(ks_vxcvalme(b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
 allocate(ks_vhme    (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
 allocate(ks_vUme    (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
 allocate(h0_me      (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab*make_newh0))

 call calc_vHxc_braket(Dtset,Sp,Kmesh,make_newh0,b1gw,b2gw,gsqcutf_eff,nfftf_tot,nfftf,ngfftf,igfftf,Gsph_Max%gvec,&
& Wf_info_braket,ks_vtrial,ks_vhartr,ks_vxc,Psps,Cprj_ibz,Pawtab,KS_paw_an,Pawang,Pawfgrtab,Pawrad,KS_paw_ij,&
& MPI_enreg,Cryst,ks_rhor,ks_rhog,usexcnhat,ks_nhat,ks_nhatgr,nhatgrdim,ks_vhme,ks_vxcvalme,ks_vxcme,ks_vUme,h0_me)

 deallocate(h0_me,STAT=istat)

 if (Dtset%usepaw==1.and..FALSE.) then


! this is to check the update of h0
  make_newh0=1
  allocate(h0_me(b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab*make_newh0))
  call calc_vHxc_braket(Dtset,Sp,Kmesh,make_newh0,b1gw,b2gw,gsqcutf_eff,nfftf_tot,nfftf,ngfftf,igfftf,Gsph_Max%gvec,&
&  Wf_info_braket,ks_vtrial,ks_vhartr,ks_vxc,Psps,Cprj_ibz,Pawtab,KS_paw_an,Pawang,Pawfgrtab,Pawrad,KS_paw_ij,&
&  MPI_enreg,Cryst,ks_rhor,ks_rhog,usexcnhat,ks_nhat,ks_nhatgr,nhatgrdim,ks_vhme,ks_vxcvalme,ks_vxcme,ks_vUme,h0_me)

! Check eigenvalues
  do ii=1,Sp%nkcalc
   ikibz=Kmesh%tab(Sp%kcalc(ii))
   do is=1,Sp%nsppol
    do ib=b1gw,b2gw
     write(99,*)ikibz,ib,is,REAL((h0_me(ib,ib,ikibz,is)-KS_BSt%eig(ib,ikibz,is)))*Ha_eV
    end do
   end do
  end do
  deallocate(h0_me)
 end if

 call timab(407,2,tsec) ! vHxc_me

 call timab(408,1,tsec) ! rdqps

!Do not break this coding! When gwcalctyp>10, the order of the bands can be interexchanged after
!the diagonalization. Therefore, we have to correctly assign the matrix elements to the corresponding 
!bands and we cannot skip the following even though it looks unuseful.
 if (Dtset%gwcalctyp>=10) then
  write(msg,'(2a)')ch10,' *************** KS Energies *******************'
  call wrtout(std_out,msg,'COLL')
 end if

!=== QP_BSt stores energies and occ. used for the calculation ===
!* Initialize QP_BSt with KS values. 
!* In case of SC update QP_BSt using the QPS file.
 call copy_bandstructure(KS_BSt,QP_BSt)

 allocate(qp_rhor(nfftf,Dtset%nspden))

 if (Dtset%gwcalctyp<10) then 
! === One-shot GW === 
! * Copy the KS density to qp_rhor.
  qp_rhor(:,:)=ks_rhor(:,:) 
 else
! === Self-consistent GW === 
! * Read the unitary matrix and the QP energies of the previous step from the QPS file.
  call energies_init(QP_energies) 
  QP_energies%e_corepsp=ecore/Cryst%ucvol

  allocate(prev_rhor(nfftf,Dtset%nspden)) ! Previous density for mixing, problem if G //
  prev_rhor(:,:)=ks_rhor(:,:) 

  allocate(m_lda_to_qp(Sp%nbnds,Sp%nbnds,Kmesh%nibz,Sp%nsppol))
  m_lda_to_qp(:,:,:,:)=czero
  do ib=1,Sp%nbnds
   m_lda_to_qp(ib,ib,:,:)=cone ! Initialize the QP amplitudes with KS
  end do

! * rdqps reads m_lda_to_qp and updates the energies in QP_BSt.
! TODO switch on the renormalization of n in sigma.

  call rdqps(QP_BSt,Dtfil%filqps,Dtset%usepaw,Dtset%nspden,1,nscf,Dtset%prtvol,&
&  nfftf,ngfftf,Cryst%ucvol,Dtset%paral_kgb,MPI_enreg_seq,nbsc,m_lda_to_qp,prev_rhor)

! === Report the unitary transformation on ab_out for automatic testing ===
! * Note the different tolerance used for ab_out and std_out aiming at 
! improving the portability of the automatic tests.
! * Only a subset of k-points is reported on ab_out.
  if (rank==master) then
   call show_QP(QP_BSt,m_lda_to_qp,fromb=Sp%minbdgw,tob=Sp%maxbdgw,unit=std_out,tolmat=0.001_dp)
!  if (prtvol>0) 
!  allocate(kmask(BSt%nkpt)) ; kmask=.FALSE. ; kmask(1:MIN(BSt%nkpt,2))=.TRUE.
!  kmask=.TRUE.
!  call show_QP(BSt,m_lda_to_qp,fromb=Sp%minbdgw,tob=Sp%maxbdgw,unit=ab_out,tolmat=0.01_dp)
!  deallocate(kmask)
!  end if
  end if

! === Compute QP wfg as linear combination of KS states ===
! * Wf_info%wfg is modified inside calc_wf_qp
! * For PAW, update also the on-site projections.
! * WARNING the first dimension of MPI_enreg MUST be Kmesh%nibz 
! TODO here we should use nbsc instead of nbnds

  call calc_wf_qp(MPI_enreg,Kmesh%nibz,Sp%nbnds,Wf_info%npwwfn,Sp%nsppol,Dtset%nspinor,&
&  m_lda_to_qp,my_minb,my_maxb,b1gw,b2gw,Wf_info%wfg)

! === Reinit the storage mode of Wf_info and Wf_info_braket as wfs have just changed ===
! * Update also the wavefunctions for GW corrections on each processor
  call reinit_Wfs(Wf_info)
  call reinit_Wfs(Wf_info_braket)
  call duplicate_Wfs(MPI_enreg,Wf_info,Wf_info_braket,Sp%kcalc,Kmesh)

  if (Dtset%usepaw==1) then
   call update_cprj(Cryst%natom,Kmesh%nibz,Sp%nbnds,Sp%nsppol,Dtset%nspinor,m_lda_to_qp,dimlmn,Cprj_ibz)
  end if
! 
! === Compute QP occupation numbers ===
  write(msg,'(3a)')ch10,' sigma : calculating QP occupation numbers ',ch10
  call wrtout(std_out,msg,'COLL')

  call update_occ(QP_BSt,Dtset%fixmom,prtvol=0)
  qp_vbik(:,:) = get_valence_idx(QP_BSt)
! 
! === Compute QP density using updated wfg ===
! * If gwpara==2 do the calculation in parallel
  call calc_density(Wf_info,ngfftf,nfftf,igfftf,Cryst,Kmesh,QP_BSt,MPI_enreg,.TRUE.,qp_rhor)

! === Additional allocations/calculations for SCGW+PAW ===
  if (Dtset%usepaw==1) then

!  * Calculate new rhoij_qp from updated Cprj_ibz, note use_rhoij_==1.
   allocate(QP_pawrhoij(Cryst%natom))
   nspden_rhoij=Dtset%nspden ; if (Dtset%pawspnorb>0.and.Dtset%nspinor==2) nspden_rhoij=4
   call rhoij_alloc(Dtset%pawcpxocc,nlmn,nspden_rhoij,Dtset%nsppol,QP_pawrhoij,Cryst%typat,use_rhoij_=1,use_rhoijres=1)
!  
!  Here cprj are unsorted, see ctocprj.F90
   mband_=Sp%nbnds ; mkmem_=Kmesh%nibz
   allocate(qp_occpack(mband_*Kmesh%nibz*Dtset%nsppol))
   call get_eneocc_vect(QP_BSt,'occ',qp_occpack)

!  * Make QP $ \rho_{ij} $.
   call pawmkrhoij(Cryst%atindx1,Cprj_ibz,dimlmn,KS_BSt%istwfk,Dtset%kptopt,mband_,mkmem_,MPI_enreg_seq,Cryst%natom,&
&   Cryst%nattyp,KS_BSt%nband,Kmesh%nibz,Dtset%nspinor,Dtset%nsppol,Cryst%ntypat,qp_occpack,&
&   Dtset%pawprtvol,QP_pawrhoij,Dtfil%unpaw,Kmesh%wt)
   deallocate(qp_occpack)

!  * Symmetrize QP $ \rho_{ij} $.
   choice=1 ; optrhoij=1 ; ipert=0
   call symrhoij(choice,Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%natom,Cryst%nsym,&
&   Cryst%ntypat,optrhoij,Pawang,Dtset%pawprtvol,QP_pawrhoij,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)

   do iat=1,Cryst%natom
    QP_pawrhoij(iat)%use_rhoij_=0
    deallocate(QP_pawrhoij(iat)%rhoij_)
   end do

!  === Make QP nhat ===
   allocate(qp_nhat(nfftf,Dtset%nspden)) ; qp_nhat(:,:)=zero
   nhatgrdim=0 ; if (Dtset%xclevel==2) nhatgrdim=usexcnhat
   cplex=1 ; ider=2*nhatgrdim ; idir=0 ; ipert=0 ; izero=0
   if (nhatgrdim>0) allocate(qp_nhatgr(nfftf,Dtset%nspden,3))
   k0(:)=zero

!  === Make QP nhat ===
   call pawmknhat(compch_fft,cplex,ider,idir,ipert,izero,Cryst%gprimd,MPI_enreg_seq,&
&   Cryst%natom,nfftf,ngfftf,nhatgrdim,Dtset%nspden,Cryst%ntypat,Dtset%paral_kgb,Pawang,&
&   Pawfgrtab,QP_nhatgr,QP_nhat,QP_Pawrhoij,QP_Pawrhoij,Pawtab,k0,Cryst%rprimd,Cryst%typat,Cryst%ucvol)

!  === Variables/arrays related to the PAW spheres for the QP Hamiltonian ===
!  TODO call init_paw_ij in scfcv and respfn, fix small issues
   cplex=1 ; cplex_dij=Dtset%nspinor ; has_dijso=Dtset%pawspnorb ; has_dijU=Dtset%usepawu
   allocate(QP_paw_ij(Cryst%natom))
   call nullify_paw_ij(QP_paw_ij)
   call init_paw_ij(QP_paw_ij,cplex,cplex_dij,Dtset%nspinor,Dtset%nsppol,&
&   Dtset%nspden,Dtset%pawspnorb,Cryst%natom,Cryst%ntypat,Cryst%typat,Pawtab,&
&   has_dijhartree=1,has_dijhat=1,has_dijxc=1,has_dijxc_val=1,has_dijso=has_dijso,has_dijU=has_dijU)

   allocate(QP_paw_an(Cryst%natom))
   call nullify_paw_an(QP_paw_an)
   nkxc1=0
   call init_paw_an(Cryst%natom,Cryst%ntypat,nkxc1,Dtset%nspden,cplex,Dtset%pawxcdev,&
&   Cryst%typat,Pawang,Pawtab,QP_paw_an,has_vxc=1,has_vxcval=1)
!  
!  === Evaluate on-site" energies, potentials, densities using QP density ===
!  * Initialize also "lmselect" (index of non-zero LM-moments of densities).
   call status(0,Dtfil%filstat,iexit,level,'call pawdenpot')

   nzlmopt=-1 ; option=0 ; compch_sph=greatest_real
   call pawdenpot(compch_sph,QP_energies%e_paw,QP_energies%e_pawdc,idir,ipert,Dtset%ixc,Cryst%natom,Dtset%nspden,&
&   Cryst%ntypat,nzlmopt,option,QP_paw_an,QP_paw_an,QP_paw_ij,Pawang,Dtset%pawprtvol,Pawrad,QP_pawrhoij,Dtset%pawspnorb,&
&   Pawtab,Dtset%pawxcdev,dtset%spnorbscl,Cryst%typat,Dtset%xclevel,Psps%znuclpsp)

!  === Re-symmetrize PAW Cprj_bz in the full BZ ===
!  TODO add rotation in spinor space
   call paw_symcprj(Pawtab,Cryst,Dtset%nspinor,Sp%nbnds,KS_BSt%nband,Dtset%nsppol,Psps,Kmesh,Cprj_ibz,Pawang,dimlmn,Cprj_bz)
  end if

! Allocate these arrays anyway, since they are passed to subroutines.
  if (.not.allocated(qp_nhat  )) allocate(qp_nhat  (nfftf,0))
  if (.not.allocated(qp_nhatgr)) allocate(qp_nhatgr(nfftf,Dtset%nspden,0))

! here I should renormalize the density
  call test_charge(nfftf,KS_BSt%nelect,Dtset%nspden,qp_rhor,Cryst%ucvol,qp_nhat,Dtset%usepaw,&
&  usexcnhat,Pawfgr%usefinegrid,compch_sph,compch_fft,drude_plsmf)

  if (Dtset%usepaw==1) qp_rhor(:,:)=qp_rhor(:,:)+qp_nhat(:,:)
  call prtrhomxmn(std_out,MPI_enreg,nfftf,ngfftf,Dtset%nspden,1,qp_rhor)
! 
! ==== Simple mixing of the densities to damp oscillations in the Hartree potential ===
! TODO Implement similar trick for PAW+GW, nhat is missing
  write(msg,'(2a,f5.3,a)')ch10,' sigma: mixing QP densities using rhoqpmix = ',Dtset%rhoqpmix,ch10
  call wrtout(std_out,msg,'COLL')
  qp_rhor=prev_rhor+Dtset%rhoqpmix*(qp_rhor-prev_rhor)
  deallocate(prev_rhor)

  allocate(qp_rhog(2,nfftf))
  call fourdp(1,qp_rhog,qp_rhor(:,1),-1,MPI_enreg,nfftf,ngfftf,Dtset%paral_kgb,tim_fourdp)

! === Output the QP density ===
  if (rank==master.and.Dtset%prtden/=0) then
   rdwr=2 ; fformr=52 ; rdwrpaw=0
   fname=TRIM(filapp)//'_QP_DEN'
   call ioarr(accessfil,qp_rhor,Dtset,dummy,fformr,fname,Hdr_sigma,MPI_enreg,nfftf,Pawrhoij_dum,rdwr,rdwrpaw,ngfftf)
   if (accessfil==3) then
!   Complete the geometry and electronic information with missing values from hdr_io().
    call abi_etsf_geo_put(Dtset,fname,Psps,Cryst%rprimd,Cryst%xred)
    call abi_etsf_electrons_put(Dtset,fname)
   end if
  end if

  call status(0,Dtfil%filstat,iexit,level,'call setvtr   ')
  nkxc=0
  if (Dtset%nspden==1) nkxc=2
  if (Dtset%nspden>=2) nkxc=3 !check GGA and spinor that is messy !!!
  allocate(kxc_qp(nfftf,nkxc))
! 
! **** NOTE THAT Vxc CONTAINS THE CORE-DENSITY CONTRIBUTION ****
! FIXME here xred is INOUT due to ionion_realSpace and xredcart, why?
  n3xccc=0 ; if (Psps%n1xccc/=0) n3xccc=nfftf
  allocate(qp_vhartr(nfftf),qp_vtrial(nfftf,Dtset%nspden),qp_vxc(nfftf,Dtset%nspden))

  optene=4 ; moved_atm_inside=0 ; moved_rhor=0 ; initialized=1 ; istep=1
  call setvtr(Cryst%atindx1,Dtset,QP_energies,gmet,gprimd,grewtn,gsqcutf_eff,initialized,istep,kxc_qp,mgfftf,&
&  moved_atm_inside,moved_rhor,MPI_enreg_seq,Cryst%nattyp,nfftf,ngfftf,qp_nhat,qp_nhatgr,nhatgrdim,nkxc,Cryst%ntypat,&
&  Psps%n1xccc,n3xccc,optene,Pawtab,ph1df,Psps,qp_rhog,qp_rhor,Cryst%rmet,Cryst%rprimd,strsxc,Cryst%ucvol,usexcnhat,&
&  qp_vhartr,vpsp,qp_vtrial,qp_vxc,vxcavg_qp,xccc3d,xred_dummy,Cryst%xred)

  deallocate(kxc_qp)

! === For PAW, compute QP Dij ===
  if (Dtset%usepaw==1) then
   call status(0,Dtfil%filstat,iexit,level,'call pawdij   ')

   cplex=1 ; ipert=0 ; idir=0
   call pawdij(cplex,Dtset,Dtset%enunit,idir,ipert,MPI_enreg_seq,Cryst%natom,nfftf,ngfftf,Dtset%nspden,Cryst%ntypat,&
&   QP_paw_an,QP_paw_ij,Pawang,Pawfgrtab,Dtset%pawprtvol,Pawrad,Dtset%pawspnorb,Pawtab,Dtset%pawxcdev,&
&   Cryst%typat,Cryst%ucvol,qp_vtrial,qp_vxc)

   call status(0,Dtfil%filstat,iexit,level,'call symdij   ')

   call symdij(Cryst%gprimd,Psps%indlmn,Cryst%indsym,ipert,Psps%lmnmax,Cryst%natom,Cryst%nsym,Cryst%ntypat,0,QP_paw_ij,Pawang,&
&   Dtset%pawprtvol,Cryst%rprimd,Cryst%symafm,Cryst%symrec,Cryst%typat)

!  * Output the PQ pseudopotential strengths Dij and the augmentation occupancies Rhoij.
   call pawprt(Dtset,Psps%indlmn,Psps%lmnmax,QP_paw_ij,KS_Pawrhoij,Pawtab)
  end if !PAW

! TODO this has to be checked
! call prtene(Dtset,QP_energies,std_out,Dtset%usepaw)
  ehartree=half*SUM(qp_rhor(:,1)*qp_vhartr(:))/DBLE(nfftf)*Cryst%ucvol

  write(msg,'(a,80a)')ch10,('-',ii=1,80)
  call wrtout(ab_out,msg,'COLL')
  write(msg,'(5a,f9.4,3a,es21.14,2a,es21.14)')ch10,&
&  ' QP results after the unitary transformation in the KS subspace: ',ch10,ch10,&
&  '  Number of electrons    = ',qp_rhog(1,1)*Cryst%ucvol,ch10,ch10,&
&  '  QP Band energy    [Ha] = ',get_bandenergy(QP_BSt),ch10,&
&  '  QP Hartree energy [Ha] = ',ehartree
  call wrtout(ab_out,msg,'COLL')
  write(msg,'(a,80a)')ch10,('-',ii=1,80)
  call wrtout(ab_out,msg,'COLL')

! TODO Since plasmonpole model 2-3-4 depend on the Fourier components of the density
! in case of self-consistency we might calculate here the ppm coefficients using qp_rhor
 end if ! gwcalctyp>=10
!
!=== Get KS hamiltonian hlda(b1,b1,k,s)= <b1,k,s|H_s|b1,k,s> ===
 allocate(hlda(b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
 hlda(:,:,:,:)=czero

 if (Dtset%nspinor==1) then
  do is=1,Sp%nsppol
   do ik=1,Kmesh%nibz
    do ib=b1gw,b2gw
     hlda(ib,ib,ik,is) = KS_BSt%eig(ib,ik,is)
    end do
   end do
  end do
 else
! === Spinorial case ===
! * Note that here vxc contains the contribution of the core.
! * Scale ovlp if orthonormalization is not satisfied as npwwfn might be < npwvec.
! Here we fill only the entries relative to IBZ, because Wf_info_braket is needed
! TODO add spin-orbit case
  do is=1,Sp%nsppol
   do ikcalc=1,Sp%nkcalc
    ikibz=Kmesh%tab(Sp%kcalc(ikcalc)) ! Irred k-point for GW
    do ib=b1gw,b2gw
     cgup  => Wf_info_braket%wfg(1:Wf_info_braket%npwwfn,ib,ikcalc,is)
     cgdwn => Wf_info_braket%wfg(Wf_info_braket%npwwfn+1:2*Wf_info_braket%npwwfn,ib,ikcalc,is)
     shift=Sp%nspinor*Sp%nbnds*(ikibz-1)
     idx_up =shift+(2*ib-1) ; idx_dwn=idx_up+1
     ovlp(1) = overlap_cmplx(cgup ,cgup ,Dtset%usepaw,Cprj_ibz(:,idx_up ),Cprj_ibz(:,idx_up ),Cryst%typat,Pawtab)
     ovlp(2) = overlap_cmplx(cgdwn,cgdwn,Dtset%usepaw,Cprj_ibz(:,idx_dwn),Cprj_ibz(:,idx_dwn),Cryst%typat,Pawtab)
!    write(*,*)ovlp(1),ovlp(2)
     norm=REAL(ovlp(1)+ovlp(2))
     ovlp(1)=REAL(ovlp(1)/norm)
     ovlp(2)=REAL(ovlp(2)/norm)
!    ovlp(2)=cone-ovlp(1)
     hlda(ib,ib,ikibz,1) = KS_BSt%eig(ib,ikibz,1)*ovlp(1)-ks_vxcme(ib,ib,ikibz,3)
     hlda(ib,ib,ikibz,2) = KS_BSt%eig(ib,ikibz,1)*ovlp(2)-ks_vxcme(ib,ib,ikibz,4)
     hlda(ib,ib,ikibz,3) = ks_vxcme(ib,ib,ikibz,3)
     hlda(ib,ib,ikibz,4) = ks_vxcme(ib,ib,ikibz,4)
    end do
   end do
  end do
 end if
!
!=== Initialize Sigma results ===
!TODO it is better if we use ragged arrays indexed by the k-point
 call init_sigma_results(Sp,Kmesh%nibz,Dtset%usepawu,Sr)
!
!=== Setup of the bare Hamiltonian := T + v_{loc} + v_{nl} + v_H ===
!* The representation depends wheter we are updating the wfs or not.
!* ks_vUme is zero unless we are using LDA+U as starting point, see calc_vHxc_braket
!* Note that vH matrix elements are calculated using the true uncutted interaction.

 if (Dtset%gwcalctyp<10) then
! * For one-shot GW use the KS representation.
  Sr%hhartree(:,:,:,:)=hlda(:,:,:,:)-ks_vxcvalme(:,:,:,:)-ks_vUme(:,:,:,:)
 else
! === Self-consistent on energies and|or wavefunctions ===
! * For NC get the bare Hamiltonian  $H_{bare}= T+v_{loc}+ v_{nl}$ in the KS representation
! * For PAW, calculate the matrix elements of h0, store also the new Dij in QP_Paw_ij.
! * h0 is defined as T+vH[tn+nhat+tnZc] + vxc[tnc] + dij_eff and
! dij_eff = dij^0 + dij^hartree + dij^xc-dij^xc_val + dijhat - dijhat_val.
! In the above expression tn, tnhat are QP quantities.

  if (Dtset%usepaw==0) then
   allocate(hbare(b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
   hbare(:,:,:,:)=hlda(:,:,:,:)-ks_vhme(:,:,:,:)-ks_vxcvalme(:,:,:,:)

!  * Change basis from KS to QP, hbare is overwritten: A_{QP} = U^\dagger A_{KS} U
   allocate(htmp(b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
   allocate(ctmp(b1gw:b2gw,b1gw:b2gw),uks2qp(b1gw:b2gw,b1gw:b2gw))
   htmp(:,:,:,:)=hbare(:,:,:,:) ; hbare(:,:,:,:)=czero

   do is=1,Sp%nsppol
    do ik=1,Kmesh%nibz
     uks2qp(:,:) = m_lda_to_qp(b1gw:b2gw,b1gw:b2gw,ik,is)
     do iab=1,Sp%nsig_ab
      is_idx=is ; if (Sp%nsig_ab>1) is_idx=iab
      ctmp(:,:)=MATMUL(htmp(:,:,ik,is_idx),uks2qp(:,:))
      hbare(:,:,ik,is_idx)=MATMUL(TRANSPOSE(CONJG(uks2qp(:,:))),ctmp(:,:))
     end do
    end do
   end do
   deallocate(htmp,ctmp,uks2qp)
  end if ! usepaw==0

! === Calculate the matrix elements===
! * This part is parallelized within MPI_COMM_WORD since each node has all GW wavefunctions.
! * In case of PAW construct new bare Hamiltonian.
  write(msg,'(2a)')ch10,' *************** QP Energies *******************'
  call wrtout(std_out,msg,'COLL')

  make_newh0=0 ; if (Dtset%usepaw==1) make_newh0=1
  allocate(qp_vxcme   (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
  allocate(qp_vxcvalme(b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
  allocate(qp_vUme    (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
  allocate(qp_vhme    (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab))
  allocate(h0_me      (b1gw:b2gw,b1gw:b2gw,Kmesh%nibz,Sp%nsppol*Sp%nsig_ab*make_newh0))
  qp_vhme=ks_vhme ! Actually this is not needed.

  call calc_vHxc_braket(Dtset,Sp,Kmesh,make_newh0,b1gw,b2gw,gsqcutf_eff,nfftf_tot,nfftf,ngfftf,igfftf,Gsph_Max%gvec,&
&  Wf_info_braket,qp_vtrial,qp_vhartr,qp_vxc,Psps,Cprj_ibz,Pawtab,QP_paw_an,Pawang,Pawfgrtab,Pawrad,QP_paw_ij,&
&  MPI_enreg,Cryst,qp_rhor,qp_rhog,usexcnhat,qp_nhat,qp_nhatgr,nhatgrdim,qp_vhme,qp_vxcvalme,qp_vxcme,qp_vUme,h0_me)

  if (Dtset%usepaw==0) then
   Sr%hhartree(:,:,:,:)=hbare(:,:,:,:)+qp_vhme(:,:,:,:)
  else
   Sr%hhartree(:,:,:,:)=h0_me(:,:,:,:)
  end if

  deallocate(qp_nhat,STAT=istat)
  deallocate(h0_me,  STAT=istat)
  deallocate(qp_rhog,qp_vhartr,qp_vxc)
  deallocate(qp_vxcme,qp_vUme,qp_vhme)
 end if ! gwcalctyp<10
!
!=== Free some memory ===
 deallocate(igfftf)
 if (allocated(ks_vhme)) deallocate(ks_vhme)
 if (allocated(hbare  )) deallocate(hbare )
 if (allocated(hlda   )) deallocate(hlda  )
!
!=== Prepare the storage of QP amplitudes and energies ===
!* Initialize with KS wavefunctions and energies.
 Sr%eigvec_qp(:,:,:,:)=czero
 Sr%en_qp_diago(:,:,:)=zero
 do ib=1,Sp%nbnds
  Sr%en_qp_diago(ib,:,:)=KS_BSt%eig(ib,:,:)
  Sr%eigvec_qp(ib,ib,:,:)=cone
 end do
!
!=== Store <n,k,s|V_xc[n_val]|n,k,s> and <n,k,s|V_U|n,k,s> ===
!* Note that we store the matrix elements of V_xc in the KS basis set, not in the QP basis set
!* Matrix elements of V_U are zero unless we are using LDA+U as starting point
 do ib=b1gw,b2gw
  Sr%vxcme(ib,:,:)=ks_vxcvalme(ib,ib,:,:)
  Sr%vUme (ib,:,:)=ks_vUme    (ib,ib,:,:)
 end do
 deallocate(ks_vxcme,ks_vxcvalme,ks_vUme)
!
!=== Initial guess for the GW energies ===
!* Save also energies of the previous iteration.
 do is=1,Sp%nsppol
  do ik=1,Kmesh%nibz
   do ib=1,Sp%nbnds
    Sr%e0 (ib,ik,is)=QP_BSt%eig(ib,ik,is)
    Sr%egw(ib,ik,is)=QP_BSt%eig(ib,ik,is)
   end do
   Sr%e0gap(ik,is)=zero
   ks_iv=ks_vbik(ik,is)
   if (Sp%nbnds>=ks_iv+1) Sr%e0gap(ik,is)=Sr%e0(ks_iv+1,ik,is)-Sr%e0(ks_iv,ik,is)
  end do
 end do
!
!=== If required apply a scissor operator or update the energies ===
!TODO check if other Sr entries have to be updated
!moreover this part should be done only in case of semiconductors
!FIXME To me it makes more sense if we apply the scissor to KS_BS but I have to RECHECK csigme
 if (Sp%soenergy>0.1d-4) then
  write(msg,'(6a,f10.5,a)')ch10,&
&  ' sigma : performing a first self-consistency',ch10,&
&  '  update of the energies in G by a scissor operator',ch10, &
&  '  applying a scissor operator of [eV] ',Sp%soenergy*Ha_eV,ch10
  call wrtout(std_out,msg,'COLL') ; call wrtout(ab_out,msg,'COLL')
  do is=1,Sp%nsppol
   do ik=1,Kmesh%nibz
    ks_iv=ks_vbik(ik,is)
    if (Sp%nbnds>=ks_iv+1) then
     Sr%egw    (ks_iv+1:Sp%nbnds,ik,is) = Sr%egw    (ks_iv+1:Sp%nbnds,ik,is)+Sp%soenergy
     QP_BSt%eig(ks_iv+1:Sp%nbnds,ik,is) = QP_BSt%eig(ks_iv+1:Sp%nbnds,ik,is)+Sp%soenergy
    end if
   end do
  end do
! £call apply_scissor(QP_BSt,Ep%soenergy)
 else if (.FALSE.) then
  write(msg,'(4a)')ch10,&
&  ' sigma : performing a first self-consistency',ch10,&
&  '  update of the energies in G by a previous GW calculation'
  call wrtout(std_out,msg,'COLL') ; call wrtout(ab_out,msg,'COLL')
! TODO Recheck this part, is not clear to me!
  allocate(sr_gwenergy(Sp%nbnds,Kmesh%nibz,Sp%nsppol))
  call rdgw(QP_BSt,'__in.gw__',sr_gwenergy)
  Sr%egw=sr_gwenergy
  deallocate(sr_gwenergy)
! * Recalculate the new fermi level
  call update_occ(QP_BSt,Dtset%fixmom,prtvol=0)
 end if
!
!In case of AC refer all the energies wrt to the fermi level
!Take care because results from ppmodel cannot be used for AC
!FIXME check ks_energy or qp_energy (in case of SCGW?)

 if (mod10==1) then
! All these quantities will be passed to csigme
! if I skipped the self-consistent part then here I have to use fermi
  QP_BSt%eig = QP_BSt%eig -QP_BSt%fermie
  Sr%egw = Sr%egw-QP_BSt%fermie
  Sr%e0  = Sr%e0 -QP_BSt%fermie
  oldefermi=QP_BSt%fermie
! TODO Recheck fermi
! Clean EVERYTHING in particulare the treatment of E fermi
  QP_BSt%fermie=zero
 end if
 call timab(408,2,tsec) ! rdqps

!=== Get epsilon^{-1} either from the _SCR or the _SUSC file and store it in Er%epsm1 ===
!* If Er%mqmem==0, allocate and read a single q-slice inside csigme.
!TODO Er%nomega should be initialized so that only the frequencies really needed are stored in memory
!TODO TDDFT not yet operative
!TODO remove Dtfil is only used to write EELS and abs spectrum on file
 call timab(409,1,tsec) ! getW
 id_required=4 ; ikxc=0 ; approx_type=0 ; option_test=0 ; fname_dump=TRIM(Dtfil%filnam_ds(4))//'_SCR'
 dim_kxcg=0
 allocate(kxcg(Er%npwe,Er%npwe*dim_kxcg))
 call mkdump_Er(Er,Vcp,dim_kxcg,kxcg,Dtfil,id_required,approx_type,ikxc,option_test,&
& fname_dump,Dtset%accesswff,Dtset%localrdwf,MPI_enreg)
 deallocate(kxcg)

#if 1
!Added by Rshaltaf for the vertex correction inclusion
!MG FIXME this subroutine requires too much memory, is obsolete and should be replaced by Er% methods
!It does not work amymore since now I use the agmented FFT mesh for ks_rhor 
 if (Dtset%gwgamma==1) then
  call eps1_tc(Dtset,MPI_enreg,ngfft_gw,nfftgw_tot,ks_rhor,Cryst%rprimd,Sp%npwc,gmet,gprimd,Gsph_Max%gvec,Er%nomega,&
&  Er%epsm1,Qmesh%nibz,Cryst%ucvol,Qmesh%ibz,Er%omega,Dtfil,Hdr_kss,Er%Hscr%npwwfn_used,Sp%npwvec,Er%Hscr%nbnds_used)
 end if
#endif

!=== Calculate plasmonpole model parameters ===
!TODO In case of PAW ks_rhor contains only tn + nhat, PPmodels requiring n(G) not tested
!TODO Maybe its better if we use mqmem as input variable
!TODO should pass qp_rhor but drude_plsmf of KS density!
 if (Dtset%usepaw==1.and.(Sp%ppmodel/=1.and.Sp%ppmodel/=0)) STOP 'PAW + this ppmodel not tested'

 call init_PPmodel(PPm,Er,Sp%ppmodel,drude_plsmf,mod10)

 if (Er%mqmem/=0) then
  call setup_ppmodel(PPm,Dtset%paral_kgb,Qmesh,Er,MPI_enreg,nfftf,Gsph_Max%gvec,ngfftf,gmet,gprimd,ks_rhor(:,1))
 end if
 call timab(409,2,tsec) ! getW
!
!=== Write generic info on ab_out and open files to store final results ===
 if (rank==master) then
  call ReportGap(KS_BSt,header='KS Band Gaps',unit=ab_out)
  call write_sigma_results_header(Sp,Er,Cryst,Kmesh,Qmesh)
  fname=TRIM(Dtfil%filnam_ds(4))//'_GW'  ; open(unt_gw,file=fname,status='unknown',form='formatted')
  write(unt_gw,*)Sp%nkcalc,Sp%nsppol
  fname=TRIM(Dtfil%filnam_ds(4))//'_SIG' ; open(unt_sig,file=fname,status='unknown',form='formatted')
  fname=TRIM(Dtfil%filnam_ds(4))//'_SGR' ; open(unt_sgr,file=fname,status='unknown',form='formatted')
  if (mod10==1) then ! Sigma along imag. axis
   fname=TRIM(Dtfil%filnam_ds(4))//'_SGM' ; open(unt_sgm,file=fname,status='unknown',form='formatted')
  end if
 end if
!
!=================================================================
!=== Calculate self-energy and output results for each k-point ===
!=================================================================
!* Here it would be possible to calculate the QP correction for the same k-point using a PPmodel
!in the first iteration just to improve the initial guess and CD or AC in the second step. Useful
!if the KS level is far from the expected QP results. Previously it was possible to do such
!calculation by simply listing the same k-point twice in kptgw. Now this trick is not allowed anymore.
!Everything, indeed, should be done in a clean and transparent way inside csigme.
!
 call timab(420,1,tsec) ! sigma(csigme)
 call print_Wfs(Wf_info_braket,mode_paral='PERS')
 call print_Wfs(Wf_info,mode_paral='PERS')

 do ikcalc=1,Sp%nkcalc

  write(msg,'(2a,i5)')ch10,' Calculating GW corrections for k-point number : ',ikcalc
  call wrtout(std_out,msg,'COLL')

  ikibz=Kmesh%tab(Sp%kcalc(ikcalc)) ! Index of the irred k-point for GW

  if (MPI_enreg%gwpara==1) then
!  === Parallelization over k-points, redefine the distribution of k-points ===
!  * NOTE that Kmesh%nbz is used as first dimension of proc_distrb. This implies that
!  proc_distrb *MUST* be redefined if it is passed to routines employing the IBZ indexing.
   deallocate(MPI_enreg%proc_distrb)
   allocate(MPI_enreg%proc_distrb(Kmesh%nbz,Sp%nbnds,Sp%nsppol))
!  If nprocs>ntasks, proc_distrb==-999 for rank>ntasks-1 and no harm should be done
   MPI_enreg%proc_distrb=-999
   allocate(istart(nprocs),istop(nprocs))
   if (Sp%symsigma==0) then
!   * No symmetries, divide the full BZ among procs
    ntasks=Kmesh%nbz
    call split_work2(ntasks,nprocs,istart,istop)
    do irank=0,nprocs-1
     if (istart(irank+1)==ntasks+1) CYCLE
     ii=istart(irank+1) ; jj=istop(irank+1)
     MPI_enreg%proc_distrb(ii:jj,:,:)=irank
    end do
   else if (Sp%symsigma/=0) then
!   * Divide the IBZ_q among procs. Distrb might be not so efficient for particular qs.
!   Here proc_distrb is -999 for all the k-points not in the IBZ_q.
    ntasks=SUM(Ltg_k(ikcalc)%ibzq(:))
    call split_work2(ntasks,nprocs,istart,istop)
!   Identify q and G0 where q+G0=k_j-k_i
    do irank=0,nprocs-1
     do ik=1,Kmesh%nbz
      kgwmk(:)= Sp%xkcalc(:,ikcalc)-Kmesh%bz(:,ik) ! Warn xkcalc must be inside the BZ
      do iq=istart(irank+1),istop(irank+1)
       call findqg0(iqm,g0,kgwmk,Qmesh%nbz,Qmesh%bz,Sp%mG0)
       if (Ltg_k(ikcalc)%bz2ibz(iqm)==iq) MPI_enreg%proc_distrb(ik,:,:)=irank
      end do
     end do
    end do
   end if
   deallocate(istart,istop)
!  === Announce the treatment of k-points by each proc ===
   do ik=1,Kmesh%nbz
    do is=1,Sp%nsppol
     if (MPI_enreg%proc_distrb(ik,Sp%nbnds,is)==rank) then
      write(msg,'(3(a,i4))')'P sigma : treating k-point ',ik,' and spin ',is,' by node ',rank
      call wrtout(std_out,msg,'PERS')
     end if
    end do
   end do
  end if ! gwpara==1
! 
! === Call csigme to calculate matrix elements of Sigma ===
  call status(ikcalc,Dtfil%filstat,iexit,level,'call csigme   ')

#if 0
! === Define the distribution of the tasks inside cisgme according to gwpara and symsigma ===
! * For the moment spaceComm_q==MPI_COMM_WORLD but, it would be possible to create pools of processors for each kgw
! The only thing that has to be solved is the transmission&output of the results.
  allocate(gw_distrb(Kmesh%nbz,Sp%nbnds,Sp%nsppol))
  use_symmetries=(Sp%symsigma/=0) ; spaceComm_kgw=MPI_enreg%world_comm

  call sigma_gwdistrb(MPI_enreg%gwpara,spaceComm_kgw,Kmesh%nbz,Sp%nbnds,Sp%nsppol,use_symmetries,Sp%mG0,&
&  Kmesh,Qmesh,Ltg_k(ikcalc),MPI_enreg,my_minb,my_maxb,gw_distrb)

  deallocate(gw_distrb)
#endif

  call csigme(ikcalc,rhoxsp_method,dim1_rhox,dim2_rhox,Dtfil,Dtset,Cryst,QP_BSt,Sp,Sr,Er,Gsph_Max,Vcp,Kmesh,Qmesh,&
&  Ltg_k(ikcalc),PPm,Pawang,Pawtab,Psps,Cprj_bz,Wf_info,Wf_info_braket,MPI_enreg,pawrhox_spl,Gsph_Max%gvec,ktabr,&
&  ngfft_gw,igfft,nfftgw_tot,my_minb,my_maxb,ngfftf,nfftf,ks_rhor)
! 
! === Calculate direct gap for each spin and print out final results ===
! * We use the valence index of the KS system because we still do not know
! all the QP corrections. Ideally one should use the QP valence index
  do is=1,Sp%nsppol
   if ( Sp%maxbnd(ikcalc) >= ks_vbik(ikibz,is)+1 .and. &
&   Sp%minbnd(ikcalc) <= ks_vbik(ikibz,is)  ) then
    ks_iv=ks_vbik(ikibz,is)
    Sr%egwgap (ikibz,is)=  Sr%egw(ks_iv+1,ikibz,is) -  Sr%egw(ks_iv,ikibz,is)
    Sr%degwgap(ikibz,is)= Sr%degw(ks_iv+1,ikibz,is) - Sr%degw(ks_iv,ikibz,is)
   else
!   The "gap" cannot be computed
    Sr%e0gap  (ikibz,is)=zero
    Sr%egwgap (ikibz,is)=zero
    Sr%degwgap(ikibz,is)=zero
   end if
  end do

  if (rank==master) then
   call write_sigma_results(ikcalc,ikibz,Sp,Sr,Kmesh,KS_BSt)
  end if
 end do !ikcalc

 call timab(420,2,tsec) ! sigma(csigme)
!
!=== Update the energies in QP_BSt ===
!* If QPSCGW, use diagonalized eigenvalues otherwise perturbative results.
 if (Sp%gwcalctyp>=10) then
  do ib=1,Sp%nbnds
   QP_BSt%eig(ib,:,:)=Sr%en_qp_diago(ib,:,:)
  end do
 else
  QP_BSt%eig(:,:,:)=Sr%egw(:,:,:)
 end if

!=== This part only if all k-points in IBZ are calculated ===
 if (Sp%nkcalc==Kmesh%nibz) then
! * Recalculate new occupations and Fermi level.
  call update_occ(QP_BSt,Dtset%fixmom,prtvol=0)
  qp_vbik(:,:) = get_valence_idx(QP_BSt)
  write(msg,'(2a,3x,2(es16.6,a))')ch10,' New Fermi energy : ',QP_BSt%fermie,' Ha ,',QP_BSt%fermie*Ha_eV,' eV'
  call wrtout(std_out,msg,'COLL')
  call wrtout(ab_out,msg,'COLL')
! 
! === If all k-points and all occupied bands are calculated, output EXX ===
! FIXME here be careful about the check on  ks_vbik in case of metals
! if (rank==master.and.Sp%nkcalc==Kmesh%nibz.and.ALL(Sp%minbnd(:)==1).and.ALL(Sp%maxbnd(:)>=MAXVAL(nbv(:)))) then
  if (ALL(Sp%minbnd(:)==1).and. ALL(Sp%maxbnd(:)>=MAXVAL(MAXVAL(ks_vbik(:,:),DIM=1))) ) then
   exchange_energy=zero
   do is=1,Sp%nsppol
    do ik=1,Kmesh%nibz
     do ib=b1gw,b2gw
      if (Sp%nsig_ab==1) then
       exchange_energy = exchange_energy + half*QP_BSt%occ(ib,ik,is)*Kmesh%wt(ik)*Sr%sigxme(ib,ik,is)
      else
       exchange_energy = exchange_energy + half*QP_BSt%occ(ib,ik,is)*Kmesh%wt(ik)*SUM(Sr%sigxme(ib,ik,:))
      end if
     end do
    end do
   end do
   write(msg,'(a,2(es16.6,a))')' New Exchange energy : ',exchange_energy,' Ha ,',exchange_energy*Ha_eV,' eV'
   call wrtout(std_out,msg,'COLL')
   call wrtout(ab_out,msg,'COLL')
  end if

! * Report the QP gaps (Fundamental and Optical)
  call ReportGap(QP_BSt,header='QP Band Gaps',unit=ab_out)

! * Calculate QP DOS 
! TODO: get_dos is not parallelized, yet!
! add piece of code to calculate tetra in get_dos if not associated
  if ((Dtset%prtdos==1.or.Dtset%prtdos==2).and.rank==master) then
   fildos=TRIM(filapp)//'_QP_DOS'
!  £call get_dos(QP_BSt,Kmesh,Dtset%prtdos,fildos,Dtset%tsmear,Dtset%dosdeltae)
  end if

 end if  ! Sp%nkcalc==Kmesh%nibz

!=== Write SCF data in case of self-consistent calculation === 
!* Save Sr%en_qp_diago, Sr%eigvec_qp and m_lda_to_qp in the _QPS file.
!* Note that in the first iteration qp_rhor contains KS rhor, then the mixed rhor.
 if (rank==master.and.Sp%gwcalctyp>=10) then

  fname=TRIM(Dtfil%filnam_ds(4))//'_QPS'
  call wrqps(fname,Sp,Kmesh,Dtset%nspden,nscf,nfftf,ngfftf,Sr,m_lda_to_qp,qp_rhor)

! === Report the MAX variation for each kptgw and spin ===
  write(msg,'(2a)')ch10,' Convergence of QP corrections '
  call wrtout(ab_out,msg,'COLL')
  do isppol=1,Sp%nsppol
   write(msg,'(a,i2,a)')' >>>>> For spin ',isppol,' <<<<< '
   call wrtout(ab_out,msg,'COLL')
   do ikcalc=1,Sp%nkcalc
    ib1   = Sp%minbnd(ikcalc)
    ib2   = Sp%maxbnd(ikcalc)
    ikbz  = Sp%kcalc(ikcalc)
    ikibz = Kmesh%tab(ikbz)
    ii      = imax_loc( ABS(Sr%degw(ib1:ib2,ikibz,isppol)) )
    max_degw = Sr%degw(ii,ikibz,isppol)
    write(msg,('(a,i3,a,2f8.3,a,i3)'))&
&    '   kptgw no:',ikcalc,'; Maximum DeltaE = (',max_degw*Ha_eV,') for band index:',ii
    call wrtout(ab_out,msg,'COLL')
   end do
  end do

 end if

!=== Dump GW results to a NETCDF file ===
!WARNING complex real GW corrections is not activated in abi_etsf_init
!Here solve problem with v5/t63
!£if (rank==master.and.Dtset%accesswff==3) then
 if (rank==master.and.Dtset%useria==2.) then
  fname=TRIM(Dtfil%filnam_ds(4))//'_QP_EIG'
  call abi_etsf_init(Dtset,fname,4,.FALSE.,Hdr_sigma%lmn_size,Psps,Dummy_wvl)
  call etsf_dump_QP(Sr,QP_BSt,KS_BSt,Hdr_sigma,Cryst,Kmesh,fname)
 end if
!
!----------------------------- END OF THE CALCULATION ------------------------
!=== Close Files === 
 if (rank==master) then
  close(unt_gw )
  close(unt_sig)
  close(unt_sgr)
  if (mod10==1) close(unt_sgm)
 end if

 call status(0,Dtfil%filstat,iexit,level,'deallocate    ')

 deallocate(igfft,ktabr)
 deallocate(ks_vbik,qp_vbik,ph1d,ph1df)
 deallocate(qp_rhor,ks_rhor,ks_rhog)
 deallocate(ks_vhartr,ks_vtrial,vpsp,ks_vxc)
 deallocate(kxc,xccc3d,grewtn,xred_dummy)

 if (allocated(m_lda_to_qp)) deallocate(m_lda_to_qp)
 if (allocated(qp_vtrial  )) deallocate(qp_vtrial)

 if (associated(MPI_enreg%proc_distrb)) deallocate(MPI_enreg%proc_distrb)
!
!=== Destroy the dinamic arrays in the local data structures ===
!* Optional deallocation for PAW.
 deallocate(Pawfgr%fintocoa,Pawfgr%coatofin,STAT=istat)
 deallocate(ks_nhat,    STAT=istat)
 deallocate(ks_nhatgr,  STAT=istat)
 deallocate(qp_nhatgr,  STAT=istat)
 deallocate(pawrhox_spl,STAT=istat)

 if (Dtset%usepaw==1) then
  deallocate(dimlmn,nlmn)
  call cprj_free(Cprj_ibz)       ; deallocate(Cprj_ibz)
  call cprj_free(Cprj_bz )       ; deallocate(Cprj_bz )
  call rhoij_free(KS_Pawrhoij)   ; deallocate(KS_Pawrhoij)
  call pawfgrtab_free(Pawfgrtab) ; deallocate(Pawfgrtab)
  call destroy_paw_ij(KS_paw_ij) ; deallocate(KS_paw_ij)
  call destroy_paw_an(KS_paw_an) ; deallocate(KS_paw_an)
  if (Dtset%gwcalctyp>=10) then
   call rhoij_free(QP_pawrhoij)   ; deallocate(QP_pawrhoij)
   call destroy_paw_ij(QP_paw_ij) ; deallocate(QP_paw_ij)
   call destroy_paw_an(QP_paw_an) ; deallocate(QP_paw_an)
  end if
 end if
!
!* Destroy objects.
 call destroy_Wfs(Wf_info)
 call destroy_Wfs(Wf_info_braket)
 do ikcalc=1,Sp%nkcalc
  call destroy_little_group(Ltg_k(ikcalc))
 end do
 deallocate(Ltg_k)
 call destroy_BZ_mesh_type(Kmesh)
 call destroy_BZ_mesh_type(Qmesh)
 call destroy_Gvectors(Gsph_Max)
 call destroy_Coulombian(Vcp)
 call DestroyCrystal(Cryst)
 call destroy_Sigma_results(Sr)
 call destroy_Sigma_parameters(Sp)
 call destroy_Epsilonm1_results(Er)
 call destroy_PPmodel(PPm)
 call hdr_clean(Hdr_sigma)
 call hdr_clean(Hdr_kss)
 call bstruct_clean(KS_BSt)
 call bstruct_clean(QP_BSt)

 call timab(401,2,tsec)
 call status(0,Dtfil%filstat,iexit,level,'exit          ')

#endif
!Workaround for buggy PGI6

 DBG_EXIT('COLL')

end subroutine sigma
!!***
