!{\src2tex{textfont=tt}}
!!****f* ABINIT/pawmkrhoij
!!
!! NAME
!! pawmkrhoij
!!
!! FUNCTION
!! Calculate the PAW quantities rhoij (augmentation occupancies)
!! Remember:for each atom, rho_ij=Sum_{n,k} {occ(n,k)*<Cnk|p_i><p_j|Cnk>}
!!
!! COPYRIGHT
!! Copyright (C) 1998-2010 ABINIT group (FJ, MT)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
!!
!! INPUTS
!!  atindx1(natom)=index table for atoms, inverse of atindx
!!  cprj(natom,nspinor*mband*mkmem*nsppol)= wave functions projected with non-local projectors:
!!                                   cprj_nk(i)=<p_i|Cnk> where p_i is a non-local projector.
!!  dimcprj(natom)=array of dimensions of array cprj (ordered by atom-type)
!!  istwfk(nkpt)=parameter that describes the storage of wfs
!!  kptopt=option for the generation of k points
!!  mband=maximum number of bands
!!  mband_cprj=maximum number of bands used in the dimensionning of cprj array (usually mband/nproc_band)
!!  mkmem =number of k points which can fit in memory; set to 0 if use disk
!!  mpi_enreg=informations about MPI parallelization
!!  natom=number of atoms in cell
!!  nband=number of bands for all k points
!!  nkpt=number of k points
!!  nspinor=number of spinorial components of the wavefunctions
!!  nsppol=1 for unpolarized, 2 for spin-polarized
!!  occ(mband*nkpt*nsppol)=occupation number for each band for each k
!!  paral_kgb=Flag related to the kpoint-band-fft parallelism
!!  paw_dmft  <type(paw_dmft_type)>= paw+dmft related data
!!  pawprtvol=control print volume and debugging output for PAW
!!  unpaw=unit number for cprj PAW data (if used)
!!  wtk(nkpt)=weight assigned to each k point
!!
!! SIDE EFFECTS
!!  pawrhoij(natom) <type(pawrhoij_type)>= paw rhoij occupancies and related data
!!  On input: arrays dimensions
!!  On output:
!!    pawrhoij(:)%rhoij_(lmn2_size,nspden)=
!!          Sum_{n,k} {occ(n,k)*conjugate[cprj_nk(ii)].cprj_nk(jj)} (non symetrized)
!!
!! PARENTS
!!      vtorho
!!
!! CHILDREN
!!      cprj_alloc,cprj_diskinit_r,cprj_free,cprj_get,leave_test,pawaccrhoij
!!      print_ij,timab,wrtout,xcomm_init,xme_init,xsum_mpi
!!
!! NOTES
!!  The cprj are distributed over band processors.
!!  Only the mod((iband-1)/mpi_enreg%bandpp,mpi_enreg%nproc_band) projectors
!!  are stored on each proc.
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

 subroutine pawmkrhoij(atindx1,cprj,dimcprj,istwfk,kptopt,mband,mband_cprj,mkmem,mpi_enreg,&
&                      natom,nband,nkpt,nspinor,nsppol,occ,paral_kgb,paw_dmft,pawprtvol,pawrhoij,unpaw,wtk)

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use m_errors
 use m_xmpi

 use m_paw_dmft, only: paw_dmft_type

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
 use interfaces_18_timing
 use interfaces_32_util
 use interfaces_51_manage_mpi
 use interfaces_53_abiutil
 use interfaces_66_paw, except_this_one => pawmkrhoij
!End of the abilint section

 implicit none

!Arguments ---------------------------------------------
!scalars
 integer,intent(in) :: kptopt,mband,mband_cprj,mkmem,natom,nkpt,nspinor,nsppol
 integer,intent(in) :: paral_kgb,pawprtvol,unpaw
 type(MPI_type),intent(inout) :: mpi_enreg
!arrays
 integer,intent(in) :: atindx1(natom),dimcprj(natom),istwfk(nkpt),nband(nkpt*nsppol)
 real(dp),intent(in) :: occ(mband*nkpt*nsppol),wtk(nkpt)
 type(cprj_type),intent(in) :: cprj(natom,nspinor*mband_cprj*mkmem*nsppol)
 type(paw_dmft_type), intent(in) :: paw_dmft
 type(pawrhoij_type),intent(inout) :: pawrhoij(natom)

!Local variables ---------------------------------------
!scalars
 integer :: bdtot_index,bufdim,cplex,iatom,ib,ib1,iband,iband1,ibc1,ibg,ib_this_proc,ierr,ikpt
 integer :: iorder_cprj,isppol,jdim,me,natinc,nband_k,nband_k_cprj,nbandc1,nprocband,nsp2
 integer :: option,spaceComm,use_nondiag_occup_dmft
 logical :: locc_test,usetimerev
 real(dp) :: occup,wtk_k
 character(len=500) :: message
!arrays
 integer,allocatable :: dimlmn(:),idum(:)
 real(dp) :: tsec(2)
 real(dp),allocatable :: buffer1(:),buffer2(:)
 character(len=8),parameter :: dspin(6)=(/"up      ","down    ","dens (n)","magn (x)","magn (y)","magn (z)"/)
 type(cprj_type),allocatable :: cwaveprj(:,:),cwaveprjb(:,:)

!************************************************************************

 DBG_ENTER("COLL")

!Init MPI data
 call xcomm_init(mpi_enreg,spaceComm,spaceComm_bandfft=mpi_enreg%comm_kpt)
 call xme_init(mpi_enreg,me)

!Check if cprj is distributed over bands
 nprocband=(mband/mband_cprj)
 if (paral_kgb==1.and.nprocband/=mpi_enreg%nproc_band) then
   MSG_BUG('  mband/mband_cprj must be equal to mband !')
 end if
 if (paw_dmft%use_dmft/=0.and.nprocband/=1) then
   MSG_BUG('  parallelization over bands not compatible with DMFT !')
 end if

!Initialise and check dmft variables
 if(paw_dmft%use_dmft/=0) then
   nbandc1=(paw_dmft%mbandc-1)*paw_dmft%use_dmft+1
 else
   nbandc1=1
 end if

!Initialize temporary file (if used)
 iorder_cprj=0
 call cprj_diskinit_r(atindx1,natom,iorder_cprj,mkmem,natom,0,dimcprj,nspinor,unpaw)

!Allocate temporary cwaveprj storage
 allocate(cwaveprj(natom,nspinor))
 call cprj_alloc(cwaveprj,0,dimcprj)
 if(paw_dmft%use_dmft/=0) then
   allocate(cwaveprjb(natom,nspinor))
   call cprj_alloc(cwaveprjb,0,dimcprj)
 end if

!Initialize output quantities
 do iatom=1,natom
   pawrhoij(iatom)%rhoij_=zero
 end do

!LOOP OVER SPINS
 option=1
 usetimerev=(kptopt>0.and.kptopt<3)
 bdtot_index=0;ibg=0
 do isppol=1,nsppol

!  LOOP OVER k POINTS
   do ikpt=1,nkpt

     nband_k=nband(ikpt+(isppol-1)*nkpt)
     nband_k_cprj=nband_k/nprocband
     wtk_k=wtk(ikpt)

     if(mpi_enreg%paral_compil_kpt==1)then
       if(minval(abs(mpi_enreg%proc_distrb(ikpt,1:nband_k,isppol)-me))/=0) then
         bdtot_index=bdtot_index+nband_k
         cycle
       end if
     end if

     cplex=2;if (istwfk(ikpt)>1) cplex=1

!    LOOP OVER BANDS
     ib_this_proc=0
     do ib=1,nband_k
       iband=bdtot_index+ib

!      Parallelization: treat only some bands
       if(mpi_enreg%paral_compil_kpt==1)then
         if (paral_kgb==1) then
           if (mod((ib-1)/mpi_enreg%bandpp,mpi_enreg%nproc_band)/=mpi_enreg%me_band) cycle
         else
           if (mpi_enreg%proc_distrb(ikpt,ib,isppol)/=me) cycle
         end if
       end if
       ib_this_proc=ib_this_proc+1

!      DMFT: LOOP ON ADDITIONAL BANDS
       do ibc1=1,nbandc1
!        check if dmft and occupations
!        write(6,*) 'ib,ibc1          ',ib,ibc1

!        DMFT stuff: extract cprj and occupations for additional band
         if(paw_dmft%use_dmft /= 0) then
           ib1 = paw_dmft%include_bands(ibc1)
!          write(6,*) 'use_dmft=1 ib,ib1',ib,ib1
           iband1 = bdtot_index+ib1
!          write(6,*) 'ib, ib1          ',paw_dmft%band_in(ib),paw_dmft%band_in(ib1)
           if(paw_dmft%band_in(ib)) then
             if(.not.paw_dmft%band_in(ib1))  stop
             use_nondiag_occup_dmft = 1
             occup = paw_dmft%occnd(ib,ib1,ikpt,isppol)
             locc_test = abs(paw_dmft%occnd(ib,ib1,ikpt,isppol))>tol8
!            write(6,*) 'use_dmft=1,band_in(ib)=1, ib,ibc1',ib,ib1,locc_test
             if (locc_test .or. mkmem == 0) then
               call cprj_get(atindx1,cwaveprjb,cprj,natom,ib1,ibg,ikpt,iorder_cprj,isppol,&
&               mband_cprj,mkmem,mpi_enreg,natom,1,nband_k_cprj,nspinor,nsppol,unpaw)
             end if
           else
             use_nondiag_occup_dmft = 0
             locc_test = (abs(occ(iband))>tol8)
             occup = occ(iband)
             if(ibc1 /= 1 .and. .not.(paw_dmft%band_in(ib))) cycle
           end if
         else  ! nbandc1=1
           use_nondiag_occup_dmft=0
           locc_test = (abs(occ(iband))>tol8)
           occup = occ(iband)
         end if

!        Extract cprj for current band
!        Must read cprj when mkmem=0 (even if unused) to have right pointer inside _PAW file
         if (locc_test.or.mkmem==0) then
           call cprj_get(atindx1,cwaveprj,cprj,natom,ib_this_proc,ibg,ikpt,iorder_cprj,isppol,&
&           mband_cprj,mkmem,mpi_enreg,natom,1,mband_cprj,nspinor,nsppol,unpaw)
         end if

!        Accumulate contribution from (occupied) current band
         if (locc_test) then
           if(use_nondiag_occup_dmft == 1) then
             call pawaccrhoij(atindx1,cplex,cwaveprj,cwaveprjb,0,isppol,natom,&
&             nspinor,occup,option,pawrhoij,usetimerev,wtk_k)
           else
             call pawaccrhoij(atindx1,cplex,cwaveprj,cwaveprj ,0,isppol,natom,&
&             nspinor,occup,option,pawrhoij,usetimerev,wtk_k)
           end if
         end if

       end do ! ib1c
     end do ! ib

     bdtot_index=bdtot_index+nband_k
     if (mkmem/=0) ibg=ibg+nspinor*nband_k_cprj

   end do ! ikpt
 end do ! isppol

!deallocate temporary cwaveprj storage
 call cprj_free(cwaveprj)
 deallocate(cwaveprj)
 if(paw_dmft%use_dmft/=0) then
   call cprj_free(cwaveprjb)
   deallocate(cwaveprjb)
 end if

!MPI: need to exchange arrays between procs
!==========================================
 if(mpi_enreg%paral_compil_kpt==1)then
   call leave_test(mpi_enreg)

!  Exchange pawrhoij%rhoij_
   call timab(48,1,tsec)
   allocate(dimlmn(natom))
   dimlmn(1:natom)=pawrhoij(1:natom)%cplex*pawrhoij(1:natom)%lmn2_size
   nsp2=pawrhoij(1)%nsppol;if (pawrhoij(1)%nspden==4) nsp2=4
   bufdim=sum(dimlmn)*nsp2
   allocate(buffer1(bufdim),buffer2(bufdim))
   jdim=0
   do iatom=1,natom
     do isppol=1,nsp2
       buffer1(jdim+1:jdim+dimlmn(iatom))=pawrhoij(iatom)%rhoij_(:,isppol)
       jdim=jdim+dimlmn(iatom)
     end do
   end do
   call xsum_mpi(buffer1,buffer2,bufdim,spaceComm,ierr) !Build sum of everything
   if (paral_kgb==1.and.nprocband>1) then
     call xsum_mpi(buffer2,mpi_enreg%comm_band,ierr) !Build sum over band processors
   end if
   jdim=0
   do iatom=1,natom
     do isppol=1,nsp2
       pawrhoij(iatom)%rhoij_(:,isppol)=buffer2(jdim+1:jdim+dimlmn(iatom))
       jdim=jdim+dimlmn(iatom)
     end do
   end do
   deallocate(buffer1,buffer2,dimlmn)
   call timab(48,2,tsec)
 end if ! mpi_enreg%paral_compil_kpt==1

!Print info
 if (abs(pawprtvol)>=1) then
   natinc=1;if(natom>1.and.pawprtvol>=0) natinc=natom-1
   do iatom=1,natom,natinc
     nsp2=pawrhoij(iatom)%nsppol;if (pawrhoij(iatom)%nspden==4) nsp2=4
     write(message, '(4a,i3,a)') ch10," PAW TEST:",ch10,&
&     ' ====== Values of RHOIJ in pawmkrhoij (iatom=',iatom,') ======'
     if (pawrhoij(iatom)%nspden==2.and.pawrhoij(iatom)%nsppol==1) write(message,'(3a)') trim(message),ch10,&
&     '      (antiferromagnetism case: only one spin component)'
     call wrtout(std_out,message,'COLL')
     do isppol=1,nsp2
       if (pawrhoij(iatom)%nspden/=1) then
         write(message, '(3a)') '   Component ',trim(dspin(isppol+2*(pawrhoij(iatom)%nspden/4))),':'
         call wrtout(std_out,message,'COLL')
       end if
       option=2;if (pawrhoij(iatom)%cplex==2.and.pawrhoij(iatom)%nspinor==1) option=1
       call print_ij(pawrhoij(iatom)%rhoij_(:,isppol),pawrhoij(iatom)%lmn2_size,&
&       pawrhoij(iatom)%cplex,pawrhoij(iatom)%lmn_size,1,-1,idum,0,pawprtvol,idum,&
&       -1._dp,1,opt_sym=option)
     end do
   end do
 end if

 DBG_EXIT("COLL")

end subroutine pawmkrhoij
!!***
