!{\src2tex{textfont=tt}}
!!****f* ABINIT/haydock_diago
!! NAME
!! haydock_diago
!!
!! FUNCTION
!!  Calculate the imaginary part of the macroscopic dielectric function via the Haydock recursive method.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2010 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! nsppol=Number of independent spin polarizations.
!! nspinor=Number of spinorial components.
!! usepaw=1 for PAW, 0 otherwise.
!! inclvkb=If different from zero, the commutator [Vnl,r] is included in the calculation of 
!!   the matrix element of the velocity operator. Meaningless for PAW.
!! BSp<type(excparam)=The parameter for the Bethe-Salpeter run.
!! fnameabo_exc_mdf=File name for _EXC_MDF ouput file
!! Kmesh<type(bz_mesh_type)>=The list of k-points in the BZ, IBZ and symmetry tables.
!! Cryst<type(crystal_structure)>=Info on the crystalline structure.
!! transtab(BSp%nkbz,BSp%nbndv,BSp%nbndc)=Correspondence btw the tupla (k,v,c) and the transition index
!! energy(BSp%nbnds,BSp%nkibz,nsppol)=KS energies.
!! Psps <type(pseudopotential_type)>=variables related to pseudopotentials.
!! Pawtab(Cryst%ntypat*usepaw)<pawtab_type>=PAW tabulated starting data.
!! Hur(Cryst%natom*usepaw)<type(HUr_commutator)>=Only for PAW and LDA+U, quantities used to evaluate the commutator [H_u,r].
!! Wfs<wfs_descriptor>=Handler for the wavefunctions.
!! spaceComm=MPI communicator.
!!
!! OUTPUT
!!  The imaginary part of the macroscopic dielectric function is written on the external file _EXC_MDF
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine haydock_diago(BSp,nspinor,nsppol,usepaw,fnameabo_exc_mdf,&
&  Cryst,Kmesh,energy,inclvkb,Wfs,Psps,Pawtab,Hur,transtab,spaceComm)

 use defs_basis
 use defs_datatypes
 use defs_abitypes,      only : datafiles_type
 use m_bs_defs
 use m_xmpi
 use m_errors

 use m_io_tools,          only : get_unit
 use m_blas,              only : xdotc
 use m_crystal,           only : crystal_structure 
 use m_bz_mesh,           only : bz_mesh_type
 use m_commutator_vkbr,   only : kb_potential
 use m_paw_commutator,    only : HUr_commutator
 use m_wfs,               only : wfs_descriptor

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_69_bse, except_this_one => haydock_diago
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: nsppol,nspinor,usepaw,inclvkb,spaceComm
 type(excparam),intent(in) :: BSp
 type(bz_mesh_type),intent(in) :: Kmesh
 type(crystal_structure),intent(in) :: Cryst
 type(wfs_descriptor),intent(inout) :: Wfs
 type(pseudopotential_type),intent(in) :: Psps
 character(len=fnlen),intent(in) :: fnameabo_exc_mdf
!arrays
 integer,intent(in) :: transtab(BSp%nkbz,BSp%nbndv,BSp%nbndc)
 real(dp),intent(in) :: energy(BSp%nbnds,BSp%nkibz,nsppol)
 type(pawtab_type),intent(in) :: Pawtab(Cryst%ntypat*usepaw)
 type(HUr_commutator),intent(in) :: Hur(Cryst%natom*usepaw)

!Local variables ------------------------------
!scalars
 integer ::  iv,ic,io,ik,iq,mdf_unt,ios
 integer :: nprocs,my_rank,master
 real(dp) :: FAQ,tpula,omegaev,ave,acell1
 character(len=500) :: msg
!arrays
 real(dp),allocatable :: spectrum(:,:)
 complex(dpc),allocatable :: fcvk(:,:,:,:,:),omega(:),rhotilde(:,:)

!************************************************************************

 ABI_CHECK(nsppol==1,"nsppol==2 not coded")

 nprocs  = xcomm_size(spaceComm)
 my_rank = xcomm_rank(spaceComm)
 master=0

 FAQ = 2*4*pi/(Cryst%ucvol*BSp%nkbz)

 acell1 = sqrt(2.0) * sqrt(Cryst%rprimd(1,1)**2+Cryst%rprimd(2,1)**2+Cryst%rprimd(3,1)**2)
 tpula = 2 * pi / acell1

 allocate(omega(BSp%nomega))
 do io=1,BSp%nomega
   omega(io) = (BSp%omegai + (io-1)*BSp%domega) + (0,1)*BSp%broad
 end do

 allocate(fcvk(BSp%nbnds,BSp%nbnds,BSp%nkbz,nsppol,BSp%nq))

 call cfcvk(BSp%nbnds,BSp%nkibz,BSp%nkbz,Kmesh,nspinor,nsppol,energy,BSp,&
&  inclvkb,usepaw,Cryst,Wfs,Psps,Pawtab,Hur,fcvk,spaceComm)

 allocate(rhotilde(BSp%nh,BSp%nq))

 ! Fill rhotilde using same indeces ordering as the excitonic Hamiltonian.
 do iq=1,BSp%nq
   do ik=1,BSp%nkbz
     do iv=BSp%lomo,BSp%homo
       do ic=BSp%lumo,BSp%nbnds
        rhotilde(transtab(ik,iv-BSp%lomo+1,ic-BSp%lumo+1), iq)=fcvk(ic,iv,ik,1,iq)
       end do
     end do
   end do
 end do

 deallocate(fcvk)

 allocate(spectrum(BSp%nomega,BSp%nq))

 if (BSp%nq==1) then
   call haydock(BSp,rhotilde(:,1),omega,FAQ,spectrum(:,1),spaceComm)
 else
   do iq=4,BSp%nq
     call haydock(BSp,rhotilde(:,iq),omega,FAQ,spectrum(:,iq),spaceComm)
   end do        
 end if ! TODO add possibility of restarting the calculation.

 deallocate(rhotilde)

 if (my_rank==master) then ! Master writes the final results.
   mdf_unt = get_unit()

   open(unit=mdf_unt,file=fnameabo_exc_mdf,status='unknown',form='formatted',iostat=ios)
   msg = " Opening file: "//TRIM(fnameabo_exc_mdf)
   ABI_CHECK(ios==0,msg)

   ! Writing to ab_out for automatic testing.
   ! This part will be removed when fldiff will be able to compare two mdf files.

   if (BSp%nq==1) then
     write(mdf_unt,*)'# omega   eps_2'
     write(ab_out,*)'# omega   eps_2'
     do io=1,BSp%nomega
       omegaev = REAL(omega(io))*Ha_eV
       write(mdf_unt,'(f7.3,e12.4)') omegaev, spectrum(io,1)
       write(ab_out,'(f7.3,f9.4)')   omegaev, spectrum(io,1)
     end do
   else
     write(mdf_unt,*)'# omega   eps_2    x    y    z'
     write(ab_out,* )'# omega   eps_2    x    y    z'
     do io=1,BSp%nomega
       omegaev = REAL(omega(io))*Ha_eV
       ave = (spectrum(io,4) +  spectrum(io,5) +  spectrum(io,6))/3.
       write(mdf_unt,'(f7.3,4e12.4)') omegaev, ave, spectrum(io,4),spectrum(io,5), spectrum(io,6)
       write(ab_out, '(f7.3,4f9.4)' ) omegaev, ave, spectrum(io,4),spectrum(io,5), spectrum(io,6)
     end do
   end if

   close(mdf_unt)
 end if !my_rank==master

 deallocate(omega,spectrum)

end subroutine haydock_diago
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock
!! NAME
!! haydock
!!
!! FUNCTION
!!  Reads the excitonic Hamiltonian from file and construct the Lanczos set of vectors 
!!  by iterative matrix-vector multiplications.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2010 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  FAQ=Multiplicative factor (k-point sampling and unit cell volume)
!!  BSp<type(excparam)>=Parameters defining the Bethe-Salpeter calculation.
!!  rhotilde(BSp%nh)=Initial vector for the Haydock recursive scheme.
!!  omega(BSp%nomega)=Frequency mesh for the macroscopic dielectric function (broadening is already included).
!!  spaceComm=MPI communicator.
!!
!! OUTPUT
!!  spectrum(BSp%nomega)=The imaginary part of the macroscopic dielectric function.
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine haydock(BSp,rhotilde,omega,FAQ,spectrum,spaceComm)

 use defs_basis
 use m_bs_defs
 use m_xmpi
 use m_errors

 use m_io_tools,       only : get_unit, existent_file, delete_file
 use m_numeric_tools,  only : continued_fract
 use m_blas,           only : xdotc

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
 use interfaces_51_manage_mpi
 use interfaces_linalg
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: spaceComm
 real(dp),intent(in) :: FAQ
 type(excparam),intent(in) :: BSp
!arrays
 real(dp),intent(out) :: spectrum(BSp%nomega)
 complex(dpc),intent(in) :: rhotilde(BSp%nh),omega(BSp%nomega)

!Local variables ------------------------------
!scalars
 integer :: inn,it,unt,converged,ios,nprocs,my_rank,master,ierr,istat 
 integer :: my_t1,my_t2,my_nt,nh_file,niter_file,niter_tot,niter_done
 real(dp) :: max_spectra,norm
 complex(dpc) :: factor
 character(len=500) :: msg
 character(len=fnlen) :: excfile,restart_file
!arrays
 real(dp),allocatable :: old_spectrum(:),bb(:)
 complex(dpc),allocatable :: aa(:),hreso(:,:),phi_nm1(:),phi_n(:),phi_np1(:),hphi_n(:)
 complex(dpc),allocatable :: aa_file(:),bb_file(:),phi_n_file(:),phi_nm1_file(:)
 complex(dpc),allocatable :: buffer_dpc(:)

!************************************************************************

 nprocs  = xcomm_size(spaceComm)
 my_rank = xcomm_rank(spaceComm)
 master=0

 write(msg,'(a,i0)')' Haydock algorithm. Number of iteration:',BSp%niter
 call wrtout(std_out,msg,"COLL")

 ! Check for presence of the restart file.
 niter_file=0; restart_file = "__SAVE_HAYDOCK__"
 if (.FALSE..and.existent_file(restart_file) ) then
   msg = " Restarting Haydock calculation from file "//TRIM(restart_file)
   call wrtout(std_out,msg,"COLL")
   call wrtout(ab_out,msg,"COLL")
   unt = get_unit()
   open(unit=unt,file=restart_file,form="formatted")

   read(unt,*)nh_file
   msg = "Rank of Hamiltonian on disk should be equal to the rank of the present Hamiltonian"
   ABI_CHECK(nh_file==BSp%nh,msg)

   read(unt,*)niter_file
   write(msg,'(a,i5)')" Number of iterations already performed: ",niter_file
   call wrtout(std_out,msg,"COLL")
   call wrtout(ab_out,msg,"COLL")

   allocate(aa_file(niter_file),bb_file(niter_file))
   do inn=1,niter_file
     read(unt,*)it,aa_file(inn),bb_file(inn)
     if (inn/=it) then 
       write(msg,'(2(a,i0))')"Found it_file: ",it," while it should be: ",inn
       MSG_ERROR(msg)
     end if
   end do
   allocate(phi_nm1_file(nh_file),phi_n_file(nh_file))
   read(unt,*)phi_nm1_file
   read(unt,*)phi_n_file
   close(unt)
 end if

 ! TODO check symmetries of calculated Hamiltonian as a_n should be real.

 ! For n>1 we have
 ! 1) a_n = <n|H|n>
 ! 2) b_n = || H|n> - a_n|n> -b_{n-1}|n-1> ||
 ! 3) |n+1> = [H|n> -a_n|n> -b_{n-1}|n-1>]/b_n
 !
 ! The sequences starts with |1> normalized to 1 and b_0 =0, therefore:
 ! a_1 = <1|H|1>
 ! b_1 = || H|1> - a_1|1> ||
 ! |2> = [H|1> - a_1|1>]/b_1
 !
 ! Divide the columns of the Hamiltoninan among the nodes.
 call split_work(BSp%nh,spaceComm,my_t1,my_t2,verbose=1)
 my_nt = my_t2-my_t1+1
 ABI_CHECK(my_nt>0,"found processor with 0 rows")

 !allocate(hreso(BSp%nh,BSp%nh), STAT=istat)
 allocate(hreso(BSp%nh,my_nt), STAT=istat)
 ABI_CHECK(istat==0,"out of memory in hreso")

 excfile = 'out_haydock.exh'
 if (existent_file('in_haydock.exh')) excfile='in_haydock.exh'

 unt = get_unit()
 open(unt,file=excfile,form='unformatted',iostat=ios)
 ABI_CHECK(ios==0," Opening file: "//TRIM(excfile))

 if (nprocs==1) then
   do it=1,BSp%nh
     read(unt)hreso(it,:)
   end do
 else 
   allocate(buffer_dpc(BSp%nh))
   do it=1,BSp%nh
     read(unt)buffer_dpc(:)
     hreso(it,:) = buffer_dpc(my_t1:my_t2)
   end do
   deallocate(buffer_dpc)
 end if

 close(unt)

 allocate(hphi_n(Bsp%nh))
 allocate(phi_nm1(my_nt),phi_n(my_nt),phi_np1(my_nt))

 niter_tot = niter_file + Bsp%niter
 allocate(aa(niter_tot),bb(niter_tot))

 if (niter_file==0) then ! Calculation from scratch.
   phi_nm1(:)=rhotilde(my_t1:my_t2)  ! Select the slice treated by this node.
   norm = DZNRM2(BSp%nh,rhotilde,1)  
   phi_nm1(:)=phi_nm1(:)/norm        ! Normalization
                                                                              
   !hphi_n = MATMUL(hreso,phi_nm1)
   call ZGEMV('N',BSp%nh,my_nt,cone,hreso,BSp%nh,phi_nm1,1,czero,hphi_n,1)
   call xsum_mpi(hphi_n,spaceComm,ierr)

   aa(1)=xdotc(my_nt,phi_nm1,1,hphi_n(my_t1:),1)
   call xsum_mpi(aa(1),spaceComm,ierr)

   phi_n = hphi_n(my_t1:my_t2) - aa(1)*phi_nm1

   !bb(1) = DZNRM2(my_nt,phi_n,1)
   bb(1) = xdotc(my_nt,phi_n,1,phi_n,1)
   call xsum_mpi(bb(1),spaceComm,ierr)
   bb(1) = SQRT(bb(1))

   phi_n = phi_n/bb(1)
   niter_done=1

 else ! Use previously calculate a"s and b"s.
   niter_done=niter_file
   aa(1:niter_file) = aa_file
   bb(1:niter_file) = bb_file
   phi_nm1(:)=phi_nm1_file(my_t1:my_t2)   ! Select the slice treated by this node.
   phi_n  (:)=phi_n_file  (my_t1:my_t2)   
   deallocate(aa_file,bb_file)
   deallocate(phi_nm1_file,phi_n_file)
 end if

 allocate(old_spectrum(BSp%nomega)); old_spectrum=zero 
 factor = -FAQ*(DZNRM2(BSp%nh,rhotilde,1)**2); converged=0

 do inn=niter_done+1,niter_tot
   !
   !open(unt,file=excfile,form='unformatted',iostat=ios)
   !do it=1,BSp%nh
   ! read(unt)(hreso(it,itp),itp=1,BSp%nh)
   ! hphi_n(it)=xdotu(BSp%nh,hreso(it,:),1,phi_n,1)
   !end do
   !close(unt)
   write(msg,'(a)')' Iter b re_a im_a '
   call wrtout(std_out,msg,"COLL")

   !hphi_n = MATMUL(hreso,phi_n)
   call ZGEMV('N',BSp%nh,my_nt,cone,hreso,BSp%nh,phi_n,1,czero,hphi_n,1)
   call xsum_mpi(hphi_n,spaceComm,ierr)

   aa(inn) = xdotc(my_nt,phi_n,1,hphi_n(my_t1:),1)
   call xsum_mpi(aa(inn),spaceComm,ierr)

   ! |n+1> = H|n> - A(n)|n> - B(n-1)|n-1>
   phi_np1 = hphi_n(my_t1:my_t2) - aa(inn)*phi_n - bb(inn-1)*phi_nm1

   !call zaxpy(BS_K_dim,-Af(it)*(1._SP,0._SP),Vn,1,Vnp1,1)
   !call zaxpy(BS_K_dim,-Bf(it)*(1._SP,0._SP),Vnm1,1,Vnp1,1)

   !bb(inn) = DZNRM2(my_nt,phi_np1,1)
   !bb(inn) = DOT_PRODUCT(phi_np1,phi_np1)
   bb(inn) = xdotc(my_nt,phi_np1,1,phi_np1,1)
   call xsum_mpi(bb(inn),spaceComm,ierr)
   bb(inn) = SQRT(bb(inn))

   phi_np1 = phi_np1/bb(inn)
   
   phi_nm1 = phi_n
   phi_n   = phi_np1

   call continued_fract(inn,factor,aa,bb,Bsp%nomega,omega,spectrum)
   max_spectra = MAXVAL(spectrum)

   if (inn>MIN(niter_tot,niter_done+4)) then ! Avoid spurious convergence.
     if ( ALL( ABS(spectrum - old_spectrum) < ABS(spectrum)*BSp%haydock_tol ) ) then 
       converged = converged+1
     else 
       converged = 0
     end if
     if (converged==2) then 
       write(msg,'(a,es10.2,a,i0,a)')&
&        " >>> Haydock algorithm converged twice within haydock_tol= ",Bsp%haydock_tol," after ",inn," iterations." 
       call wrtout(std_out,msg,'COLL')
       call wrtout(ab_out,msg,'COLL')
       EXIT
     end if
   end if

   old_spectrum = spectrum

   write(msg,'(1x,i3,3es12.4)')inn,bb(inn),REAL(aa(inn)),AIMAG(aa(inn)) !max_spectra,
   call wrtout(std_out,msg,"COLL")
 end do ! it

 if (converged/=2) then
   write(msg,'(a,es10.2,a,i0,a)')&
&    " WARNING: Haydock algorithm did not converge within ",Bsp%haydock_tol," after ",Bsp%niter,"iterations."
   call wrtout(std_out,msg,'COLL')
   call wrtout(ab_out,msg,'COLL')
 end if

 if (.TRUE.) then ! Save the a"s and the b"s for possible restarting.
   ! 1) Rank of Hamiltonian.
   ! 2) Number of iterations performed.
   ! 3) FIXME: Here I should reporte info on the Q.
   ! 4) do iter=1,niter_performed 
   !     aa(iter),bb(iter)
   !    end do
   ! 5) |n-1>
   !    |n>
   !
   if (my_rank==master) then ! Open the file and writes basic dimensions and infos.
     !call delete_file(restart_file,ierr)
     unt = get_unit()
     open(unit=unt,file=restart_file,form="formatted")
     write(unt,*)Bsp%nh
     !write(unt,*)rhotilde(:)/DZNRM2(BSp%nh,phi_nm1,1) ! Normalization
     write(unt,*)MIN(inn,niter_tot)     ! NB if the previous loop completed inn=niter_tot+1
     do it=1,MIN(inn,niter_tot)         !    if exited then inn is not incremented by one.
       write(unt,*)it,aa(it),bb(it)
     end do
   end if

   ! hphi_n is used as workspace to gather |n> and |n+1>
   hphi_n = czero
   hphi_n(my_t1:my_t2) = phi_nm1
   call xsum_master(hphi_n,master,spaceComm,ierr)
   if (my_rank==master) write(unt,*)hphi_n ! |n-1>

   hphi_n = czero
   hphi_n(my_t1:my_t2) = phi_n
   call xsum_master(hphi_n,master,spaceComm,ierr)
   if (my_rank==master) then 
     write(unt,*)hphi_n ! |n>
     close(unt)
   end if
 end if

 deallocate(old_spectrum)
 deallocate(hphi_n,phi_nm1,phi_n,phi_np1)
 deallocate(aa,bb,hreso)

 call xbarrier_mpi(spaceComm)

end subroutine haydock
!!***

!----------------------------------------------------------------------
