!{\src2tex{textfont=tt}}
!!****f* ABINIT/scalapack
!! NAME
!!  scalapack
!!
!! FUNCTION
!!  This module contains functions and subroutine using ScaLAPACK library.
!!  The code has to be compiled with --enable-scalapack.
!!
!! COPYRIGHT
!!  Copyright (C) 2001-2010 ABINIT group (CS,GZ,FB,MG)
!!  This file is distributed under the terms of the
!!  GNU General Public License, see ~ABINIT/COPYING
!!  or http://www.gnu.org/copyleft/gpl.txt .
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

!#define DEBUG_MODE

#include "abi_common.h"

#if defined HAVE_LINALG_MPI

!-------------------------------------------------------
! set up of a processor grid for ScaLAPACK
! as a function of the total number of processors attributed to the grid
!-------------------------------------------------------

!!***

!!****f* ABINIT/build_grid_scalapack
!! NAME
!!  build_grid_scalapack
!!
!! FUNCTION
!!  Builds a processor grid for ScaLAPACK.
!!
!! INPUTS
!!  nbprocs= total number of processors
!!  communicator= MPI communicator
!!
!! OUTPUT
!!  grid= array representing the grid of processors
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!      scalapack
!!
!! CHILDREN
!!
!! SOURCE

subroutine build_grid_scalapack(grid,nbprocs, communicator)

  use defs_scalapack
  use defs_basis
  use m_errors

  implicit none

!Arguments ------------------------------------
  TYPE(grid_scalapack),INTENT(out)     :: grid
  INTEGER,INTENT(in)                     :: nbprocs
  INTEGER, INTENT(in)                    :: communicator

!Local variables-------------------------------
  INTEGER  :: i

! *********************************************************************

 DBG_ENTER("COLL")

 grid%nbprocs=nbprocs

!Search for a rectangular grid of processors 
 i=INT(SQRT(float(nbprocs)))
 do while (MOD(nbprocs,i) /= 0)
   i = i-1
 end do 

 grid%dims(1) = i
 grid%dims(2) = INT(nbprocs/i)

 grid%ictxt = communicator

 call BLACS_GRIDINIT(grid%ictxt,'R',grid%dims(1),grid%dims(2))           

 DBG_EXIT("COLL")

end subroutine build_grid_scalapack
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/build_processor_scalapack
!! NAME
!!  build_processor_scalapack
!!
!! FUNCTION
!!  Builds a processor descriptor for ScaLAPACK.
!!  Build of the data related to one processor in a grid
!!
!! INPUTS
!!  grid= array representing the grid of processors
!!  myproc= selected processor
!!  comm= MPI communicator
!!
!! OUTPUT
!!  processor= descriptor of a processor
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!      scalapack
!!
!! CHILDREN
!!
!! SOURCE

subroutine build_processor_scalapack(processor,grid,myproc, comm)

  use defs_scalapack
  use defs_basis
  use m_errors

  implicit none

!Arguments ------------------------------------
  TYPE(processor_scalapack),INTENT(out)  :: processor
  TYPE(grid_scalapack),INTENT(in)       :: grid
  INTEGER,INTENT(in)                      :: myproc
  INTEGER,INTENT(in)                      :: comm

!Local variables-------------------------------

! *********************************************************************

 DBG_ENTER("COLL")

 processor%grid= grid

 processor%myproc = myproc

 processor%comm = comm

 call BLACS_GRIDINFO(grid%ictxt,processor%grid%dims(1), &
& processor%grid%dims(2),processor%coords(1), &
& processor%coords(2))

!These values are the same as those computed by BLACS_GRIDINFO
!except in the case where the mmyproc argument is not the
!local proc
 processor%coords(1) = INT((myproc) / grid%dims(2))
 processor%coords(2) = MOD((myproc), grid%dims(2))

 DBG_EXIT("COLL")

end subroutine build_processor_scalapack
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/init_scalapack
!! NAME
!!  init_scalapack
!!
!! FUNCTION
!!  Initializes an instance of processor ScaLAPACK from an MPI communicator.
!!
!! INPUTS
!!  communicator= MPI communicator
!!
!! OUTPUT
!!  processor= descriptor of a processor
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!      exceig,m_abilasi,subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine init_scalapack(processor,communicator)

  use defs_scalapack
  use defs_basis
  use m_errors
#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => init_scalapack
!End of the abilint section

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
  TYPE(processor_scalapack),INTENT(out)    :: processor
  INTEGER, INTENT(in)                       :: communicator

!Local variables-------------------------------
  TYPE(grid_scalapack)                    :: grid
  INTEGER                                   :: nbproc,myproc
  INTEGER                                   :: ierr

! *********************************************************************

 DBG_ENTER("COLL")

 call MPI_COMM_SIZE(communicator, nbproc, ierr)
 call MPI_COMM_RANK(communicator, myproc, ierr)

 call build_grid_scalapack(grid, nbproc, communicator)

 call build_processor_scalapack(processor, grid, myproc, communicator)

 DBG_EXIT("COLL")

end subroutine init_scalapack
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/end_scalapack
!! NAME
!!  end_scalapack
!!
!! FUNCTION
!!  Removes a processor from the ScaLAPACK grid.
!!
!! INPUTS
!!  None
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  processor= descriptor of a processor
!!
!! PARENTS
!!      exceig,m_abilasi,subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine end_scalapack(processor)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(processor_scalapack),INTENT(inout)    :: processor

!Local variables-------------------------------

! *********************************************************************

 call BLACS_GRIDEXIT(processor%grid%ictxt)

!call BLACS_EXIT(0)

end subroutine end_scalapack
!!***

!-------------------------------------------------------
! Initialisation of a SCALAPACK matrix (each proc initialize its own part of the matrix)
!-------------------------------------------------------

!!****f* ABINIT/init_matrix_scalapack
!! NAME
!!  init_matrix_scalapack
!!
!! FUNCTION
!!  Initializes a matrix descriptor for ScaLAPACK.
!!
!! INPUTS
!!  processor= descriptor of a processor
!!  nbli_global= total number of lines
!!  nbco_global= total number of columns
!!  istwf_k= option parameter that describes the storage of wfs
!!  tbloc= custom block size
!!
!! OUTPUT
!!  matrix= the matrix to process
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!      exceig,m_abilasi,subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine init_matrix_scalapack(matrix,nbli_global,nbco_global,processor,istwf_k,tbloc)

 use defs_scalapack
 use defs_basis
 use m_errors

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_16_hideleave
 use interfaces_51_manage_mpi, except_this_one => init_matrix_scalapack
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(out)           :: matrix
  TYPE(processor_scalapack),INTENT(in),TARGET  :: processor
  INTEGER,INTENT(in)                           :: nbli_global,nbco_global
  INTEGER,INTENT(in)                           :: istwf_k
  INTEGER,INTENT(in),OPTIONAL                  :: tbloc

!Local variables-------------------------------
  INTEGER, PARAMETER                :: SIZE_BLOCS = 40
  INTEGER             :: info,sizeb

  INTEGER :: NUMROC
  EXTERNAL NUMROC

! *********************************************************************

 DBG_ENTER("COLL")

 if (PRESENT(tbloc)) then
   sizeb = tbloc
 else
   sizeb  = SIZE_BLOCS
 end if

!Records of the matrix type :
 matrix%processor => processor
 matrix%sizeb_blocs(1) = MIN(sizeb,nbli_global)
 matrix%sizeb_blocs(2) = MIN(sizeb,nbco_global)
 matrix%sizeb_global(1) = nbli_global
 matrix%sizeb_global(2) = nbco_global


!Size of the local buffer
!NUMROC computes the NUMber of Rows Or Columns of a distributed matrix owned by the process indicated by IPROC.
 matrix%sizeb_local(1) = NUMROC(nbli_global,matrix%sizeb_blocs(1), &
& processor%coords(1),0, &
& processor%grid%dims(1))

 matrix%sizeb_local(2) = NUMROC(nbco_global,matrix%sizeb_blocs(2), &
& processor%coords(2),0, &
& processor%grid%dims(2))

 call idx_loc(matrix,matrix%sizeb_global(1),matrix%sizeb_global(2), &
& matrix%sizeb_local(1),matrix%sizeb_local(2))

!Initialisation of the SCALAPACK description of the matrix
 call DESCINIT(matrix%descript%tab, nbli_global, nbco_global, &
& matrix%sizeb_blocs(1), matrix%sizeb_blocs(2), 0,0 , &
& processor%grid%ictxt, MAX(1,matrix%sizeb_local(1)), &
& info)

 if (info /= 0) then
   write(6,*) processor%myproc,'error initialisation matrix scalapack',info 
   call leave_new('PERS')
 end if

 if (istwf_k/=2) then
   ALLOCATE(matrix%buffer_cplx(matrix%sizeb_local(1),matrix%sizeb_local(2)))
   matrix%buffer_cplx(:,:) = (0._DP,0._DP)
 else
   ALLOCATE(matrix%buffer_real(matrix%sizeb_local(1),matrix%sizeb_local(2)))
   matrix%buffer_real(:,:) = 0._DP
 end if

 DBG_EXIT("COLL")

end subroutine init_matrix_scalapack
!!***

!-------------------------------------------------------
! Destruction of the records of a SCALAPACK matrix
!-------------------------------------------------------
!!****f* ABINIT/destruction_matrix_scalapack
!! NAME
!!  destruction_matrix_scalapack
!!
!! FUNCTION
!!  Destroys a matrix descriptor for ScaLAPACK.
!!
!! INPUTS
!!  None
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  matrix= the matrix to process
!!
!! PARENTS
!!      exceig,m_abilasi,subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine destruction_matrix_scalapack(matrix)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(inout)    :: matrix

!Local variables-------------------------------

! *********************************************************************

!MG This is dangerous as the pointers are not nullified when the object is initialized.
!Likely gfortran will bomb out here.

 NULLIFY(matrix%processor)
 matrix%sizeb_global = 0
 if (ASSOCIATED(matrix%buffer_cplx)) then
   DEALLOCATE(matrix%buffer_cplx)
 end if
 if (ASSOCIATED(matrix%buffer_real)) then
   DEALLOCATE(matrix%buffer_real)
 end if
 if (ASSOCIATED(matrix%ipiv)) then
   DEALLOCATE(matrix%ipiv)
 end if
 matrix%sizeb_blocs = 0
 matrix%sizeb_local = 0
 matrix%descript%tab = 0

end subroutine destruction_matrix_scalapack
!!***

!-------------------------------------------------------
! Access to a component thanks to its local indices
!-------------------------------------------------------

!!****f* ABINIT/matrix_get_local_cplx
!! NAME
!!  matrix_get_local_cplx
!!
!! FUNCTION
!!  Returns a local matrix coefficient of complex type.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  i= row in the matrix
!!  j= column in the matrix
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function matrix_get_local_cplx(matrix,i,j)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)    :: matrix
  INTEGER, INTENT(in)                   :: i,j
  COMPLEX(dp)                           :: matrix_get_local_cplx

!Local variables-------------------------------

! *********************************************************************

 matrix_get_local_cplx = matrix%buffer_cplx(i,j)

end function matrix_get_local_cplx
!!***

!!****f* ABINIT/matrix_get_local_real
!! NAME
!!  matrix_get_local_real
!!
!! FUNCTION
!!  Returns a local matrix coefficient of double precision type.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  i= row in the matrix
!!  j= column in the matrix
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function matrix_get_local_real(matrix,i,j)

  use defs_scalapack 
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)    :: matrix
  INTEGER, INTENT(in)                   :: i,j

!Return value ---------------------------------
  REAL(dp)                           :: matrix_get_local_real

!Local variables-------------------------------

! *********************************************************************

 matrix_get_local_real = matrix%buffer_real(i,j)
 
end function matrix_get_local_real
!!***

!-------------------------------------------------------
! Positioning of a component of a matrix thanks to its local indices
!-------------------------------------------------------

!!****f* ABINIT/matrix_set_local_cplx
!! NAME
!!  matrix_set_local_cplx
!!
!! FUNCTION
!!  Sets a local matrix coefficient of complex type.
!!
!! INPUTS
!!  i= row in the matrix
!!  j= column in the matrix
!!  value= the value to set
!!
!! OUTPUT
!!  matrix= the matrix to process
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!      scalapack
!!
!! CHILDREN
!!
!! SOURCE

subroutine matrix_set_local_cplx(matrix,i,j,value)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(out)   :: matrix

  INTEGER, INTENT(in)                   :: i,j
  COMPLEX(dp), INTENT(in)               :: value

!Local variables-------------------------------

! *********************************************************************

 matrix%buffer_cplx(i,j) = value

end subroutine matrix_set_local_cplx
!!***

!!****f* ABINIT/matrix_set_local_real
!! NAME
!!  matrix_set_local_real
!!
!! FUNCTION
!!  Sets a local matrix coefficient of double precision type.
!!
!! INPUTS
!!  i= row in the matrix
!!  j= column in the matrix
!!  value= the value to set
!!
!! OUTPUT
!!  matrix= the matrix to process
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!      scalapack
!!
!! CHILDREN
!!
!! SOURCE

subroutine matrix_set_local_real(matrix,i,j,value)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(out)   :: matrix
  INTEGER, INTENT(in)                   :: i,j
  REAL(dp), INTENT(in)               :: value

!Local variables-------------------------------

! *********************************************************************

 matrix%buffer_real(i,j) = value

end subroutine matrix_set_local_real
!!***


!!****f* ABINIT/idx_loc
!! NAME
!!  idx_loc
!!
!! FUNCTION
!!  Determination of the local indices of a matrix coefficient with respect
!!  to its global indices, independently of the processor.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  i= row in the matrix
!!  j= column in the matrix
!!
!! OUTPUT
!!  iloc= local row of the coefficient
!!  jloc= local column of the coefficient
!!
!! PARENTS
!!      scalapack
!!
!! CHILDREN
!!
!! SOURCE

subroutine idx_loc(matrix,i,j,iloc,jloc)

  use defs_scalapack
  use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => idx_loc
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)    :: matrix
  INTEGER, INTENT(in)                   :: i,j
  INTEGER, INTENT(out)                  :: iloc,jloc

!Local variables-------------------------------
  INTEGER :: NUMROC
  EXTERNAL NUMROC

! *********************************************************************

 iloc = glob_loc(matrix,i,1)
 jloc = glob_loc(matrix,j,2)

end subroutine idx_loc
!!***

!!****f* ABINIT/glob_loc
!! NAME
!!  glob_loc
!!
!! FUNCTION
!!  Returns the global location of a matrix coefficient.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  idx= number of rows in the distributed matrix
!!  lico= block size index
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  None
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function glob_loc(matrix,idx,lico)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)    :: matrix
  INTEGER, INTENT(in)                   :: idx, lico

!Return value ---------------------------------
  INTEGER :: glob_loc

!Local variables-------------------------------
  INTEGER :: NUMROC
  EXTERNAL NUMROC

! *********************************************************************

 glob_loc = NUMROC(idx,matrix%sizeb_blocs(lico), &
& matrix%processor%coords(lico),0, &
& matrix%processor%grid%dims(lico))


end function glob_loc
!!***


!!****f* ABINIT/idx_glob
!! NAME
!!  idx_glob
!!
!! FUNCTION
!!  Determination of the global indices of a term of the matrix with respect
!!  to its local indices.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  iloc= local row of the coefficient
!!  jloc= local column of the coefficient
!!
!! OUTPUT
!!  i= row in the matrix
!!  j= column in the matrix
!!
!! PARENTS
!!      exceig,scalapack
!!
!! CHILDREN
!!
!! SOURCE

subroutine idx_glob(matrix,iloc,jloc,i,j)

  use defs_scalapack
  use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => idx_glob
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)    :: matrix
  INTEGER, INTENT(out)                  :: i,j
  INTEGER, INTENT(in)                   :: iloc,jloc

!Local variables-------------------------------
  INTEGER :: nbcycli,nbcycco,resteli,resteco,nblocsli,nblocsco

! *********************************************************************

 i = loc_glob(matrix,matrix%processor,iloc,1)
 j = loc_glob(matrix,matrix%processor,jloc,2)

end subroutine idx_glob
!!***

!!****f* ABINIT/loc_glob
!! NAME
!!  loc_glob
!!
!! FUNCTION
!!  Determination of the global index from a local index (row or column)
!!  as a function of a given processor
!!
!! INPUTS
!!  matrix= the matrix to process
!!  proc= descriptor of a processor
!!  idx= number of rows in the distributed matrix
!!  lico= block size index
!!
!! OUTPUT
!!  None
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function loc_glob(matrix,proc,idx,lico)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)    :: matrix
  TYPE(processor_scalapack),INTENT(in) :: proc
  INTEGER, INTENT(in)                   :: idx,lico

!Return value ---------------------------------
  INTEGER :: loc_glob

!Local variables-------------------------------
  INTEGER :: nbcyc,reste,nblocs

! *********************************************************************

 nbcyc = INT((idx-1)/matrix%sizeb_blocs(lico))
 reste = MOD(idx-1,matrix%sizeb_blocs(lico))
 nblocs = nbcyc*proc%grid%dims(lico)+ proc%coords(lico)

 loc_glob = nblocs * matrix%sizeb_blocs(lico) + reste + 1

end function loc_glob
!!***


!!****f* ABINIT/matrix_from_global
!! NAME
!!  matrix_from_global
!!
!! FUNCTION
!!  Routine to fill a SCALAPACK matrix with respect to a full matrix.
!!
!! INPUTS
!!  istwf_k= option parameter that describes the storage of wfs
!!  reference= one-dimensional array
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  matrix= the matrix to process
!!
!! PARENTS
!!      subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine matrix_from_global(matrix,reference,istwf_k)

  use defs_scalapack
  use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_16_hideleave
 use interfaces_51_manage_mpi, except_this_one => matrix_from_global
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(inout)  :: matrix
  REAL(dp),DIMENSION(:),INTENT(in)       :: reference
  INTEGER,INTENT(in)                     :: istwf_k
  COMPLEX(dp)::val_cplx
  REAL(dp)   ::val_real

!Local variables-------------------------------
  INTEGER :: i,j,iglob,jglob,ind
  REAL(dp) :: err
  INTEGER :: cptr

! *********************************************************************

!err = 0._DP
!cptr = 0

 do i=1,matrix%sizeb_local(1)
   do j=1,matrix%sizeb_local(2)
     call idx_glob(matrix,i,j,iglob,jglob)

     ind = jglob*(jglob-1)+2*iglob-1

     if (istwf_k/=2) then
       val_cplx = dcmplx(reference(ind),reference(ind+1))
       call matrix_set_local_cplx(matrix,i,j,val_cplx)
     else
       val_real = reference(ind)
       call matrix_set_local_real(matrix,i,j,val_real)

       if(abs(reference(ind+1))>1.0d-10)then
         write(6,'(a,a,a,a,2i5,1es16.6,a,a)')ch10,&
&         ' scalapack : BUG ',&
&         '  For istwf_k=2, observed the following element of matrix :',ch10,&
&         iglob,jglob,reference(ind+1),ch10,&
&         '  with a non-negligible imaginary part.'
         call leave_new('PERS')
       end if

     end if

!    cptr = cptr + 1
   end do
 end do

!if (cptr /= 0) then
!PRINT *,matrix%processor%myproc,"error Linf matrix scalapack", &
!&  err,"on",cptr,"terms"
!endif

end subroutine matrix_from_global
!!***


!!****f* ABINIT/matrix_to_global
!! NAME
!!  matrix_to_global
!!
!! FUNCTION
!!  Inserts a ScaLAPACK matrix into a global one.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  istwf_k= option parameter that describes the storage of wfs
!!  nband_k= number of bands at this k point for that spin polarization
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  reference= one-dimensional array
!!
!! PARENTS
!!      subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine matrix_to_global(matrix,reference,istwf_k)

  use defs_scalapack
  use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => matrix_to_global
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)        :: matrix
  REAL(dp),DIMENSION(:),INTENT(inout)       :: reference!(nband_k*(nband_k+1))
  INTEGER,INTENT(in)           :: istwf_k!,nband_k

!Local variables-------------------------------
  INTEGER  :: i,j,iglob,jglob,ind
  REAL(dp) :: err
  INTEGER  :: cptr

! *********************************************************************

!err = 0._DP
!cptr = 0

 do i=1,matrix%sizeb_local(1)
   do j=1,matrix%sizeb_local(2)
     call idx_glob(matrix,i,j,iglob,jglob)

     ind = jglob*(jglob-1)+2*iglob-1

     if (ind <= matrix%sizeb_global(2)*(matrix%sizeb_global(2)+1)) then
       if (istwf_k/=2) then
         reference(ind)   = REAL(matrix_get_local_cplx(matrix,i,j))
         reference(ind+1) = IMAG(matrix_get_local_cplx(matrix,i,j))
       else
         reference(ind) = matrix_get_local_real(matrix,i,j)
       end if
     end if
!    cptr = cptr + 1
   end do
 end do

!if (cptr /= 0) then
!PRINT *,matrix%processor%myproc,"erreur Linf matrix scalapack", &
!&  err,"on",cptr,"terms"
!endif

end subroutine matrix_to_global
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/matrix_to_reference
!! NAME
!!  matrix_to_reference
!!
!! FUNCTION
!!  Routine to fill a full matrix with respect to a SCALAPACK matrix.
!!
!! INPUTS
!!  matrix= the matrix to process
!!  istwf_k= option parameter that describes the storage of wfs
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  reference= one-dimensional array
!!
!! PARENTS
!!      subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine matrix_to_reference(matrix,reference,istwf_k)

  use defs_scalapack
  use defs_basis

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => matrix_to_reference
!End of the abilint section

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)        :: matrix
  REAL(dp),DIMENSION(:,:),INTENT(inout)     :: reference
  INTEGER,INTENT(in)                        :: istwf_k

!Local variables-------------------------------
  INTEGER  :: i,j,iglob,jglob,ind
  REAL(dp) :: err
  INTEGER  :: cptr

! *********************************************************************

!err = 0._DP
!cptr = 0

 do i=1,matrix%sizeb_local(1)
   do j=1,matrix%sizeb_local(2)
     call idx_glob(matrix,i,j,iglob,jglob)

     ind=(iglob-1)*2+1

     if (istwf_k/=2) then
       reference(ind,jglob)   = REAL(matrix_get_local_cplx(matrix,i,j))
       reference(ind+1,jglob) = IMAG(matrix_get_local_cplx(matrix,i,j))
     else
       reference(ind,jglob)   = matrix_get_local_real(matrix,i,j)
       reference(ind+1,jglob) = 0._dp
     end if

!    cptr = cptr + 1
   end do
 end do

!if (cptr /= 0) then
!PRINT *,matrix%processor%myproc,"error Linf matrix scalapack", &
!&  err,"on",cptr,"terms"
!endif 

end subroutine matrix_to_reference
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_matrix_from_global_dpc_2D
!! NAME
!!  slk_matrix_from_global_dpc_2D
!!
!! FUNCTION
!!  Routine to fill a complex SCALAPACK matrix with respect to a global matrix.
!!  TARGET: Two-dimensional double precision complex matrix 
!!
!! INPUTS
!!  glob_mat=Two-dimensional array containing the global matrix.
!!  uplo=String specifying whether only the upper or lower triangular part of the global matrix is used:
!!    = "U":  Upper triangular
!!    = "L":  Lower triangular
!!    = "A":  Full matrix (used for general complex matrices)
!!
!! SIDE EFFECTS
!!  Slk_mat<matrix_scalapack>=The distributed matrix.
!!    %buffer_cplx=Local buffer containg the value this node is dealing with.
!!
!! PARENTS
!!      m_abilasi
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_matrix_from_global_dpc_2D(Slk_mat,uplo,glob_mat)

 use defs_basis
 use defs_scalapack
 use m_errors

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => slk_matrix_from_global_dpc_2D
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 character(len=*),intent(in) :: uplo
 type(matrix_scalapack),intent(inout)  :: Slk_mat
!array
 complex(dpc),intent(in) :: glob_mat(:,:)

!Local variables-------------------------------
 integer :: ii,jj,iglob,jglob

!************************************************************************

 ABI_CHECK(associated(Slk_mat%buffer_cplx),"%buffer_cplx not associated")

 select case (uplo(1:1))

   case ("A","a") ! Full global matrix is used.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)
         Slk_mat%buffer_cplx(ii,jj) = glob_mat(iglob,jglob) 
       end do 
     end do 

   case ("U","u") ! Only the upper triangle of the global matrix is used.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)
         if (jglob>=iglob) then
           Slk_mat%buffer_cplx(ii,jj) =        glob_mat(iglob,jglob) 
         else 
           Slk_mat%buffer_cplx(ii,jj) = DCONJG( glob_mat(jglob,iglob) )
         end if
       end do 
     end do 

   case ("L","l") ! Only the lower triangle of the global matrix is used.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)
         if (jglob<=iglob) then
           Slk_mat%buffer_cplx(ii,jj) =        glob_mat(iglob,jglob) 
         else 
           Slk_mat%buffer_cplx(ii,jj) = DCONJG( glob_mat(jglob,iglob) )
         end if
       end do 
     end do 

     case default
     MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
 end select

end subroutine slk_matrix_from_global_dpc_2D
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_matrix_from_global_dpc_1Dp
!! NAME
!!  slk_matrix_from_global_dpc_1Dp
!!
!! FUNCTION
!!  Routine to fill a complex SCALAPACK matrix with respect to a global matrix.
!!  TARGET: double precision complex matrix in packed form.
!!
!! INPUTS
!!  glob_pmat(n*(n+1)/2)=One-dimensional array containing the global matrix A packed columnwise in a linear array.  
!!    The j-th column of A is stored in the array glob_pmat as follows: 
!!      if uplo = ’U’, glob_pmat(i + (j-1)*j/2)       = A(i,j) for 1<=i<=j; 
!!      if uplo = ’L’, glob_pmat(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
!!      where n is the number of rows or columns in the global matrix.
!!  uplo=String specifying whether only the upper or lower triangular part of the global matrix is used:
!!    = "U":  Upper triangular
!!    = "L":  Lower triangular
!!
!! SIDE EFFECTS
!!  Slk_mat<matrix_scalapack>=The distributed matrix.
!!    %buffer_cplx=Local buffer containg the value this node is dealing with.
!!
!! PARENTS
!!      m_abilasi
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_matrix_from_global_dpc_1Dp(Slk_mat,uplo,glob_pmat)

 use defs_basis
 use defs_scalapack
 use m_errors

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => slk_matrix_from_global_dpc_1Dp
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 character(len=*),intent(in) :: uplo
 type(matrix_scalapack),intent(inout)  :: Slk_mat
!array
 complex(dpc),intent(in) :: glob_pmat(:)

!Local variables-------------------------------
 integer :: ii,jj,iglob,jglob,ind,n
 real(dp) :: szm

!************************************************************************

 ABI_CHECK(associated(Slk_mat%buffer_cplx),"%buffer_cplx not associated")

 szm = SIZE(glob_pmat)
 n = NINT( (-1 + SQRT(one+8*szm) )*half ); if (n*(n+1)/2 /= SIZE(glob_pmat)) stop "Buggy compiler"

 select case (uplo(1:1))

   case ("U","u") ! Only the upper triangle of the global matrix is used.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)

         if (jglob>=iglob) then
           ind = iglob + jglob*(jglob-1)/2
           Slk_mat%buffer_cplx(ii,jj) =        glob_pmat(ind) 
         else 
           ind = jglob + iglob*(iglob-1)/2
           Slk_mat%buffer_cplx(ii,jj) = DCONJG( glob_pmat(ind) )
         end if

       end do 
     end do 

   case ("L","l") ! Only the lower triangle of the global matrix is used.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)

         if (jglob<=iglob) then
           ind = iglob + (jglob-1)*(2*n-jglob)/2
           Slk_mat%buffer_cplx(ii,jj) =        glob_pmat(ind) 
         else 
           ind = jglob + (iglob-1)*(2*n-iglob)/2
           Slk_mat%buffer_cplx(ii,jj) = DCONJG( glob_pmat(ind) )
         end if
         
       end do 
     end do 

     case default
     MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
 end select

end subroutine slk_matrix_from_global_dpc_1Dp
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_matrix_to_global_dpc_2D
!! NAME
!!  slk_matrix_to_global_dpc_2D
!!
!! FUNCTION
!!  Routine to fill a global matrix with respect to a SCALAPACK matrix.
!!  TARGET: Two-dimensional Double precision complex matrix.
!!
!! INPUTS
!!  Slk_mat<matrix_scalapack>=The distributed matrix.
!!  uplo=String specifying whether the upper or lower triangular part of the global matrix has to be filled:
!!    = "U":  Upper triangular
!!    = "L":  Lower triangular
!!    = "A":  Full matrix is filled (used for general complex matrices)
!!
!! SIDE EFFECTS
!!  glob_mat=The global matrix where the entries owned by this processors have been overwritten.
!!  Note that the remaing entries not treated by this node are not changed.
!!
!! PARENTS
!!      m_abilasi
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_matrix_to_global_dpc_2D(Slk_mat,uplo,glob_mat)

 use defs_basis
 use defs_scalapack
 use m_errors

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => slk_matrix_to_global_dpc_2D
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalaras
 character(len=*),intent(in) :: uplo
 type(matrix_scalapack),intent(in) :: Slk_mat
!arrays
 complex(dpc),intent(inout) :: glob_mat(:,:)

!Local variables-------------------------------
 integer :: ii,jj,iglob,jglob

!************************************************************************

 select case (uplo(1:1))

   case ("A","a")  ! Full global matrix has to be filled. 
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)
         glob_mat(iglob,jglob) = Slk_mat%buffer_cplx(ii,jj)
       end do
     end do

   case ("U","u") ! Only the upper triangle of the global matrix is filled.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)
         if (jglob>=iglob) glob_mat(iglob,jglob) = Slk_mat%buffer_cplx(ii,jj)
       end do
     end do

   case ("L","l") ! Only the lower triangle of the global matrix is filled.
     do jj=1,Slk_mat%sizeb_local(2)
       do ii=1,Slk_mat%sizeb_local(1)
         call idx_glob(Slk_mat,ii,jj,iglob,jglob)
         if (jglob<=iglob) glob_mat(iglob,jglob) = Slk_mat%buffer_cplx(ii,jj)
       end do
     end do

     case default
     MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
 end select

end subroutine slk_matrix_to_global_dpc_2D
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/my_locr
!! NAME
!! my_locr
!!
!! FUNCTION
!!  Method of matrix_scalapack wrapping the scaLAPACK tool LOCr. 
!!
!! INPUTS
!!  Slk_mat<matrix_scalapack>
!! 
!! OUTPUT 
!!  my_locr= For the meaning see NOTES below.
!!
!! NOTES
!!  Let K be the number of rows or columns of a distributed matrix, and assume that its process grid has dimension p x q.
!!  LOCr( K ) denotes the number of elements of K that a process would receive if K were distributed over the p 
!!  processes of its process column.
!!  Similarly,  LOCc(  K  ) denotes the number of elements of K that a process would receive if K were distributed over
!!  the q processes of its process row.
!!  The values of LOCr() and LOCc() may be determined via a call to the ScaLAPACK tool function, NUMROC:
!!          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
!!          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).  An upper bound for these quantities may  be  computed
!!  by:
!!          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
!!          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function my_locr(Slk_mat)

 use defs_basis
 use defs_scalapack

 implicit none

!Arguments ------------------------------------
!scalars
 integer :: my_locr
 type(matrix_scalapack),intent(in) :: Slk_mat

!Local variables-------------------------------
 integer :: M, MB_A, MYROW, RSRC_A, NPROW
 integer,external :: NUMROC

! *************************************************************************

 M      = Slk_mat%descript%tab(M_ )      ! The number of rows in the global matrix.
 MB_A   = Slk_mat%descript%tab(MB_)      ! The number of rows in a block.
 MYROW  = Slk_mat%processor%coords(1)    ! The row index of my processor
 RSRC_A = Slk_mat%descript%tab(RSRC_)    ! The row of the processors at the beginning.
 NPROW  = Slk_mat%processor%grid%dims(1) ! The number of processors per row in the Scalapack grid.

 my_locr = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW )

end function my_locr
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/my_locc
!! NAME
!! my_locc
!!
!! FUNCTION
!!  Method of matrix_scalapack wrapping the scaLAPACK tool LOCc. 
!!
!! INPUTS
!!  Slk_mat<matrix_scalapack>
!! 
!! OUTPUT 
!!  my_locc= For the meaning see NOTES below.
!!
!! NOTES
!!  Let K be the number of rows or columns of a distributed matrix, and assume that its process grid has dimension p x q.
!!  LOCr( K ) denotes the number of elements of K that a process would receive if K were distributed over the p 
!!  processes of its process column.
!!  Similarly,  LOCc(  K  ) denotes the number of elements of K that a process would receive if K were distributed over
!!  the q processes of its process row.
!!  The values of LOCr() and LOCc() may be determined via a call to the ScaLAPACK tool function, NUMROC:
!!          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
!!          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).  An upper bound for these quantities may  be  computed
!!  by:
!!          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
!!          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

function my_locc(Slk_mat)

 use defs_basis
 use defs_scalapack

 implicit none

!Arguments ------------------------------------
!scalars
 integer :: my_locc
 type(matrix_scalapack),intent(in) :: Slk_mat

!Local variables-------------------------------
 integer :: N, NB_A, MYCOL, CSRC_A, NPCOL
 integer,external :: NUMROC

! *************************************************************************

 N      = Slk_mat%descript%tab(N_ )      ! The number of columns in the global matrix.
 NB_A   = Slk_mat%descript%tab(NB_)      ! The number of columns in a block.
 MYCOL  = Slk_mat%processor%coords(2)    ! The column index of my processor
 CSRC_A = Slk_mat%descript%tab(CSRC_)    ! The column of the processors at the beginning.
 NPCOL  = Slk_mat%processor%grid%dims(2) ! The number of processors per column in the Scalapack grid.

 my_locc = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL )
 
end function my_locc
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/matrix_pzgemm
!! NAME
!!  matrix_pzgemm
!!
!! FUNCTION
!!  Extended matrix*matrix product
!!  C := alpha*A*B - beta*C
!!
!!  For a simple matrix vector product, one can simply pass 
!!  alpha = (1.,0.) and beta (0.,0.)
!!
!! INPUTS
!!  matrix1= first ScaLAPACK matrix (matrix A)
!!  matrix2= second ScaLAPACK matrix (matrix B)
!!  alpha= scalar multiplicator for the A*B product
!!  beta= scalar multiplicator for the C matrix
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  results= ScaLAPACK matrix coming out of the operation
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

subroutine matrix_pzgemm(matrix1,alpha,matrix2,beta,results)

  use defs_scalapack
  use defs_basis

  implicit none

!Arguments ------------------------------------
  TYPE(matrix_scalapack),INTENT(in)        :: matrix1,matrix2
  TYPE(matrix_scalapack),INTENT(inout)     :: results
  COMPLEX(dp), intent(in)                   :: alpha, beta

!Local variables-------------------------------

  call PZGEMM('N','N',matrix1%sizeb_global(1),matrix2%sizeb_global(2),&
       &      matrix1%sizeb_global(2),alpha,matrix1%buffer_cplx,1,1, &
       &      matrix1%descript%tab,matrix2%buffer_cplx,1,1, &
       &      matrix2%descript%tab,beta,results%buffer_cplx,1,1, &
       &      results%descript%tab)

end subroutine matrix_pzgemm
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/compute_eigen_values_vectors
!! NAME
!!  compute_eigen_values_vectors
!!
!! FUNCTION
!!  Calculation of eigenvalues and eigenvectors.
!!  Complex and real cases.
!!
!! INPUTS
!!  processor= descriptor of a processor
!!  matrix= the matrix to process
!!  communicator= MPI communicator
!!  istwf_k= option parameter that describes the storage of wfs
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  results= ScaLAPACK matrix coming out of the operation
!!  eigen= eigenvalues of the matrix
!!
!! PARENTS
!!      subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine compute_eigen_values_vectors(processor,matrix,results,eigen,communicator,istwf_k)

  use defs_scalapack
  use defs_basis

#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
  TYPE(processor_scalapack),INTENT(in)       :: processor
  TYPE(matrix_scalapack),INTENT(in)          :: matrix
  TYPE(matrix_scalapack),INTENT(inout)       :: results
  DOUBLE PRECISION,DIMENSION(:),INTENT(inout) :: eigen
  INTEGER,INTENT(in)  :: communicator,istwf_k

!Local variables-------------------------------
  INTEGER            :: LRWORK,LIWORK,LCWORK,INFO
 
  INTEGER         , dimension(1) :: IWORK_tmp
  DOUBLE PRECISION, dimension(1) :: RWORK_tmp
  COMPLEX(dp)     , dimension(1) :: CWORK_tmp

  INTEGER         , allocatable  :: IWORK(:)
  DOUBLE PRECISION, allocatable  :: RWORK(:)
  COMPLEX(dp)     , allocatable  :: CWORK(:)

  INTEGER,          allocatable :: ICLUSTR(:)
  INTEGER,          allocatable :: IFAIL(:)
  DOUBLE PRECISION, allocatable :: GAP(:)

  DOUBLE PRECISION            :: ABSTOL,ORFAC
  INTEGER,          PARAMETER :: IZERO=0

  INTEGER ::  M,NZ,IA,JA,IZ,JZ,ierr,TWORK_tmp(3),TWORK(3)
  
  DOUBLE PRECISION :: PDLAMCH
  EXTERNAL PDLAMCH

  ! Initialisation
  INFO   = 0 
  IWORK(:) = 0
  RWORK(:) = 0._dp
  CWORK(:) = (0._dp,0._dp)
 
  ABSTOL = PDLAMCH(processor%grid%ictxt,'U')
  ORFAC  = -1.D+0

  ! Allocation of the variables for the results of the calculations
  allocate(IFAIL(matrix%sizeb_global(2)))
  allocate(ICLUSTR(2*processor%grid%dims(1)*processor%grid%dims(2)))
  allocate(GAP(processor%grid%dims(1)*processor%grid%dims(2)))
  
  ! Get the size of the work arrays
  if (istwf_k/=2) then
     call PZHEEVX('V','A','U',&
          &      matrix%sizeb_global(2),&
          &      matrix%buffer_cplx,1,1,matrix%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_cplx,1,1,results%descript%tab, &
          &      CWORK_tmp,-1,RWORK_tmp,-1,IWORK_tmp,-1,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  else 
     call PDSYEVX('V','A','U',&
          &      matrix%sizeb_global(2),&
          &      matrix%buffer_real,1,1,matrix%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_real,1,1,results%descript%tab, &
          &      RWORK_tmp,-1,IWORK_tmp,-1,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  end if

  if (INFO/=0) then
     write(6,'(A,I6)') "Problem to compute workspace to use ScaLAPACK, INFO=",INFO
     !call leave_new('PERS')
  endif

  TWORK_tmp(1) = IWORK_tmp(1)
  TWORK_tmp(2) = INT(RWORK_tmp(1))
  TWORK_tmp(3) = INT(REAL(CWORK_tmp(1)))

 !! Get the maximum of the size of the work arrays processor%comm
  call MPI_ALLREDUCE(TWORK_tmp,TWORK,3,MPI_INTEGER,MPI_MAX,communicator,ierr)

  LIWORK = TWORK(1)
  LRWORK = TWORK(2) + matrix%sizeb_global(2) *(matrix%sizeb_global(2)-1)
  LCWORK = TWORK(3)
     
  ! Allocation of the work arrays
  if (LIWORK>0) allocate(IWORK(LIWORK))
  if (LRWORK>0) allocate(RWORK(LRWORK))
  if (LCWORK>0) allocate(CWORK(LCWORK))
 
  ! Call the calculation routine
  if (istwf_k/=2) then
     !write(6,*) 'I am using PZHEEVX'
     call PZHEEVX('V','A','U',&
          &      matrix%sizeb_global(2),&
          &      matrix%buffer_cplx,1,1,matrix%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_cplx,1,1,results%descript%tab, &
          &      CWORK,LCWORK,RWORK,LRWORK,IWORK,LIWORK,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  else
     !write(6,*) ' I am using PDSYEVX'
     call PDSYEVX('V','A','U',&
          &      matrix%sizeb_global(2),&
          &      matrix%buffer_real,1,1,matrix%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_real,1,1,results%descript%tab, &
          &      RWORK,LRWORK,IWORK,LIWORK,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  endif

  if (INFO/=0) then
     write(6,'(A,I6)') "Problem to compute eigenvalues and eigenvectors with ScaLAPACK, INFO=",INFO
     !call leave_new('PERS')
  endif

  deallocate(IFAIl,ICLUSTR,GAP)
  if (allocated(IWORK)) deallocate(IWORK)
  if (allocated(RWORK)) deallocate(RWORK)
  if (allocated(CWORK)) deallocate(CWORK)

end subroutine compute_eigen_values_vectors
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/compute_eigen_problem
!! NAME
!!  compute_eigen_problem
!!
!! FUNCTION
!!  Calculation of eigenvalues and eigenvectors:
!!  A * X = lambda * B * X
!!  Complex and real cases.
!!
!! INPUTS
!!  processor= descriptor of a processor
!!  matrix1= first ScaLAPACK matrix (matrix A)
!!  matrix2= second ScaLAPACK matrix (matrix B)
!!  communicator= MPI communicator
!!  istwf_k= option parameter that describes the storage of wfs
!!
!! OUTPUT
!!  None
!!
!! SIDE EFFECTS
!!  results= ScaLAPACK matrix coming out of the operation
!!  eigen= eigenvalues of the matrix
!!
!! PARENTS
!!      subdiago
!!
!! CHILDREN
!!
!! SOURCE

subroutine compute_eigen_problem(processor,matrix1,matrix2,results,eigen,communicator,istwf_k)

  use defs_scalapack
  use defs_basis

#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
  TYPE(processor_scalapack),INTENT(in)       :: processor
  TYPE(matrix_scalapack),INTENT(in)          :: matrix1,matrix2
  TYPE(matrix_scalapack),INTENT(inout)       :: results
  DOUBLE PRECISION,DIMENSION(:),INTENT(inout) :: eigen

  INTEGER,INTENT(in)  :: communicator,istwf_k

!Local variables-------------------------------
  INTEGER            :: LRWORK,LIWORK,LCWORK,INFO
 
  INTEGER         , dimension(1) :: IWORK_tmp
  DOUBLE PRECISION, dimension(1) :: RWORK_tmp
  COMPLEX(dp)     , dimension(1) :: CWORK_tmp

  INTEGER         , allocatable  :: IWORK(:)
  DOUBLE PRECISION, allocatable  :: RWORK(:)
  COMPLEX(dp)     , allocatable  :: CWORK(:)


  INTEGER,          allocatable :: ICLUSTR(:)
  INTEGER,          allocatable :: IFAIL(:)
  DOUBLE PRECISION, allocatable :: GAP(:)

  DOUBLE PRECISION            :: ABSTOL,ORFAC
  INTEGER         , PARAMETER :: IZERO=0

  INTEGER ::  M,NZ,IA,JA,IZ,JZ,ierr,TWORK_tmp(3),TWORK(3)

  DOUBLE PRECISION :: PDLAMCH
  EXTERNAL PDLAMCH

  ! Initialisation
  INFO   = 0   
  IWORK(:) = 0
  RWORK(:) = 0._dp
  CWORK(:) = (0._dp,0._dp)

  ABSTOL = PDLAMCH(processor%grid%ictxt,'U')
  ORFAC  = -1.D+0

  ! Allocate the arrays for the results of the calculation
  allocate(IFAIL  (matrix1%sizeb_global(2)))
  allocate(ICLUSTR(2*processor%grid%dims(1)*processor%grid%dims(2)))
  allocate(GAP    (  processor%grid%dims(1)*processor%grid%dims(2)))

  ! Get the size of the work arrays
  if (istwf_k/=2) then
     call PZHEGVX(1,'V','A','U',&
          &      matrix1%sizeb_global(2),&
          &      matrix1%buffer_cplx,1,1,matrix1%descript%tab, &
          &      matrix2%buffer_cplx,1,1,matrix2%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_cplx,1,1,results%descript%tab, &
          &      CWORK_tmp,-1,RWORK_tmp,-1,IWORK_tmp,-1,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  else
     call PDSYGVX(1,'V','A','U',&
          &      matrix1%sizeb_global(2),&
          &      matrix1%buffer_real,1,1,matrix1%descript%tab, &
          &      matrix2%buffer_real,1,1,matrix2%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_real,1,1,results%descript%tab, &
          &      RWORK_tmp,-1,IWORK_tmp,-1,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  endif

  if (INFO/=0) then
     write(6,'(A,I6)') "Problem to compute workspace to use ScaLAPACK, INFO=",INFO
     !call leave_new('PERS')
  endif

  TWORK_tmp(1) = IWORK_tmp(1)
  TWORK_tmp(2) = INT(RWORK_tmp(1)) + matrix1%sizeb_global(2) *(matrix1%sizeb_global(2)-1)
  TWORK_tmp(3) = INT(REAL(CWORK_tmp(1)))
 
 ! Get the maximum of sizes of the work arrays processor%comm 
  call MPI_ALLREDUCE(TWORK_tmp,TWORK,3,MPI_INTEGER,MPI_MAX,communicator,ierr)

  LIWORK = TWORK(1)
  LRWORK = TWORK(2)
  LCWORK = TWORK(3)

 ! Allocate the work arrays
  if (LIWORK>0) allocate(IWORK(LIWORK))
  if (LRWORK>0) allocate(RWORK(LRWORK))
  if (LCWORK>0) allocate(CWORK(LCWORK))

  ! Call the calculation routine 
  if (istwf_k/=2) then
     !write(6,*) 'I am using PZHEGVX'
     call PZHEGVX(1,'V','A','U',&
          &      matrix1%sizeb_global(2),&
          &      matrix1%buffer_cplx,1,1,matrix1%descript%tab, &
          &      matrix2%buffer_cplx,1,1,matrix2%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_cplx,1,1,results%descript%tab, &
          &      CWORK,LCWORK,RWORK,LRWORK,IWORK,LIWORK,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  else
     !write(6,*) 'I am using PDSYGVX'
     call PDSYGVX(1,'V','A','U',&
          &      matrix1%sizeb_global(2),&
          &      matrix1%buffer_real,1,1,matrix1%descript%tab, &
          &      matrix2%buffer_real,1,1,matrix2%descript%tab, &
          &      ZERO,ZERO,IZERO,IZERO,ABSTOL,&
          &      m,nz,eigen,ORFAC, &
          &      results%buffer_real,1,1,results%descript%tab, &
          &      RWORK,LRWORK,IWORK,LIWORK,&
          &      IFAIL,ICLUSTR,GAP,INFO)
  endif
  
  if (INFO/=0) then
     write(6,'(A,I6)') "Problem to compute eigen problem with ScaLAPACK, INFO=",INFO
     !call leave_new('PERS')
  endif

  deallocate(IFAIl,ICLUSTR,GAP)
  if (allocated(IWORK)) deallocate(IWORK)
  if (allocated(RWORK)) deallocate(RWORK)
  if (allocated(CWORK)) deallocate(CWORK)

end subroutine compute_eigen_problem
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_pzheev
!! NAME
!! slk_pzheev
!!
!! FUNCTION
!!  slk_pzheev provides an object-oriented interface to the ScaLAPACK routine PHZEEV which computes selected  
!!  eigenvalues and, optionally, eigenvectors of an Hermitian matrix A.
!!   A * X = lambda * X
!!
!! INPUTS
!!
!!  JOBZ    (global input) CHARACTER*1
!!          Specifies whether or not to compute the eigenvectors:
!!          = "N":  Compute eigenvalues only.
!!          = "V":  Compute eigenvalues and eigenvectors.
!!  UPLO    (global input) CHARACTER*1
!!          Specifies whether the upper or lower triangular part of the symmetric matrix A is stored:
!!          = "U":  Upper triangular
!!          = "L":  Lower triangular
!!
!!  Slk_mat<type(matrix_scalapack)>=The object storing the local buffer, the array descriptor, the context 
!!    and other quantities needed to call ScaLAPACK routines.
!!  Slk_vec<matrix_scalapack>=The distributed eigenvectors. Not referenced if JOBZ="N"
!!
!! OUTPUT 
!!  W       (global output) DOUBLE PRECISION array, dimension (N) where N is the rank of the global matrix.
!!          On normal exit, the first M entries contain the selected eigenvalues in ascending order.
!!
!! SIDE EFFECTS
!!  If JOBZ="V", the local buffer Slk_vec%buffer_cplx will contain part of the distributed eigenvectors.
!!
!! PARENTS
!!      exceig,m_abilasi
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_pzheev(jobz,uplo,Slk_mat,Slk_vec,w)

 use defs_basis
 use defs_scalapack
 use m_errors 

 implicit none

!Arguments ------------------------------------
!scalars
 character(len=*),intent(in) :: jobz,uplo 
 type(matrix_scalapack),intent(inout) :: Slk_mat
 type(matrix_scalapack),intent(inout) :: Slk_vec 
!arrays
 real(dp),intent(out) :: w(:)

!Local variables ------------------------------
!scalars
 integer :: lwork,lrwork,info
 character(len=500) :: msg
!arrays
 real(dp),allocatable :: rwork(:)
 complex(dpc),allocatable :: work(:)

!************************************************************************

       ABI_CHECK(associated(Slk_mat%buffer_cplx),"buffer_cplx not associated")

!      Get optimal size of workspace.
       lwork=-1; lrwork=-1
       allocate(work(1),rwork(1))

       call PZHEEV(jobz,uplo,Slk_mat%sizeb_global(2),Slk_mat%buffer_cplx,1,1,Slk_mat%descript%tab, &
&       w,Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,work,lwork,rwork,lrwork,info)

       ABI_CHECK(info==0,"Error during the calculation of the workspace size")

       lwork = NINT(REAL(work(1))); lrwork= NINT(rwork(1))*2  ! Don't know why but rwork(1) is not enough!
!      write(*,*)lwork,lrwork
       deallocate(work,rwork)

!      Solve the problem.
       allocate(work(lwork),rwork(lrwork))

       call PZHEEV(jobz,uplo,Slk_mat%sizeb_global(2),Slk_mat%buffer_cplx,1,1,Slk_mat%descript%tab, &
&       w,Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,work,lwork,rwork,lrwork,info)

       if (info/=0) then
         write(msg,'(a,i7)')" PZHEEV returned info= ",info
         MSG_PERS_ERROR(msg)
       end if

       deallocate(work,rwork)

     end subroutine slk_pzheev
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_pzheevx
!! NAME
!!  slk_pzheevx
!!
!! FUNCTION
!!  slk_pzheevx provides an object-oriented interface to the ScaLAPACK routine PZHEEVX that 
!!  computes selected eigenvalues and, optionally, eigenvectors of a complex hermitian matrix A.
!!   A * X = lambda *  X
!!
!! INPUTS
!!  Slk_mat<matrix_scalapack>=ScaLAPACK matrix (matrix A)
!!
!!  Slk_vec<matrix_scalapack>=The distributed eigenvectors X. Not referenced if JOBZ="N"
!!
!!  JOBZ    (global input) CHARACTER*1
!!          Specifies whether or not to compute the eigenvectors:
!!          = "N":  Compute eigenvalues only.
!!          = "V":  Compute eigenvalues and eigenvectors.
!!                                                                                                    
!!  RANGE   (global input) CHARACTER*1
!!          = "A": all eigenvalues will be found.
!!          = "V": all eigenvalues in the interval [VL,VU] will be found.
!!          = "I": the IL-th through IU-th eigenvalues will be found.
!!                                                                                                    
!!  UPLO    (global input) CHARACTER*1
!!          Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored:
!!          = "U":  Upper triangular
!!          = "L":  Lower triangular
!!
!!  VL      (global input) DOUBLE PRECISION
!!          If  RANGE="V",the lower bound of the interval to be searched for eigenvalues. Not referenced if RANGE =
!!          "A" or "I"
!!                                                                                                                      
!!  VU      (global input) DOUBLE PRECISION
!!          If RANGE="V", the upper bound of the interval to be searched for eigenvalues.  Not referenced  if  RANGE  =
!!          "A" or "I".
!!
!!  IL     (global input) INTEGER
!!         If  RANGE="I",  the  index  (from smallest to largest) of the smallest eigenvalue to be returned.  IL >= 1.
!!         Not referenced if RANGE = "A" or "V".
!!
!!  IU     (global input) INTEGER
!!         If RANGE="I", the index (from smallest to largest) of the largest eigenvalue to be returned.  min(IL,N)  <=
!!         IU <= N.  Not referenced if RANGE = "A" or "V"
!!
!!  ABSTOL  (global input) DOUBLE PRECISION
!!          If JOBZ="V", setting ABSTOL to PDLAMCH( CONTEXT, "U") yields the most orthogonal eigenvectors.
!!          The  absolute error tolerance for the eigenvalues.  An approximate eigenvalue is accepted as converged when
!!          it is determined to lie in an interval [a,b] of width less than or equal to
!!
!!           ABSTOL + EPS *   max( |a|,|b| ) ,
!!
!!          where EPS is the machine precision.  If ABSTOL is less than or equal to zero, then EPS*norm(T) will be used
!!          in  its  place, where norm(T) is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form.
!!          Eigenvalues will be computed  most  accurately  when  ABSTOL  is  set  to  twice  the  underflow  threshold
!!          2*PDLAMCH("S")  not  zero.   If  this routine returns with ((MOD(INFO,2).NE.0) .OR.  (MOD(INFO/8,2).NE.0)),
!!          indicating that some eigenvalues or eigenvectors did not converge, try setting ABSTOL to 2*PDLAMCH("S").
!!
!! OUTPUT
!!  mene_found= (global output) Total number of eigenvalues found.  0 <= mene_found <= N.
!!                                                                                                                      
!!  eigen(N)= (global output) Eigenvalues of A where N is the dimension of M
!!            On normal exit, the first mene_found entries contain the selected eigenvalues in ascending order.
!!
!! SIDE EFFECTS
!!  If JOBZ="V", the local buffer Slk_vec%buffer_cplx will contain part of the distributed eigenvectors.
!!
!!  Slk_mat<ScaLAPACK_matrix>=
!!    %buffer_cplx is destroyed when the routine returns
!!
!! PARENTS
!!      exceig,m_abilasi
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_pzheevx(jobz,range,uplo,Slk_mat,vl,vu,il,iu,abstol,Slk_vec,mene_found,eigen)

 use defs_basis
 use defs_scalapack
 use m_errors

 use m_fstrings,  only : starts_with

#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
!End of the abilint section

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
 integer,intent(in) :: il,iu
 integer,intent(out) :: mene_found
 real(dp),intent(in) :: abstol,vl,vu
 character(len=*),intent(in) :: jobz,range,uplo 
 type(matrix_scalapack),intent(inout) :: Slk_mat
 type(matrix_scalapack),intent(inout) :: Slk_vec
!arrays
 real(dp),intent(out) :: eigen(:)

!Local variables-------------------------------
!scalars
 integer  :: lwork,lrwork,liwork,info
 integer ::  nvec_calc,ierr 
 real(dp) :: orfac
 !real(dp),external :: PDLAMCH
 character(len=500) :: msg
!arrays
 integer :: ibuff(3),max_ibuff(3)
 integer,allocatable  :: iwork(:),iclustr(:),ifail(:)
 real(dp),allocatable  :: rwork(:),gap(:)
 complex(dpc),allocatable :: work(:)

!************************************************************************

!      abstol = PDLAMCH(Slk_vecprocessor%grid%ictxt,'U')

       orfac  = -one ! Only for eigenvectors: use default value 10d-3.
!      Vectors within orfac*norm(A) will be reorthogonalized. 

!      Allocate the arrays for the results of the calculation
       allocate(gap( Slk_mat%processor%grid%dims(1) * Slk_mat%processor%grid%dims(2)))

       if (starts_with(jobz,(/"V","v"/))) then 
         allocate(ifail(Slk_mat%sizeb_global(2)))
         allocate(iclustr( 2*Slk_mat%processor%grid%dims(1) * Slk_mat%processor%grid%dims(2)))
       end if
!      
!      Get the optimal size of the work arrays.
       lwork=-1; lrwork=-1; liwork=-1
       allocate(work(1),iwork(1))   
       allocate(rwork(3))  ! NB: In ScaLAPACK v1.7 dimension of rwork is max(3,lrwork)
!      This is clearly seen in the source in which rwork(1:3) is accessed  
!      during the calcuation of the workspace size.

       call PZHEEVX(jobz,range,uplo, Slk_mat%sizeb_global(2),Slk_mat%buffer_cplx,1,1,Slk_mat%descript%tab,&
&       vl,vu,il,iu,abstol,mene_found,nvec_calc,eigen,orfac,&
&       Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,&
&       work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)

       write(msg,'(a,i7)')" Problem to compute workspace to use ScaLAPACK, info: ",info
       ABI_CHECK(info==0,msg)

       lwork  = NINT(REAL(work(1)),kind=dp)
       lrwork = NINT(rwork(1))
       liwork = iwork(1)

!      write(*,*)"After workspace1",lwork,lrwork,liwork
!      call MPI_BARRIER(MPI_COMM_WORLD,ierr)

       deallocate(work,rwork,iwork)
!      
!      FROM THE SCALAPACK MAN PAGE:
!      The computed eigenvectors may not be orthogonal if the minimal workspace is supplied and ORFAC is too
!      small. If you  want to guarantee orthogonality (at the cost of potentially poor performance) you should
!      add the following to LRWORK: (CLUSTERSIZE-1)*N where CLUSTERSIZE is  the  number  of  eigenvalues  in  the
!      largest cluster, where a cluster is defined as a set of close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
!      W(J+1) <= W(J) + ORFAC*2*norm(A) }. 

       if ( starts_with(jobz,(/"V","v"/)) ) then 
         lrwork = INT( lrwork + Slk_mat%sizeb_global(2) *(Slk_mat%sizeb_global(2)-1) )
       end if

!      ibuff(1) = lwork
!      ibuff(2) = lrwork !INT(lrwork + Slk_mat%sizeb_global(2) *(Slk_mat%sizeb_global(2)-1)
!      ibuff(3) = liwork
       
!      Get the maximum of sizes of the work arrays processor%comm 
!      call MPI_ALLREDUCE(ibuff,max_ibuff,3,MPI_INTEGER,MPI_MAX,communicator,ierr)

!      lwork  = max_ibuff(1)
!      lrwork = max_ibuff(2)
!      liwork = max_ibuff(3)

!      write(*,*)"After workspace2",lwork,lrwork,liwork
!      call MPI_BARRIER(MPI_COMM_WORLD,ierr)

       allocate(work (lwork ))
       allocate(rwork(lrwork))
       allocate(iwork(liwork))
!      
!      Call the scaLAPACK routine. 

!      write(std_out,*) 'I am using PZHEEVX'
       call PZHEEVX(jobz,range,uplo, Slk_mat%sizeb_global(2),Slk_mat%buffer_cplx,1,1,Slk_mat%descript%tab,&
&       vl,vu,il,iu,abstol,mene_found,nvec_calc, eigen,orfac,&
&       Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,&
&       work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)

!      Handle the possible error.
       if (info < 0) then 
         write(msg,'(a,i7,a)')" The ",-info,"-th argument of PZHEEVX had an illegal value."
         if (info==-25) msg = " LRWORK is too small to compute all the eigenvectors requested, no computation is performed" 
         MSG_PERS_ERROR(msg)
       end if

       if (info > 0) then
         write(msg,'(a,i7)') " PZHEEVX returned info: ",info
         call wrtout(std_out,msg,"PERS")
         if (MOD(info,2)/=0)then
           write(msg,'(3a)')&
&           " One or more eigenvectors failed to converge. ",ch10,&
&           " Their indices are stored in IFAIL. Ensure ABSTOL=2.0*PDLAMCH('U')"
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/2,2)/=0) then
           write(msg,'(5a)')&
&           " Eigenvectors corresponding to one or more clusters of eigenvalues ",ch10,&
&           " could not be reorthogonalized because of insufficient workspace. ",ch10,&
&           " The indices of the clusters are stored in the array ICLUSTR."
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/4,2)/=0) then
           write(msg,'(3a)')" Space limit prevented PZHEEVX from computing all of the eigenvectors between VL and VU. ",ch10,&
&           " The number of eigenvectors  computed  is returned in NZ."
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/8,2)/=0) then 
           msg = " PZSTEBZ  failed to compute eigenvalues. Ensure ABSTOL=2.0*PDLAMCH('U')"
           call wrtout(std_out,msg,"PERS")
         end if
         MSG_PERS_ERROR("Cannot continue")
       end if

!      Check the number of eigenvalues found wrt to the number of vectors calculated.
       if ( starts_with(jobz,(/'V','v'/)) .and. mene_found/=nvec_calc) then
         write(msg,'(5a)')&
&         " The user supplied insufficient space and PZHEEVX is not able to detect this before beginning computation. ",ch10,&
&         " To get all the  eigenvectors requested, the user must supply both sufficient space to hold the ",ch10,&
&         " eigenvectors in Z (M .LE. DESCZ(N_)) and sufficient workspace to compute them. "
         MSG_PERS_ERROR(msg)
       end if

       deallocate(work,rwork,iwork)
       deallocate(gap)

       if ( starts_with(jobz,(/"V","v"/)) ) then 
         deallocate(ifail,iclustr)
       end if

     end subroutine slk_pzheevx
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_pzhegvx
!! NAME
!!  slk_pzhegvx
!!
!! FUNCTION
!!  slk_pzhegvx provides an object-oriented interface to the ScaLAPACK routine PZHEGVX that 
!!  computes selected eigenvalues and, optionally, eigenvectors of a complex generalized 
!!  Hermitian-definite eigenproblem, of the form 
!!  sub( A )*x=(lambda)*sub( B )*x,  sub( A )*sub( B )x=(lambda)*x,  or sub( B )*sub( A )*x=(lambda)*x.
!!  Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
!!  Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
!!  to be Hermitian positive definite.
!!
!! INPUTS
!!  Slk_matA<matrix_scalapack>=ScaLAPACK matrix (matrix A)
!!  Slk_matB<matrix_scalapack>=ScaLAPACK matrix (matrix B)
!!  Slk_vec<matrix_scalapack>=The distributed eigenvectors X. Not referenced if JOBZ="N"
!!
!!  IBTYPE   (global input) INTEGER
!!          Specifies the problem type to be solved:
!!          = 1:  sub( A )*x = (lambda)*sub( B )*x
!!          = 2:  sub( A )*sub( B )*x = (lambda)*x
!!          = 3:  sub( B )*sub( A )*x = (lambda)*x
!!
!!  JOBZ    (global input) CHARACTER*1
!!          Specifies whether or not to compute the eigenvectors:
!!          = "N":  Compute eigenvalues only.
!!          = "V":  Compute eigenvalues and eigenvectors.
!!                                                                                                    
!!  RANGE   (global input) CHARACTER*1
!!          = "A": all eigenvalues will be found.
!!          = "V": all eigenvalues in the interval [VL,VU] will be found.
!!          = "I": the IL-th through IU-th eigenvalues will be found.
!!                                                                                                    
!!  UPLO    (global input) CHARACTER*1
!!          Specifies whether the upper or lower triangular part of the Hermitian matrix sub(A) and sub(B) is stored:
!!          = "U":  Upper triangular
!!          = "L":  Lower triangular
!!
!!  VL      (global input) DOUBLE PRECISION
!!          If  RANGE="V",the lower bound of the interval to be searched for eigenvalues. Not referenced if RANGE =
!!          "A" or "I"
!!                                                                                                                      
!!  VU      (global input) DOUBLE PRECISION
!!          If RANGE="V", the upper bound of the interval to be searched for eigenvalues.  Not referenced  if  RANGE  =
!!          "A" or "I".
!!
!!  IL     (global input) INTEGER
!!         If  RANGE="I",  the  index  (from smallest to largest) of the smallest eigenvalue to be returned.  IL >= 1.
!!         Not referenced if RANGE = "A" or "V".
!!
!!  IU     (global input) INTEGER
!!         If RANGE="I", the index (from smallest to largest) of the largest eigenvalue to be returned.  min(IL,N)  <=
!!         IU <= N.  Not referenced if RANGE = "A" or "V"
!!
!!  ABSTOL  (global input) DOUBLE PRECISION
!!          If JOBZ="V", setting ABSTOL to PDLAMCH( CONTEXT, "U") yields the most orthogonal eigenvectors.
!!          The  absolute error tolerance for the eigenvalues.  An approximate eigenvalue is accepted as converged when
!!          it is determined to lie in an interval [a,b] of width less than or equal to
!!
!!           ABSTOL + EPS *   max( |a|,|b| ) ,
!!
!!          where EPS is the machine precision.  If ABSTOL is less than or equal to zero, then EPS*norm(T) will be used
!!          in  its  place, where norm(T) is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form.
!!          Eigenvalues will be computed  most  accurately  when  ABSTOL  is  set  to  twice  the  underflow  threshold
!!          2*PDLAMCH("S")  not  zero.   If  this routine returns with ((MOD(INFO,2).NE.0) .OR.  (MOD(INFO/8,2).NE.0)),
!!          indicating that some eigenvalues or eigenvectors did not converge, try setting ABSTOL to 2*PDLAMCH("S").
!!
!! OUTPUT
!!  mene_found= (global output) Total number of eigenvalues found.  0 <= mene_found <= N.
!!                                                                                                                      
!!  eigen(N)= (global output) Eigenvalues of A where N is the dimension of M
!!            On normal exit, the first mene_found entries contain the selected eigenvalues in ascending order.
!!
!! SIDE EFFECTS
!!  Slk_vec<matrix_scalapack>:
!!   %buffer_cplx local output (global dimension (N,N)
!!     If JOBZ = 'V', then on normal exit the first M columns of Z
!!     contain the orthonormal eigenvectors of the matrix
!!     corresponding to the selected eigenvalues.  
!!     If JOBZ = 'N', then Z is not referenced.
!!
!!  Slk_matA<matrix_scalapack>:
!!    %buffer_cplx 
!!      (local input/local output) COMPLEX(DPC) pointer into the
!!      local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
!!      On entry, this array contains the local pieces of the
!!      N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
!!      the leading N-by-N upper triangular part of sub( A ) contains
!!      the upper triangular part of the matrix.  If UPLO = 'L', the
!!      leading N-by-N lower triangular part of sub( A ) contains
!!      the lower triangular part of the matrix.
!!
!!      On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
!!      the distributed matrix Z of eigenvectors.  The eigenvectors
!!      are normalized as follows:
!!      if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I;
!!      if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I.
!!      If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
!!      or the lower triangle (if UPLO='L') of sub( A ), including
!!      the diagonal, is destroyed.
!!
!!  Slk_matB=
!!    %buffer_cplx 
!!      (local input/local output) COMPLEX*(DPC) pointer into the
!!      local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
!!      On entry, this array contains the local pieces of the
!!      N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U',
!!      the leading N-by-N upper triangular part of sub( B ) contains
!!      the upper triangular part of the matrix.  If UPLO = 'L', the
!!      leading N-by-N lower triangular part of sub( B ) contains
!!      the lower triangular part of the matrix.
!!
!!      On exit, if INFO <= N, the part of sub( B ) containing the
!!      matrix is overwritten by the triangular factor U or L from
!!      the Cholesky factorization sub( B ) = U**H*U or
!!      sub( B ) = L*L**H.
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_pzhegvx(ibtype,jobz,range,uplo,Slk_matA,Slk_matB,vl,vu,il,iu,abstol,Slk_vec,mene_found,eigen)

 use defs_basis
 use defs_scalapack
 use m_errors

 use m_fstrings,  only : starts_with

#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
!End of the abilint section

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
 integer,intent(in) :: il,iu,ibtype
 integer,intent(out) :: mene_found
 real(dp),intent(in) :: abstol,vl,vu
 character(len=*),intent(in) :: jobz,range,uplo 
 type(matrix_scalapack),intent(inout) :: Slk_matA
 type(matrix_scalapack),intent(inout) :: Slk_matB
 type(matrix_scalapack),intent(inout) :: Slk_vec
!arrays
 real(dp),intent(out) :: eigen(:)

!Local variables-------------------------------
!scalars
 integer  :: lwork,lrwork,liwork,info
 integer ::  nvec_calc,ierr 
 real(dp) :: orfac
 logical :: ltest
 !real(dp),external :: PDLAMCH
 character(len=500) :: msg
!arrays
 integer :: ibuff(3),max_ibuff(3)
 integer :: desca(DLEN_),descb(DLEN_),descz(DLEN_)
 integer,allocatable  :: iwork(:),iclustr(:),ifail(:)
 real(dp),allocatable  :: rwork(:),gap(:)
 complex(dpc),allocatable :: work(:)

!************************************************************************

!      abstol = PDLAMCH(Slk_vecprocessor%grid%ictxt,'U')

       orfac  = -one ! Only for eigenvectors: use default value 10d-3.
!      Vectors within orfac*norm(A) will be reorthogonalized. 

!      ======================
!      Alignment requirements
!      ======================
!      The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
!      and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,

       desca = Slk_matA%descript%tab
       descb = Slk_matB%descript%tab
       if (starts_with(jobz,(/"V","v"/))) then 
         descz = Slk_vec%descript%tab
       else 
         descz = Slk_matA%descript%tab
       end if
       
       ltest = .TRUE.
       ltest = ltest .and. (DESCA(MB_) == DESCA(NB_))
!      IA = IB = IZ
!      JA = IB = JZ
       ltest = ltest .and.ALL(DESCA(M_   )==(/DESCB(M_   ),DESCZ(M_   )/))
       ltest = ltest .and.ALL(DESCA(N_   )==(/DESCB(N_   ),DESCZ(N_   )/))
       ltest = ltest .and.ALL(DESCA(MB_  )==(/DESCB(MB_  ),DESCZ(MB_  )/))
       ltest = ltest .and.ALL(DESCA(NB_  )==(/DESCB(NB_  ),DESCZ(NB_  )/))
       ltest = ltest .and.ALL(DESCA(RSRC_)==(/DESCB(RSRC_),DESCZ(RSRC_)/))
       ltest = ltest .and.ALL(DESCA(CSRC_)==(/DESCB(CSRC_),DESCZ(CSRC_)/))
!      MOD( IA-1, DESCA( MB_ ) ) = 0
!      MOD( JA-1, DESCA( NB_ ) ) = 0
!      MOD( IB-1, DESCB( MB_ ) ) = 0
!      MOD( JB-1, DESCB( NB_ ) ) = 0

       if (.not.ltest) then
         msg = " Alignment requirements not satisfied, check the caller"
         MSG_PERS_ERROR(msg)
       end if

!      Allocate the arrays for the results of the calculation
       allocate(gap( Slk_matA%processor%grid%dims(1) * Slk_matA%processor%grid%dims(2)))

       if (starts_with(jobz,(/"V","v"/))) then 
         allocate(ifail(Slk_matA%sizeb_global(2)))
         allocate(iclustr( 2*Slk_matA%processor%grid%dims(1) * Slk_matA%processor%grid%dims(2)))
       else 
         allocate(ifail(1))
       end if
!      
!      Get the optimal size of the work arrays.
       lwork=-1; lrwork=-1; liwork=-1
       allocate(work(1),iwork(1))   
       allocate(rwork(3))  ! NB: In ScaLAPACK v1.7 dimension of rwork is max(3,lrwork)
!      This is clearly seen in the source in which rwork(1:3) is accessed  
!      during the calcuation of the workspace size.

       call pzhegvx(ibtype,jobz,range,uplo, Slk_matA%sizeb_global(2),Slk_matA%buffer_cplx,1,1,Slk_matA%descript%tab,&
&       Slk_matB%buffer_cplx,1,1,Slk_matB%descript%tab,&
&       vl,vu,il,iu,abstol,mene_found,nvec_calc,eigen,orfac,&
&       Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,&
&       work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)

       write(msg,'(a,i7)')" Problem to compute workspace to use ScaLAPACK, info: ",info
       ABI_CHECK(info==0,msg)

       lwork  = NINT(REAL(work(1)),kind=dp)
       lrwork = NINT(rwork(1))
       liwork = iwork(1)

       deallocate(work,rwork,iwork)
!      
!      FROM THE SCALAPACK MAN PAGE:
!      The computed eigenvectors may not be orthogonal if the minimal workspace is supplied and ORFAC is too
!      small. If you  want to guarantee orthogonality (at the cost of potentially poor performance) you should
!      add the following to LRWORK: (CLUSTERSIZE-1)*N where CLUSTERSIZE is  the  number  of  eigenvalues  in  the
!      largest cluster, where a cluster is defined as a set of close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
!      W(J+1) <= W(J) + ORFAC*2*norm(A) }. 

       if ( starts_with(jobz,(/"V","v"/)) ) then 
         lrwork = INT( lrwork + Slk_matA%sizeb_global(2) *(Slk_matA%sizeb_global(2)-1) )
       end if

!      ibuff(1) = lwork
!      ibuff(2) = lrwork !INT(lrwork + Slk_matA%sizeb_global(2) *(Slk_matA%sizeb_global(2)-1)
!      ibuff(3) = liwork
       
!      Get the maximum of sizes of the work arrays processor%comm 
!      call MPI_ALLREDUCE(ibuff,max_ibuff,3,MPI_INTEGER,MPI_MAX,communicator,ierr)

!      lwork  = max_ibuff(1)
!      lrwork = max_ibuff(2)
!      liwork = max_ibuff(3)

!      write(*,*)"After workspace2",lwork,lrwork,liwork
!      call MPI_BARRIER(MPI_COMM_WORLD,ierr)

       allocate(work (lwork ))
       allocate(rwork(lrwork))
       allocate(iwork(liwork))
!      
!      Call the scaLAPACK routine. 

!      write(std_out,*) 'I am using PZHEGVX'
       call pzhegvx(ibtype,jobz,range,uplo, Slk_matA%sizeb_global(2),Slk_matA%buffer_cplx,1,1,Slk_matA%descript%tab,&
&       Slk_matB%buffer_cplx,1,1,Slk_matB%descript%tab,&
&       vl,vu,il,iu,abstol,mene_found,nvec_calc, eigen,orfac,&
&       Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,&
&       work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info)

!      Handle the possible error.
       if (info < 0) then 
         write(msg,'(a,i7,a)')" The ",-info,"-th argument of PZHEGVX had an illegal value."
         if (info==-25) msg = " LRWORK is too small to compute all the eigenvectors requested, no computation is performed" 
         MSG_PERS_ERROR(msg)
       end if

       if (info > 0) then
         write(msg,'(a,i7)') " PZHEGVX returned info: ",info
         call wrtout(std_out,msg,"PERS")
         if (MOD(info,2)/=0)then
           write(msg,'(3a)')&
           " One or more eigenvectors failed to converge. ",ch10,&
&           " Their indices are stored in IFAIL. Ensure ABSTOL=2.0*PDLAMCH('U')"
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/2,2)/=0) then
           write(msg,'(5a)')&
&           " Eigenvectors corresponding to one or more clusters of eigenvalues ",ch10,&
&           " could not be reorthogonalized because of insufficient workspace. ",ch10,&
&           " The indices of the clusters are stored in the array ICLUSTR."
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/4,2)/=0) then
           write(msg,'(3a)')&
&           " Space limit prevented PZHEGVX from computing all of the eigenvectors between VL and VU. ",ch10,&
&           " The number of eigenvectors  computed  is returned in NZ."
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/8,2)/=0) then 
           msg = " PZSTEBZ  failed to compute eigenvalues. Ensure ABSTOL=2.0*PDLAMCH('U')"
           call wrtout(std_out,msg,"PERS")
         end if
         if (MOD(info/16,2)/=0) then 
           write(msg,'(3a)')&
&           " B was not positive definite.",ch10,&
&           " IFAIL(1) indicates the order of the smallest minor which is not positive definite."
           call wrtout(std_out,msg,"PERS")
         end if 
         MSG_PERS_ERROR("Cannot continue")
       end if

!      Check the number of eigenvalues found wrt to the number of vectors calculated.
       if ( starts_with(jobz,(/'V','v'/)) .and. mene_found/=nvec_calc) then
         write(msg,'(5a)')&
&         " The user supplied insufficient space and PZHEGVX is not able to detect this before beginning computation. ",ch10,&
&         " To get all the  eigenvectors requested, the user must supply both sufficient space to hold the ",ch10,&
&         " eigenvectors in Z (M .LE. DESCZ(N_)) and sufficient workspace to compute them. "
         MSG_PERS_ERROR(msg)
       end if

       deallocate(work,rwork,iwork)
       deallocate(gap)

       if ( starts_with(jobz,(/"V","v"/)) ) then 
         deallocate(iclustr)
       end if
       deallocate(ifail)

     end subroutine slk_pzhegvx
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_zinvert
!! NAME
!! slk_zinvert
!!
!! FUNCTION
!!  slk_zinvert provides an object-oriented interface to the ScaLAPACK set of routines used to compute 
!!  the inverse of a complex matrix in double precision
!!
!! SIDE EFFECTS
!!  Slk_mat<type(matrix_scalapack)>=The object storing the local buffer, the array descriptor, the context 
!!    and other quantities needed to call ScaLAPACK routines. In input, the matrix to invert, in output
!!    the matrix inverted and distributed among the nodes.
!!
!! PARENTS
!!      exceig,m_abilasi
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_zinvert(Slk_mat)

 use defs_basis
 use defs_scalapack
 use m_errors

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => slk_zinvert
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 type(matrix_scalapack),intent(inout) :: Slk_mat

!Local variables ------------------------------
!scalars
 integer :: lwork,info !,istat
 integer :: ipiv_size,liwork
 character(len=500) :: msg
!array
 integer,allocatable :: ipiv(:),iwork(:)
 complex(dpc),allocatable :: work(:)

!************************************************************************

       ABI_CHECK(associated(Slk_mat%buffer_cplx),"buffer_cplx not associated")

!      IMPORTANT NOTE: PZGETRF requires square block decomposition i.e.,  MB_A = NB_A.
       if ( Slk_mat%descript%tab(MB_)/=Slk_mat%descript%tab(NB_) ) then
         msg =" PZGETRF requires square block decomposition i.e.,  MB_A = NB_A."
         MSG_PERS_ERROR(msg)
       end if

       ipiv_size = my_locr(Slk_mat) + Slk_mat%descript%tab(MB_)  
       allocate(ipiv(ipiv_size))

       call PZGETRF(Slk_mat%sizeb_global(1),Slk_mat%sizeb_global(2),Slk_mat%buffer_cplx,&
&       1,1,Slk_mat%descript%tab,ipiv,info) ! P * L * U  Factorization.

       if (info/=0) then 
         write(msg,'(a,i7)')" PZGETRF returned info= ",info
         MSG_PERS_ERROR(msg)
       end if

!      Get optimal size of workspace for PZGETRI.
       lwork=-1; liwork=-1
       allocate(work(1),iwork(1))

       call PZGETRI(Slk_mat%sizeb_global(1),Slk_mat%buffer_cplx,1,1,Slk_mat%descript%tab,ipiv,&
&       work,lwork,iwork,liwork,info)

       ABI_CHECK(info==0,"PZGETRI: Error during compuation of workspace size")

       lwork = NINT(REAL(work(1))); liwork=iwork(1)
       deallocate(work,iwork)

!      Solve the problem.
       allocate(work(lwork),iwork(liwork))

       call PZGETRI(Slk_mat%sizeb_global(1),Slk_mat%buffer_cplx,1,1,Slk_mat%descript%tab,ipiv,&
&       work,lwork,iwork,liwork,info)

       if (info/=0) then 
         write(msg,'(a,i7)')" PZGETRI returned info= ",info
         MSG_PERS_ERROR(msg)
       end if

       deallocate(work,iwork)
       deallocate(ipiv)

     end subroutine slk_zinvert
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_write
!! NAME
!!  slk_write
!!
!! FUNCTION
!!  Routine to write a squared scaLAPACK distributed matrix on an external file using MPI-IO.
!!
!! INPUTS
!!  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer 
!!    containing the distributed matrix.
!!  uplo=String specifying whether only the upper or lower triangular part of the global matrix is used:
!!    = "U":  Upper triangular
!!    = "L":  Lower triangular
!!    = "A":  Full matrix (used for general complex matrices)
!!  fname=The name of the external file to be written.
!!  use_mpiio=if .TRUE. MPI-IO routines are used.
!!  [offset]=Offset, default is zero.
!!  [flags]=MPI-IO flags used to open the file in MPI_FILE_OPEN.
!!    Default is MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_EXCL.
!!
!! OUTPUT
!!  Only writing. The global scaLAPACK matrix is written on file fname.
!!  The file is opened and closed inside the routine. Any exception is fatal.
!!
!! TODO 
!!  * Check the portability of Offset if Fortran direct access is used. On some architectures indeed
!!    recl4dpc might be given in words instead of bytes
!!  * Generalize the implementation adding the writing the real buffer.
!!
!! PARENTS
!!      exceig
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_write(Slk_mat,uplo,fname,use_mpiio,offset,flags)

 use defs_basis
 use defs_scalapack
 use m_errors

 use m_io_tools,    only : get_unit

#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => slk_write
!End of the abilint section

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
!scalars
 integer,optional,intent(in) :: flags
 integer(kind=i8b),optional,intent(in) :: offset
 logical,intent(in) :: use_mpiio
 character(len=*),intent(in) :: fname 
 character(len=*),intent(in) :: uplo
 type(matrix_scalapack),intent(in) :: Slk_mat

!Local variables ------------------------------
!scalars
 integer :: jloc,iloc,iglob,jglob,nrows_glob,ij_glob,ijp_glob
 integer :: tmp_unt,recl4dpc,ios
 integer(kind=i8b) :: srec
#if defined HAVE_MPI_IO
 integer :: spaceComm,my_flags,fh,buffer_size,filetype,ierr,ilen,ierr2,ij_loc,istat
 integer(kind=MPI_OFFSET_KIND) :: my_offset
 character(len=5000) :: mpi_msg_error
!arrays
 integer,allocatable :: map(:)
 complex(dpc),allocatable :: buffer1_cplx(:)
#else 
 integer(kind=i8b) :: my_offset
#endif
 character(len=500) :: msg

!************************************************************************

       ABI_CHECK(associated(Slk_mat%buffer_cplx),"%buffer_cplx not associated")

       if (use_mpiio) then
#ifdef HAVE_MPI_IO
         spaceComm = Slk_mat%processor%comm

!        Open the file.
!        my_flags=MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_EXCL ! MPI_MODE_APPEND
!        my_flags=MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_EXCL + MPI_MODE_APPEND
         my_flags=MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_APPEND
         if (PRESENT(flags)) my_flags = flags
         
         call MPI_FILE_OPEN(spaceComm, fname, my_flags, MPI_INFO_NULL, fh, ierr)
         msg = " MPI_FILE_OPEN "//TRIM(fname)
         ABI_CHECK_MPI(ierr,msg)

!        Define the mapping between the scaLAPACK buffer and the storage on file.
         nrows_glob=Slk_mat%sizeb_global(1)
         buffer_size= PRODUCT(Slk_mat%sizeb_local(1:2))
         allocate(map(buffer_size))

         select case (uplo(1:1))

           case ("A","a") ! The entire global matrix is written.
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 ij_glob = iglob + (jglob-1)*nrows_glob
                 ij_loc  = ij_loc+1
                 map(ij_loc) = ij_glob-1  ! C indexing
               end do
             end do

           case ("U","u") ! Only the upper triangle of the global matrix is written.

             allocate(buffer1_cplx(buffer_size), STAT=istat)

             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob>=iglob) then
                   ijp_glob = iglob + jglob*(jglob-1)/2  ! Index for packed form
                   ij_loc = ij_loc+1
                   buffer1_cplx(ij_loc) = Slk_mat%buffer_cplx(iloc,jloc)
                   map(ij_loc) = ijp_glob-1  ! C indexing
                 end if
               end do
             end do

             buffer_size = ij_loc

           case ("L","l") ! Only the lower triangle of the global matrix is written.

             allocate(buffer1_cplx(buffer_size), STAT=istat)
             
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob<=iglob) then
                   ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form
                   ij_loc = ij_loc+1
                   buffer1_cplx(ij_loc) = Slk_mat%buffer_cplx(iloc,jloc)
                   map(ij_loc) = ijp_glob-1  ! C indexing
                 end if
               end do
             end do

             buffer_size = ij_loc

             case default
             MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
         end select

         call MPI_TYPE_CREATE_INDEXED_BLOCK(buffer_size, 1, map, MPI_DOUBLE_COMPLEX, filetype, ierr)
         ABI_CHECK_MPI(ierr,"INDEXED_BLOCK")
         deallocate(map)

         call MPI_TYPE_COMMIT(filetype, ierr)
         ABI_CHECK_MPI(ierr,"TYPE_COMMIT")

         my_offset=0 
         if (PRESENT(offset)) then
           my_offset=INT(offset,kind=MPI_OFFSET_KIND)
!          Check the kind as abilint complains if offset is defined with kind=MPI_OFFSET_KIND,
           msg="KIND(offset)/=MPI_OFFSET_KIND, check the source"
           ABI_CHECK(KIND(offset)==MPI_OFFSET_KIND,msg)
         end if
         
         call MPI_FILE_SET_VIEW(fh, my_offset, MPI_DOUBLE_COMPLEX, filetype, 'native', MPI_INFO_NULL, ierr)
         ABI_CHECK_MPI(ierr,"SET_VIEW")

!        Collective writing of the local buffer.
         select case (uplo(1:1))

           case ("A","a") ! The entire global matrix is written on file.
             call MPI_FILE_WRITE_ALL(fh, Slk_mat%buffer_cplx, buffer_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, ierr)
             ABI_CHECK_MPI(ierr,"WRITE_ALL")

           case ("U","u","L","l") ! Only the upper or the lower triangle of the global matrix is written on file.
             call MPI_FILE_WRITE_ALL(fh, buffer1_cplx, buffer_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, ierr)
             ABI_CHECK_MPI(ierr,"WRITE_ALL")
             deallocate(buffer1_cplx)
         end select
         
!        Close the file and release the MPI filetype.
         call MPI_FILE_CLOSE(fh, ierr)               
         ABI_CHECK_MPI(ierr,"FILE_CLOSE")

         call MPI_TYPE_FREE(filetype, ierr)
         ABI_CHECK_MPI(ierr,"TYPE_FREE")

         call MPI_BARRIER(spaceComm, ierr)
         RETURN

#else
         MSG_ERROR("MPI-IO support not activated")
#endif 

       else ! Code without MPI-IO. Fortran direct access mode is used.
         my_offset=0; if (PRESENT(offset)) my_offset=offset

         recl4dpc = get_reclen("dpc"); srec=my_offset/recl4dpc
         tmp_unt = get_unit()
         open(unit=tmp_unt,file=fname,access="direct",recl=recl4dpc,iostat=ios)
         msg= " Opening file "//TRIM(fname)
         ABI_CHECK(ios==0,msg)

         select case (uplo(1:1))

           case ("A","a") ! The entire global matrix is written on file.
             nrows_glob=Slk_mat%sizeb_global(1)
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 ij_glob = iglob + (jglob-1)*nrows_glob
                 write(tmp_unt,rec=ij_glob+srec) Slk_mat%buffer_cplx(iloc,jloc)
               end do
             end do

           case ("U","u") ! Only the upper triangle of the global matrix is written on file.
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob>=iglob) then
                   ijp_glob = iglob + jglob*(jglob-1)/2  ! Index for packed form
                   write(tmp_unt,rec=ijp_glob+srec) Slk_mat%buffer_cplx(iloc,jloc)
                 end if
               end do
             end do

           case ("L","l") ! Only the lower triangle of the global matrix is written on file.
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob<=iglob) then
                   ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form
                   write(tmp_unt,rec=ijp_glob+srec) Slk_mat%buffer_cplx(iloc,jloc)
                 end if
               end do
             end do

             case default
             MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
         end select

         close(tmp_unt)
         RETURN
       end if ! use_mpiio

     end subroutine slk_write
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/slk_read
!! NAME
!!  slk_read
!!
!! FUNCTION
!!  Routine to read a squared scaLAPACK distributed matrix from an external file using MPI-IO.
!!
!! INPUTS
!!  uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk:
!!    = "U":  Upper triangular is stored
!!    = "L":  Lower triangular is stored
!!    = "A":  Full matrix (used for general complex matrices)
!!  symtype=Symmetry type of the matrix stored on disk (used only if uplo = "L" or "A").
!!    = "H" for Hermitian matrix
!!    = "S" for symmetric matrix.
!!    = "N" if matrix has no symmetry (not compatible with uplo="L" or uplo="U".
!!  fname=The name of the external file to be written.
!!  use_mpiio=if .TRUE. MPI-IO routines are used.
!!  [offset]=Offset, default is zero.
!!  [flags]=MPI-IO flags used to open the file in MPI_FILE_OPEN. Default is MPI_MODE_RDONLY.
!!
!! SIDE EFFECTS
!!  Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer 
!!    supposed to be allocated.
!!    %buffer_cplx=Local buffer containg the distributed matrix stored on the external file.
!!  The file is opened and closed inside the routine. Any exception is fatal.
!!
!! TODO 
!!  * Check the portability of Offset if Fortran direct access is used. On some architectures indeed
!!    recl4dpc might be given in words instead of bytes
!!  * Generalize the implementation adding the writing the real buffer.
!!
!! PARENTS
!!      exceig
!!
!! CHILDREN
!!
!! SOURCE

subroutine slk_read(Slk_mat,uplo,symtype,fname,use_mpiio,offset,flags)

 use defs_basis
 use defs_scalapack
 use m_errors

 use m_io_tools,  only : get_unit

#if defined HAVE_MPI && defined HAVE_MPI2
 use mpi
#endif

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_51_manage_mpi, except_this_one => slk_read
!End of the abilint section

 implicit none

#if defined HAVE_MPI && defined HAVE_MPI1
 include 'mpif.h'
#endif

!Arguments ------------------------------------
!scalars
 integer,optional,intent(in) :: flags
 integer(kind=i8b),optional,intent(in) :: offset
 logical,intent(in) :: use_mpiio
 character(len=*),intent(in) :: fname,symtype
 character(len=*),intent(in) :: uplo
 type(matrix_scalapack),intent(inout) :: Slk_mat

!Local variables ------------------------------
!scalars
 integer :: jloc,iloc,iglob,jglob,nrows_glob,ij_glob,ijp_glob
 logical :: is_hermitian
 integer :: tmp_unt,recl4dpc,ios,srec
#if defined HAVE_MPI_IO
 integer :: spaceComm,my_flags,fh,buffer_size,filetype,ierr,ilen,ierr2,ij_loc,istat
 integer(kind=MPI_OFFSET_KIND) :: my_offset
 character(len=5000) :: mpi_msg_error
!arrays
 integer,allocatable :: map(:)
 complex(dpc),allocatable :: buffer1_cplx(:)
#else
 integer(kind=i8b) :: my_offset
#endif
 character(len=500) :: msg

!************************************************************************

       ABI_CHECK(associated(Slk_mat%buffer_cplx),"%buffer_cplx not associated")

       select case (symtype(1:1))
         case ("H","h") 
           is_hermitian=.TRUE.
         case ("S","s") 
           is_hermitian=.FALSE.
         case("N","n")
           if ( ALL(uplo(1:1)/=(/"A","a"/)) ) then
             msg = " Found symtype= "//TRIM(symtype)//", but uplo= "//TRIM(uplo)
             MSG_ERROR(msg)
           end if
           case default
           MSG_ERROR("Wrong symtype "//TRIM(symtype))
       end select

       nrows_glob=Slk_mat%sizeb_global(1)

       if (use_mpiio) then
#if defined HAVE_MPI_IO
         write(*,*)"slk_read: Using MPI-IO"
         spaceComm = Slk_mat%processor%comm

!        Open the file.
         my_flags=MPI_MODE_RDONLY; if (PRESENT(flags)) my_flags = flags
         
         call MPI_FILE_OPEN(spaceComm, fname, my_flags, MPI_INFO_NULL, fh, ierr)
         msg = " FILE_OPEN "//TRIM(fname)
         ABI_CHECK_MPI(ierr,msg)

!        Define the mapping between scaLAPACK buffer and the storage on file.
         buffer_size= PRODUCT(Slk_mat%sizeb_local(1:2))
         allocate(map(buffer_size))

         select case (uplo(1:1))

           case ("A","a") ! The entire global matrix is stored on disk.
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 ij_glob = iglob + (jglob-1)*nrows_glob
                 ij_loc  = ij_loc+1
                 map(ij_loc) = ij_glob-1  ! C indexing
               end do
             end do

           case ("U","u") ! Only the upper triangle of the global matrix is stored on disk.
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob>=iglob) then
                   ijp_glob = iglob + jglob*(jglob-1)/2  ! Index for packed form
                 else
                   ijp_glob = jglob + iglob*(iglob-1)/2  ! Index for packed form
                 end if
                 ij_loc = ij_loc+1
                 map(ij_loc) = ijp_glob-1  ! C indexing
               end do
             end do

           case ("L","l") ! Only the lower triangle of the global matrix is stored on disk.
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob<=iglob) then
                   ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form
                 else
                   ijp_glob = jglob + (iglob-1)*(2*nrows_glob-iglob)/2 ! Index for packed form
                 end if
                 ij_loc = ij_loc+1
                 map(ij_loc) = ijp_glob-1  ! C indexing
               end do
             end do

             case default
             MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
         end select

         call MPI_TYPE_CREATE_INDEXED_BLOCK(buffer_size, 1, map, MPI_DOUBLE_COMPLEX, filetype, ierr)
         ABI_CHECK_MPI(ierr,"INDEXED_BLOCK")
         deallocate(map)

         call MPI_TYPE_COMMIT(filetype, ierr)
         ABI_CHECK_MPI(ierr,"TYPE_COMMIT")

         my_offset=0
         if (PRESENT(offset)) then
           my_offset=INT(offset,kind=MPI_OFFSET_KIND)
!          Check the kind as abilint complains if offset is defined with kind=MPI_OFFSET_KIND,
           ABI_CHECK(KIND(offset)==MPI_OFFSET_KIND,"KIND(offset)/=MPI_OFFSET_KIND, check the source") 
         end if

         call MPI_FILE_SET_VIEW(fh, my_offset, MPI_DOUBLE_COMPLEX, filetype, 'native', MPI_INFO_NULL, ierr)
         ABI_CHECK_MPI(ierr,"SET_VIEW")

!        Collective reading of the local buffer.
         select case (uplo(1:1))

           case ("A","a") ! Full global matrix is used.
             call MPI_FILE_READ_ALL(fh, Slk_mat%buffer_cplx, buffer_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, ierr)

           case ("U","u") ! Only the upper triangle of the global matrix is used.

             allocate(buffer1_cplx(buffer_size), STAT=istat)
             call MPI_FILE_READ_ALL(fh, buffer1_cplx, buffer_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, ierr)
             ABI_CHECK_MPI(ierr,"READ_ALL")

!            Reconstruct the local matrix from buffer1_cplx.
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 ij_loc = ij_loc+1
                 if (jglob>=iglob) then
                   Slk_mat%buffer_cplx(iloc,jloc) =  buffer1_cplx(ij_loc)
                 else
                   if (is_hermitian) then
                     Slk_mat%buffer_cplx(iloc,jloc) =  DCONJG(buffer1_cplx(ij_loc))
                   else
                     Slk_mat%buffer_cplx(iloc,jloc) =         buffer1_cplx(ij_loc)
                   end if
                 end if
               end do
             end do

             deallocate(buffer1_cplx)

           case ("L","l") ! Only the lower triangle of the global matrix is used.

             allocate(buffer1_cplx(buffer_size), STAT=istat)
             call MPI_FILE_READ_ALL(fh, buffer1_cplx, buffer_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, ierr)
             ABI_CHECK_MPI(ierr,"READ_ALL")

!            Reconstruct the local matrix from buffer1_cplx.
             ij_loc=0
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 ij_loc = ij_loc+1
                 if (jglob<=iglob) then
                   Slk_mat%buffer_cplx(iloc,jloc) =  buffer1_cplx(ij_loc)
                 else
                   if (is_hermitian) then 
                     Slk_mat%buffer_cplx(iloc,jloc) =  DCONJG(buffer1_cplx(ij_loc))
                   else 
                     Slk_mat%buffer_cplx(iloc,jloc) =         buffer1_cplx(ij_loc)
                   end if
                 end if
               end do
             end do

             deallocate(buffer1_cplx)

         end select

!        Close the file and release the MPI filetype.
         call MPI_FILE_CLOSE(fh, ierr)               
         ABI_CHECK_MPI(ierr,"FILE_CLOSE")
         
         call MPI_TYPE_FREE(filetype, ierr)
         ABI_CHECK_MPI(ierr,"TYPE_FREE")

         call MPI_BARRIER(spaceComm, ierr)

         RETURN
#else
         MSG_ERROR("MPI-IO support not enabled")
#endif

       else  ! Code without MPI-IO. Fortran direct access mode is used.
!        
         write(*,*)"slk_read: Using FORTRAN-IO"
         my_offset=0; if (PRESENT(offset)) my_offset=offset
         
         recl4dpc = get_reclen("dpc"); srec=my_offset/recl4dpc
         tmp_unt = get_unit()
         open(unit=tmp_unt,file=fname,access="direct",recl=recl4dpc,iostat=ios)
         msg= " Opening file "//TRIM(fname)
         ABI_CHECK(ios==0,msg)

         select case (uplo(1:1))
           
           case ("A","a") ! The entire global matrix is stored on disk
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 ij_glob = iglob + (jglob-1)*nrows_glob
                 read(tmp_unt,rec=ij_glob+srec) Slk_mat%buffer_cplx(iloc,jloc)
               end do
             end do
             
           case ("U","u") ! Only the upper triangle of the global matrix is stored on disk.
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob>=iglob) then
                   ijp_glob = iglob + jglob*(jglob-1)/2  ! Index for packed form
                 else
                   ijp_glob = jglob + iglob*(iglob-1)/2  ! Index for packed form
                 end if
                 read(tmp_unt,rec=ijp_glob+srec) Slk_mat%buffer_cplx(iloc,jloc)
                 if (is_hermitian.and.jglob<iglob) Slk_mat%buffer_cplx(iloc,jloc) = DCONJG( Slk_mat%buffer_cplx(iloc,jloc) )
               end do
             end do
             
           case ("L","l") ! Only the lower triangle of the global matrix is stored on disk.
             do jloc=1,Slk_mat%sizeb_local(2) 
               do iloc=1,Slk_mat%sizeb_local(1) 
                 call idx_glob(Slk_mat,iloc,jloc,iglob,jglob)
                 if (jglob<=iglob) then
                   ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form
                 else
                   ijp_glob = jglob + (iglob-1)*(2*nrows_glob-iglob)/2 ! Index for packed form
                 end if
                 read(tmp_unt,rec=ijp_glob+srec) Slk_mat%buffer_cplx(iloc,jloc)
                 if (is_hermitian.and.jglob>iglob) Slk_mat%buffer_cplx(iloc,jloc) = DCONJG( Slk_mat%buffer_cplx(iloc,jloc) )
               end do
             end do
             
             case default
             MSG_PERS_BUG(" Wrong uplo: "//TRIM(uplo))
         end select

         close(tmp_unt)
         RETURN
       end if ! use_mpiio

     end subroutine slk_read
!!***

!!****f* ABINIT/no_scalapack
!! NAME
!!  no_scalapack
!!
!! FUNCTION
!!
!! INPUTS
!!
!! OUTPUT
!!
!! SIDE EFFECTS
!!
!! PARENTS
!!
!! CHILDREN
!!
!! SOURCE  

#else
subroutine no_scalapack


 implicit none

end subroutine no_scalapack

#endif
!!***
