!{\src2tex{textfont=tt}}
!!****f* ABINIT/pawmkaewf
!! NAME
!! pawmkaewf
!!
!! FUNCTION
!! Construct complete AE wave functions on the fine FFT grid adding onsite PAW corrections.
!!
!! COPYRIGHT
!! Copyright (C) 2008-2010 ABINIT group (MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~ABINIT/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!!
!! INPUTS
!! dimcprj(natom)=array of dimensions of array cprj (not ordered)
!! natom=number of atoms in cell
!! ntypat=number of types of atoms in the cell
!! mpw=maximum dimensioned size of npw.
!! mband=maximum number of bands
!! mkmem=number of k points which can fit in memory; set to 0 if use disk
!! nkpt=Total number of k-points
!! nsppol=1 for unpolarized, 2 for spin-polarized
!! nspinor=number of spinorial components of the wavefunctions
!! unks=unit number for G vectors.
!! nband(nkpt*nsppol)=Number of bands for each k-point and spin.
!! istwfk(nkpt)=Storage mode at each k-point.
!! paral_kgb=Option for kgb parallelism
!! Pawfgrtab(natom) <type(pawfgrtab_type)> : data about the fine grid around each atom
!! Pawrad(ntypat) <type(pawrad_type)> : radial mesh data for each type of atom
!! Pawtab(ntypat) <type(pawtab_type)> : PAW functions around each type of atom
!! Psps <type(pseudopotential_type)> : basic pseudopotential data
!! Dtfil <type(datafiles_type)>=variables related to files
!! typat(natom) : list of atom types
!! cg(2,mpw*nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions.
!! Cprj(natom,nspinor*mband*mkmem*nsppol)=<p_lmn|Cnk> coefficients for each WF |Cnk>
!!   and each |p_lmn> non-local projector
!! Wffnow=struct info for current wf disk file
!! npwarr(nkpt)=Number of plane waves at each k-point
!! ngfftf(18)=contain all needed information about 3D FFT, see ~abinit/doc/input_variables/vargs.htm#ngfft
!!  Note that ngfftf refers to the fine mesh.
!! kg(3,mpw*mkmem)=reduced planewave coordinates
!! eigen(mband*nkpt*nsppol)=eigenvalues (hartree) for all bands at each k point
!! occ(mband*nkpt*nsppol)=occupations for all bands at each k point
!! Hdr<hdr_type>=the header of wf, den and pot files
!! kpt(3,nkpt)=reduced coordinates of k points.
!!
!! OUTPUT
!!  ierr=Status error
!!  Main output is written on file (NETCDF file format).
!!
!! NOTES
!! In PAW calculations, the pseudized wavefunction us represented
!! on a relatively small plane wave basis set and is not normalized
!! as it does not include the on-site PAW contributions which is described
!! in terms of real spherical harmonics and radial functions.
!! For post-processing and proper visualization, it is necessary
!! to use the full electronic wave function, which is what this subroutine constructs.
!! Specifically, it computes the pseudo part by doing an FFT from G- to r-space
!! using the dense mesh defined by pawecutdg. The on-site PAW terms are also
!! computed in real space inside each sphere and added to the pseudo part.
!! Notice that this formula is expressed on the fine grid, and requires
!! interpolating the PAW radial functions onto this grid, as well as calling
!! initylmr in order to get the angular functions on the grid points.
!!
!! PARENTS
!!      outscfcv
!!
!! CHILDREN
!!      abi_etsf_dims_init,abi_etsf_electrons_put,abi_etsf_geo_put,cprj_alloc
!!      cprj_diskinit_r,cprj_free,cprj_get,etsf_io_data_init
!!      etsf_io_electrons_put,etsf_io_file_merge,etsf_io_low_close
!!      etsf_io_low_error_to_str,etsf_io_low_open_modify
!!      etsf_io_low_set_write_mode,etsf_io_main_def,etsf_io_main_put,fourwf
!!      hdr_io_etsf,hdr_skip,ini_wf_etsf,initylmr,int2char4,rdnpw,rwwf,sort_dp
!!      sphereboundary,spline,splint,wrtout,xbarrier_mpi,xcomm_init,xdefineoff
!!      xmaster_init,xmax_mpi,xme_init
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine pawmkaewf(Dtset,natom,mpw,nspinor,mband,nkpt,mkmem,nsppol,ntypat,nband,istwfk,npwarr,kpt,&
& paral_kgb,ngfftf,kg,dimcprj,Pawfgrtab,Pawrad,Pawtab,&
& Psps,Hdr,Dtfil,typat,eigen,occ,cg,Cprj,Wffnow,MPI_enreg,ierr)

 use defs_basis
 use defs_datatypes
 use defs_abitypes
 use defs_wvltypes
 use m_xmpi
 use m_errors
 use m_splines
 use m_wffile

#if defined HAVE_ETSF_IO
 use etsf_io
 !use etsf_io_file,     only : etsf_io_file_merge
 use m_abi_etsf,       only : abi_etsf_dims_init
#endif

 use m_numeric_tools,  only : imax_loc
 use m_crystal,        only : destroy_crystal, crystal_structure
 use m_crystal_io,     only : init_crystal_from_hdr

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
 use interfaces_14_hidewrite
 use interfaces_27_toolbox_oop
 use interfaces_28_numeric_noabirule
 use interfaces_32_util
 use interfaces_51_manage_mpi
 use interfaces_53_abiutil
 use interfaces_53_ffts
 use interfaces_59_io_mpi
 use interfaces_61_ionetcdf
 use interfaces_62_iowfdenpot
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: natom,ntypat
 integer,intent(in) :: mband,mkmem,mpw,nsppol,paral_kgb,nkpt
 integer,intent(in) :: nspinor
 integer,intent(out) :: ierr
 type(Datafiles_type),intent(in) :: Dtfil
 type(pseudopotential_type),intent(in) :: Psps
 type(MPI_type),intent(inout) :: MPI_enreg
 type(wffile_type),intent(inout) :: Wffnow
 type(hdr_type),intent(inout) :: Hdr
 type(dataset_type),intent(in) :: Dtset
!arrays
 integer,intent(in) :: typat(natom),nband(nkpt*nsppol),istwfk(nkpt),npwarr(nkpt),dimcprj(natom)
 integer,intent(in) :: ngfftf(18),kg(3,mpw*mkmem)
 real(dp),intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol)
 real(dp),target,intent(in) :: eigen(mband*nkpt*nsppol)
 real(dp),target,intent(in) :: occ(mband*nkpt*nsppol)
 real(dp),intent(in) :: kpt(3,nkpt)
 type(pawfgrtab_type),intent(in) :: Pawfgrtab(natom)
 type(pawrad_type),intent(in) :: Pawrad(ntypat)
 type(pawtab_type),intent(in) :: Pawtab(ntypat)
 type(cprj_type),intent(in) :: Cprj(natom,nspinor*mband*mkmem*nsppol*Psps%usepaw)

!Local variables-------------------------------
!scalars
 integer :: bdtot_index,formeig,iband,icg,mgfftf,istat
 integer :: iatom,ifgd,ifftsph,ifft,inl,ipsang,itypat,ispinor,ipw,ndat,ii,i1,i2,i3
 integer :: jl,jlm,jln,jm,jlmn
 integer :: nsp,nfgd,ln_size,lmn_size,normchoice,option,option_ylmr,tim_rwwf,tim_fourwf
 integer :: iorder_cprj,spaceComm,rank,ibsp,ibg,isppol,ikpt,nband_k,cplex,master
 integer :: mcg_disk,n1,n2,n3,n4,n5,n6,ikg,npwout,istwf_k,npw_k
 integer :: indx,mesh_size,nfftot,my_spin,nprocs
 real(dp) :: phj,rR,tphj,ybcbeg,ybcend,re_p,im_p,weight,norm,norm_rerr,max_rerr,imur,reur
 character(len=500) :: msg
!arrays
 integer,allocatable :: gbound(:,:),kg_dum(:,:),kg_k(:,:)
 integer,allocatable :: atindx(:),atindx1(:),nattyp(:)
 integer,allocatable,target :: my_kpoints(:),my_spins(:)
 integer,allocatable :: my_kstable(:,:),my_nkpt(:)
 integer, allocatable :: iperm(:)
 real(dp) :: ylm_gr(3,3,0),rfft(3)
 real(dp) :: kpoint(3)
 real(dp),allocatable :: ff(:),nrm(:),nrm_sort(:),phigrd(:,:),tphigrd(:,:),ylm_tmp(:,:),ylm(:,:)
 real(dp),allocatable :: cg_disk(:,:),fofgin(:,:),fofgout(:,:)
 real(dp),allocatable :: eig_dum(:),occ_dum(:),denpot(:,:,:)
 real(dp),allocatable :: fofr(:,:,:,:),phkr(:,:)
 real(dp),allocatable,target :: ur(:,:)
 type(cprj_type),allocatable :: Cprj_k(:,:)

 type paw_partialwaves_type
  real(dp),pointer :: phi(:,:)
  real(dp),pointer :: tphi(:,:)
 end type paw_partialwaves_type
 type(paw_partialwaves_type),allocatable :: Paw_spline(:),Paw_onsite(:)

#if defined HAVE_ETSF_IO
 integer :: irank,fform,ncid,rdwr,var_main
 logical :: kdep,lstat
 character(len=4) :: tag
 character(len=80) :: file_title
 character(len=fnlen) :: my_fname,my_basename,out_file
 character(len=etsf_io_low_error_len) :: errmess
 character(len=256),allocatable :: merge_files(:)
 type(etsf_dims) :: Dims
 type(etsf_groups_flags) :: Groups_flags
 !type(etsf_groups) :: Groups
 type(etsf_split) :: Split
 type(etsf_main) :: MainFolder
 type(etsf_io_low_error) :: Error
 type(etsf_electrons) :: Electrons_folder
 type(Wvl_wf_type) :: Dummy_wfs
#endif

! ************************************************************************

 DBG_ENTER("COLL")

!Compatibility tests
 ABI_CHECK(paral_kgb==0,"paral_kgb/=0 not coded")
 ABI_CHECK(MPI_enreg%mode_para/='b',"mode_para=b not coded")
 ABI_CHECK(SIZE(dimcprj)>0,"dimcprj should be allocated")
 ABI_CHECK(nspinor==1,"nspinor not tested, check fourwf")

 ierr=0
#ifndef HAVE_ETSF_IO
 ierr=-1
 write(msg,'(3a)')&
& " ETSF-IO support must be enabled in order to output AE PAW wavefunction. ",ch10,&
& " No output will be produced, use --enable-etsf-io at configure-time. "
 MSG_WARNING(msg)
 RETURN
!These statements are necessary to avoid the compiler complain about unused variables:
 ii=Dtset%usepaw;ii=Dtfil%unpaw;ii=Hdr%usepaw
 rR=eigen(1);rR=occ(1)
#endif

!Init parallelism
 call xcomm_init(MPI_enreg,spaceComm)
 call xmaster_init(MPI_enreg,master)
 call xme_init(MPI_enreg,rank)
 nprocs = xcomm_size(spaceComm)

 ABI_CHECK(nprocs==1,"k spin parallelism not yet active")

 if (MPI_enreg%paral_compil_kpt==1)then
   call wrtout(std_out,'pawmkaewf: loop on k-points and spins done in parallel','COLL')
   call xbarrier_mpi(spaceComm)
 end if

 mgfftf=MAXVAL(ngfftf(1:3))

!=== Calculate my list of k-points and spin ===
!* my_kstable gives the sequential index for each k-point treated by rank.
!* cannot check for MPI_enreg%proc_distrb if nprocs ==1
 allocate(my_kstable(nkpt,nsppol))
 my_kstable=0

 if (nprocs==1) then

   ii=0
   do ikpt=1,nkpt
     ii=ii+1; my_kstable(ikpt,:) = ii
   end do

   allocate(my_spins(nsppol))
   do isppol=1,nsppol
     my_spins(isppol)=isppol
   end do

   allocate(my_kpoints(nkpt))
   do ikpt=1,nkpt
     my_kpoints(ikpt) = ikpt
   end do

 else ! parallelism over k and spin.

   do isppol=1,nsppol
     ii=0
     do ikpt=1,nkpt
       nband_k = nband(ikpt+(isppol-1)*nkpt)
       if (ALL(MPI_enreg%proc_distrb(ikpt,1:nband_k,isppol)==rank)) then
         ii=ii+1
         my_kstable(ikpt,isppol)=ii
       end if
     end do
   end do

   allocate(my_nkpt(nsppol))
   do isppol=1,nsppol
     my_nkpt(isppol) = COUNT(my_kstable(:,isppol)>0)
   end do

!  Each node has to deal with a single spin.
   if (nsppol>1 .and. ALL(my_nkpt>0)) then
     msg =' Non optimal distribution, some wave functions won''t be correctly initialized.'
     MSG_ERROR(msg)
   end if
   my_spin = imax_loc(my_nkpt)

   allocate(my_spins(1))
   my_spins(1)=my_spin

   allocate(my_kpoints(my_nkpt(my_spin)))
   ii=0
   do ikpt=1,nkpt
     if (my_kstable(ikpt,my_spin)/=0) then
       ii=ii+1
       my_kpoints(ii) = ikpt
     end if
   end do

   deallocate(my_nkpt)

 end if ! nprocs==1

#if defined HAVE_ETSF_IO

!=== Initialize NETCDF files ===
 my_basename=dtfil%fnameabo_ae_wfk

!* For parallel case: the index of the processor must be appended.
!XG 100108 : One would better have it done in dtfil_init1 !!!!
 if (nprocs>1) then
   allocate(merge_files(nprocs))
   do irank=1,nprocs
     call int2char4(irank,tag)
     merge_files(irank)=TRIM(Dtfil%filnam_ds(4))//'_P-'//tag//"_AE_WFK-etsf.nc"
     if (irank==rank+1) then
       my_basename=TRIM(Dtfil%filnam_ds(4))//'_P-'//tag//"_AE_WFK"
     end if
   end do
!  my_basename=TRIM(Dtfil%filnam_ds(4))//'_P-'//tag//"_AE_WFK"
!  my_basename=merge_files(rank+1)
 end  if

 my_fname=TRIM(my_basename)//"-etsf.nc"

 write(msg,'(2a)')' Opening file for AE PAW wave functions: ',TRIM(my_fname)
 call wrtout(std_out,msg,'PERS')
 call wrtout(ab_out,msg,'PERS')
 call xbarrier_mpi(spaceComm)

!Initialize Dims, remeber the hacking with with Dims
!call abi_etsf_dims_init(Dtset,2,Hdr%lmn_size,Psps,Dummy_wfs)
 call abi_etsf_dims_init(Dims,Dtset,2,Psps,Dummy_wfs)

!Change some values since we work in real space on the dense FFT mesh.
 Dims%max_number_of_coefficients      = etsf_no_dimension
 Dims%max_number_of_basis_grid_points = etsf_no_dimension
 Dims%number_of_localization_regions  = etsf_no_dimension
 Dims%real_or_complex_coefficients    = etsf_no_dimension

 Dims%real_or_complex_wavefunctions   = 2

 Dims%number_of_grid_points_vector1  = ngfftf(1)
 Dims%number_of_grid_points_vector2  = ngfftf(2)
 Dims%number_of_grid_points_vector3  = ngfftf(3)

!Dimensions for variables that can be splitted.
 Dims%my_number_of_kpoints = SIZE(my_kpoints) !etsf_no_dimension
 Dims%my_number_of_spins   = SIZE(my_spins)   !etsf_no_dimension

!Split data using k-points and spins
 if (nprocs>1) then
   Split%my_kpoints => my_kpoints(:)
   nullify(Split%my_spins)
   if (nsppol>1) Split%my_spins => my_spins(:)
 else
   nullify(Split%my_kpoints)
   nullify(Split%my_spins)
 end if

!=== Set-up the variables ===
!* These mandatory values are always written by the hdr_io_etsf() routine.
 Groups_flags%geometry  = etsf_geometry_all
 Groups_flags%electrons = etsf_electrons_all - etsf_electrons_x_functional - etsf_electrons_c_functional
 Groups_flags%kpoints   = etsf_kpoints_red_coord_kpt + etsf_kpoints_kpoint_weights

!These variables may be written depending on prt<something> input variables.
 Groups_flags%basisdata = etsf_basisdata_basis_set
 if (Dtset%usewvl==0) then
   Groups_flags%basisdata= Groups_flags%basisdata + &
&   etsf_basisdata_kin_cutoff + etsf_basisdata_n_coeff + etsf_basisdata_red_coord_pw
 else
   Groups_flags%basisdata= Groups_flags%basisdata + etsf_basisdata_coord_grid + etsf_basisdata_n_coeff_grid
 end if

!Groups_flags%basisdata = etsf_basisdata_none

!Wavefunctions in real space.
 Groups_flags%main = etsf_main_wfs_rsp

!=== Create the file ===
!* If the group contains main, we remove it for a while to be sure to
!add it at the end, after ABINIT private variables.
 var_main = Groups_flags%main
 Groups_flags%main = etsf_main_none
 write(file_title,'(a)')"PAW AE wavefunction given in real space"

 write(*,*)"Before  etsf_io_data_init"
 kdep=.TRUE.

 call etsf_io_data_init(my_fname,Groups_flags,Dims,file_title,'PAW AE Wavefunction File generated by ABINIT with ETSF_IO',&
& lstat,Error,k_dependent=kdep,overwrite=.TRUE.,Split_definition=Split)
 if (.not.lstat) goto 1000

 write(*,*)" my_number_of_kpoints ",Dims%my_number_of_kpoints
 write(*,*)" my_number_of_spins ",Dims%my_number_of_spins

!* Add the private ABINIT variables.
 call etsf_io_low_open_modify(ncid,my_fname,lstat,Error_data=Error)
 if (.not.lstat) goto 1000

 call ini_wf_etsf(Dtset,Hdr%lmn_size,Psps%npsp,Psps%ntypat,ncid)

!Add the main part as last variables in the ETSF file.
 write(*,*)"Before  etsf_io_main_def"
 call etsf_io_main_def(ncid,lstat,Error,k_dependent=kdep,flags=var_main, Split=Split)
 if (.not.lstat) goto 1000

!Close the file.
 call etsf_io_low_close(ncid,lstat,Error_data=Error)
 if (.not.lstat) goto 1000

!Complete the geometry information with missing values from hdr_io().
 call abi_etsf_geo_put(Dtset,my_basename,Psps)

!To use the following statements, do not forget to declare:
!timrev(integer), Cryst(crystal_structure)
!timrev=2
!call init_crystal_from_hdr(Cryst,Hdr,timrev)
!call abi_crystal_put(Cryst,my_fname)
!call destroy_crystal(Cryst)

 call abi_etsf_electrons_put(Dtset,my_basename)

!We open again for further additions
 call etsf_io_low_open_modify(ncid,my_fname,lstat,Error_data=Error)
 if (.not.lstat) goto 1000

!* Write the header.
!FIXME problem in the case of splitting over k-points due to SIZE mismatch
!in hdr%npwarr(number_of_kpoints) and number_of_coefficients(my_mkpt)

 fform=502; rdwr=2
 call hdr_io_etsf(fform,Hdr,rdwr,ncid)

 call etsf_io_low_close(ncid,lstat,Error)
 if (.not.lstat) goto 1000

!=== Prepare the writing of the results ===
!
!1) Open file for writing
 call etsf_io_low_open_modify(ncid,my_fname,lstat,Error_data=Error)
 if (.not.lstat) goto 1000

!2) Switch to write mode.
 call etsf_io_low_set_write_mode(ncid,lstat,Error_data=Error)
 if (.not.lstat) goto 1000
#endif

!=== Prepare the spline ===
!* Calculate 2nd derivatives for spline, they only depend on the atom type.
!TODO phi_spl and tphi might be stored in pawtab (?)
 allocate(Paw_spline(ntypat))

 do itypat=1,ntypat
   ln_size  =Pawtab(itypat)%basis_size
   mesh_size=Pawrad(itypat)%mesh_size

   allocate(Paw_spline(itypat)%phi (mesh_size,ln_size))
   allocate(Paw_spline(itypat)%tphi(mesh_size,ln_size))

!  * Calculate 2nd derivatives of %phi and %tphi.
   do inl=1,ln_size
     ybcbeg=zero; ybcend=zero 
     call spline(Pawrad(itypat)%rad,Pawtab(itypat)%phi(:,inl), mesh_size,ybcbeg,ybcend,Paw_spline(itypat)%phi(:,inl))
     ybcbeg=zero; ybcend=zero 
     call spline(Pawrad(itypat)%rad,Pawtab(itypat)%tphi(:,inl),mesh_size,ybcbeg,ybcend,Paw_spline(itypat)%tphi(:,inl))
   end do

 end do

!=== Perform spline for each atom ===
!* FFT points within PAW sphere depend on the particular atom.
 allocate(Paw_onsite(natom))

 do iatom=1,natom
   itypat   = typat(iatom)
   ln_size  = Pawtab(itypat)%basis_size
   lmn_size = Pawtab(itypat)%lmn_size
   mesh_size= Pawrad(itypat)%mesh_size
   nfgd     = Pawfgrtab(iatom)%nfgd ! no. of points in the fine grid for this PAW sphere

!  * Obtain |r-R| on fine grid, note that rfgd is given in Cartesian coordinates.
   allocate(nrm(nfgd))
   do ifgd=1,nfgd
     nrm(ifgd) = sqrt(dot_product(Pawfgrtab(iatom)%rfgd(:,ifgd),Pawfgrtab(iatom)%rfgd(:,ifgd)))
   end do

!  * Compute Ylm for each r-R vector.
   ipsang = 1 + (Pawtab(itypat)%l_size-1)/2 ! recall l_size=2*l_max-1 where l_max is shifted by 1.
   allocate(ylm_tmp(ipsang**2,nfgd))
   option_ylmr= 1 ! To compute Ylm(r-R).
   normchoice = 1 ! Use computed norms of input vectors.
   call initylmr(ipsang,normchoice,nfgd,nrm,option_ylmr,Pawfgrtab(iatom)%rfgd,ylm_tmp,ylm_gr)

!  Exchange dimensions for better memory access.
   allocate(ylm(nfgd,ipsang**2))
   do ii=1,ipsang**2
     ylm(:,ii) = ylm_tmp(ii,:)
   end do
   deallocate(ylm_tmp)

!  In order to do spline fits, the |r-R| data must be sorted
!  * Sort the nrm points, keeping track of which goes where
   allocate(nrm_sort(nfgd))
   nrm_sort = nrm

   allocate(iperm(nfgd))
   do ifgd=1,nfgd
     iperm(ifgd)=ifgd
   end do

   call sort_dp(nfgd,nrm_sort,iperm,tol8)  ! TODO check iperm,add doc, should be correct though

!  Now make spline fits of phi and tphi  onto the fine grid around the atom
   allocate(phigrd(nfgd,ln_size),tphigrd(nfgd,ln_size))
   allocate(ff(nfgd))

   do inl=1,ln_size
!    * splint phi onto points and reorder indices.
     ybcbeg = zero; ybcend = zero;
     call splint(mesh_size,Pawrad(itypat)%rad,Pawtab(itypat)%phi(:,inl),Paw_spline(itypat)%phi(:,inl),nfgd,nrm_sort,ff)
     do ifgd=1,nfgd
       ii=iperm(ifgd)
       phigrd(ii,inl) = ff(ifgd)
     end do

!    * splint tphi onto points and reorder the indices.
     ybcbeg = zero; ybcend = zero;
     call splint(mesh_size,Pawrad(itypat)%rad,Pawtab(itypat)%tphi(:,inl),Paw_spline(itypat)%tphi(:,inl),nfgd,nrm_sort,ff)
     do ifgd=1,nfgd
       ii=iperm(ifgd)
       tphigrd(ii,inl) = ff(ifgd)
     end do
   end do !inl

   deallocate(ff)

!  === Calculate AE and PS partial waves inside the sphere ===
!  * recall that <r|phi>=u(r)*Slm(r^)/r, thus avoid division by zero
   allocate(Paw_onsite(iatom)%phi (nfgd,lmn_size))
   allocate(Paw_onsite(iatom)%tphi(nfgd,lmn_size))

   do jlmn=1,lmn_size
     jl  = Psps%indlmn(1,jlmn,itypat)
     jm  = Psps%indlmn(2,jlmn,itypat)
     jlm = Psps%indlmn(4,jlmn,itypat)
     jln = Psps%indlmn(5,jlmn,itypat)

     do ifgd=1,nfgd ! loop over fine grid points in current PAW sphere
       if (nrm(ifgd)>tol16) then
         rR  = nrm(ifgd) ! value of |r-R|
         phj = phigrd (ifgd,jln)*ylm(ifgd,jlm)/rR
         tphj= tphigrd(ifgd,jln)*ylm(ifgd,jlm)/rR
         Paw_onsite(iatom)%phi (ifgd,jlmn) =  phj
         Paw_onsite(iatom)%tphi(ifgd,jlmn) = tphj
       else
         Paw_onsite(iatom)%phi (ifgd,jlmn) = zero
         Paw_onsite(iatom)%tphi(ifgd,jlmn) = zero
       end if
     end do !nfgd
   end do !jlmn

   deallocate(nrm,nrm_sort)
   deallocate(iperm)
   deallocate(phigrd,tphigrd,ylm)

 end do !iatom

!* Free 2nd derivates used for spline.
 do itypat=1,ntypat
   deallocate(Paw_spline(itypat)%phi)
   deallocate(Paw_spline(itypat)%tphi)
 end do
 deallocate(Paw_spline)


!FIXME check ordering in cprj and Eventually in external file
!why is iorder_cprj not stored in the file for crosschecking purpose?
!Here Im assuming cprj are not ordered!

!Prepare temporary PAW file if mkmem==0
 allocate(atindx(natom),atindx1(natom))
 allocate(nattyp(ntypat))

 indx=1
 do itypat=1,ntypat
   nattyp(itypat)=0
   do iatom=1,natom
     if (typat(iatom)==itypat) then
       atindx (iatom )=indx
       atindx1(indx  )=iatom
       indx=indx+1
       nattyp(itypat)=nattyp(itypat)+1
     end if
   end do
 end do

 iorder_cprj=0
 call cprj_diskinit_r(atindx1,natom,iorder_cprj,mkmem,natom,0,dimcprj,nspinor,Dtfil%unpaw)

 tim_fourwf=0; tim_rwwf=0; weight=one

!mkmem==0 means wf and kg info on disk file
!* Skip header of Wffnow
!* Define offsets, in the case of MPI I/O.
 if (mkmem==0) then
   call hdr_skip(Wffnow,ierr)
   formeig=0
   call xdefineOff(formeig,Wffnow,MPI_enreg,nband,npwarr,nspinor,nsppol,nkpt)
   mcg_disk=mpw*nspinor*mband
   allocate(cg_disk(2,mcg_disk))
 end if

!n4,n5,n6 are FFT dimensions, modified to avoid cache trashing
 n1=ngfftf(1); n2=ngfftf(2); n3=ngfftf(3)
 n4=ngfftf(4); n5=ngfftf(5); n6=ngfftf(6)
 nfftot=n1*n2*n3

!Loop over spin and k points
 bdtot_index=0; icg=0; ibg=0; norm_rerr=smallest_real
 do isppol=1,nsppol

   if (mkmem==0) rewind(Dtfil%unkg) ! Rewind the kpgsph data file on unit Dtfil%unkg
   ikg=0
   do ikpt=1,nkpt

     kpoint  = kpt(:,ikpt)
     nband_k = nband(ikpt+(isppol-1)*nkpt)
     npw_k   = npwarr(ikpt)
     istwf_k = istwfk(ikpt)

     if (MPI_enreg%paral_compil_kpt==1)then
       if (MINVAL(ABS(MPI_enreg%proc_distrb(ikpt,1:nband_k,isppol)-rank))/=0) then
         bdtot_index=bdtot_index+nband_k; CYCLE
       end if
     end if

     allocate(phkr(2,nfftot))
     do i3=0,n3-1
       rfft(3)=DBLE(i3)/n3
       do i2=0,n2-1
         rfft(2)=DBLE(i2)/n2
         do i1=0,n1-1
           rfft(1)=DBLE(i1)/n1
           ifft = 1 +i1 +i2*n1 +i3*n1*n2
           phkr(1,ifft) = COS(two_pi*dot_product(kpoint,rfft))
           phkr(2,ifft) = SIN(two_pi*dot_product(kpoint,rfft))
         end do
       end do
     end do

     allocate(Cprj_k(natom,nspinor*nband_k))
     call cprj_alloc(Cprj_k,0,dimcprj)

!    Extract cprj for this k-point according to mkmem
     if (mkmem==0) then
       call cprj_get(atindx1,Cprj_k,Cprj,natom,1,ibg,ikpt,iorder_cprj,isppol,mband,mkmem,&
&       MPI_enreg,natom,nband_k,nband_k,nspinor,nsppol,Dtfil%unpaw)
     else
       ibsp=0
       do iband=1,nband_k
         do ispinor=1,nspinor
           ibsp=ibsp+1
           do iatom=1,natom
             Cprj_k(iatom,ibsp)%cp(:,:)=Cprj(iatom,ibsp+ibg)%cp(:,:)
           end do
         end do
       end do
     end if

     allocate(gbound(2*mgfftf+8,2),kg_k(3,npw_k))

!    Do i/o as needed
     if (mkmem==0) then
       nsp=nspinor
       call rdnpw(ikpt,isppol,nband_k,npw_k,nsp,0,Dtfil%unkg)
       read(Dtfil%unkg) kg_k(:,1:npw_k) ! Read k+g data
       call sphereboundary(gbound,istwf_k,kg_k,mgfftf,npw_k)
!      Read the wavefunction block for ikpt,isppol
       allocate(eig_dum(mband),kg_dum(3,0),occ_dum(mband))
       call rwwf(cg_disk,eig_dum,0,0,0,ikpt,isppol,kg_dum,mband,mcg_disk,&
&       MPI_enreg,nband_k,nband_k,npw_k,nspinor,occ_dum,-2,0,tim_rwwf,Wffnow)
       deallocate(eig_dum,kg_dum,occ_dum)
     else
       kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
       call sphereboundary(gbound,istwf_k,kg_k,mgfftf,npw_k)
     end if !mkmem==0

#if defined HAVE_ETSF_IO
!    === Write eigenvalues and occupations ===
!    write(*,*)eigen(bdtot_index+1:bdtot_index+nband_k)
     Electrons_folder%eigenvalues__number_of_states = Dtset%mband
     Electrons_folder%eigenvalues%data1D         => eigen(bdtot_index+1:bdtot_index+nband_k)
     Electrons_folder%eigenvalues__kpoint_access = my_kstable(ikpt,isppol) !ikpt
     Electrons_folder%eigenvalues__spin_access   = isppol
     if (nprocs>1) Electrons_folder%eigenvalues__spin_access = 1

     Electrons_folder%occupations__number_of_states = Dtset%mband
     Electrons_folder%occupations%data1D         => occ(bdtot_index+1:bdtot_index+nband_k)
     Electrons_folder%occupations__kpoint_access = my_kstable(ikpt,isppol) !ikpt
     Electrons_folder%occupations__spin_access   = isppol
     if (nprocs>1) Electrons_folder%occupations__spin_access = 1

     write(*,*)"rank ",rank," about to write",my_kstable(ikpt,isppol)


     call etsf_io_electrons_put(ncid,Electrons_folder,lstat,Error)
     if (.not.lstat) goto 1000
#endif

     do iband=1,nband_k ! Loop over bands.

!      * Fourier transform on the real fft box of the smooth part.
       ndat=nspinor
       allocate(fofgin(2,npw_k*ndat))
       allocate(fofr(2,n4,n5,n6*ndat))

       if (mkmem/=0) then
         do ipw=1,npw_k*nspinor
           fofgin(:,ipw)=cg(:,ipw+(iband-1)*npw_k*nspinor+icg)
         end do
       else
         do ipw=1,npw_k*nspinor
           fofgin(:,ipw)=cg_disk(:,ipw+(iband-1)*npw_k*nspinor)
         end do
       end if

!      Complex can be set to 0 with this option(0) of fourwf
       option=0; cplex=0; npwout=1
       allocate(denpot(cplex*n4,n5,n6))
       allocate(fofgout(2,npwout*ndat))

       call fourwf(cplex,denpot,fofgin,fofgout,fofr,gbound,gbound,istwf_k,kg_k,kg_k,&
&       mgfftf,MPI_enreg,ndat,ngfftf,npw_k,npwout,n4,n5,n6,option,paral_kgb,tim_fourwf,weight,weight)

!      deallocate(fofgout)

!      Here I do not know if fourwf works in the case of spinors,
!      It seems that not all fftalg option support ndata! should check!
!      Do not forget to declare real(dp)::fofgin_down(:,:) to use the following statements
!      if (nspinor==2) then
!      allocate(fofgin_down(2,npw_k))
!      fofgin_down(:,:)=fofgin(:,1+npw_k:2*npw_k)
!      ! Complex can be set to 0 with this option(0) of fourwf
!      cplex=1; option=1; npwout=1; ndat=1
!      call fourwf(cplex,denpot,fofgin_down,fofgout,fofr,gbound,gbound,istwf_k,kg_k,kg_k,&
!      &mgfftf,MPI_enreg,ndat,ngfftf,npw_k,npwout,n4,n5,n6,option,paral_kgb,tim_fourwf,weight,weight)
!      deallocate(fofgin_down)
!      end if

!      * Add phase e^{ikr} since it is contained in cprj.
       allocate(ur(2,n1*n2*n3))
       do i3=1,n3
         do i2=1,n2
           do i1=1,n1
             ii = i1 + n1*(i2-1)+ n1*n2*(i3-1)
!            ur(:,ii)=fofr(:,i1,i2,i3)
             ur(1,ii)= fofr(1,i1,i2,i3) * phkr(1,ii) - fofr(2,i1,i2,i3) * phkr(2,ii)
             ur(2,ii)= fofr(1,i1,i2,i3) * phkr(2,ii) + fofr(2,i1,i2,i3) * phkr(1,ii)
           end do
         end do
       end do
       deallocate(fofr)

!      === Add onsite term on the augmented FFT mesh ===
       do iatom=1,natom
         itypat  =typat(iatom)
         lmn_size=Pawtab(itypat)%lmn_size
         ln_size =Pawtab(itypat)%basis_size ! no. of nl elements in PAW basis
         nfgd    =Pawfgrtab(iatom)%nfgd     ! no. of points in the fine grid for this PAW sphere

         ibsp=(iband-1)*nspinor
         do ispinor=1,nspinor
           ibsp=ibsp+1
           do jlmn=1,lmn_size
             jl=Psps%indlmn(1,jlmn,itypat)
             jm=Psps%indlmn(2,jlmn,itypat)
             re_p = Cprj_k(iatom,ibsp)%cp(1,jlmn)
             im_p = Cprj_k(iatom,ibsp)%cp(2,jlmn)

             do ifgd=1,nfgd ! loop over fine grid points in current PAW sphere.
               ifftsph = Pawfgrtab(iatom)%ifftsph(ifgd) ! index of the point on the grid
               phj  = Paw_onsite(iatom)% phi(ifgd,jlmn)
               tphj = Paw_onsite(iatom)%tphi(ifgd,jlmn)
               ur(1,ifftsph) = ur(1,ifftsph) + re_p * (phj-tphj)
               ur(2,ifftsph) = ur(2,ifftsph) + im_p * (phj-tphj)
             end do

           end do !jlmn
         end do !ispinor
       end do !iatom

!      * Remove the phase e^{ikr}, we store u(r).
       do i3=1,n3
         do i2=1,n2
           do i1=1,n1
             ii = i1 + n1*(i2-1)+ n1*n2*(i3-1)
             reur=ur(1,ii)
             imur=ur(2,ii)
             ur(1,ii)=  reur * phkr(1,ii) + imur * phkr(2,ii)
             ur(2,ii)= -reur * phkr(2,ii) + imur * phkr(1,ii)
           end do
         end do
       end do

       norm=zero
       do ifft=1,nfftot
         norm = norm + ur(1,ifft)**2+ur(2,ifft)**2
       end do
       norm=norm/nfftot
!      norm = SUM(SUM(ur*ur,DIM=2),DIM=1)/nfftot

       norm_rerr = MAX((ABS(norm-one))*100,norm_rerr)
       write(std_out,*)"norm = ",norm
!      ur = ur/norm

#if defined HAVE_ETSF_IO
       MainFolder%real_space_wavefunctions%data2D  => ur
       MainFolder%wfs_rsp__spin_access   =  isppol !this is wrong if para!
       if (nprocs>1) MainFolder%wfs_rsp__spin_access = 1
       MainFolder%wfs_rsp__kpoint_access = my_kstable(ikpt,isppol) !ikpt
       MainFolder%wfs_rsp__state_access  = iband
!      main_folder%wfs_coeff__number_of_coefficients = npw * nspinor

!      do ifft=1,nfftot
!      write(777,*)ur(:,ifft)
!      end do

!      We use the group level write routine.
       call etsf_io_main_put(ncid,MainFolder,lstat,Error_data=Error)
       if (.not.lstat) goto 1000
#endif

       deallocate(ur)

       deallocate(fofgin)
       deallocate(fofgout)
       deallocate(denpot, STAT=istat)

     end do !nband_k

     bdtot_index=bdtot_index+nband_k

     if (mkmem/=0) then
       ibg=ibg+nspinor*nband_k
       icg=icg+npw_k*nspinor*nband_k
       ikg=ikg+npw_k
     end if

     deallocate(gbound,kg_k)
     deallocate(phkr)

     call cprj_free(Cprj_k)
     deallocate(Cprj_k)

   end do !ikpt
 end do !nsppol

 if (mkmem==0) deallocate(cg_disk)

!* Free augmentation waves.
 do iatom=1,natom
   deallocate(Paw_onsite(iatom)%phi)
   deallocate(Paw_onsite(iatom)%tphi)
 end do
 deallocate(Paw_onsite)

 deallocate(atindx,atindx1)
 deallocate(nattyp)

 deallocate(my_kpoints,my_spins)
 deallocate(my_kstable)

!* Maximum relative error over CPUs.
 call xmax_mpi(norm_rerr,max_rerr,spaceComm,ierr)
 write(*,*)"max_rerr=",max_rerr
 if (max_rerr>ten) then
   write(msg,'(7a)')&
&   " Inaccuracy on the normalization of the wave funtions exceeds 10%. ",ch10,&
&   " Likely due to the use of a too coarse FFT mesh or unconverged wavefunctions. ",ch10,&
&   " Numerical values inside the augmentation regions might be inaccurate. ",ch10,&
&   " Action: increase pawecutdg in your input file. "
   MSG_COMMENT(msg)
 end if

#if defined HAVE_ETSF_IO
!=== Merge partial files ===
 if (nprocs>1) then
   call xbarrier_mpi(spaceComm)
   if (rank==master) then
     out_file="test_merge"
     write(msg,'(2a)')'Master node is merging NETCDF partial files into: ',TRIM(out_file)
     call wrtout(std_out, msg,'COLL')
     !$call etsf_io_file_merge(out_file,merge_files,lstat,Error)
     if (.not.lstat) goto 1000
   end if
   call xbarrier_mpi(spaceComm)
 end if

 if (allocated(merge_files)) deallocate(merge_files)

!=== Handle the error ===
 1000 continue
 if (.not.lstat) then
   call etsf_io_low_error_to_str(Errmess,Error)
   msg=errmess(1:MIN(400,LEN(errmess)))
   MSG_PERS_ERROR(msg)
 end if
#endif

 DBG_EXIT("COLL")

end subroutine pawmkaewf
!!***
