!=======================================================================
!
!  PROGRAM  PHASE/0 2019.01 ($Rev: 589 $)
!
!  MODULE: m_ES_IO
!
!  AUTHOR(S): T. Yamasaki   August/20/2003
!  
!  Contact address :  Phase System Consortium
!                     E-mail: phase_system@nims.go.jp URL https://azuma.nims.go.jp
!  
!
!
!=======================================================================
!
!     The original version of this set of the computer programs "PHASE"
!  was developed by the members of the Theory Group of Joint Research
!  Center for Atom Technology (JRCAT), based in Tsukuba, in the period
!  1993-2001.
!
!     Since 2002, this set has been tuned and new functions have been
!  added to it as a part of the national project "Frontier Simulation 
!  Software for Industrial Science (FSIS)",  which is supported by
!  the IT program of the Ministry of Education, Culture, Sports,
!  Science and Technology (MEXT) of Japan. 
!     Since 2006, this program set has been developed as a part of the
!  national project "Revolutionary Simulation Software (RSS21)", which
!  is supported by the next-generation IT program of MEXT of Japan.
!   Since 2013, this program set has been further developed centering on PHASE System
!  Consortium.
!   The activity of development of this program set has been supervised by Takahisa Ohno.
!
#ifdef __TIMER_SUB__
#   define __TIMER_SUB_START(a)  call timer_sta(a)
#   define __TIMER_SUB_STOP(a)   call timer_end(a)
#else
#   define __TIMER_SUB_START(a)
#   define __TIMER_SUB_STOP(a)
#endif
#ifdef __TIMER_IODO__
#   define __TIMER_IODO_START(a)   call timer_sta(a)
#   define __TIMER_IODO_STOP(a)    call timer_end(a)
#else
#   define __TIMER_IODO_START(a)
#   define __TIMER_IODO_STOP(a)
#endif
#ifdef __TIMER_IOCOMM__
#   define __TIMER_IOCOMM_START_w_BARRIER(str,a)   call timer_barrier(str) ;   call timer_sta(a)
#   define __TIMER_IOCOMM_START(a)       call timer_sta(a)
#   define __TIMER_IOCOMM_STOP(a)        call timer_end(a)
#else
#   define __TIMER_IOCOMM_START_w_BARRIER(str,a)
#   define __TIMER_IOCOMM_START(a)
#   define __TIMER_IOCOMM_STOP(a)
#endif

!#define _DEBUG_ESIO_

!
module m_ES_IO
! $Id: m_ES_IO.F90 589 2018-10-16 07:14:04Z ktagami $
  use m_Electronic_Structure, only : zaj_l,neordr,nrvf_ordr,eko_l,occup_l,efermi,efermi_spin,totch&
       &                            ,vnlph_l,vlhxc_l,eko_ek
  use m_Electronic_Structure, only : m_ES_WF_in_Rspace
  use m_PlaneWaveBasisSet,    only : kgp,kg1,ngabc,nbase,iba
  use m_Kpoints,              only : kv3, vkxyz, vkxyz_ek, kv3_ek, k_symmetry, qwgt, &
       &                             m_Kp_get_nkmesh, m_Kp_get_kptable_bxsf
  use m_Timing,               only : tstatc0_begin, tstatc0_end
  use m_Control_Parameters,   only : nspin,kimg,neg,num_extra_bands,af,ipri, printable, neg_previous &
       &                           , wf_filetype, wf_title, eigmin_wf, eigmax_wf, ekmode, neg_is_enlarged &
       &                           , icond, fixed_charge_k_parallel, sw_ekzaj, numk_zajsaved, Nw_Psicoef &
       &                           , precision_WFfile
  use m_Const_parameters,     only : DP, SP, CMPLDP, BUCS, OFF, YES, EK, SCF, DENSITY_ONLY &
       &                           , CUBE, VTK, BINARY, GAMMA, GAMMA_base_symmetrization, ONE_BY_ONE &
       &                           , FIXED_CHARGE, FIXED_CHARGE_CONTINUATION, DELTA &
       &                           , EFERMI_VICINITY, ALL_VALUES, GRID, ON
  use m_Parallelization,      only : mpi_comm_group,mpi_k_world,mpi_e_world,is_kngp,ie_kngp,npes &
       &                           , mype,ierr,map_k, map_ek,ista_e,iend_e,istep_e,map_z, np_e &
       &                           , ista_k,iend_k,myrank_e,myrank_k,map_e,nrank_e &
       &                           , ista_kngp,iend_kngp, nrank_k  &
       &                           , ista_g1k,iend_g1k, np_g1k , myrank_g, nrank_g
  use m_IterationNumbers,     only : nk_in_the_process, nk_converged, nkgroup &
       &                           , first_kpoint_in_this_job, iteration_ionic, iteration_electronic
  use m_FFT,                  only : fft_box_size_WF,nfft
  use m_Crystal_Structure,    only : altv, sw_fix_total_spin, altv_refcell
  use m_Ionic_System,         only : natm,natm2,iatomn,m_IS_pack_all_ions_in_uc
  use m_PseudoPotential,      only : ival
  use m_Crystal_Structure,    only : univol, rltv

! ===================================== added by K. Tagami ============= 11.0
  use m_Control_Parameters,    only : ndim_spinor, noncol, &
       &                              previous_nspin_collinear, &
       &                              previous_nband_collinear
! ====================================================================== 11.0
  use m_ErrorMessages,        only : EOF_REACHED

  use m_Control_Parameters, only : ndim_magmom, ik_wf_squared, &
       &                           ib1_wf_squared, ib2_wf_squared, &
       &                           wf_squared_filetype,  max_projs, proj_attribute, &
       &                           ndim_chgpot, SpinOrbit_Mode, &
       &                           wf_orb_proj_print_format, proj_group, num_proj_elems, &
       &                           sw_band_unfolding
  use m_Const_parameters,     only : Neglected, PAI2, CARTS, DELTA07, HARTREE, BOHR
  use m_Files,              only :  nfwfk_sq, m_Files_open_nfwfksq_noncl, &
       &                            nfwfk_integ_mom, &
       &                            m_Files_open_nfwfk_integ_mom, &
       &                            m_Files_close_nfwfk_integ_mom, &
       &                            m_Files_open_nfwfk_orb_proj, &
       &                            m_Files_close_nfwfk_orb_proj, &
       &                            nfwfk_orb_proj
  use m_PseudoPotential,   only : nlmt, ilmt, lmta, q, &
       &                          nlmta_phi, nlmtt_phi, qorb, m_PP_tell_iorb_lmtt, &
       &                          m_PP_tell_iorb_ia_l_m_tau, ilmt_phi, &
       &                          mtp_phi, lmta_phi, ltp_phi, taup_phi
  use m_Nonlocal_Potential,   only : norm_phig
  use m_Charge_Density,    only : chgq_l, hsr, hsi, &
       &                          m_CD_softpart_ktsub_noncl, &
       &                          m_CD_hardpart_ktsub_noncl, &
       &                          m_CD_alloc_rspace_charge, &
       &                          m_CD_dealloc_rspace_charge, &
       &                          m_CD_rspace_charge_noncl
  use m_Ionic_System,      only : ityp, iproj_group
  use m_Electronic_Structure,  only : fsr_l, fsi_l, compr_l, compi_l
  use m_ES_Noncollinear,   only : m_ES_set_Pauli_Matrix
  use m_SpinOrbit_Potential,  only :  MatU_ylm_RC_L0,  MatU_ylm_RC_L1,  MatU_ylm_RC_L2, &
       &                              MatU_ylm_RC_L3

! ==== EXP_CELLOPT ==== 2015/09/24
  use m_PlaneWaveBasisSet,    only : kg1_prev
! ===================== 2015/09/24

  implicit none
  include 'mpif.h'

  integer istatus(mpi_status_size)

  real(kind=SP), allocatable, dimension(:,:)  :: wf_l   ! work wave functions
  real(kind=DP), allocatable, dimension(:,:)  :: wfdp_l ! work wave functions

!  1.  m_ESIO_rd_EigenValues_etc    <-(Initial_Electronic_Structure)
!  2.  m_ESIO_wd_EigenValues_etc    <-(WriteDownData_onto_Files, Postprocessing)
!  3.  m_ESIO_wd_EigenValues        <-(WriteDownData_onto_Files, Convergence_Check, Postprocessing)
!  4.  m_ESIO_wd_EigenValues_ek     <-(WriteDownData_onto_Files)
!  5.  m_ESIO_wd_vlhxc              <-(Postprocessing)
!  6.  m_ESIO_rd_WFs                <-(Initial_Electronic_Structure, scf_rd_wf_and_chg)
!  7.  m_ESIO_rd_WFs_import_frm_collin    <-(Initial_Electronic_Structure)
!  8.  m_ESIO_wd_WFs                <-(WriteDownData_onto_Files)
!  9.  m_ESIO_wd_WFs_standardout    <-(Renewal_of_WaveFunctions)
! 10.  m_ESIO_rd_WFs_and_EVs_ek     <-(Initial_Electronic_Structure)
! 11.  m_ESIO_rd_EVs_ek             <-(Initial_Electronic_Structure)
! 12.  m_ESIO_wd_Psicoef            <-(WriteDownData_onto_Files)
! 13.  m_ESIO_wd_WFs_and_EVs_ek     <-(WriteDownData_onto_Files, WriteDownData_onto_Files_ek, Convergence_Check)
! 14.  m_ESIO_wd_WFn                <-(Postprocessing)
! 15.  m_ESIO_wd_Efermi             <-(WriteDownData_onto_Files)
! 16.  m_ESIO_rd_Eferm              <-(Initial_Electronic_Structure, scf_rd_wf_and_chg)

contains
  subroutine m_ESIO_rd_EigenValues_etc(nfout,nfcntn_bin,F_CNTN_BIN_partitioned)

    integer, intent(in) :: nfout, nfcntn_bin
    logical, intent(in) :: F_CNTN_BIN_partitioned
    integer  :: ik, ie
    integer, allocatable, dimension(:,:) :: n1_wk, n2_wk  ! MPI
    real(DP),allocatable, dimension(:,:) :: e1_wk, e2_wk  ! MPI
!!$    read(nfcntn_bin) neordr,nrvf_ordr,eko_l,occup_l,efermi,totch
                                                  __TIMER_SUB_START(1370)
    if(F_CNTN_BIN_partitioned) then
       if(neg_previous /= neg) then
          write(nfout,'(" !! neg_previous /= neg <<m_ESIO_rd_EigenValues_etc>>")')
          write(nfout,'(" !! neg_prevous = ",i8)') neg_previous
          write(nfout,'(" !! neg         = ",i8)') neg
          write(nfout,'(" neg_previous sould be neg when F_CNTN_BIN_in_partitioned is true")')
          stop ' neg_previous sould be neg when F_CNTN_BIN_in_partitioned is true'
       end if
       allocate(n1_wk(neg,iend_k-ista_k+1), n2_wk(neg,iend_k-ista_k+1))
       allocate(e1_wk(np_e,iend_k-ista_k+1),e2_wk(np_e,iend_k-ista_k+1))
       n1_wk = 0; n2_wk = 0
       e1_wk = 0; e2_wk = 0
       ! -- neordr, nrvf_ordr --
                                                  __TIMER_IODO_START(1405)
       read(nfcntn_bin) n1_wk
       read(nfcntn_bin) n2_wk
                                                  __TIMER_IODO_STOP(1405)
                                                  __TIMER_IODO_START(1406)
       do ik = ista_k, iend_k
          neordr(1:neg,ik) = n1_wk(1:neg,ik-ista_k+1)
          nrvf_ordr(1:neg,ik) = n2_wk(1:neg,ik-ista_k+1)
       end do
                                                  __TIMER_IODO_STOP(1406)
       ! -- eko_l, occup_l --
                                                  __TIMER_IODO_START(1407)
       read(nfcntn_bin) e1_wk
       read(nfcntn_bin) e2_wk
                                                  __TIMER_IODO_STOP(1407)
                                                  __TIMER_IODO_START(1408)
       do ik = ista_k, iend_k
          do ie = ista_e, iend_e
             eko_l(map_z(ie),ik) = e1_wk(ie-ista_e+1,ik-ista_k+1)
             occup_l(map_z(ie),ik) = e2_wk(ie-ista_e+1,ik-ista_k+1)
          end do
       end do
                                                  __TIMER_IODO_STOP(1408)
       deallocate(e2_wk,e1_wk,n2_wk,n1_wk)
       ! -- nfermi, totch --
       read(nfcntn_bin) efermi, totch
    else
       allocate(n1_wk(neg_previous,kv3),n2_wk(neg_previous,kv3)) ! MPI
       allocate(e1_wk(neg_previous,kv3),e2_wk(neg_previous,kv3)) ! MPI
       n1_wk = 0; n2_wk = 0
       e1_wk = 0; e2_wk = 0

       ! -- neordr, nrvf_ordr --
                                                  __TIMER_IODO_START(1409)
       if(mype == 0) read(nfcntn_bin) n1_wk                ! MPI
       if(mype == 0) read(nfcntn_bin) n2_wk                ! MPI
                                                  __TIMER_IODO_STOP(1409)
                                                  __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1410)
       if(npes > 1) then
          call mpi_bcast(n1_wk,neg_previous*kv3,mpi_integer,0,mpi_comm_group,ierr) ! MPI
          call mpi_bcast(n2_wk,neg_previous*kv3,mpi_integer,0,mpi_comm_group,ierr) ! MPI
       endif
                                                  __TIMER_IOCOMM_STOP(1410)
                                                  __TIMER_IODO_START(1411)
       do ik = ista_k, iend_k                              ! MPI
          neordr(1:neg_previous,ik) = n1_wk(1:neg_previous,ik)
          nrvf_ordr(1:neg_previous,ik) = n2_wk(1:neg_previous,ik)
          if(neg_previous < neg) then
             do ie = neg_previous+1, neg
                neordr(ie,ik) = ie
                nrvf_ordr(ie,ik) = ie
             end do
          end if
       end do                                              ! MPI
                                                  __TIMER_IODO_STOP(1411)
       ! -- eko_l, occup_l --
                                                  __TIMER_IODO_START(1412)
       if(mype == 0) read(nfcntn_bin) e1_wk                ! MPI
       if(mype == 0) read(nfcntn_bin) e2_wk                ! MPI
                                                  __TIMER_IODO_STOP(1412)
                                                  __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1413)
       if(npes > 1) then
          call mpi_bcast(e1_wk,neg_previous*kv3,mpi_double_precision,0,mpi_comm_group,ierr) ! MPI
          call mpi_bcast(e2_wk,neg_previous*kv3,mpi_double_precision,0,mpi_comm_group,ierr)! MPI
       end if
                                                  __TIMER_IOCOMM_STOP(1413)
                                                  __TIMER_IODO_START(1414)
       do ik = ista_k, iend_k                              ! MPI
          do ie = 1, neg_previous                          ! MPI
             if(map_e(ie) == myrank_e) then                ! MPI
                eko_l(map_z(ie),ik) = e1_wk(ie,ik)         ! MPI
                occup_l(map_z(ie),ik) = e2_wk(ie,ik)       ! MPI
             end if
          end do                                           ! MPI
          if(neg_previous < neg) then
             do ie = neg_previous+1, neg
                if(map_e(ie) == myrank_e) then
                   eko_l(map_z(ie),ik) = 1.d+15
                   occup_l(map_z(ie),ik) = 0.d0
                end if
            end do           
          end if
       end do                                              ! MPI
                                                  __TIMER_IODO_STOP(1414)
       ! -- nfermi, totch --
       if(mype == 0) read(nfcntn_bin) efermi, totch        ! MPI
       if(npes > 1) then
          call mpi_bcast(efermi,1,mpi_double_precision,0,mpi_comm_group,ierr) ! MPI
          call mpi_bcast(totch,1, mpi_double_precision,0,mpi_comm_group,ierr) ! MPI
       end if

       deallocate(e2_wk,e1_wk,n2_wk,n1_wk)
    end if

    if(printable) write(nfout,'(" TOTCH (total charge) = ",f12.6 &
         & ," (= ",8d25.12,") at m_ESIO_rd_EigenValues_etc")') totch,totch

                                                  __TIMER_SUB_STOP(1370)
  end subroutine m_ESIO_rd_EigenValues_etc

  subroutine m_ESIO_wd_EigenValues_etc(nfcntn_bin,F_CNTN_BIN_partitioned,totch_flag)

    integer, intent(in) :: nfcntn_bin
    logical, intent(in) :: F_CNTN_BIN_partitioned
    integer, optional, intent(in) :: totch_flag

    integer  :: ik, ie, mpi_comm
    integer, allocatable, dimension(:,:) :: n_wk, n2_mpi  ! MPI
    real(DP),allocatable, dimension(:,:) :: e_wk, e2_mpi  ! MPI
    integer  :: id_sname = -1
                                                  __TIMER_SUB_START(1371)
    call tstatc0_begin('m_ESIO_wd_EigenValues_etc ',id_sname)

    mpi_comm = mpi_comm_group

    if(F_CNTN_BIN_partitioned) then
       allocate(n_wk(neg,iend_k-ista_k+1))
       allocate(e_wk(np_e,iend_k-ista_k+1))
       n_wk = 0; e_wk = 0
       !  -- neordr --
                                                  __TIMER_IODO_START(1415)
       do ik = ista_k, iend_k
          n_wk(1:neg,ik-ista_k+1) = neordr(1:neg,ik)
       end do
                                                  __TIMER_IODO_STOP(1415)
                                                  __TIMER_IODO_START(1416)
       write(nfcntn_bin) n_wk
                                                  __TIMER_IODO_STOP(1416)
       !  -- nrvf_ordr --
                                                  __TIMER_IODO_START(1417)
       do ik = ista_k, iend_k
          n_wk(1:neg,ik-ista_k+1) = nrvf_ordr(1:neg,ik)
       end do
                                                  __TIMER_IODO_STOP(1417)
                                                  __TIMER_IODO_START(1418)
       write(nfcntn_bin) n_wk
                                                  __TIMER_IODO_STOP(1418)
       !  -- eko_l --
       e_wk = 0.d0
                                                  __TIMER_IODO_START(1419)
       do ik = ista_k, iend_k
          do ie = ista_e, iend_e
             e_wk(ie-ista_e+1,ik-ista_k+1) = eko_l(map_z(ie),ik)
!!$             e_wk(ie,ik-ista_k+1) = eko_l(map_z(ie),ik)
          end do
       end do
                                                  __TIMER_IODO_STOP(1419)
                                                  __TIMER_IODO_START(1420)
       write(nfcntn_bin) e_wk
                                                  __TIMER_IODO_STOP(1420)
!!$       if(npes >= 2) then
!!$          call mpi_allreduce(e_wk,e2_mpi,neg*(iend_k-ista_k+1),mpi_double_precision &
!!$               &               , mpi_sum, mpi_k_world)
!!$       else
!!$          e2_mpi = e_wk
!!$       end if
!!$       write(nfcntn_bin) e2_mpi

       !  -- occup_l --
       e_wk = 0.d0
                                                  __TIMER_IODO_START(1421)
       do ik = ista_k, iend_k
          do ie = ista_e, iend_e, istep_e
             e_wk(ie-ista_e+1,ik-ista_k+1) = occup_l(map_z(ie),ik)
!!$             e_wk(ie,ik-ista_k+1) = occup_l(map_z(ie),ik)
          end do
       end do
                                                  __TIMER_IODO_STOP(1421)
                                                  __TIMER_IODO_START(1422)
       write(nfcntn_bin) e_wk
                                                  __TIMER_IODO_STOP(1422)
!!$       if(npes >= 2) then
!!$          call mpi_allreduce(e_wk,e2_mpi,neg*(iend_k-ista_k+1),mpi_double_precision &
!!$               &               , mpi_sum, mpi_k_world)
!!$       else
!!$          e2_mpi = e_wk
!!$       end if
!!$       write(nfcntn_bin) e2_mpi
!!$       deallocate(e2_mpi, e_wk, n_wk)
       deallocate(e_wk, n_wk)

       if(totch_flag == OFF) then
          write(nfcntn_bin) efermi                     ! MPI
       else
          write(nfcntn_bin) efermi,totch               ! MPI
       end if
    else
       allocate(n_wk(neg,kv3)); allocate(n2_mpi(neg,kv3))! MPI
       allocate(e_wk(neg,kv3)); allocate(e2_mpi(neg,kv3))! MPI
       n_wk = 0; n2_mpi = 0
       e_wk = 0; e2_mpi = 0
       !  -- neordr --
       n_wk = 0                                          ! MPI
                                                  __TIMER_IODO_START(1423)
       do ik = 1, kv3                                     ! MPI
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          n_wk(1:neg,ik) = neordr(1:neg,ik)               ! MPI
       end do                                             ! MPI
                                                  __TIMER_IODO_STOP(1423)
       if(npes >= 2) then
          call mpi_allreduce(n_wk,n2_mpi,neg*kv3,mpi_integer,mpi_sum,mpi_comm,ierr)
          n2_mpi = n2_mpi/nrank_e
       else
          n2_mpi = n_wk
       end if
                                                  __TIMER_IODO_START(1425)
       if(mype == 0) write(nfcntn_bin) n2_mpi             ! MPI ; writing (neordr)
                                                  __TIMER_IODO_STOP(1425)
       !  -- nrvf_ordr --
                                                  __TIMER_IODO_START(1426)
       n_wk = 0                                           ! MPI
       do ik = 1, kv3                                     ! MPI
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          n_wk(1:neg,ik) = nrvf_ordr(1:neg,ik)            ! MPI
       end do                                             ! MPI
                                                  __TIMER_IODO_STOP(1426)
       if(npes >= 2) then
          call mpi_allreduce(n_wk,n2_mpi,neg*kv3,mpi_integer,mpi_sum,mpi_comm,ierr)
          n2_mpi = n2_mpi/nrank_e
       else
          n2_mpi = n_wk
       end if
                                                  __TIMER_IODO_START(1428)
       if(mype == 0) write(nfcntn_bin) n2_mpi             ! MPI ; writing (nrvf_ordr)
                                                  __TIMER_IODO_STOP(1428)
       !  -- eko_l --
       e_wk = 0.d0                                        ! MPI
                                                  __TIMER_IODO_START(1429)
       do ik = 1, kv3                                     ! MPI
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          do ie = 1, neg                                  ! MPI
             if(map_e(ie) /= myrank_e) cycle              ! MPI
             e_wk(ie,ik) = eko_l(map_z(ie),ik)            ! MPI
          end do
       end do
                                                  __TIMER_IODO_STOP(1429)
       if(npes >= 2) then
          call mpi_allreduce(e_wk,e2_mpi,neg*kv3,mpi_double_precision,mpi_sum,mpi_comm,ierr)
       else
          e2_mpi = e_wk
       end if
                                                  __TIMER_IODO_START(1431)
       if(mype == 0) write(nfcntn_bin) e2_mpi             ! MPI ; writing (eko_l)
                                                  __TIMER_IODO_STOP(1431)

       !  -- occup_l --
       e_wk = 0.d0
                                                  __TIMER_IODO_START(1432)
       do ik = 1, kv3                                     ! MPI
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          do ie = 1, neg                                  ! MPI
             if(map_e(ie) /= myrank_e) cycle              ! MPI
             e_wk(ie,ik) = occup_l(map_z(ie),ik)          ! MPI
          end do                                            ! MPI
       end do                                               ! MPI
                                                  __TIMER_IODO_STOP(1432)
       if(npes >= 2) then
          call mpi_allreduce(e_wk,e2_mpi,neg*kv3,mpi_double_precision,mpi_sum,mpi_comm,ierr)
       else
          e2_mpi = e_wk
       end if
                                                  __TIMER_IODO_START(1434)
       if(mype == 0) write(nfcntn_bin) e2_mpi             ! MPI ; writing (occup_l)
                                                  __TIMER_IODO_STOP(1434)
       if(mype == 0) then
          if(totch_flag == OFF) then
             write(nfcntn_bin) efermi                     ! MPI
          else
             write(nfcntn_bin) efermi,totch               ! MPI
          end if
       end if
       deallocate(n_wk); deallocate(n2_mpi)              ! MPI
       deallocate(e_wk); deallocate(e2_mpi)              ! MPI
    end if
    call tstatc0_end(id_sname)
                                                  __TIMER_SUB_STOP(1371)
  end subroutine m_ESIO_wd_EigenValues_etc

  subroutine m_ESIO_wd_EigenValues(nf,iprieigen,nooccupation)

    integer, intent(in)              :: nf
    integer, intent(in)              :: iprieigen
    integer, intent(in)              :: nooccupation
    integer                          :: ie,  ipri0, kv3_i, ks
    integer                          :: hconst_min, lzero_max
    integer, parameter :: NCOLUMN = 6
    integer, parameter :: EIGEN_VALUES = 1, OCCUPATIONS = 2
    integer :: writemode
    real(kind=DP),allocatable, dimension(:,:) :: e_mpi, o_mpi
                                                  __TIMER_SUB_START(1378)
    allocate(e_mpi(neg,kv3)); e_mpi = 0.d0
    allocate(o_mpi(neg,kv3)); o_mpi = 0.d0

    call set_writemode(writemode)  ! ->(writemode) = ALL_VALUES or FERMI_VICINITY
    call get_ipri0(iprieigen,ipri0)

    if(ipri0 >= 2) then
       if(ipri0 >= 3 .and. nf == 6 .and. printable) call wd_neordr()

       call set_kv3_i_and_ks() ! -> kv3_i, ks
#ifndef _DEBUG_WRITE_
       if(writemode == EFERMI_VICINITY .and. kv3_i == kv3) &
            & call cal_vicinity_range(hconst_min,lzero_max) ! -> lzero_max, hconst_min
#endif
!     --- Energy eigen values ---
       call put_kpartArray_into(eko_l,e_mpi)
       if(printable) then
          if(ks == 0 .and. nf==6) call wd_efermi()
!!$          if(ks == 0) call wd_efermi()
          call wd_k_and_values(EIGEN_VALUES)
       end if
    end if
!     --- Occupations ---
    if(ipri0 >= 2 .and. nooccupation /= YES) then
       call put_kpartArray_into(occup_l,o_mpi)
       if(printable) call wd_k_and_values(OCCUPATIONS)
    end if
    deallocate(e_mpi)
    deallocate(o_mpi)
                                                  __TIMER_SUB_STOP(1378)
  contains
    subroutine set_writemode(writemode)
      integer, intent(out) :: writemode
      writemode = ALL_VALUES
#ifndef _DEBUG_WRITE_
      if(nf==6) then
         if((icond == FIXED_CHARGE_CONTINUATION .or. icond == FIXED_CHARGE) .and. &
              & fixed_charge_k_parallel == ONE_BY_ONE) then
            writemode = ALL_VALUES
         else
            if(neg <= NCOLUMN) then
               writemode = ALL_VALUES
            else
               writemode = EFERMI_VICINITY
            end if
         end if
      end if
#endif
    end subroutine set_writemode

    subroutine set_kv3_i_and_ks()
!!$      if(iprieigen>=2 .and. printable) then
      if((icond == FIXED_CHARGE_CONTINUATION .or. icond == FIXED_CHARGE) .and. &
           & fixed_charge_k_parallel == ONE_BY_ONE) then
         kv3_i = kv3_ek - kv3*(nkgroup-1)
         if(kv3_i > kv3) kv3_i = kv3
         ks = max(1,first_kpoint_in_this_job) - 1 + kv3*(nkgroup-1)
      else
         kv3_i = kv3
         ks = 0
      end if

      if(iprieigen>=3 .and. printable) &
           & write(nf,'(" kv3_i, kv3, ks, nkgroup = ",4i8)') kv3_i, kv3, ks, nkgroup
!!$
!!$      call mpi_bcast(kv3_i,1,mpi_integer,0,mpi_comm_group,ierr)
!!$      call mpi_bcast(ks,1,mpi_integer,0,mpi_comm_group,ierr)
    end subroutine set_kv3_i_and_ks

    subroutine wd_k_and_values(mode)
      integer, intent(in) :: mode
      integer :: ik, nb
      integer :: ie_s, ie_e, nhw, neg_t
      real(kind=DP) :: hw, hc, hv
      
#ifndef _DEBUG_WRITE_
      if(writemode == EFERMI_VICINITY) then
         hw = (lzero_max-hconst_min)*0.5d0
         hv = (lzero_max+hconst_min)*0.5d0
         hc = NCOLUMN*0.5d0
         nhw = Int(hw/hc + 1.d0)
         ie_s = max(nint(hv-nhw*hc + DELTA),1)
         ie_e = min(ie_s + nhw*NCOLUMN - 1,neg)
      end if
#endif

      if(mode == EIGEN_VALUES) then
#ifndef _DEBUG_WRITE_
         if(writemode == EFERMI_VICINITY) then
            write(nf,'(" ======  Energy Eigen Values in the vicinity of the Fermi energy level (Range=" &
                 & ,i7," :",i7,") =====")') ie_s, ie_e
         else
#endif
            if(nf==6) write(nf,'(" ======  Energy Eigen Values ======")')
#ifndef _DEBUG_WRITE_
         end if
#endif
      else
#ifndef _DEBUG_WRITE_
         if(writemode == EFERMI_VICINITY) then
            write(nf,'(" ======  Occupations in the vicinity of the Fermi energy level (Range=" &
                 & ,i7," :",i7,") =====")') ie_s, ie_e
         else
#endif
            if(nf==6) write(nf,'(" ======  Occupations ======")')
#ifndef _DEBUG_WRITE_
         end if
#endif
      end if
                                                  __TIMER_IODO_START(1463)
      do ik = 1, kv3_i, ndim_spinor
#ifndef _DEBUG_WRITE_
!!$         if(mode == OCCUPATIONS) e_mpi(:,ik) = e_mpi(:,ik)/(qwgt(ik)*kv3)
!!$         if(mode == OCCUPATIONS) o_mpi(:,ik) = o_mpi(:,ik)/(qwgt(ik)*kv3)
         if(writemode==EFERMI_VICINITY .and. kv3==kv3_i) then
            call wd_k_and_efermi_vicinities(ik,ie_s,ie_e,mode)
         else
#endif
            if(nf /= 6 .and. mode == EIGEN_VALUES) write(nf,'(" ===== energy eigen values =====")')
            if(nf /= 6 .and. mode == OCCUPATIONS)  write(nf,'(" ===== occupations =====")')
            if ( noncol ) then
               call wd_k_points_noncl(ik)
            else
               call wd_k_points(ik)
            endif
            neg_t = neg
            if(neg_is_enlarged) neg_t = neg - num_extra_bands
            if(mode == EIGEN_VALUES) then
               write(nf,'(5f16.8)') (e_mpi(nb,ik),nb = 1, neg_t) ! =eko(neordr(nb,ik),ik)
            else if(mode == OCCUPATIONS) then
               write(nf,'(5f16.8)') (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb = 1, neg_t) ! =occup(neordr(nb,ik),ik)
            end if
#ifndef _DEBUG_WRITE_
         end if
#endif
      end do
                                                  __TIMER_IODO_STOP(1463)
    end subroutine wd_k_and_values

    subroutine put_kpartArray_into(a_l,a_all)
      real(kind=DP), intent(in), dimension(np_e,ista_k:iend_k) :: a_l
      real(kind=DP), intent(out), dimension(neg,kv3) :: a_all
      integer :: ik, ierr, ie
      integer :: ito

                                                  __TIMER_IODO_START(1461)
      a_all = 0.d0
      do ik = 1, kv3
         if(map_k(ik) /= myrank_k) cycle
         do ie = 1, neg
            if(map_e(ie) /= myrank_e) cycle
            ito = nrvf_ordr(ie,ik)
            a_all(ito,ik) = a_l(map_z(ie),ik)
         end do
      end do
                                                  __TIMER_IODO_STOP(1461)
      if(npes >= 2) then
         call mpi_allreduce(MPI_IN_PLACE,a_all,neg*kv3,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
      end if
     end subroutine put_kpartArray_into

    subroutine wd_neordr()
      integer :: ik
      write(nf,'(" kv3 = ",i8, " neg = ",i8)') kv3,neg
      do ik= 1,kv3
         write(nf,'(" map_k(",i3,") = ",i8)') ik,map_k(ik)
         if(map_k(ik) /= myrank_k) cycle
         write(nf,'(" neordr ik=",i8)') ik
         write(nf,'(10i8)') neordr(1:neg,ik)
      end do
    end subroutine wd_neordr

    subroutine get_ipri0(ipri_in, ipri_out)
      integer, intent(in)  :: ipri_in
      integer, intent(out) :: ipri_out
      if(npes > 1) then
         if(mype == 0) ipri_out = ipri_in
         call mpi_bcast(ipri_out,1,mpi_integer,0,mpi_comm_group,ierr)
      else
         ipri_out = ipri_in
      end if
    end subroutine get_ipri0

    subroutine wd_k_points(ik)
      integer, intent(in) :: ik
       if(nspin == 1) then
#ifdef _EIGENVALUES_IN_OLD_FORMAT_
          write(nf,'(i6,3f16.8)') ik+ks,(vkxyz(ik,1:3,BUCS))
#else
          write(nf,'(" ik = ",i4," (",3f10.6," )")') ik+ks,(vkxyz(ik,1:3,BUCS))
#endif
       else
#ifdef _EIGENVALUES_IN_OLD_FORMAT_
          if(mod(ik,2) == 1) then
             write(nf,'(i6,"    UP ",3f16.8)') ik+ks,(vkxyz(ik,1:3,BUCS))
          else
             write(nf,'(i6,"  DOWN ",3f16.8)') ik+ks,(vkxyz(ik,1:3,BUCS))
          end if
#else
          if(mod(ik,2) == 1) then
             write(nf,'(" ik = ",i4," (",3f10.6,")    UP ")') ik+ks,(vkxyz(ik,1:3,BUCS))
          else
             write(nf,'(" ik = ",i4," (",3f10.6,")  DOWN ")') ik+ks,(vkxyz(ik,1:3,BUCS))
          end if
#endif
       end if
     end subroutine wd_k_points

! ============================== added by K. Tagami ==================== 11.0
     subroutine wd_k_points_noncl(ik)
       integer, intent(in) :: ik
#ifdef _EIGENVALUES_IN_OLD_FORMAT_
       write(nf,'(i6,3f16.8)') ik+ks,(vkxyz(ik,1:3,BUCS))
#else
       write(nf,'(" ik = ",i4," (",3f10.6," )")') ik+ks,(vkxyz(ik,1:3,BUCS))
#endif
     end subroutine wd_k_points_noncl
! ====================================================================== 11.0

     subroutine cal_vicinity_range(hconst_min, lzero_max)
       integer, intent(out) :: hconst_min, lzero_max
       integer :: hconst,lzero
       integer :: ik, ie, nb
       integer :: ito

       e_mpi = 0.d0
       do ik = 1, kv3
          if(map_k(ik) /= myrank_k) cycle
          do ie = 1, neg
             if(map_e(ie) /= myrank_e) cycle
             ito = nrvf_ordr(ie,ik)
             e_mpi(ito,ik) = occup_l(map_z(ie),ik)
          end do
       end do
       if(npes >= 2) then
          call mpi_allreduce(MPI_IN_PLACE,e_mpi,neg*kv3,mpi_double_precision &
               &                  ,mpi_sum,mpi_comm_group,ierr) ! MPI
       end if
       hconst_min = neg
       lzero_max = 0
       do ik = 1, kv3, ndim_spinor
          hconst = neg
          lzero  = 1
          do nb = neg,1,-1
             if(e_mpi(nb,ik)<DELTA) lzero = nb
!!$             if(e_mpi(nb,ik)>=qwgt(ik)*kv3-DELTA ) then
             if(e_mpi(nb,ik)>=qwgt(ik)*kv3/ndim_spinor-DELTA ) then
                hconst = nb
                exit
             end if
          end do
          if(hconst < hconst_min) hconst_min = hconst
          if(lzero_max < lzero)   lzero_max = lzero
       end do
       if(hconst_min > lzero_max) hconst_min = max(lzero_max-1,1)

       if(ipri>=2) write(nf,'(" hconst_min,lzero_max = ",2i8)') hconst_min,lzero_max
     end subroutine cal_vicinity_range

     subroutine wd_k_and_efermi_vicinities(ik,ie_s,ie_e,mode)
       integer, intent(in) :: ik, ie_s,ie_e,mode
       integer :: nb, nbloop, ie, ie1, ie2
       if(hconst_min >= 1) then
!!$          nb = lzero_max-hconst_min+1
          nb = ie_e - ie_s + 1
          nbloop = Int((nb-1)/NCOLUMN+1)
          do ie = 1, nbloop
!!$             ie1 = hconst_min+(ie-1)*NCOLUMN
!!$             ie2 = min(hconst_min+ie*NCOLUMN-1,neg)
             ie1 = max(ie_s+(ie-1)*NCOLUMN,1)
             ie2 = min(ie_s+ie*NCOLUMN-1,neg)
             if(ie == 1) then
                if(mode == EIGEN_VALUES) then
                   write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, (e_mpi(nb,ik),nb=ie1,ie2)
                else if(mode == OCCUPATIONS) then
                   write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, &
                        &           (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb=ie1,ie2)
                end if
             else
                if(mode == EIGEN_VALUES) then
                   write(nf,'(12x,8f12.6)') (e_mpi(nb,ik),nb=ie1,ie2)
                else if(mode == OCCUPATIONS) then
                   write(nf,'(12x,8f12.6)') &
                        &      (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb=ie1,ie2)
                end if
             end if
          end do
       end if
     end subroutine wd_k_and_efermi_vicinities

     subroutine wd_efermi()
       if(nf == 6) write(nf,'(" **** Eigen Values and Occupations ****")')
       write(nf,'(" ** iteration_ionic = ",i8, ", iteration_electronic = ",i8," **")') &
            & iteration_ionic, iteration_electronic
       write(nf,'(" EFermi = ",f16.8)') efermi
       if(sw_fix_total_spin == YES .and. nspin == 2) then
          write(nf,'(" Efermi_spin(1) = ",f16.8, ",  Efermi_spin(2) = ",f16.8)') &
               & efermi_spin(1), efermi_spin(2)
       end if
     end subroutine wd_efermi
  end subroutine m_ESIO_wd_EigenValues

  subroutine m_ESIO_wd_EigenValues_ek(nf,mode)
    integer, intent(in)              :: nf, mode

    real(kind=DP), parameter :: delta = 1.d-12
    real(kind=DP), allocatable, dimension(:) :: eko_t
    integer, allocatable, dimension(:)       :: neordr_t
    integer                     :: ik, ib,jb,ibo,jbo, neg_t

! =========================== added by K. Tagami ================ 11.0
    integer :: ikskip
! =============================================================== 11.0

    allocate(eko_t(neg))
    allocate(neordr_t(neg))
    eko_t = 0; neordr_t = 0

    if(mode == SCF .and. printable) write(nf,'(" ======  Energy Eigen Values ======")')
!!$    do ik = 1, kv3_ek
!!$    do ik = 1, nk_in_the_process

    if(printable) then
! ========================= modified by K. Tagami ============ 11.0
!       write(nf,'(" nk_converged = ",i8)') min(kv3_ek,nk_converged)
!       do ik = 1, kv3_ek
!          call wd_k_points
!       end do
       if ( noncol ) then
          write(nf,'(" nk_converged = ",i8)') min(kv3_ek,nk_converged) /ndim_spinor
          do ik = 1, kv3_ek, ndim_spinor
             call wd_k_points_noncl
          end do
       else
          write(nf,'(" nk_converged = ",i8)') min(kv3_ek,nk_converged)
          do ik = 1, kv3_ek
             call wd_k_points
          end do
       endif
! ============================================================= 11.0

       write(nf,'(" -----")')
    end if

! ====================== added by K. Tagami =================== 11.0
    if ( noncol ) then
       ikskip = ndim_spinor
    else
       ikskip = 1
    endif
! ============================================================ 11.0

! ============================ modified by K. Tagami ============ 11.0
!    do ik = 1, nk_converged
    do ik = 1, nk_converged, ikskip
! =============================================================== 11.0

!       if(sw_ekzaj == OFF .and. ik <= first_kpoint_in_this_job) cycle
       if(ik > kv3_ek) cycle
       if(mode == EK .and. printable) write(nf,'("=== energy_eigen_values ===")')
       eko_t = eko_ek(:,ik)
       if(nspin == 1 .or. (nspin == 2 .and. mod(ik,2) == 1)) &
            & neordr_t(1:neg) = (/(ib,ib=1,neg)/)
       do ib = 1, neg-1
          do jb = ib+1, neg
             ibo = neordr_t(ib)
             jbo = neordr_t(jb)
             if(eko_t(jbo)  < eko_t(ibo)-delta) then        ! MPI
                neordr_t(jb) = ibo
                neordr_t(ib) = jbo
             end if
          end do
       end do
       if(printable) then
! ================================ modified by K. Tagami ========== 11.0
!          call wd_k_points
          if ( noncol ) then
             call wd_k_points_noncl
          else
             call wd_k_points
          endif
! ================================================================= 11.0
          neg_t = neg

          if(neg_is_enlarged) neg_t = neg - num_extra_bands
          if(mode == SCF) then
             write(nf,'(5f16.8)') (eko_t(neordr_t(ib)),ib=1,neg_t)
          else
             write(nf,'(4f18.10)') (eko_t(neordr_t(ib)),ib=1,neg_t)
          end if
       end if
    end do

    deallocate(neordr_t);     deallocate(eko_t)

  contains

    subroutine wd_k_points
      integer :: j, k
      real(kind=DP) :: c1, vkxyz_wk(3)

      if ( sw_band_unfolding == ON ) then
         Do j=1, 3
            c1 = 0.0d0
            Do k=1, 3
               c1 = c1 +altv_refcell(k,j) *vkxyz_ek(ik,k,CARTS)
            End Do
            vkxyz_wk(j) = c1 /PAI2
         End Do
      else
         vkxyz_wk = vkxyz_ek(ik,1:3,BUCS)
      endif

      if(mode == SCF) then
         if(nspin == 1) then
            write(nf,'(i6,3f18.10)') ik, vkxyz_wk(1:3)
         else
            if(mod(ik,2) == 1) then
               write(nf,'(i6,"    UP ",3f18.10)') ik, vkxyz_wk(1:3)
            else
               write(nf,'(i6,"  DOWN ",3f18.10)') ik, vkxyz_wk(1:3)
            end if
         end if
      else
         if(nspin == 1) then
            write(nf,'(" ik = ",i4," (",3f10.6," )")') ik, vkxyz_wk(1:3)
         else
            if(mod(ik,2) == 1) then
               write(nf,'(" ik = ",i4," (",3f10.6,")    UP ")') ik, vkxyz_wk(1:3)
            else
               write(nf,'(" ik = ",i4," (",3f10.6,")  DOWN ")') ik, vkxyz_wk(1:3)
            end if
         end if
      end if

    end subroutine wd_k_points

! ============================== added by K. Tagami ==================== 11.0
    subroutine wd_k_points_noncl
      integer :: j, k
      real(kind=DP) :: c1, vkxyz_wk(3)

      if ( sw_band_unfolding == ON ) then
         Do j=1, 3
            c1 = 0.0d0
            Do k=1, 3
               c1 = c1 +altv_refcell(k,j) *vkxyz_ek(ik,k,CARTS)
            End Do
            vkxyz_wk(j) = c1 /PAI2
         End Do
      else
         vkxyz_wk = vkxyz_ek(ik,1:3,BUCS)
      endif

      if (mode == SCF) then
         write(nf,'(i6,3f18.10)') ik, vkxyz_wk(1:3)
      else
         write(nf,'(" ik = ",i4," (",3f10.6," )")') ik, vkxyz_wk(1:3)
      endif
    end subroutine wd_k_points_noncl
! ====================================================================== 11.0

  end subroutine m_ESIO_wd_EigenValues_ek

! ======================================= modified by K. Tagami ========== 11.0
!  subroutine m_ESIO_wd_vlhxc(nfvlc)
!
  subroutine m_ESIO_wd_vlhxc( nfvlc, ismax )
    integer, intent(in)              :: ismax
! ======================================================================= 11.0

    integer, intent(in)              :: nfvlc
    integer                          :: is, ik, i
    real(DP),allocatable, dimension(:,:,:):: vlhxc_mpi,vlhxc_mpi2

    if(npes >= 2) then

! ========================== modiifed by K. Tagami ============= 11.0
!       allocate(vlhxc_mpi(kgp,kimg,nspin)); vlhxc_mpi = 0.d0  ! MPI
!       allocate(vlhxc_mpi2(kgp,kimg,nspin))

       allocate(vlhxc_mpi(kgp,kimg,ismax));
       allocate(vlhxc_mpi2(kgp,kimg,ismax))
! ============================================================= 11.0
       vlhxc_mpi = 0.0d0;       vlhxc_mpi2 = 0.0d0

! ========================== modiifed by K. Tagami ============= 11.0
!       do is = 1, nspin
       do is = 1, ismax
! ============================================================== 11.0
          do ik = 1, kimg
             do i = ista_kngp, iend_kngp
                vlhxc_mpi(i,ik,is) = vlhxc_l(i,ik,is)
             end do
          end do
       end do

! ========================== modiifed by K. Tagami ====================== 11.0
!       call mpi_allreduce(vlhxc_mpi,vlhxc_mpi2,kgp*kimg*nspin &
!            &     , mpi_double_precision, mpi_sum, mpi_comm_group,ierr)
       call mpi_allreduce( vlhxc_mpi, vlhxc_mpi2, kgp*kimg*ismax, &
            &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
! ====================================================================== 11.0

       if (mype == 0) write(nfvlc) vlhxc_mpi2

       deallocate(vlhxc_mpi); deallocate(vlhxc_mpi2)
    else
       write(nfvlc) vlhxc_l
    end if
  end subroutine m_ESIO_wd_vlhxc

  subroutine m_ESIO_rd_WFs(nfout,nfzaj, F_ZAJ_partitioned)
    integer, intent(in) :: nfout, nfzaj
    logical, intent(in) :: F_ZAJ_partitioned
    integer    :: ik,ib,ri, i
    integer    :: id_sname = -1
    integer    :: ierror 
                                                  __TIMER_SUB_START(1372)
    call tstatc0_begin('m_ESIO_rd_WFs ',id_sname)

    if(precision_WFfile==SP) then
       if(ipri >= 1) write(nfout,*) ' !D Reading zaj (single_precision)'
    else
       if(ipri >= 1) write(nfout,*) ' !D Reading zaj (double_precision)'
    end if
    if(precision_WFfile==SP) then
       allocate(wf_l(kg1,kimg)); wf_l = 0.d0
    else
       allocate(wfdp_l(kg1,kimg)); wfdp_l = 0.d0
    end if
    rewind nfzaj
    if(F_ZAJ_partitioned) then
       do ik = ista_k, iend_k, af+1        ! MPI
                                                  __TIMER_IODO_START(1435)
          do ib = ista_e, iend_e, istep_e  ! MPI
             if(ib > neg_previous) cycle
                                                  __TIMER_IODO_START(1436)
           if(precision_WFfile==SP) then
             read(nfzaj) wf_l
                                                  __TIMER_IODO_STOP(1436)
             if(kimg == 1) then
                do i = 1, kg1
                   zaj_l(i,map_z(ib),ik,1) = wf_l(i,1)
                end do
             else if(kimg==2) then
                do i = 1, kg1
                   zaj_l(i,map_z(ib),ik,1) = wf_l(i,1)
                   zaj_l(i,map_z(ib),ik,2) = wf_l(i,2)
                end do
             end if
           else if(precision_WFfile==DP) then
             read(nfzaj) wfdp_l
                                                  __TIMER_IODO_STOP(1436)
             if(kimg == 1) then
                do i = 1, kg1
                   zaj_l(i,map_z(ib),ik,1) = wfdp_l(i,1)
                end do
             else if(kimg==2) then
                do i = 1, kg1
                   zaj_l(i,map_z(ib),ik,1) = wfdp_l(i,1)
                   zaj_l(i,map_z(ib),ik,2) = wfdp_l(i,2)
                end do
             end if

           end if
          end do
                                                  __TIMER_IODO_STOP(1435)
       end do
    else
#ifdef _DEBUG_ESIO_
       if(mype == 0) write(nfout,'("### zaj reading")')
#endif
       do ik = 1, kv3, af+1
                                                  __TIMER_IODO_START(1437)
          do ib = 1, neg_previous
                                                  __TIMER_IODO_START(1438)
! -----------------
           if(precision_WFfile==SP) then
             if(mype == 0) read(nfzaj, end = 9999, err = 9999) wf_l  ! MPI
#ifdef _DEBUG_ESIO_
             if(mype == 0) then
                write(nfout,'(" ik = ",i3, " ib = ",i4)')  ik, ib
                write(nfout,'(8f8.4)') (wf_l(ri,1),ri=1,8)
             end if
#endif
                                                  __TIMER_IODO_STOP(1438)
                                                  __TIMER_IOCOMM_START_w_BARRIER(mpi_comm,1439)
                                                  __TIMER_IOCOMM_STOP(1439)
             if(mype == 0 .and. map_ek(ib,ik) /= 0) then ! MPI
                call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ib,ik),1,mpi_comm_group,ierr) ! MPI
             else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0) then                  ! MPI
                call mpi_recv(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr)     ! MPI
             end if
             if(map_ek(ib,ik) == mype) then              ! MPI
                do ri = 1, kimg
                   zaj_l(1:kg1,map_z(ib),ik,ri) = wf_l(1:kg1,ri)  ! MPI
                end do
             end if

! -----------------
           else if(precision_WFfile==DP) then
             if(mype == 0) read(nfzaj, end = 9999, err = 9999) wfdp_l  ! MPI
#ifdef _DEBUG_ESIO_
             if(mype == 0) then
                write(nfout,'(" ik = ",i3, " ib = ",i4)')  ik, ib
                write(nfout,'(8f8.4)') (wfdp_l(ri,1),ri=1,8)
             end if
             call flush(nfout)
#endif
                                                  __TIMER_IODO_STOP(1438)
                                                  __TIMER_IOCOMM_START_w_BARRIER(mpi_comm,1439)
                                                  __TIMER_IOCOMM_STOP(1439)
             if(mype == 0 .and. map_ek(ib,ik) /= 0) then ! MPI
                call mpi_send(wfdp_l,kg1*kimg,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,ierr) ! MPI
             else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0) then                  ! MPI
                call mpi_recv(wfdp_l,kg1*kimg,mpi_double_precision,0,1,mpi_comm_group,istatus,ierr)     ! MPI
             end if
             if(map_ek(ib,ik) == mype) then              ! MPI
                do ri = 1, kimg
                   zaj_l(1:kg1,map_z(ib),ik,ri) = wfdp_l(1:kg1,ri)  ! MPI
                end do
             end if

           endif
! -----------------
          end do
                                                  __TIMER_IODO_STOP(1437)
       end do
    end if

    if(precision_WFfile==SP) then
       deallocate(wf_l)
    else if(precision_WFfile==DP) then
       deallocate(wfdp_l)
    end if
    call tstatc0_end(id_sname)                      
    return
9999 continue
    ierror = EOF_REACHED
    call phase_error_wo_filename(ierror, nfout, nfzaj, __LINE__, __FILE__)
                                                  __TIMER_SUB_STOP(1372)
  end subroutine m_ESIO_rd_WFs

! ==== EXP_CELLOPT ==== 2015/09/24
  subroutine m_ESIO_import_WFs_prev_cell(nfout,nfzaj, F_ZAJ_partitioned)
    integer, intent(in) :: nfout, nfzaj
    logical, intent(in) :: F_ZAJ_partitioned
    integer    :: ik,ib,ri, i
    integer    :: id_sname = -1
    integer    :: ierror

    call tstatc0_begin('m_ESIO_import_WFs_prev_cell ',id_sname)

    if(precision_WFfile==SP) then
       if(ipri >= 1) write(nfout,*) ' !D Reading zaj (single_precision)'
    else
       if(ipri >= 1) write(nfout,*) ' !D Reading zaj (double_precision)'
    end if
    if(precision_WFfile==SP) then
       allocate(wf_l(kg1_prev,kimg)); wf_l = 0.d0
    else
       allocate(wfdp_l(kg1_prev,kimg)); wfdp_l = 0.d0
    end if
    rewind nfzaj

    zaj_l = 0.0d0

    if(F_ZAJ_partitioned) then
       do ik = ista_k, iend_k, af+1        ! MPI

          do ib = ista_e, iend_e, istep_e  ! MPI
             if(ib > neg_previous) cycle

             if(precision_WFfile==SP) then
                read(nfzaj) wf_l

                if(kimg == 1) then
                   do i = 1, min( kg1, kg1_prev )
                      zaj_l(i,map_z(ib),ik,1) = wf_l(i,1)
                   end do
                else if(kimg==2) then
                   do i = 1, min( kg1, kg1_prev )
                      zaj_l(i,map_z(ib),ik,1) = wf_l(i,1)
                      zaj_l(i,map_z(ib),ik,2) = wf_l(i,2)
                   end do
                end if
             else if(precision_WFfile==DP) then
                read(nfzaj) wfdp_l

                if(kimg == 1) then
                   do i = 1, min( kg1, kg1_prev )
                      zaj_l(i,map_z(ib),ik,1) = wfdp_l(i,1)
                   end do
                else if(kimg==2) then
                   do i = 1, min( kg1, kg1_prev )
                      zaj_l(i,map_z(ib),ik,1) = wfdp_l(i,1)
                      zaj_l(i,map_z(ib),ik,2) = wfdp_l(i,2)
                   end do
                end if

             end if
          end do
       end do
    else

       do ik = 1, kv3, af+1
          do ib = 1, neg_previous
             ! -----------------
             if(precision_WFfile==SP) then
                if(mype == 0) read(nfzaj, end = 9999, err = 9999) wf_l  
                if(mype == 0 .and. map_ek(ib,ik) /= 0) then 
                   call mpi_send(wf_l,kg1_prev*kimg,mpi_real,map_ek(ib,ik),1,mpi_comm_group,ierr) ! MPI
                else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0) then
                   call mpi_recv(wf_l,kg1_prev*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr)     ! MPI
                end if
                if(map_ek(ib,ik) == mype) then              ! MPI
                   do ri = 1, kimg
                      do i = 1, min( kg1, kg1_prev )
                         zaj_l(i,map_z(ib),ik,ri) = wf_l(i,ri)  ! MPI
                      end do
                   end do
                end if

                ! -----------------
             else if(precision_WFfile==DP) then
                if(mype == 0) read(nfzaj, end = 9999, err = 9999) wfdp_l
                if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                   call mpi_send(wfdp_l,kg1_prev*kimg,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,ierr) ! MPI
                else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0) then
                   call mpi_recv(wfdp_l,kg1_prev*kimg,mpi_double_precision,0,1,mpi_comm_group,istatus,ierr)     ! MPI
                end if
                if(map_ek(ib,ik) == mype) then              ! MPI
                   do i = 1, min( kg1, kg1_prev )
                      zaj_l(i,map_z(ib),ik,ri) = wfdp_l(i,ri)  ! MPI
                   end do
                end if
             endif
             ! -----------------
          end do
       end do
    end if
!
    if(precision_WFfile==SP) then
       deallocate(wf_l)
    else if(precision_WFfile==DP) then
       deallocate(wfdp_l)
    end if
    call tstatc0_end(id_sname)
    return
9999 continue
    ierror = EOF_REACHED
    call phase_error_wo_filename(ierror, nfout, nfzaj, __LINE__, __FILE__)
  end subroutine m_ESIO_import_WFs_prev_cell
! ===================== 2015/09/24

! ==================================== added by K. Tagami =============== 11.0
  subroutine m_ESIO_rd_WFs_import_frm_collin(nfout,nfzaj, F_ZAJ_partitioned)
    integer, intent(in) :: nfout, nfzaj
    logical, intent(in) :: F_ZAJ_partitioned

    integer  neg_to_be_read

    integer    :: id_sname = -1
    call tstatc0_begin('m_ESIO_rd_WFs_import_frm_collin ',id_sname)

    if(precision_WFfile==SP) then
       allocate(wf_l(kg1,kimg))   ;       wf_l = 0
    else
       allocate(wfdp_l(kg1,kimg)) ;       wfdp_l = 0
    end if

    rewind nfzaj
    if(ipri >= 1) write(nfout,*) ' !D Reading zaj'

    if(F_ZAJ_partitioned) then
       write(*,*) &
            & 'Not supported : importing collinear Wfns  when F_ZAJ_partitioned = true'

    else
       neg_to_be_read = neg / 2

       write(nfout,*) '******************************** '
       write(nfout,*) '!! Collinear wavefunctions are used. '
       write(nfout,*) '!! neg_to_be_read is assumed to be ', neg_to_be_read
       write(nfout,*) '******************************** '

       if ( previous_nspin_collinear == 1 ) then
          call case_previous_nspin_eq_1
       else if ( previous_nspin_collinear == 2 ) then
          call case_previous_nspin_eq_2
       endif

    end if

    if(precision_WFfile==SP) then
       deallocate(wf_l)
    else if(precision_WFfile==DP) then
       deallocate(wfdp_l)
    end if
    call tstatc0_end(id_sname)

  contains

    subroutine case_previous_nspin_eq_2
      integer :: ik, ib, ri, i
      integer :: ib_0, is

      do ik = 1, kv3, ndim_spinor
         Do is=1, ndim_spinor
            do ib_0 = 1, previous_nband_collinear
               ib = ( ib_0 -1 )*ndim_spinor + is
               if(precision_WFfile==SP) then
                  if(mype == 0) read(nfzaj) wf_l  

                  if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                     call mpi_send( wf_l, kg1*kimg, mpi_real, map_ek(ib,ik), 1, &
                          &         mpi_comm_group, ierr )
                  else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0 ) then
                     call mpi_recv( wf_l, kg1*kimg, mpi_real, 0, 1, &
                          &         mpi_comm_group, istatus, ierr )
                  end if

                  if ( ib_0 > neg_to_be_read ) cycle

                  if(map_ek(ib,ik) == mype) then              ! MPI
                     do ri = 1, kimg
                        zaj_l(1:kg1,map_z(ib),ik+is-1,ri) = wf_l(1:kg1,ri)  ! MPI
                     end do
                  end if
               else if(precision_WFfile==DP) then
                  if(mype == 0) read(nfzaj) wfdp_l  

                  if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                     call mpi_send( wfdp_l, kg1*kimg, mpi_double_precision, map_ek(ib,ik), 1, mpi_comm_group, ierr )
                  else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0 ) then
                     call mpi_recv( wfdp_l, kg1*kimg, mpi_double_precision, 0, 1, mpi_comm_group, istatus, ierr )
                  end if

                  if ( ib_0 > neg_to_be_read ) cycle

                  if(map_ek(ib,ik) == mype) then              ! MPI
                     do ri = 1, kimg
                        zaj_l(1:kg1,map_z(ib),ik+is-1,ri) = wfdp_l(1:kg1,ri)  ! MPI
                     end do
                  end if
               end if
            end do
         End Do
      end do
    end subroutine case_previous_nspin_eq_2

    subroutine case_previous_nspin_eq_1
      integer :: ik, ib, ri, i
      integer :: ib_0, is

      do ik = 1, kv3, ndim_spinor
         do ib_0 = 1, previous_nband_collinear
            if(precision_WFfile==SP) then
               if(mype == 0) read(nfzaj) wf_l              ! MPI

               Do is=1, ndim_spinor
                  ib = ( ib_0 -1 )*ndim_spinor + is
                  if(mype == 0 .and. map_ek(ib,ik) /= 0) then ! MPI
                     call mpi_send( wf_l, kg1*kimg, mpi_real, map_ek(ib,ik), 1, &
                          &         mpi_comm_group, ierr ) ! MPI
                  else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0 ) then     ! MPI
                     call mpi_recv( wf_l, kg1*kimg, mpi_real, 0, 1, &
                          &         mpi_comm_group, istatus, ierr )     ! MPI
                  end if

                  if ( ib_0 > neg_to_be_read ) cycle

                  if(map_ek(ib,ik) == mype) then              ! MPI
                     do ri = 1, kimg
                        zaj_l(1:kg1,map_z(ib),ik+is-1,ri) = wf_l(1:kg1,ri)  ! MPI
                     end do
                  end if

               end do
            else
               Do is=1, ndim_spinor
                  ib = ( ib_0 -1 )*ndim_spinor + is
                  if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                     call mpi_send( wfdp_l, kg1*kimg, mpi_double_precision, map_ek(ib,ik), 1, mpi_comm_group, ierr )
                  else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0 ) then
                     call mpi_recv( wfdp_l, kg1*kimg, mpi_double_precision, 0, 1, mpi_comm_group, istatus, ierr )
                  end if

                  if ( ib_0 > neg_to_be_read ) cycle

                  if(map_ek(ib,ik) == mype) then              ! MPI
                     do ri = 1, kimg
                        zaj_l(1:kg1,map_z(ib),ik+is-1,ri) = wfdp_l(1:kg1,ri)  ! MPI
                     end do
                  end if

               end do
            end if
         End Do
      end do
    end subroutine case_previous_nspin_eq_1

  end subroutine m_ESIO_rd_WFs_import_frm_collin
!===================================================================== 11.0

  subroutine m_ESIO_wd_WFs(nfout,nfzaj,F_ZAJ_partitioned)
    integer, intent(in) :: nfout,nfzaj
    logical, intent(in) :: F_ZAJ_partitioned
    integer :: ik,ib,ri
    integer :: id_sname = -1
    call tstatc0_begin('m_ESIO_wd_WFs ',id_sname)

   if(precision_WFfile==SP) then
    allocate(wf_l(kg1,kimg));    wf_l = 0
    call mpi_barrier(mpi_comm_group,ierr)
    !!$ print *, ' !D Writing zaj '
    if(ipri >= 1) write(nfout,*) ' !D Writing zaj (single_precision)'
    rewind nfzaj
    if(F_ZAJ_partitioned) then
       do ik = ista_k, iend_k, af+1        ! MPI
          do ib = ista_e, iend_e, istep_e  ! MPI
             do ri = 1, kimg
                wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
             end do
             write(nfzaj) wf_l
          end do
       end do
    else
#ifdef _DEBUG_ESIO_
       if(mype == 0) write(nfout,'("### zaj writing")')
#endif
       do ik = 1, kv3, af+1
          do ib = 1, neg
             if(map_ek(ib,ik) == mype) then                          ! MPI
                do ri = 1, kimg
                   wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
                end do
                if(map_ek(ib,ik) /= 0) &                             ! MPI
                     &   call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
             else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
             end if
             if(mype == 0) write(nfzaj)  wf_l                        ! MPI
#ifdef _DEBUG_ESIO_
             if(mype == 0) then
                write(nfout,'(" ik = ",i4, " ib = ",i5)')  ik, ib
                write(nfout,'(8f8.4)') (wf_l(ri,1),ri=1,8)
             end if
#endif
          end do
       end do
    end if
    deallocate(wf_l)
   else if(precision_WFfile==DP) then
    allocate(wfdp_l(kg1,kimg));    wfdp_l = 0
    call mpi_barrier(mpi_comm_group,ierr)
    if(ipri >= 1) write(nfout,*) ' !D Writing zaj (double_precision) '
    rewind nfzaj
    if(F_ZAJ_partitioned) then
       do ik = ista_k, iend_k, af+1
          do ib = ista_e, iend_e, istep_e
             do ri = 1, kimg
                wfdp_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
             end do
             write(nfzaj) wfdp_l
          end do
       end do
    else
#ifdef _DEBUG_ESIO_
       if(mype == 0) write(nfout,'("### zaj writing")')
#endif
       do ik = 1, kv3, af+1
          do ib = 1, neg
             if(map_ek(ib,ik) == mype) then
                do ri = 1, kimg
                   wfdp_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
                end do
                if(map_ek(ib,ik) /= 0) &
                     &   call mpi_send(wfdp_l,kg1*kimg,mpi_double_precision,0,1,mpi_comm_group,ierr)
             else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                call mpi_recv(wfdp_l,kg1*kimg,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)
             end if
             if(mype == 0) write(nfzaj)  wfdp_l
#ifdef _DEBUG_ESIO_
             if(mype == 0) then
                write(nfout,'(" ik = ",i4, " ib = ",i5)')  ik, ib
                write(nfout,'(8f8.4)') (wfdp_l(ri,1),ri=1,8)
             end if
#endif
          end do
       end do
    end if
    deallocate(wfdp_l)
   end if
    call tstatc0_end(id_sname)
  end subroutine m_ESIO_wd_WFs

  subroutine m_ESIO_wd_WFs_standardout(nfout,ipriwf)
    integer, intent(in) :: nfout,ipriwf
    integer :: ik,ib,ri, i, ic, ipriwf0, icycle, icolumn, max_elements, istart, iend
    integer :: id_sname = -1
    real(kind=DP) :: phase2r, phase2i, phaser,phasei
    complex(kind=CMPLDP) :: exp2theta, exptheta
    call tstatc0_begin('m_ESIO_wd_WFs_stndout ',id_sname)
    
    ipriwf0 = ipriwf
    if(npes > 1) call mpi_bcast(ipriwf0,1,mpi_integer,0,mpi_comm_group,ierr)

    if(ipriwf0 >= 2) then
       icolumn = 10
       if(precision_WFfile==SP) then
          allocate(wf_l(kg1,kimg+3)); wf_l = 0.d0
       else
          allocate(wfdp_l(kg1,kimg+3)); wfdp_l = 0.d0
       end if
       call mpi_barrier(mpi_comm_group,ierr)
       if(mype == 0)  write(nfout,*) ' !wf Writing zaj '

       do ik = 1, kv3, af+1
          max_elements = iba(ik)
          if(mype == 0) write(nfout,'(" !wf   ik = ",i5)') ik
          do ib = 1, neg
             if(mype == 0) write(nfout,'(" !wf   ib = ",i5)') ib
           if(precision_WFfile==SP) then
             if(map_ek(ib,ik) == mype) then                          ! MPI
                do ri = 1, kimg
                   wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
                end do
                if(map_ek(ib,ik) /= 0) &                             ! MPI
                     &   call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
             else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
             end if
           else
             if(map_ek(ib,ik) == mype) then                          ! MPI
                do ri = 1, kimg
                   wfdp_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
                end do
                if(map_ek(ib,ik) /= 0) &                             ! MPI
                     &   call mpi_send(wfdp_l,kg1*kimg,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
             else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
                call mpi_recv(wfdp_l,kg1*kimg,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
             end if
           end if
             if(mype == 0) then
                if(kimg == 2) then
                 if(precision_WFFile==SP) then
                   do i = 1, iba(ik)
                      wf_l(i,3) = wf_l(i,1)**2 + wf_l(i,2)**2
                   end do
                 else
                   do i = 1, iba(ik)
                      wfdp_l(i,3) = wfdp_l(i,1)**2 + wfdp_l(i,2)**2
                   end do
                 end if
                   if(k_symmetry(ik) == GAMMA .or. k_symmetry(ik) == GAMMA_base_symmetrization) then
                    if(precision_WFfile==SP) then
                      phase2r = (wf_l(1,1)**2 - wf_l(1,2)**2)/wf_l(1,3)
                      phase2i = -2.d0*wf_l(1,1)*wf_l(1,2)/wf_l(1,3)
                    else
                      phase2r = (wfdp_l(1,1)**2 - wfdp_l(1,2)**2)/wfdp_l(1,3)
                      phase2i = -2.d0*wfdp_l(1,1)*wfdp_l(1,2)/wfdp_l(1,3)
                    end if
                      exp2theta = cmplx(phase2r, phase2i)
                      exptheta = sqrt(exp2theta)
                      phaser = real(exptheta)
                      phasei = imag(exptheta)
                      write(nfout,'(" !wf exp2theta = ",2d20.8)') exp2theta
                      write(nfout,'(" !wf           = ",2d20.8)') phase2r, phase2i
                      write(nfout,'(" !wf |exp2theta|**2 = ",d20.8)') dsqrt(phase2r**2 + phase2i**2)
                      write(nfout,'(" !wf exptheta  = ",2d20.8)') exptheta
                      write(nfout,'(" !wf |exptheta| = ",d20.8)') abs(exptheta)
                     if(precision_WFfile==SP) then
                      do i = 1, iba(ik)
!!$                      wf_l(i,4) = real(exptheta*cmplx(wf_l(i,1),wf_l(i,2)))
!!$                      wf_l(i,5) = imag(exptheta*cmplx(wf_l(i,1),wf_l(i,2)))
                         wf_l(i,4) = phaser*wf_l(i,1) - phasei*wf_l(i,2)
                         wf_l(i,5) = phaser*wf_l(i,2) + phasei*wf_l(i,1)
                      end do
                     else
                      do i = 1, iba(ik)
                         wfdp_l(i,4) = phaser*wfdp_l(i,1) - phasei*wfdp_l(i,2)
                         wfdp_l(i,5) = phaser*wfdp_l(i,2) + phasei*wfdp_l(i,1)
                      end do
                     end if
                   end if
                else
                 if(precision_WFfile==SP) then
                   do i = 1, iba(ik)
                      wf_l(i,2) = wf_l(i,1)**2
                   end do
                 else
                   do i = 1, iba(ik)
                      wfdp_l(i,2) = wfdp_l(i,1)**2
                   end do
                 end if
                   exp2theta = 1.d0
                   exptheta = 1.d0
                end if
                icycle = ceiling(dble(min(max_elements,iba(ik)))/icolumn)
                istart = 1
                do ic = 1, icycle
                   iend = min(istart+icolumn-1,max_elements,iba(ik))
                   write(nfout,'(" !wf (nx)    ",10i10)') (ngabc(nbase(i,ik),1),i=istart,iend)
                   write(nfout,'(" !wf (ny)    ",10i10)') (ngabc(nbase(i,ik),2),i=istart,iend)
                   write(nfout,'(" !wf (nz)    ",10i10)') (ngabc(nbase(i,ik),3),i=istart,iend)
                 if(precision_WFfile==SP) then
                   write(nfout,'(" !wf (zaj-r) ",10d10.2)') (wf_l(i,1),i=istart,iend)
                   if(kimg == 2) write(nfout,'(" !wf (zaj-i) ",10d10.2)') (wf_l(i,2),i=istart,iend)
                   write(nfout,'(" !wf abs     ",10d10.2)') (wf_l(i,kimg+1),i=istart,iend)
                   if(kimg == 2) then
                      if(k_symmetry(ik) == GAMMA .or. k_symmetry(ik) == GAMMA_base_symmetrization) then
                         write(nfout,'(" !wf (zaj-r)d",10d10.2)') (wf_l(i,4),i=istart,iend)
                         write(nfout,'(" !wf (zaj-i)d",10d10.2)') (wf_l(i,5),i=istart,iend)
                      end if
                   end if
                 else
                   write(nfout,'(" !wf (zaj-r) ",10d10.2)') (wfdp_l(i,1),i=istart,iend)
                   if(kimg == 2) write(nfout,'(" !wf (zaj-i) ",10d10.2)') (wfdp_l(i,2),i=istart,iend)
                   write(nfout,'(" !wf abs     ",10d10.2)') (wfdp_l(i,kimg+1),i=istart,iend)
                   if(kimg == 2) then
                      if(k_symmetry(ik) == GAMMA .or. k_symmetry(ik) == GAMMA_base_symmetrization) then
                         write(nfout,'(" !wf (zaj-r)d",10d10.2)') (wfdp_l(i,4),i=istart,iend)
                         write(nfout,'(" !wf (zaj-i)d",10d10.2)') (wfdp_l(i,5),i=istart,iend)
                      end if
                   end if
                 end if
                   istart = iend+1
                end do
             end if
          end do
       end do
      if(precision_WFfile==SP) then
       deallocate(wf_l)
      else
       deallocate(wfdp_l)
      end if
    end if
    call tstatc0_end(id_sname)
  end subroutine m_ESIO_wd_WFs_standardout

  subroutine m_ESIO_rd_WFs_and_EVs_ek(nfout,nf)
    integer, intent(in) :: nfout,nf
    integer  :: ik, ie, iks, ri, ikg, ikt, ike, ike2
    integer, allocatable, dimension(:,:) :: n_mpi, n2_mpi  ! MPI
    real(DP),allocatable, dimension(:,:) :: e_mpi, e2_mpi  ! MPI
!!$    read(nf) neordr,nrvf_ordr,eko_l,occup_l,efermi,totch

    allocate(n_mpi(neg,nspin)); allocate(n2_mpi(neg,nspin)) ! MPI
    allocate(e_mpi(neg,nspin)); allocate(e2_mpi(neg,nspin)) ! MPI
    allocate(wf_l(kg1,kimg))

    n_mpi =0; n2_mpi = 0
    e_mpi =0; e2_mpi = 0; wf_l = 0
    if(ipri >= 1) write(nfout,*) ' !D Reading zaj'

    rewind nf

    eko_ek = 0.d0
!!$    do ik = 1, nk_in_the_process-kv3, kv3
    do ik = 1, nk_in_the_process - nspin, nspin
       if(ipri >= 1) write(nfout,*) ' !D     skipping ik = ', ik
!!$       do iks = 1, kv3, af+1
       do iks = 1, nspin, af+1
          do ie = 1, neg
             if(mype == 0) read(nf) wf_l
          end do
       end do
       if(mype == 0) read(nf) n_mpi
       if(mype == 0) read(nf) n2_mpi
       if(mype == 0) read(nf) e_mpi

       if(mype == 0) then
!!$          do iks = 1, kv3
          do iks = 1, nspin
             do ie = 1, neg
                eko_ek(ie,ik+iks-1) = e_mpi(n_mpi(ie,iks),iks)
             end do
          end do
          if(ipri >= 3) then
!!$             do iks=1,kv3
             do iks=1,nspin
                write(nfout,'(" ik = ",i5)') ik+iks-1
                write(nfout,'(8f8.4)') (e_mpi(n_mpi(ie,iks),iks),ie=1,neg)
             end do
          end if
       end if
    end do
       
    if(nk_in_the_process > kv3_ek) goto 1001

!!$    do ik = 1, kv3, af+1
    KPOINT_LOOP: do ikg = 1, nrank_k
       do ikt = 1, nspin, af+1
          ik = (ikg-1)*nspin+ikt
          if(nk_in_the_process -1 + ik > kv3_ek) exit KPOINT_LOOP
          if(nk_in_the_process -1 + ik > numk_zajsaved) exit KPOINT_LOOP
          if(ipri>=1) write(nfout,*) ' !D     reading  ik = ', ik+first_kpoint_in_this_job-1

          do ie = 1, neg
             if(ipri>=2) write(nfout,*) ' !D     ie = ', ie
             if(mype == 0) read(nf,err=2,end=2) wf_l     ! MPI
             if(mype == 0 .and. map_ek(ie,ik) /= 0) then ! MPI
                call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI
             else if(map_ek(ie,ik) == mype .and. map_ek(ie,ik) /= 0) then                  ! MPI
                call mpi_recv(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr)     ! MPI
             end if
             if(map_ek(ie,ik) == mype) then              ! MPI
                do ri = 1, kimg
                   zaj_l(1:kg1,map_z(ie),ik,ri) = wf_l(1:kg1,ri)  ! MPI
                end do
             end if
          end do
       end do

       if(mype == 0) read(nf) n_mpi                ! MPI
       if(mype == 0) read(nf) n2_mpi               ! MPI
       call mpi_bcast(n_mpi,neg*nspin,mpi_integer,0,mpi_comm_group,ierr) ! MPI
       call mpi_bcast(n2_mpi,neg*nspin,mpi_integer,0,mpi_comm_group,ierr)! MPI

       do ikt = 1, nspin                             ! MPI
          ik = (ikg-1)*nspin + ikt
          if(map_k(ik) == myrank_k) then
             neordr(1:neg,ik) = n_mpi(1:neg,ikt)     ! MPI
             nrvf_ordr(1:neg,ik) = n2_mpi(1:neg,ikt) ! MPI
          end if
       end do                                        ! MPI

       if(mype == 0) read(nf) e_mpi                ! MPI
       call mpi_bcast(e_mpi,neg*nspin,mpi_double_precision,0,mpi_comm_group,ierr) ! MPI

       do ikt = 1, nspin                                   ! MPI
          ik = (ikg-1)*nspin + ikt
          do ie = 1, neg                                   ! MPI
             if(map_ek(ie,ik) == mype) then                ! MPI
                eko_l(map_z(ie),ik) = e_mpi(ie,ikt)        ! MPI
             end if                                        ! MPI
          end do                                           ! MPI
          do ie = 1, neg
             eko_ek(ie,nk_in_the_process+ik-1) = e_mpi(n_mpi(ie,ikt),ikt)
          end do
       end do                                              ! MPI
       goto 3
2      continue
       stop ' eof from nf <<m_ESIO_rd_WFs_and_EVs_ek>>'
3      continue
    end do KPOINT_LOOP

    if(ipri>=2) write(nfout,*) ' !D     ikg  = ', ikg
!!$    if(ikg <= 1) stop ' ikg <= 1 <<m_ESIO_rd_WFs_and_EVs_ek>>'
    if(ikg < nrank_k .and. ikg > 1 ) then
       do ike = ikg, nrank_k
          if(ipri>=2) write(nfout,*) ' !D     ike  = ', ike
          if(ipri>=2) write(nfout,*) ' !D     zaj_l'
          do ikt = 1, nspin, af+1
             ik   = (ikg-2)*nspin+ikt
             ike2 = (ike-1)*nspin+ikt
             if(ipri>=2) write(nfout,*) ' !D     ik, ike2 = ',ik,ike2
             do ie = 1, neg
                if(map_ek(ie,ik) == mype) then
                   do ri = 1, kimg
                      wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri)
                   end do
                end if

                if(map_ek(ie,ik) == mype) then
                   call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ike2),1,mpi_comm_group,ierr)
                else if(map_ek(ie,ike2) == mype) then
                   call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)
                end if
                if(map_ek(ie,ike2) == mype) then
                   do ri = 1, kimg
                      zaj_l(1:kg1,map_z(ie),ike2,ri) = wf_l(1:kg1,ri)
                   end do
                end if
             end do
          end do

          ! ---> neordr, nrvf_ordr
          do ikt = 1, nspin
             ik = (ikg-2)*nspin+ikt
             if(map_k(ik) == myrank_k) then
                n_mpi(1:neg,ikt) = neordr(1:neg,ik)
                n2_mpi(1:neg,ikt) = nrvf_ordr(1:neg,ik)
             end if
          end do
          call mpi_bcast(n_mpi,neg*nspin,mpi_integer,map_k(ik),mpi_comm_group,ierr)
          call mpi_bcast(n2_mpi,neg*nspin,mpi_integer,map_k(ik),mpi_comm_group,ierr)

          if(ipri>=2) write(nfout,*) ' !D     neordr and nrvf_ordr'
          do ikt = 1, nspin
             ike2 = (ike-1)*nspin+ikt
             if(map_k(ike2) == myrank_k) then
                neordr(1:neg,ike2) = n_mpi(1:neg,ikt)     ! MPI
                nrvf_ordr(1:neg,ike2) = n2_mpi(1:neg,ikt) ! MPI
             end if
          end do

          ! ---> eko_l
          if(ipri>=2) write(nfout,*) ' !D     eko_l'
          e_mpi = 0.d0
          do ikt = 1, nspin
             ik = (ikg-2)*nspin+ikt
             if(ipri>=2) write(nfout,'(" !D eko_l ik = ",i6)') ik
             do ie = 1, neg
                if(map_ek(ie,ik) == mype) then
                   e_mpi(ie,ikt) = eko_l(map_z(ie),ik)
                end if
             end do
          end do
          if(npes >= 2) then
             call mpi_allreduce(e_mpi,e2_mpi,neg*nspin,mpi_double_precision &
                  & ,mpi_sum,mpi_comm_group,ierr)
          else
             e2_mpi  = e_mpi
          end if

          if(ipri>=2) then
             write(nfout,'(" <<m_ESIO_rd_WFs_and_EVs_ek>>")')
             do ikt = 1, nspin
                ik = (ikg-2)*nspin+ikt
                write(nfout,'(" ik = ",i5)') ik
                write(nfout,'(10f8.4)') (e2_mpi(ie,ikt),ie=1,neg)
             end do
          end if

          do ikt = 1, nspin
             ike2 = (ike-1)*nspin+ikt
             if(ipri>=2) write(nfout,'(" !D eko_l ike2 = ",i6)') ike2
             do ie = 1, neg
                if(map_ek(ie,ike2) == mype) then
                   eko_l(map_z(ie),ike2) = e2_mpi(ie,ikt)
                end if
             end do
          end do

       end do
    end if

1001 continue

    if(npes >= 2) &
         & call mpi_bcast(eko_ek,neg*kv3_ek,mpi_double_precision,0,mpi_comm_group,ierr)

    if(nk_in_the_process > kv3_ek) goto 1002
    call m_ESIO_wd_EigenValues(nfout,2,nooccupation=YES)

1002 continue

!!$    ik = nk_in_the_process
!!$    do iks = 1, kv3
!!$          do ie = 1, neg
!!$             eko_ek(ie,ik+iks-1) = e_mpi(n_mpi(ie,iks),iks)
!!$          end do
!!$       end do
!!$    end if
!!$
    if(ipri >= 1) then
       write(nfout,'(" <<m_ESIO_rd_WFs_and_EVs_ek>>")')
       do iks = 1, kv3_ek
          if(iks > numk_zajsaved) cycle
          write(nfout,'(" ik = ",i5)') iks
          write(nfout,'(10f8.4)') (eko_ek(ie,iks),ie=1,neg)
       end do
    end if


    rewind nf

    do ik = 1, nk_in_the_process-nspin, nspin
       do iks = 1, nspin, af+1
          do ie = 1, neg
             if(mype == 0) read(nf) wf_l                 ! MPI
          end do
       end do
       if(mype == 0) read(nf) n_mpi                   ! MPI
       if(mype == 0) read(nf) n2_mpi                  ! MPI
       if(mype == 0) read(nf) e_mpi                   ! MPI
    end do

    deallocate(wf_l)                                    ! MPI
    deallocate(n_mpi); deallocate(n2_mpi)               ! MPI
    deallocate(e_mpi); deallocate(e2_mpi)               ! MPI

!!$    write(nfout,'(" ---<< m_ESIO_rd_WFs_and_EVs_ek>>---")')
  end subroutine m_ESIO_rd_WFs_and_EVs_ek

  subroutine m_ESIO_rd_EVs_ek(nfout,nf)
    integer, intent(in) :: nfout,nf
    integer  :: ik, ie, iks
    integer, allocatable, dimension(:,:) :: n_mpi, n2_mpi  ! MPI
    real(DP),allocatable, dimension(:,:) :: e_mpi, e2_mpi  ! MPI

    allocate(n_mpi(neg,nspin)); allocate(n2_mpi(neg,nspin)) ! MPI
    allocate(e_mpi(neg,nspin)); allocate(e2_mpi(neg,nspin)) ! MPI
    allocate(wf_l(kg1,kimg))
    n_mpi =0; n2_mpi = 0
    e_mpi =0; e2_mpi = 0; wf_l = 0

    if(ipri >= 1) write(nfout,*) ' !D Reading zaj <<m_ESIO_rd_EVs_ek>>'

    rewind nf

    eko_ek = 0.d0
!!$    do ik = 1, nk_in_the_process-kv3, kv3
    do ik = 1, nk_in_the_process - nspin, nspin
       if(ipri >= 1) write(nfout,*) ' !D     skipping ik = ', ik
!!$       do iks = 1, kv3, af+1
       do iks = 1, nspin, af+1
          do ie = 1, neg
             if(mype == 0) read(nf) wf_l
          end do
       end do
       if(mype == 0) read(nf) n_mpi
       if(mype == 0) read(nf) n2_mpi
       if(mype == 0) read(nf) e_mpi

       if(mype == 0) then
          do iks = 1, nspin
             do ie = 1, neg
                eko_ek(ie,ik+iks-1) = e_mpi(n_mpi(ie,iks),iks)
             end do
          end do
          if(ipri >= 3) then
             do iks=1,nspin
                write(nfout,'(" ik = ",i5)') ik+iks-1
                write(nfout,'(8f8.4)') (e_mpi(n_mpi(ie,iks),iks),ie=1,neg)
             end do
          end if
       end if
    end do
       
    write(nfout,'(" ---<< m_ESIO_rd_EVs_ek>>---")')
  end subroutine m_ESIO_rd_EVs_ek

  subroutine m_ESIO_wd_Psicoef(ipri,nfout,nf)
    integer, intent(in) :: ipri,nfout, nf

    integer, parameter :: Ncol = 5
    integer :: ik, ie, ri,  nel, ig, ib,ib1,ib2,ibt,ibsize
    integer, allocatable, dimension(:) :: n_mpi
    real(DP),allocatable, dimension(:) :: e_mpi
    real(DP),allocatable, dimension(:,:,:) :: wf

    integer :: id_sname = -1
    call tstatc0_begin('m_ESIO_wd_Psicoef ',id_sname)

    allocate(e_mpi(neg)); e_mpi = 0.d0
    allocate(n_mpi(neg)); n_mpi = 0

    if(mype == 0) write(nf,'(" !!COEFFICIENTS of WAVE functions")')

    KPOINT: do ik = 1, kv3, af+1
       nel = min(Nw_Psicoef,iba(ik))
       allocate(wf_l(nel,kimg)); wf_l = 0.d0
       call wd_k_points()
       e_mpi = 0.d0
       n_mpi = 0
       if(map_k(ik) == myrank_k) then
!!$          if(ipri>=1) then
          if(mype == 0) then
             write(nfout,'(" ik = ", i4, " neordr ")') ik
             write(nfout,'(8i8)') (neordr(ie,ik),ie=1,neg)
          end if
          do ie = 1, neg
!!$             if(map_ek(ie,ik) == mype) n_mpi(ie) = neordr(ie,ik)
             n_mpi(ie) = neordr(ie,ik)
             if(map_e(ie) /= myrank_e) cycle
             e_mpi(ie) = eko_l(map_z(ie),ik)
          end do
       end if
       if(npes>=2) call mpi_allreduce(MPI_IN_PLACE,e_mpi,neg,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
       if(nrank_k>=2) then
          if(map_ek(1,ik) == mype .and. map_ek(1,ik) /= 0) then
             call mpi_send(n_mpi,neg,mpi_integer,0,1,mpi_comm_group,ierr)
          else if(mype == 0 .and. map_ek(1,ik) /= 0) then
             call mpi_recv(n_mpi,neg,mpi_integer,map_ek(1,ik),1,mpi_comm_group,istatus,ierr)
          end if
!!$          call mpi_allreduce(MPI_IN_PLACE,n_mpi,neg,mpi_integer,mpi_sum,mpi_comm_group,ierr)
       end if
       do ie = 1, neg, Ncol
          ib1 = ie; ib2 = min(ie+Ncol-1,neg)
          ibsize = ib2-ib1+1
          if(mype == 0) then
             allocate(wf(nel,kimg,ibsize)); wf=0.d0
          end if
          do ib = ib1, ib2
!!$             ibt = n_mpi(ib)
             ibt = ib
             if(map_ek(ibt,ik) == mype) then                          ! MPI
                do ri = 1, kimg
                   wf_l(1:nel,ri) = zaj_l(1:nel,map_z(ibt),ik,ri)
                end do
                if(map_ek(ibt,ik) /= 0) &                             ! MPI
                     &   call mpi_send(wf_l,nel*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
             else if(mype == 0 .and. map_ek(ibt,ik) /= 0) then
                call mpi_recv(wf_l,nel*kimg,mpi_real,map_ek(ibt,ik),1,mpi_comm_group,istatus,ierr)!MPI
             end if
             if(mype == 0) wf(:,:,ib-ib1+1) = wf_l(:,:)
          end do
          call wd_eko(ib1,ib2)
!!$          if(ik <= 2) then
             do ig = 1, nel
                call wd_coef(ibsize,ig)
             end do
!!$          end if
          if(mype == 0) deallocate(wf)
       end do
       deallocate(wf_l)
    end do KPOINT

    call tstatc0_end(id_sname)
  contains
    subroutine wd_coef(ibsize,ig)
      integer, intent(in) :: ibsize,ig
      integer :: ib
!!$      character(3) :: nan
      if(mype == 0) then
         if(kimg==2) then
            write(nf,'(i4," ( ",3i4," )", 5(" (",2f11.5," )"))') &
                 & ig, ngabc(nbase(ig,ik),1:3),(wf(ig,1,ib),wf(ig,2,ib),ib=1,ibsize)
!!$                 & ig, ngabc(nbase(ig,ik),1:3),(wf(ig,1,ie),wf(ig,2,ie),ie=1,3)
         else
!!$            nan="---"
            write(nf,'(i4," ( ",3i4," )", 5(" (",f11.5,4x,"---",4x," )"))') &
                 & ig, ngabc(nbase(ig,ik),1:3),(wf(ig,1,ib),ib=1,ibsize)
!!$            write(nf,'(i4," ( ",3i4," )", 5f11.5)') &
!!$                 & ig, ngabc(nbase(ig,ik),1:3),(wf(ig,1,ie),ie=1,ibsize)
         end if
      end if
    end subroutine wd_coef

    subroutine wd_eko(ib1,ib2)
      integer, intent(in) :: ib1,ib2
      integer :: ie
      if(mype == 0) then
         write(nf,'(a12,5x,a5,5(i7,2x,f10.5,7x))') "ig", "\ e: ", (ie,e_mpi(n_mpi(ie)),ie=ib1,ib2)
      end if
    end subroutine wd_eko

    subroutine wd_k_points()
      if(mype == 0) then
         if(nspin == 1) then
            write(nf,'(" ik = ",i6,"    ( ",3f14.6," )")') ik,(vkxyz(ik,1:3,BUCS))
         else
            if(mod(ik,2) == 1) then
               write(nf,'(" ik = ",i6,"    UP ","    ( ",3f14.6," )")') ik,(vkxyz(ik,1:3,BUCS))
            else
               write(nf,'(" ik = ",i6,"  DOWN ","    ( ",3f14.6," )")') ik,(vkxyz(ik,1:3,BUCS))
            end if
         end if
      end if
    end subroutine wd_k_points
  end subroutine m_ESIO_wd_Psicoef

  subroutine m_ESIO_wd_BandSymInput(ipri,nfout,nf)
    integer, intent(in) :: ipri,nfout, nf

    integer :: ik, ie, ri,  nel, ig, ib,ib1,ib2,ibt,ibsize
    integer, allocatable, dimension(:) :: n_mpi
    real(DP),allocatable, dimension(:) :: e_mpi

    integer :: id_sname = -1
    call tstatc0_begin('m_ESIO_wd_BandSymInput ',id_sname)

    allocate(e_mpi(neg)); e_mpi = 0.d0
    allocate(n_mpi(neg)); n_mpi = 0

    if(mype == 0) then
       write(nf,'("##PSIINPSTART")')
       write(nf,'("#MAGNETIC_STATE")')
       if(nspin == 2 .and. af == 0) then
          write(nf,'("2  0 ! nspin = 2, af = 0 (FERRO)"/"#")')
       else if(nspin == 2 .and. af == 1) then
          write(nf,'("2  1 ! nspin = 2, af = 1 (ANTIFERRO)"/"#")')
       else if(nspin == 1) then
          write(nf,'("1  0 ! nspin = 1, af = 0 (PARAMAGNETIC)"/"#")')
       else
          write(nf,'(2i5," ! magnetic state = unknown"/"#")') nspin, af
       end if
    end if

    KPOINT: do ik = 1, kv3, af+1

       nel = min(Nw_Psicoef,iba(ik))
       allocate(wf_l(nel,kimg)); wf_l = 0.d0
       call wd_k_points()
       ! --- Eigen Energies --->
       e_mpi = 0.d0
       n_mpi = 0
       if(map_k(ik) == myrank_k) then
          if(ipri>=1) then
             write(nfout,'(" ik = ", i4, " neordr ")') ik
             write(nfout,'(8i8)') (neordr(ie,ik),ie=1,neg)
          end if
          do ie = 1, neg
!!$             if(map_ek(ie,ik) == mype) n_mpi(ie) = neordr(ie,ik)
             n_mpi(ie) = neordr(ie,ik)
             if(map_e(ie) /= myrank_e) cycle
             e_mpi(ie) = eko_l(map_z(ie),ik)
          end do
       end if
       if(npes>=2) call mpi_allreduce(MPI_IN_PLACE,e_mpi,neg,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
       if(nrank_k>=2) then
          if(map_ek(1,ik) == mype .and. map_ek(1,ik) /= 0) then
             call mpi_send(n_mpi,neg,mpi_integer,0,1,mpi_comm_group,ierr)
          else if(mype == 0 .and. map_ek(1,ik) /= 0) then
             call mpi_recv(n_mpi,neg,mpi_integer,map_ek(1,ik),1,mpi_comm_group,istatus,ierr)
          end if
!!$          call mpi_allreduce(MPI_IN_PLACE,n_mpi,neg,mpi_integer,mpi_sum,mpi_comm_group,ierr)
       end if
       call wd_eko(1, neg)
       ! <--- 
       if(mype == 0) write(nf,'("#Number_of_Gvectors"/,i0,/,"#")') nel
       call wd_gvectors(ik)

       ! --- Wave Function Coefficients --->
      if(mype == 0)  write(nf,'("#PSICOEF")')
       do ie = 1, neg
          if(mype == 0)  write(nf,'(i0,"  #EIGEN_ENERGY NUMBER")') ie
          if(map_ek(ie,ik) == mype) then
             wf_l(1:nel,1:kimg) = zaj_l(1:nel,map_z(ie),ik,1:kimg)
             if(map_ek(ie,ik) /= 0) &
                  &   call mpi_send(wf_l,nel*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
          else if(mype == 0 .and. map_ek(ie,ik) /= 0) then
             call mpi_recv(wf_l,nel*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
          end if
          call wd_coef2()
       end do
       if(mype == 0) write(nf,'("#")')
       deallocate(wf_l)
    end do KPOINT
    if(mype == 0) write(nf,'("##")')

    call tstatc0_end(id_sname)
  contains
    subroutine wd_coef2()
      integer :: ig
      if(mype == 0) then
         if(kimg==2) then
            write(nf,'(8f11.5)') (wf_l(ig,1),wf_l(ig,2),ig=1,nel)
         else
            write(nf,'(8f11.5)') (wf_l(ig,1), 0.d0, ig=1,nel)
         end if
      end if
    end subroutine wd_coef2

    subroutine wd_gvectors(ik)
      integer, intent(in) :: ik
      integer :: i
      if(mype == 0) then
         write(nf,'("#GVECTOR")')
         do i = 1, nel
            write(nf,'(i0,1x,3i6)') i,ngabc(nbase(i,ik),1:3)
         end do
         write(nf,'("#")')
      end if
    end subroutine wd_gvectors

    subroutine wd_nelements()
      if(mype == 0) write(nf,'("#Number_of_Gvectors"/,i0,/,"#")') nel
    end subroutine wd_nelements

    subroutine wd_eko(ib1,ib2)
      integer, intent(in) :: ib1,ib2
      integer :: ie
      if(mype == 0) then
         write(nf,'("#EIGEN_ENERGY")')
         do ie = ib1,ib2
            write(nf,'(i0,2x,f10.5)') ie,e_mpi(n_mpi(ie))
         end do
         write(nf,'("#")')
      end if
    end subroutine wd_eko

    subroutine wd_k_points()
      character(len=4),dimension(3) :: spinstate
      data spinstate/"    ","  UP","DOWN"/
      integer :: ip
      if(mype == 0) then
         if(nspin == 1) then
            ip = 1
         else
            if(mod(ik,2) == 1) then
               ip = 2
            else
               ip = 3
            end if
         end if
         write(nf,'("#KPOINT"/,i06,3f14.6,2x,a4,/"#")') ik,(vkxyz(ik,1:3,BUCS)),spinstate(ip)
      end if
    end subroutine wd_k_points
  end subroutine m_ESIO_wd_BandSymInput

  subroutine m_ESIO_wd_WFs_and_EVs_ek(nfout,nf)
    integer, intent(in) :: nfout, nf
    integer  :: ik, ie, ri, ikg, ikt
    integer, allocatable, dimension(:,:) :: n_mpi
    real(DP),allocatable, dimension(:,:) :: e_mpi
    integer :: id_sname = -1
    call tstatc0_begin('m_ESIO_wd_WFs_and_EVs_ek ',id_sname)

    if(precision_WFfile==SP) then
       allocate(wf_l(kg1,kimg));    wf_l = 0
    else
       allocate(wfdp_l(kg1,kimg));    wfdp_l = 0
    end if

    call mpi_barrier(mpi_comm_group,ierr)
    if(ipri>=2) write(nfout,'(" !D Writing WaveFunctions ")')

    allocate(n_mpi(neg,nspin)); n_mpi = 0
    allocate(e_mpi(neg,nspin)); e_mpi = 0.d0

    KPOINT: do ikg = 1, nrank_k
       ! ---> zaj_l
       if((ikg-1)*nspin + 1 > kv3) then
          if(ipri >= 1) write(nfout,'(" !D ik > kv3")')
          exit KPOINT
       end if

       do ikt = 1, nspin, af+1
          ik = (ikg-1)*nspin + ikt
          if(ipri>=1) write(nfout,'(" !D Writing WaveFunctions ik = ",i5)') ik
          do ie = 1, neg
           if(precision_WFfile==SP) then
             if(map_ek(ie,ik) == mype) then                          ! MPI
                do ri = 1, kimg
                   wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri)
                end do
                if(map_ek(ie,ik) /= 0) &                             ! MPI
                     &   call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
             else if(mype == 0 .and. map_ek(ie,ik) /= 0) then
                call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
             end if
             if(mype == 0) write(nf)  wf_l                        ! MPI
           else if(precision_WFfile==DP) then
             if(map_ek(ie,ik) == mype) then
                do ri = 1, kimg
                   wfdp_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri)
                end do
                if(map_ek(ie,ik) /= 0) &  
                     &   call mpi_send(wfdp_l,kg1*kimg,mpi_double_precision,0,1,mpi_comm_group,ierr)
             else if(mype == 0 .and. map_ek(ie,ik) /= 0) then
                call mpi_recv(wfdp_l,kg1*kimg,mpi_double_precision,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)
             end if
             if(mype == 0) write(nf)  wfdp_l
           end if
          end do
       end do

       ! --->  neordr
       n_mpi = 0                                          ! MPI
       do ikt = 1, nspin                                  ! MPI
          ik = (ikg-1)*nspin + ikt
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          n_mpi(1:neg,ikt) = neordr(1:neg,ik)             ! MPI
       end do                                             ! MPI
       if(npes >= 2) then
          call mpi_allreduce(MPI_IN_PLACE,n_mpi,neg*nspin,mpi_integer,mpi_sum &
               &                      ,mpi_comm_group,ierr)  ! MPI
          n_mpi = n_mpi/nrank_e
       end if
       if(ipri>=2) write(nfout,'(" !D Writing neordr ik = ",i5)') ik
       if(mype == 0) write(nf) n_mpi             ! MPI ; writing (neordr)

       ! --->  nrvf_ordr
       n_mpi = 0                                          ! MPI
       do ikt = 1, nspin                                  ! MPI
          ik = (ikg-1)*nspin + ikt
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          n_mpi(1:neg,ikt) = nrvf_ordr(1:neg,ik)          ! MPI
       end do                                             ! MPI
       if(npes >= 2) then
          call mpi_allreduce(MPI_IN_PLACE,n_mpi,neg*nspin,mpi_integer,mpi_sum &
               &                      ,mpi_comm_group,ierr)  ! MPI
          n_mpi = n_mpi/nrank_e
       end if
       if(ipri>=2) write(nfout,'(" !D Writing nrvf_ordr ik = ",i5)') ik
       if(mype == 0) write(nf) n_mpi             ! MPI ; writing (nrvf_ordr)

       e_mpi = 0.d0                                       ! MPI
       do ikt = 1, nspin                                  ! MPI
          ik = (ikg-1)*nspin + ikt
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          do ie = 1, neg                                  ! MPI
             if(map_e(ie) /= myrank_e) cycle              ! MPI
             e_mpi(ie,ikt) = eko_l(map_z(ie),ik)           ! MPI
          end do
       end do
       if(npes >= 2) then
          call mpi_allreduce(MPI_IN_PLACE,e_mpi,neg*nspin,mpi_double_precision &
               &               ,mpi_sum,mpi_comm_group,ierr) ! MPI
       end if
       if(ipri>=2) write(nfout,'(" !D Writing eko_l ik = ",i5)') ik
       if(mype == 0) write(nf) e_mpi             ! MPI ; writing (eko_l)
    end do KPOINT

!!$    do ik = 1, kv3, af+1
!!$       do ie = 1, neg
!!$          if(map_ek(ie,ik) == mype) then                          ! MPI
!!$             do ri = 1, kimg
!!$                wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri)
!!$             end do
!!$             if(map_ek(ie,ik) /= 0) &                             ! MPI
!!$            &   call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
!!$          else if(mype == 0 .and. map_ek(ie,ik) /= 0) then
!!$             call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
!!$          end if
!!$          if(mype == 0) write(nf)  wf_l                        ! MPI
!!$       end do
!!$    end do
!!$    deallocate(wf_l)
!!$
!!$    allocate(n_mpi(neg,kv3)); allocate(n2_mpi(neg,kv3))! MPI
!!$    allocate(e_mpi(neg,kv3)); allocate(e2_mpi(neg,kv3))! MPI
!!$
!!$    n_mpi = 0                                          ! MPI
!!$    do ik = 1, kv3                                     ! MPI
!!$       if(map_k(ik) /= myrank_k) cycle                 ! MPI
!!$       n_mpi(1:neg,ik) = neordr(1:neg,ik)              ! MPI
!!$    end do                                             ! MPI
!!$    if(npes >= 2) then
!!$       call mpi_allreduce(n_mpi,n2_mpi,neg*kv3,mpi_integer,mpi_sum &
!!$            &                      ,mpi_comm_group,ierr)  ! MPI
!!$       n2_mpi = n2_mpi/nrank_e
!!$    else
!!$       n2_mpi = n_mpi
!!$    end if
!!$    if(mype == 0) write(nf) n2_mpi             ! MPI ; writing (neordr)
!!$
!!$    n_mpi = 0                                          ! MPI
!!$    do ik = 1, kv3                                     ! MPI
!!$       if(map_k(ik) /= myrank_k) cycle                 ! MPI
!!$       n_mpi(1:neg,ik) = nrvf_ordr(1:neg,ik)           ! MPI
!!$    end do                                             ! MPI
!!$    if(npes >= 2) then
!!$       call mpi_allreduce(n_mpi,n2_mpi,neg*kv3,mpi_integer,mpi_sum &
!!$            &                      ,mpi_comm_group,ierr)  ! MPI
!!$       n2_mpi = n2_mpi/nrank_e
!!$    else
!!$       n2_mpi = n_mpi
!!$    end if
!!$    if(mype == 0) write(nf) n2_mpi             ! MPI ; writing (nrvf_ordr)
!!$
!!$    e_mpi = 0.d0                                       ! MPI
!!$    do ik = 1, kv3                                     ! MPI
!!$       if(map_k(ik) /= myrank_k) cycle                 ! MPI
!!$       do ie = 1, neg                                  ! MPI
!!$          if(map_e(ie) /= myrank_e) cycle              ! MPI
!!$          e_mpi(ie,ik) = eko_l(map_z(ie),ik)           ! MPI
!!$       end do
!!$    end do
!!$    if(npes >= 2) then
!!$       call mpi_allreduce(e_mpi,e2_mpi,neg*kv3,mpi_double_precision &
!!$            &               ,mpi_sum,mpi_comm_group,ierr) ! MPI
!!$    else
!!$       e2_mpi = e_mpi
!!$    end if
!!$    if(mype == 0) write(nf) e2_mpi             ! MPI ; writing (eko_l)

!!$    e_mpi = 0.d0                                       ! MPI
!!$    do ik = 1, kv3                                     ! MPI
!!$       if(map_k(ik) /= myrank_k) cycle                 ! MPI
!!$       do ie = 1, neg                                  ! MPI
!!$          if(map_e(ie) /= myrank_e) cycle              ! MPI
!!$          e_mpi(ie,ik) = occup_l(map_z(ie),ik)         ! MPI
!!$       end do                                          ! MPI
!!$    end do                                             ! MPI
!!$    if(npes >= 2) then
!!$       call mpi_allreduce(e_mpi,e2_mpi,neg*kv3,mpi_double_precision &
!!$            &                  ,mpi_sum,mpi_comm_group,ierr) ! MPI
!!$    else
!!$       e2_mpi = e_mpi
!!$    end if
!!$    if(mype == 0) write(nf) e2_mpi             ! MPI ; writing (occup_l)

    if(precision_WFfile==SP) then
       deallocate(wf_l)
    else if(precision_WFfile==DP) then
       deallocate(wfdp_l)
    end if
    deallocate(n_mpi)
    deallocate(e_mpi)

    call tstatc0_end(id_sname)

  end subroutine m_ESIO_wd_WFs_and_EVs_ek

  subroutine m_ESIO_wd_WFn(nfout,nfwfk,ik,ib)
    integer, intent(in) :: nfout,nfwfk
    integer, intent(in) :: ik,ib
    integer :: jb
    real(kind=DP), allocatable :: bfft(:)
    real(kind=DP) :: eig
    integer :: id_sname = -1
    call tstatc0_begin('m_ESIO_wd_WFn ',id_sname)

    allocate(bfft(nfft));    bfft = 0
    call mpi_barrier(mpi_comm_group,ierr)
    if(ipri >= 1) write(nfout,*) ' !D Writing Wavefunctions '
    rewind nfwfk

    if(map_k(ik) == myrank_k) jb = neordr(ib,ik)
    if(nrank_k > 1) call mpi_bcast(jb,1,mpi_integer,map_k(ik),mpi_e_world(myrank_e),ierr)

    if(map_ek(jb,ik) == mype) then 
       call m_ES_WF_in_Rspace(ik,jb,bfft)
       eig = eko_l(map_z(jb),ik)
       if(map_ek(jb,ik) /= 0) then 
          call mpi_send(bfft,nfft,mpi_double_precision,0,1,mpi_comm_group,ierr)
          call mpi_send(eig,1,mpi_double_precision,0,1,mpi_comm_group,ierr)
       end if
    else if(mype == 0 .and. map_ek(jb,ik) /= 0) then
       call mpi_recv(bfft,nfft,mpi_double_precision,map_ek(jb,ik),1,mpi_comm_group,istatus,ierr)
       call mpi_recv(eig,1,mpi_double_precision,map_ek(jb,ik),1,mpi_comm_group,istatus,ierr)
    end if
    if(mype == 0) then 
       call wd_wf(nfout,nfwfk,ik,ib,eig)
    end if
    deallocate(bfft)
    call tstatc0_end(id_sname)
  contains
    subroutine wd_wf(nfout,nfwfk,ik,ib,eig)
      integer, intent(in) :: nfout,nfwfk,ik,ib
      real(kind=DP), intent(in) :: eig
      integer :: i,j,k, id, nl, nm, nn, nlhf,inew,jnew,knew,ip,mm
      real(kind=DP),allocatable,dimension(:,:,:,:) :: wkwf
      real(kind=DP) ::      x,y,z
      integer, parameter :: UP = 1 , DOWN = 2
      integer ::            up_down
      real(kind=DP),allocatable,dimension(:,:) :: cps_full
      integer, allocatable,dimension(:) :: ityp_full
      integer :: m, nk
      real(kind=DP) :: norm
      real(kind=DP) :: normr,normi
      integer :: n1,n2,n3
      real(kind=DP) :: dn1,dn2,dn3
      integer :: icomp

      id = fft_box_size_WF(1,0)
      mm = fft_box_size_WF(2,0)
      nl = fft_box_size_WF(1,1)
      nm = fft_box_size_WF(2,1)
      nn = fft_box_size_WF(3,1)

      if(kimg == 1) then
         nlhf = id/2
      else
         nlhf = id
      end if

      if(wf_filetype == DENSITY_ONLY &
      & .or. wf_filetype == VTK &
      & .or. wf_filetype == BINARY) then
         allocate(wkwf(nl,nm,nn,2)); wkwf = 0.d0
      else if(wf_filetype == CUBE) then
         allocate(wkwf(nn,nm,nl,2)); wkwf = 0.d0
      end if

      if(nspin==2) then
         if(mod(ik/nspin,2) == 1) then
            up_down = UP
         else
            up_down = DOWN
         end if
      end if
      nk = (ik-1)/nspin+1

      if(ipri >= 2) write(nfout,9001) nl*nm*nn, nl, nm, nn
9001  format(' Wavefunction ',i8,'(',3i5,')')

      if(ipri >= 2) write(nfout,*) ' !D FFT cube mapping start'
      do i = 1, nm
         do j = 1, nn
            do k = 1, nl
               if(kimg == 1 .and. k > nlhf) then
                  knew = id - k
                  jnew = nn+2 - j
                  inew = nm+2 - i
                  if(jnew > nn) then
                     jnew = jnew - nn
                  end if
                  if(inew > nm) then
                     inew = inew - nm
                  end if
               else
                  knew = k; jnew = j; inew = i
               end if
               ip = nlhf*mm*(jnew-1) + nlhf*(inew-1) + knew
               if(wf_filetype == DENSITY_ONLY &
               & .or. wf_filetype == VTK &
               & .or. wf_filetype == BINARY) then
                  wkwf(k,i,j,1) = bfft(ip*2-1)
                  wkwf(k,i,j,2) = bfft(ip*2)
               else if(wf_filetype == CUBE) then
                  wkwf(j,i,k,1) = bfft(ip*2-1)
                  wkwf(j,i,k,2) = bfft(ip*2)
               end if
            end do
         end do
      end do
! Normalization
      normr = 0.d0
      normi = 0.d0
      do i = 1, nm
         do j = 1, nn
            do k = 1, nl
               if(wf_filetype == DENSITY_ONLY &
               & .or. wf_filetype == VTK &
               & .or. wf_filetype == BINARY) then
                  normr = normr + wkwf(k,i,j,1)*wkwf(k,i,j,1)
                  normi = normi + wkwf(k,i,j,2)*wkwf(k,i,j,2)
               else if(wf_filetype == CUBE) then
                  normr = normr + wkwf(j,i,k,1)*wkwf(j,i,k,1)
                  normi = normi + wkwf(j,i,k,2)*wkwf(j,i,k,2)
               end if
            end do
         end do
      end do
      norm = normr + normi
      write(nfout,*) 'Real and imaginary parts of wf = ',normr/norm,normi/norm
      if(wf_filetype == VTK .or. wf_filetype == BINARY) then
         if(normr>normi) then
            norm = normr
            icomp = 1
            write(nfout,*) 'Real part of wf will be outputed.'
         else
            norm = normi
            icomp = 2
            write(nfout,*) 'Imaginary part of wf will be outputed.'
         end if
      end if
      norm = univol*norm/dble(nm*nn*nl)
      norm = 1.d0/dsqrt(norm)
      do i = 1, nm
         do j = 1, nn
            do k = 1, nl
               if(wf_filetype == DENSITY_ONLY &
               & .or. wf_filetype == VTK &
               & .or. wf_filetype == BINARY) then
                  wkwf(k,i,j,1) = norm*wkwf(k,i,j,1)
                  wkwf(k,i,j,2) = norm*wkwf(k,i,j,2)
               else if(wf_filetype == CUBE) then
                  wkwf(j,i,k,1) = norm*wkwf(j,i,k,1)
                  wkwf(j,i,k,2) = norm*wkwf(j,i,k,2)
               end if
            end do
         end do
      end do

      if(wf_filetype == DENSITY_ONLY) then
         write(nfwfk,9001) nl*nm*nn, nl, nm, nn
         write(nfwfk,'(6e13.5)') wkwf
      else if(wf_filetype == BINARY) then
         write(nfwfk) nl*nm*nn, nl, nm, nn
         write(nfwfk) altv, nspin, up_down, nk, ib, eig
         write(nfwfk) wkwf(:,:,:,icomp)
      else if(wf_filetype == VTK) then
         write(nfwfk,'("# vtk DataFile Version 2.0")')
         if(nspin == 2) then
            if(up_down == 1) then
               write(nfwfk,'(" SCF Wavefunction UP : k=",i7," n=",i7," eig=",f20.5)') nk,ib,eig
            else
               write(nfwfk,'(" SCF Wavefunction DOWN : k=",i7," n=",i7," eig=",f20.5)') nk,ib,eig
            end if
         else
            write(nfwfk,'(" SCF Wavefunction : k=",i7," n=",i7," eig=",f20.5)') nk, ib, eig
         end if
         write(nfwfk,'("ASCII")')
         write(nfwfk,'("DATASET STRUCTURED_GRID")')
         write(nfwfk,'("DIMENSIONS",3(1x,i5))') nl+1,nm+1,nn+1
         write(nfwfk,'("POINTS",1x,i10,1x,"float")') (nl+1)*(nm+1)*(nn+1)
         do n1=0,nl
            do n2=0,nm
               do n3=0,nn
                  dn1 = n1/dble(nl)
                  dn2 = n2/dble(nm)
                  dn3 = n3/dble(nn)
                  x = altv(1,1)*dn1 + altv(1,2)*dn2 + altv(1,3)*dn3
                  y = altv(2,1)*dn1 + altv(2,2)*dn2 + altv(2,3)*dn3
                  z = altv(3,1)*dn1 + altv(3,2)*dn2 + altv(3,3)*dn3
                  write(nfwfk,'(3(1x,e13.5))') x,y,z
               end do
            end do
         end do
         write(nfwfk,'("")')
         write(nfwfk,'("POINT_DATA",1x,i10)') (nl+1)*(nm+1)*(nn+1)
         write(nfwfk,'("SCALARS scalars float")')
         write(nfwfk,'("LOOKUP_TABLE default")')
         do n1=0,nl
            i=n1+1
             if(n1==nl) i=1
            do n2=0,nm
               j=n2+1
               if(n2==nm) j=1
               do n3=0,nn
                  k=n3+1
                  if(n3==nn) k=1
                  write(nfwfk,'(e13.5)') wkwf(i,j,k,icomp)
               end do
            end do
         end do
      else if(wf_filetype == CUBE) then
         if(len_trim(wf_title) >= 1) then
            write(nfwfk,*) trim(wf_title)
         else
            write(nfwfk,'(" Calculated by phase")')
         end if
         if(nspin == 2) then
            if(up_down == 1) then
               write(nfwfk,'(" SCF Wavefunction UP : k=",i7," n=",i7," eig=",f20.5)') nk,ib,eig
            else
               write(nfwfk,'(" SCF Wavefunction DOWN : k=",i7," n=",i7," eig=",f20.5)') nk,ib,eig
            end if
         else
            write(nfwfk,'(" SCF Wavefunction : k=",i7," n=",i7," eig=",f20.5)') nk, ib, eig
         end if
         x = 0.d0; y = 0.d0; z = 0.d0
         write(nfwfk,'(i6,3f10.4)') -natm2, x,y,z
         do i = 1, 3
            write(nfwfk,'(i6,3f10.6)') fft_box_size_WF(i,1), altv(1:3,i)/dble(fft_box_size_WF(i,1))
         end do

         allocate(cps_full(natm2,3))
         allocate(ityp_full(natm2))
         cps_full = 0; ityp_full = 0
         call m_IS_pack_all_ions_in_uc(ityp_full,cps_full)
         do i = 1, natm2
            m = ityp_full(i)
            write(nfwfk,'(f8.4,4f10.6)') iatomn(m), ival(m), cps_full(i,1:3)
         end do
         deallocate(ityp_full,cps_full)

         write(nfwfk,'(10i5)') 2,1,2
         write(nfwfk,'(6e13.5)') wkwf(:,:,:,1)
         write(nfwfk,'(6e13.5)') wkwf(:,:,:,2)
  
      end if
      if(allocated(wkwf)) deallocate(wkwf)
    end subroutine wd_wf
  end subroutine m_ESIO_wd_WFn

  logical function m_ESIO_check_energy(ik,ib)
    integer, intent(in) :: ik,ib
    integer :: jb
    real(kind=DP) :: eig
    if(map_k(ik) == myrank_k) jb = neordr(ib,ik)
    if(nrank_k > 1) call mpi_bcast(jb,1,mpi_integer,map_k(ik),mpi_e_world(myrank_e),ierr)

    if(map_ek(jb,ik) == mype) then
       eig = eko_l(map_z(jb),ik)
       if(eig >= eigmin_wf .and. eig <= eigmax_wf) then
          m_ESIO_check_energy = .true.
       else
          m_ESIO_check_energy = .false.
       end if
    end if
    call mpi_bcast(m_ESIO_check_energy,1,mpi_logical,map_ek(jb,ik),mpi_comm_group,ierr) 
  end function m_ESIO_check_energy

  subroutine m_ESIO_wd_Efermi(nfout,nfefermi)
    integer, intent(in) :: nfout, nfefermi
                                                  __TIMER_SUB_START(1377)
    if(mype == 0) then
                                                  __TIMER_IODO_START(1460)
       write(nfefermi,'(f16.8," : Efermi")') efermi
       if(sw_fix_total_spin == YES .and. nspin == 2) then
          write(nfefermi,'(2f16.8," : Ffermi_spin(1), Efermi_spin(2)")') &
               & efermi_spin(1),efermi_spin(2)
       end if
                                                  __TIMER_IODO_STOP(1460)
    end if
                                                  __TIMER_SUB_STOP(1377)
  end subroutine m_ESIO_wd_Efermi

  subroutine m_ESIO_rd_Efermi(nfout,nfefermi)
    integer, intent(in) :: nfout, nfefermi
                                                  __TIMER_SUB_START(1376)
    if(mype == 0) then
                                                  __TIMER_IODO_START(1458)
       read(nfefermi,*,err=1001,end=1001) efermi
       write(nfout,'(" ! efermi = ",f16.8," : this is read from nfefermi")') efermi
       if(sw_fix_total_spin == YES .and. nspin == 2) then
          read(nfefermi,*,err=1002,end=1002) efermi_spin(1),efermi_spin(2)
          write(nfout,'(" ! efermi_spin = ",2f16.8," : these are read from nfefermi")') efermi_spin(1:2)
       end if
                                                  __TIMER_IODO_STOP(1458)
       goto 1010
1001   continue
       efermi = 0.d0
1002   continue
       if(sw_fix_total_spin == YES .and. nspin == 2) then
          efermi_spin(1) = 0.d0; efermi_spin(2) = 0.d0
       end if
1010   continue
    end if
    if(npes > 1) then
                                                  __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1459)
       call mpi_bcast(efermi,1,mpi_double_precision,0,mpi_comm_group,ierr)
       if(sw_fix_total_spin == YES .and. nspin == 2) then
          call mpi_bcast(efermi_spin,2,mpi_double_precision,0,mpi_comm_group,ierr)
       end if
                                                  __TIMER_IOCOMM_STOP(1459)
    end if
                                                  __TIMER_SUB_STOP(1376)
  end subroutine m_ESIO_rd_Efermi


! === KT_add === 2015/05/16
  subroutine m_ESIO_wd_Wfn_squared_noncl( nfout )
    integer, intent(in) :: nfout

    integer :: iloop, ib
    logical :: goflag
    real(kind=DP), allocatable :: chgq_l_save(:,:,:)
    real(kind=DP), allocatable :: hsr_save(:,:,:,:), hsi_save(:,:,:,:)

    if ( ekmode == ON ) then
       if ( nk_in_the_process <= ik_wf_squared &
            &  .and. nk_in_the_process -1 +kv3 > ik_wf_squared ) then
          goflag = .true.
       else
          goflag = .false.
       endif
    else
       goflag = .true.
    endif

    if ( .not. goflag ) return

    allocate( chgq_l_save(ista_kngp:iend_kngp,kimg,ndim_magmom) )
    allocate( hsr_save(natm,nlmt,nlmt,ndim_magmom) )
    allocate( hsi_save(natm,nlmt,nlmt,ndim_magmom) )

    chgq_l_save = chgq_l;     hsr_save = hsr;     hsi_save = hsi

    Do ib=ib1_wf_squared, ib2_wf_squared
       if ( ekmode == ON ) then
          call m_CD_softpart_ktsub_noncl( nfout, kv3, &
               &                          ik_wf_squared -nk_in_the_process+1, ib )
          call m_CD_hardpart_ktsub_noncl( nfout, ik_wf_squared -nk_in_the_process +1, &
               &                          ib )
       else
          call m_CD_softpart_ktsub_noncl( nfout, kv3, ik_wf_squared, ib )
          call m_CD_hardpart_ktsub_noncl( nfout, ik_wf_squared, ib )
       endif

       call m_CD_alloc_rspace_charge()
       Do iloop=1, ndim_magmom
          call m_Files_open_nfwfksq_noncl( iloop, ik_wf_squared, ib )
          call m_CD_rspace_charge_noncl( iloop, nfwfk_sq, nfout, wf_squared_filetype )
       End do
       call m_CD_dealloc_rspace_charge()

    End Do

    chgq_l = chgq_l_save;     hsr_save = hsr;     hsi_save = hsi
    deallocate( chgq_l_save ); deallocate( hsr_save );  deallocate( hsi_save )

  end subroutine m_ESIO_wd_Wfn_squared_noncl
! ============== 2015/05/16

  subroutine m_ESIO_wd_Wfn_integ_magmom       ! noncl
    integer :: ik, ie, ib1, ig, is1, is2, ik2
    integer :: ia, it, lmt1, lmt2, p1, p2
    integer :: neg_t
    real(kind=DP) :: csum(3)
    complex(kind=CMPLDP) :: z1, z2
    complex(kind=CMPLDP) :: PauliMatrix( ndim_magmom, ndim_spinor, ndim_spinor )

    real(kind=DP), allocatable :: work(:,:,:), magmom_each_wfn(:,:,:)

    call m_ES_set_Pauli_matrix( PauliMatrix )

    allocate( magmom_each_wfn( kv3/ndim_spinor, neg, 3 ) )
    magmom_each_wfn = 0.0d0

    Do ik=1, kv3, ndim_spinor
       if ( map_k(ik) /= myrank_k ) cycle! MPI

       Do ie=ista_e, iend_e
          ib1 = map_z(ie)

          csum = 0.0d0

          Do ig=1, iba(ik)
             Do is1=1, ndim_spinor
                Do is2=1, ndim_spinor
                   z1 = cmplx( zaj_l( ig, ib1, ik+is1-1, 1 ), &
                        &      zaj_l( ig, ib1, ik+is1-1, kimg ) )
                   z2 = cmplx( zaj_l( ig, ib1, ik+is2-1, 1 ), &
                        &      zaj_l( ig, ib1, ik+is2-1, kimg ) )

                   csum(1) = csum(1) +conjg(z1) *PauliMatrix(2,is1,is2) *z2
                   csum(2) = csum(2) +conjg(z1) *PauliMatrix(3,is1,is2) *z2
                   csum(3) = csum(3) +conjg(z1) *PauliMatrix(4,is1,is2) *z2
                End do
             End Do
          End Do
!
          Do ia=1, natm
             it = ityp(ia)
             do lmt1 = 1, ilmt(it)
                p1 = lmta(lmt1,ia)
                do lmt2 = 1, ilmt(it)
                   p2 = lmta(lmt2,ia)
                   Do is1=1, ndim_spinor
                      Do is2=1, ndim_spinor
                         z1 = cmplx( fsr_l(ib1,p1,ik+is1-1), fsi_l(ib1,p1,ik+is1-1) )
                         z2 = cmplx( fsr_l(ib1,p2,ik+is2-1), fsi_l(ib1,p2,ik+is2-1) )

                         csum(1) = csum(1) +conjg(z1) *q(lmt1,lmt2,it) &
                              &                       *PauliMatrix(2,is1,is2) *z2
                         csum(2) = csum(2) +conjg(z1) *q(lmt1,lmt2,it) &
                              &                       *PauliMatrix(3,is1,is2) *z2
                         csum(3) = csum(3) +conjg(z1) *q(lmt1,lmt2,it) &
                              &                       *PauliMatrix(4,is1,is2) *z2
                      End do
                   End do
                End do
             End do
          End Do
!
          ik2 = (ik-1)/ndim_spinor +1
          magmom_each_wfn( ik2, ie, 1:3 ) = csum(1:3)
       End Do
    End Do
!
    if ( npes > 1 ) then
       allocate( work (kv3/ndim_spinor, neg, 3 ) ); work = 0.0d0
       call mpi_allreduce( magmom_each_wfn, work, kv3/ndim_spinor *neg *3, &
            &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
       magmom_each_wfn = work
       deallocate( work )
    endif

!
    neg_t = neg -num_extra_bands

    if ( ekmode == on ) then
       if ( mype == 0 ) then
          if ( nk_in_the_process == 1 ) then
             call m_Files_open_nfwfk_integ_mom(2)
             write(nfwfk_integ_mom,'(A,I8)') 'num_kpoints = ', kv3_ek /ndim_spinor
             write(nfwfk_integ_mom,'(A,I8)') 'num_bands   = ', neg_t
             write(nfwfk_integ_mom,'(A,I8)') 'nspin       = ', nspin /ndim_spinor
                                                ! in order to work band**pl properly
             write(nfwfk_integ_mom,*)
          else
             call m_Files_open_nfwfk_integ_mom(3)
          endif
       endif
    else
       if ( mype == 0 ) then
          call m_Files_open_nfwfk_integ_mom(icond)
          write(nfwfk_integ_mom,'(A,I8)') 'num_kpoints = ', kv3 /ndim_spinor
          write(nfwfk_integ_mom,'(A,I8)') 'num_bands   = ', neg_t
          write(nfwfk_integ_mom,'(A,I8)') 'nspin       = ', nspin
          write(nfwfk_integ_mom,*)
       endif
    endif

    if ( ekmode == ON ) then
       if ( mype == 0 ) then
          Do ik=1, kv3, ndim_spinor
             write(nfwfk_integ_mom,'(A)') "================= "
             write(nfwfk_integ_mom,'(" ik = ",i4," (",3f10.6," )")') &
                  &                   ik +nk_in_the_process-1, (vkxyz(ik,1:3,BUCS))
             Do ie=1, neg_t
                ik2 = ( ik -1 )/ndim_spinor +1
                write(nfwfk_integ_mom,'(I5,3F16.8)') ie, magmom_each_wfn( ik2, ie, 1:3 )
             End Do
          End Do
          write(nfwfk_integ_mom,*)
       endif
    else
       if ( mype == 0 ) then
          Do ik=1, kv3, ndim_spinor
             write(nfwfk_integ_mom,'(A)') "================= "
             write(nfwfk_integ_mom,'(" ik = ",i4," (",3f10.6," )")') &
                  &                   ik, (vkxyz(ik,1:3,BUCS))
             Do ie=1, neg_t
                ik2 = ( ik -1 )/ndim_spinor +1
                write(nfwfk_integ_mom,'(I5,3F16.8)') ie, magmom_each_wfn( ik2, ie, 1:3 )
             End Do
          End Do
          write(nfwfk_integ_mom,*)
       endif
    endif

    deallocate( magmom_each_wfn )

    call m_Files_close_nfwfk_integ_mom

  end subroutine m_ESIO_wd_Wfn_integ_magmom

  subroutine m_ESIO_wd_Wfn_orb_proj
    use m_Control_Parameters,  only : population_diag_mode, sw_diagonalize_population, &
         &                            use_rotated_compri
    use m_Const_Parameters,    only : DIAG_CHARGE_DENSITY_MATRIX, &
         &                            DIAG_SPIN_DENSITY_MATRIX, &
         &                            DIAG_LS_with_t2g_octa, DIAG_LS, zi
    use m_PseudoPotential, only :    nloc, ntau, iproj_phi, lmtt_phi
    use m_Electronic_Structure,  only : porb_rot_matrix_real, porb_rot_matrix_cmplx

    integer :: neg_t, kfac, num_orbitals
    real(kind=DP), allocatable :: compr(:,:,:,:), compi(:,:,:,:), norm_phig_mpi(:,:)
    real(kind=DP), allocatable :: compr_rot_l(:,:,:,:), compi_rot_l(:,:,:,:)
    
    if ( ekmode == OFF ) return

    kfac = 1
    if ( sw_diagonalize_population == ON ) then
       if ( population_diag_mode /= DIAG_CHARGE_DENSITY_MATRIX ) then
          if ( noncol ) kfac = ndim_spinor
       endif
    endif
    num_orbitals = nlmta_phi *kfac
    
    if ( sw_diagonalize_population == ON ) then
       if ( population_diag_mode == DIAG_CHARGE_DENSITY_MATRIX ) then
          call read_porb_rot_matrix_real
       else 
          if ( noncol ) call read_porb_rot_matrix_cmplx
       endif
    endif

    if ( sw_diagonalize_population == ON .and. use_rotated_compri == YES ) then
       allocate(compr_rot_l(np_e,num_orbitals,1,ista_k:iend_k))
       allocate(compi_rot_l(np_e,num_orbitals,1,ista_k:iend_k))
       compr_rot_l = 0.d0; compi_rot_l = 0.0d0
       if ( population_diag_mode == DIAG_CHARGE_DENSITY_MATRIX ) then
          call set_compri_rot_chgden_mode
       else
          if ( noncol ) call set_compri_rot_spnden_mode
       endif
    endif
    
    allocate(compr(neg,num_orbitals,1,kv3));  compr = 0.d0
    allocate(compi(neg,num_orbitals,1,kv3));  compi = 0.d0
    if(.not.allocated(norm_phig_mpi)) allocate(norm_phig_mpi(nlmtt_phi,kv3/nspin))
    norm_phig_mpi=0.d0

    call set_array_compri_etc

    neg_t = neg -num_extra_bands

    if ( mype == 0 ) then
       if ( nk_in_the_process == 1 ) then
          call m_Files_open_nfwfk_orb_proj(2)

          write(nfwfk_orb_proj,'(A)') '# obrital projection for bands '
          write(nfwfk_orb_proj,*)

          write(nfwfk_orb_proj,'(A,I8)') 'num_kpoints = ', kv3_ek /ndim_spinor
          write(nfwfk_orb_proj,'(A,I8)') 'num_bands   = ', neg_t
          write(nfwfk_orb_proj,'(A,I8)') 'nspin       = ', nspin /ndim_spinor

          write(nfwfk_orb_proj,'(A,I8)') 'num of orbitals = ', nlmta_phi *kfac
          write(nfwfk_orb_proj,*)
       else
          call m_Files_open_nfwfk_orb_proj(3)
       endif
    endif

    if ( sw_diagonalize_population == ON ) then
       if ( use_rotated_compri == YES ) then
          if ( population_diag_mode == DIAG_CHARGE_DENSITY_MATRIX ) then
             call case_ordinal
          else 
             if ( noncol ) call case_spinmixed_basis
          endif
       else
          if ( population_diag_mode == DIAG_CHARGE_DENSITY_MATRIX ) then
             call case_mode1
          else 
             if ( noncol ) call case_mode2
          endif
       endif
    else
       if ( SpinOrbit_Mode /= Neglected .and. wf_orb_proj_print_format == 1 ) then
          call case_with_j
       else
          call case_ordinal    
       endif
    endif

    if ( mype == 0 ) call m_Files_close_nfwfk_orb_proj

    deallocate( compr ); deallocate( compi ); deallocate( norm_phig_mpi )
    if ( allocated(compr_rot_l) ) deallocate( compr_rot_l )
    if ( allocated(compi_rot_l) ) deallocate( compi_rot_l )

  contains
    
    subroutine set_compri_rot_chgden_mode
      integer :: ik, ib, ia, it, immax
      integer :: lmt1, lmt2, il1, il2, im1, im2, tau1, tau2, iorb1, iorb2
      integer :: is1, is2, jj1, jj2, istmp
      complex(kind=CMPLDP), allocatable :: ztmp(:), zwork(:,:,:)
      
      allocate( ztmp(1) )
      allocate( zwork(nlmta_phi,1,1) );
      
      do ik = 1, kv3
         if(map_k(ik) /= myrank_k) cycle
         
         do ib = 1, np_e
            zwork = 0.0d0;
            do iorb1=1, nlmta_phi
               zwork(iorb1,:,1) = cmplx( compr_l(ib,iorb1,:,ik), &
                    &                    compi_l(ib,iorb1,:,ik) )
            end do
            do ia=1,natm
               it = ityp(ia)
               if (iproj_group(ia) == 0) cycle
               
               do lmt1=1,ilmt_phi(it)
                  il1 = ltp_phi(lmt1,it);      im1 = mtp_phi(lmt1,it)
                  tau1 = taup_phi(lmt1,it);    iorb1 = lmta_phi(lmt1,ia)
                  immax = 2*il1 -1
                  
                  ztmp = 0.0d0
                  do lmt2=1, ilmt_phi(it)
                     il2 = ltp_phi(lmt2,it);    im2 = mtp_phi(lmt2,it)
                     tau2 = taup_phi(lmt2,it);  iorb2 = lmta_phi(lmt2,ia)
                     if ( il1 /= il2 ) cycle
                     if ( tau1 /= tau2 ) cycle
                     ztmp(:) = ztmp(:) &
                          &    + porb_rot_matrix_real(ia,il1,im2,im1) &
                          &                 *zwork(iorb2,:,1)
                  end do
                  compr_rot_l(ib,iorb1,:,ik) = real(ztmp(:))
                  compi_rot_l(ib,iorb1,:,ik) = aimag(ztmp(:))
               end do
            end do
         end do
      end do
      deallocate( ztmp );      deallocate( zwork )
    end subroutine set_compri_rot_chgden_mode
    
    subroutine set_compri_rot_spnden_mode
      integer :: ik, ib, ia, it, immax
      integer :: lmt1, lmt2, il1, il2, im1, im2, tau1, tau2, iorb1, iorb2
      integer :: is1, is2, jj1, jj2, istmp
      complex(kind=CMPLDP), allocatable :: ztmp(:,:), zwork(:,:,:)
      
      allocate( ztmp(1,ndim_spinor) )
      allocate( zwork(nlmta_phi,1,ndim_spinor) );
      
      do ik = 1, kv3, ndim_spinor
         if(map_k(ik) /= myrank_k) cycle
         
         do ib = 1, np_e
            zwork = 0.0d0;
            do iorb1=1, nlmta_phi
               zwork(iorb1,1,1) = cmplx( compr_l(ib,iorb1,1,ik), &
                    &                    compi_l(ib,iorb1,1,ik) )
               zwork(iorb1,1,2) = cmplx( compr_l(ib,iorb1,1,ik+1), &
                    &                    compi_l(ib,iorb1,1,ik+1) )
            end do            
            do ia=1,natm
               it = ityp(ia)
               if (iproj_group(ia) == 0) cycle
               
               do lmt1=1,ilmt_phi(it)
                  il1 = ltp_phi(lmt1,it);      im1 = mtp_phi(lmt1,it)
                  tau1 = taup_phi(lmt1,it);    iorb1 = lmta_phi(lmt1,ia)
                  immax = 2*il1 -1
                  
                  Do is1=1, ndim_spinor
                     ztmp = 0.0d0;
                     
                     do lmt2=1, ilmt_phi(it)
                        il2 = ltp_phi(lmt2,it);    im2 = mtp_phi(lmt2,it)
                        tau2 = taup_phi(lmt2,it);  iorb2 = lmta_phi(lmt2,ia)
                        if ( il1 /= il2 ) cycle
                        if ( tau1 /= tau2 ) cycle
                        
                        Do is2=1, ndim_spinor
                           jj2 = immax*(is2-1) +im2
                           jj1 = (im1 -1)*ndim_spinor +is1
                           ztmp(:,is2) = ztmp(:,is2) &
                                &      + conjg(porb_rot_matrix_cmplx(ia,il1,jj2,jj1)) &
                                &        *zwork(iorb2,:,is2)
                        end do
                     end do
                     compr_rot_l(ib,ndim_spinor*(iorb1-1)+is1,:,ik) &
                          &          = real(ztmp(:,1))
                     compi_rot_l(ib,ndim_spinor*(iorb1-1)+is1,:,ik) &
                          &          = aimag(ztmp(:,1))
                     compr_rot_l(ib,ndim_spinor*(iorb1-1)+is1,:,ik+1) &
                          &          = real(ztmp(:,2))
                     compi_rot_l(ib,ndim_spinor*(iorb1-1)+is1,:,ik+1) &
                          &          = aimag(ztmp(:,2))
                  end do
               end do
            end do
         end do
      end do
      deallocate( ztmp );    deallocate( zwork )
    end subroutine set_compri_rot_spnden_mode

    subroutine read_porb_rot_matrix_real
      integer :: ia, il, immax, im, im2, size2, lmax, mmax, num
      integer :: lun = 4000

      lmax = nloc;   mmax = 2*lmax -1

      if ( allocated(porb_rot_matrix_real) ) deallocate( porb_rot_matrix_real )
      allocate( porb_rot_matrix_real(natm,lmax,mmax,mmax) ); 
      porb_rot_matrix_real = 0.0d0

      if ( mype == 0 ) then
         open( lun, file="porb_rot_matrix.data", status="old", form="unformatted")
         read(lun) population_diag_mode
         
         Do ia=1, natm
            if (iproj_group(ia) == 0) cycle
            read(lun) num
            Do il=1, nloc
               immax = 2*il -1
               read(lun) num
               Do im=1, immax
                  read(lun) ( porb_rot_matrix_real(ia,il,im2,im), im2=1, immax )
               End do
            End Do
         End Do
         close(lun)
      endif
      size2 = natm *nloc *mmax *mmax
      call mpi_bcast( porb_rot_matrix_real, size2, mpi_double_precision, &
           &          0, mpi_comm_group, ierr )
!
    end subroutine read_porb_rot_matrix_real

    subroutine read_porb_rot_matrix_cmplx
      integer :: ia, il, immax, im, im2, size2, lmax, mmax, num, fac
      integer :: lun = 4000

      lmax = nloc;   mmax = 2*lmax -1
      fac = ndim_spinor

      if ( allocated(porb_rot_matrix_cmplx) ) deallocate( porb_rot_matrix_cmplx )
      allocate( porb_rot_matrix_cmplx(natm,lmax,fac*mmax,fac*mmax) ); 
      porb_rot_matrix_cmplx = 0.0d0

      if ( mype == 0 ) then
         open( lun, file="porb_rot_matrix.data", status="old", form="unformatted")
         read(lun) num
         Do ia=1, natm
            if (iproj_group(ia) == 0) cycle
            read(lun) num
            Do il=1, nloc
               immax = 2*il -1
               read(lun) num
               Do im=1, immax*fac
                  read(lun) ( porb_rot_matrix_cmplx(ia,il,im2,im), im2=1, immax*fac )
               End do
            End Do
         End Do
         close(lun)
      endif
      size2 = natm *nloc *mmax *mmax *fac *fac *2
      call mpi_bcast( porb_rot_matrix_cmplx, size2, mpi_double_precision, &
           &          0, mpi_comm_group, ierr )

    end subroutine read_porb_rot_matrix_cmplx

    subroutine set_array_compri_etc
      integer :: ik, ie, ib, iksnl
      integer :: iorb, lmt
      integer :: ia, il, im, tau, is
      real(kind=DP), allocatable :: compr_mpi(:,:,:,:), compi_mpi(:,:,:,:), &
           &                        norm_phig_mpi2(:,:)
      real(kind=DP), allocatable :: porb(:)

      do ik = 1, kv3
         if(map_k(ik) /= myrank_k) cycle
         iksnl = (ik-1)/nspin + 1
         
         if ( sw_diagonalize_population==ON .and. use_rotated_compri==YES ) then
            do ie = ista_e, iend_e, istep_e
               ib = map_z(ie)
               compr(ie,1:num_orbitals,1,ik) = compr_rot_l(ib,1:num_orbitals,1,ik)
               compi(ie,1:num_orbitals,1,ik) = compi_rot_l(ib,1:num_orbitals,1,ik)
             end do
         else
            do ie = ista_e, iend_e, istep_e
               ib = map_z(ie)
               compr(ie,1:num_orbitals,1,ik) = compr_l(ib,1:num_orbitals,1,ik)
               compi(ie,1:num_orbitals,1,ik) = compi_l(ib,1:num_orbitals,1,ik)
             end do
         endif
         norm_phig_mpi(1:nlmtt_phi,iksnl)  = norm_phig(1:nlmtt_phi,iksnl)
      end do
      
      if ( npes >1 ) then
         allocate( compr_mpi( neg, num_orbitals, 1, kv3 ) ); compr_mpi = 0.0d0
         allocate( compi_mpi( neg, num_orbitals, 1, kv3 ) ); compi_mpi = 0.0d0
         allocate( norm_phig_mpi2( nlmtt_phi, kv3/nspin ) )
         call mpi_allreduce( compr, compr_mpi, neg*num_orbitals*1*kv3, &
              &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
         call mpi_allreduce( compi, compi_mpi, neg*num_orbitals*1*kv3, &
              &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
         call mpi_allreduce( norm_phig_mpi, norm_phig_mpi2, nlmtt_phi*kv3/nspin, &
              &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
         compr = compr_mpi;   compi = compi_mpi

         norm_phig_mpi = norm_phig_mpi2 /dble(nrank_e)

         deallocate( compr_mpi ); deallocate( compi_mpi ); 
         deallocate( norm_phig_mpi2 )
      end if
    end subroutine set_array_compri_etc

    subroutine set_porb_mode1( ik, porb )
      integer, intent(in) :: ik
      real(kind=DP), intent(out) :: porb( neg,nlmta_phi*ndim_spinor )

      integer :: ia, ib, it, lmax, mmax, immax, ismax
      integer :: lmt, il, im, tau, ip, iorb, lmtt
      integer :: lmt1, lmt2, im1, im2, iorb1, iorb2, lmtt1, lmtt2
      integer :: is, is1, is2, istmp, iksnl, im3
      real(kind=DP) :: c1, c2, ctmp
      complex(kind=CMPLDP) :: ztmp, z1, z2

      complex(kind=CMPLDP), allocatable :: dm_ssrep(:,:,:,:,:)
      real(kind=DP), allocatable :: porb0(:,:,:,:), dm(:,:,:,:,:)

      lmax = nloc;   mmax = 2*lmax -1
      iksnl = (ik-1)/nspin + 1

      if ( noncol ) then
         allocate( dm_ssrep( ntau, lmax, mmax, mmax, ndim_chgpot ) );
         allocate( dm( ntau, lmax, mmax, mmax, ndim_magmom ) );
         allocate( porb0( ntau, lmax, mmax, ndim_magmom ) );
      else
         allocate( dm_ssrep( ntau, lmax, mmax, mmax, 1 ) );
         allocate( dm( ntau, lmax, mmax, mmax, 1 ) );
         allocate( porb0( ntau, lmax, mmax, 1 ) );
      endif

      ibloop: do ib=1, neg
         do ia=1,natm
            it = ityp(ia)
            if(iproj_group(ia) == 0) cycle
            
            dm_ssrep = 0.0d0;  dm = 0.0d0;  porb0 = 0.d00
            
            ! diagonal part
            do lmt=1,ilmt_phi(it)
               il = ltp_phi(lmt,it); im  = mtp_phi(lmt,it);
               tau = taup_phi(lmt,it)
               ip  = iproj_phi(lmt,it)
               iorb = lmta_phi(lmt,ia); lmtt = lmtt_phi(lmt,it)
               
               Do is1=1, ndim_spinor
                  Do is2=1, ndim_spinor
                     istmp = ( is1 -1 )*ndim_spinor + is2
                     
                     z1 = dcmplx( compr(ib,iorb,1,ik+is1-1 ), &
                          &       compi(ib,iorb,1,ik+is1-1 ) )
                     z2 = dcmplx( compr(ib,iorb,1,ik+is2-1 ), &
                          &       compi(ib,iorb,1,ik+is2-1 ) )
                     ztmp = z1 *conjg(z2) &
                          &      *( 1.d0+qorb(iorb)/norm_phig_mpi(lmtt,iksnl) )
                     dm_ssrep(tau,il,im,im,istmp) = dm_ssrep(tau,il,im,im,istmp) &
                          &                        +ztmp
                  end do
               end Do
            end do
            ! non-diagonal part
            do lmt2=1,ilmt_phi(it)
               do lmt1=1,ilmt_phi(it)
                  if ( lmt1 == lmt2 ) cycle
                  if (ltp_phi(lmt1,it) /= ltp_phi(lmt2,it)) cycle
                  if (taup_phi(lmt1,it) /= taup_phi(lmt2,it)) cycle
                  
                  il = ltp_phi(lmt1,it);     tau = taup_phi(lmt1,it)
                  ip  = iproj_phi(lmt1,it)
                  im1   = mtp_phi(lmt1,it);     im2 = mtp_phi(lmt2,it); 
                  iorb1 = lmta_phi(lmt1,ia);  iorb2 = lmta_phi(lmt2,ia)
                  lmtt1 = lmtt_phi(lmt1,it);  lmtt2 = lmtt_phi(lmt2,it)
                  
                  c1 = ( qorb(iorb1) +qorb(iorb2) ) /2.0d0      ! approx
                  c2 = sqrt(norm_phig_mpi(lmtt1,iksnl)*norm_phig_mpi(lmtt2,iksnl))
                  
                  Do is1=1, ndim_spinor
                     Do is2=1, ndim_spinor
                        istmp = ( is1 -1 )*ndim_spinor + is2
                        z1 = dcmplx( compr(ib,iorb1,1,ik+is1-1 ), &
                             &       compi(ib,iorb1,1,ik+is1-1 ) )
                        z2 = dcmplx( compr(ib,iorb2,1,ik+is2-1 ), &
                             &       compi(ib,iorb2,1,ik+is2-1 ) )
                        ztmp = z1 *conjg(z2) *( 1.0d0 +c1 /c2 )
                        dm_ssrep(tau,il,im1,im2,istmp) &
                             &  = dm_ssrep(tau,il,im1,im2,istmp) + ztmp
                     end do
                  end do
               end do
            end do
            
            if ( noncol ) then
               dm(:,:,:,:,1) =  dm_ssrep(:,:,:,:,1) +dm_ssrep(:,:,:,:,4)
               dm(:,:,:,:,2) =  dm_ssrep(:,:,:,:,2) +dm_ssrep(:,:,:,:,3)
               dm(:,:,:,:,3) = (dm_ssrep(:,:,:,:,2) -dm_ssrep(:,:,:,:,3)) *zi
               dm(:,:,:,:,4) =  dm_ssrep(:,:,:,:,1) -dm_ssrep(:,:,:,:,4)
               ismax = 1
            else
               dm(:,:,:,:,1) = dm_ssrep(:,:,:,:,1)
               ismax = 1
            endif
            ! ----
            Do tau=1, ntau
               Do il=1, lmax
                  immax = 2 *il -1
                  Do im1=1, immax
                     DO is=1, ismax
                        ctmp = 0.0d0
                        Do im2=1, immax
                           Do im3=1, immax
                              ctmp = ctmp &
                                   &    +porb_rot_matrix_real( ia,il,im2,im1 )  &
                                   &       *dm(tau,il,im2,im3,is) &
                                   &       *porb_rot_matrix_real( ia,il,im3,im1 )
                           End Do
                        End Do
                        porb0( tau, il, im1, is ) = ctmp
                     End DO
                  End DO
               End Do
            End Do
            
            Do lmt=1,ilmt_phi(it)
               il = ltp_phi(lmt,it);  im  = mtp_phi(lmt,it);  iorb = lmta_phi(lmt,ia)
               tau = taup_phi(lmt,it)
               porb( ib,iorb ) = porb0( tau, il,im,1 )
            ENd do
         End do
      End do ibloop
      deallocate( dm ); deallocate( dm_ssrep ); deallocate( porb0 )
      
    end subroutine set_porb_mode1
    
    subroutine set_porb_mode2( ik, porb )
      integer, intent(in) :: ik
      real(kind=DP), intent(out) :: porb( neg,nlmta_phi*ndim_spinor )

      integer :: ia, ib, it, lmax, mmax, immax, ismax
      integer :: lmt, il, im, tau, ip, iorb, lmtt
      integer :: lmt1, lmt2, im1, im2, iorb1, iorb2, lmtt1, lmtt2
      integer :: is, is1, is2, is3, istmp, iksnl, im3
      real(kind=DP) :: c1, c2
      complex(kind=CMPLDP) :: ztmp, z1, z2, z3
      real(kind=DP), allocatable :: porb0(:,:,:,:)
      complex(kind=CMPLDP), allocatable :: dm_ssrep(:,:,:,:,:), dm(:,:,:,:,:)
      complex(kind=CMPLDP), allocatable :: zwk(:,:)
      complex(kind=CMPLDP) :: PauliMatrix( ndim_magmom, ndim_spinor, ndim_spinor )

      lmax = nloc;   mmax = 2*lmax -1

      allocate( dm_ssrep( ntau, lmax, mmax, mmax, ndim_chgpot ) );
      allocate( porb0( ntau, lmax, mmax*ndim_spinor, ndim_magmom ) );
      allocate( zwk( mmax*ndim_spinor, mmax*ndim_spinor ) );

      call m_ES_set_Pauli_Matrix( PauliMatrix )

      iksnl = (ik-1)/nspin + 1

      ibloop: do ib=1, neg
         do ia=1,natm
            it = ityp(ia)
            if(iproj_group(ia) == 0) cycle
            
            dm_ssrep = 0.0d0;  dm = 0.0d0;  porb0 = 0.d00
            
            ! diagonal part
            do lmt=1,ilmt_phi(it)
               il = ltp_phi(lmt,it); im  = mtp_phi(lmt,it);
               tau = taup_phi(lmt,it)
               ip  = iproj_phi(lmt,it)
               iorb = lmta_phi(lmt,ia); lmtt = lmtt_phi(lmt,it)
               
               Do is1=1, ndim_spinor
                  Do is2=1, ndim_spinor
                     istmp = ( is1 -1 )*ndim_spinor + is2
                     
                     z1 = dcmplx( compr(ib,iorb,1,ik+is1-1 ), &
                          &       compi(ib,iorb,1,ik+is1-1 ) )
                     z2 = dcmplx( compr(ib,iorb,1,ik+is2-1 ), &
                          &       compi(ib,iorb,1,ik+is2-1 ) )
                     ztmp = z1 *conjg(z2) &
                          &      *( 1.d0+qorb(iorb)/norm_phig_mpi(lmtt,iksnl) )
                     dm_ssrep(tau,il,im,im,istmp) = dm_ssrep(tau,il,im,im,istmp) &
                          &                        +ztmp
                  end do
               end Do
            end do
            ! non-diagonal part
            do lmt2=1,ilmt_phi(it)
               do lmt1=1,ilmt_phi(it)
                  if ( lmt1 == lmt2 ) cycle
                  if (ltp_phi(lmt1,it) /= ltp_phi(lmt2,it)) cycle
                  if (taup_phi(lmt1,it) /= taup_phi(lmt2,it)) cycle
                  
                  il = ltp_phi(lmt1,it);     tau = taup_phi(lmt1,it)
                  ip  = iproj_phi(lmt1,it)
                  im1   = mtp_phi(lmt1,it);     im2 = mtp_phi(lmt2,it); 
                  iorb1 = lmta_phi(lmt1,ia);  iorb2 = lmta_phi(lmt2,ia)
                  lmtt1 = lmtt_phi(lmt1,it);  lmtt2 = lmtt_phi(lmt2,it)
                  
                  c1 = ( qorb(iorb1) +qorb(iorb2) ) /2.0d0      ! approx
                  c2 = sqrt(norm_phig_mpi(lmtt1,iksnl)*norm_phig_mpi(lmtt2,iksnl))
                  
                  Do is1=1, ndim_spinor
                     Do is2=1, ndim_spinor
                        istmp = ( is1 -1 )*ndim_spinor + is2
                        z1 = dcmplx( compr(ib,iorb1,1,ik+is1-1 ), &
                             &       compi(ib,iorb1,1,ik+is1-1 ) )
                        z2 = dcmplx( compr(ib,iorb2,1,ik+is2-1 ), &
                             &       compi(ib,iorb2,1,ik+is2-1 ) )
                        ztmp = z1 *conjg(z2) *( 1.0d0 +c1 /c2 )
                        dm_ssrep(tau,il,im1,im2,istmp) &
                             &  = dm_ssrep(tau,il,im1,im2,istmp) + ztmp
                     end do
                  end do
               end do
            end do
            
            ismax = 1
            ! ----
            Do tau=1, ntau
               Do il=1, lmax
                  immax = 2 *il -1
                  Do is=1, ndim_magmom
                     zwk = 0.0d0
                     
                     Do im1=1, immax
                        Do im2=1, immax
                           Do is1=1, ndim_spinor
                              Do is2=1, ndim_spinor
                                 ztmp = 0.0d0
                                 Do is3=1, ndim_spinor
                                    istmp = ( is1 -1 )*ndim_spinor + is3
                                    ztmp = ztmp +dm_ssrep(tau,il,im1,im2,istmp) &
                                         &      *PauliMatrix(is,is3,is2)
                                 End Do
                                 zwk(im1+immax*(is1-1),im2+immax*(is2-1)) = ztmp
                              End Do
                           End Do
                        End Do
                     ENd Do
                     Do im1=1, immax *ndim_spinor
                        ztmp = 0.0d0
                        DO is2=1, ndim_spinor
                           DO is3=1, ndim_spinor
                              Do im2=1, immax
                                 Do im3=1, immax
                                    z1 = porb_rot_matrix_cmplx(ia,il,immax*(is2-1)+im2,im1)
                                    z2 = zwk(immax*(is2-1)+im2,immax*(is3-1)+im3) 
                                    z3 = porb_rot_matrix_cmplx(ia,il,immax*(is3-1)+im3,im1)
                                    ztmp = ztmp +conjg(z1) *z2 *z3
                                 End Do
                              End Do
                           End Do
                        End DO
                        porb0( tau, il, im1, is ) = ztmp
                     End Do
                  End Do
               End Do
            End Do
            
            Do lmt=1,ilmt_phi(it)
               il = ltp_phi(lmt,it);  im1 = mtp_phi(lmt,it);  iorb1 = lmta_phi(lmt,ia)
               tau = taup_phi(lmt,it)
               Do is1=1, ndim_spinor
                  im2 = (im1 -1)*ndim_spinor +is1
                  iorb2 = (iorb1 -1)*ndim_spinor +is1
                  porb( ib,iorb2 ) = porb0( tau, il,im2,1 )
               ENd do
            End Do
         End do
      End do ibloop
      deallocate( dm_ssrep ); deallocate( zwk ); deallocate( porb0 )

    end subroutine set_porb_mode2

    subroutine case_mode1
      integer :: ik, iorb, ia, il, im, tau, ib
      real(kind=DP), allocatable :: porb(:,:)

      if ( mype /= 0 ) return

      allocate( porb( neg,nlmta_phi) )

      do ik = 1, kv3, ndim_spinor
         write(nfwfk_orb_proj,'(A)') "================= "
         write(nfwfk_orb_proj,'(A,I6,A,3F16.8,A)') 'ik = ', &
              &            ik +nk_in_the_process -1, " ( ", vkxyz(ik,1:3,BUCS), " )"
         call set_porb_mode1( ik, porb )

         do iorb = 1,nlmta_phi
            call m_PP_tell_iorb_ia_l_m_tau(iorb,ia,il,im,tau)
            if ( iproj_group(ia) == 0) cycle

            write(nfwfk_orb_proj,'(I5,3I3,A)') ia, il-1, im, tau, ' : ia, l, m'', tau'
            write(nfwfk_orb_proj,'(4F18.10)') ( porb( neordr(ib,ik),iorb ), ib=1, neg_t )
         end do
         write(nfwfk_orb_proj,*)
      end do
      deallocate( porb )

    end subroutine case_mode1

    subroutine case_mode2
      integer :: ik, iorb1, ia, il, im, tau
      integer :: im0, iorb0, my_l, ib
      real(kind=DP) :: val_j, val_mj
      real(kind=DP), allocatable :: porb(:,:)

      if ( mype /= 0 ) return

      allocate( porb( neg,nlmta_phi*ndim_spinor ) )

      do ik = 1, kv3, ndim_spinor         
         write(nfwfk_orb_proj,'(A)') "================= "
         write(nfwfk_orb_proj,'(A,I6,A,3F16.8,A)') 'ik = ', &
              &            ik +nk_in_the_process -1, " ( ", vkxyz(ik,1:3,BUCS), " )"
         
         call set_porb_mode2( ik, porb )

         do iorb1 = 1, nlmta_phi *ndim_spinor
            iorb0 = int( (iorb1-1)/kfac )+1
            call m_PP_tell_iorb_ia_l_m_tau(iorb0,ia,il,im0,tau)
            if ( iproj_group(ia) == 0) cycle

            im = ( im0 -1 )*ndim_spinor +mod( iorb1 -1, ndim_spinor ) +1

            if ( population_diag_mode == DIAG_LS ) then
               my_l = il -1
               if ( my_l > 0 ) then
                  if ( im <= 2*my_l ) then
                     val_j = my_l -0.5d0;     val_mj = -val_j +(im -1)
                  else
                     val_j = my_l +0.5d0;     val_mj = -val_j +(im -2*my_l -1)
                  endif
               else
                  val_j = my_l +0.5d0;        val_mj = -val_j +(im -2*my_l -1)
               endif
               write(nfwfk_orb_proj,'(I5,3I3,A,A,F4.1,A,F4.1,A)') &
                    &                ia, il-1, im, tau, &
                       &                ' : ia, l, ms'', tau', &
                       &                "     ( j= ", val_j, "  mj= ", val_mj, " )"
            else
               write(nfwfk_orb_proj,'(I5,3I3,A)') &
                    &                ia, il-1, im, tau, ' : ia, l, ms'', tau'
            endif
            write(nfwfk_orb_proj,'(4F18.10)') &
                 &              ( porb( neordr(ib,ik),iorb1 ), ib=1, neg_t )
         End Do
         write(nfwfk_orb_proj,*)
      end do
      deallocate( porb )

    end subroutine case_mode2

    subroutine case_ordinal
      integer :: ik, iksnl, iorb, ia, is, ib
      integer :: il, im, tau, lmtt
      real(kind=DP), allocatable :: porb(:)

      if ( mype /= 0 ) return

      allocate( porb(neg) )

      do ik = 1, kv3, ndim_spinor
         iksnl = (ik-1)/nspin + 1
         
         write(nfwfk_orb_proj,'(A)') "================= "
         write(nfwfk_orb_proj,'(A,I6,A,3F16.8,A)') 'ik = ', &
              &            ik +nk_in_the_process -1, " ( ", vkxyz(ik,1:3,BUCS), " )"
         
         do iorb = 1,nlmta_phi
            call m_PP_tell_iorb_ia_l_m_tau(iorb,ia,il,im,tau)
            if ( iproj_group(ia) == 0) cycle

            write(nfwfk_orb_proj,'(I5,3I3,A)') ia, il-1, im, tau, ' : ia, l, m, tau'
            
            call m_PP_tell_iorb_lmtt(iorb,lmtt)
            
            porb = 0.0d0
            Do is=1, ndim_spinor
               do ib = 1, neg
                  porb(ib) = porb(ib) &
                       & + ( compr(ib,iorb,1,ik+is-1)**2 &
                       &    +compi(ib,iorb,1,ik+is-1)**2 ) &
                       &     *( 1.d0+qorb(iorb)/norm_phig_mpi(lmtt,iksnl) )
               end do
            End Do
            write(nfwfk_orb_proj,'(4F18.10)') ( porb( neordr(ib,ik) ), ib=1, neg_t )
         end do
         write(nfwfk_orb_proj,*)
      end do
      deallocate( porb )

    end subroutine case_ordinal

    subroutine case_spinmixed_basis
      integer :: ik, iksnl, iorb, ia, is, ib
      integer :: iorb0, il, im, tau, lmtt, my_l, im0
      real(kind=DP) :: val_j, val_mj
      complex(kind=CMPLDP) :: z1, z2

      real(kind=DP), allocatable :: porb(:)

      if ( mype /= 0 ) return

      allocate( porb(neg) )

      do ik = 1, kv3, ndim_spinor
         iksnl = (ik-1)/nspin + 1
         
         write(nfwfk_orb_proj,'(A)') "================= "
         write(nfwfk_orb_proj,'(A,I6,A,3F16.8,A)') 'ik = ', &
              &            ik +nk_in_the_process -1, " ( ", vkxyz(ik,1:3,BUCS), " )"
         
         do iorb = 1, nlmta_phi *ndim_spinor
            iorb0 = int( (iorb-1)/ndim_spinor )+1
            call m_PP_tell_iorb_ia_l_m_tau(iorb0,ia,il,im0,tau)
            if ( iproj_group(ia) == 0) cycle

            im = ( im0 -1 )*ndim_spinor +mod( iorb -1, ndim_spinor ) +1

            if ( sw_diagonalize_population == ON ) then
               if ( population_diag_mode == DIAG_LS ) then
                  my_l = il -1
                  if ( my_l > 0 ) then
                     if ( im <= 2*my_l ) then
                        val_j = my_l -0.5d0;    val_mj = -val_j +(im -1)
                     else
                        val_j = my_l +0.5d0;    val_mj = -val_j +(im -2*my_l -1)
                     endif
                  else
                     val_j = my_l +0.5d0;       val_mj = -val_j +(im -2*my_l -1)
                  endif
               endif
            endif

            call m_PP_tell_iorb_lmtt(iorb0,lmtt)

            if ( population_diag_mode == DIAG_LS ) then
               write(nfwfk_orb_proj,'(I5,3I3,A,A,F4.1,A,F4.1,A)') &
                    &                ia, il-1, im, tau, &
                    &                ' : ia, l, ms'', tau', &
                    &                "     ( j= ", val_j, "  mj= ", val_mj, " )"
            else
               write(nfwfk_orb_proj,'(I5,3I3,A)') &
                    &                ia, il-1, im, tau, ' : ia, l, ms'', tau'
            endif
            
            porb = 0.0d0
            do ib = 1, neg
               z1 = dcmplx( compr(ib,iorb,1,ik ), &
                    &       compi(ib,iorb,1,ik ) )
               z2 = dcmplx( compr(ib,iorb,1,ik+1 ), &
                    &       compi(ib,iorb,1,ik+1 ) )
               porb(ib) = porb(ib) &
                    &   + (z1+z2)*conjg(z1+z2) &
                    &     *( 1.d0+qorb(iorb0)/norm_phig_mpi(lmtt,iksnl) )
            end do
            write(nfwfk_orb_proj,'(4F18.10)') ( porb( neordr(ib,ik) ), ib=1, neg_t )
         end do
         write(nfwfk_orb_proj,*)
      end do
      deallocate( porb )

    end subroutine case_spinmixed_basis
    
    subroutine case_with_j
      integer :: ik, iksnl, ia, ig, ii, it, ilp, ll, tau, ip
      integer :: iorb, lmtt1, ib, m1
      real(kind=DP) :: c1, c2
      complex(kind=CMPLDP) :: z1, z2

      real(kind=DP), allocatable :: porb(:)
      complex(kind=CMPLDP), allocatable :: zcomp(:,:,:)

! ----------------
      if ( mype /= 0 ) return

      allocate( porb(neg) )

      Do ik=1, kv3, ndim_spinor
         write(nfwfk_orb_proj,'(A)') "================= "
         write(nfwfk_orb_proj,'(A,I6,A,3F16.8,A)') 'ik = ', &
              &            ik +nk_in_the_process -1, " ( ", vkxyz(ik,1:3,BUCS), " )"

         iksnl = ( ik -1 )/nspin +1

         Do ia=1, natm
            ig = iproj_group(ia)
            if ( ig == 0 ) cycle

            do ii=1,num_proj_elems(ig)
               ip = proj_group( ii, ig )
               it = proj_attribute(ip)%ityp
               ilp = proj_attribute(ip)%l +1
               ll = proj_attribute(ip)%l
               tau = proj_attribute(ip)%t
!
               allocate( zcomp( -ll:ll, neg, ndim_spinor ) ); zcomp = 0.0d0
               call tranform_compri_r2c_sph( ia, it, ll, tau, ik, &
                    &                        compr, compi, zcomp )

               if ( ll == 0 ) then
                  call find_iorb_from_lmt( ia, it, ll, 1, tau, iorb )
                  call m_PP_tell_iorb_lmtt( iorb, lmtt1 )

                  write(nfwfk_orb_proj,'(I5,F7.2,I3,F7.2,I3,A)') &
                       &         ia, ll+0.5d0, ll, 0.5d0, tau, ' : ia, j, l, mj, tau'
                  Do ib=1, neg
                     z1 = cmplx( compr(ib,iorb,1,ik),   compi(ib,iorb,1,ik) )
                     z2 = cmplx( compr(ib,iorb,1,ik+1), compi(ib,iorb,1,ik+1) )
                     porb(ib) = ( z1*conjg(z1) +z2*conjg(z2) ) &
                          &     *( 1.d0+qorb(iorb)/norm_phig_mpi( lmtt1, iksnl ) )
                  End Do
                  write(nfwfk_orb_proj,'(4F18.10)') &
                       &               ( porb( neordr(ib,ik) ), ib=1, neg_t )
               else
! j_up
                  Do m1=-ll -1, ll
                     c1 = dble( ll +m1 + 1 ) / dble( 2 *ll +1 )
                     c2 = dble( ll -m1 )     / dble( 2 *ll +1 )
                     c1 = sqrt(c1);  c2 = sqrt(c2)

                     call find_iorb_from_lmt( ia, it, ll, 1, tau, iorb )
                     call m_PP_tell_iorb_lmtt( iorb, lmtt1 )

                     write(nfwfk_orb_proj,'(I5,F7.2,I3,F7.2,I3,A)') &
                          &         ia, ll+0.5d0, ll, m1+0.5d0, tau, &
                          &             ' : ia, j, l, mj, tau'
                     Do ib=1, neg
                        z1 = 0.0d0
                        if ( m1 > -ll -1 ) z1 = z1 +c1 *zcomp( m1,    ib, 1 )
                        if ( m1 < ll     ) z1 = z1 +c2 *zcomp( m1 +1, ib, 2 )
                        porb(ib) = z1 *conjg(z1) &
                             &     *( 1.d0+qorb(iorb)/norm_phig_mpi(lmtt1,iksnl) )
                     End Do
                     write(nfwfk_orb_proj,'(4F18.10)') &
                          &         ( porb( neordr(ib,ik) ), ib=1, neg_t )
                  End Do
! j_down
                  Do m1=-ll+1, ll
                     c1 = dble( ll -m1 + 1 ) / dble( 2 *ll +1 )
                     c2 = dble( ll +m1 )     / dble( 2 *ll +1 )
                     c1 = sqrt(c1);  c2 = -sqrt(c2)

                     call find_iorb_from_lmt( ia, it, ll, 1, tau, iorb )
                     call m_PP_tell_iorb_lmtt( iorb, lmtt1 )

                     write(nfwfk_orb_proj,'(I5,F7.2,I3,F7.2,I3,A)') &
                          &         ia, ll-0.5d0, ll, m1-0.5d0, tau, &
                          &             ' : ia, j, l, mj, tau'
                     Do ib=1, neg
                        z1 = c1 *zcomp( m1-1, ib, 1 ) +c2 *zcomp( m1, ib, 2 )
                        porb(ib) = z1 *conjg(z1) &
                             &     *( 1.d0+qorb(iorb)/norm_phig_mpi(lmtt1,iksnl) )
                     End Do
                     write(nfwfk_orb_proj,'(4F18.10)') &
                          &         ( porb( neordr(ib,ik) ), ib=1, neg_t )
                  End Do
               end if
               deallocate( zcomp )

            End Do
         End Do
         write(nfwfk_orb_proj,*)

      End Do
      deallocate( porb )

    end subroutine case_with_j

    subroutine tranform_compri_r2c_sph( ia, it, ll, tau, ik, &
         &                              compr, compi, zcomp )
      integer, intent(in) :: ia, it, ll, tau, ik
      real(kind=DP), intent(in) :: compr( neg, nlmta_phi, 1, kv3 )
      real(kind=DP), intent(in) :: compi( neg, nlmta_phi, 1, kv3 )
      complex(kind=CMPLDP), intent(out) :: zcomp( -ll:ll, neg, ndim_spinor )

      integer :: m1, m2, ib, iorb
      complex(kind=CMPLDP) :: z1
      complex(kind=CMPLDP) :: ztmp( ndim_spinor )

      Do m1=-ll, ll
         Do m2=1, 2*ll +1
            call find_iorb_from_lmt( ia, it, ll, m2, tau, iorb )
            Do ib=1, neg
               ztmp(1) = cmplx( compr(ib,iorb,1,ik),   compi(ib,iorb,1,ik)   )
               ztmp(2) = cmplx( compr(ib,iorb,1,ik+1), compi(ib,iorb,1,ik+1) )

               if ( ll == 0 ) z1 = MatU_ylm_RC_L0( m2, m1 )
               if ( ll == 1 ) z1 = MatU_ylm_RC_L1( m2, m1 )
               if ( ll == 2 ) z1 = MatU_ylm_RC_L2( m2, m1 )
               if ( ll == 3 ) z1 = MatU_ylm_RC_L3( m2, m1 )

               zcomp(m1,ib,:) = zcomp(m1,ib,:) +z1 *ztmp(:)
            End Do
         End Do
      End Do
    end subroutine tranform_compri_r2c_sph

    subroutine find_iorb_from_lmt( ia, it, ll, mm, tau, iorb )
      integer, intent(in) :: ia, it, ll, mm, tau
      integer, intent(out) :: iorb

      integer :: lmt1, l1, m1, t1

      iorb = 0
      Do lmt1=1, ilmt_phi(it)
         l1 = ltp_phi(lmt1,it); m1 = mtp_phi(lmt1,it);  t1 = taup_phi(lmt1,it)
         if ( l1 == ll +1 .and. m1 == mm .and. t1 == tau ) then
            exit
         endif
      ENd Do
      iorb = lmta_phi( lmt1,ia )

    end subroutine find_iorb_from_lmt

  end subroutine m_ESIO_wd_Wfn_orb_proj

  subroutine m_ESIO_wd_EigenValues_bxsf
    integer :: ik, ib,jb,ibo,jbo, neg_t
    integer :: lun, nbz_mesh(3), ispin
    integer :: kv3_wk

    integer, allocatable :: neordr_t(:,:), kvtab(:)
    real(kind=DP), parameter :: delta = 1.d-12

    if ( ekmode == ON ) then
       allocate(neordr_t(neg,kv3_ek));   neordr_t = 0
       neg_t = neg
       if(neg_is_enlarged) neg_t = neg -num_extra_bands

       do ik = 1, kv3_ek, ndim_spinor
          if (nspin == 1 .or. (nspin == 2 .and. mod(ik,2) == 1)) &
               & neordr_t(1:neg,ik) = (/(ib,ib=1,neg)/)
          do ib = 1, neg-1
             do jb = ib+1, neg
                ibo = neordr_t(ib,ik);  jbo = neordr_t(jb,ik)
                if ( eko_ek(jbo,ik) < eko_ek(ibo,ik)-delta ) then        ! MPI
                   neordr_t(jb,ik) = ibo;    neordr_t(ib,ik) = jbo
                end if
             end do
          end do
       end do
    else
       write(*,*) "ekmode == Off is not supported "
       stop
    endif

    call m_Kp_get_nkmesh( nbz_mesh )
    kv3_wk = ( nbz_mesh(1) +1 )*( nbz_mesh(2) +1 )*( nbz_mesh(3) +1 ) *nspin

    allocate(kvtab(kv3_wk));  kvtab = 0
    call m_Kp_get_kptable_bxsf( kv3_wk, kvtab, 1 )

    lun = 360
    Do ispin=1, nspin, ndim_spinor
       if ( nspin == 1 .or. ndim_spinor == 2 ) then
          Open( unit=lun, file="./myband.bxsf", status="unknown", form="formatted" )
       else
          if ( ispin == 1 ) then
             Open( unit=lun, file="./myband_up.bxsf", status="unknown", &
                  &          form="formatted" )
          else
             Open( unit=lun, file="./myband_down.bxsf", status="unknown", &
                  &          form="formatted" )
          endif
       endif
       call print_header( lun );  call print_body( lun );   call print_tail( lun )
       close( lun )
    End Do

    deallocate(kvtab)
    if ( allocated(neordr_t) ) deallocate(neordr_t);

  contains

    subroutine print_header( lun )
      integer, intent(in) :: lun

      write( lun, '(A)') "BEGIN_INFO"
      write( lun, '(A)') "  #"
      write( lun, '(A)') "  # this is a Band-XCRYSDEN-Structure-File"
      write( lun, '(A)') "  #  aimed for Visualization of Fermi Surface"
      write( lun, '(A)') "  #"
      write( lun, '(A,F20.15)') "    Fermi Energy:",  0.0d0
      write( lun, '(A)') "END_INFO"
      write( lun, * )
      write( lun, '(A)') "BEGIN_BLOCK_BANDGRID_3D"
      write( lun, '(A)') "  from_phase/0"
      write( lun, '(A)') "  BEGIN_BANDGRID_3D_fermi"
      write( lun, '(I6)') neg_t
      write( lun, '(3I6)') nbz_mesh(1:3) +1
      write( lun, '(3F10.2)') 0.0, 0.0, 0.0
      write( lun, '(3F12.8)') rltv(1:3,1) /Bohr
      write( lun, '(3F12.8)') rltv(1:3,2) /Bohr
      write( lun, '(3F12.8)') rltv(1:3,3) /Bohr
    end subroutine print_header

    subroutine print_body( lun )
      integer, intent(in) :: lun
      integer ib, ik

      if ( ekmode == ON ) then
         if ( ndim_spinor == 1 ) then
            Do ib=1, neg_t
               write( lun, '(A,I8)') "    BAND: ", ib
               write( lun, '(5F15.8)') &
                    &   ( ( eko_ek( neordr_t(ib,kvtab(ik)),kvtab(ik) )-efermi ) &
                    &         *Hartree,  ik=ispin,kv3_wk,nspin )
            End Do
         else
            Do ib=1, neg_t
               write( lun, '(A,I8)') "    BAND: ", ib
               write( lun, '(5F15.8)') &
                    &   ( ( eko_ek( neordr_t(ib,kvtab(ik)),kvtab(ik) ) -efermi ) &
                    &          *Hartree, ik=ispin,kv3_wk,nspin )
            End Do
         endif
      endif
    end subroutine print_body

    subroutine print_tail( lun )
      integer, intent(in) :: lun
       write( lun, '(A)') "  END_BANDGRID_3D_fermi"
       write( lun, '(A)') "END_BLOCK_BANDGRID_3D"
    end subroutine print_tail

  end subroutine m_ESIO_wd_EigenValues_bxsf

  subroutine m_ESIO_wd_EigenValues_frmsf
    integer :: ik, ib,jb,ibo,jbo, neg_t
    integer :: lun, nbz_mesh(3), ispin
    integer :: kv3_wk

    integer, allocatable :: neordr_t(:,:), kvtab(:)
    real(kind=DP), parameter :: delta = 1.d-12

    if ( ekmode == ON ) then
       allocate(neordr_t(neg,kv3_ek));   neordr_t = 0
       neg_t = neg
       if(neg_is_enlarged) neg_t = neg -num_extra_bands

       do ik = 1, kv3_ek, ndim_spinor
          if (nspin == 1 .or. (nspin == 2 .and. mod(ik,2) == 1)) &
               & neordr_t(1:neg,ik) = (/(ib,ib=1,neg)/)
          do ib = 1, neg-1
             do jb = ib+1, neg
                ibo = neordr_t(ib,ik);  jbo = neordr_t(jb,ik)
                if ( eko_ek(jbo,ik) < eko_ek(ibo,ik)-delta ) then        ! MPI
                   neordr_t(jb,ik) = ibo;    neordr_t(ib,ik) = jbo
                end if
             end do
          end do
       end do
    else
       write(*,*) "ekmode == Off is not supported "
       stop
    endif

    call m_Kp_get_nkmesh( nbz_mesh )
    kv3_wk = ( nbz_mesh(1) )*( nbz_mesh(2) )*( nbz_mesh(3) ) *nspin

    allocate(kvtab(kv3_wk));  kvtab = 0
    call m_Kp_get_kptable_bxsf( kv3_wk, kvtab, 0 )

    lun = 360
    Do ispin=1, nspin, ndim_spinor
       if ( nspin == 1 .or. ndim_spinor == 2 ) then
          Open( unit=lun, file="./myband.frmsf", status="unknown", form="formatted" )
       else
          if ( ispin == 1 ) then
             Open( unit=lun, file="./myband_up.frmsf", status="unknown", &
                  &          form="formatted" )
          else
             Open( unit=lun, file="./myband_down.frmsf", status="unknown", &
                  &          form="formatted" )
          endif
       endif
       call print_header( lun );  call print_body( lun );   call print_tail( lun )
       close( lun )
    End Do

    deallocate(kvtab)
    if ( allocated(neordr_t) ) deallocate(neordr_t);

  contains

    subroutine print_header( lun )
      integer, intent(in) :: lun

      write( lun, '(3I6)') nbz_mesh(1:3)
      write( lun, '(I6)') 1
      write( lun, '(I6)') neg_t
      write( lun, '(3F12.8)') rltv(1:3,1) /Bohr
      write( lun, '(3F12.8)') rltv(1:3,2) /Bohr
      write( lun, '(3F12.8)') rltv(1:3,3) /Bohr
    end subroutine print_header

    subroutine print_body( lun )
      integer, intent(in) :: lun
      integer ib, ik

      if ( ekmode == ON ) then
         if ( ndim_spinor == 1 ) then
            Do ib=1, neg_t
               write( lun, '(5F15.8)') &
                    &   ( ( eko_ek( neordr_t(ib,kvtab(ik)),kvtab(ik) )-efermi ) &
                    &         *Hartree,  ik=ispin,kv3_wk,nspin )
            End Do
         else
            Do ib=1, neg_t
               write( lun, '(5F15.8)') &
                    &   ( ( eko_ek( neordr_t(ib,kvtab(ik)),kvtab(ik) ) -efermi ) &
                    &          *Hartree, ik=ispin,kv3_wk,nspin )
            End Do
         endif
      endif
    end subroutine print_body

    subroutine print_tail( lun )
      integer, intent(in) :: lun
    end subroutine print_tail

  end subroutine m_ESIO_wd_EigenValues_frmsf

  subroutine m_ESIO_wd_phirt2_rotated
    use m_PseudoPotential,  only : nlmt_phi, ilmt_phi, ltp_phi, mtp_phi, taup_phi, &
         &                         phirt, xh, radr_paw, rmax, nmesh
    use m_Ionic_System,  only : cps, pos
    use m_Electronic_Structure,  only : porb_rot_matrix_cmplx, porb_rot_matrix_real
    use m_Control_Parameters,  only : population_diag_mode
    use m_Const_Parameters,   only : DIAG_CHARGE_DENSITY_MATRIX, &
         &                           DIAG_SPIN_DENSITY_MATRIX, DIAG_LS_with_t2g_octa
!
    call change_of_coordinate_system(altv,pos,natm,natm,cps)

    select case( population_diag_mode )
    case ( DIAG_CHARGE_DENSITY_MATRIX )
       call case_mode1
    case ( DIAG_SPIN_DENSITY_MATRIX )
       call case_mode2
    case ( DIAG_LS_with_t2g_octa )
       call case_mode2
    end select

  contains

    subroutine case_mode1
      integer :: ia, it
      integer :: lmt1, il1, im1, tau1, lmt2, il2, im2, tau2
      real(kind=DP) :: rcut, dx, dy, dz
      real(kind=DP) :: origin(3), vec_span(3,3)
      real(kind=DP), allocatable :: phi_on_mesh(:,:,:,:)
      real(kind=DP), allocatable :: phi2_on_mesh(:,:,:)

      real(kind=DP), allocatable :: phi_on_mesh_rot(:,:,:)

      integer :: lun = 4000
      integer :: nxmax, nymax, nzmax

      nxmax = 10;     nymax = 10;     nzmax = 10

      Do ia=1, natm
         it = ityp(ia)
         if (iproj_group(ia) == 0) cycle

         rcut = 3.0d0
         dx = rcut /nxmax;        dy = rcut /nymax;        dz = rcut /nzmax

         origin(1:3) = cps(ia,1:3) -rcut
         vec_span = 0.0d0
         vec_span(1,1) = rcut *2;   vec_span(2,2) = rcut *2;   vec_span(3,3) = rcut *2

         allocate( phi_on_mesh(nlmt_phi,-nxmax:nxmax,-nymax:nymax,-nzmax:nzmax) )
         allocate( phi_on_mesh_rot(-nxmax:nxmax,-nymax:nymax,-nzmax:nzmax) )
         allocate( phi2_on_mesh(-nxmax:nxmax,-nymax:nymax,-nzmax:nzmax) )

         call set_phirt_on_mesh( it, -nxmax, nxmax, -nymax, nymax, -nzmax, nzmax, &
              &                  dx, dy, dz, phi_on_mesh )
         ! -
         do lmt1=1,ilmt_phi(it)
            il1 = ltp_phi(lmt1,it);  im1 = mtp_phi(lmt1,it);  tau1 = taup_phi(lmt1,it)

            phi_on_mesh_rot = 0.0d0

            Do lmt2=1, ilmt_phi(it)
               il2 = ltp_phi(lmt2,it);  im2 = mtp_phi(lmt2,it);  tau2 = taup_phi(lmt2,it)
               if ( il1 /= il2 ) cycle
               if ( tau1 /= tau2 ) cycle
               phi_on_mesh_rot(:,:,:) = phi_on_mesh_rot(:,:,:) &
                    &                  +porb_rot_matrix_real(ia,il1,im2,im1) &
                    &                   *phi_on_mesh(lmt2,:,:,:)
            End Do
            phi2_on_mesh(:,:,:) = phi_on_mesh_rot(:,:,:)  &
                 &               *phi_on_mesh_rot(:,:,:)

            if ( mype == 0 ) then
               call printout_phi2( lun, ia, il1, im1, tau1, &
                    &             -nxmax, nxmax, -nymax, nymax, -nzmax, nzmax, &
                    &              origin, vec_span, phi2_on_mesh, 1 )
            endif
         End do
         deallocate( phi_on_mesh );       deallocate( phi_on_mesh_rot )
         deallocate( phi2_on_mesh )
      End Do

    end subroutine case_mode1

    subroutine case_mode2
      integer :: ia, it
      integer :: lmt1, il1, im1, tau1, lmt2, il2, im2, tau2
      integer :: j1, j2, is1, is2, immax
      real(kind=DP) :: rcut, dx, dy, dz
      real(kind=DP) :: origin(3), vec_span(3,3)
      real(kind=DP), allocatable :: phi_on_mesh(:,:,:,:)
      real(kind=DP), allocatable :: phi2_on_mesh(:,:,:,:)
      complex(kind=CMPLDP), allocatable :: phi_on_mesh_rot(:,:,:,:)

      integer :: lun = 4000
      integer :: nxmax, nymax, nzmax

      nxmax = 10;     nymax = 10;     nzmax = 10

      Do ia=1, natm
         it = ityp(ia)
         if (iproj_group(ia) == 0) cycle

         rcut = 3.0d0
         dx = rcut /nxmax;        dy = rcut /nymax;        dz = rcut /nzmax

         origin(1:3) = cps(ia,1:3) -rcut
         vec_span = 0.0d0
         vec_span(1,1) = rcut *2;   vec_span(2,2) = rcut *2;   vec_span(3,3) = rcut *2

         allocate( phi_on_mesh(nlmt_phi,-nxmax:nxmax,-nymax:nymax,-nzmax:nzmax) )
         allocate( phi_on_mesh_rot(-nxmax:nxmax,-nymax:nymax,-nzmax:nzmax,ndim_spinor) )
         allocate( phi2_on_mesh(-nxmax:nxmax,-nymax:nymax,-nzmax:nzmax,ndim_spinor) )

         call set_phirt_on_mesh( it, -nxmax, nxmax, -nymax, nymax, -nzmax, nzmax, &
              &                  dx, dy, dz, phi_on_mesh )
         ! -
         do lmt1=1,ilmt_phi(it)
            il1 = ltp_phi(lmt1,it);  im1 = mtp_phi(lmt1,it);  tau1 = taup_phi(lmt1,it)

            if ( population_diag_mode == DIAG_LS_with_t2g_octa ) then
               if ( il1 /= 2 +1 ) cycle
            endif

            Do is1=1, ndim_spinor
               phi_on_mesh_rot = 0.0d0

               Do lmt2=1, ilmt_phi(it)
                  il2 = ltp_phi(lmt2,it);  im2 = mtp_phi(lmt2,it);
                  tau2 = taup_phi(lmt2,it)
                  immax = 2 *il2 -1
                  if ( il1 /= il2 ) cycle
                  if ( tau1 /= tau2 ) cycle
                  Do is2=1, ndim_spinor
                     j1 = ( im1 -1 )*ndim_spinor +is1
                     j2 = immax *(is2 -1) +im2

                     phi_on_mesh_rot(:,:,:,is2) = phi_on_mesh_rot(:,:,:,is2) &
                       &                        +porb_rot_matrix_cmplx(ia,il1,j2,j1)&
                       &                        *phi_on_mesh(lmt2,:,:,:)
                  End do
               End Do
#if 0
               phi2_on_mesh(:,:,:,1) = conjg( phi_on_mesh_rot(:,:,:,1) ) &
                    &                  *phi_on_mesh_rot(:,:,:,1)
               phi2_on_mesh(:,:,:,2) = conjg( phi_on_mesh_rot(:,:,:,2) ) &
                    &                  *phi_on_mesh_rot(:,:,:,2)
               if ( mype == 0 ) then
                  call printout_phi2( lun, ia, il1, j1, tau1, &
                       &             -nxmax, nxmax, -nymax, nymax, -nzmax, nzmax, &
                       &              origin, vec_span, phi2_on_mesh, 2 )
               endif
#else
               phi2_on_mesh(:,:,:,1) = conjg( phi_on_mesh_rot(:,:,:,1) ) &
                    &                  *phi_on_mesh_rot(:,:,:,1) &
                    &                +conjg( phi_on_mesh_rot(:,:,:,2) ) &
!                    &                -conjg( phi_on_mesh_rot(:,:,:,2) ) &
                    &                  *phi_on_mesh_rot(:,:,:,2)
               if ( mype == 0 ) then
                  call printout_phi2( lun, ia, il1, j1, tau1, &
                       &             -nxmax, nxmax, -nymax, nymax, -nzmax, nzmax, &
                       &              origin, vec_span, phi2_on_mesh, 1 )
               endif
#endif
            End Do
         End do
         deallocate( phi_on_mesh );       deallocate( phi_on_mesh_rot )
         deallocate( phi2_on_mesh )
      End Do

    end subroutine case_mode2

    subroutine printout_phi2( lun, ia, il, im, tau, &
         &                    nxs, nxe, nys, nye, nzs, nze, &
         &                    origin, vec_span, phi2_on_mesh, ncomp )
      integer, intent(in) :: lun, ia, il, im, tau, ncomp
      integer, intent(in) :: nxs, nxe, nys, nye, nzs, nze
      real(kind=DP), intent(in) :: phi2_on_mesh(nxs:nxe,nys:nye,nzs:nze,ncomp)
      real(kind=DP), intent(in) :: origin(3), vec_span(3,3)

      integer :: i, nx, ny, nz
      character*4 char1
      character*1 char2, char4
      character*2, char3
      character*64 file1

      write(char1,'(I4.4)') ia
      write(char2,'(I1.1)') il
      write(char3,'(I2.2)') im
      write(char4,'(I1.1)') tau

      file1 = "phirot_squared.ia_" // char1 // ".il_" // char2 // ".tau_" &
           &                 // char4 // ".no_" // char3 // ".xsf"
      open( unit=lun, file=file1, status="unknown", form="formatted" )

      write(lun,'(A)') 'CRYSTAL'
      write(lun,'(A)') 'PRIMVEC'
      write(lun,'(3F20.10)') altv(1:3,1) *Bohr
      write(lun,'(3F20.10)') altv(1:3,2) *Bohr
      write(lun,'(3F20.10)') altv(1:3,3) *Bohr
      write(lun,'(A)') 'CONVVEC'
      write(lun,'(3F20.10)') altv(1:3,1) *Bohr
      write(lun,'(3F20.10)') altv(1:3,2) *Bohr
      write(lun,'(3F20.10)') altv(1:3,3) *Bohr
      write(lun,'(A)') 'PRIMCOORD'
      write(lun,*) natm, 1
      Do i=1, natm
         write(lun,'(I5,3F20.10)') nint(iatomn(ityp(i))), cps(i,1:3)*Bohr
      End Do
      write(lun,'(A)') "BEGIN_BLOCK_DATAGRID_3D"
      write(lun,'(A,I6,A,I4,A,I4,A,I4,A)') &
           &    "  phi2 on mesh ( ia=", ia, ", il=", il, ", tau=", tau, &
           &                      ", no=", im,  " )"
      write(lun,'(A)') "BEGIN_DATAGRID_3D_1"
      write(lun,*) nxe -nxs +1, nye -nys +1, nze -nzs +1
      write(lun,'(3F20.10)') origin*Bohr
      write(lun,'(3F20.10)') vec_span(1:3,1) *Bohr
      write(lun,'(3F20.10)') vec_span(1:3,2) *Bohr
      write(lun,'(3F20.10)') vec_span(1:3,3) *Bohr
      write(lun,'(5F10.5)') (((phi2_on_mesh(nx,ny,nz,1),nx=nxs,nxe), &
           &                  ny=nys,nye),nz=nzs,nze)
      write(lun,'(A)') "END_DATAGRID_3D"
      if ( ncomp == 2 ) then
        write(lun,'(A)') "BEGIN_DATAGRID_3D_2"
         write(lun,*) nxe -nxs +1, nye -nys +1, nze -nzs +1
         write(lun,'(3F20.10)') origin*Bohr
         write(lun,'(3F20.10)') vec_span(1:3,1) *Bohr
         write(lun,'(3F20.10)') vec_span(1:3,2) *Bohr
         write(lun,'(3F20.10)') vec_span(1:3,3) *Bohr
         write(lun,'(5F10.5)') (((phi2_on_mesh(nx,ny,nz,2),nx=nxs,nxe), &
              &                  ny=nys,nye),nz=nzs,nze)
         write(lun,'(A)') "END_DATAGRID_3D"
      endif
      write(lun,'(A)') "END_BLOCK_DATAGRID_3D"
      close(lun)
    end subroutine printout_phi2

    subroutine set_phirt_on_mesh( it, nxs, nxe, nys, nye, nzs, nze, &
         &                        dx, dy, dz, phi_on_mesh )
      integer, intent(in) :: it, nxs, nxe, nys, nye, nzs, nze
      real(kind=DP), intent(in) :: dx, dy, dz
      real(kind=DP), intent(out) :: phi_on_mesh(nlmt_phi,nxs:nxe,nys:nye,nzs:nze)

      integer :: lmt1, il1, im1, tau1, nx, ny, nz, ind, nspher1
      real(kind=DP) :: cx, cy, cz, dist, ylm1, c1, ctmp, f1, f2
      real(kind=DP) :: dist_min = 1.0D-10

      do lmt1=1,ilmt_phi(it)
         il1 = ltp_phi(lmt1,it); im1 = mtp_phi(lmt1,it); tau1 = taup_phi(lmt1,it)

         Do nz=nzs, nze
            Do ny=nys, nye
               Do nx=nxs, nxe
                  cx = nx *dx;     cy = ny *dy;       cz = nz *dz
                  dist = sqrt(cx**2 +cy**2 +cz**2)

                  if ( dist < dist_min ) then
                     dist = dist_min
                     ind = 1
                     c1 = phirt(ind,il1,tau1,it) /radr_paw(ind,it)
                  else
                     ctmp = log( dist /rmax(it) ) *xh(it) +nmesh(it)
                     ind = int(ctmp)
                     if ( ind < 1 ) then
                        ind = 1
                        c1 = phirt(ind,il1,tau1,it) /radr_paw(ind,it)
                     else
                        f1 = ctmp -int(ctmp)
                        f2 = 1.0d0 -f1
                        c1 = ( phirt(ind,il1,tau1,it)/radr_paw(ind,it) )**f2 &
                             & *( phirt(ind+1,il1,tau1,it)/radr_paw(ind+1,it)) **f1
                     endif
                  endif
                  ! -- ylm --
                  nspher1 = ( il1 -1 )**2 +im1
                  call sphr( 1, nspher1, cx, cy, cz, ylm1 )
                  ! -------
                  phi_on_mesh(lmt1,nx,ny,nz) = c1 *ylm1
               End do
            ENd Do
         End Do
      End do
    end subroutine set_phirt_on_mesh

  end subroutine m_ESIO_wd_phirt2_rotated

end module m_ES_IO
