!=======================================================================
!
!  PROGRAM  PHASE/0 2016.01 ($Rev: 556 $)
!
!  MODULE: m_Ldos
!
!  AUTHOR(S): T. Yamasaki   January/18/2004
!
!  Contact address :  Phase System Consortium
!                     E-mail: phase_system@nims.go.jp URL https://azuma.nims.go.jp
!  
!
!
!=======================================================================
!
!     The original version of this set of the computer programs "PHASE"
!  was developed by the members of the Theory Group of Joint Research
!  Center for Atom Technology (JRCAT), based in Tsukuba, in the period
!  1993-2001.
!
!     Since 2002, this set has been tuned and new functions have been
!  added to it as a part of the national project "Frontier Simulation 
!  Software for Industrial Science (FSIS)",  which is supported by
!  the IT program of the Ministry of Education, Culture, Sports,
!  Science and Technology (MEXT) of Japan. 
!     Since 2006, this program set has been developed as a part of the
!  national project "Revolutionary Simulation Software (RSS21)", which
!  is supported by the next-generation IT program of MEXT of Japan.
!   Since 2013, this program set has been further developed centering on PHASE System
!  Consortium.
!   The activity of development of this program set has been supervised by Takahisa Ohno.
!
module m_Ldos
! $Id: m_Ldos.F90 556 2016-10-14 12:22:06Z ktagami $
  use m_Const_Parameters, only   : DP, CMPLDP, REGULAR_INTERVALS, BY_ATOMIC_POSITIONS &
       &                         , DELTA10, EXECUT, ON, SOFTPART, HARDPART, DIRECT, PAI2 &
       &                         , ALDOS, LAYERDOS, NO, OFF, ELECTRON
  use m_Control_Parameters, only : kimg, sw_aldos, sw_layerdos, crtdst_aldos, naldos_from, naldos_to &
       &                         , slicing_way_winlay, deltaz_winlay, normal_axis_winlay &
       &                         , crtdst_winlay, nspin, neg, ipridos, ekmode, sw_save_ldos_weight &
       &                         , af, printable &
       &                         , m_CtrlP_check_naldos_range, hardpart_subroutine,sw_rspace_ldos
  use m_Files, only              : nfout, nfldos &
       &                         , m_Files_open_nfldos, m_Files_close_nfldos
  use m_Crystal_Structure, only :  altv, rltv, univol
  use m_Ionic_System,   only :     natm2,natm,cps,numlay,ityp,iwei,if_aldos
  use m_Parallelization, only :    ista_e,iend_e,istep_e, np_e, map_z, npes, ierr, mype &
       &                         , map_k, myrank_k, map_ek, ista_kngp, iend_kngp, ista_k, iend_k &
       &                         , nrank_e, nrank_k, myrank_e, map_e, mpi_k_world &
       &                         , mpi_comm_group
  use m_Kpoints, only :            kv3, kv3_ek
  use m_FFT, only :                nfft, nfftp,nfftp_nonpara, fft_box_size_WF, fft_box_size_CD &
       &                         , fft_box_size_CD_nonpara &
       &                         , m_FFT_alloc_CD_box, m_FFT_dealloc_CD_box &
       &                         , m_FFT_CD_inverse0 &
       &                         , m_FFT_alloc_WF_work, m_FFT_dealloc_WF_work
  use m_PlaneWaveBasisSet, only :  kg, igf, ngabc, kgp
  use m_PseudoPotential, only :    modnrm
!$$  use m_Electronic_Structure,only :neordr, totch, efermi &
!$$       &                         , m_ES_WF_in_Rspace &
!$$       &                         , m_ES_wd_zaj_small_portion
  use m_Electronic_Structure,only :neordr, totch, efermi 
  use m_Electronic_Structure,only :m_ES_WF_in_Rspace_3D &
       &                         , m_ES_wd_zaj_small_portion_3D
  use m_Charge_Density, only :     chgq_l, chgq_enl &
       &                         , m_CD_hardpart_sub &
       &                         , m_CD_map_valence_charge_to_fft_box &
       &                         , m_CD_restore_chgq &
       &                         , m_CD_cp_chgq_to_chgqo & !ASMS
       &                         , m_CD_map_chgq_to_fft_box &
       &                         , m_CD_set_ylm_enl_etc &
       &                         , m_CD_dealloc_ylm_enl_etc &
       &                         , m_CD_keep_retrieve_hsr


! ============================== added by K. Tagami ============== 11.0
  use m_Control_Parameters,    only : noncol, ndim_magmom, ndim_spinor, sw_fft_xzy
!  use m_Charge_Density, only :    m_CD_hardpart_sub_noncl, &
!       &                          m_CD_hardpart_sub2_noncl, &
!       &                          m_CD_map_chgqenl_to_fft_box_kt
! ================================================================ 11.0
  use m_Parallelization, only : nel_fft_x, nel_fft_y, nel_fft_z, neg_g &
                              , map_fft_x, map_fft_y                   &
                              , fft_X_x_nel, fft_X_y_nel, fft_X_z_nel  &
                              , mpi_kg_world, mpi_ke_world, myrank_g
  use m_PlaneWaveBasisSet, only :  ngabc_kngp_l
  use m_FFT, only : m_FFT_Direct_3D,m_FFT_Direct_XYZ_3D
  use m_Parallelization, only : mpi_ge_world

  implicit none

  integer, private, target, allocatable, dimension(:,:,:) ::  mesh, meshp
  
  integer ::  nlayer = 0
  integer ::  mlayer = 1
  integer ::  naldos = 0
  integer, private :: naldos_write = 0
  integer ::  maldos = 1
  integer, private ::  n_total_ldoscal = 0
  real(kind=DP),private,allocatable,dimension(:,:)  :: winlay ! d(mlayer,2)
!  real(kind=DP),public,allocatable,dimension(:,:,:,:) :: weiwsc ! d(maldos,neg|np_e,1|ista_k:iend_k,nspin)
!  real(kind=DP),public,allocatable,dimension(:,:,:,:) :: weilay ! d(mlayer,neg|np_e,1|ista_k:iend_k,nspin)
  real(kind=DP),public,allocatable,dimension(:,:,:) :: weiwsc ! d(maldos,neg|np_e,1|ista_k:iend_k)
  real(kind=DP),public,allocatable,dimension(:,:,:) :: weilay ! d(mlayer,neg|np_e,1|ista_k:iend_k)
!  real(kind=DP),private,allocatable,dimension(:) ::    bfft   ! d(nfft)
  real(kind=DP),private :: maxhv, maxv
  real(kind=DP),private :: height
  integer,private,allocatable,dimension(:) :: if_aldos_full ! d(natm2)

! ============================= added by K. Tagami ===================== 11.0
  real(kind=DP),public,allocatable,dimension(:,:,:,:) :: weiwsc_noncl
  real(kind=DP),public,allocatable,dimension(:,:,:,:) :: weilay_noncl
! ====================================================================== 11.0

  integer, parameter :: ilen = 5

  include 'mpif.h'
  integer istatus(mpi_status_size)

contains
  integer function m_Ldos_what_is_n_total_ldos()
!!$    m_Ldos_what_is_n_total_ldos = n_total_ldos
    m_Ldos_what_is_n_total_ldos = naldos_write + mlayer
  end function m_Ldos_what_is_n_total_ldos

  subroutine m_Ldos_preparation()

    if(sw_aldos == ON ) then
       call set_naldos()      ! -> naldos, maldos
       call m_CtrlP_check_naldos_range(nfout,maldos)
!!$       call check_naldos_range()
       call set_if_aldos_full()
       call set_naldos_write()
       call alloc_mesh()
       call fillup_mesh()
    end if
    call set_nlayer()
    if(sw_layerdos == ON ) then
       call alloc_winlay()
       call set_winlay()
!!$       call dealloc_winlay()
    end if
  end subroutine m_Ldos_preparation

  subroutine set_naldos()
    naldos = natm2
    maldos = naldos + 1
  end subroutine set_naldos

!!$  subroutine check_naldos_range()
!!$    if(naldos_from == 0 .and. naldos_to == 0) then
!!$       naldos_from = 1
!!$       naldos_to   = maldos
!!$    else
!!$       if(naldos_from < 1)     naldos_from = 1
!!$       if(naldos_from > maldos) naldos_from = maldos
!!$       if(naldos_to < 1)       naldos_to   = 1
!!$       if(naldos_to > maldos)   naldos_to   = maldos
!!$       if(naldos_to < naldos_from) naldos_to = naldos_from
!!$    end if
!!$    write(nfout,'(" !!ldos naldos_from         = ",i6," <<check_naldos_range>>")') naldos_from
!!$    write(nfout,'(" !!ldos naldos_to           = ",i6," <<check_naldos_range>>")') naldos_to
!!$  end subroutine check_naldos_range

  subroutine set_if_aldos_full()
    integer, allocatable, dimension(:) :: ip_atom
    integer :: i, nb

    allocate(if_aldos_full(naldos_from:naldos_to))
    allocate(ip_atom(natm2+1)); ip_atom = 0

    nb = natm
    do i = 1, natm
       ip_atom(i) = i
       if(iwei(i) == 2) then
          nb = nb + 1
          ip_atom(nb) = i
       end if
    end do

    do i = 1, natm2
       if(ip_atom(i) == 0) then
          if(ipridos>=1) write(nfout,'(" !!ldos i, ip_atom = ",i5,i5)') i,ip_atom(i)
          stop ' ip_atom is illegal <<set_if_aldos_full>>'
       end if
    end do

    if_aldos_full = OFF
    do i = naldos_from, naldos_to
       if(i <= natm2) then
          if_aldos_full(i) = if_aldos(ip_atom(i))
       else
          if_aldos_full(i) = ON
       end if
    end do

    nb = 0
    ip_atom = 0
    do i = naldos_from, naldos_to
       if(if_aldos_full(i) == ON) then
          nb = nb + 1
          ip_atom(nb) = i
       end if
    end do
    if(ipridos>=1) write(nfout,'(" !!ldos aldos_atoms = ",10i5)') (ip_atom(i),i=1,nb)

    deallocate(ip_atom)
  end subroutine set_if_aldos_full

  subroutine set_naldos_write()
    integer :: i
    naldos_write = 0
    do i = naldos_from, naldos_to
       if(if_aldos_full(i) == ON) naldos_write = naldos_write + 1
    end do
    if(ipridos>=1) write(nfout,'(" !!ldos naldos_write = ",i5)') naldos_write
!!$    naldos_write = naldos_to - naldos_from + 1
  end subroutine set_naldos_write
    
  subroutine set_nlayer()
    integer :: i,j, io,jo
    real(kind=DP) :: h
    integer, allocatable, dimension(:) :: layer_index, layer_order

    if(sw_layerdos == ON) then
       call get_height(h)
       height = h
       if(slicing_way_winlay == REGULAR_INTERVALS) then
!!$       call get_height(h)
          if(kimg == 1) h = h*0.5d0
          nlayer = h/deltaz_winlay + (1-DELTA10)
          mlayer = nlayer + 1
       else if(slicing_way_winlay == BY_ATOMIC_POSITIONS) then
          allocate(layer_index(natm))
          nlayer = 1
          layer_index(nlayer) = numlay(1)
          do i = 2, natm
             do j = 1, nlayer
                if(layer_index(j) == numlay(i)) goto 1001
             end do
             nlayer = nlayer + 1
             layer_index(nlayer) = numlay(i)
1001         continue
          end do
          if(minval(layer_index(1:nlayer)) /= 1 .or. maxval(layer_index(1:nlayer)) /= nlayer) then
             allocate(layer_order(nlayer)); layer_order(:) = (/(i,i=1,nlayer)/)
             do i = 1, nlayer-1
                do j = i+1, nlayer
                   io = layer_order(i)
                   jo = layer_order(j)
                   if(layer_index(io) < layer_index(io)) then
                      layer_order(j) = io
                      layer_order(i) = jo
                   end if
                end do
             end do
             if(ipridos>=1) then
                write(nfout,'(" !!ldos i, layer_index, layer_order <<m_Ldos.set_nlayer>>")')
                do i = 1, nlayer
                   write(nfout,'(" !!ldos ",3i8)') i, layer_index(i), layer_order(i)
                end do
             end if
             do i = 1, natm
                do j = 1, nlayer
                   if(layer_index(j) == numlay(i)) then
                      numlay(i) = layer_order(j)
                      goto 1002
                   end if
                end do
1002            continue
             end do
             if(ipridos>=1) then
                write(nfout,'(" !!ldos #atom, numlay <<m_Ldos.set_nlayer>>")')
                do i = 1, natm
                   write(nfout,'(" !!ldos ",3i8)') i, numlay(i)
                end do
             end if
             deallocate(layer_order)
          end if
          deallocate(layer_index)
          mlayer = nlayer + 1
       end if
       if(ipridos>=1) write(nfout,'(" !!ldos nlayer, mlayer = ",2i8)') nlayer, mlayer
    else
       nlayer = 0
       mlayer = 0
    end if
  end subroutine set_nlayer

  subroutine alloc_mesh()
    integer :: id,nd2,nn
    id  = fft_box_size_WF(1,0)
    nd2 = fft_box_size_WF(2,0)
    nn  = fft_box_size_WF(3,1)
    allocate(mesh(id,nd2,nn))
    id  = fft_box_size_CD_nonpara(1,0)
    nd2 = fft_box_size_CD_nonpara(2,0)
    nn  = fft_box_size_CD(3,1)
    allocate(meshp(id,nd2,nn))
  end subroutine alloc_mesh

  subroutine m_Ldos_dealloc_mesh()
    deallocate(mesh)
    deallocate(meshp)
  end subroutine m_Ldos_dealloc_mesh

  subroutine alloc_winlay()
    allocate(winlay(mlayer,2))
  end subroutine alloc_winlay

  subroutine dealloc_winlay()
    deallocate(winlay)
  end subroutine dealloc_winlay

  subroutine fillup_mesh()
    integer :: id, nd2, nl, nm, nn
    real(kind=DP),allocatable,dimension(:,:) :: wk_catoms, cps_full ! d(natm2,3)
    real(kind=DP),allocatable,dimension(:) :: wk_dstnc  ! d(natm2)
    integer, allocatable, dimension(:) ::     wk_ioddst, ityp_full ! d((natm2+1)*4)

    allocate(wk_catoms(natm2,3))
    allocate(wk_dstnc(natm2))
    allocate(wk_ioddst((natm2+1)*4))
    allocate(cps_full(natm2,3))
    allocate(ityp_full(natm2))

    cps_full(1:natm,1:3) = cps(1:natm,1:3)
    ityp_full(1:natm) = ityp(1:natm)
    call rplcps(cps_full,ityp_full,1,natm2,natm,iwei)
        

    id  = fft_box_size_WF(1,0)
    nd2 = fft_box_size_WF(2,0)
    nl  = fft_box_size_WF(1,1)
    nm  = fft_box_size_WF(2,1)
    nn  = fft_box_size_WF(3,1)
    call anlmes(nfout,ipridos,mesh,id,nd2,nl,nm,nn,altv,rltv,cps_full,natm2,crtdst_aldos,DELTA10,wk_catoms,wk_ioddst,wk_dstnc)
    id  = fft_box_size_CD_nonpara(1,0)
    nd2 = fft_box_size_CD_nonpara(2,0)
    nl  = fft_box_size_CD(1,1)
    nm  = fft_box_size_CD(2,1)
    nn   = fft_box_size_CD(3,1)
    call anlmes(nfout,ipridos-1,meshp,id,nd2,nl,nm,nn,altv,rltv,cps_full,natm2,crtdst_aldos,DELTA10,wk_catoms,wk_ioddst,wk_dstnc)

    deallocate(ityp_full,cps_full,wk_ioddst,wk_dstnc,wk_catoms)
  end subroutine fillup_mesh

  subroutine get_height(h)
    real(kind=DP), intent(out) :: h
    integer :: iaxis,i2,i3
    real(kind=DP) :: a,b,S

    iaxis = normal_axis_winlay
    i2 = iaxis + 1
    if(i2 >= 4) i2 = i2 - 3
    i3 = i2 + 1
    if(i3 >= 4) i3 = i3 - 3
    a = dsqrt(altv(1,i2)**2 + altv(2,i2)**2 + altv(3,i2)**2)
    b = dsqrt(altv(1,i3)**2 + altv(2,i3)**2 + altv(3,i3)**2)
! ----------------------------
!        29 Jan. 2008
!     Revised by M. Usami
!!$    S = a*a * b*b - (altv(1,i2)*altv(1,i3)+altv(2,i2)*altv(2,i3)+altv(3,i2)*altv(3,i3))
    S = a*a * b*b - (altv(1,i2)*altv(1,i3)+altv(2,i2)*altv(2,i3)+altv(3,i2)*altv(3,i3))**2
! --------------------------
    S = dsqrt(S)
    h = univol/S
    if(ipridos>=1) write(nfout,'(" !!ldos S, h, V = ",3f16.8," <<m_Ldos.set_nlayer>>")') S,h,univol
  end subroutine get_height

  subroutine set_winlay()
! ----------------------------------------------
!       Nov. 17th 1992 by T.Yamasaki
!
!    the original subroutine name was "dwinly2"
!
!       modified by T. Yamasaki, 09th Feb 2004
! ----------------------------------------------
    real(kind=DP) :: zmin, tmp, h
    integer ::  i, nl
    i = normal_axis_winlay
!!$    call get_height(h)
    h = height
    if(kimg == 1) then
       maxhv = h*0.5d0
       maxv  = maxhv*2
    else
       maxhv = h
       maxv  = maxhv
    end if

    winlay = 0.d0
    if(slicing_way_winlay == REGULAR_INTERVALS) then
       if(ipridos>=1) then
          write(nfout,'(" !!ldos slicing_way = REGULAR_INTERVALS")')
          write(nfout,'(" !!ldos nlayer, mlayer = ",2i6)') nlayer, mlayer
          write(nfout,'(" !!ldos maxhv, maxv = ",2f8.4)') maxhv, maxv
          write(nfout,'(" !!deltaz_winlay = ",f8.4)') deltaz_winlay
       end if
       if(kimg == 1) then
          winlay(1,1) = 0.d0
          winlay(mlayer,2) = maxhv
          do i = 1, nlayer
             winlay(i,2) = deltaz_winlay * i
             if(winlay(i,2) > maxhv) winlay(i,2) = maxhv
             if(i+1 <= mlayer) winlay(i+1,1) = winlay(i,2)
          end do
       else if(kimg == 2) then
          zmin = minval(cps(1:natm,normal_axis_winlay))
          winlay(1,1) = zmin - crtdst_winlay
          winlay(mlayer,2) = winlay(1,1) + maxv
          do i = 1, nlayer
             winlay(i,2) = winlay(i,1) + deltaz_winlay
             if(winlay(i,2) > winlay(mlayer,2)) winlay(i,2) = winlay(mlayer,2)
             if(i+1 <= mlayer) winlay(i+1,1) = winlay(i,2)
          end do
       end if
    else if(slicing_way_winlay == BY_ATOMIC_POSITIONS) then
       if(ipridos>=1) &
            & write(nfout,'(" !!ldos slicing_selection = BY_ATOMIC_POSITIONS")')

       do i = 1, mlayer
          winlay(i,1) = +maxv*30
          winlay(i,2) = -maxv*30
       end do

       do i = 1, natm
          nl = numlay(i)
          if(winlay(nl,1) > cps(i,normal_axis_winlay)) winlay(nl,1) = cps(i,normal_axis_winlay)
          if(winlay(nl,2) < cps(i,normal_axis_winlay)) winlay(nl,2) = cps(i,normal_axis_winlay)
       end do

       if(ipridos>=1) then
          write(nfout,'(" !!ldos a range of atomic positions of each layer ")')
          do i = 1, nlayer
             write(nfout,'(2x,i4,2f20.8)') i, winlay(i,1), winlay(i,2)
          end do
       end if

       tmp = winlay(1,1) - crtdst_winlay
       if(kimg == 2) then
          if( tmp < winlay(nlayer,2) - maxhv + crtdst_winlay ) then
!     ( a unit cell has no vacuum region )
             winlay(1,1) = (winlay(1,1) + winlay(nlayer,2) - maxv)*0.5
             winlay(nlayer,2) = winlay(1,1)+maxv
             winlay(mlayer,1) = winlay(1,1)
             winlay(mlayer,2) = winlay(1,1)
          else
!     ( a unit cell has a vacuum region)
             winlay(1,1) = winlay(1,1) - crtdst_winlay
             winlay(nlayer,2) = winlay(nlayer,2) + crtdst_winlay
             winlay(mlayer,1) = winlay(nlayer,2)
             winlay(mlayer,2) = winlay(1,1) + maxv
          endif
       else if(kimg == 1) then
          winlay(1,1) = 0.d0
          winlay(mlayer,2) = maxhv
          winlay(nlayer,2) = winlay(nlayer,2) + crtdst_winlay
          winlay(mlayer,1) = winlay(nlayer,2)
       end if

       do i = 1, nlayer-1
          winlay(i,2) = (winlay(i,2) + winlay(i+1,1))*0.5
          winlay(i+1,1) = winlay(i,2)
       end do
    else
       if(ipridos>=1) write(nfout,'(" !!ldos slicing_selection is illegal")')
    end if
             
    if(ipridos>=1) then
       write(nfout,'(" !!ldos     no,        min,           max ")')
       do i = 1, nlayer
          write(6,'(" !!ldos ",i4,2f20.8)') i, winlay(i,1), winlay(i,2)
       enddo
       if(mlayer > nlayer) then
          write(6,'(" !!ldos ",i4,2f20.8)') mlayer, winlay(mlayer,1), winlay(mlayer,2)
       endif
    end if

  end subroutine set_winlay

  subroutine m_Ldos_alloc_weiwsc_etc()
    integer :: n_ldos_allocated

    if ( hardpart_subroutine /= 2 .and. sw_rspace_ldos == OFF ) then
       call m_CD_cp_chgq_to_chgqo() !ASMS
    endif

    n_total_ldoscal = 0
! ======================== modified by K. Tagami ========== 11.0
!    if(sw_aldos == ON) then
!       call alloc_weiwsc(n_ldos_allocated)
!       n_total_ldoscal = n_total_ldoscal + n_ldos_allocated
!    end if
!    if(sw_layerdos == ON) then
!       call alloc_weilay(n_ldos_allocated)
!       n_total_ldoscal = n_total_ldoscal + n_ldos_allocated
!    end if
!
    if ( noncol ) then
       if(sw_aldos == ON) then
          call alloc_weiwsc_noncl(n_ldos_allocated)
          n_total_ldoscal = n_total_ldoscal + n_ldos_allocated
       end if
       if(sw_layerdos == ON) then
          call alloc_weilay_noncl(n_ldos_allocated)
          n_total_ldoscal = n_total_ldoscal + n_ldos_allocated
       end if
    else
       if(sw_aldos == ON) then
          call alloc_weiwsc(n_ldos_allocated)
          n_total_ldoscal = n_total_ldoscal + n_ldos_allocated
       end if
       if(sw_layerdos == ON) then
          call alloc_weilay(n_ldos_allocated)
          n_total_ldoscal = n_total_ldoscal + n_ldos_allocated
       end if
    endif
! ========================================================= 11.0
  end subroutine m_Ldos_alloc_weiwsc_etc

  subroutine m_Ldos_dealloc_weiwsc_etc()
! ===================================== modiifed by K. Tagami ====== 11.0
!    if(sw_aldos == ON) then
!       call dealloc_weiwsc()
!    end if
!    if(sw_layerdos == ON) then
!       call dealloc_weilay()
!    end if
!
    if ( noncol ) then
       if(sw_aldos == ON) then
          call dealloc_weiwsc_noncl()
       end if
       if(sw_layerdos == ON) then
          call dealloc_weilay_noncl()
       end if
    else
       if(sw_aldos == ON) then
          call dealloc_weiwsc()
       end if
       if(sw_layerdos == ON) then
          call dealloc_weilay()
       end if
    endif
! ========================================================================== 11.0
  end subroutine m_Ldos_dealloc_weiwsc_etc

  subroutine alloc_weiwsc(n_ldos_allocated)
    integer, intent(out) :: n_ldos_allocated
    n_ldos_allocated = 0
    if(ekmode == ON) then
       allocate(weiwsc(maldos,neg,1)); weiwsc = 0.d0
    else if(ekmode == OFF) then
       if(hardpart_subroutine==2.or.sw_rspace_ldos==ON)then
          allocate(weiwsc(maldos,np_e,ista_k:iend_k)); weiwsc = 0.d0
       else
          allocate(weiwsc(maldos,neg,kv3)); weiwsc = 0.d0
       endif
    end if
    n_ldos_allocated = maldos
  end subroutine alloc_weiwsc

! ================================= added by K. Tagami ============= 11.0
  subroutine alloc_weiwsc_noncl(n_ldos_allocated)
    integer, intent(out) :: n_ldos_allocated
    n_ldos_allocated = 0
    if(ekmode == ON) then
       allocate(weiwsc_noncl(maldos,neg,1,ndim_magmom))
    else if(ekmode == OFF) then
       if(hardpart_subroutine==2.or.sw_rspace_ldos==ON)then
          allocate(weiwsc_noncl(maldos,np_e,ista_k:iend_k,ndim_magmom))
       else
          allocate(weiwsc_noncl(maldos,neg,kv3,ndim_magmom))
       endif
    end if

    weiwsc_noncl = 0.0d0
    n_ldos_allocated = maldos

  end subroutine alloc_weiwsc_noncl
! =============================================================== 11.0

  subroutine dealloc_weiwsc()
    deallocate(weiwsc)
  end subroutine dealloc_weiwsc

! ================================== added by K. Tagami ========== 11.0
  subroutine dealloc_weiwsc_noncl()
    deallocate(weiwsc_noncl)
  end subroutine dealloc_weiwsc_noncl
! =============================================================== 11.0

  subroutine alloc_weilay(n_ldos_allocated)
    integer, intent(out) :: n_ldos_allocated
!!$    allocate(weilay(mlayer,np_e)); weilay = 0.d0
    n_ldos_allocated = 0
    if(ekmode == ON) then
       allocate(weilay(mlayer,neg,1)); weilay = 0.d0
    else if(ekmode == OFF) then
       if(hardpart_subroutine==2.or.sw_rspace_ldos==ON)then
          allocate(weilay(mlayer,np_e,ista_k:iend_k)); weilay = 0.d0
       else
          allocate(weilay(mlayer,neg,kv3)); weilay = 0.d0
       endif
    end if
    n_ldos_allocated = mlayer
  end subroutine alloc_weilay

! ============================ added by K. Tagami ================= 11.0
  subroutine alloc_weilay_noncl(n_ldos_allocated)
    integer, intent(out) :: n_ldos_allocated

    n_ldos_allocated = 0
    if(ekmode == ON) then
       allocate(weilay_noncl(mlayer,neg,1,ndim_magmom))
    else if(ekmode == OFF) then
       if(hardpart_subroutine==2.or.sw_rspace_ldos==ON)then
          allocate(weilay_noncl(mlayer,np_e,ista_k:iend_k,ndim_magmom))
       else
          allocate(weilay_noncl(mlayer,neg,kv3,ndim_magmom))
       endif
    end if
    weilay_noncl = 0.d0
    n_ldos_allocated = mlayer
  end subroutine alloc_weilay_noncl
! =============================================================== 11.0

  subroutine dealloc_weilay()
    deallocate(weilay)
  end subroutine dealloc_weilay

! ============================== added by K. Tagami ========== 11.0
  subroutine dealloc_weilay_noncl()
    deallocate(weilay_noncl)
  end subroutine dealloc_weilay_noncl
! ============================================================= 11.0

  subroutine m_Ldos_wd_natm2_and_totch()
    if(mype == 0 .and. ipridos>=0 ) then
       write(nfldos,'(" natm2 = ",i8)') naldos ! natm2
       write(nfldos,'(" totch = ",f20.8)') totch
    end if
  end subroutine m_Ldos_wd_natm2_and_totch

  subroutine m_Ldos_cal()
    integer,allocatable,dimension(:,:) :: meshwk
    integer :: ik,is,ip, ib,j,i
    integer :: ekmode_t

    real(kind=DP) :: denom, chgq0
    real(kind=DP), allocatable, dimension(:) :: zrhoik ! d(kg*kimg)
    real(kind=DP), allocatable, dimension(:) :: bfft   ! d(nfft)

    real(kind=DP), allocatable, dimension(:,:,:) :: wsctmp,laytmp
    integer :: ibsize, lsize
    real(kind=DP), allocatable, dimension(:,:) :: wk_bfft_l
    integer, allocatable, dimension(:) :: mapg2lx, mapg2ly
    if(sw_layerdos == ON) allocate(zrhoik(kg*kimg))

    ibsize = 1

#ifdef FFT_3D_DIVISION
    lsize = fft_X_x_nel*fft_X_y_nel*fft_X_z_nel
#else
    lsize = max(maxval(nel_fft_x(:)),maxval(nel_fft_y(:)),maxval(nel_fft_z(:)))
#endif
    allocate(mapg2ly(nfft))
    mapg2ly = 0
    allocate(mapg2lx(nfft))
    mapg2lx = 0
    call make_map()
    denom = 1.d0/product(fft_box_size_WF(1:3,1))

    call m_CD_keep_retrieve_hsr(.true.)
       if(modnrm == EXECUT) call m_CD_set_ylm_enl_etc()
       if(sw_save_ldos_weight == ON) call m_Ldos_wd_natm2_and_totch()

! ==================================== added by K. Tagami ============= 11.0
       if ( sw_aldos == ON ) weiwsc = 0.d0
       if ( sw_layerdos == ON ) weilay = 0.d0
! ===================================================================== 11.0

       do is = 1, nspin, af+1
! ========================= modiifed by K. Tagami ============= 11.0
!             weiwsc = 0.d0
!             weilay = 0.d0
! ============================================================= 11.0
             
             do ik = is, kv3+is-nspin, nspin
                if(map_k(ik) /= myrank_k) cycle
                if(ipridos >= 2) write(nfout,'(" !!ldos ik = ",i6)') ik
                call m_FFT_alloc_WF_work()
                if(sw_aldos == ON) call fillup_meshwk(mode=SOFTPART)
                
                allocate(bfft(nfft));bfft=0.d0
                allocate(wk_bfft_l(lsize*kimg,ibsize) ,stat=ierr); wk_bfft_l = 0.0d0
                if(ipridos >= 2 .and. printable) write(nfout,'(" !!ldos nfft = ",i6," <<m_Ldos_cal>>")') nfft
                if(ipridos >= 2 .and. printable) &
                     & call m_ES_wd_zaj_small_portion_3D(nfout,ik," -- before m_ES_WF_in_Rspace <<m_Ldos_cal>> --",46)
                do ib = 1, np_e
                   bfft = 0.d0
                   wk_bfft_l = 0.0d0
#ifdef __TIMER_COMM__
                    call m_ES_WF_in_Rspace_3D(ik,ib,ib,ibsize,lsize,wk_bfft_l,0)
#else
                    call m_ES_WF_in_Rspace_3D(ik,ib,ib,ibsize,lsize,wk_bfft_l)
#endif
                   if(sw_aldos == ON) call substitute_weiwsc(ik,ib,.true.)
                   if(sw_layerdos == ON) call substitute_weilay(ik,ib,.true.)
                end do
                deallocate(bfft)
                deallocate(wk_bfft_l)
                call m_FFT_dealloc_WF_work()
             enddo
             do ik=is,kv3+is-nspin,nspin
                call check_sum(ik,"softpart",.false.)
             enddo
                if(sw_aldos==ON)then
                   allocate(wsctmp(maldos,neg,kv3));wsctmp=0.d0
                   do ik = is, kv3+is-nspin, nspin
                      if(map_k(ik) /= myrank_k) cycle
                      do ib = 1, np_e
                         wsctmp(:,neg_g(ib),ik) = weiwsc(:,ib,ik)
                      end do
                   end do
                   call mpi_allreduce( MPI_IN_PLACE, wsctmp, kv3*maldos*neg, &
                        &              mpi_double_precision, &
                        &              mpi_sum,mpi_kg_world,ierr )
                   call mpi_allreduce( MPI_IN_PLACE, wsctmp, kv3*maldos*neg, &
                        &              mpi_double_precision, &
                        &              mpi_sum,mpi_ge_world,ierr )
! ============================== modified by K. Tagami =============== 11.0
!                   weiwsc = wsctmp

                   Do ik=is, kv3+is-nspin,nspin
                      weiwsc(:,:,ik) = wsctmp(:,:,ik)
                   End do
! ==================================================================== 11.0
                   deallocate(wsctmp)
                endif
                if(sw_layerdos == ON)then
                   allocate(laytmp(mlayer,neg,kv3));laytmp=0.d0
                   do ik = is, kv3+is-nspin, nspin
                      if(map_k(ik) /= myrank_k) cycle
                      do ib = 1, np_e
                         laytmp(:,neg_g(ib),ik) = weilay(:,ib,ik)
                      end do
                   end do
                   call mpi_allreduce( MPI_IN_PLACE, laytmp, kv3*mlayer*neg, &
                        &              mpi_double_precision, &
                        &              mpi_sum,mpi_kg_world,ierr )
                   call mpi_allreduce( MPI_IN_PLACE, laytmp, kv3*mlayer*neg, &
                        &              mpi_double_precision, &
                        &              mpi_sum,mpi_ge_world,ierr )
! ============================== modified by K. Tagami =============== 11.0
!                   weilay = laytmp

                   Do ik=is, kv3+is-nspin,nspin
                      weilay(:,:,ik) = laytmp(:,:,ik)
                   End do
! ==================================================================== 11.0
                   deallocate(laytmp)
                endif
             
             if(modnrm == EXECUT) then
                allocate(bfft(nfftp_nonpara))
                do ik = is, kv3+is-nspin, nspin
                   call m_FFT_alloc_CD_box()
                   if(sw_aldos == ON) call fillup_meshwk(mode=HARDPART) 
                   do ib = 1,neg
                      if(ipridos>=2) write(nfout,'(" !ldos   ib = ", i5)') ib
                      call m_CD_hardpart_sub(nfout,is,ik,ib,chgq0)  ! -> chgq_enl
                      call m_CD_map_valence_charge_to_fft_box(is,nfftp_nonpara,bfft)
                      call m_FFT_CD_inverse0(nfout,bfft) ! bfft(G_sp.) -> bfft(R_sp.)
                      if(sw_aldos == ON) call substitute_weiwsc_cd(ik,ib,.true.)
                      if(sw_layerdos == ON) call substitute_weilay_cd(ik,ib,chgq0,.true.)
                   end do
                   if(sw_aldos == ON) deallocate(meshwk)
                   call m_FFT_dealloc_CD_box()
                   call check_sum(ik,"total   ",.true.)
                enddo
                deallocate(bfft)
             endif

       enddo

       if(npes > 1) call mpi_barrier(mpi_comm_group,ierr)
       if(sw_save_ldos_weight == ON) then
          if(hardpart_subroutine==2.or.sw_rspace_ldos==ON)then
          if(sw_aldos == ON) call wd_weight(ALDOS)
          if(sw_layerdos == ON) call wd_weight(LAYERDOS)
          else
          if(sw_aldos == ON) call wd_weight_serial(ALDOS)
          if(sw_layerdos == ON) call wd_weight_serial(LAYERDOS)
          endif
       end if


    call m_CD_keep_retrieve_hsr(.false.)

    deallocate(mapg2ly)
    deallocate(mapg2lx)
    if(modnrm == EXECUT) call m_CD_dealloc_ylm_enl_etc()
    if(hardpart_subroutine/=2.and.sw_rspace_ldos==OFF) call m_CD_restore_chgq()
    if(allocated(meshwk)) deallocate(meshwk)

    if(sw_layerdos == ON) deallocate(zrhoik)
  contains
    subroutine make_map()
      use m_FFT, only : xyz_fft_y, xyz_fft_x
      integer :: id1, id2, nl, nm, nn, index_l, index_g
      integer :: i, j, k, ri, i_, j_, k_

      id1 = fft_box_size_WF(1,0)
      id2 = fft_box_size_WF(2,0)

      nl = xyz_fft_y(2,1) - xyz_fft_y(1,1) + 1
      nm = xyz_fft_y(2,2) - xyz_fft_y(1,2) + 1
      nn = xyz_fft_y(2,3) - xyz_fft_y(1,3) + 1

      if(kimg == 1) then
         do k = xyz_fft_y(1,3), xyz_fft_y(2,3)
            do j = xyz_fft_y(1,2), xyz_fft_y(2,2)
               do i = xyz_fft_y(1,1), xyz_fft_y(2,1), 2
                  do ri = 0, 1
                     i_ = i - xyz_fft_y(1,1) + 1
                     j_ = j - xyz_fft_y(1,2) + 1
                     k_ = k - xyz_fft_y(1,3) + 1
                     index_g = id1*id2*(k-1)+id1*(j-1)+i+ri
                     index_l = nl*nm*(k_-1)+2*nm*((i_-1)/2)+2*(j_-1)+1+ri
                     mapg2ly(index_g) = index_l
                  end do
               end do
            end do
         end do
      else
         do k = xyz_fft_y(1,3), xyz_fft_y(2,3)
            do j = xyz_fft_y(1,2), xyz_fft_y(2,2)
               do i = xyz_fft_y(1,1), xyz_fft_y(2,1)
                  do ri = 0, 1
                     i_ = i - xyz_fft_y(1,1) + 1
                     j_ = j - xyz_fft_y(1,2) + 1
                     k_ = k - xyz_fft_y(1,3) + 1
                     index_g = id1*id2*2*(k-1)+id1*2*(j-1)+2*(i-1)+ri+1
                     index_l = nl*nm*2*(k_-1)+nm*2*(i_-1)+2*(j_-1)+ri+1
                     mapg2ly(index_g) = index_l
                  end do
               end do
            end do
         end do
      end if

      nl = xyz_fft_x(2,1) - xyz_fft_x(1,1) + 1
      nm = xyz_fft_x(2,2) - xyz_fft_x(1,2) + 1
      nn = xyz_fft_x(2,3) - xyz_fft_x(1,3) + 1

      if(kimg == 1) then
         do k = xyz_fft_x(1,3), xyz_fft_x(2,3)
            do j = xyz_fft_x(1,2), xyz_fft_x(2,2)
               do i = xyz_fft_x(1,1), xyz_fft_x(2,1)
                  i_ = i - xyz_fft_x(1,1) + 1
                  j_ = j - xyz_fft_x(1,2) + 1
                  k_ = k - xyz_fft_x(1,3) + 1
                  index_g = id1*id2*(k-1)+id1*(j-1)+i
                  index_l = nl*nm*(k_-1)+nl*(j_-1)+i_
                  mapg2lx(index_g) = index_l
               end do
            end do
         end do
      else
         do k = xyz_fft_x(1,3), xyz_fft_x(2,3)
            do j = xyz_fft_x(1,2), xyz_fft_x(2,2)
               do i = xyz_fft_x(1,1), xyz_fft_x(2,1)
                  do ri = 0, 1
                     i_ = i - xyz_fft_x(1,1) + 1
                     j_ = j - xyz_fft_x(1,2) + 1
                     k_ = k - xyz_fft_x(1,3) + 1
                     index_g = id1*id2*2*(k-1)+id1*2*(j-1)+2*(i-1)+ri+1
                     index_l = nl*nm*2*(k_-1)+nl*2*(j_-1)+2*(i_-1)+ri+1
                     mapg2lx(index_g) = index_l
                  end do
               end do
            end do
         end do
      end if
    end subroutine make_map
    subroutine wd_weight(mode)
      integer, intent(in) :: mode
      integer :: m, ik, ib, ib_ordr, kend, kstep, iws, ip
      real(kind=DP),allocatable,dimension(:):: wei

      if(mode==ALDOS) then
         m = maldos
      else if(mode==LAYERDOS) then
         m = mlayer
      end if

      allocate(wei(m))

      if(ekmode==OFF) then
         kend = kv3; kstep = af+1
      else if(ekmode==ON) then
         kend = 1;   kstep = 1
      end if

      if(ipridos >= 3) then
         do ik = 1, kv3, af+1
            if(map_k(ik) /= myrank_k) cycle
            write(nfout,'(" !ldos: ik = ",i8)') ik
            do ib = 1, neg
               write(nfout,'(" !ldos: ib, ib_ordr = ",2i20)') ib, neordr(ib,ik)
            end do
         end do
      end if

      do ik = 1, kend, kstep
         if(mype == 0) then
            if(mode==ALDOS) then
               write(nfldos,'(" weight for each atomic cell    nk = ",i8)') ik
            else if(mode==LAYERDOS) then
               write(nfldos,'(" weight for each layer          nk = ",i8)') ik
            end if
         end if

         do ib = 1, neg

            if(nrank_k >= 2) then
               if(map_ek(ib,ik) == mype) ib_ordr = neordr(ib,ik)
               call mpi_bcast(ib_ordr,1,mpi_integer,map_ek(ib,ik),mpi_comm_group,ierr)
            else
               ib_ordr = neordr(ib,ik)
            end if

            if(map_ek(ib_ordr,ik) == mype) then
               if(mode == ALDOS) then
                  wei(1:m) = weiwsc(1:m,map_z(ib_ordr),ik)
               else if(mode == LAYERDOS) then
                  wei(1:m) = weilay(1:m,map_z(ib_ordr),ik)
               end if
               if(map_ek(ib_ordr,ik) /= 0) &
                  call mpi_send(wei,m,mpi_double_precision,0,1,mpi_comm_group,ierr)
             else if(mype == 0 .and. map_ek(ib_ordr,ik) /= 0) then
                call mpi_recv(wei,m,mpi_double_precision,map_ek(ib_ordr,ik),1,mpi_comm_group,istatus,ierr)
             end if
             if(mype == 0)  then
               if(ilen >= m) then
                  write(nfldos,'(i4,")",5f15.10)') ib,(wei(iws),iws=1,m)
               else
                  write(nfldos,'(i4,")",5f15.10)') ib,(wei(iws),iws=1,ilen)
                  write(nfldos,'(5x,5f15.10)') (wei(iws),iws=ilen+1,m)
               end if
             end if
          end do
       end do

       deallocate(wei)
     end subroutine wd_weight

    subroutine wd_weight_serial(mode)
      integer, intent(in) :: mode
      integer :: m, ik, ib, ib_ordr, kend, kstep, iws, ip
      real(kind=DP),allocatable,dimension(:):: wei

      if(mode==ALDOS) then
         m = maldos
      else if(mode==LAYERDOS) then
         m = mlayer
      end if

      allocate(wei(m))

      if(ekmode==OFF) then
         kend = kv3; kstep = af+1
      else if(ekmode==ON) then
         kend = 1;   kstep = 1
      end if

      if(ipridos >= 3) then
         do ik = 1, kv3, af+1
            if(map_k(ik) /= myrank_k) cycle
            write(nfout,'(" !ldos: ik = ",i8)') ik
            do ib = 1, neg
               write(nfout,'(" !ldos: ib, ib_ordr = ",2i20)') ib, neordr(ib,ik)
            end do
         end do
      end if

      do ik = 1, kend, kstep
         if(mype == 0) then
            if(mode==ALDOS) then
               write(nfldos,'(" weight for each atomic cell    nk = ",i8)') ik
            else if(mode==LAYERDOS) then
               write(nfldos,'(" weight for each layer          nk = ",i8)') ik
            end if
         end if

         do ib = 1, neg

             if(mode == ALDOS) then
                wei(1:m) = weiwsc(1:m,ib,ik)
             else if(mode == LAYERDOS) then
                wei(1:m) = weilay(1:m,ib,ik)
             end if
             if(mype == 0)  then
               if(ilen >= m) then
                  write(nfldos,'(i4,")",5f15.10)') ib,(wei(iws),iws=1,m)
               else
                  write(nfldos,'(i4,")",5f15.10)') ib,(wei(iws),iws=1,ilen)
                  write(nfldos,'(5x,5f15.10)') (wei(iws),iws=ilen+1,m)
               end if
             end if
         end do
      end do

       deallocate(wei)
     end subroutine wd_weight_serial

    subroutine check_sum(ik,aword,serial)
      integer,intent(in) :: ik
      character*(*),intent(in) :: aword
      logical, intent(in) :: serial

      integer :: ib, ia, ilay, ib_ordr, ik_t
      real(kind=DP), allocatable, dimension(:) :: sum_aldos, sum_layer, sum_mpi

      if(sw_aldos == ON) allocate(sum_aldos(neg))
      if(sw_layerdos == ON) allocate(sum_layer(neg))

      if(ekmode /= ON) then
         ik_t = ik
      else if(ekmode == ON) then
         ik_t = 1
      end if
      if(sw_aldos == ON .or. sw_layerdos == ON) then
            allocate(sum_mpi(neg)); sum_mpi = 0.d0
      end if

      if(sw_aldos == ON) then
         sum_aldos = 0.d0
         if(.not.serial)then
            do ib = 1, np_e
               do ia = 1, natm2 + 1
                  sum_aldos(neg_g(ib)) = sum_aldos(neg_g(ib)) + weiwsc(ia,ib,ik_t)
               end do
            end do
         else
            do ib = 1, neg
               do ia = 1, natm2 + 1
                  sum_aldos(ib) = sum_aldos(ib) + weiwsc(ia,ib,ik_t)
               end do
            end do
         end if
         if(.not.serial)then
               sum_mpi = 0.d0
               call mpi_allreduce(sum_aldos,sum_mpi,neg,mpi_double_precision &
                    & , mpi_sum, mpi_kg_world,ierr)
               sum_aldos = sum_mpi
               call mpi_allreduce(sum_aldos,sum_mpi,neg,mpi_double_precision &
                    & , mpi_sum, mpi_ge_world,ierr)
               sum_aldos = sum_mpi
         endif
      end if

      if(sw_layerdos == ON) then
         sum_layer = 0.d0
         if(.not.serial)then
            do ib = 1, np_e
               do ilay = 1, mlayer
                  sum_layer(neg_g(ib)) = sum_layer(neg_g(ib)) + weilay(ilay,ib,ik_t)
               end do
            end do
         else
            do ib = 1, neg
               do ilay = 1, mlayer
                  sum_layer(ib) = sum_layer(ib) + weilay(ilay,ib,ik_t)
               end do
            end do
         end if
         if(.not.serial)then
               sum_mpi = 0.d0
               call mpi_allreduce(sum_layer,sum_mpi,neg,mpi_double_precision &
                    & , mpi_sum, mpi_kg_world,ierr)
               sum_layer = sum_mpi
               call mpi_allreduce(sum_layer,sum_mpi,neg,mpi_double_precision &
                    & , mpi_sum, mpi_ge_world,ierr)
               sum_layer = sum_mpi
         endif
      end if

!!$      if(mype == 0) then
      if(ipridos>=1) then
         if(sw_aldos == ON .and. sw_layerdos == ON) then
            write(nfout,'(" !!ldos ",a8," -- iban , sum(atomic, layer) --  neg = ",i6 &
                 & ," natm2 = ",i6, " ik = ",i6)') aword,neg,natm2,ik
            do ib = 1, neg
               if(serial) then
                  ib_ordr = ib
               else
                  ib_ordr = ib
               endif
               write(nfout,'(" !!ldos ", i4,2f16.8)') ib, sum_aldos(ib_ordr), sum_layer(ib_ordr)
            end do
         else if(sw_aldos == ON) then
            write(nfout,'(" !!ldos ",a8," -- iban , sum(atomic) --  neg = ",i6 &
                 & ," natm2 = ",i6," ik = ",i6)') aword,neg,natm2,ik
            do ib = 1, neg
               if(serial) then
                  ib_ordr = ib
               else
                  ib_ordr = ib
               endif
               write(nfout,'(" !!ldos ", i4,f16.8)') ib, sum_aldos(ib_ordr)
            end do
         else if(sw_layerdos == ON) then
            write(nfout,'(" !!ldos ",a8," -- iban , sum(layer ) --  neg = ",i6 &
                 & ," natm2 = ",i6," ik = ",i6)') aword,neg,natm2,ik
            do ib = 1, neg
               if(serial) then
                  ib_ordr = ib
               else
                  ib_ordr = ib
               endif
               write(nfout,'(" !!ldos ",i4,f16.8)') ib, sum_layer(ib_ordr)
            end do
         end if
      end if

      if(sw_aldos == ON) deallocate(sum_aldos)
      if(sw_layerdos == ON) deallocate(sum_layer)
      if(allocated(sum_mpi)) deallocate(sum_mpi)
!!$      if(ekmode /= ON .and. (sw_aldos == ON .or. sw_layerdos == ON) .and. npes >= 2) deallocate(sum_mpi)

    end subroutine check_sum

    subroutine substitute_weilay(ik,ib,serial)
!   *********************************
!     by T.Yamasaki
!           10th Jun 1992
!   *********************************
!     Revised by T. Yamasaki, Feb. 2004
!
      integer, intent(in) :: ik,ib
      logical, intent(in) :: serial

      integer :: i, ilay, iax1, iax2, iax3, ib_t
      real(kind=DP) :: a1,a2,width, g,ss
      complex(kind=CMPLDP) :: zi,zsum,zchg
      real(kind=DP),allocatable,dimension(:) :: zsum_layer, zsum_layer_mpi

      allocate(zsum_layer(mlayer)); zsum_layer = 0.d0

!!$      zi = cmplx(0.d0,1.d0)
      zi = cmplx(0.d0,1.d0)*PAI2

      iax1 = normal_axis_winlay
      iax2 = iax1 + 1
      if(iax2 >= 4) iax2 = iax2 - 3
      iax3 = iax2 + 1
      if(iax3 >= 4) iax3 = iax3 - 3


!!$      rl11 = rltv(iax1,iax1)
!!$      ss = 1.d0/maxhv
      ss = 3.d0 - kimg

!!$      call get_height(height)

!!$      n = fft_box_size_WF(1,0)*fft_box_size_WF(2,0)*fft_box_size_WF(3,1)
!!$      n = nfft*kimg/kimg/2
      do i = 1, lsize*kimg/2
         wk_bfft_l(2*i-1,1) = wk_bfft_l(2*i-1,1)**2 + wk_bfft_l(2*i,1)**2
         wk_bfft_l(2*i,  1) = 0.d0
      end do
      if(sw_fft_xzy>0)then
      call m_FFT_Direct_3D(nfout,wk_bfft_l,lsize,ibsize)
      else
      call m_FFT_Direct_XYZ_3D(nfout,wk_bfft_l,lsize,ibsize)
      endif
      bfft = 0.0d0
      if(kimg == 1) then
         do i = 1, nfft
            if(map_fft_x(i)-1 == myrank_g) then
               bfft(i) = wk_bfft_l(mapg2lx(i),1)
            end if
         end do
      else
         do i = 1, nfft/2
            if(map_fft_x(i)-1 == myrank_g) then
               bfft(2*i-1) = wk_bfft_l(mapg2lx(2*i-1),1)
               bfft(2*i  ) = wk_bfft_l(mapg2lx(2*i  ),1)
            end if
         end do
      end if
      call mpi_allreduce(MPI_IN_PLACE,bfft,nfft,mpi_double_precision,mpi_sum,mpi_ke_world,ierr)
      if(kimg == 1) then
         do i = 1, kg
            zrhoik(i) = bfft(igf(i))*denom
         end do
      else
         do i = 1, kg
            zrhoik(i*2-1) = bfft(igf(i)*2-1)*denom
            zrhoik(i*2  ) = bfft(igf(i)*2  )*denom
         end do
      end if

      do ilay = 1, mlayer
!!$         a2 = winlay(ilay,2)
!!$         a1 = winlay(ilay,1)
         a2 = winlay(ilay,2)/height
         a1 = winlay(ilay,1)/height
         width = a2 - a1
         zsum = cmplx(zrhoik(1)*width,0.d0)
!!$         zsum = univol*cmplx(zrhoik(1)*width,0.d0)
         if(kimg == 1) then
            do i = 2, kg
               if(ngabc(i,iax2) == 0 .and. ngabc(i,iax3) == 0) then
!!$                  g = rl11*ngabc(i,iax1)
                  g = ngabc(i,iax1)
                  zsum = zsum  &
                       & + zrhoik(i)*((cdexp(zi*g*a2)-cdexp(zi*g*a1))/( zi*g ))
               end if
            end do
         else if(kimg == 2) then
            do i = 2, kg
               if(ngabc(i,iax2) ==  0 .and. ngabc(i,iax3) == 0) then
!!$                  g = rl11*ngabc(i,iax1)
                  g = ngabc(i,iax1)
                  zchg = dcmplx(zrhoik(i*2-1),zrhoik(i*2))
                  zsum = zsum  &
                       & + zchg*((cdexp(zi*g*a2)-cdexp(zi*g*a1))/( zi*g ))
               endif
            enddo
         endif
         zsum_layer(ilay) = real(zsum)*ss
      end do

      if(ekmode == ON.or.serial) then
         ib_t = ib
      else
         ib_t = map_z(ib)
      end if

      do ilay = 1, mlayer
         weilay(ilay,ib_t,ik) = zsum_layer(ilay)
      end do

      deallocate(zsum_layer)
      
    end subroutine substitute_weilay

    subroutine substitute_weiwsc(ik,ib,serial)
      integer, intent(in) :: ik,ib
      logical, intent(in) :: serial
!   *********************************
!     by T.Yamasaki
!           10th Jun 1992
!   *********************************
!     Revised by T. Yamasaki, Feb. 2004
!
      integer :: ijk, nwsc,n, nel, ib_t
      real(kind=DP) :: xx, denom
      integer :: i

      nel = product(fft_box_size_WF(1:3,1))
      denom = 1.d0/dble(nel)
      if(ipridos >= 2) write(nfout,'(" !!ldos: ib, nfft, natm2 = ",i6,i12,i6,"  <<substitute_weiwsc>>")') ib, nfft, natm2

      if(ekmode == ON.or.serial) then
         ib_t = ib
      else if(ekmode == OFF) then
         ib_t = ib
      end if
      bfft = 0.0d0
      if(kimg == 1) then
         do i = 1, nfft
            if(map_fft_y(i)-1 == myrank_g) then
               bfft(i) = wk_bfft_l(mapg2ly(i),1)
            end if
         end do
      else
         do i = 1, nfft/2
            if(map_fft_y(i)-1 == myrank_g) then
               bfft(2*i-1) = wk_bfft_l(mapg2ly(2*i-1),1)
               bfft(2*i  ) = wk_bfft_l(mapg2ly(2*i  ),1)
            end if
         end do
      end if
      call mpi_allreduce(MPI_IN_PLACE,bfft,nfft,mpi_double_precision,mpi_sum,mpi_ke_world,ierr)
         
      do ijk = 1, nel
         nwsc = meshwk(ijk,1)
         n    = meshwk(ijk,2)
         if((n < 1 .or. 2*n > nfft) .or. (nwsc < 1 .or. nwsc >natm2+1) ) then
            if(ipridos>=1) then
               write(nfout,'(" !!ldos: n = ",i6," ijk = ",i6)') n, ijk
               write(nfout,'(" !!ldos: nwsc = ",i9)') nwsc
            end if
         else
            xx = denom*(bfft(2*n-1)**2 + bfft(2*n)**2)
            weiwsc(nwsc,ib_t,ik) = weiwsc(nwsc,ib_t,ik) + xx
         end if
      end do

      if(ipridos >= 2) then
         write(nfout,'(" !ldos: ik, ib = ",2i8," <<substitute_weiwsc>>")') ik, ib
         write(nfout,'(" !ldos: ",8f10.5)') (weiwsc(nwsc,ib_t,ik),nwsc=1,maldos)
      end if

    end subroutine substitute_weiwsc

    subroutine substitute_weiwsc_cd(ik,ib,serial)
      integer, intent(in) :: ik,ib
      logical, intent(in) :: serial
!   *********************************
!     by T.Yamasaki
!           10th Jun 1992
!   *********************************
!     Revised by T. Yamasaki, Feb. 2004
!
      integer :: ijk, nwsc,n, ib_t
      real(kind=DP) :: denom

      denom = 0.5d0*univol/product(fft_box_size_CD(1:3,1))

      if(ekmode == ON.or.serial) then
         ib_t = ib
      else if(ekmode == OFF) then
         ib_t = map_z(ib)
      end if

      do ijk = 1, product(fft_box_size_CD(1:3,1))
         nwsc = meshwk(ijk,1)
         n    = meshwk(ijk,2)
         if((n < 1 .or. 2*n > nfftp_nonpara) .or. (nwsc < 1 .or. nwsc >natm2+1) ) then
            if(ipridos>=1) then
               write(nfout,'(" !!ldos: n = ",i6," ijk = ",i6)') n, ijk
               write(nfout,'(" !!ldos: nwsc = ",i9)') nwsc
            end if
         else
            weiwsc(nwsc,ib_t,ik) = weiwsc(nwsc,ib_t,ik) + denom*bfft(2*n-1)
! =============================== modified by K. Tagami ============ 11.0
!            weiwsc(nwsc,ib_t,ik) = weiwsc(nwsc,ib_t,ik) + denom*bfft(2*n-1)
!
!            if (map_ek(ib,ik) == mype) then
!               weiwsc(nwsc,ib_t,ik) = weiwsc(nwsc,ib_t,ik) + denom*bfft(2*n-1)
!            endif
! ================================================================== 11.0
         end if
      end do

      if(ipridos >=2) then
         write(nfout,'(" !ldos: ik ,ib = ",2i8)') ik, ib
         write(nfout,'(" !ldos: ",8f10.5)') (weiwsc(nwsc,ib_t,ik),nwsc=1,maldos)
      end if

    end subroutine substitute_weiwsc_cd

    subroutine substitute_weilay_cd(ik,ib,chgq0,serial)
      integer, intent(in) :: ik, ib
      real(kind=DP), intent(in) :: chgq0
      logical, intent(in) :: serial
!   *********************************
!     by T.Yamasaki
!           10th Jun 1992
!   *********************************
!     Revised by T. Yamasaki, Feb. 2004
!
      integer :: i, ista, iax1, iax2, iax3, ilay,  ib_t
      real(kind=DP) :: g, z_work, ss, a1, a2, width
      complex(kind=CMPLDP) :: zi,zsum,zchg
      real(kind=DP),allocatable,dimension(:,:) :: zsum_layer, zsum_layer_mpi

      allocate(zsum_layer(mlayer,2)); zsum_layer = 0.d0

      zi = cmplx(0.d0,1.d0)*PAI2

      iax1 = normal_axis_winlay
      iax2 = iax1 + 1
      if(iax2 >= 4) iax2 = iax2 - 3
      iax3 = iax2 + 1
      if(iax3 >= 4) iax3 = iax3 - 3

!!$      rl11 = rltv(iax1,iax1)
!!$      ss = 0.5d0*univol/maxhv
      ss = 0.5d0*univol*(3.d0-kimg)


      ista = ista_kngp
      if(ista == 1) ista = 2
      do ilay = 1, mlayer
         if(ipridos >= 2) write(nfout,'(" !!ldos: ib = ",i6," ilay = ",i6 &
              & ," <<substitute_weilay_cd>>")') ib,ilay
         a2 = winlay(ilay,2)/height
         a1 = winlay(ilay,1)/height
         width = a2 - a1
         z_work = chgq0*width
         if(serial)then
            if(ista/=2) z_work=0.d0
         endif
         zsum = 0.d0

         if(kimg == 1) then
            if(ekmode == ON.or.serial) then
               do i = ista, iend_kngp
                  if(ngabc_kngp_l(i,iax2) == 0 .and. ngabc_kngp_l(i,iax3) == 0) then
                     g = ngabc_kngp_l(i,iax1)
                     zsum = zsum  &
                          & + chgq_l(i,1,1)*( ( cdexp(zi*g*a2) - cdexp(zi*g*a1))/(zi*g))
                  end if
               end do
            else if(ekmode == OFF) then
               do i = 2, kgp
                  if(ngabc_kngp_l(i,iax2) == 0 .and. ngabc_kngp_l(i,iax3) == 0) then
                     g = ngabc_kngp_l(i,iax1)
                     zsum = zsum  &
                          & + chgq_enl(i,1)*( ( cdexp(zi*g*a2) - cdexp(zi*g*a1))/(zi*g))
                  end if
               end do
            end if
         else if(kimg == 2) then
            if(ekmode == ON.or.serial) then
               do i = ista, iend_kngp
                  if(ngabc_kngp_l(i,iax2) == 0 .and. ngabc_kngp_l(i,iax3) == 0) then
                     g = ngabc_kngp_l(i,iax1)
                     zchg = cmplx(chgq_l(i,1,1),chgq_l(i,2,1))
                     zsum = zsum &
                          & + zchg * ((cdexp(zi*g*a2) - cdexp(zi*g*a1))/(zi*g))
                  end if
               end do
            else if(ekmode == OFF) then
               do i = 2, kgp
                  if(ngabc_kngp_l(i,iax2) == 0 .and. ngabc_kngp_l(i,iax3) == 0) then
                     g = ngabc_kngp_l(i,iax1)
                     zchg = cmplx(chgq_enl(i,1),chgq_enl(i,2))
                     zsum = zsum &
                          & + zchg * ((cdexp(zi*g*a2) - cdexp(zi*g*a1))/(zi*g))
                  end if
               end do
            end if
         end if
         zsum = zsum + cmplx(z_work,0.d0)

         zsum_layer(ilay,1) = real(zsum)
         zsum_layer(ilay,2) = dimag(zsum)
         
      end do
      if(ekmode == ON.or.serial) then
         if(npes > 1) then
            allocate(zsum_layer_mpi(mlayer,2))
            call mpi_allreduce(zsum_layer,zsum_layer_mpi,mlayer*2 &
                 & ,mpi_double_precision,mpi_sum,mpi_ke_world,ierr)
            zsum_layer = zsum_layer_mpi
            deallocate(zsum_layer_mpi)
         end if
      end if

      if(mype == 0) then
         do ilay = 1, mlayer
            if(zsum_layer(ilay,2) > 1.d-6) then
               write(6,'(" !!ldos: zsum,ne.real imag = ",d16.8," ilay = ",i6)') &
                    & zsum_layer(ilay,2), ilay
            endif
!!$            write(6,'(" !!ldos zsum_layer = ",d16.8," ilay = ",i6)') &
!!$                 & zsum_layer(ilay,1), ilay
         end do
      end if

      if(ekmode == ON.or.serial) then
         ib_t = ib
      else
         ib_t = map_z(ib)
      end if

      do ilay = 1, mlayer
         weilay(ilay,ib_t,ik) = weilay(ilay,ib_t,ik) + zsum_layer(ilay,1)*ss
      end do
! ========================= modified by K. Tagami =============== 11.0
!      do ilay = 1, mlayer
!         weilay(ilay,ib_t,ik) = weilay(ilay,ib_t,ik) + zsum_layer(ilay,1)*ss
!      end do
!
!      if (map_ek(ib,ik) == mype) then
!         do ilay = 1, mlayer
!            weilay(ilay,ib_t,ik) = weilay(ilay,ib_t,ik) + zsum_layer(ilay,1)*ss
!         end do
!      endif
! ============================================================== 11.0

      deallocate(zsum_layer)

    end subroutine substitute_weilay_cd

    subroutine fillup_meshwk(mode)
!   *********************************
!     by T.Yamasaki
!           10th Jun 1992
!   *********************************
!     Revised by T. Yamasaki, Feb. 2004
!
      integer, intent(in) :: mode
!!$ASASASASAS
!!$      integer, pointer, dimension(:,:,:) :: mesh_t
      integer, allocatable :: mesh_t(:,:,:)
!!$ASASASASAS
      integer :: i,j,k, icount, nwsc, max_nwsc, min_nwsc, n, nl, nm,nn, nd2,id, nfftwk &
           &    , inew, jnew, knew, idr2, idh

      if(mode == SOFTPART) then

         nl = fft_box_size_WF(1,1)
         nm = fft_box_size_WF(2,1)
         nn = fft_box_size_WF(3,1)

         id  = fft_box_size_WF(1,0)
         nd2 = fft_box_size_WF(2,0)
         allocate(mesh_t(id,nd2,nn))
         mesh_t = mesh

         nfftwk = product(fft_box_size_WF(1:3,1))
      else if(mode == HARDPART) then

         nl = fft_box_size_CD(1,1)
         nm = fft_box_size_CD(2,1)
         nn = fft_box_size_CD(3,1)

         id  = fft_box_size_CD_nonpara(1,0)
         nd2 = fft_box_size_CD_nonpara(2,0)
         allocate(mesh_t(id,nd2,nn))
         mesh_t = meshp
         nfftwk = product(fft_box_size_CD(1:3,1))
      end if
      idr2 = (nl+2)/2
      idh  = id/2

      if(allocated(meshwk)) deallocate(meshwk)
      allocate(meshwk(nfftwk,2)); meshwk = 0

      if(ipridos>=1) write(nfout,'(" !! nfftwk = ",i8)') nfftwk

      max_nwsc = 0
      min_nwsc = natm2+2
      icount = 1
      do k = 1, nn
         do j = 1, nm
            do i = 1, nl
!!$               if(icount > nfftwk) then
!!$                  write(nfout,'(" ! icount = ",i7," > nfftwk")') icount
!!$                  stop  ' large icount <<fillup_meshwk.m_Ldos_cal>>'
!!$               end if
               nwsc = mesh_t(i,j,k)
               if(nwsc > max_nwsc) max_nwsc = nwsc
               if(nwsc < min_nwsc) min_nwsc = nwsc
               meshwk(icount,1) = nwsc
               if(kimg == 1) then
                  if( i <= idr2 ) then
                     n = idh*nd2*(k-1) + idr2*(j-1) + i
                  else
                     inew = nl + 2 - i
                     jnew = nm + 2 - j
                     knew = nn + 2 - k
                     if(jnew > nm) jnew = jnew - nm
                     if(knew > nn) knew = knew - nn
                     n = idh*nd2*(knew-1) + idh*(jnew-1) + inew
                  end if
               else
                  n = id*nd2*(k-1) + id*(j-1) + i
               end if
               meshwk(icount,2) = n
               icount = icount + 1
            end do
         end do
      end do
      if(ipridos>=1) then
         write(nfout,*) ' !!ldos:  max_nwsc =  ', max_nwsc
         write(nfout,*) ' !!ldos:  min_nwsc =  ', min_nwsc
#ifdef DEBUG_MESH
         n = 1000
         if(n*2 > nfftwk) n = nfftwk/2
         write(nfout,'(" !ldos nfftwk = ",i8)') nfftwk
         write(nfout,'(" !ldos: meshwk(*,1) first ",i8," elements")') n
         write(nfout,'(14i5)') (meshwk(i,1),i=1,n)
         write(nfout,'(" !ldos: meshwk(*,1) last  ",i8," elements")') n
         write(nfout,'(14i6)') (meshwk(i,1),i=nfftwk-n+1, nfftwk)

         write(nfout,'(" !ldos: meshwk(*,2) first ",i8," elements")') n
         write(nfout,'(14i5)') (meshwk(i,2),i=1,n)
         write(nfout,'(" !ldos: meshwk(*,2) last  ",i8," elements")') n
         write(nfout,'(14i6)') (meshwk(i,2),i=nfftwk-n+1, nfftwk)
#endif
      end if
      deallocate(mesh_t)
    end subroutine fillup_meshwk

  end subroutine m_Ldos_cal


  subroutine m_Ldos_get_ldos_index(ip,aldos_or_layerdos,ipdos)
    integer, intent(in) ::  ip
    integer, intent(out) :: aldos_or_layerdos, ipdos
    integer :: i, na

    aldos_or_layerdos = NO
    ipdos = 0
    if(sw_aldos == ON) then
!!$       if(1 <= ip .and. ip <= maldos ) then
       if(1 <= ip .and. ip <= naldos_write ) then
          aldos_or_layerdos = ALDOS
!!$          ipdos = naldos_from + ip - 1
          na = 0
          do i = naldos_from, naldos_to
             if(if_aldos_full(i) == ON) na = na + 1
             if(na == ip) then
                ipdos = i
                exit
             end if
          end do
!!$          ipdos = naldos_from - 1 + na

          if(ipdos > maldos .or. ipdos < 1 ) then
             write(nfout,'(" !! ipdos ( = ",i8," ) is illegal << m_Ldos_get_ldos_index>>")') ipdos
             stop ' ipdos is illegal << m_Ldos_get_ldos_index>>'
          end if
!!$       else if( ip > maldos ) then
       else if( ip > naldos_write ) then
          if(sw_layerdos == ON) then
             aldos_or_layerdos = LAYERDOS
!!$             ipdos = ip - maldos
             ipdos = ip - naldos_write
          end if
       end if
    else if(sw_layerdos == ON) then
       if( 1 <= ip .and. ip <= mlayer) then
          aldos_or_layerdos = LAYERDOS
          ipdos = ip
       end if
    end if

  end subroutine m_Ldos_get_ldos_index

  subroutine m_Ldos_get_dos_weight(aldos_or_layerdos,ip,nfldos,ne,nk,dos_weight)
    integer, intent(in) :: aldos_or_layerdos, ip, nfldos
    integer, intent(in) :: ne, nk
    real(kind=DP), intent(out), dimension(ne,nk) :: dos_weight

    real(kind=DP), allocatable, dimension(:,:) :: dos_weight_mpi

    integer :: i, j
    real(kind=DP) :: w
    logical :: serial
    serial = .false.
    if(hardpart_subroutine/=2.and.sw_rspace_ldos==OFF) serial=.true.

    if(ne < neg) then
       if(ipridos>=1) write(nfout,'(" !ldos: ne (= ",i6," ) < neg (= ",i6 &
            & ," ) <<m_Ldos_get_dos_weight>>")') ne, neg
       stop ' ne < neg <<m_Ldos_get_dos_weight>>'
    end if

    if(ekmode == OFF) then
       if(nk < kv3_ek) then
          if(ipridos>=1) write(nfout,'(" !ldos: nk (= ",i6," ) < kv3_ek (= ",i6 &
               & ," ) <<m_Ldos_get_dos_weight>>")') nk, kv3_ek
          stop ' nk < kv3_ek <<m_Ldos_get_dos_weight>>'
       end if

!       if(ne <= iend_e .and. nk <= iend_k) then
        if(ne <= neg .and. nk <= kv3) then
          dos_weight = 0.d0
          if(aldos_or_layerdos == ALDOS) then
             if(.not.serial)then
               do j = ista_k, iend_k
                  do i = ista_e, iend_e, istep_e
                     dos_weight(i,j) = weiwsc(ip,map_z(i),j)
                  end do
               end do
             else
               do j = 1,kv3
                  do i = 1,neg
                     dos_weight(i,j) = weiwsc(ip,i,j)
                  end do
               end do
             endif
          else if(aldos_or_layerdos == LAYERDOS) then
             if(.not.serial)then
               do j = ista_k, iend_k
                  do i = ista_e, iend_e, istep_e
                     dos_weight(i,j) = weilay(ip,map_z(i),j)
                  end do
               end do
             else
               do j = 1, kv3
                  do i = 1,neg
                     dos_weight(i,j) = weilay(ip,i,j)
                  end do
               end do
             endif
          end if
          if(npes > 1.and..not.serial) then
            allocate(dos_weight_mpi(ne,nk))
            call mpi_allreduce(dos_weight, dos_weight_mpi,ne*nk &
                 & ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
            dos_weight = dos_weight_mpi
            deallocate(dos_weight_mpi)
          end if
        end if
    else if(ekmode == ON) then
       if(nk < kv3) then
          if(ipridos>=1) write(nfout,'(" !ldos: nk (= ",i6," ) < kv3 (= ",i6 &
               & ," ) <<m_Ldos_get_dos_weight>>")') nk, kv3_ek
          stop ' nk < kv3 <<m_Ldos_get_dos_weight>>'
       end if

       call m_Files_open_nfldos()
       call skip_headerpart()
       if(aldos_or_layerdos == ALDOS) then
          do j = 1, nk
             do i = 1, ne
                call read_aldos_weight(ip,w)
                dos_weight(i,j) = w
             end do
          end do
       else if(aldos_or_layerdos == LAYERDOS) then
          do j = 1, nk
             do i = 1, ne
                call read_layerdos_weight(ip,w)
                dos_weight(i,j) = w
             end do
          end do
       end if
       call m_Files_close_nfldos()
    end if
  contains
    subroutine skip_headerpart()
      read(nfldos,*)
    end subroutine skip_headerpart

    subroutine read_aldos_weight(ip,w)
      integer, intent(in) :: ip
      real(kind=DP), intent(out) :: w
      w = 0.d0
    end subroutine read_aldos_weight

    subroutine read_layerdos_weight(ip,w)
      integer, intent(in) :: ip
      real(kind=DP), intent(out) :: w
      w = 0.d0
    end subroutine read_layerdos_weight

  end subroutine m_Ldos_get_dos_weight

! ============================== added by K. Tagami ================== 11.0
  subroutine m_Ldos_get_dos_weight_noncl( aldos_or_layerdos, ip, nfldos, &
       &                                  ne, nk, dos_weight_noncl )
    integer, intent(in) :: aldos_or_layerdos, ip, nfldos
    integer, intent(in) :: ne, nk
    real(kind=DP), intent(out), dimension(ne,nk,ndim_magmom) :: dos_weight_noncl

    real(kind=DP), allocatable, dimension(:,:,:) :: dos_weight_mpi

    integer :: i, j
    real(kind=DP) :: w
    logical :: serial

    serial = .false.
    if(hardpart_subroutine/=2.and.sw_rspace_ldos==OFF) serial=.true.

    if(ne < neg) then
       if(ipridos>=1) write(nfout,'(" !ldos: ne (= ",i6," ) < neg (= ",i6 &
            & ," ) <<m_Ldos_get_dos_weight>>")') ne, neg
       stop ' ne < neg <<m_Ldos_get_dos_weight>>'
    end if

    if(ekmode == OFF) then
       if(nk < kv3_ek) then
          if(ipridos>=1) write(nfout,'(" !ldos: nk (= ",i6," ) < kv3_ek (= ",i6 &
               & ," ) <<m_Ldos_get_dos_weight>>")') nk, kv3_ek
          stop ' nk < kv3_ek <<m_Ldos_get_dos_weight>>'
       end if

!       if(ne <= iend_e .and. nk <= iend_k) then
        if(ne <= neg .and. nk <= kv3) then
          dos_weight_noncl = 0.d0
          if(aldos_or_layerdos == ALDOS) then
             if(.not.serial)then
               do j = ista_k, iend_k
                  do i = ista_e, iend_e, istep_e
                     dos_weight_noncl(i,j,:) = weiwsc_noncl(ip,map_z(i),j,:)
                  end do
               end do
             else
               do j = 1,kv3
                  do i = 1,neg
                     dos_weight_noncl(i,j,:) = weiwsc_noncl(ip,i,j,:)
                  end do
               end do
             endif
          else if(aldos_or_layerdos == LAYERDOS) then
             if(.not.serial)then
               do j = ista_k, iend_k
                  do i = ista_e, iend_e, istep_e
                     dos_weight_noncl(i,j,:) = weilay_noncl(ip,map_z(i),j,:)
                  end do
               end do
             else
               do j = 1, kv3
                  do i = 1,neg
                     dos_weight_noncl(i,j,:) = weilay_noncl(ip,i,j,:)
                  end do
               end do
             endif
          end if

          if(npes > 1.and..not.serial) then
            allocate(dos_weight_mpi(ne,nk,ndim_magmom))
            call mpi_allreduce( dos_weight_noncl, dos_weight_mpi, ne*nk*ndim_magmom, &
                 &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
            dos_weight_noncl = dos_weight_mpi
            deallocate(dos_weight_mpi)
          end if
        end if

    else if(ekmode == ON) then
       if(nk < kv3) then
          if(ipridos>=1) write(nfout,'(" !ldos: nk (= ",i6," ) < kv3 (= ",i6 &
               & ," ) <<m_Ldos_get_dos_weight>>")') nk, kv3_ek
          stop ' nk < kv3 <<m_Ldos_get_dos_weight>>'
       end if

       call m_Files_open_nfldos()
       call skip_headerpart()
       if(aldos_or_layerdos == ALDOS) then
          do j = 1, nk
             do i = 1, ne
                call read_aldos_weight(ip,w)
                dos_weight_noncl(i,j,:) = w
             end do
          end do
       else if(aldos_or_layerdos == LAYERDOS) then
          do j = 1, nk
             do i = 1, ne
                call read_layerdos_weight(ip,w)
                dos_weight_noncl(i,j,:) = w
             end do
          end do
       end if
       call m_Files_close_nfldos()
    end if
  contains
    subroutine skip_headerpart()
      read(nfldos,*)
    end subroutine skip_headerpart

    subroutine read_aldos_weight(ip,w)
      integer, intent(in) :: ip
      real(kind=DP), intent(out) :: w
      w = 0.d0
    end subroutine read_aldos_weight

    subroutine read_layerdos_weight(ip,w)
      integer, intent(in) :: ip
      real(kind=DP), intent(out) :: w
      w = 0.d0
    end subroutine read_layerdos_weight

  end subroutine m_Ldos_get_dos_weight_noncl
! ==================================================================== 11.0

  subroutine m_Ldos_dealloc()
     if(allocated(if_aldos_full)) deallocate(if_aldos_full)
     if(allocated(mesh)) deallocate(mesh)
     if(allocated(meshp)) deallocate(meshp)
     if(allocated(winlay)) deallocate(winlay)
  end subroutine m_Ldos_dealloc

end module m_Ldos
