#define NEC_TUNE
!=======================================================================
!
!  PROGRAM  PHASE/0 2015.01 ($Rev: 447 $)
!
!  MODULE: m_Epsilon
!
!  Contact address :  Phase System Consortium
!                     E-mail: phase_system@nims.go.jp URL https://azuma.nims.go.jp
!
!
!#========================================================================
!#                                                                       #
!# Software Name : PHASE/UVSOR ver. 3.42                                 #
!#                                                                       #
!#      Module Name : m_Epsilon_ek.f90                                   #
!#                                                                       #
!#                                Written by T. Hamada 2008/2/17         #
!#                                                                       #
!#      Contact address :  IIS,The University of Tokyo CISS     #
!#                                                                       #
!#"Multiscale Simulation System for Functional Analysis of Nanomaterials"#
!#                                                                       #
!#========================================================================
!
!     patch 0.1 by J. Koga @adv     2009/05/28
!     patch 0.2 by K. Tagami@adv    2009/05/28
!
!     patch 0.1:  correction in  subroutine os_strength 
!     patch 0.2:  correction for calculation with DFT+U
! 
! ================================================================

module m_Epsilon_ek
! $Id: m_Epsilon_ek.F90 447 2015-08-05 02:38:25Z jkoga $
!
! Universal Virtual Spectroscope for Optoelectronics Research (UVSOR) ver 3.00
! UVSOR module for electronic dielectric function calculation
!
! Tomoyuki Hamada, Takenori Yamamoto, Hiroyoshi Momida (Univ.Tokyo)
! Hideo Mizouchi, Tsuyoshi Uda @adv,
! Motohisa Ohno (Univ, Tokyo, National Institite for Materials Science)
!
! Last Updated NOV 7, 2007
!

  use m_Electronic_Structure,       only : zaj_l,neordr,nrvf_ordr,eko_l,efermi,totch, vnlph_l,vlhxc_l &
       &                                 , fsr_l,fsi_l,fsr_add_l,fsi_add_l,vlhxcQ, eko_ek, occup_l    
  use m_ES_nonlocal,                only : m_ES_add_betar_dot_WFs
  use m_PseudoPotential,            only : ival,ilmt,nlmt,nlmtt,nlmta,lmta,lmtt,ltp,mtp,q,dion,modnrm   &
       &                                 , nac,fqwei,ilmt_add,nlmta_add,ltp_add,mtp_add,lmta_add,lpsmax &
       &                                 , m_PP_alloc_ps                                         &
       &                                 , m_PP_local_part_3D,          m_PP_partial_core_CD_3D  &
       &                                 , m_PP_make_index_lmtt_2_dl2p, m_PP_dealloc_ps          &
       &                                 , m_PP_tell_lmtt_l_m_tau,      m_PP_make_index_lmtt_add &
       &                                 , m_PP_tell_lmtt_l_m_tau_add, taup
  use m_NonLocal_Potential,         only : new_radr_and_wos,wos,radr,betar,nmesh,ilmt,xh,rmax &
       &                                 , wd_lmt_l_m_tau_etc
  use m_Files,                      only : nfout,nfzaj,nfepsout,nfnlo,nfmagopt,nfpot,nfinp,nfspg,nfkpgn,nfepscont &
       &                                 , m_Files_open_ps_files, m_Files_close_ps_files &
       &                                 , m_Files_reopen_nfinp, F_CNTN_BIN_partitioned, file_existance_contfiles &
       &                                 , file_existance_3contfiles
  use m_PlaneWaveBasisSet,          only : kgp,kg1, ngabc,iba,nbase,nbmx,gr_l
  use m_PlaneWaveBasisSet,          only : kg
  use m_Crystal_Structure,          only : rltv, univol, nbztyp,altv,nbztyp_spg,nopr,op,rvol &
       &                                 , il,imag,inv,ngen,igen,jgen,a,b,c,ca,cb,cc
  use m_Kpoints,                    only : kv3,vkxyz,kv3_ek,vkxyz_ek,np0,np2,ip20,ip2cub,nxyz_tetra &
       &                                 , qwgt_ek
  use m_Ionic_System,               only : pos, cps, ntyp,ityp,iwei,natm,natm2 ,ivan,iatomn
  use m_Timing,                     only : tstatc0_begin, tstatc0_end
  use m_Control_Parameters,         only : nspin,kimg,neg,af,paramset,ipri,icond,ipri_kp,ipri_spg &
       &                                 , ipriinputfile,tag_accuracy, sw_use_add_proj, neg_previous &
       &                                 , m_CtrlP_rd_parameters,        m_CtrlP_way_of_smearing &
       &                                 , m_CtrlP_check_inputfilestyle, m_CtrlP_rd_control &
       &                                 , m_CtrlP_rd_accuracy, m_CtrlP_set_way_ksample, printable, noncol &
       &                                 , ndim_magmom, ndim_spinor, way_ksample

  use m_Const_parameters,           only : DP, SP, BUCS, ON, OFF,CARTS, PAI4, PAI2, PAI, OLD, CRDTYP &
       &                                 , WHOLE_BZ, SIMPLE_CUBIC, BCC, FCC, DIAMOND, HEXAGONAL &
       &                                 , GENERAL, GENERAL_LARGER, NONAME, MESH, MONKHORST_PACK &
       &                                 , SKPS_DIRECT_IN, GAMMA, FILE,NODATA, TETRAHEDRON &
       &                                 , NEW_, FMAXVALLEN,LOWER, PARABOLIC, PARA, ANTIFERRO, FERRO, INITIAL, CONTINUATION
  use m_IterationNumbers,           only : nk_in_the_process,nk_converged
  use m_Parallelization,            only : is_kngp,ie_kngp,npes,mype,ierr,map_k & 
       &                                 , map_ek,ista_e,iend_e,istep_e,map_z &
       &                                 , ista_k,iend_k,myrank_e,myrank_k,map_e,nrank_e &
       &                                 , ista_kngp,iend_kngp,ista_snl,iend_snl,np_e, mpi_comm_group, mpi_k_world
  use m_ES_IO,                      only : m_ESIO_rd_EigenValues_etc
  use m_ES_occup_EPS,               only : occup_l_ek &
       &                                 , m_ESoc_EPS_fermi_parabolic_ek,m_ESoc_EPS_fermi_tetra_ek &
       &                                 , m_ESoc_EPS_alloc_occup_l_ek,  m_ESoc_EPS_dealloc_occup_l_ek
!
! ================================Added by K. Tagami ============= 0.2
  use m_Electronic_Structure,    only : dhub
  use m_Control_Parameters, only      : sw_hubbard
  use m_Ionic_System,         only    : ihubbard
! ====================================================================
#ifdef NEC_TUNE
  use m_Parallelization, only : mpi_k_world
#endif
  use m_Parallelization, only : np_g1k, ista_fs, iend_fs, neg_g, mpi_kg_world, mpi_ge_world &
                              , ista_g1k, iend_g1k, mpi_ke_world, nrank_g, myrank_g, ista_atm, iend_atm, np_fs

! ========================== KT_add =========================== 13.0E
  use m_Const_Parameters,     only : Fermi_Dirac
  use m_ES_occup_EPS,         only : m_ESoc_EPS_fermi_dirac_ek
! ============================================================= 13.0E

! ========== KT_add ======== 13.0S
  use m_Control_Parameters, only  : sw_corelevel_spectrum
  use m_CoreLevel_Spectrum,  only : num_core_states, &
       &                            atom_to_probe, qnum_n_to_probe, qnum_l_to_probe, &
       &                            e_low_cls => e_low, e_high_cls => e_high, &
       &                            e_step_cls => e_step, &
       &                            vec_q, ene_core_states, psig_core_states, &
       &                            fsr_core_states, fsi_core_states,  &
       &                            dipole_dxyz_core2val, &
       &                            m_CLS_set_data_core2val_from_pp, &
       &                            m_CLS_alloc_wfn_core_states, &
       &                            m_CLS_dealloc_wfn_core_states, &
       &                            m_CLS_set_wfn_core_states, &
       &                            m_CLS_set_ene_core_states, &
       &                            m_CLS_dealloc_dipole_core2val, &
       &                            m_CLS_chk_sw_corelevel_spectrum, &
       &                            m_CLS_rd_n_main, &
       &                            m_CLS_dealloc_core_ae_wfns, &
       &                            m_CLS_find_ptrans_indx_core2val, &
       &                            ene_initial_state_splitting, &
       &                            mimic_soc_split_spectrum, &
       &                            ndim_spinor_core_states
! ========================== 13.0S

! ==== KT_add ==== 13.0R
  use m_Control_Parameters,  only : wf_filetype
  use m_Const_Parameters,    only : CUBE
  use m_Files,  only : nfwfk,  m_Files_open_nfwfk
  use m_FFT,    only : m_FFT_alloc_WF_work, m_FFT_dealloc_WF_work
! ================ 13.0R

! === KT_add === 13.1R
  use m_Control_Parameters,  only : sw_phonon_with_epsilon
  use m_Files,   only : m_Files_open_nfoptical_coeff, nfoptical_coeff
  use m_Raman,  only : eps_omega_eq_0 => dielectric
! ============== 13.1R

! === KT_add ==== 2014/09/13
  use m_Kpoints,            only : itrs, gen_tetramesh_mode
  use m_Crystal_Structure,   only : gen_name_in_carts, use_altv_rltv
! =============== 2014/09/13

! === KT_add === 2014/09/24
  use m_Control_Parameters,  only : ipriepsilon
  use m_ValenceBand_Spectrum,  only : m_VBS_set_data_ppc_from_pp, nppc_data, &
       &                              phase_ylm1, phase_ylm2, dipole_tau1, dipole_tau2, &
       &                              mnppc, dipole_dxyz_us, &
       &                              m_VBS_dealloc_dipole_ppc, &
       &                              m_VBS_set_data_ppc_from_pp_it
! ============= 2014/09/24

! ===================================================================================================
!                        Major subroutines
!            Name                               Function
! ---------------------------------------------------------------------------------------------------
!    initialization_eps_ek         :: initialization
!    eigen_value_ordering_eps_ek   :: eigenvalue ordering
!    gen_k_points_eps_ek           :: k-point generation
!    calc_transition_moment_eps_ek :: transition moment calculation
!    calc_tm_square_eps_ek         :: transition moment square calculation
!    BZintegration_eps             :: Brillouin zone integration
!    kkt_eps                       :: Kramers-Kronig transformation (KKT)
!    calc_drude_eps                :: Drude term calculation (metal case)
!    optics_eps                    :: optical properties calculation
!    m_CtrlP_rd_epsilon            :: read tag_epsilon
!    ordering_ek                   :: eigenvalue ordering subroutine
!    trans_ek                      :: transition moment routine
!    tmsq_ek                       :: transition moment square routine
!    gen_vk0xyz_ek                 :: k-points generation subroutine (fot linear tetragedron)
!    trm2_symm                     :: symmetrize transition moment square matrix
!    vl_ek                         :: core transition moment routine
!    os_moment_ek                  :: core transition moment sqaure routine
!    linear_tetrahedron            :: linear tetrahdedon
!    kkt_v                         :: core KKT routine
!    optics                        :: optical property
!    off_diagonal                  :: process off-diagonal componet of dielectric function
!    vnl_prepare_ek                :: Read-Needs transition moment correction
!    calc_ptrans_ek                :: Kageshima-Shiraishi transition moment correction
!    calc_ptrans_TM_PP_ek          :: ibid
!
! ===================================== Added by K. Tagami ========== 0.2
!    dhub_prepare_ek               :: Transition moment correction by DFT-U
! ====================================================================
! 
!    smearing_method               :: smearing calculation
!    drude_term_eps                :: Drude term calculation subroutine
!    calc_nlo                      :: nonlinear optics calculation
!    calc_magopt                   :: magneto-optical effect calculation

! ==================================================================================================
!                        Major grobal variables
!            Name                               Content
! --------------------------------------------------------------------------------------------------
!   * Fermi level option
!    nrd_efermi                    :: Fermi level option
!                                     = 0 calculate Fermi level (default)
!                                     = 1 read Fermi level (for insulator and semiconductor only)
!    efermi                        :: Fermi level
! --------------------------------------------------------------------------------------------------
!   * photon
!    e                             :: photon energy
!                                     e(istep)
!                                      istep : index of photon energy step (1:nstep)
!    e_low                         :: lower limit of photon energy
!    e_high                        :: higher limit of photon energy
!    e_step                        :: step width of photon energy
!    nstep                         :: number of photon energy step
!    u                             :: polarization vector (linear polarization case)
!                                     u(ixyz)
!                                      ixyz : cartesian index (1:3)
!                                             = 1:x; =2:y; =3:z
!    p                             :: pointing vector (circular polarization case)
!                                     p(ixyz)
!                                      ixyz : cartesian index (1:3)
!                                             = 1:x; =2:y; =3:z
! --------------------------------------------------------------------------------------------------
!   * k-point for linear tetrahedron
!    vk00xyz                       :: k-point coordinate(cartesian) of nbztyp=1 system
!                                     vk00xyz(ik,ixyz)
!                                      ik: k-point index (1:np0)
!                                      ixyz : cartesian index (1:3)
!    vk0xyz                        :: k-point coordinate(cartesian) of nbztyp/=1 system
!                                     vk0xyz(ik,ixyz)
!                                      ik: k-point index (1:np0)
!                                      ixyz : cartesian index (1:3)
!                                             = 1:x; =2:y; =3:z
!    vk0_op                        :: symmetry operation generating k-point from its equivalent point in IBZ
!                                     vk0_op(ik)
!                                      ik : k-point index (1:np0)
!    nopr_k                        :: number of symmetry operations keeping k-point position
!                                     nopr_k(ik)
!                                      ik : k-point index (1:np0)
!    op_k                          :: index of symmetry operation keeping k-point position
!                                     op_k(ik,iop)
!                                      ik : k-point indes (1:np0)
!                                      iop : opeation no. (1:nopr(ik))
! --------------------------------------------------------------------------------------------------
!   * band
!    band_type                     :: band type index (1:filled; 0:half-filled; -1; unfilled)
!                                     band_type(1:neg,1:nspin)
!    num_vb                        :: number of valence bands
!                                     = neg for nrd_efermi=0 case
!                                    /= neg for nrd_efermi=1 case
!    num_cb                        :: number of conduction bands
!                                     = neg for nrd_efermi=0 case
!                                    /= neg for nrd_efermi=1 case
!    ind_vb                        :: valence band index
!                                     ind_vb(ib,ik)
!                                       ib : band index (1:num_vb)
!                                       ik : k-point index (1:kv3_ek)
!    ind_cb                        :: conduction band index
!                                     ind_cb(ib,ik)
!                                       ib : band index (1:num_cb)
!                                       ik : k-point index (1:kv3_ek)
!    ind_vb2                       :: valence band index with energy-ordering
!                                     ind_vb2(ib,ik)
!                                       ib : band index (energy-ordered) (1:num_cb)
!                                       ik : k-point index (1:kv3_ek)
!    ind_cb2                       :: conduction band index with energy-ordering
!                                     ind_cb2(ib,ik)
!                                       ib : band index (energy-ordered) (1:num_cb)
!                                       ik : k-point index (1:kv3_ek)
!    eb_ek                         :: band energy
!                                     eb_ek(ik,ib)
!                                      ik : k-point index (1:kv3_ek)
!                                      ib : band index(energy-ordered) (1:neg)
!    scissor                       :: scissor operator
!    occ_mpi_ek                    :: band occupation
!                                     occ_mpi_ek(ik,ib)
!                                      ik : k-point index (1:kv3_ek)
!                                      ib : band index(no energy ordering) (1:neg)
!    wf_l                          :: wavefuction (work)
!                                     wf_l(ikg,kimg)
!                                      ikg : G vector index (1:kg1)
!                                      kimg : real/imaginary index
!                                             = 1  real part
!                                             = 2 imaginary part
!    wf_lb                         :: wavefuction (work)
!                                     wf_lb(ikg,ib,kimg)
!                                      ikg : G vector index (1:kg1)
!                                      ib  : band index
!                                      kimg : real/imaginary index
!                                             = 1  real part
!                                             = 2 imaginary part
! --------------------------------------------------------------------------------------------------
!   * transition moment and oscillator strength
!    trm                           :: transition moment
!                                     <phi1|r|phi2>  r: position operator (x,y,z)
!                                     trm(ik,ni,nj,ixyz,ri,lc)
!                                      ik : k-point index (1:kv3_ek)
!                                      ni : initial band index   nj : final band idex
!                                      ixyz: cartesian index (1:3)
!                                           = 1:x; =2:y; =3:z
!                                      ri : real and imaginary component index (1:2)
!                                           = 1 real part
!                                           = 2 imaginary part
!                                      lc : correction component index (1:2)
!                                           = 1 local part
!                                           = 2 correction part (by RN or KS method)
!    trm2                          :: transition moment square
!                                     <phi1|r1|phi2><phi2|r2|phi2> r1, r2: position operator (x,y,z)
!                                     trm2(ik,ni,nj,id,ispin)
!                                      ik : k-point index (1:np0)-> linear tetrahedron case
!                                                         (1:np2)-> pagabolic/gaussian smearing case
!                                      ni : initial band index   nj : final band index
!                                      id : component index (1:6)
!                                           = 1:xx; =2:yy; =3:zz; =4:xy; =5:xz; =6: yz
!                                      ispin: spin index (1:nspin)
!    os_str                        :: oscillator strength
!                                     os_str(ik,ni,nj,id)
!                                      ik : k-point index (1:kv3_ek)
!                                      ni : initial band index  nj : final band index
!                                      id : component index (1:6)
!                                           = 1:xx; =2:yy; =3:zz; =4:xy; =5:xz; =6: yz  
!    nsym                          :: symmetrization parameter
!                                     =0 not symmetrize transition moment
!                                     =1 symmetrize transition moment
!    trm_rptrans_allocated         :: array allocation logical index
! --------------------------------------------------------------------------------------------------
!   * Kageshima-Shiraishi(KS) type transition moment calculaton
!    nppcorr                       :: KS calculation parameter
!                                     = 0 no KS calculaton
!                                     = 1 KS calculaton (only for BHS, not used)
!                                     = 2 extended KS calculation (default)
!    ptrans                        :: KS correction of transition moment
!                                     sum(ij)<phi1|beta(i)>p(i,j)<beta(j)|phi2>   p: KS correction term
!                                     ptrans(ik,ni,nj,ixyz,ri)
!                                      ik : k-point index (1;kv3_ek)
!                                      ni : initial band index  nj : final band index
!                                      ixyz : cartesian index (1:3)
!                                             = 1:x; =2:y; =3:z
!                                      ri : real/imaginary index
!                                            = 1 real part
!                                            = 2 imaginary part
!    ilocal_l                      :: local orbital index 
!                                     = 0 for BHS pseudopotential
!                                     /=0 for Troullier-Martin pseudopotential 
!    nppc_data                     :: number of KS correction terms for psedupotential
!                                     npcc_data(it)
!                                      it : pseudopotential index (1:ntyp)
!    phase_ylm1, phase_ylm2        :: phase ylm index of KS correction term
!                                     phase_ylm1(it,iks) phase_ylm2(it,iks)
!                                      it : pseudopotental index (1:ntyp)
!                                      iks: KS correction term index (1:ntppc_data(it))
!    dipole_tau1, dipolr_tau2      :: tau index of KS correction term
!                                     dipole_tau1(it,iks) dipole_tau2(it,iks)
!                                      it : pseudopotental index (1:ntyp)
!                                      iks: KS correction term index (1:ntppc_data(it))
!    dipole_dxyz_us                :: KS correction term
!    mnppc                         :: maximum number of KS correction terms of pseudopotentials
! --------------------------------------------------------------------------------------------------
!   * Read and Needs (RN) type transition moment calculation
!    nonlocal                      :: RN calculation parameter
!                                     = 0  no RN calculaton
!                                     = 1  RN calculation
!    DELQ                          :: q parameter
!    rtrans                        :: RN correction of transition moment
!                                     <phi1|[Vnl,r]|phi2>: Vnl  non-local part of pseudopotential
!                                                            r  positon operator
!                                     rtrans(ik,ni,nj,ixyz,ri)
!                                      ik : k-point index (1;kv3_ek)
!                                      ni : initial band index  nj : final band index
!                                      ixyz : cartesian index (1:3)
!                                             = 1:x; =2:y; =3:z
!                                      ri : real/imaginary index
!                                            = 1 real part
!                                            = 2 imaginary part
!    rfsr_plus_l                   :: <phi1|exp(iqr)|phi2> real part
!    rfsi_plus_l                   :: <phi1|exp(iqr)|phi2> imaginary part
!    rfsr_minus_l                  :: <phi1|exp(-iqr)|phi2> real part
!    rfsr_minus_l                  :: <phi1|exp(-iqr)|phi2> imaginary part
!    snl_plus                      :: <beta|exp(iqr)|phi>
!    snl_minus                     :: <beta|exp(-iqr)|phi>
! --------------------------------------------------------------------------------------------------
!   * effective mass calculation
!    sw_mass                       :: switch for effective mass calculation
!                                     = 0  skip calculation
!                                     = 1  mass calculation
!    mass_direction                :: mass type
!                                     = 0  mass tensor (no direction)
!                                     = 1  mass in specified direction
!    mass_dir(3)                   :: direction for mass_direction = 1 case
!                                     specifined by crystal index
!                                     e. g. mass_dir = (1.0, 0.0. 0.0) means mass in (100) direction
!                                           of crystal is calculated
!    mass_kpoint                   :: k-point and band specification type
!                                     = 0  mass of condunction and valence band edge is calculated
!                                     = 1  mass of specified k-point and band is calculated
!    ikshift                       :: k-point shift value
!                                     default = 0.0d0
!    mass_ik                       :: index of k-point for mass_kpoint = 1 case
!    mass_ib                       :: index of mass_kpoint for mass_kpoint = 0 case
! --------------------------------------------------------------------------------------------------
!   * Brillouin zone integration
!    way_BZintegral                :: integration method parameter
!                                     = 1 (PARABOLIC_B)  --> parabolic broadning method
!                                     = 2 (L_TETRAHEDRON)--> linear tetrahedron method
!                                     = 3 (GAUSSIAN_B)   --> gaussian broadning method
!    width                         :: smearing width for parabolic and gaussian smearing
!                                     defalut = 0.5eV (0.0183746518 Hartree)
!    tetra_eps                     :: eps value for nsdos0_m, nsdos0_m_SHG_omega, nsdos0_m_SHG_omega2
!                                     and nstts1_m
!    nistep                        :: number of energy steps in tetrahedron integration (only for way_BZintegral = 2)
!                                     default = 10
! --------------------------------------------------------------------------------------------------
!   * Drude term
!    ndrude                        :: Drude calculation parameter
!                                     = 0  no Drude term is included
!                                     = 1  Drude term is calculeted and added to band dielectric function
!                                     = 2  only Drude term is calculated
!    i_drude                       :: Drude term (imaginary part)
!    r_drude                       :: Drude term (real part)
! ----------------------------------------------------------------------------------------------------
!   * nonlinear optics
!    nlo                           :: nonlinear optics type
!                                     = 0  no nonlinear optical suceptibility calculation
!                                     = 1  SHG susceptibility calculation
!                                     = 2  THG susceptibility calculation
!    virt_ex_type                  :: virtual excitation type
!                                     = 0 (ALL_TYPE) all type excitation
!                                     = 1 (ELECTRON) electron excitation
!                                     = 2 (HOLE) hole excitation
!                                     = 3 (THREE_LEVEL) three state excitaion (THG case only)
!    nlo_band                      :: band transition type
!                                     = 0 (ALL_BAND) all band transtion
!                                     = 1 (INTER_BAND) interband transition
!                                     = 2 (INTRA_BAND) intraband transition (THG case only)
!    nlo_term                      :: resonant type
!                                     = 0 (ALL_TERM) all resonant term
!                                     = 1 (OMEGA_TERM) omega resonant term
!                                     = 2 (OMEGA2_TERM) omega2 resonant term
!                                     = 3 (OMEGA3_TERM) omega3 reonant term (THG case only)
!    ptrm                          :: P transition moment
!                                     ptrm(ik,ni,nj,ixyz,ri) = <ni|P|nj>
!                                      ik : k-point index (1;kv3_ek)
!                                      ni : initial band index  nj : final band index
!                                      ixyz : cartesian index (1:3)
!                                             = 1:x; =2:y; =3:z
!                                      ri : real/imaginary index
!                                            = 1 real part
!                                            = 2 imaginary part
!    bktm                          :: {pjl, pli} = 0.5d0*(pjl*pli + pli*pjl)
!                                     bktm(ik,ni,nl,nj,index,2)
!                                      ik : k-point index (1;kv3_ek)
!                                      ni : initial band index  nl: intermediate band index  nj : final band index
!                                      index : component index (1:6)
!                                             = 1:xx; =2:xy; =3:xz; = 4:yy; = 5:yz; = 6:zz
!                                      ri : real/imaginary index
!                                            = 1 real part
!                                            = 2 imaginary part
!    rep4bkt                       :: Re[p1(a)*(p2(b),p3(c),p4(d))]
!                                     rep4bkt(index)
!                                     index : combined index of cartesian indices a, b, c, and d
!                                             p1, p2, p3, and p4 are P transition moment
!                                             (p2,p3,p4) is cartesian index symmetrized product of p2, p3, and p4
!    dres_method                   :: double resonance method
!                                     = 0 (OMIT)     double resonance term is omitted
!                                     = 1 (DAMPING)  double resonance term is damped
!    dres_cut_off                  :: cut-off energy for the omittion and damping
!                                     default = 10.0d-3 Hartree
!    smearing_fact                 :: smearing factor condition
!                                     = 0 (OFF_RESONANCE)  non-resonant SHG/THG moment is used
!                                     = 1 (RESONANCE)      resonant SHG/THG moment is used
! ---------------------------------------------------------------------------------------------------
!   * magneto-optics 
!    magneto_optical               :: magneto optical calculation index
!                                     = 0  no magneto-optical calculation
!                                     = 1  magneto-optical calculation
!    kerr_rotation                 :: kerr rotation angle
!    ker_ellipticity               ::  ker ellipticity coefficient
!
! ---------------------------------------------------------------------------------------------------
!   * restart facility
!    restart_mode                  :: restart option
!                                     = 0  no restart
!                                     = 1  restart using EPS_CONT file (eps_continue.data)
!    nk_read                       :: number of k-point read from trm.data
!    num_vb_restart                :: number of valence band for restart
!    num_cb_restart                :: number of conduction band for restart
!    all_kpt_data_read             :: logical parameter 
!                                     = .true.  all transition moment data is read from transition moment file
!                                     = .false. not all transition moment data is read from transition moment file
! ---------------------------------------------------------------------------------------------------
!   * print option
!    ipriepsilon                   :: print out control
!                                     = 0  brief
!                                     = 1  standard (default)
!                                     = 2  detail
!                                     = 3  debug
!

  implicit none 
  include 'mpif.h'
  integer istatus(mpi_status_size)
  integer,           allocatable,     dimension(:,:)         :: n2_mpi
  integer,           allocatable, dimension(:)           :: vk0_op
  integer                                                :: nkx,nky,nkz,nkx2,nky2,nkz2
  integer,           allocatable, dimension(:)           :: nopr_k, ilocal_l
  integer,           allocatable, dimension(:)           :: PP_norm_type, PP_local_type
  integer,           allocatable, dimension(:)           :: map_k_eps
  integer,           allocatable, dimension(:,:)         :: band_type, op_k
  integer                                                :: kv3_in_the_ek_process
  integer,           allocatable, dimension(:,:)         :: n2_mpi_ek                  ! n2_mpi(neg,kv3_ek)
  integer,           allocatable, dimension(:,:)         :: ind_vb, ind_cb             ! ind_vb(neg,kv3_ek),ind_cb(neg,kv3_ek)
  integer,           allocatable, dimension(:,:)         :: ind_vb2,ind_cb2            ! ind_vb2(neg,kv3_ek),indcb2(neg,kv3_ek)

#if 0
  integer,           allocatable, dimension(:)           :: nppc_data                  ! n
ppc_data(ntyp)
  integer,           allocatable, dimension(:,:)         :: phase_ylm1,phase_ylm2      ! phase_ylm1(ntyp,:),phase_ylm2(ntyp,:)
  integer,           allocatable, dimension(:,:)         :: dipole_tau1,dipole_tau2    ! dipole_tau1(ntyp,:),dipole_tau2(ntyp,:)
  integer                                                :: mnppc
#endif

  integer                                                :: sw_epsilon, restart_mode, crystal, nrd_efermi
  integer                                                :: n_check_ts, band_i, band_f, spin, major_spin, minor_spin
  integer                                                :: nonlocal, nppcorr, nsym, way_BZintegral, ndrude, nlo, dres_method

  integer :: magneto_optical
#if 0
  integer :: ipriepsilon
#endif

  integer :: ptype, system, nstep, nstep_eb_max, nistep, num_vb, num_cb, virt_ex_type, nlo_term, nlo_band, smearing_fact
  integer                                                :: sw_mass, mass_direction, mass_kpoint, mass_ik, mass_ib
  integer                                                :: ik_vb_top, ib_vb_top, ik_cb_bottom, ib_cb_bottom
  integer                                                :: ndeg_vb_top, ndeg_cb_bottom
  integer                                                :: nk_restart_read
  integer                                                :: SINGLE_CRYSTAL, POLYCRYSTAL, METALLIC, NON_METALLIC
  integer                                                :: FILLED_BAND, HALF_FILLED_BAND, UNFILLED_BAND
  integer                                                :: PARABOLIC_B, L_TETRAHEDRON, GAUSSIAN_B, BOTH, MAJOR, MINOR
  integer                                                :: NC_PP, US_PP, BHS_POLY, ORBITAL
  integer :: SHG, THG, ALL_TYPE, ELECTRON, HOLE,THREE_LEVEL, ALL_TERM, OMEGA_TERM, OMEGA2_TERM, &
       &   OMEGA3_TERM, ALL_BAND, INTER_BAND, INTRA_BAND, OMIT, DAMPING, RESONANCE, OFF_RESONANCE
  integer,                        dimension(3,2)         :: k_sample_mesh1
  real(DP),          allocatable,     dimension(:,:)         :: wf_l                       ! work wave functions
  real(DP),          allocatable,     dimension(:,:,:)       :: wf_lb
  real(DP),          allocatable,     dimension(:,:,:,:,:)   :: trm2
  real(DP),          allocatable,     dimension(:,:)         :: e2_mpi
  real(DP),          allocatable, dimension(:)           :: e, i_drude, r_drude
  real(DP),          allocatable, dimension(:)           :: spin_charge
  real(DP),          allocatable, dimension(:,:)         :: imeps,reps
  real(DP),          allocatable, dimension(:)           :: refr,refi,absc,reflc
  real(DP),          allocatable,     dimension(:,:)         :: vk00xyz, vk0xyz
  real(DP),          allocatable, dimension(:,:,:)       :: vkxyz_ek_org
  real(DP),          allocatable, dimension(:,:,:,:,:)   :: rtrans                     ! rtrans(kv3,neg,neg,3,2) 
                                                                                       ! <phi1|[Vnl,r]|phi2>

! ================================ Added by K. Tagami ============== 0.2
  real(DP), allocatable, dimension(:,:,:,:,:)   :: rtrans_hub
  integer    ::          sw_tm_hubbard_correction = ON
! ==================================================================

  real(DP),          allocatable, dimension(:,:,:,:)     :: rfsr_plus_l,rfsi_plus_l    !d(np_e,nlmta,ista_k:iend_k,3)
  real(DP),          allocatable, dimension(:,:,:,:)     :: rfsr_minus_l,rfsi_minus_l  !d(np_e,nlmta,ista_k:iend_k,3)
  real(DP),          allocatable, dimension(:,:,:,:)     :: rfsr_l,rfsi_l              !d(np_e,nlmta,ista_k:iend_k,3)
  real(DP),          allocatable, dimension(:)           :: zfdqcos, zfdqsin           !d(nbmx)
  real(DP),          allocatable, dimension(:,:,:,:)     :: snl_plus                   !d(kg1,nlmtt,ista_snl:iend_snl,3)
  real(DP),          allocatable, dimension(:,:,:,:)     :: snl_minus                  !d(kg1,nlmtt,ista_snl:iend_snl,3)
  real(DP), private, allocatable, dimension(:)           :: qx,qy,qz,vlength,snl2,wka,wkb,ylm
  real(DP), private, allocatable, dimension(:)           :: ar, ai
  real(DP), private, allocatable,     dimension(:,:)         :: work

  real(DP),          allocatable, dimension(:,:,:,:)     :: os_str                     ! os_str(np0,neg,neg,6)
  real(DP),          allocatable, dimension(:,:,:,:,:,:) :: trm                        ! trm(kv3_ek,neg,neg,3,2,2)
                                                                                       ! <phi1|r|phi2>
  real(DP),          allocatable, dimension(:,:,:,:,:,:) :: trm_tmp
  real(DP),          allocatable, dimension(:,:)         :: eb_ek                      ! eb_ek(kv3_ek,neg)
  real(DP),          allocatable, dimension(:,:)         :: eb_ek_tmp
  real(DP),          allocatable, dimension(:,:)         :: occ_mpi_ek                 ! occ_mpi_ek(neg,kv3_ek)

#if 0
  real(DP),          allocatable, dimension(:,:,:)       :: dipole_dxyz_us             ! dipole_dxyz_us(ntyp,:,3)
#endif

  real(DP),          allocatable, dimension(:,:,:,:,:)   :: ptrans                     ! ptrans(kv3,neg,neg,3,2)

  real(DP),          allocatable, dimension(:,:,:,:,:)   :: ptrm                       ! ptrm(kv3_ek,neg,neg,3,2)
                                                                                       ! <phi1|p|phi2>
  real(DP),          allocatable, dimension(:,:)         :: imchi2                     ! imchi2(nstep,18)
  real(DP),          allocatable, dimension(:,:)         :: rechi2                     ! rechi2(nstep,18)
  real(DP),          allocatable, dimension(:,:)         :: imchi3, chi3int            ! imchi3(nstep,30),chi3int(nstep,30)
  real(DP),          allocatable, dimension(:,:)         :: rechi3                     ! rechi3(nstep,30)
                                                                                       ! Re[pij*pjk*pkl*pi]
  real(DP),          allocatable, dimension(:,:)         :: optcr_l                    ! optcr_l(nstep,2)
  real(DP),          allocatable, dimension(:,:)         :: optci_l                    ! optci_l(nstep,2)
  real(DP),          allocatable, dimension(:)           :: kerr_rotation, kerr_ellipticity
 !
  real(DP),          allocatable, dimension(:)           :: edmax_nspin, edmin_nspin
  real(DP),                       dimension(3)           :: u,p
  real(DP),                       dimension(3)           :: mass_dir
  real(DP)                                               :: ikshift
  real(DP)                                               :: ulen, plen
  real(DP)                                               :: band_gap_energy
  real(DP)                                               :: e_low, e_high, e_step
  real(DP)                                               :: DELQ, width, tetra_eps, scissor, tot_charge, dres_cut_off, deg_omega
  real(DP)                                               :: dc_conductivity,drude_damping,plasma_f,effective_m
  real(DP)                                               :: hartree, hartree_in_eV, planck_constant, speed_of_light &
                                                        & , au_of_velocity, au_of_time, vacuum_permittivity_in_au &
                                                        & , metallic_ch, tau_drude, chi2_in_au, chi3_in_au
  integer, parameter, private                            :: len_str = 132
  character*(len_str),private                            :: str
  logical                                                :: trm_rptrans_allocated, trans_ek_restarted

! === KT_add === 13.0R
  integer :: sw_trm_print_full = OFF
  integer :: sw_wf_rspace_print_full = OFF
  real(kind=DP) :: delta_omega = 1.0D-14
! ============== 13.0R

! === KT_add === 13.0S
  real(kind=DP) :: emin_core_spectrum = 0.0d0
  integer :: eels_mode = OFF
! ============== 13.0S

! ==== KT_add === 2015/01/17
  integer :: sw_scissor_renormalization = OFF
! =============== 2015/01/17

! physical constants 
! from the NIST reference on constants, units, and uncertainty
! http://physics.nist.gov/
  data hartree                    /4.359774417d-18/      ! Joule/sec
  data hartree_in_eV              /27.2113845d0/         ! eV
  data planck_constant            /1.05457168d-34/       ! Jolue * sec =h/2pai
  data speed_of_light             /2.99792458d8/         ! m/sec
  data au_of_velocity             /2.18769126d6/         ! m/sec
  data au_of_time                 /2.41884326505d-17/    ! sec
  data vacuum_permittivity_in_au  /0.079577471544205d0/
  data chi2_in_au                 /5.83003753d0/         ! 10-8 esu
  data chi3_in_au                 /0.0033989325591d0/    ! 10-12 esu

 contains

 subroutine initialization_eps_ek
    integer :: id_sname = -1
!
!   subroutine making initialization
!
    call tstatc0_begin('initialization_eps_ek ', id_sname)
    if(printable) then
       write(nfout,'(1x," << UVSOR-Epsilon   INITIALIZATION START >>")')
       write(nfout,*) '-------------------------------------------------------'
       write(nfout,*) ' Electronic Dielectric Function and Optical Spectra Calculation'
       write(nfout,*) '-------------------------------------------------------'
    end if

!   set inernal parameters
    call set_Crystal_param
    call set_PP_param
    call set_band_type_param
    call set_BZ_int_param
    call set_NLO_param
!   set default parameters
    call init_calc_param
    call set_default_options
!   read tag_efermi
!    if(m_CtrlP_check_inputfilestyle(nfinp) == NEW_) then
       call m_Files_reopen_nfinp(1)
       call m_CtrlP_rd_epsilon(nfout)
       call m_Files_reopen_nfinp(2)
       if(sw_epsilon==0) return
       if(printable) then
          write(nfout,'(" !*--- epsilon input-file style = NEW")')
          write(nfout,'(" !*--- epsilon input data is read from F_INP")')
       end if
!    else
!       if(printable) then
!          write(nfout,'(" !*--- epsilon input-file style = OLD")')
!          write(nfout,'(" !*--- epsilon OLD type input file is not supported")')
!       end if
!       stop
!    end if

!   set up input data
    call eps_setup(nfout,e_low,e_high,e_step,nstep,u,p,ptype,DELQ,nonlocal,nppcorr,nsym,nrd_efermi,efermi)
    call restart_setup(nfout)

! ============= KT_mod ============================ 13.0S
!   set KS correction data
!    if(nppcorr>0) call set_ppc_data(nfout) 
!
    if ( sw_corelevel_spectrum == ON ) then
       call m_CLS_set_data_core2val_from_pp
       call m_CLS_alloc_wfn_core_states
       call m_CLS_set_wfn_core_states

!       if ( icond < 2 .or. nrd_efermi == ON )  call m_CLS_set_ene_core_states
       nrd_efermi = OFF
       call m_CLS_set_ene_core_states

       if( nppcorr==2 ) call prepare_for_TM_PP_ek
    else
#if 0
       if(nppcorr>0) call set_ppc_data(nfout    !! moved to m_ValenceBand_Spectrum.F90
#else
!       call m_VBS_set_data_ppc_from_pp
       call m_VBS_set_data_ppc_from_pp_it()
       if(nppcorr==2) call prepare_for_TM_PP_ek
#endif
    endif
! ================================================= 13.0S

!   arrocate data arrays
    call alloc_m_Epsilon1_ek
    call alloc_m_Epsilon2(nstep)
    trm_rptrans_allocated = .false.
    trans_ek_restarted = .false.
 
!   check pseudopotential type
    call check_PP(nfout)

    if(printable) write(nfout,'(1x,"<< UVSOR-Epsilon   INITIALIZATION END >>")')
    call tstatc0_end(id_sname)

 end subroutine initialization_eps_ek

! === KT_add === 13.1R
 subroutine m_Eps_chkif_sw_epsilon
   call m_Files_reopen_nfinp(1)
   call m_CtrlP_rd_epsilon(nfout)
   call m_Files_reopen_nfinp(2)
 end subroutine m_Eps_chkif_sw_epsilon
! ============== 13.1R

 subroutine set_Crystal_param
    SINGLE_CRYSTAL=1
    POLYCRYSTAL=2
    NON_METALLIC=0
    METALLIC=1
 end subroutine set_Crystal_param

 subroutine set_PP_param
    NC_PP=1
    US_PP=2
    BHS_POLY=1
    ORBITAL=2
 end subroutine set_PP_param

 subroutine set_default_options
    band_i=0
    band_f=0
    restart_mode = 0
    crystal=SINGLE_CRYSTAL
    nrd_efermi=0
    n_check_ts=1
    nonlocal=0
    nppcorr=0
    nsym=1
    sw_mass = 0
    mass_direction = 0
    mass_kpoint = 0
    way_BZintegral=PARABOLIC
    spin=BOTH
    ndrude=0
    nlo = 0
    virt_ex_type = 0
    nlo_term = 0
    nlo_band = 0
    dres_method = OMIT
    smearing_fact = OFF_RESONANCE
    magneto_optical = 0
#if 0
    ipriepsilon = 1
#endif
 end subroutine set_default_options

 subroutine init_calc_param
! ==== KT_mod === 2014/10/01
!    efermi=0.0d0
   if ( icond >= 2 ) efermi=0.0d0
! =============== 2014/10/01
    ikshift=0.0d0
    delq=0.0d0
    width=0.0d0
    nistep=0
    tetra_eps = 1.0d-7
    scissor=0.0d0
    dc_conductivity=0.0d0
    drude_damping=0.0d0
    plasma_f=0.0d0
    effective_m=1.0d0
    dres_cut_off = 10.0d-3
    deg_omega = 1.0d0 ! -> dummy delta E between degenerated bands (for NLO calculation)
 end subroutine init_calc_param

 subroutine set_band_type_param
    FILLED_BAND=1
    HALF_FILLED_BAND=0
    UNFILLED_BAND=-1
 end subroutine set_band_type_param

 subroutine set_BZ_int_param
    PARABOLIC_B=1
    L_TETRAHEDRON=2
    GAUSSIAN_B=3
    BOTH=0
    MAJOR=1
    MINOR=2
 end subroutine set_BZ_int_param

 subroutine set_NLO_param
    ALL_TYPE = 0
    ELECTRON = 1
    HOLE = 2
    THREE_LEVEL = 3
    SHG = 1
    THG = 2
    ALL_TERM = 0
    OMEGA_TERM = 1
    OMEGA2_TERM = 2
    OMEGA3_TERM = 3
    ALL_BAND = 0
    INTER_BAND = 1
    INTRA_BAND = 2
    OMIT = 0
    DAMPING = 1
    RESONANCE = 1
    OFF_RESONANCE = 0
 end subroutine set_NLO_param

 subroutine eigen_value_ordering_eps_ek
    integer :: id_sname = -1
    if(sw_epsilon==0) return
    call tstatc0_begin('eigen_value_ordering_eps_ek ',id_sname)
    if(nk_in_the_process == 1 .and. ipri>=1) &
      & write(nfout,'(1x,"<< UVSOR-Epsilon   TRANSITION MOMENT CALCULATION START >>")')
    call ordering_ek(nfout)
    call tstatc0_end(id_sname)
 end subroutine eigen_value_ordering_eps_ek

 subroutine gen_k_points_eps_ek
    integer :: id_sname = -1
!
!   subroutine setting up k-points for linear tetrahedron
!
    call tstatc0_begin('gen_k_points_eps_ek ',id_sname)
    if(sw_epsilon==0) return
    if(ndrude==2) return
    if(way_BZintegral==L_TETRAHEDRON) then
       if(printable) &
         & write(nfout,'(1x,"!* k-points for linear tetrahedron method is generated")')
       call alloc_array_for_tetrahedron
       call gen_vk0xyz_ek(nsym)
    end if
    call tstatc0_end(id_sname)
    contains
     subroutine alloc_array_for_tetrahedron
       allocate(vk00xyz(np0,3)); vk00xyz=0.0d0
       allocate(vk0xyz(np0,3)); vk0xyz=0.0d0
       allocate(vk0_op(np0)); vk0_op=0
       allocate(nopr_k(np0)); nopr_k=0
       allocate(op_k(np0,nopr)); op_k=0
     end subroutine alloc_array_for_tetrahedron
 end subroutine gen_k_points_eps_ek

 subroutine calc_transition_moment_eps_ek
!
!   subroutine calculating transition moment during each ek process
!
! === KT_add == 13.0R
   integer :: ik, ib, wf_filetype_org 
! ============= 13.0R

    if(sw_epsilon==0) return
    if(ndrude/=2) then
       call alloc_arrays_for_tm

! ============== KT_mod =================== 13.0S
!       call trans_ek(nfout,nfzaj)
!
       if ( sw_corelevel_spectrum == ON ) then
          call trans_core2val_ek(nfout,nfzaj)
       else
          call trans_ek(nfout,nfzaj)
       endif
! ========================================== 13.0S

       if(nspin==1) then
          if(printable) &
            & write(nfout,'(/1x,"!* ----- transition moment of ",i4,&
            & " -th k-point is calculated by UVSOR-Epsilon  -----")') &
            & nk_in_the_process
       else
          if(printable) &
            & write(nfout,'(/1x,"!* ----- transition moment of ",i4," and ",&
            & i4," -th k-points is calculated by UVSOR-Epsilon -----")') &
            & nk_in_the_process, nk_in_the_process+nspin-1
       end if
! === DEBUG by tkato 2013/10/18 ================================================
!      if(nk_in_the_process == kv3_ek-nspin+1) then
       if(nk_in_the_process+kv3-1 >= kv3_ek-nspin+1) then
! ==============================================================================
          if(kv3_ek/=nk_converged) then
             if(printable) then
                write(nfout,10)
                write(nfout,20) kv3_ek-nk_converged
                write(nfout,30)
             end if
             stop
          else
             if(printable) then
                write(nfout,'(1x,"!* all k-points are converged")')
                write(nfout,'(1x,"!* transition moment of all k-points is calculated")')
                write(nfout,'(1x,"<< UVSOR-Epsilon   TRANSITION MOMENT CALCULATION END  >>"/)')
             end if
          end if
      end if
      call dealloc_arrays_for_tm
    end if

! ======= KT_add ======= 13.0R
! ======================= 13.0R

     if(ndrude==2) call set_eb_ek_drude

10   format(1x,"!!* there is non-converged k-point")
20   format(1x,"!!* number of non-converged k-points is ",i4)
30   format(1x,"!!* UVSOR-Epsilon STOP because of the non-convergence")
     
  contains
     subroutine alloc_arrays_for_tm
       allocate(wf_lb(maxval(np_g1k),neg,kimg)); wf_lb=0.0d0
     end subroutine alloc_arrays_for_tm
    
     subroutine dealloc_arrays_for_tm
       deallocate(wf_lb)
     end subroutine dealloc_arrays_for_tm
 end subroutine calc_transition_moment_eps_ek

 subroutine calc_mass(nf,nsym)
    implicit none
!
!   calculate effective mass of electron and hole by k-p perturbation method
!
    integer, intent(in)      :: nf, nsym
    integer                  :: ib, ib1, ik, ispin
    real(DP), dimension(3)   :: mass
    real(DP), dimension(3,3) :: axis
    real(DP)                 :: pmass
    if(printable) write(nf,'(1x,"---------- effective mass calculation ----------")')
    if(mass_kpoint == 0) then
       if(system == METALLIC) then
          if(printable) write(nf,'(1x,"!* the system is metallic")')
          if(printable) write(nf,'(1x,"!* band-edge mass cannot be calculated")')
          return
       end if
! calculate hole mass at valence band top
       ik = ik_vb_top
       ib = ib_vb_top
       if(ik==0) then
          if(printable) then
             write(nf,'(1x,"!* ik for valence band top is zero")')
             stop
          end if
       end if
       if(ib==0) then
          if(printable) then
             write(nf,'(1x,"!* ib for valence band top is zero")')
             stop
          end if
       end if

       if(printable) then
          write(nf,'(1x,"!* effective mass at valence band top: ik = ",i4)') ik
          write(nf,'(1x,"!* degeneracy = ",i3)') ndeg_vb_top
          if(mass_direction==0.and.ndeg_vb_top/=1) &
         & write(nfout,'(1x,"!* warning : effective mass should be wrong because of the degeneracy.", &
         & /,1x,"!* set direction indices and  k-point shift parameter in tag_mass.")')
       end if
       do ib1 = ib-ndeg_vb_top+1, ib
          call calc_mass_core(ik,ib1,ispin,mass,axis,pmass)
          if(printable) then
             write(nf,'(1x,"!* ib = ",i4)') ib1
             write(nf,'(1x,"!* ispin = ",i4)') ispin
          end if
          call mass_out
          if(mass_direction == 0) call axis_out
       end do

! calculate electron mass at conduction band bottom
       ik = ik_cb_bottom
       ib = ib_cb_bottom
       if(ik==0) then
          if(printable) then
             write(nf,'(1x,"!* ik for conduction band bottom is zero")')
             stop
          end if
       end if
       if(ib==0) then
          if(printable) then
             write(nf,'(1x,"!* ib for conduction band bottom is zero")')
             stop
          end if
       end if

       if(printable) then
          write(nf,'(/1x,"!* effective mass at conduction band bottom: ik = ",i4)') ik
          write(nf,'(1x,"!* degeneracy = ",i3)') ndeg_cb_bottom
       end if
       do ib1 = ib, ib+ndeg_cb_bottom-1
          call calc_mass_core(ik,ib1,ispin,mass,axis,pmass)
          if(printable) then
             write(nf,'(1x,"!* ib = ",i4)') ib1
             write(nf,'(1x,"!* ispin = ",i4)') ispin
          end if
          call mass_out
          if(mass_direction == 0) call axis_out
       end do
    else
      ik = mass_ik
      ib = mass_ib
      call calc_mass_core(ik,ib,ispin,mass,axis,pmass)
      if(printable) then
         write(nf,'(1x,"!* effective mass at ik = ",i4,3x,"ib = ",i4)') ik, ib
         write(nf,'(1x,"!* ispin = ",i4)') ispin
      end if
      call mass_out
      if(mass_direction == 0) call axis_out
    end if
    contains
     subroutine mass_out
       implicit none
       if(printable) then
          if(mass_direction == 0) then
             write(nfout,'(1x," aa = ",f10.5,3x," bb = ",f10.5,3x," cc = ",f10.5)') mass(1), mass(2), mass(3)
          else
             write(nfout,'(1x," mass along (",3f10.5,") direction = ",f10.5)') mass_dir(1), mass_dir(2), mass_dir(3), pmass
          end if
       end if
     end subroutine mass_out
 
     subroutine axis_out
       implicit none
       integer :: i
       if(printable) then
         write(nfout,'(13x,"a",18x,"b",18x,"c")')
         do i = 1, 3
            write(nfout,'(7x,f10.5,9x,f10.5,9x,f10.5)') axis(i,1), axis(i,2), axis(i,3)
         end do
       end if
     end subroutine axis_out
 end subroutine calc_mass

 subroutine calc_mass_core(ik,ib,ispin,mass,axis,pmass)
    implicit none
    integer, intent(in)                   :: ik, ib
    integer                               :: ib0, index, i, j
    integer, intent(out)                  :: ispin
    real(DP), intent(out), dimension(:)   :: mass
    real(DP), intent(out), dimension(:,:) :: axis
    real(DP),intent(out)                  :: pmass
    real(DP),              dimension(6)   :: m
    real(DP),              dimension(3,3) :: wk1, wk2
    real(DP),              dimension(3,2) :: pt
    real(DP)                              :: eb, eb0, inv_mass
    real(DP)                              :: a, b, c, d, e, f, rsum, isum, rpsum, ipsum
    real(DP)                              :: eps
    eps = 10.0d-8
    m = 0.0d0
    ispin = nspin - mod(ik,nspin)
! calculate the inverse effective mass
    eb = eb_ek(ik,ib)
    if(system/=METALLIC.and.band_type(ib,ispin) == UNFILLED_BAND) eb = eb + scissor
    do index = 1, 6
       call get_ij_index(index,i,j)
       rsum  = 0.0d0; isum  = 0.0d0
       do ib0 = 1, neg
          if(ib0 == ib) then
             cycle
          else
             eb0 = eb_ek(ik,ib0)
             if(system/=METALLIC.and.band_type(ib0,ispin) == UNFILLED_BAND) eb0 = eb0 + scissor
             if(dabs(eb0-eb)<=eps) cycle
             rpsum = 0.0d0; ipsum = 0.0d0
             call get_ptrm(ik,ib0,ib,pt)           !-> contained here
             a = pt(i,1) ; b = pt(i,2)
             c = pt(j,1) ; d = pt(j,2)
             call cmpprodc(a,b,c,d,e,f)
             rpsum = rpsum + e ; ipsum = ipsum + f
             a = pt(j,1) ; b = pt(j,2)
             c = pt(i,1) ; d = pt(i,2)
             call cmpprodc(a,b,c,d,e,f)
             rpsum = rpsum + e ; ipsum = ipsum + f
             rsum = rsum + rpsum/(eb - eb0)
             isum = isum + ipsum/(eb - eb0)
          end if
       end do
       m(index) = rsum
       if(index<=3) m(index) = m(index) + 1.0d0
    end do
! calculate effective mass from the inverse mass 
   call copy_m_2_wk1
    if(mass_direction ==1) then
       call mass_proj(nfout,wk1,pmass)
       pmass = 1.0d0/pmass
    else
       call diag3n(nfout,wk1,mass,axis)
       do i = 1, 3
          inv_mass = 1.0d0/mass(i)
          mass(i) = inv_mass
       end do
    end if
    contains
     subroutine get_ij_index(index,i,j)
       implicit none
       integer, intent(in)     :: index
       integer, intent(out)    :: i, j
       if(index == 1) then
          i = 1
          j = 1
       else if(index == 2) then
          i = 2
          j = 2
       else if(index == 3) then
          i = 3
          j = 3
       else if(index == 4) then
          i = 1
          j = 2
       else if(index == 5) then
          i = 1
          j = 3
       else if(index == 6) then
          i = 2
          j = 3
       end if
     end subroutine get_ij_index

     subroutine get_ptrm(ik,bi,bf,pt)
       implicit none
!
!    get <WF(ik,ibf)|p|WF(ik,ibi)> from <WF(ik,ibf)|r|WF(ik,ibi)>
!
       integer,  intent(in)                  :: ik, bi, bf
       real(DP), intent(out), dimension(3,2) :: pt
       real(DP),              dimension(3,2) :: rt
       real(DP)                              :: ebi, ebf, omega
! copy rtrans
       rt(1:3,1:2) = trm(ik,bf,bi,1:3,1:2,1) + trm(ik,bf,bi,1:3,1:2,2)
       ebi = eb_ek(ik,bi)
       ebf = eb_ek(ik,bf)
       omega = ebf - ebi
! === KT_add === 2015/01/17
       if ( sw_scissor_renormalization == ON ) then
          if ( omega > 0.0 ) then
             omega = omega +scissor
          else
             omega = omega -scissor
          endif
       endif
! ============== 2015/01/17

! get ptrans
       pt(1:3,1) = -1.0d0*rt(1:3,2)*omega
       pt(1:3,2) = rt(1:3,1)*omega
     end subroutine get_ptrm

     subroutine copy_m_2_wk1
       wk1(1,1) = m(1)
       wk1(2,2) = m(2)
       wk1(3,3) = m(3)
       wk1(1,2) = m(4)
       wk1(1,3) = m(5)
       wk1(2,3) = m(6)
       wk1(2,1) = wk1(1,2)
       wk1(3,1) = wk1(1,3)
       wk1(3,2) = wk1(2,3)
     end subroutine copy_m_2_wk1

     subroutine mass_proj(nf,a,pmass)
       implicit none
       integer,  intent(in)                    :: nf
       real(DP), intent(inout), dimension(:,:) :: a
       real(DP), intent(out)                   :: pmass
       real(DP),                dimension(3)   :: dir, ad
       real(DP)                                :: dlen
! calculate direction
       dir(1:3) = mass_dir(1:3)
       call norm(dir,dlen)
! project inverse mass
       ad(1) = a(1,1)*dir(1)+a(1,2)*dir(2)+a(1,3)*dir(3)
       ad(2) = a(2,1)*dir(1)+a(2,2)*dir(2)+a(2,3)*dir(3)
       ad(3) = a(3,1)*dir(1)+a(3,2)*dir(2)+a(3,3)*dir(3)
       pmass = dir(1)*ad(1)+dir(2)*ad(2)+dir(3)*ad(3)
     end subroutine mass_proj
 end subroutine calc_mass_core

 subroutine diag3n(nf,a,b,c)
    implicit none
!
!   diagonalize 3 x 3 matrix
!  
!   a: matrix to be diagonalized
!   b: eigenvalues
!   c: eigen vectors
!
    integer, intent(in)                   :: nf
    real(DP), intent(in),  dimension(3,3) :: a
    real(DP), intent(out), dimension(3)   :: b
    real(DP), intent(out), dimension(3,3) :: c
    integer                               :: i, j
    integer                               :: lwork, liwork, info
    integer, allocatable,  dimension(:)   :: iwork
    real(DP),              dimension(3,3) :: wka
    real(DP),              dimension(3)   :: w
    real(DP), allocatable, dimension(:)   :: work
    character*1                           :: JOBZ, UPLO
! wka contains the upper triangle of matrix a
    wka = 0.0d0
    do j = 1, 3
       do i = 1, j
          wka(i,j) = a(i,j)
       end do
    end do
! set parameter and arrays for DSYEVD
    JOBZ = 'V'; UPLO ='U'
    lwork = 40; info =0
    w = 0.0d0
    liwork = 20
    allocate(iwork(liwork)); iwork = 0.0d0
    allocate(work(lwork)); work = 0.0d0
    call DSYEVD(JOBZ,UPLO,3,wka,3,w,work,lwork,iwork,liwork,info)
    if(info/=0) then
       write(nf,'(1x,"!* errors in LAPACK routine DSYEVD: info = ",i3)') info
       stop
    end if
    deallocate(iwork)
    deallocate(work)
    b=w
    c=wka
 end subroutine diag3n

 subroutine calc_tm_square_eps_ek
    integer :: id_sname = -1
    if(sw_epsilon==0) return
    call tstatc0_begin('calc_tm_square_eps_ek ',id_sname)
    if(printable) &
    & write(nfout,'(/1x,"<< UVSOR-Epsilon  TRANSITION MOMENT SQUARE CALCULATION START >>")')
    if(ndrude/=2) then
       call alloc_arrays_for_tmsq            
       call m_ESoc_EPS_alloc_occup_l_ek   
       call FermiEnergyLevel_ek_here(nrd_efermi)
       call occupation_ek(nfout)  
       call check_band_energy_range(nfout)
       call band_gap(nfout)
       if(sw_mass/=0) call calc_mass(nfout,nsym)

! ========= KT_add ========== 13.0S
       if ( sw_corelevel_spectrum == ON ) then
!          if ( icond >= 2 .and. nrd_efermi == OFF ) call m_CLS_set_ene_core_states
          call adjust_erange
       endif
! =========================== 13.0S

       call tmsq_ek  

! ========= KT_mod ========== 13.1R
!       if(nlo==0) call dealloc_trm
!       if (icond==2) call dealloc_rptrans
!
       if ( sw_phonon_with_epsilon == OFF ) then
          if(nlo==0) call dealloc_trm
          if ( icond <=2 ) call dealloc_rptrans
       endif
! ============================ 13.1R

       call m_ESoc_EPS_dealloc_occup_l_ek
       call dealloc_arrays_for_tmsq
       if(printable) &
       & write(nfout,'(1x,"<< UVSOR-Epsilon   TRANSITION MOMENT SQUARE CALCULATION END >>"/)')
    end if
    if(ndrude==2) then
       call m_ESoc_EPS_alloc_occup_l_ek
       call FermiEnergyLevel_ek_here(nrd_efermi)
       call occupation_ek(nfout)
       call m_ESoc_EPS_dealloc_occup_l_ek
    end if
    call tstatc0_end(id_sname)

  contains

     subroutine alloc_arrays_for_tmsq
       integer :: ni, nf, nspin_kt

! === KT_add === 13.1R
       if ( allocated(trm2) ) deallocate(trm2)
! ============== 13.1R

! === KT_add === 2014/09/22
       if ( noncol ) then
          nspin_kt = 1
       else
          nspin_kt = nspin
       endif
! ============== 2014/09/22

       if(way_BZintegral==L_TETRAHEDRON) then
         if(nrd_efermi==0) then
            ni = neg;  nf = neg
! ==================================== KT_add ========== 13.0S
            if ( sw_corelevel_spectrum == ON ) ni = num_core_states
! ====================================================== 13.0S
            allocate(trm2(np0,ni,nf,6,nspin_kt)); trm2=0.0d0
            if(ipri >= 2) then
               write(nfout,'(" way_BZintegral=L_TETRAHEDRON, nrd_efermi=0")')
               write(nfout,'(" trm2 = (",5i6,")")') np0,ni,nf,6,nspin_kt
            end if
         else
            ni = num_vb;  nf = num_cb
! ==================================== KT_add ========== 13.0S
            if ( sw_corelevel_spectrum == ON ) ni = num_core_states
! ====================================================== 13.0S
            allocate(trm2(np0,ni,nf,6,nspin_kt)); trm2=0.0d0
            if(ipri >= 2) then
               write(nfout,'(" way_BZintegral=L_TETRAHEDRON, nrd_efermi/=0")')
               write(nfout,'(" trm2 = (",5i6,")")') np0,ni,nf,6,nspin_kt
            end if
         end if
       else
         if(nrd_efermi==0) then
            ni = neg;  nf = neg
! ==================================== KT_add ========== 13.0S
            if ( sw_corelevel_spectrum == ON ) ni = num_core_states
! ====================================================== 13.0S
            allocate(trm2(kv3_ek/nspin,ni,nf,6,nspin_kt)); trm2=0.0d0
            if(ipri >= 2) then
               write(nfout,'(" way_BZintegral/=L_TETRAHEDRON, nrd_efermi=0")')
               write(nfout,'(" trm2 = (",5i6,")")') kv3_ek/nspin,ni,nf,6,nspin_kt
            end if
         else
            ni = num_vb;  nf = num_cb
! ==================================== KT_add ========== 13.0S
            if ( sw_corelevel_spectrum == ON ) ni = num_core_states
! ====================================================== 13.0S
            allocate(trm2(kv3_ek/nspin,ni,nf,6,nspin_kt)); trm2=0.0d0
            if(ipri >= 2) then
               write(nfout,'(" way_BZintegral/=L_TETRAHEDRON, nrd_efermi/=0")')
               write(nfout,'(" trm2 = (",5i6,")")') kv3_ek/nspin,ni,nf,6,nspin_kt
            end if
         end if
       end if

       if(nrd_efermi==0) then
          ni = neg;  nf = neg
       else
          ni = num_vb;  nf = num_cb
       end if
! ==================================== KT_add ========== 13.0S
       if ( sw_corelevel_spectrum == ON ) ni = num_core_states
! ====================================================== 13.0S
       allocate(os_str(kv3_ek,ni,nf,6)); os_str=-1.0d308

     end subroutine alloc_arrays_for_tmsq

     subroutine dealloc_trm
       deallocate(trm)
     end subroutine dealloc_trm

     subroutine dealloc_rptrans
       deallocate(rtrans);  deallocate(ptrans)

! ======================================= Added by K. Tagami =========== 0.2
       if ( nonlocal ==1 ) then
          if ( sw_Hubbard == ON .and. sw_tm_hubbard_correction == ON ) then
             deallocate( rtrans_hub )
          endif
       endif
! ======================================================================

       if(nppcorr==2)  deallocate(ilocal_l)

! ================ KT_mod =========================== 13.0S
!       if(nppcorr>=1)  then
!          deallocate(nppc_data);   deallocate(dipole_dxyz_us)
!          deallocate(phase_ylm1);  deallocate(phase_ylm2)
!          deallocate(dipole_tau1); deallocate(dipole_tau2)
!       end if
!
       if ( sw_corelevel_spectrum == ON ) then
          if(nppcorr>=1)  then
             call m_CLS_dealloc_dipole_core2val
          endif
       else
          if(nppcorr>=1)  then
#if 0
             deallocate(nppc_data);   deallocate(dipole_dxyz_us)
             deallocate(phase_ylm1);  deallocate(phase_ylm2)
             deallocate(dipole_tau1); deallocate(dipole_tau2)
#else
             call m_VBS_dealloc_dipole_ppc
#endif
          end if
       endif
! =================================================== 13.0S

     end subroutine dealloc_rptrans

     subroutine dealloc_arrays_for_tmsq
       deallocate(os_str)
     end subroutine dealloc_arrays_for_tmsq

! ======= KT_add ========= 13.0S
     subroutine adjust_erange
       emin_core_spectrum = efermi - maxval( ene_core_states ) +scissor
       e = e + emin_core_spectrum 
     end subroutine adjust_erange
! ======================== 13.0S

 end subroutine calc_tm_square_eps_ek

 subroutine BZintegration_eps
    integer :: id_sname = -1

    if(sw_epsilon==0) return
    if(ndrude==2) return

    call tstatc0_begin('BZintegration_eps ',id_sname)

    imeps = 0.0d0

    if (way_BZintegral==PARABOLIC_B.or.way_BZintegral==GAUSSIAN_B) then
      if(printable) then
         if(way_BZintegral==PARABOLIC_B) write(nfout,'(1x,"<< UVSOR-Epsilon  PARABOLIC BROADNING START  >>")')
         if(way_BZintegral==GAUSSIAN_B) write(nfout,'(1x,"<< UVSOR-Epsilon  GAUSSIAN BROADNING START  >>")')
      end if

!     call smearing_method(nfout,nstep)
!     call smearing_method_mpi(nfout,nstep)
      call smearing_method_mpi2(nfout,nstep)

      if(printable) then
         if(way_BZintegral==PARABOLIC_B) write(nfout,'(1x,"<< UVSOR-Epsilon  PARABOLIC BROADNING END   >>"/)')
         if(way_BZintegral==GAUSSIAN_B) write(nfout,'(1x,"<< UVSOR-Epsilon  GAUSSIAN BROADNING END  >>"/)')
      end if
    end if

    if(way_BZintegral==L_TETRAHEDRON) then
       if(ipri >= 1) write(nfout,'(1x,"<< UVSOR-Epsilon   LINEAR TETRAHEDRON INTEGRATION START >>")')

       call linear_tetrahedron(nfout,nstep)

       if(ipri >= 2) write(nfout,'(1x,"<< UVSOR-Epsilon   LINEAR TETRAHEDRON INTEGRATION END >>"/)')
    end if

    if(nsym/=0) call full_bz_int(nstep,nsym)
    if(crystal==POLYCRYSTAL) call calc_imeps_for_poly
    call tstatc0_end(id_sname)
 end subroutine BZintegration_eps

 subroutine kkt_eps
    integer :: id_sname = -1
    if(sw_epsilon==0) return
    call tstatc0_begin('kkt_eps ',id_sname)
    if(ndrude/=2) then
       if(printable) &
       & write(nfout,'(/1x,"<< UVSOR-Epsilon   KRAMERS-KRONIG TRANSFORMATION START>>")')
!      call kkt_v(e_step,nstep)
       call kkt_v_mpi(e_step,nstep)
       call off_diagonal(nstep)
       if(printable) &
       & write(nfout,'(1x,"<< UVSOR-Epsilon   KRAMERS-KRONIG TRANSFORMATION END >>"/)')
    end if
    call tstatc0_end(id_sname)
 end subroutine kkt_eps

 subroutine calc_drude_eps
    integer :: id_sname = -1
    if(sw_epsilon==0) return
    call tstatc0_begin('calc_drude_eps ',id_sname)
    if(system==METALLIC.and.ndrude/=0) then
       call alloc_arrays_for_drude
       call drude_term_eps
       call calc_drude
       call sum_eps_plus_drude
    end if
    call tstatc0_end(id_sname)
    contains
     subroutine alloc_arrays_for_drude
       allocate(i_drude(nstep)); i_drude=0.0d0
       allocate(r_drude(nstep)); r_drude=0.0d0
     end subroutine alloc_arrays_for_drude
 end subroutine calc_drude_eps

 subroutine optics_eps
    integer :: id_sname = -1
    if(sw_epsilon==0) return
    call tstatc0_begin('optics_eps ',id_sname)
    if(ptype>=0) then
       call eps_for_photon(nfout,nstep,u,p,ptype)
       if(printable) &
       & write(nfout,'(1x,"<< UVSOR-Epsilon   OPTICAL PROPERTIES CALLCULATION START >>")')
       call alloc_arrays_for_optics
       call optics(nstep,1)
       if(printable) &
       & write(nfout,'(1x,"<< UVSOR-Epsilon   OPTICAL PROPERTIES CALCULATION END >>"/)')
    end if
    call tstatc0_end(id_sname)
    contains
     subroutine alloc_arrays_for_optics
        allocate(refr(nstep)); refr=0.0d0
        allocate(refi(nstep)); refi=0.0d0
        allocate(absc(nstep)); absc=0.0d0
        allocate(reflc(nstep)); reflc=0.0d0
     end subroutine alloc_arrays_for_optics
 end subroutine optics_eps

! =============== KT_add ================= 13.0S
 subroutine corelevel_eps
    integer :: id_sname = -1

   if(sw_epsilon==0) return
   call tstatc0_begin('corelevel_eps ',id_sname)
!!   call eps_for_corelevel_spectrum(nfout,nstep,vec_q)
   call eps_for_corelevel_spectrum(nfout,nstep,u)        ! only for XANES

   call tstatc0_end(id_sname)

 end subroutine corelevel_eps
! ======================================== 13.0S

 subroutine wd_eps
    if(sw_epsilon==0) return
    if(printable) &
    & write(nfout,'(1x,"<< UVSOR-Epsilon   FINALIAZTION STARTS >>")')

! ============ KT_mod ================= 13.0S
!    call eps_out(nfout,nstep,ptype)
!
    if ( sw_corelevel_spectrum == ON ) then
       call eps_out_corelevel_spectrum(nfout,nstep)
    else
       call eps_out(nfout,nstep,ptype)
    endif
! ===================================== 13.0S

    if(nlo/=0) call nlo_out(nfout,nstep)
    if(magneto_optical/=0) call magopt_out(nfout)

    if(printable) write(nfout,'(1x,"<< UVSOR-Epsilon   FINALIZATION END >>")')
 end subroutine wd_eps
  
 subroutine alloc_m_Epsilon1_ek
    implicit none
    integer :: nspin_kt
!
!   subroutine allocating arrays
!
    call read_kmesh_from_nfinp  !! necessary for both tetrahedron !!
                                !! and parabolic broadening case  !!
    if(m_CtrlP_way_of_smearing() /= TETRAHEDRON)  then
       allocate(nxyz_tetra(3)) ; nxyz_tetra = 0
       if(printable) &
       & write(nfout,*) m_CtrlP_way_of_smearing()
       call get_cub_data  ! -> np0,np2,ip20,nxyz_tetra,ip2cub
    end if

! === KT_add ==== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================ 2014/09/22

! ==== KT_mod ====== 2014/09/22
!    allocate(edmax_nspin(nspin)); edmax_nspin = 0.0d0
!    allocate(edmin_nspin(nspin)); edmin_nspin = 0.0d0
!    allocate(spin_charge(nspin)); spin_charge=0.0d0
!
    allocate(edmax_nspin(nspin_kt)); edmax_nspin = 0.0d0
    allocate(edmin_nspin(nspin_kt)); edmin_nspin = 0.0d0
    allocate(spin_charge(nspin_kt)); spin_charge=0.0d0
! ================== 2014/09/22

    allocate(n2_mpi(neg,kv3)) ; n2_mpi=0.0d0
    allocate(e2_mpi(neg,kv3)) ; e2_mpi=0.0d0

! ==== KT_mod ==== 13.1R
!    if(icond==2) then
    if(icond<=2) then
       allocate(n2_mpi_ek(neg,kv3_ek)); n2_mpi_ek=0
    end if
! ================ 13.1R

    allocate(occ_mpi_ek(neg,kv3_ek)); occ_mpi_ek=0.0d0
    allocate(PP_norm_type(ntyp))  ; PP_norm_type = 0
    allocate(PP_local_type(ntyp)) ; PP_local_type =0

! ==== KT_mod ====== 2014/09/22
!    allocate(band_type(neg,nspin)); band_type=0
!
    allocate(band_type(neg,nspin_kt)); band_type=0
! ================== 2014/09/22

    if(icond<=2) then
       allocate(eb_ek(kv3_ek,neg)); eb_ek=0.0d0
       allocate(eb_ek_tmp(kv3,neg)); eb_ek_tmp=0.0d0
    end if

    if(way_BZintegral == L_TETRAHEDRON) then
        allocate(map_k_eps(np0)); map_k_eps =0
    endif
    if(way_BZintegral /= L_TETRAHEDRON) then
        allocate(map_k_eps(kv3_ek/nspin)); map_k_eps =0
    endif

    if(ndrude==2) return


! ====== KT_mod ==== 13.1R
!    if(icond ==2) then
    if(icond <=2) then
       allocate(ind_vb(neg,kv3_ek)); ind_vb=0
       allocate(ind_cb(neg,kv3_ek)); ind_cb=0
    end if
! ================== 13.1R

    allocate(ind_vb2(neg,kv3_ek)); ind_vb2=0
    allocate(ind_cb2(neg,kv3_ek)); ind_cb2=0
    if(ikshift/=0.0d0) then
       allocate(vkxyz_ek_org(kv3_ek,3,2)); vkxyz_ek_org = 0.0d0
    end if
 end subroutine alloc_m_Epsilon1_ek

 subroutine alloc_m_Epsilon2(nstep)
!
!   subroutine allocating arrays for dielectric fuction
!   and subsceptibilities
!
    implicit none
    integer,intent(in) :: nstep
    allocate(imeps(nstep,6)); imeps=0.0d0
    allocate(reps(nstep,6)); reps=0.0d0
    if(nlo==1) then
       allocate(imchi2(nstep,18)); imchi2 = 0.0d0
       allocate(rechi2(nstep,18)); rechi2 = 0.0d0
    end if
    if(nlo==2) then
       allocate(imchi3(nstep,30)); imchi3 = 0.0d0
       allocate(rechi3(nstep,30)); rechi3 = 0.0d0
       allocate(chi3int(nstep,30)); chi3int =0.0d0
    end if
    if(magneto_optical==1) then
       allocate(optcr_l(nstep,2)); optcr_l = 0.0d0
       allocate(optci_l(nstep,2)); optci_l = 0.0d0
       allocate(kerr_rotation(nstep)); kerr_rotation = 0.0d0
       allocate(kerr_ellipticity(nstep)); kerr_ellipticity = 0.0d0
    end if
 end subroutine alloc_m_Epsilon2

 subroutine dealloc_m_Epsilon
!
!  subroutine deallocating data arrays
!
    implicit none
    if(sw_epsilon==0) return

! == KT_add === 13.1R
    if ( m_CtrlP_way_of_smearing() /= TETRAHEDRON )  then
       if ( allocated(nxyz_tetra) ) deallocate( nxyz_tetra )
    endif
! ============= 13.1R

    deallocate(edmax_nspin);   deallocate(edmin_nspin)
    deallocate(spin_charge)
    deallocate(n2_mpi);     deallocate(e2_mpi);   deallocate(n2_mpi_ek)
    deallocate(occ_mpi_ek)
    deallocate(band_type); deallocate(eb_ek)

    if(ikshift/=0.0d0) deallocate(vkxyz_ek_org)
    deallocate(map_k_eps)

    if(ndrude/=2) then
       deallocate(ind_vb); deallocate(ind_cb); deallocate(ind_vb2); deallocate(ind_cb2)
       if(way_BZintegral==L_TETRAHEDRON) then
          deallocate(vk00xyz); deallocate(vk0xyz)
          deallocate(vk0_op);  deallocate(nopr_k);  deallocate(op_k)
       end if
       deallocate(trm2)
    end if

    deallocate(e);  deallocate(imeps);   deallocate(reps)

! ========== KT_mod ============== 13.0S
!    if(ptype/=-1) call dealloc_optics_data_array
!
    if ( sw_corelevel_spectrum /= ON ) then
       if(ptype/=-1) call dealloc_optics_data_array
    endif
!=============================== 13.0S

    if(nlo/=0) call dealloc_nlo_data_array
    if(magneto_optical /=0) call dealloc_magopt_data_array
    if(ndrude>0) call dealloc_drude_data_array

! ========= KT_add ============= 13.0S
    if ( sw_corelevel_spectrum == ON ) then
       call m_CLS_dealloc_core_ae_wfns
       call m_CLS_dealloc_wfn_core_states
    endif
! ============================== 13.0S
    contains

     subroutine dealloc_drude_data_array
       deallocate(i_drude);    deallocate(r_drude)
     end subroutine dealloc_drude_data_array

     subroutine dealloc_optics_data_array
       deallocate(refr);  deallocate(refi); deallocate(absc);  deallocate(reflc)
     end subroutine dealloc_optics_data_array

     subroutine dealloc_nlo_data_array
       if(nlo==1) then
          deallocate(imchi2);  deallocate(rechi2)
       else if(nlo==2) then
          deallocate(imchi3);  deallocate(rechi3)
       end if
     end subroutine dealloc_nlo_data_array

     subroutine dealloc_magopt_data_array
       deallocate(optcr_l);  deallocate(optci_l)
       deallocate(kerr_rotation); deallocate(kerr_ellipticity)
     end subroutine dealloc_magopt_data_array

 end subroutine dealloc_m_Epsilon 

 subroutine m_CtrlP_rd_epsilon(nfout)
!
! subroutine reading tag_efermi in F_INP file
!

! This subroutine sets following parameters
!   * calculation control parameters
!      sw_epsilon   : switch for epsilon calculation:
!                     = on  or 1  calculate
!                     = off or 0  not calculate
!   * fermi level parameters
!      read_efermi  : Fermi level read option
!                     = on  or 1  read Fermi level (only for semiconductor or insulator)
!                     = off or 0  calculate Fermi level
!      efermi       : Fermi level
!   * crystal type parameters
!      crystal type : crystal type option
!                     = single    single crystal (default)
!                     = poly      polycrystal
!   * photon parameters
!      ux           : x component of polarization vector
!      uy           : y component of
!      uz           : z component of
!      px           : x component of pointing vector
!      py           : y component of
!      pz           : z component of
!      e_low        : lower limit of photon energy
!      e_high       : higher limit of photon energy
!      e_step       : step of photon energy
!   * transition moment paramerters
!      type         : transition moment type
!                     = l         local transition moment(default)
!                     = rn        Read and Needs (RN) type transition moment (Physical Review B vol.44, 13071 (1991))
!                     = ks        Kageshima and Shiraishi (KS) type transition moment (Physical Review  Bvol 56, 14985 (1997))
!                                 original KS type transition moment for BHS type pseudopotentials
!                                 modified KS type transition moment fot Troullier-Martin type pseudopotentials
!      delq         : delta q valule for <phi1[Vnl,r]phi2> calculation (RN case)
!                     see Physical Review B vol.62, 4383 (2000) for details
!      nsym         : transition moment symmetrization option
!                     = on        symmetrize
!                     = off       not symmetrize
!   * effective mass parameters
!      sw_mass      : mass calculation
!                     = off        no effective mass calculation
!                     = on         calcuate mass
!      nx           : x direction index of effective mass
!      ny           : y  
!      nz           : z 
!      shift        : ikshift value
!                     default =0.0 d0
!      point        : k-point specification
!                     = band_edge  calculate mass at conduction and valence band edges
!                     = input      calculate mass of ib-th band at ik-th k-point
!      ik           : k-point index
!      ib           : band index
!   * Brillouin zone integration parameters
!      method       : method of Brillouin zone integration
!                     = tetrahedron(t) linear tetrahedron method
!                     = parabolic(p)   parabolic smearing method(default)
!                     = gaussian(g)    gaussian smearing method
!      tetra_eps    : eps value for linear tetrahedron calculation (see nsdos0_m)
!                     defalt =1.0d-4 Hartree
!      width        : smearing width for parabolic and smearing method
!      spin         : spin option for Brulloiun zone integration
!                     = major      integrate major spin transition moment
!                     = minor      integrate mainor spin transition moment
!                     = both       integrate both major and minor spin transition meoment (default)
!      band_i       : valance band index
!      band_f       : conduction band index
!                     ** band_i and band_f parameters are for band decompostion analysis
!                        and should be specifiled in pair
!   * band gap correction option parameters
!      scissor_operator : scissor operator value (only for insulators and semiconductors)
!                         default value =0.0
!   * Drude parameters
!      drude        : drude calculation option
!                     = on        include Drude term (only for metals)
!                     = off       not include Drude term (default)
!                     = only      calculate Drude term only
!      conductivity : DC conductivity
!    damping factor : Drude damping factor
!  plasma_frequency : plasma frequency
!    effective_mass : effective mass pf free electrons
!
!   * nonlinear optics(NLO)
!      process      : NLO calculation option
!                     = off       does not calculate non linear optical susceptibilities
!                     = SHG       calculate Second Harmonic Generation (SHG) Susceptibilitiies
!                     = THG       calculate Third Harmonic Generation (THG) Susceptibilities
!                     defalult = off
!      excitation   : virtual excitation type
!                     = all       include virtual excitation processes
!                     = electron  include virtual electron excitation processes only
!                     = hole      include virtual hole excitation processes only
!                     = three_state include virtual three level excitation processes only
!                     default = all
!      band         : band excitation type
!                     = all       include all band excitations (inter + intra-band excitations)
!                     = inter     include inter-band excitations only
!                     = intra     include intra-band excitations only
!                     default = all
!      term         : frequency term
!                     = all       include omega and omega2 term
!                     = omega     include omega term only
!                     = omega2    include omega2 term only
!                     = omega3    include omega3 term only
!                     default = all
!      method       : double resonance method
!                     = omit (default)  omit double resonance(dres) term
!                     = damping         damp dres term
!                     defalt = omit
!      cut_off      : cut-off energy for omittion and damping 
!                     default = 10.0d-3 Hartree
!      smearing_fact: SHG/THG moment condition for smearing calculation
!                     = resonance resonant moment is used
!                     = off_resonance non-resonant moment is used
!                     default = off_resonance
!
!    ** caution **
!      spin option      -> same as those set for the Brillouin zone integration parameter
!
!   * magneto optics
!      magopt       : magneto optics calculatoon
!                     = off       does not calculate magneto optical properties
!                     = on        calculate magneto optical properties (Kerr rotation and Kerr ellipticity)
!
!   * print option
!      ipriepsilon  : print level
!                     = 0         brief
!                     = 1         standard
!                     = 2         detail
!                     = 3         debug
!
    integer, intent(in) :: nfout
    character(len=FMAXVALLEN) :: rstr
    integer :: iret, f_selectBlock, f_getStringValue, f_getRealValue, f_getIntValue
    integer :: f_selectParentBlock, f_selectTop
    real(kind=DP) :: dret
!   ---------- epsilon ----------
    character(len("epsilon")),parameter                    ::    tag_epsilon                    = "epsilon"
    character(len("sw_epsilon")), parameter                ::    tag_sw_epsilon                 = "sw_epsilon"
    character(len("on")), parameter                        ::    tag_on                         = "on"
    character(len("off")), parameter                       ::    tag_off                        = "off"
    character(len("destart")),parameter                    ::    tag_restart                    = "restart"
    character(len("trm_file")),parameter                   ::    tag_trm_file                   = "trm_file"
    character(len("wf_file")),parameter                    ::    tag_wf_file                    = "wf_file"
! crystal type
    character(len("crystal_type")), parameter              ::    tag_crystal_type               = "crystal_type"
    character(len("single")), parameter                    ::    tag_single_crystal             = "single"
    character(len("poly")), parameter                      ::    tag_poly_crystal               = "poly"
! fermi energy
    character(len("fermi_energy")), parameter              ::    tag_fermi_energy               = "fermi_energy"
    character(len("read_efermi")), parameter               ::    tag_read_efermi                = "read_efermi"
    character(len("efermi")),parameter                     ::    tag_efermi                     = "efermi"
! photon
    character(len("photon")),parameter                     ::    tag_photon                     = "photon"
    character(len("polar")),parameter                      ::    tag_polar                      = "polar"
    character(len("ux")),parameter                         ::    tag_ux                         = "ux"
    character(len("uy")),parameter                         ::    tag_uy                         = "uy"
    character(len("uz")),parameter                         ::    tag_uz                         = "uz"
    character(len("pointing")),parameter                   ::    tag_pointing                   = "pointing"
    character(len("px")),parameter                         ::    tag_px                         = "px"
    character(len("py")),parameter                         ::    tag_py                         = "py"
    character(len("pz")),parameter                         ::    tag_pz                         = "pz"
    character(len("energy")),parameter                     ::    tag_energy                     = "energy"
    character(len("low")), parameter                       ::    tag_low                        = "low"
    character(len("high")),parameter                       ::    tag_high                       = "high"
    character(len("step")),parameter                       ::    tag_step                       = "step"
! transition moment
    character(len("transition_moment")),parameter          ::    tag_transition_moment          = "transition_moment"
    character(len("check_option")), parameter              ::    tag_check_option               = "check_option"
    character(len("type")),parameter                       ::    tag_type                       = "type"
    character(len("band_i")), parameter                    ::    tag_band_i                     = "band_i"
    character(len("band_f")), parameter                    ::    tag_band_f                     = "band_f"
    character(len("local")),parameter                      ::    tag_local                      = "local"
    character(len("l")),parameter                          ::    tag_l                          = "l"
    character(len("read_needs"))                           ::    tag_read_needs                 = "read_needs"
    character(len("rn")),parameter                         ::    tag_rn                         = "rn"
    character(len("kageshima_shiraishi"))                  ::    tag_kageshima_shiraishi        = "kageshima_shiraishi"
    character(len("ks")),parameter                         ::    tag_ks                         = "ks"
    character(len("symmetry"))                             ::    tag_symmetry                   = "symmetry"
    character(len("delq")),parameter                       ::    tag_delq                       = "delq"
! mass
    character(len("mass")),parameter                       ::    tag_mass                       = "mass"
    character(len("sw_mass")),parameter                    ::    tag_sw_mass                    = "sw_mass"
    character(len("direction")), parameter                 ::    tag_direction                  = "direction"
    character(len("nx")),parameter                         ::    tag_nx                         = "nx"
    character(len("ny")),parameter                         ::    tag_ny                         = "ny"
    character(len("nz")),parameter                         ::    tag_nz                         = "nz"
    character(len("point")),parameter                      ::    tag_point                      = "point"
    character(len("shift")),parameter                      ::    tag_shift                      = "shift"
    character(len("band_edge")),parameter                  ::    tag_band_edge                  = "band_edge"
    character(len("input")),parameter                      ::    tag_input                      = "input"
    character(len("ik")),parameter                         ::    tag_ik                         = "ik"
    character(len("ib")),parameter                         ::    tag_ib                         = "ib"
! BZ integration
    character(len("BZ_integration")), parameter            ::    tag_BZ_integration             = "BZ_integration"
    character(len("method")), parameter                    ::    tag_method                     = "method"
    character(len("tetrahedron")), parameter               ::    tag_tetrahedron                = "tetrahedron"
    character(len("t"))                                    ::    tag_t                          = "t"
    character(len("parabolic")), parameter                 ::    tag_parabolic                  = "parabolic"
    character(len("p")), parameter                         ::    tag_p                          = "p"
    character(len("gaussian")), parameter                  ::    tag_gaussian                   = "gaussian"
    character(len("g")), parameter                         ::    tag_g                          = "g"
    character(len("width")), parameter                     ::    tag_width                      = "width"
    character(len("nistep")), parameter                    ::    tag_nistep                     = "nistep"
    character(len("spin")),  parameter                     ::    tag_spin                       = "spin"
    character(len("both")),  parameter                     ::    tag_both                       = "both"
    character(len("major")), parameter                     ::    tag_major                      = "major"
    character(len("minor")), parameter                     ::    tag_minor                      = "minor"
    character(len("tetra_eps")),parameter                  ::    tag_tetra_eps                  = "tetra_eps"
! band gap correction
    character(len("band_gap_correction")), parameter       ::    tag_band_gap_correction        = "band_gap_correction"
    character(len("scissor_operator")), parameter          ::    tag_scissor                    = "scissor_operator"
! Drude term
    character(len("drude_term")), parameter                ::    tag_drude_term                 = "drude_term"
    character(len("drude")), parameter                     ::    tag_drude                      = "drude"
    character(len("drude_only")),parameter                 ::    tag_drude_only                 = "drude_only"
    character(len("conductivity")), parameter              ::    tag_conductivity               = "conductivity"
    character(len("damping_factor")), parameter            ::    tag_damping_factor             = "damping_factor"
    character(len("plasma_frequency")), parameter          ::    tag_plasma_frequency           = "plasma_frequency"
    character(len("effective_mass")), parameter            ::    tag_effective_mass             = "effective_mass"
! nonlinear optics
    character(len("nonlinear_optics")), parameter          ::    tag_nonlinear_optics           = "nonlinear_optics"
    character(len("process")), parameter                   ::    tag_process                    = "process"
    character(len("SHG")), parameter                       ::    tag_SHG                        = "SHG"
    character(len("THG")), parameter                       ::    tag_THG                        = "THG"
    character(len("excitation")), parameter                ::    tag_excitation                 = "excitation"
    character(len("all")), parameter                       ::    tag_all                        = "all"
    character(len("electron")), parameter                  ::    tag_electron                   = "electron"
    character(len("hole")), parameter                      ::    tag_hole                       = "hole"
    character(len("three_state")), parameter               ::    tag_three_state                = "three_state"
    character(len("band")), parameter                      ::    tag_band                       = "band"
    character(len("inter")), parameter                     ::    tag_inter                      = "inter"
    character(len("intra")), parameter                     ::    tag_intra                      = "intra"
    character(len("term")), parameter                      ::    tag_term                       = "term"
    character(len("omega")), parameter                     ::    tag_omega                      = "omega"
    character(len("2omega")), parameter                    ::    tag_2omega                     = "2omega"
    character(len("3omega")), parameter                    ::    tag_3omega                     = "3omega"
    character(len("dble_resonance")), parameter            ::    tag_dble_resonance             = "dble_resonance"
    character(len("omit")), parameter                      ::    tag_omit                       = "omit"
    character(len("damping")), parameter                   ::    tag_damping                    = "damping"
    character(len("cut_off")), parameter                   ::    tag_cut_off                    = "cut_off"
    character(len("smearing_fact")), parameter             ::    tag_smearing_fact              = "smearing_fact"
    character(len("resonance")), parameter                 ::    tag_resonance                  = "resonance"
    character(len("off_resonance")), parameter             ::    tag_off_resonance              = "off_resonance"
! magneto optics
    character(len("magneto_optical")),parameter            ::    tag_magneto_optical            = "magneto_optical"
    character(len("magopt")),parameter                     ::    tag_magopt                     = "magopt"
! print option
    character(len("ipriepsilon")), parameter               ::    tag_ipriepsilon                = "ipriepsilon"

! ============================== Added by K. Tagami =========== 0.2
    character(len("sw_tm_hubbard_correction")), parameter      ::  &
   &       tag_sw_tm_hubbard_correction                = "sw_tm_hubbard_correction"
! ========================================================

! ========== KT_add =========== 13.0R
    character(len("sw_trm_print_full")), parameter         :: &
         &           tag_sw_trm_print_full             = "sw_trm_print_full"
    character(len("sw_wf_rspace_print_full")), parameter         :: &
         &           tag_sw_wf_rspace_print_full       = "sw_wf_rspace_print_full"
    character(len("delta_omega")), parameter         :: &
         &           tag_delta_omega             = "delta_omega"
! ============================= 13.0R

! ========== KT_add =========== 2015/01/17
    character(len("sw_scissor_renormalization")), parameter         :: &
         &           tag_sw_scissor_renormalization   = "sw_scissor_renormalization"
! ============================= 2015/01/17

! EELS
    character(len("eels")),parameter  ::  tag_eels = "eels"
    character(len("momentum")),parameter  ::  tag_momentum = "momentum"
    character(len("qx")),parameter        ::  tag_qx       = "qx"
    character(len("qy")),parameter        ::  tag_qy       = "qy"
    character(len("qz")),parameter        ::  tag_qz       = "qz"

!   ----------- epsilon ----------
    iret = f_selectTop()
    ! --- epsilon ---
    if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !*  tag_epsilon")')
    if( f_selectBlock( tag_epsilon) == 0) then
       if( f_getStringValue( tag_sw_epsilon, rstr, LOWER) == 0) call set_sw_epsilon(rstr)
       if(sw_epsilon==0) return
! set resart mode
       if( f_getStringValue( tag_restart, rstr, LOWER) == 0) call set_restart(rstr)
! read crystal type
       if( f_getStringValue( tag_crystal_type, rstr, LOWER) == 0) then
          call set_crystal_type(rstr)
       else
          if(printable) then
             write(nfout,'(1x,"!* tag_crystal_type is not found in the input file <<CtrlP_rd_epsilon >>")')
             write(nfout,'(1x,"!* crystal_type is set to be single crystal as default")')
          end if
       end if
! read fermi energy
       if(icond == 2 .or. icond ==3 ) then
          if( f_selectBlock( tag_fermi_energy) == 0) then
             if( f_getStringValue( tag_read_efermi, rstr, LOWER) == 0) call set_efermi(rstr)
          else
             stop ' tag_fermi_energy is not given in the input file <<CtrlP_rd_epsilon >> '
          end if
          iret = f_selectParentBlock()
       end if

! ================== KT_add ======================== 13.0S
       call m_CLS_chk_sw_corelevel_spectrum

       if ( sw_corelevel_spectrum == ON ) then
          call m_CLS_rd_n_main
          u = vec_q
          e_low = e_low_CLS;    e_high = e_high_CLS;     e_step = e_step_CLS
       endif

       if ( f_selectBlock( tag_eels ) == 0 ) then
          eels_mode = ON
          If ( f_selectBlock( tag_momentum ) == 0 ) then
             call set_momentum_transfer
             u = vec_q
             iret = f_selectParentBlock()
          endif
          if( f_selectBlock( tag_energy) == 0) then
             call set_electron_energy
             iret = f_selectParentBlock()
          end if
          iret = f_selectParentBlock()
       endif
! ================================================== 13.0S

! read photon information
       if( f_selectBlock( tag_photon) == 0) then
          If( f_selectBlock( tag_polar) == 0) then
             call set_polarization_vector
             iret = f_selectParentBlock()
          end if
          if( f_selectBlock( tag_pointing) == 0) then
             call set_pointing_vector
             iret = f_selectParentBlock()
          end if
          if( f_selectBlock( tag_energy) == 0) then
             call set_photon_energy
             iret = f_selectParentBlock()
          end if
          iret = f_selectParentBlock()
       else
          if ( sw_corelevel_spectrum == OFF ) then
             stop ' tag_photon is not given in the input file <<CtrlP_rd_epsilon>> '
          endif
       end if

! read transition moment option
       if( f_selectBlock( tag_transition_moment) == 0) then
          if( f_getStringValue( tag_check_option, rstr, LOWER) == 0) call set_check_ts_option(rstr)
          if( f_getIntValue( tag_band_i, iret) == 0) band_i=iret
          if( f_getIntValue( tag_band_f, iret) == 0) band_f=iret
          if( f_getStringValue( tag_type, rstr, LOWER) == 0) call set_transition_moment_type(rstr)

! =================== Modified by K. Tagami ======================= 0.2
!          if(nonlocal==1) then
!             if( f_getRealValue( tag_delq, dret," ") == 0) delq=dret
!          end if
! ------------------------
          if( f_getIntValue( tag_sw_tm_hubbard_correction, iret) == 0)  sw_tm_hubbard_correction = iret
!
          if ( sw_hubbard == ON ) then
             write(nfout,*) "*** sw_tm_hubbard_correction = ", sw_tm_hubbard_correction
          endif
! ----
          if(nonlocal==1) then
             if( f_getRealValue( tag_delq, dret," ") == 0) delq=dret
          else if ( sw_hubbard == ON .and. sw_tm_hubbard_correction == ON ) then
            if( f_getRealValue( tag_delq, dret," ") == 0) delq=dret
          endif
! ===================================================================
          if( f_getStringValue( tag_symmetry, rstr, LOWER) == 0) call set_transition_moment_symmetry(rstr)
          iret = f_selectParentBlock()
       else
          stop ' tag_transition_moment is not given in the input file <<CtrlP_rd_epsilon>> '
       end if
! read mass calculation option
       if( f_selectBlock(tag_mass) == 0) then
          if( f_getStringValue( tag_sw_mass, rstr, LOWER) == 0) call set_sw_mass(rstr)
             if(sw_mass/=0) then
                if( f_selectBlock( tag_direction) == 0) then
                   call set_mass_direction
                   iret = f_selectParentBlock()
                end if
                if( f_getStringValue( tag_point, rstr, LOWER) == 0) call set_mass_kpoint(rstr)
                if( f_getRealValue( tag_shift, dret," ") == 0) ikshift=dret
                if(mass_kpoint ==1) then
                   if( f_getIntValue( tag_ik, iret) == 0) mass_ik=iret
                   if( f_getIntValue( tag_ib, iret) == 0) mass_ib=iret
                end if
          end if
          iret = f_selectParentBlock()
       end if
! read Brillouin zone integration option
       if(f_selectBlock(tag_BZ_integration) == 0) then
          if( f_getStringValue( tag_method, rstr, LOWER) == 0) call set_integration_method(rstr)
          if(way_BZintegral==PARABOLIC_B.or.way_BZintegral==GAUSSIAN_B) then
             if( f_getRealValue( tag_width, dret, 'hartree' ) == 0) width=dret
          end if

          if(way_BZintegral==L_TETRAHEDRON) then
             if( f_getIntValue( tag_nistep, iret) == 0) nistep=iret
             if( f_getRealValue( tag_tetra_eps, dret, 'hartree' ) == 0) tetra_eps=dret
          end if
          if( f_getStringValue( tag_spin, rstr, LOWER) == 0) call set_spin(rstr)
          iret = f_selectParentBlock()
       end if
! read band gap correction option
       if(f_selectBlock(tag_band_gap_correction) == 0) then
          call set_band_gap_correction
! ==== KT_add === 2015/01/17
          if( f_getStringValue( tag_sw_scissor_renormalization, rstr, LOWER) == 0) then
             call set_sw_scissor_renormalization( rstr )
          endif
! =============== 2015/01/17
          iret = f_selectParentBlock()
       end if
! read Drude term option
       if(f_selectBlock(tag_drude_term) == 0) then
          if( f_getStringValue( tag_drude, rstr, LOWER) == 0) call set_drude_option(rstr)
          call set_drude_term
          iret = f_selectParentBlock()
       end if
! read NLO option
       if(f_selectBlock(tag_nonlinear_optics) == 0) then
          if( f_getStringValue( tag_process, rstr, LOWER) == 0) call set_NLO_option(rstr)
          if( f_getStringValue( tag_excitation, rstr, LOWER) == 0) call set_NLO_excitation(rstr)
          if( f_getStringValue( tag_band, rstr, LOWER) == 0) call set_NLO_band(rstr)
          if( f_getStringValue( tag_term, rstr, LOWER) == 0) call set_NLO_term(rstr)
          if( f_selectBlock( tag_dble_resonance) == 0) then
             if( f_getStringValue( tag_method, rstr, LOWER) == 0) call set_dres_method(rstr)
             if( f_getRealValue( tag_cut_off, dret," ") == 0) dres_cut_off=dret
             iret = f_selectParentBlock()
          end if
          if( f_getStringValue( tag_smearing_fact, rstr, LOWER) == 0) call set_NLO_smearing_fact(rstr)
          iret = f_selectParentBlock()
       end if
! read magneto optical option
!      if(f_selectBlock(tag_magneto_optical) == 0) then
!          if( f_getStringValue( tag_magopt, rstr, LOWER) == 0) call set_magopt_option(rstr)
!      end if
! read print option
       call set_ipriepsilon

! === KT_add === 13.0R
       if( f_getStringValue( tag_sw_trm_print_full, rstr, LOWER) == 0) then
          call set_sw_trm_print_full( rstr )
       endif
       if( f_getStringValue( tag_sw_wf_rspace_print_full, rstr, LOWER) == 0) then
          call set_sw_wf_rspace_print_full( rstr )
       endif
       if( f_getRealValue( tag_delta_omega, dret, 'hartree' ) == 0) then
          delta_omega = abs(dret)
          write(nfout,*) "** delta_omega is set to ", delta_omega
       endif
! ============== 13.0R

       iret = f_selectParentBlock()
    else
       if(printable) &
       & write(nfout,'("tag_epsilon is not given in the inputfile <<CtrlP_rd_epsilon>>")')
       sw_epsilon = 0
       return
    end if
    contains
     subroutine set_sw_epsilon(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          sw_epsilon=1
          goto 1001
       end if
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          sw_epsilon=0
          goto 1001
       end if
       if(printable) &
       & write(nfout,'(" ! tag for sw_epsilon is not found <<m_CtrlP_rd_epsilon>> ")')
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* sw_epsilon = ",a10)') trim(rstr)
     end subroutine set_sw_epsilon

     subroutine set_restart(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          restart_mode=0
          goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          restart_mode=1
          goto 1001
       end if
       if(printable) &
       & write(nfout,'(" ! tag for restart is not found <<m_CtrlP_rd_epsilon>> ")')
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* restart = ",a10)') trim(rstr)
     end subroutine set_restart

     subroutine set_crystal_type(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_single_crystal, trim(rstr), tf)
       if(tf) then
          crystal = SINGLE_CRYSTAL
          goto 1001
       end if
       call strncmp0(tag_poly_crystal, trim(rstr), tf)
       if(tf) then
          crystal = POLYCRYSTAL
          goto 1001
       end if
       if(printable) &
       & write(nfout,'(1x,"!* crystal_type is set to be single crystal as default")')
       crystal = SINGLE_CRYSTAL
1001   if(ipriepsilon>=2 .and. printable) write(nfout,'(1x,"!* Crystal type = ",i3," 1: Single crystal; 2: polycrysta;")') crystal
     end subroutine set_crystal_type

     subroutine set_efermi(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          nrd_efermi=0
          goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          nrd_efermi=1
          if( f_getRealValue( tag_efermi, dret,'hartree') == 0) efermi=dret
          goto 1001
       end if
1001   if(ipriepsilon>=2 .and. printable) write(nfout,'(1x,"!* nrd_efermi = ",i3)') nrd_efermi
     end subroutine set_efermi

     subroutine set_polarization_vector
       if( f_getRealValue( tag_ux, dret," ") == 0) u(1)=dret
       if( f_getRealValue( tag_uy, dret," ") == 0) u(2)=dret
       if( f_getRealValue( tag_uz, dret," ") == 0) u(3)=dret
     end subroutine set_polarization_vector

     subroutine set_pointing_vector
       if( f_getRealValue( tag_px, dret," ") == 0) p(1)=dret
       if( f_getRealValue( tag_py, dret," ") == 0) p(2)=dret
       if( f_getRealValue( tag_pz, dret," ") == 0) p(3)=dret
     end subroutine set_pointing_vector

     subroutine set_photon_energy
       if( f_getRealValue( tag_low, dret, 'hartree') == 0)  e_low=dret
       if( f_getRealValue( tag_high, dret,'hartree') == 0) e_high=dret
       if( f_getRealValue( tag_step, dret,'hartree') == 0) e_step=dret
     end subroutine set_photon_energy

! ==== KT_add ===== 2015//08/04
     subroutine set_momentum_transfer
       if( f_getRealValue( tag_qx, dret," ") == 0) vec_q(1) = dret
       if( f_getRealValue( tag_qy, dret," ") == 0) vec_q(2) = dret
       if( f_getRealValue( tag_qz, dret," ") == 0) vec_q(3) = dret
     end subroutine set_momentum_transfer

     subroutine set_electron_energy
       if( f_getRealValue( tag_low, dret, 'hartree') == 0)  e_low=dret
       if( f_getRealValue( tag_high, dret,'hartree') == 0) e_high=dret
       if( f_getRealValue( tag_step, dret,'hartree') == 0) e_step=dret
     end subroutine set_electron_energy
! ================= 2015//08/04

     subroutine set_check_ts_option(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          n_check_ts=1
          goto 1001
       end if
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          n_check_ts=0
          goto 1001
       end if
1001   continue
       if(printable.and.ipriinputfile >= 2) write(nfout,'(" !* transition moment check = ",a10)') trim(rstr)
     end subroutine set_check_ts_option

     subroutine set_transition_moment_type(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
! set local transition moment
       call strncmp0(tag_local, trim(rstr), tf)
       if(tf) then
          goto 1001
       end if
       call strncmp0(tag_l, trim(rstr), tf)
       if(tf) then
          goto 1001
       end if
! set Reed and Needs transition moment
       call strncmp0(tag_read_needs,trim(rstr), tf)
       if(tf) then
          nonlocal=1
          goto 1001
       end if
       call strncmp0(tag_rn,trim(rstr), tf)
       if(tf) then
          nonlocal=1
          goto 1001
       end if
! set Kageshima Shiraishi transition moment
       call strncmp0(tag_kageshima_shiraishi,trim(rstr),tf)
       if(tf) then
          nppcorr=2
          goto 1001
       end if
       call strncmp0(tag_ks,trim(rstr),tf)
       if(tf) then
          nppcorr=2
          goto 1001
       end if
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* transition moment type = ",a10)') trim(rstr)
     end subroutine set_transition_moment_type

     subroutine set_sw_mass(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          sw_mass = 1
          goto 1001
       end if
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          sw_mass = 0
          goto 1001
       end if
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !*  sw_mass = ",a10)') trim(rstr)
     end subroutine set_sw_mass

     subroutine set_mass_direction
       if( f_getRealValue( tag_nx, dret," ") == 0) mass_dir(1)=dret
       if( f_getRealValue( tag_ny, dret," ") == 0) mass_dir(2)=dret
       if( f_getRealValue( tag_nz, dret," ") == 0) mass_dir(3)=dret
       if(mass_dir(1)**2+mass_dir(2)**2+mass_dir(3)**2 == 0.0d0) then
          mass_direction = 0
       else
          mass_direction = 1
       end if
     end subroutine set_mass_direction

     subroutine set_mass_kpoint(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_band_edge, trim(rstr), tf)
       if(tf) then
          mass_kpoint = 0
          goto 1001
       end if
       call strncmp0(tag_input, trim(rstr), tf)
       if(tf) then
          mass_kpoint = 1
          goto 1001
       end if
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* mass default option = ",a10)') trim(rstr)
     end subroutine set_mass_kpoint

     subroutine set_transition_moment_symmetry(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          nsym=1
          goto 1001
       end if
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          nsym=0
          goto 1001
       end if
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* transition moment symmetry = ",a10)') trim(rstr)
     end subroutine set_transition_moment_symmetry

     subroutine set_integration_method(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       way_BZintegral = PARABOLIC_B

       call strncmp0(tag_parabolic, trim(rstr), tf)
       if(tf) then
          way_BZintegral = PARABOLIC_B
          goto 1001
       end if
       call strncmp0(tag_p, trim(rstr), tf)
       if(tf) then
          way_BZintegral = PARABOLIC_B
          goto 1001
       end if

       call strncmp0(tag_tetrahedron, trim(rstr), tf)
       if(tf) then
          way_BZintegral = L_TETRAHEDRON
          goto 1001
       end if
       call strncmp0(tag_t, trim(rstr), tf)
       if(tf) then
          way_BZintegral = L_TETRAHEDRON
          goto 1001
       end if

       call strncmp0(tag_gaussian,trim(rstr),tf)
       if(tf) then
          way_BZintegral = GAUSSIAN_B
          goto 1001
       end if
       call strncmp0(tag_g,trim(rstr),tf)
       if(tf) then
          way_BZintegral = GAUSSIAN_B
          goto 1001
       end if

       stop ' ! tag for BZ_integration method is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* Brillouin zone integration  method = ",a10)') trim(rstr)
     end subroutine set_integration_method

     subroutine set_spin(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_both, trim(rstr), tf)
       if(tf) then
          spin=BOTH
          goto 1001
       end if
       call strncmp0(tag_major, trim(rstr), tf)
       if(tf) then
          spin=MAJOR
          goto 1001
       end if
       call strncmp0(tag_minor, trim(rstr), tf)
       if(tf) then
          spin=MINOR
          goto 1001
       end if
       stop ' ! tag for spin is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* spin = ",a10)') trim(rstr)
     end subroutine set_spin

     subroutine set_band_gap_correction
       if( f_getRealValue( tag_scissor, dret, 'hartree') == 0) then
          scissor=dret
       else
          scissor=0.0d0
       end if
     end subroutine set_band_gap_correction

     subroutine set_drude_option(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_drude_only, trim(rstr), tf)
       if(tf) then
          ndrude=2
          goto 1001
       end if
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          ndrude=0
          goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          ndrude=1
          goto 1001
       end if
       stop ' ! tag for Drude term is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* drude = ",a10)') trim(rstr)
     end subroutine set_drude_option

     subroutine set_drude_term
       if( f_getRealValue( tag_conductivity, dret," ") == 0) dc_conductivity=dret
       if( f_getRealValue( tag_damping_factor, dret,'hartree') == 0) drude_damping=dret
       if( f_getRealValue( tag_plasma_frequency, dret,'hartree') == 0) plasma_f=dret
       if( f_getRealValue( tag_effective_mass, dret," ") == 0) effective_m=dret
     end subroutine set_drude_term

     subroutine set_NLO_option(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          nlo = 0
          goto 1001
       end if
       call strncmp0(tag_SHG, trim(rstr), tf)
       if(tf) then
          nlo = 1
          goto 1001
       end if
       call strncmp0(tag_THG, trim(rstr), tf)
       if(tf) then
          nlo = 2
          goto 1001
       end if
       stop ' ! tag for NLO_option is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* process = ",a10)') trim(rstr)
     end subroutine set_NLO_option

     subroutine set_NLO_excitation(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_all, trim(rstr), tf)
       if(tf) then
          virt_ex_type = ALL_TYPE
          goto 1001
       end if
       call strncmp0(tag_electron, trim(rstr), tf)
       if(tf) then
          virt_ex_type = ELECTRON
          goto 1001
       end if
       call strncmp0(tag_hole, trim(rstr), tf)
       if(tf) then
          virt_ex_type = HOLE
          goto 1001
       end if
       call strncmp0(tag_three_state, trim(rstr), tf)
       if(tf) then
          virt_ex_type = THREE_LEVEL
          goto 1001
       end if
       stop ' ! tag for NLO_excitation is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* excitation = ",a10)') trim(rstr)
     end subroutine set_NLO_excitation

     subroutine set_NLO_band(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_all, trim(rstr), tf)
       if(tf) then
          nlo_band = ALL_BAND
          goto 1001
       end if
       call strncmp0(tag_inter, trim(rstr), tf)
       if(tf) then
          nlo_band = INTER_BAND
          goto 1001
       end if
       call strncmp0(tag_intra, trim(rstr), tf)
       if(tf) then
          nlo_band = INTRA_BAND
          goto 1001
       end if
       stop ' ! tag for NLO_band is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* band = ",a10)') trim(rstr)
     end subroutine set_NLO_band

     subroutine set_NLO_term(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_all, trim(rstr), tf)
       if(tf) then
          nlo_term = ALL_TERM
          goto 1001
       end if
       call strncmp0(tag_omega, trim(rstr), tf)
       if(tf) then
          nlo_term = OMEGA_TERM
          goto 1001
       end if
       call strncmp0(tag_2omega, trim(rstr), tf)
       if(tf) then
          nlo_term = OMEGA2_TERM
          goto 1001
       end if
       call strncmp0(tag_3omega, trim(rstr), tf)
       if(tf) then
          nlo_term = OMEGA3_TERM
          goto 1001
       end if
       stop ' ! tag for NLO_excitation is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* NLO term = ",a10)') trim(rstr)
     end subroutine set_NLO_term

     subroutine set_dres_method(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_omit, trim(rstr), tf)
       if(tf) then
          dres_method = OMIT
          goto 1001
       end if
       call strncmp0(tag_damping, trim(rstr), tf)
       if(tf) then
          dres_method = DAMPING
          goto 1001
       end if
       stop ' ! tag for NLO_dble_resonance option is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* dres_method = ",a10)') trim(rstr)
     end subroutine set_dres_method

     subroutine set_NLO_smearing_fact(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_resonance, trim(rstr), tf)
       if(tf) then
          smearing_fact = RESONANCE
          goto 1001
       end if
       call strncmp0(tag_off_resonance, trim(rstr), tf)
       if(tf) then
          smearing_fact = OFF_RESONANCE
          goto 1001
       end if
       stop ' ! tag for smearing option is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* smearing_fact = ",a10)') trim(rstr)
     end subroutine set_NLO_smearing_fact

     subroutine set_magopt_option(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
       call strncmp0(tag_off, trim(rstr), tf)
       if(tf) then
          magneto_optical=0
          goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          magneto_optical=1
          goto 1001
       end if
       stop ' ! tag for magneto_optical option is invalid <<m_CtrlP_rd_epsilon>>'
1001   continue
       if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !* drude = ",a10)') trim(rstr)
     end subroutine set_magopt_option

     subroutine set_ipriepsilon
       if( f_getIntValue( tag_ipriepsilon, iret) == 0) ipriepsilon=iret
     end subroutine set_ipriepsilon

! ===== KT_add ===== 13.0R
     subroutine set_sw_trm_print_full( rstr )
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf

       call strncmp0(tag_off, trim(rstr), tf)
       if (tf) then
          sw_trm_print_full = off;         goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          sw_trm_print_full = on;         goto 1001
       end if
1001   continue

       write(nfout,*) 'sw_trm_print_full is set to ', sw_trm_print_full

     end subroutine set_sw_trm_print_full

     subroutine set_sw_wf_rspace_print_full( rstr )
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf

       call strncmp0(tag_off, trim(rstr), tf)
       if (tf) then
          sw_wf_rspace_print_full = off;         goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          sw_wf_rspace_print_full = on;         goto 1001
       end if
1001   continue

       write(nfout,*) 'sw_wf_rspace_print_full is set to ', sw_wf_rspace_print_full

     end subroutine set_sw_wf_rspace_print_full
! ================== 13.0R

! ==== KT_add === 2015/01/17
     subroutine set_sw_scissor_renormalization( rstr )
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf

       call strncmp0(tag_off, trim(rstr), tf)
       if (tf) then
          sw_scissor_renormalization = off;         goto 1001
       end if
       call strncmp0(tag_on, trim(rstr), tf)
       if(tf) then
          sw_scissor_renormalization = on;         goto 1001
       end if
1001   continue
       write(nfout,*) 'sw_scissor_renormalization is set to ', sw_scissor_renormalization

     end subroutine set_sw_scissor_renormalization
! ================= 2015/01/17

 end subroutine m_CtrlP_rd_epsilon

 subroutine eps_setup(nfout,e_low,e_high,e_step,nstep,u,p,ptype,DELQ,nonlocal,nppcorr,nsym,nrd_efermi,efermi)
    implicit none
!
!   subroutine for setting up calculation parameters
!
    integer, intent(in)     :: nfout, nonlocal, nppcorr, nsym
    integer, intent(inout)  :: nrd_efermi
    integer, intent(out)    :: nstep, ptype
    integer                 :: i
    real(DP), intent(in)    :: efermi
    real(DP), intent(inout) :: DELQ
    real(DP), intent(inout) :: e_low, e_high, e_step
    real(DP)                :: e_range, ulen, plen
    real(DP), dimension(3 ) :: u,p

    if(printable)  write(nfout,'(/1x," ---------- calculation parameters ----------")')
    if(printable)  write(nfout,'(1x,"!* kv3 = ",i5," kv3_ek = ",i5)') kv3, kv3_ek
! running and restart mode
    if(icond == 2 .and. ipri >= 1) then
       write(nfout,'(1x,"!* running mode = initial")')
       if(restart_mode == 0) write(nfout,'(1x,"!* restart is off")')
       if(restart_mode == 1) write(nfout,'(1x,"!* restart is on")')
    end if
    if(icond == 3) then
       if(ipri >= 1) write(nfout,'(1x,"!* running mode = restart")')
       if(restart_mode == 0) then
          if(ipri >= 1) write(nfout,'(1x,"!* restart is off")')
          stop
       end if
       if(restart_mode == 1 .and. ipri >= 1 ) write(nfout,'(1x,"!* restart from EPS_CONT file")')
    end if
! crystal type
    if(crystal==SINGLE_CRYSTAL.and.printable) write(nfout,'(1x,"!* crystal type = single crystal")')
    if(crystal==POLYCRYSTAL.and.printable)    write(nfout,'(1x,"!* crystal type = polycrystal")')

! fermi level
    if(printable) write(nfout,'(1x,"!* nrd_efermi = ",i3)') nrd_efermi
       if(nlo == 0) then
          if(nrd_efermi==0) then
             if(printable) write(nfout,'(1x,"!* efermi is calculated")')
          else
             if(printable) write(nfout,'(1x,"!* efermi = ",f10.5," read from the input file")') efermi
          end if
       else
          nrd_efermi = 0
          if(printable) write(nfout,'(1x,"!* nrd_efermi = ",i3," is set for nonlinear optics calculation")') nrd_efermi
    end if

! ===================== KT_add =============== 13.0S
    if ( sw_corelevel_spectrum == OFF ) then
! ============================================ 13.0S
! photon energy range
       call photon_energy_range

! photon type
       call u_and_p_vectors

! effective mass
       call effective_mass_option

! transtion moment calculation option
       call transition_moment_option

! effective mass calculation option
       call mass_calculation_option

! BZ integration option
       call BZ_integration_option

! band_gap_correction option
       call scissor_operator_option

! Drude term option
       call drude_term_option

       if(nlo/=0.and.magneto_optical/=0) then
          write(nfout,'(1x,"!* nonlinear optics and magneto-optical calculations cannot be done, simultaneously.")')
          write(nfout,'(1x,"!* UVSOR-Epsilon STOP")')
          stop
       end if

! nlo option
       if(printable) call nlo_option

! magneto optical option
       call magopt_option

! =========================== KT_add ==================== 13.0S
    else
       if ( eels_mode == ON ) then
          call q_vectors
       else
          call u_and_p_vectors
       endif
       call corelevel_energy_range
       call transition_moment_option
       call BZ_integration_option
       call scissor_operator_option
    endif
! ======================================================= 13.0S
! print level
    if(printable) &
    & write(nfout,'(1x,"!* ipriepsion = ",i3, " is set",/)') ipriepsilon
    if(ipriepsilon<=2) ipri_kp=0
    if(ipriepsilon<=2) ipri_spg=0

  contains

    subroutine photon_energy_range
       implicit none
! set energy step of photon
       if(e_step<=0.0d0) then
          e_step=0.002d0
          if(printable) &
         & write(nfout,'(1x,"!* default photon energy step of ",f10.5," is used")') e_step
       end if
! set default e_low and e_high if necessary
       if(e_low<=0.0d0.and.e_high<=0.0d0) then
          e_low=0.0d0; e_high=2.0d0

          if(printable) then
             write(nfout,'(1x,"!* lowest and highest photon energy is 0.0d0")')
             write(nfout,'(1x,"!* default lowest photon energy of ",f10.5," is used ")') e_low
             write(nfout,'(1x,"!* default highest photon energy of ",f10.5," is used")') e_high
          end if
       end if
! set e array
       e_range=(e_high-e_low)/e_step
       nstep=int(e_range)+1

       allocate(e(nstep)); e=0.0d0

       e(1)=e_low
       if(nstep.gt.1) then
! 2007.12.10
          do i=2, nstep
             e(i)=e(1)+e_step*(i-1)
          end do
!         do i=2, nstep
!            e(i)=e(i-1)+e_step
!         end do
! 2007.10.12
       end if
       if(printable) &
      & write(nfout,'(1x,"!* photon energy range = ",f6.3," -",f6.3,1x,"au",3x,"step = ",f6.3,1x,"au")') &
      & e_low,e_high, e_step
     end subroutine photon_energy_range

! ============= KT_add ============= 13.0S
    subroutine corelevel_energy_range
       implicit none

!       e_step = e_step_cls 
!       e_low  = e_low_cls 
!       e_high = e_high_cls

       if(e_step<=0.0d0) then
          e_step=0.002d0
          if(printable) &
         & write(nfout,'(1x,"!* default energy step of ",f10.5," is used")') e_step
       end if

       e_range=(e_high-e_low)/e_step; 
       nstep=int(e_range)+1

       allocate(e(nstep)); e=0.0d0

       e(1)=e_low
       if(nstep.gt.1) then
          do i=2, nstep
             e(i)=e(1)+e_step*(i-1)
          end do
       end if

       if(printable) &
      & write(nfout,'(1x,"!* energy range = ",f6.3," -",f6.3,1x,"au",3x,"step = ",f6.3,1x,"au")') &
      & e_low,e_high, e_step
     end subroutine corelevel_energy_range

     subroutine q_vectors
       real(kind=DP) :: norm

       norm = sqrt( vec_q(1)**2 +vec_q(2)**2 +vec_q(3)**2 )
       if ( norm > 1.0D-8 ) then
          vec_q = vec_q /norm
       else
          vec_q = 0.0;  vec_q(3) = 1.0D0
       endif
       u = vec_q

       if(printable) then
          write(nfout,'(1x,"!* momentum transfer vector " &
                   & ,3x,"qx = ",f10.5,3x,"qy = ",f10.5,3x,"qz = ",f10.5)') u(1),u(2),u(3)
       endif
     end subroutine q_vectors
! = ================================= 13.0S

     subroutine u_and_p_vectors
       implicit none
       call norm(u,ulen)
       call norm(p,plen)
       if(crystal==SINGLE_CRYSTAL) then
       if(printable) write(nfout,'(1x,"!* ptype = ",i3)') ptype
!  single crystal case -> set photon polarization or pointing vector
          if(ulen.eq.0.0d0.and.plen.eq.0.0) then
             ptype=-1 ! calculation of dielectric tensor
             if(printable) write(nfout,'(1x,"!* calculation of dielectric tensor")')
          else
             if(ulen.gt.0.0d0.and.plen.eq.0.0d0) then
                ptype=1 ! calculation for polarized photons
                if(printable) then
                   write(nfout,'(1x,"!* photon polarization = linear")')
                   write(nfout,'(1x,"!* polarization vector of photon" &
                   & ,3x,"ux = ",f10.5,3x,"uy = ",f10.5,3x,"uz = ",f10.5)') u(1),u(2),u(3)
                end if
             else
                if(ulen.eq.0.0.and.plen.gt.0.0d0) then
                   ptype=0 ! calculation for non-polarized photons
                   if(printable) then
                      write(nfout,'(1x,"!* photon polarization = circular ")')
                      write(nfout,'(1x,"!* pointing vector of photons" &
                      & ,3x,"px = ",f10.5,3x,"py = ",f10.5,3x,"pz = ",f10.5)') p(1),p(2),p(3)
                   end if
                else
                   if(printable) &
                   & write(nfout,'(1x,"!!* --- polarization and pointing vector cannot be specified simaltaneously." &
                   &,/,"  UVSOR-Epsilon STOP")')
                   stop
                end if
             end if
          end if
       else
          if(crystal==POLYCRYSTAL) then
!  polycrystal case -> set default pokarization vector
             ptype=1
             u(1)=1.0d0
             u(2)=0.0d0
             u(3)=0.0d0
             if(printable) then
                write(nfout,'(1x,"!* ptype = ",i3)') ptype
                write(nfout,'(1x,"!* --- default polarization vector of photon is set" &
                & ,3x,"ux = ",f10.5,3x,"uy = ",f10.5,3x,"uz = ",f10.5)') u(1),u(2),u(3)
             end if
          end if
       end if
     end subroutine u_and_p_vectors

     subroutine effective_mass_option
       implicit none
       if(printable) write(nfout,'(1x,"!* sw_mass = ",i3)') sw_mass
       if(sw_mass /= 0) then
          if(printable) then
             write(nfout,'(1x,"!* effective mass calculation")')
             write(nfout,'(1x,"!* mass direction = ",i3)') mass_direction
             if(mass_direction == 0) then
                write(nfout,'(1x,"!* direction = principal axis")')
             else
                write(nfout,'(1x,"!* direction = (",3f10.5,")")') mass_dir(1),mass_dir(2),mass_dir(3)
             end if
             if(mass_kpoint==0) then
                write(nfout,'(1x,"!* band = conduction band bottom & valence band top")')
             else
                write(nfout,'(1x,"!* band = ",i3)') mass_ib
                write(nfout,'(1x,"!* k-point = ",i3)') mass_ik
                if(mass_ib > neg) then
                   write(nfout,'(1x,"!* ib = ",i4," is larger than the number of band = ",i4)') &
                 & mass_ib, neg
                   write(nfout,'(1x,"!* UVSOR-Epsilon STOP")')
                   stop
                end if
                if(mass_ik == 0) then
                   write(nfout,'(1x,"!* ik = 0   UVSOR-Epsilon STOP")')
                   stop
                end if
                if(mass_ik > kv3_ek) then
                   write(nfout,'(1x,"!* ib = ",i4," is larger than kv3_ek = ",i4)') mass_ib, kv3_ek
                   write(nfout,'(1x,"!* ik = 0   UVSOR-Epsilon STOP")')
                   stop
                end if
             end if
         end if
       end if
     end subroutine effective_mass_option

     subroutine transition_moment_option
       implicit none
       integer :: band0
! set band_i and band_f if necessary
       if(band_i/=0.and.band_f/=0) then
          if(band_i>band_f) then
             band0=band_f
             band_f=band_i
             band_i=band0
          end if
             if(printable) write(nfout,'(1x,"!* band decomposition calculation of dielectric function")')
             if(band_i<0) then
                if(printable) write(nfout,'(1x,"!* band_i = ",i5," is less than 1")') band_i
                band_i = 1
                if(printable) write(nfout,'(1x,"!* band_i = 1 is set")')
             end if
             if(band_f>neg) then
                if(printable) write(nfout,'(1x,"!* band_f = ",i5," is larger than nband = ",i5)') band_f, neg
                band_f = neg
                if(printable) write(nfout,'(1x,"!* band_f = ",i5," is set")') neg
             end if
             if(printable) write(nfout,'(1x,"!* band ",i3," -> ",i3," transition ")') band_i, band_f
       end if

!   set transition moment option
       if(n_check_ts == 1) then
          if(printable) write(nfout,'(1x,"!* transition moment option check is on")')
       else
          if(printable) write(nfout,'(1x,"!* transition moment option check is off")')
       end if
       if(printable) write(nfout,'(1x,"!* nonlocal = ",i3)') nonlocal
       if(printable) write(nfout,'(1x,"!* nppcorr = ",i3)') nppcorr
       if(nonlocal==0.and.nppcorr==0) then
          if(printable) write(nfout,'(1x,"!* transition moment correction = none")')
       end if
       if(nonlocal==1.and.nppcorr==0) then
          if(printable) write(nfout,'(1x,"!* transition moment correction = Read and Needs method")')
       end if
       if(nppcorr==1) then
          if(printable) write(nfout,'(1x,"!* transition moment correction = Kageshima and Shiraishi method")')
       end if
       if(nppcorr==2) then
          if(printable) write(nfout,'(1x,"!* transition moment correction = Kageshima and Shiraishi method")')
       end if
       if(nonlocal==1.and.nppcorr>0) then
          if(printable) write(nfout,'(1x,"! transition moment option is wrong. &
          & UVSOR-Epsilon stop")')
       end if

!   set DELQ
       if(nonlocal/=0) then
          if(DELQ/=0.0d0) then
             if(printable) &
             & write(nfout,'(1x,"!* - DELQ value of ",f10.5," is used for [Vnl,r] calculation")') DELQ
          else
             DELQ=0.0001d0
             if(printable) &
             & write(nfout,'(1x,"!* - default DELQ value of ",f10.5," is used for [vnl,r] calculation")') DELQ
          end if
       end if

! ========================== Added by K. Tagami ==================== 0.2
       if( sw_hubbard == ON .and. sw_tm_hubbard_correction == ON ) then
          if(DELQ/=0.0d0) then
             if(printable) &
             & write(nfout,'(1x,"!* - DELQ value of ",f10.5," is used for [Dhub,r] calculation")') DELQ
          else
             DELQ=0.0001d0
             if(printable) &
             & write(nfout,'(1x,"!* - default DELQ value of ",f10.5," is used for [Dhub,r] calculation")') DELQ
          end if
       end if
! ====================================================================
       
!   set symmetrization option
       if(printable) then
          write(nfout,'(1x,"!* nsym = ",i3)') nsym
          if(nsym/=0.and.nbztyp/=1) then
             write(nfout,'(1x,"!* transition moment square matrix is symmetrized")')
          end if
       end if
     end subroutine transition_moment_option

     subroutine mass_calculation_option
       if(printable) write(nfout,'(1x,"!* sw_mass = ",i3)') sw_mass
       if(printable.and.sw_mass==0) write(nfout,'(1x,"!* skip mass calculation")')
       if(sw_mass/=0) then
          if(printable) write(nfout,'(1x,"!* mass_kpoint = ",i3)') mass_kpoint
          if(mass_kpoint == 0) then
             if(printable) write(nfout,'(1x,"!* effective mass is calculated at valence band top and conduction band bottom")')
          else
             if(printable) write(nfout,'(1x,"!* effective mass is calculated for ib = ", i4," at ik = ",i4)') mass_ib, mass_ik
             if(mass_ik == 0.or.mass_ib == 0) then
                if(printable) write(nfout,'(1x,"!* ib or ik = 0 is given for mass calculation")')
                if(printable) write(nfout,'(1x,"!* mass calculation is skipped")')
                sw_mass = 0
             end if
          end if
          if(sw_mass/=0.and.nrd_efermi/=0) then
             if(printable) write(nfout,'(1x,"!*  nrd_efermi = 0 is set for mass calculation")')
             nrd_efermi = 0
          end if
       end if
     end subroutine mass_calculation_option

     subroutine BZ_integration_option
       if(printable) &
       & write(nfout,'(1x,"!* way_BZintegral = ",i3)') way_BZintegral

! ================== KT_add ============ 13.0S
       if ( sw_corelevel_spectrum == ON ) then
          if(way_BZintegral==L_TETRAHEDRON) then
             if(printable) then
                write(nfout,*) " !* linear tetrahedron cannot be used in corelevl spectrum"
                write(nfout,*) " !* parabolic broadning is used instead "
             end if
             way_BZintegral=PARABOLIC_B
             call set_default_width
          endif
       endif
! ======================================== 13.0S

       if(way_BZintegral==L_TETRAHEDRON) then
! check mesh type
          if(way_ksample/=MESH) then
             if(printable) then
                write(nfout,'(1x,"!* linear tetrahedron cannot be used for way_ksample = ",i3)') way_ksample
                write(nfout,'(1x,"!* parabolic broadning is used instead ")')
             end if
             way_BZintegral=PARABOLIC_B
             call set_default_width
          else
! check tetra_eps
             if(printable) &
            &  write(nfout,'(1x,"!* Brillouin zone integration method = linear tetrahedron")')
             if(tetra_eps > 0.0d0) then
                if(printable) write(nfout,'(1x,"!* tetra_eps = ",e12.5," hartree")') tetra_eps
             else
                tetra_eps = 1.0d-4
                if(printable) write(nfout,'(1x,"!* tetra_eps =<0.0d0 is read ; tetra_eps = 1.0d-4 hartree is set")')
             end if
! check nistep
             if(nistep >= 1) then
                if(nistep > nstep) then
                   nistep = nstep
                   if(printable) then
                      write(nfout,'(1x,"!* nistep = ",i4," is larger than nstep = ",i4)') nistep, nstep
                      write(nfout,'(1x,"!* nistep = ",i4," is set as default.")') nstep
                   end if
                else
                    if(printable) write(nfout,'(1x,"!* nistep = ",i4)') nistep
                end if
             else
                if(nistep < 1) then
                   nistep = nstep
                   if(printable) then
                      write(nfout,'(1x,"!* nistep = ",i4," is set as default.")') nstep
                   end if
                end if
             end if
          end if
       end if

       if(way_BZintegral/=L_TETRAHEDRON) then
          if(printable) then
             if(way_BZintegral==PARABOLIC_B) &
             & write(nfout,'(1x,"!* Brillouin zone integration method = parabolic broadning")')
             if(way_BZintegral==GAUSSIAN_B) &
             & write(nfout,'(1x,"!* Brillouin zone integration method = gaussian broadning")')
          end if
          if(width<=0.0d0) then
             call set_default_width
          else
             if(printable) &
             & write(nfout,'(1x,"!* smearing width = ",f10.5)') width
          end if
       end if

       if(printable) then
          write(nfout,'(1x,"!* spin = ",i3)') spin
          if(spin/=BOTH) then
             if(spin==MAJOR) write(nfout,'(1x,"!* integration for major spin")')
             if(spin==MINOR) write(nfout,'(1x,"!* integration for minor spin")')
          end if
       end if
     end subroutine BZ_integration_option

     subroutine set_default_width
       width=0.01837451d0
       if(printable) &
       & write(nfout,'(1x,"!* default smearing width of 0.01837451 Hartree (=0.5eV) is set")')
     end subroutine set_default_width

     subroutine scissor_operator_option
       if(scissor/=0.0d0 .and. printable) then
          write(nfout,'(1x,"!* scissor operator = ", f10.5," Hartree ")') scissor
       end if
     end subroutine scissor_operator_option

     subroutine drude_term_option
       if(printable) then
          write(nfout,'(1x,"!* ndrude = ",i3)') ndrude
          if(ndrude/=0) then
             if(ndrude==1) write(nfout,'(1x,"!* Drude Term is included if the system is metallic")')
             if(ndrude==2) write(nfout,'(1x,"!* only Drude Term is calculated if the system is metallic")')
          end if
       end if
     end subroutine drude_term_option

     subroutine nlo_option
       if(printable) then
          write(nfout,'(1x,"!* nlo = ",i3)') nlo
! return if no nlo calculation
          if(nlo==0) then
             write(nfout,'(1x,"!* no nonlinear optical susceptibilitiy is calculated")')
             return
          end if
! nlo type
          if(nlo==1) then
             write(nfout,'(1x,"!* SHG susceptibilities calculation")')
          else
             write(nfout,'(1x,"!* THG susceptibilities calculation")')
          end if
! excitation type
          write(nfout,'(1x,"!* virt_ex_type = ",i3)') virt_ex_type
          if(virt_ex_type == ALL_TYPE) write(nfout,'(1x,"!* NLO process = all type excitation")')
          if(virt_ex_type == ELECTRON) write(nfout,'(1x,"!* NLO process = electron excitation")')
          if(virt_ex_type == HOLE) write(nfout,'(1x,"!* NLO process = hole excitation")')
          if(virt_ex_type == THREE_LEVEL) then
             write(nfout,'(1x,"!* NLO process = three state excitation")')
             if(nlo == 1) then
                write(nfout,'(1x,"!!* SHG process has no three state excitation.",/ &
               & 1x," UVSOR-Epsion STOP")')
               stop
             end if
          end if
! band transition
          write(nfout,'(1x,"!* nlo_band = ",i3)') nlo_band
          if(nlo_band == ALL_BAND) write(nfout,'(1x,"!* inter + intraband calculation")')
          if(nlo_band == INTER_BAND) write(nfout,'(1x,"!* interband calculation")')
          if(nlo_band == INTRA_BAND) write(nfout,'(1x,"!* intraband calculation")')
! resonance type
          if(nlo_term == ALL_TERM) write(nfout,'(1x,"!* NLO term = all resonance term")')
          if(nlo_term == OMEGA_TERM) write(nfout,'(1x,"!* NLO term = omega resonance term")')
          if(nlo_term == OMEGA2_TERM) write(nfout,'(1x,"!* NLO term = omega2 resonance term")')
          if(nlo_term == OMEGA3_TERM) then
             if(nlo == 1) then
                write(nfout,'(1x,"!* NLO term = omega3 term does not exist in SHG process. &
               & UVSOR-Epsilon STOP")')
                stop
             else
               if(nlo_term == OMEGA3_TERM) write(nfout,'(1x,"!* NLO term = omega3 term")')
            end if
          end if
! double resonance method
          if(dres_method == OMIT) then
             write(nfout,'(1x,"!* NLO double resonance term is omitted")')
             write(nfout,'(1x,"   omittion cut-of = ", f10.5," hartree")') dres_cut_off
          end if
          if(dres_method == DAMPING) then
             write(nfout,'(1x,"!* NLO double resonance term is damped")')
             write(nfout,'(1x,"!* damping factor = ", f10.5," hartree")') dres_cut_off
          end if
! smearing factor
          if(way_BZintegral /=L_TETRAHEDRON.and.smearing_fact == RESONANCE) &
          & write(nfout,'(1x,"!* smearing factor = resonance")')
          if(way_BZintegral /=L_TETRAHEDRON.and.smearing_fact == OFF_RESONANCE) &
          & write(nfout,'(1x,"!* smearing factor = off_resonance")')
       end if
     end subroutine nlo_option

     subroutine magopt_option
       if(printable) then
          write(nfout,'(1x,"!* magneto_optical = ",i3)') magneto_optical
! return if no magneto optical calculation
          if(magneto_optical /= 0) then
             write(nfout,'(1x,"!* magneto optical effect calculation")')
             if(ptype /= -1) then
                write(nfout,'(1x,"!* photon polarization option is neglected for magneto optical calculation")')
                write(nfout,'(1x,"!* ptype = -1 is set")')
                ptype = -1
             end if
          else
             write(nfout,'(1x,"!* no magneto optical effect is calculated.")')
             return
          end if

! spin multiplicity
          if(nspin==1) then
             write(nfout,'(1x,"!* magneto-optical effect calculation cannot be done for nspin = 1  state. &
            & UVSOR-Epsilon STOP")')
             stop
          end if
! crystal type
          if(nbztyp_spg/=SIMPLE_CUBIC.and.nbztyp_spg/=BCC.and.nbztyp_spg/=FCC) then
             write(nfout,'(1x,"!* the system is not cubic ")')
             write(nfout,'(1x,"!* SIMPLE_CUBIC = ",i3,2x,"BCC = ",i3,2x,"FCC = ",i3)') SIMPLE_CUBIC, BCC, FCC
             write(nfout,'(1x,"!* nbztyp_spg = ", i3)') nbztyp_spg
             write(nfout,'(1x,"!* magneto-optical effect calculation cannot be done for this system. &
           & UVSOR-Epsilon STOP")')
             stop
          end if
! BZ integration
          if(way_BZintegral/=2) then
             write(nfout,'(1x,"!* gaussian/parabolic smearing cannot be used for magneto-optical calculation")')
             way_BZintegral = 2
             write(nfout,'(1x,"!* Use of linear tetrahedron is set : way_BZintegral = ",i4)') way_BZintegral
          end if
! TM symmetry
          if(nsym /=0 ) then
             write(nfout,'(1x,"!* transition moment symmetry is turned off : nsym = ",i4)') nsym
          end if
      end if
     end subroutine magopt_option
 end subroutine eps_setup

 subroutine restart_setup(nfout)
    integer, intent(in) :: nfout
    integer             :: kv3_ek0, nspin0, num_vb0, num_cb0, nrd_efermi0, neg0, nonlocal0, nppcorr0
    integer             :: ik, nv, nc
    logical             :: rd_status
! 
! restart set up subroutine
!
   if(icond ==2.or.restart_mode/=1) return
   if(printable) write(nfout,'(1x,"!* ---------- restart subroutine ----------")')
   if(printable) write(nfout,'(1x,"!* restart from EPS_CONT data files")')
   if(mype==0) then
      call rd_epscont_header(1)
      if(nrd_efermi/=0) then
         nv = num_vb0;  nc = num_cb0
      else
         nv = neg;  nc = neg
      end if
   end if
   if(npes>1) then
      call mpi_bcast(nv,1,mpi_double_precision,0,mpi_comm_group,ierr)
      call mpi_bcast(nc,1,mpi_double_precision,0,mpi_comm_group,ierr)
   end if
   num_vb = nv;  num_cb = nc

!============================= KT_add ================ 13.0S
   if ( sw_corelevel_spectrum == ON ) nv = num_core_states
! ==================================================== 13.0S

! set arrays
   allocate(n2_mpi_ek(neg,kv3_ek)); n2_mpi_ek = 0
   allocate(ind_vb(neg,kv3_ek)); ind_vb = 0
   allocate(ind_cb(neg,kv3_ek)); ind_cb = 0
   allocate(trm(kv3_ek,nv,nc,3,2,2)); trm=0.0d0
   allocate(eb_ek(kv3_ek,neg)); eb_ek=0.0d0
   if(mype==0) then
      do ik = 1, kv3_ek
         call rd_kpt_index(rd_status)
         if(rd_status .eqv. .true.) then
            write(nfout,'(1x,"!* restart data for ik = ",i4," is read")') ik
         else
            exit
         end if
         call rd_n2_mpi_ek(rd_status)
         if(rd_status .eqv. .true.) then
            if(ipriepsilon>=2) write(nfout,'(1x,"   restart data of n2_mpi_ek is read")')
         else
            exit;  stop
         end if
         call rd_ind_vb(rd_status)
         if(rd_status .eqv. .true.) then
            if(ipriepsilon>2) write(nfout,'(1x,"   restart data of ind_vb is read")')
         else
            exit;  stop
         end if
         call rd_ind_cb(rd_status)
         if(rd_status .eqv. .true.) then
            if(ipriepsilon>=2) write(nfout,'(1x,"   restart data of ind_cb is read")')
         else
            exit
         end if
         call rd_trm(rd_status)
         if(rd_status .eqv. .true.) then
            if(ipriepsilon>=2) write(nfout,'(1x,"   restart data of trm is read")')
         else
            exit
         end if
         call rd_eb_ek(rd_status)
         if(rd_status .eqv. .true.) then
            if(ipriepsilon>=2) write(nfout,'(1x,"   restart data of eb_ek is read")')
         else
            exit
         end if
         nk_restart_read = nk_restart_read + 1
      end do
      if(nk_restart_read == kv3_ek) write(nfout,'(1x," all restart data are read")')
      if(nk_restart_read < kv3_ek) then
         write(nfout,'(1x," restart data to ", i4,"-th k-point are read")') nk_restart_read
! re-position nfepscont
         rewind(nfepscont)
         call rd_epscont_header(0)
         do ik = 1, nk_restart_read
           call rd_kpt_index(rd_status)
           call rd_n2_mpi_ek(rd_status)
           call rd_ind_vb(rd_status)
           call rd_ind_cb(rd_status)
           call rd_trm(rd_status)
           call rd_eb_ek(rd_status)
         end do
      end if
   end if
   if(npes>1) then
      call mpi_bcast(nk_restart_read,1,mpi_integer,0,mpi_comm_group,ierr)
      call mpi_bcast(n2_mpi_ek,kv3_ek*neg,mpi_integer,0,mpi_comm_group,ierr)
      call mpi_bcast(ind_vb,kv3_ek*neg,mpi_integer,0,mpi_comm_group,ierr)
      call mpi_bcast(ind_cb,kv3_ek*neg,mpi_integer,0,mpi_comm_group,ierr)
      call mpi_bcast(trm,kv3_ek*nv*nc*3*2*2,mpi_double_precision,0,mpi_comm_group,ierr)
      call mpi_bcast(eb_ek,kv3_ek*neg,mpi_double_precision,0,mpi_comm_group,ierr)
   end if
   contains
    subroutine rd_epscont_header(ipri)
      integer, intent(in) :: ipri
!     ipri  = 1 : standard
!     ipri  = 0 : scilent
      if(ipri==1) write(nfout,'(1x,"!* header data of epsilon continue dara file is read")')
      read(nfepscont,*) kv3_ek0, nspin0, nrd_efermi0
      rewind(nfepscont)
      if(nrd_efermi0/=0) then
         read(nfepscont,*,end=91) kv3_ek0, nspin0, nrd_efermi0, num_vb0, num_cb0, nonlocal0, nppcorr0
      else
         read(nfepscont,*,end=91) kv3_ek0, nspin0, nrd_efermi0, neg0, nonlocal0, nppcorr0
      end if
      if(ipri==1) then
            write(nfout,'(1x,"!* kv3_ek read = ",i4)') kv3_ek0
            write(nfout,'(1x,"!* nspin read = ",i4)')  nspin0
            write(nfout,'(1x,"!* nrd_efermi read = ",i4)') nrd_efermi0
            if(nrd_efermi0/=0) then
               write(nfout,'(1x,"!* num_vb read = ",i4)') num_vb0
               write(nfout,'(1x,"!* num_cb read = ",i4)') num_cb0
            else
               write(nfout,'(1x,"!* neg read = ",i4)') neg0
           end if
           write(nfout,'(1x,"!* nonlocal read = ",i4)') nonlocal0
           write(nfout,'(1x,"!* nppcorr read = ",i4)') nppcorr0
      end if
! check kv3_ek
      if(kv3_ek0/=kv3_ek) then
         if(ipri==1) write(nfout,'(1x,"!!* kv3_ek is different : it should be ",i4," but kv3_ek read is ",i4)') kv3_ek, kv3_ek0
         goto 99
      end if
! check nspin
      if(nspin0/=nspin) then
         if(ipri==1) write(nfout,'(1x,"!!* nspin is different : it should be ",i2," but nspin read is ",i2)') nspin, nspin0
         goto 99
      end if
! check neg
      if(nrd_efermi0/=nrd_efermi) then
         if(ipri==1) write(nfout,'(1x,"!!* nrd_efermi is different : it shound be = ",i2," but nrd_efermi read is ",i2)') &
          & nrd_efermi, nrd_efermi0
         goto 99
      end if
      if(nrd_efermi==0.and.neg0/=neg) then
         if(ipri==1) write(nfout,'(1x,"!!* neg0 is different : it should be ",i4," but neg read is ",i4)') neg, neg0
         goto 99
      end if
      if(nonlocal0/=nonlocal.or.nppcorr0/=nppcorr) then
         if(ipri==1) write(nfout,'(1x,"!!* transition moment type is different")')
         if(nonlocal0/=nonlocal0) then
            if(ipri==1) write(nfout,'(1x,"!!* nonlocal should be ",i2," but nonlocal read is ",i2)') nonlocal, nonlocal0
         end if
         if(nppcorr0/=nppcorr) then
            if(ipri==1) write(nfout,'(1x,"!!* nppcorr should be ",i2," but nppcorr read is ",i2)') nppcorr, nppcorr0
         end if
         goto 99
      end if
      return
91    if(ipri==1) then
         write(nfout,'("there is no restart header data")')
         write(nfout,'(1x,"!!* UVSOR-Epsilon stop at restart_setup")')
         stop 
      end if
99    if(ipri==1) write(nfout,'(1x,"!!* UVSOR-Epsilon stop at restart_setup")')
      stop
    end subroutine rd_epscont_header

    subroutine rd_kpt_index(status)
      integer              :: kpt_index
      character(len=3)     :: smarker
      character(len=7)     :: emarker
      logical, intent(out) :: status
      read(nfepscont,*,end=99) smarker
      if(smarker /= "kpt") return
      read(nfepscont,*) kpt_index
      read(nfepscont,*) emarker
      if(kpt_index == ik.and.emarker == "kpt_end") status = .true.
99    return
    end subroutine rd_kpt_index

    subroutine rd_n2_mpi_ek(status)
      logical, intent(out)    :: status
      character(len=9)        :: smarker
      character(len=13)       :: emarker
      status = .false.
      read(nfepscont,*,end=99) smarker
      if(smarker /= "n2_mpi_ek") return
      read(nfepscont,*) n2_mpi_ek(1:neg,ik)
      read(nfepscont,*) emarker
      if(emarker == "n2_mpi_ek_end") status = .true.
99    return
    end subroutine rd_n2_mpi_ek

    subroutine rd_ind_vb(status)
      logical, intent(out)    :: status
      character(len=6)        :: smarker
      character(len=10)       :: emarker
      status = .false.
      read(nfepscont,*,end=99) smarker
      if(smarker /= "ind_vb") return
      read(nfepscont,*) ind_vb(1:neg,ik)
      read(nfepscont,*) emarker
      if(emarker == "ind_vb_end") status = .true.
99    return
    end subroutine rd_ind_vb

    subroutine rd_ind_cb(status)
      logical, intent(out)    :: status
      character(len=6)        :: smarker
      character(len=10)       :: emarker
      status = .false.
      read(nfepscont,*,end=99) smarker
      if(smarker /= "ind_cb") return
      read(nfepscont,*) ind_cb(1:neg,ik)
      read(nfepscont,*) emarker
      if(emarker == "ind_cb_end") status = .true.
99    return
    end subroutine rd_ind_cb

    subroutine rd_trm(status)
      logical, intent(out)   :: status
      character(len=3)       :: smarker
      character(len=7)       :: emarker
      status=.false.
      read(nfepscont,*,end=99) smarker
      if(smarker /= "trm") return
      read(nfepscont,*) trm(ik,1:nv,1:nc,1:3,1:2,1:2)
      read(nfepscont,*) emarker
      if(emarker == "trm_end") status = .true.
99    return
    end subroutine rd_trm

    subroutine rd_eb_ek(status)
      logical, intent(out)   :: status
      character(len=5)       :: smarker
      character(len=9)       :: emarker
      status=.false.
      read(nfepscont,*,end=99) smarker
      if(smarker /= "eb_ek") return
      read(nfepscont,*) eb_ek(ik,1:neg)
      read(nfepscont,*) emarker
      if(emarker == "eb_ek_end") status = .true.
99    return
    end subroutine rd_eb_ek
 end subroutine restart_setup

 subroutine eps_for_photon(nfout,nstep,u,p,ptype)
    implicit none
!
!   calculatie dielectric function for linear and circular polarized photons
!
    integer,intent(in)                     :: nfout,nstep,ptype
    real(kind=DP),dimension(3)             :: u,p
    real(kind=DP),allocatable,dimension(:) :: r_eps_wk,i_eps_wk
    allocate(r_eps_wk(nstep)); r_eps_wk=0.0d0
    allocate(i_eps_wk(nstep)); i_eps_wk=0.0d0
    if(ptype==1) call eps_for_lpp(u,nstep,r_eps_wk,i_eps_wk)
    if(ptype==0) call eps_for_cpp(p,nstep,r_eps_wk,i_eps_wk)
    reps(1:nstep,1)=r_eps_wk(1:nstep)
    imeps(1:nstep,1)=i_eps_wk(1:nstep)
    deallocate(r_eps_wk)
    deallocate(i_eps_wk)
    contains
     subroutine eps_for_lpp(u,nstep,r_eps_wk,i_eps_wk)
       implicit none
!
! subroutine for linear polarized photon
!
       integer,intent(in) :: nstep
       integer :: i
       real(kind=DP) :: re,ie
       real(kind=DP),dimension(3) :: u,wk
       real(kind=DP) :: r_eps_wk(nstep),i_eps_wk(nstep)
! Real Part
       do i=1,nstep
          wk(1)=reps(i,1)*u(1)+reps(i,4)*u(2)+reps(i,5)*u(3)
          wk(2)=reps(i,4)*u(1)+reps(i,2)*u(2)+reps(i,6)*u(3)
          wk(3)=reps(i,5)*u(1)+reps(i,6)*u(2)+reps(i,3)*u(3)
          call scalar(wk,u,re)
          r_eps_wk(i)=re
        end do
! imaginary part
       do i=1,nstep
          wk(1)=imeps(i,1)*u(1)+imeps(i,4)*u(2)+imeps(i,5)*u(3)
          wk(2)=imeps(i,4)*u(1)+imeps(i,2)*u(2)+imeps(i,6)*u(3)
          wk(3)=imeps(i,5)*u(1)+imeps(i,6)*u(2)+imeps(i,3)*u(3)
          call scalar(wk,u,ie)
          i_eps_wk(i)=ie
       end do
     end subroutine eps_for_lpp

     subroutine eps_for_cpp(p,nstep,r_eps_wk,i_eps_wk)
       implicit none
!
! subroutine for circular polarized photon
!
       integer,intent(in) :: nstep
       integer :: i
       real(kind=DP)              :: re,ie,fx2,fy2,fz2,fxy,fxz,fyz
       real(kind=DP),dimension(3) :: p,wk
       real(kind=DP)              :: r_eps_wk(nstep),i_eps_wk(nstep)
       call calc_factor(p,fx2,fy2,fz2,fxy,fxz,fyz)
       do i=1,nstep
          re=0.0d0
          ie=0.0d0
          re=reps(i,1)*fx2+reps(i,2)*fy2+reps(i,3)*fz2 &
         &  +reps(i,4)*fxy+reps(i,5)*fxz+reps(i,6)*fyz
          ie=imeps(i,1)*fx2+imeps(i,2)*fy2+imeps(i,3)*fz2 &
         &  +imeps(i,4)*fxy+imeps(i,5)*fxz+imeps(i,6)*fyz
          r_eps_wk(i)=re
          i_eps_wk(i)=ie
       end do
     end subroutine eps_for_cpp
    
     subroutine calc_factor(p,fx2,fy2,fz2,fxy,fxz,fyz)
       implicit none
! subroutine calculating factors used in eps_for_cpp
       real(kind=DP),dimension(3) :: p
       real(kind=DP)              :: fx2,fy2,fz2,fxy,fxz,fyz
       real(kind=DP)              :: theta,phi
! set theta and phi
       theta=dacos(p(1))
       phi=dacos(p(3))
! set factors
       fx2=0.5d0*(dcos(theta)**2*dcos(phi)**2+dsin(theta)**2)
       fy2=0.5d0*(dsin(theta)**2*dcos(phi)**2+dcos(theta)**2)
       fz2=0.5d0*dsin(phi)**2
       fxy=dcos(theta)*dsin(theta)*(dcos(phi)**2-1.0d0)
       fxz=-1.0d0*dcos(theta)*dcos(phi)*dsin(phi)
       fyz=-1.0d0*dsin(theta)*dcos(phi)*dsin(phi)
     end subroutine calc_factor
 end subroutine eps_for_photon

! ================= KT_add ==================== 13.0S
 subroutine eps_for_corelevel_spectrum(nfout,nstep,u)
    implicit none

    integer,intent(in)                     :: nfout,nstep
    real(kind=DP),dimension(3)             :: u
    real(kind=DP),allocatable,dimension(:) :: r_eps_wk,i_eps_wk

    allocate(r_eps_wk(nstep)); r_eps_wk=0.0d0
    allocate(i_eps_wk(nstep)); i_eps_wk=0.0d0

    call eps_for_core2val(u,nstep,r_eps_wk,i_eps_wk)

    reps(1:nstep,1)=r_eps_wk(1:nstep)
    imeps(1:nstep,1)=i_eps_wk(1:nstep)

    deallocate(r_eps_wk);   deallocate(i_eps_wk)

  contains
    subroutine eps_for_core2val(u,nstep,r_eps_wk,i_eps_wk)
       implicit none
!
! subroutine for linear polarized photon
!
       integer,intent(in) :: nstep
       integer :: i
       real(kind=DP) :: re,ie
       real(kind=DP),dimension(3) :: u,wk
       real(kind=DP) :: r_eps_wk(nstep),i_eps_wk(nstep)
! Real Part
       do i=1,nstep
          wk(1)=reps(i,1)*u(1)+reps(i,4)*u(2)+reps(i,5)*u(3)
          wk(2)=reps(i,4)*u(1)+reps(i,2)*u(2)+reps(i,6)*u(3)
          wk(3)=reps(i,5)*u(1)+reps(i,6)*u(2)+reps(i,3)*u(3)
          call scalar(wk,u,re)
          r_eps_wk(i)=re
        end do
! imaginary part
       do i=1,nstep
          wk(1)=imeps(i,1)*u(1)+imeps(i,4)*u(2)+imeps(i,5)*u(3)
          wk(2)=imeps(i,4)*u(1)+imeps(i,2)*u(2)+imeps(i,6)*u(3)
          wk(3)=imeps(i,5)*u(1)+imeps(i,6)*u(2)+imeps(i,3)*u(3)
          call scalar(wk,u,ie)
          i_eps_wk(i)=ie
       end do
     end subroutine eps_for_core2val
   end subroutine eps_for_corelevel_spectrum
! ================================================= 13.0S

 subroutine eps_out(nfout,nstep,ptype)
    implicit none
!
!   dielectric function output
!
    integer, intent(in) :: nfout,nstep,ptype
    integer             :: istep, mfigure, indxy
    real(kind=DP)       :: absc_au_to_MKS, scale
    integer             :: istart

    absc_au_to_MKS = hartree/(planck_constant*au_of_velocity)
    scale          = 10.0d8
    absc_au_to_MKS = absc_au_to_MKS/scale

    if(ndrude == 0) then
       istart = 1
    else if(ndrude > 0) then
       istart = 2
    end if

    if(ptype>=0) then
       if(mype == 0) write(nfepsout,10)
       if(mype == 0) write(nfepsout,20)
       do istep=istart, nstep
          call check_figure_of_ireps(1,mfigure)
          if(mfigure>4) cycle
          if(mype == 0) write(nfepsout,30) e(istep)*hartree_in_eV,reps(istep,1),imeps(istep,1),&
         &refr(istep), refi(istep), absc(istep)*absc_au_to_MKS, reflc(istep)
       end do
       if(e(1).eq.0.0d0) then
          if(mype==0) write(nfout,40) reps(1,1)
       end if
       if(mype == 0) write(nfout,50)

    else
       if(mype == 0) then
          write(nfepsout,60)
          write(nfepsout,70)
          do istep=istart, nstep
             call check_figure_of_ireps(6,mfigure)
             if(mfigure>4) cycle
             if(mype == 0) write(nfepsout,80) e(istep)*hartree_in_eV, reps(istep,1:6)
             if(mype == 0) write(nfepsout,90) imeps(istep,1:6)
          end do
          if(e(1).eq.0.0d0.and.ndrude==0) then
             write(nfout,100)
             write(nfout,110) (reps(1,indxy),indxy=1,3)
             write(nfout,120) (reps(1,indxy),indxy=4,6)
          end if
          write(nfout,130)
       end if
! ===== KT_add ====== 13.1R
       call wd_optical_coeff_linear
! =================== 13.1R
    end if
10  format(16x,"Dielectric Function",36x,"Optical Properties")
20  format(1x,"Photon Energy(eV)",3x,"Real Part",3x,"Imaginary Part",10x,"n",14x,"k",6x,"abs(in 10**9 m-1)",6x,"R")
30  format(3x,f10.5,6x,f10.5,5x,f10.5,7x,f10.5,5x,f10.5,5x,f10.5,5x,f10.5)
40  format(1x,"!* static dielectric constant =",f10.5)
50  format(1x,"!* dielectric Function and optical properties are written in F_EPSOUT.")
60  format(30x,"Dielectric Tensor Component(Imaginary part is in parenthesis)")
70  format(/1x,"Photon Energy(eV)",5x,"xx",13x,"yy",13x,"zz",13x,"xy",13x,"xz",13x,"yz")
80  format(3x,f10.5,6x,6(f10.5,5x))
90  format(18x,6("(",f10.5,")",3x))
100 format(2x,"Static Dielectric Tensor")
110 format(3x,"xx = ",f10.5,2x,"yy = ",f10.5,2x,"zz = ",f10.5)
120 format(3x,"xy = ",f10.5,2x,"xz = ",f10.5,2x,"yz = ",f10.5)
130 format(1x,"!* Dielectric Tensor is written in F_EPSOUT")
    contains
     subroutine check_figure_of_ireps(indxym,mfigure)
       integer, intent(in)  :: indxym
       integer, intent(out) :: mfigure
       integer              :: r_order, i_order, mfigure1
       mfigure = 0
       do indxy = 1, indxym
          r_order=forder(reps(istep,indxy))
          i_order=forder(imeps(istep,indxy))
          if(reps(istep,indxy)<0.0d0) r_order=r_order+1
          if(imeps(istep,indxy)<0.0d0) i_order=i_order+1
          mfigure1=r_order
          if(i_order>r_order) mfigure=i_order
          if(mfigure1>mfigure) mfigure=mfigure1
       end do
     end subroutine check_figure_of_ireps

     integer function forder(x)
        integer                   :: iorder, z
        real(kind=DP), intent(in) :: x
        real(kind=DP)             :: y
        do iorder = 1, 10
           if(iorder == 1) then
              y = 1.d0
           else
              y=10.0d0**(iorder-1)
           end if
           z=int(x/y)
           if(z==0) then
              forder=iorder-1
              exit
           end if
        end do
     end function forder

! === KT_add ==== 13.1R
     subroutine wd_optical_coeff_linear
       integer :: i, j
       real(kind=DP) :: ctmp(3,3)

       if ( mype /=0 ) return

       call m_Files_open_nfoptical_coeff(.false.)
       write(nfoptical_coeff,'(A)') '# Dielectric function (real part, at 0 eV)'
       write(nfoptical_coeff,*) 'i  j  eps'
       ctmp(1,1) = reps(1,1);  ctmp(2,2) = reps(1,2);  ctmp(3,3) = reps(1,3)
       ctmp(1,2) = reps(1,4);  ctmp(1,3) = reps(1,5);  ctmp(2,3) = reps(1,6)
       ctmp(2,1) = ctmp(1,2);  ctmp(3,1) = ctmp(1,3);  ctmp(3,2) = ctmp(2,3)
       Do i=1, 3
          Do j=1, 3
             write(nfoptical_coeff,'(2I6,F25.20)') i, j, ctmp(i,j)
          End do
       End do
       close( nfoptical_coeff )
     end subroutine wd_optical_coeff_linear
! ================ 13.1R

 end subroutine eps_out

 subroutine eps_out_corelevel_spectrum(nfout,nstep)
    implicit none
    integer, intent(in) :: nfout, nstep

    integer :: lun, istep
    real(kind=DP) :: emin,ene

    if ( mype /= 0 ) return

#if 0
    emin = 0.0d0
#else
!    emin = efermi - maxval( ene_core_states ) +e_low
!    emin = emin_core_spectrum +e_low
#endif

    lun = nfepsout

    write(lun,'("#",12x," Spectrum data ")')
    write(lun,'("#",6x,"Energy[eV]", 8x, "Spectrum ")')

    if ( qnum_l_to_probe > 0 .and. mimic_soc_split_spectrum ) then
       if ( ene_initial_state_splitting /= 0.0d0 ) then
          call mimic_soc_splitting
       endif
    end if

    Do istep=1, nstep
!       ene = emin + e(istep)
       ene = e(istep)
       write(lun,'(E18.10,E18.10)') ene*Hartree_in_eV, imeps(istep,1)
    End Do

  contains

    subroutine mimic_soc_splitting
      integer :: istep, jstep, ngeta
      real(kind=DP) :: c1, c2

      ngeta = nint( ene_initial_state_splitting /e_step )
      c1 = qnum_l_to_probe /( 2.0d0*qnum_l_to_probe +1.0d0 )
      c2 = 1.0d0 -c1
      
      imeps(:,2) = 0.0d0
      Do istep=1, nstep
         imeps(istep,2) = imeps(istep,2) + c2 *imeps(istep,1)
         jstep = istep +ngeta
         if ( jstep <=nstep ) then
            imeps(jstep,2) = imeps(jstep,2) + c1 *imeps(istep,1)
         endif
      End Do
      imeps(:,1) = imeps(:,2)
    end subroutine mimic_soc_splitting

  end subroutine eps_out_corelevel_spectrum
! ==================================================== 13.0S

! ==== KT_add === 13.1R
  subroutine set_dielectric_tensor
    integer :: istep, istart

    if(ndrude == 0) then
       istart = 1
    else if(ndrude > 0) then
       istart = 2
    end if
    istep = istart

    eps_omega_eq_0(1,1) = reps(istep,1)
    eps_omega_eq_0(2,2) = reps(istep,2)
    eps_omega_eq_0(3,3) = reps(istep,3)
    eps_omega_eq_0(1,2) = reps(istep,4);   eps_omega_eq_0(2,1) = reps(istep,4)
    eps_omega_eq_0(1,3) = reps(istep,5);   eps_omega_eq_0(3,1) = reps(istep,5)
    eps_omega_eq_0(2,3) = reps(istep,6);   eps_omega_eq_0(3,2) = reps(istep,6)

  end subroutine set_dielectric_tensor
! =============== 13.1R

 subroutine nlo_out(nfout,nstep)
    implicit none
    integer, intent(in) :: nfout, nstep
    integer             :: istep,indxy
    if(nlo==0) return
    if(nlo==1) call chi2_out
    if(nlo==2) call chi3_out

! ==== KT_add === 13.1R
    if ( nlo == 1 ) call wd_optical_coeff_nonlinear2
! =============== 13.1R

 contains
  subroutine chi2_out
    implicit none
    character(len=3), dimension(18) :: ind_chi2
    data ind_chi2 /'xxx','xxy','xxz','xyy','xyz','xzz', &
                 & 'yxx','yxy','yxz','yyy','yyz','yzz', &
                 & 'zxx','zxy','zxz','zyy','zyz','zzz'/
    if(mype == 0.and.printable) then
       write(nfnlo,'(3x,"SHG susceptibility Tensor (10d-8 esu)")')
       do indxy = 1, 18
          write(nfnlo,'(1x,a3)') ind_chi2(indxy)
          write(nfnlo,'(1x,"Photon Energy(eV)",3x,"real part",3x,"imaginary part",7x,"abs")')
          do istep = 1, nstep
             if(mype == 0) write(nfnlo,'(3x,f10.5,6x,3(f10.5,5x))') e(istep)*hartree_in_eV, &
           & rechi2(istep,indxy)*chi2_in_au, imchi2(istep,indxy)*chi2_in_au, &
           & dsqrt(rechi2(istep,indxy)**2+imchi2(istep,indxy)**2)*chi2_in_au
          end do
          write(nfnlo,'(1x,"***")')
       end do
    end if
    if(e(1).eq.0.0d0.and.printable) then
       write(nfout,'(2x,"Static SHG Susceptibility Tensor (10-8 esu)")')
       if(virt_ex_type == ALL_TYPE) write(nfout,'(2x," SHG prrocess = all type excitation")')
       if(virt_ex_type == ELECTRON) write(nfout,'(2x," SHG process = electron excitation")')
       if(virt_ex_type == HOLE) write(nfout,'(2x," SHG process = hole excitation")')
       if(nlo_term == ALL_TERM) write(nfout,'(2x," SHG term = all terms")')
       if(nlo_term == OMEGA_TERM) write(nfout,'(2x," SHG term = omega term")')
       if(nlo_term == OMEGA2_TERM) write(nfout,'(2x," SHG term = omega2 term")')
       if(nlo_band == INTRA_BAND.and.virt_ex_type == HOLE) &
         &  write(nfout,'(1x,"!!* there is no SHG intraband term from virtual hole excitations")')

       write(nfout,'(3x,"xxx = ",f10.5,2x,"xxy = ",f10.5,2x,"xxz = ",f10.5)') (rechi2(1,indxy)*chi2_in_au,indxy=1,3)
       write(nfout,'(3x,"xyy = ",f10.5,2x,"xyz = ",f10.5,2x,"xzz = ",f10.5)') (rechi2(1,indxy)*chi2_in_au,indxy=4,6)
       write(nfout,'(3x,"yxx = ",f10.5,2x,"yxy = ",f10.5,2x,"yxz = ",f10.5)') (rechi2(1,indxy)*chi2_in_au,indxy=7,9)
       write(nfout,'(3x,"yyy = ",f10.5,2x,"yyz = ",f10.5,2x,"yzz = ",f10.5)') (rechi2(1,indxy)*chi2_in_au,indxy=10,12)
       write(nfout,'(3x,"zxx = ",f10.5,2x,"zxy = ",f10.5,2x,"zxz = ",f10.5)') (rechi2(1,indxy)*chi2_in_au,indxy=13,15)
       write(nfout,'(3x,"zyy = ",f10.5,2x,"zyz = ",f10.5,2x,"zzz = ",f10.5)') (rechi2(1,indxy)*chi2_in_au,indxy=16,18)
    end if
    write(nfout,'(1x,"!* SHG susceptibility tensor is written in F_NLO")')
  end subroutine chi2_out

! === KT_add ==== 13.1R
  subroutine wd_optical_coeff_nonlinear2
    integer :: i, j, k
    real(kind=DP) :: ctmp(3,3,3)

    if ( mype /= 0 ) return

    call m_Files_open_nfoptical_coeff(.true.)
    write(nfoptical_coeff,'(A)') '# Second NonLinear optical coeff (real part, at 0 eV)'
    write(nfoptical_coeff,*) 'i  j  k  chi'

    ctmp(1,1,1) = rechi2(1, 1); ctmp(1,1,2) = rechi2(1, 2); ctmp(1,1,3) = rechi2(1, 3)
    ctmp(1,2,2) = rechi2(1, 4); ctmp(1,2,3) = rechi2(1, 5); ctmp(1,3,3) = rechi2(1, 6)
    ctmp(2,1,1) = rechi2(1, 7); ctmp(2,1,2) = rechi2(1, 8); ctmp(2,1,3) = rechi2(1, 9)
    ctmp(2,2,2) = rechi2(1,10); ctmp(2,2,3) = rechi2(1,11); ctmp(2,3,3) = rechi2(1,12)
    ctmp(3,1,1) = rechi2(1,13); ctmp(3,1,2) = rechi2(1,14); ctmp(3,1,3) = rechi2(1,15)
    ctmp(3,2,2) = rechi2(1,16); ctmp(3,2,3) = rechi2(1,17); ctmp(3,3,3) = rechi2(1,18)

    ctmp(1,2,1) = ctmp(1,1,2);  ctmp(1,3,1) = ctmp(1,1,3);  ctmp(1,3,2) = ctmp(1,2,3)
    ctmp(2,2,1) = ctmp(2,1,2);  ctmp(2,3,1) = ctmp(2,1,3);  ctmp(2,3,2) = ctmp(2,2,3)
    ctmp(3,2,1) = ctmp(3,1,2);  ctmp(3,3,1) = ctmp(3,1,3);  ctmp(3,3,2) = ctmp(3,2,3)

!    ctmp = ctmp *chi2_in_au

    Do i=1, 3
       Do j=1, 3
          Do k=1, 3
             write(nfoptical_coeff,'(3I6,F25.20)') i, j, k, ctmp(i,j,k)
          End do
       End do
    End do
    close( nfoptical_coeff )
  end subroutine wd_optical_coeff_nonlinear2
! ================ 13.1R

  subroutine chi3_out
    implicit none
    integer :: i
    character(len=4), dimension(30) :: ind_chi3
    data ind_chi3 /'xxxx','xxxy','xxxz','xxyy','xxyz','xxzz','xyyy','xyyz','xyzz','xzzz', &
                & 'yxxx','yxxy','yxxz','yxyy','yxyz','yxzz','yyyy','yyyz','yyzz','yzzz', &
                & 'zxxx','zxxy','zxxz','zxyy','zxyz','zxzz','zyyy','zyyz','zyzz','zzzz'/
    if(mype == 0.and.printable) then
       write(nfnlo,'(3x,"THG susceptibility Tensor (10d-12 esu)")')
       do indxy = 1, 30
          write(nfnlo,'(1x,a4)') ind_chi3(indxy)
          write(nfnlo,'(1x,"Photon Energy(eV)",3x,"real part",3x,"imaginary part",7x,"abs")')
          do istep = 1, nstep
             if(mype == 0) write(nfnlo,'(3x,f10.5,6x,3(f10.5,5x))') e(istep)*hartree_in_eV, &
           & rechi3(istep,indxy)*chi3_in_au, imchi3(istep,indxy)*chi3_in_au, &
           & dsqrt(rechi3(istep,indxy)**2+imchi3(istep,indxy)**2)*chi3_in_au
          end do
          write(nfnlo,'(1x,"***")')
       end do
    end if
    if(e(1).eq.0.0d0.and.printable) then
       write(nfout,'(2x,"Static THG Susceptibility Tensor (10-12 esu)")')
       if(virt_ex_type == ALL_TYPE) write(nfout,'(2x," THG prrocess = all type excitation")')
       if(virt_ex_type == ELECTRON) write(nfout,'(2x," THG process = electron excitation")')
       if(virt_ex_type == HOLE) write(nfout,'(2x," THG process = hole excitation")')
       if(virt_ex_type == THREE_LEVEL) write(nfout,'(2x," THG process = three state excitation")')
       if(nlo_band == ALL_BAND) write(nfout,'(2x," excitation = inter + intraband")')
       if(nlo_band == INTER_BAND) write(nfout,'(2x," excitation = interband")')
       if(nlo_band == INTRA_BAND) write(nfout,'(2x," excitation = intraband")')
       if(nlo_term == ALL_TERM) write(nfout,'(2x," THG term = all terms")')
       if(nlo_term == OMEGA_TERM) write(nfout,'(2x," THG term = omega term")')
       if(nlo_term == OMEGA2_TERM) write(nfout,'(2x," THG term = omega2 term")')
       if(nlo_term == OMEGA3_TERM) write(nfout,'(2x," THG term = omega3 term")')
       if(nlo_band == INTRA_BAND.and.virt_ex_type == THREE_LEVEL) &
         &  write(nfout,'(1x,"!!* there is no THG intraband term from three state excitations")')
       if(nlo_term==OMEGA2_TERM.and.virt_ex_type == THREE_LEVEL) &
     & write(nfout,'(1x, "!!* Three level excitation has no omega2 term")')

       do indxy = 1, 28, 3
          write(nfout,'(3x,3(a4," = ",f10.5,3x))') (ind_chi3(i),rechi3(1,i)*chi3_in_au, i=indxy, indxy+2)
       end do
    end if
    write(nfout,'(1x,"!* THG susceptibility tensor is written in F_NLO")')
  end subroutine chi3_out
 end subroutine nlo_out

 subroutine magopt_out(nfout)
    implicit none
    integer, intent(in) :: nfout
    integer :: i
    real(kind=DP) ::  sigma_in_inv_sec
    if(mype == 0) write(nfmagopt,'(24x,"sigma_xx(10**16/sec)",10x,"sigma_xy(10**16/sec)",&
                 & 6x,"Kerr rotation angle",3x,"Kerr ellipticity")')
    if(mype == 0) write(nfmagopt,'(1x,"Photon Energy(eV)",2(3x,"Real Part",4x,"Imaginary Part"))')
    sigma_in_inv_sec =4.13408! 1 au= 4.13408*10d16 sec-1
    do i=1, nstep
       if(ndrude/=0.and.e(i)*hartree_in_eV <=0.1d0) cycle
       if(mype == 0) write(nfmagopt,'(3x,f10.5,5x,4(f11.5,4x),4x,2(f11.5,9x))') &
         & e(i)*hartree_in_eV, &
         & optcr_l(i,1)*sigma_in_inv_sec, optci_l(i,1)*sigma_in_inv_sec,&
         & optcr_l(i,2)*sigma_in_inv_sec, optci_l(i,2)*sigma_in_inv_sec,&
         & kerr_rotation(i), kerr_ellipticity(i)
    end do
    write(nfout,'(1x,"!* Magneto-optical data were written in file MAGOPT.")')
 end subroutine magopt_out

 subroutine norm(x,xlen)
    implicit none
    real(DP), dimension(3) :: x
    real(DP)               :: xlen
    xlen = dsqrt(x(1)**2 + x(2)**2 + x(3)**2)
    if(xlen > 1.d-15) then
       x(1) = x(1)/xlen
       x(2) = x(2)/xlen
       x(3) = x(3)/xlen
    else
       x(1:3) = 1.d0/dsqrt(3.d0)
    end if
 end subroutine norm

 subroutine ordering_ek(nfout)
    integer, intent(in)              :: nfout
    integer                          :: ik, ie, nb
    integer, allocatable, dimension(:,:) :: n_mpi  ! MPI
    real(DP),allocatable, dimension(:,:) :: e_mpi  ! MPI
    allocate(n_mpi(neg,kv3));  ! allocate(n2_mpi(neg,kv3))! MPI
    allocate(e_mpi(neg,kv3));  ! allocate(e2_mpi(neg,kv3))! MPI
!
! eigenvalue ordering
! a part of subroutine wd_EigenValues of m_ES_IO
! Tomoyuki Hamada, Univ Tokyo, Feb 3, 2003
!
    if(ipri >= 1) then
       write(nfout,'(" kv3 = ",i8)') kv3
    end if

    n_mpi = 0                                          ! MPI

! ==== KT_mod ============ 13.0S
!    do ik = 1, kv3                                     ! MPI
!       if(map_k(ik) /= myrank_k) cycle                 ! MPI
!       n_mpi(1:neg,ik) = neordr(1:neg,ik)              ! MPI
!    end do                                             ! MPI
!
    if ( noncol ) then
       do ik = 1, kv3, ndim_spinor
          if(map_k(ik) /= myrank_k) cycle                
          n_mpi(1:neg,ik)   = neordr(1:neg,ik)             
          n_mpi(1:neg,ik+1) = neordr(1:neg,ik)             
       end do                                            
    else
       do ik = 1, kv3                                     ! MPI
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          n_mpi(1:neg,ik) = neordr(1:neg,ik)              ! MPI
       end do                                             ! MPI
    endif
! ======================= 13.0S

    if(npes >= 2) then
       call mpi_allreduce(n_mpi,n2_mpi,neg*kv3,mpi_integer,mpi_sum &
            &                      ,mpi_ge_world,ierr)  ! MPI
    else
       n2_mpi = n_mpi
    end if

    e_mpi = 0.d0                                       ! MPI

! ==== KT_mod ============ 13.0S
!    do ik = 1, kv3                                     ! MPI
!       if(map_k(ik) /= myrank_k) cycle                 ! MPI
!       do ie = 1, neg                                  ! MPI
!          if(map_e(ie) /= myrank_e) cycle              ! MPI
!          e_mpi(ie,ik) = eko_l(map_z(ie),ik)           ! MPI
!       end do
!    end do
!
    if ( noncol ) then
       do ik = 1, kv3, ndim_spinor
          if(map_k(ik) /= myrank_k) cycle                 
          do ie = 1, neg                                  
             if(map_e(ie) /= myrank_e) cycle              
             e_mpi(ie,ik)   = eko_l(map_z(ie),ik)           
             e_mpi(ie,ik+1) = eko_l(map_z(ie),ik)           
          end do
       end do
    else
       do ik = 1, kv3                                     ! MPI
          if(map_k(ik) /= myrank_k) cycle                 ! MPI
          do ie = 1, neg                                  ! MPI
             if(map_e(ie) /= myrank_e) cycle              ! MPI
             e_mpi(ie,ik) = eko_l(map_z(ie),ik)           ! MPI
          end do
       end do
    endif
! ======================= 13.0S

    if(npes >= 2) then
       call mpi_allreduce(MPI_IN_PLACE,e_mpi,neg*kv3,mpi_double_precision &
            &               ,mpi_sum,mpi_kg_world,ierr) ! MPI
       call mpi_allreduce(e_mpi,e2_mpi,neg*kv3,mpi_double_precision &
            &               ,mpi_sum,mpi_ge_world,ierr) ! MPI
    else
       e2_mpi = e_mpi
    end if
    do ik = 1, kv3
       if(nk_in_the_process+ik-1 <= kv3_ek) &
            & n2_mpi_ek(1:neg,nk_in_the_process+ik-1) = n2_mpi(1:neg,ik)
    end do

    if(ipriepsilon >= 2) then
       do ik = 1, kv3
          write(nfout,'(" ik = ",i8)') ik
          write(nfout,'(" *** n2_mpi *** ")')
          write(nfout,'(20i5)') n2_mpi(1:neg,ik)
          write(nfout,'(" *** e2_mpi ***")')
          write(nfout,'(5f16.10)') e2_mpi(1:neg,ik)
       end do
       write(nfout,'(" *** n2_mpi_ek ***")')
       do ik = 1, kv3_ek
          write(nfout,'(" ik = ",i8)') ik
          write(nfout,'(20i5)') n2_mpi_ek(1:neg,ik)
       end do
    end if
 end subroutine ordering_ek

 subroutine trans_ek(nfout,nfzaj)
!
!   transition moment and transition moment product calculation
!   a derivative of subroutine m_wd_WFs in module m_ES_IO
!   T. Hamada and H. Mizouchi, adv, Jan. 28, 2003
!
    integer, intent(in) :: nfout,nfzaj
    integer             :: ik, ib, i1, ri, nbi, nbj
    integer             :: id_sname = -1
    integer             :: ieigsta, ieigend, jeigsta, jeigend
    integer             :: nb, nvb, ivb, icb
    integer, save       :: nv, nc
    real (DP)           :: ebi, ebj

    if(icond==3.and.nk_in_the_process<=nk_restart_read/nspin) return
! prepare wave function
    call mpi_barrier(mpi_comm_group,ierr)
!2008.1.8
!   rewind(nfzaj)
!2008.1.8
    call trans_ek_prepare
    trm_tmp = 0.0d0
    eb_ek_tmp = 0.0d0
    call set_nv_and_nc
 
    if(nonlocal==1) call vnl_prepare_ek(DELQ)
    if(nppcorr>0) call calc_ptrans_ek
    if(nppcorr==2) call calc_ptrans_TM_PP_ek

! =============================== Added by K. Tagami ================ 0.2
    if ( nonlocal ==1 ) then
       if ( sw_hubbard == ON .and. sw_tm_hubbard_correction == ON ) then
          call dhub_prepare_ek(DELQ)
          rtrans = rtrans + rtrans_hub
       endif
    endif
! ===================================================================
    
    do ik = 1, kv3, af+1
! -->> T. Yamasaki    25 Fer. 2009
!!$       if(ipri >= 1) write(nfout,'(" --- trans_ek  ik = ",i8)') ik
          if(nk_in_the_process+ik-1 > kv3_ek) cycle
          if(map_k(ik) /= myrank_k) cycle
! <<-- T. Yamasaki    25 Fer. 2009
       wf_lb = 0.0d0
       do ib = 1, np_e
          do ri = 1, kimg
             wf_lb(:,neg_g(ib),ri) = zaj_l(:,ib,ik,ri)
          end do
       end do
       call mpi_allreduce(MPI_IN_PLACE,wf_lb,maxval(np_g1k)*neg*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)

!       if(ipri >= 1) then
       if(ipriepsilon >= 2) then
          write(nfout,'(" --- wf_lb --- : nk_in_the_process+ik-1 = ",i8)') nk_in_the_process+ik-1
          do ib = 1, neg
             write(nfout,'(" ib = ",i4,":",10f8.4)') ib,wf_lb(1:10,ib,1)
          end do
       end if
! calculate transition moment
! calculate local part + [Vnl,r] or PPC  part
       if(printable) then
          write(nfout,'(1x,"!* ik = ",i4)') ik
          write(nfout,'(1x,"!* nk_in_the_process+ik-1 = ",i4)') nk_in_the_process+ik-1
          if ( ipriepsilon >=2 ) then
             write(nfout,'(1x,"!* nrd_efermi = ",i4)') nrd_efermi
             write(nfout,'(" band_i, band_f = ",2i8)') band_i, band_f
          endif
       end if
       if(band_i==0.and.band_f==0) then
          if(nrd_efermi == 1) then
             ieigsta = 1;  jeigend = neg
             do nbi = 1, neg
                if(e2_mpi(n2_mpi(nbi,ik),ik)>efermi) then
                   jeigsta = nbi;  ieigend = nbi-1
                   exit
                end if
             end do

!             if(ipri >=1)then
             if(ipriepsilon >=2)then
                write(nfout,'(" ieigsta, ieigend, jeigsta, jeigend = ",4i8)') &
                     & ieigsta, ieigend, jeigsta, jeigend
             end if
             
             do nbi=ieigsta, ieigend
                ebi = e2_mpi(n2_mpi(nbi,ik),ik)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call set_b_and_eb(ik,nbi,nbj,ebi,ebj)
                   call vl_ek(ik,nbi,nbj,ebi,ebj)
                end do
             end do
          else
             ieigsta = 1;  ieigend = neg
             jeigsta = 1;  jeigend = neg
#ifndef NEC_TUNE
             do nbi=ieigsta, ieigend
                ebi = e2_mpi(n2_mpi(nbi,ik),ik)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call set_b_and_eb(ik,nbi,nbj,ebi,ebj)
                   call vl_ek(ik,nbi,nbj,ebi,ebj)
                end do
             end do
#else
             call vl_ek_tune(ik,ieigsta,ieigend,jeigsta,jeigend)
#endif
          end if
       else
          ieigsta=band_i;  ieigend=band_i
          jeigsta=band_f;  jeigend=band_f
          if(e2_mpi(n2_mpi(ieigsta,ik),ik)>efermi.or.e2_mpi(n2_mpi(jeigsta,ik),ik)<=efermi) then
             if(e2_mpi(n2_mpi(ieigsta,ik),ik)>efermi .and. ipri >= 1) then
                write(nfout,'(1x,"!* band_i setting is wrong")')
                write(nfout,'(1x,"!* band_i energy = ",f10.5, &
              & " is larger than efermi = ",f10.5," at nk_in_the_process+ik-1 = ",i4)') &
              & e2_mpi(n2_mpi(ieigsta,ik),ik), efermi, nk_in_the_process+ik-1
             end if
             if(e2_mpi(n2_mpi(jeigsta,ik),ik)<=efermi .and. ipri >= 1) then
                write(nfout,'(1x,"!* band_f setting is wrong")')
                write(nfout,'(1x,"!* band_f energy = ",f10.5, &
              & " is less than efermi = ",f10.5," at nk_in_the_process+ik-1  = ",i4)') &
              & e2_mpi(n2_mpi(band_f,ik),ik), efermi, nk_in_the_process+ik-1
             end if
             stop
          else
             do nbi=ieigsta, ieigend
                ebi = e2_mpi(n2_mpi(nbi,ik),ik)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call set_b_and_eb(ik,nbi,nbj,ebi,ebj)
                   call vl_ek(ik,nbi,nbj,ebi,ebj)
                end do
             end do
          end if
       end if
!       if(ipri >= 1) then
       if(ipriepsilon >= 2) then
          write(nfout,'(1x," ieigsta = ",i4)') ieigsta
          write(nfout,'(1x," ieigend = ",i4)') ieigend
          write(nfout,'(1x," jeigsta = ",i4)') jeigsta
          write(nfout,'(1x," jeigend = ",i4)') jeigend
       end if
! write restart data to nfepscont (restart_mode = 1)
       if(mype==0.and.icond>=2.and.restart_mode == 1) call wd_epscont_data(ik,nv,nc)
    end do
    call mpi_allreduce(MPI_IN_PLACE,trm_tmp,kv3*nv*nc*12,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
    call mpi_allreduce(MPI_IN_PLACE,eb_ek_tmp,kv3*neg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
    do ik = 1, kv3, af+1
       if(nk_in_the_process+ik-1 > kv3_ek) cycle
       trm(nk_in_the_process+ik-1,:,:,:,:,:) = trm_tmp(ik,:,:,:,:,:)
       eb_ek(nk_in_the_process+ik-1,:) = eb_ek_tmp(ik,:)
    end do
    contains

     subroutine set_nv_and_nc
       if(nrd_efermi == 1) then
          nv = num_vb;  nc = num_cb
       else
          nv = neg;     nc = neg
       end if
     end subroutine set_nv_and_nc

     subroutine set_b_and_eb(ik,nbi,nbj,ebi,ebj)
       integer,intent(in) :: ik, nbi, nbj
       real(DP),intent(in) :: ebi, ebj
! -->> T. Yamasaki    25 Fer. 2009
!!$       eb_ek(nk_in_the_process+ik-1,nbi)=ebi
!!$       eb_ek(nk_in_the_process+ik-1,nbj)=ebj
       integer :: ikt
       ikt = nk_in_the_process+ik-1
       if(ikt <= kv3_ek) then
          eb_ek(nk_in_the_process+ik-1,nbi)=ebi
          eb_ek(nk_in_the_process+ik-1,nbj)=ebj
       end if
! <<-- T. Yamasaki    25 Fer. 2009
     end subroutine set_b_and_eb
 end subroutine trans_ek

! ========================== KT_add ========================== 13.0S
 subroutine trans_core2val_ek(nfout,nfzaj)
!
!   transition moment and transition moment product calculation
!   a derivative of subroutine m_wd_WFs in module m_ES_IO
!   T. Hamada and H. Mizouchi, adv, Jan. 28, 2003
!
    integer, intent(in) :: nfout,nfzaj
    integer             :: ik, ib, i1, ri, nbi, nbj
    integer             :: id_sname = -1
    integer             :: ieigsta, ieigend, jeigsta, jeigend
    integer             :: nb, nvb, ivb, icb
    integer, save       :: nv, nc
    real (DP)           :: ebi, ebj

    if(icond==3.and.nk_in_the_process<=nk_restart_read/nspin) return
! prepare wave function
    wf_lb(1:kg1,1:neg,1:kimg)=0.0d0
    call mpi_barrier(mpi_comm_group,ierr)

    call trans_ek_prepare
    call set_nv_and_nc
 
    if(nonlocal==1) call vnl_prepare_ek(DELQ)
    if(nppcorr>0) call calc_ptrans_core2val_ek
    if(nppcorr==2) call calc_ptrans_TM_PP_core2val_ek

    do ik = 1, kv3, af+1
       if(nk_in_the_process+ik-1 > kv3_ek) cycle
       do ib = 1, neg
          wf_l=0.0d0
          if(map_ek(ib,ik) == mype) then                          ! MPI
             do ri = 1, kimg
                wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ib),ik,ri)
             end do
             if(map_ek(ib,ik) /= 0) &                             ! MPI
                &  call mpi_send(wf_l,kg1*kimg,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
          else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
             call mpi_recv(wf_l,kg1*kimg,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
          end if
          if(npes >= 2)  then
             call mpi_bcast(wf_l,kg1*kimg,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
          end if
          do ri = 1, kimg
             wf_lb(1:kg1,ib,ri) = wf_l(1:kg1,ri)
          end do
       end do 

       if(ipri >= 1) then
          write(nfout,'(" --- wf_lb --- : nk_in_the_process+ik-1 = ",i8)') nk_in_the_process+ik-1
          do ib = 1, neg
             write(nfout,'(" ib = ",i4,":",10f8.4)') ib,wf_lb(1:10,ib,1)
          end do
       end if
! calculate transition moment
! calculate local part + [Vnl,r] or PPC  part
       if(printable) then
          write(nfout,'(1x,"!* ik = ",i4)') ik
          write(nfout,'(1x,"!* nk_in_the_process+ik-1 = ",i4)') nk_in_the_process+ik-1
          write(nfout,'(1x,"!* nrd_efermi = ",i4)') nrd_efermi
          write(nfout,'(" band_i, band_f = ",2i8)') band_i, band_f
       end if
       if(band_i==0.and.band_f==0) then
          if(nrd_efermi == 1) then
             ieigsta = 1;  jeigend = neg
             do nbi = 1, neg
                if(e2_mpi(n2_mpi(nbi,ik),ik)>efermi) then
                   jeigsta = nbi;  ieigend = nbi-1
                   exit
                end if
             end do

             do nbi=ieigsta, ieigend
                ebi = e2_mpi(n2_mpi(nbi,ik),ik)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call set_b_and_eb(ik,nbi,nbj,ebi,ebj)
                end do
             end do
             do nbi=1, num_core_states
                ebi = ene_core_states(nbi)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call vl_core2val_ek(ik,nbi,nbj,ebi,ebj)
                end do
             End do

          else
             ieigsta = 1;  ieigend = neg
             jeigsta = 1;  jeigend = neg
             do nbi=ieigsta, ieigend
                ebi = e2_mpi(n2_mpi(nbi,ik),ik)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call set_b_and_eb(ik,nbi,nbj,ebi,ebj)
                end do
             end do
             do nbi= 1, num_core_states
                ebi = ene_core_states(nbi)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call vl_core2val_ek(ik,nbi,nbj,ebi,ebj)
                end do
             end do
          end if
       else
          ieigsta=band_i;  ieigend=band_i
          jeigsta=band_f;  jeigend=band_f
          if(e2_mpi(n2_mpi(ieigsta,ik),ik)>efermi.or.e2_mpi(n2_mpi(jeigsta,ik),ik)<=efermi) then
             if(e2_mpi(n2_mpi(ieigsta,ik),ik)>efermi .and. ipri >= 1) then
                write(nfout,'(1x,"!* band_i setting is wrong")')
                write(nfout,'(1x,"!* band_i energy = ",f10.5, &
              & " is larger than efermi = ",f10.5," at nk_in_the_process+ik-1 = ",i4)') &
              & e2_mpi(n2_mpi(ieigsta,ik),ik), efermi, nk_in_the_process+ik-1
             end if
             if(e2_mpi(n2_mpi(jeigsta,ik),ik)<=efermi .and. ipri >= 1) then
                write(nfout,'(1x,"!* band_f setting is wrong")')
                write(nfout,'(1x,"!* band_f energy = ",f10.5, &
              & " is less than efermi = ",f10.5," at nk_in_the_process+ik-1  = ",i4)') &
              & e2_mpi(n2_mpi(band_f,ik),ik), efermi, nk_in_the_process+ik-1
             end if
             stop
          else
             do nbi=ieigsta, ieigend
                ebi = e2_mpi(n2_mpi(nbi,ik),ik)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call set_b_and_eb(ik,nbi,nbj,ebi,ebj)
                end do
             end do
             do nbi=1, num_core_states
                ebi = ene_core_states(nbi)
                do nbj=jeigsta, jeigend
                   ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                   call vl_core2val_ek(ik,nbi,nbj,ebi,ebj)
                end do
             end do
          end if
       end if
       if(ipri >= 1) then
          write(nfout,'(1x," ieigsta = ",i4)') ieigsta
          write(nfout,'(1x," ieigend = ",i4)') ieigend
          write(nfout,'(1x," jeigsta = ",i4)') jeigsta
          write(nfout,'(1x," jeigend = ",i4)') jeigend
       end if
! write restart data to nfepscont (restart_mode = 1)
       if(mype==0.and.icond>=2.and.restart_mode == 1) call wd_epscont_data(ik,nv,nc)
    end do
    contains

     subroutine set_nv_and_nc
       if(nrd_efermi == 1) then
          nv = num_vb;  nc = num_cb
       else
          nv = neg;     nc = neg
       end if
     end subroutine set_nv_and_nc

     subroutine set_b_and_eb(ik,nbi,nbj,ebi,ebj)
       integer,intent(in) :: ik, nbi, nbj
       real(DP),intent(in) :: ebi, ebj
! -->> T. Yamasaki    25 Fer. 2009
!!$       eb_ek(nk_in_the_process+ik-1,nbi)=ebi
!!$       eb_ek(nk_in_the_process+ik-1,nbj)=ebj
       integer :: ikt
       ikt = nk_in_the_process+ik-1
       if(ikt <= kv3_ek) then
          eb_ek(nk_in_the_process+ik-1,nbi)=ebi
          eb_ek(nk_in_the_process+ik-1,nbj)=ebj
       end if
! <<-- T. Yamasaki    25 Fer. 2009
     end subroutine set_b_and_eb
   end subroutine trans_core2val_ek
! ===================================================== 13.0S

 subroutine wd_epscont_data(ik,nv,nc)
    implicit none
    integer, intent(in) :: ik,nv, nc
! -->> T. Yamasaki    25 Fer. 2009
    if(nk_in_the_process+ik-1 > kv3_ek) return
! <<-- T. Yamasaki    25 Fer. 2009
    if(nk_in_the_process+ik-1 == 1) then
       call wd_epscont_header
       write(nfout,'(1x,"!* header data of restart file is written")')
       write(nfout,'(1x,"   kv3_ek = ",i4)') kv3_ek
       write(nfout,'(1x,"   nspin = ",i4)') nspin
       write(nfout,'(1x,"   kv3_ek = ",i4)') nrd_efermi
       if(nrd_efermi==1) then
          write(nfout,'(1x,"   num_vb = ",i4)') num_vb
          write(nfout,'(1x,"   num_cb = ",i4)') num_cb
       else
          write(nfout,'(1x,"   neg   = ",i4)') neg
       end if
       write(nfout,'(1x,"   nonlocal = ",i4)') nonlocal
       write(nfout,'(1x,"   nppcorr = ",i4)') nppcorr
    end if
    call wd_kpt_index
    write(nfout,'(1x,"!* restart data for ik = ",i4," is written")')  nk_in_the_process+ik-1
    call wd_n2_mpi_ek
    write(nfout,'(1x,"!* n2_mpi_ek data is written down to restart file")')
    call wd_ind_vb
    write(nfout,'(1x,"!* ind_vb data is written down to restart file")')
    call wd_ind_cb
    write(nfout,'(1x,"!* ind_cb data is written down to restart file")')
    call wd_trm
    write(nfout,'(1x,"!* trm data is written down to restart file")')
    call wd_eb_ek
    write(nfout,'(1x,"!* eb_ek data is written down to restart file")')
    contains
     subroutine wd_epscont_header
       if(nrd_efermi == 1) then
          write(nfepscont,*) kv3_ek, nspin, nrd_efermi, num_vb, num_cb, nonlocal, nppcorr
       else
          write(nfepscont,*) kv3_ek, nspin, nrd_efermi, neg, nonlocal, nppcorr
       end if
     end subroutine wd_epscont_header

     subroutine wd_kpt_index
       write(nfepscont,'("kpt")')
       write(nfepscont,*) nk_in_the_process+ik-1
       write(nfepscont,'("kpt_end")')
     end subroutine wd_kpt_index

     subroutine wd_n2_mpi_ek
       write(nfepscont,'("n2_mpi_ek")')
       write(nfepscont,*) n2_mpi_ek(1:neg,nk_in_the_process+ik-1)
       write(nfepscont,'("n2_mpi_ek_end")')
     end subroutine wd_n2_mpi_ek

     subroutine wd_ind_vb
       write(nfepscont,'("ind_vb")')
       write(nfepscont,*) ind_vb(1:neg,nk_in_the_process+ik-1)
       write(nfepscont,'("ind_vb_end")')
     end subroutine wd_ind_vb

     subroutine wd_ind_cb
       write(nfepscont,'("ind_cb")')
       write(nfepscont,*) ind_cb(1:neg,nk_in_the_process+ik-1)
       write(nfepscont,'("ind_cb_end")')
     end subroutine wd_ind_cb

     subroutine wd_trm
        write(nfepscont,'("trm")')
        write(nfepscont,*) trm(nk_in_the_process+ik-1,1:nv,1:nc,1:3,1:2,1:2)
        write(nfepscont,'("trm_end")')
     end subroutine wd_trm

     subroutine wd_eb_ek
        write(nfepscont,'("eb_ek")')
        write(nfepscont,*) eb_ek(nk_in_the_process+ik-1,1:neg)
        write(nfepscont,'("eb_ek_end")')
     end subroutine wd_eb_ek
 end subroutine wd_epscont_data

 subroutine trans_ek_prepare
    implicit none
!
!   set up trm and trm2 arrays and index
!
    integer   :: ik, nbi, nbj, nvb, ivb, icb, n1
    real (DP) :: ebi, ebj

  ! check number of valence bands and set band index(nrd_efermi=1 case)
    if(nrd_efermi == 1) then
       do ik = 1, kv3, af+1
! -->> T. Yamasaki    25 Fer. 2009
          if(nk_in_the_process+ik-1 > kv3_ek) cycle
! <<-- T. Yamasaki    25 Fer. 2009
          nvb=0
          ivb=0
          icb=0
          do nbi=1, neg
             ebi = e2_mpi(n2_mpi(nbi,ik),ik)
             do nbj=1, neg
                ebj = e2_mpi(n2_mpi(nbj,ik),ik)
                if(ebj.gt.efermi.and.ebi.le.efermi) then
                   nvb=nvb+1
                   ivb=ivb+1
                   icb=icb+1
                   call set_ind_vb_and_cb(nbi,nbj,ivb,icb,nvb,ik,nk_in_the_process+ik-1)
                end if
             end do
          end do

          if(.not.trm_rptrans_allocated) then
             if(icond == 2) then
                num_vb = nvb
                num_cb = neg-nvb
             else
                if(num_vb/=nvb.or.num_cb/=neg-nvb) then
                   write(nfout,'("!* restart data is not compatbile with continue data")')
                   write(nfout,'("!* restart num_vb is = ",i4," but num_vb in continuation is = ",i4)') num_vb, nvb
                   write(nfout,'("!* restart num_cb is = ",i4," but num_cb in continuation is = ", i4)') num_cb, neg-nvb
                   write(nfout,'("!* UVSOR-Epsilon stop at trans_ek_prepare")')
                   stop
                end if
             end if
             if(printable) then
                write(nfout,'(2x," no. of valence bands    = ",i4)') num_vb
                write(nfout,'(2x," no. of conduction bands = ",i4)') num_cb
             end if

! ========================= KT_add ============== 13.0S
              if ( sw_corelevel_spectrum == ON ) then
                n1 = num_core_states
             else
                n1 = num_vb
             endif
! ================================================ 13.0S

             call alloc_rptrans_and_trm(n1,num_cb)
          end if

          if(nvb/=num_vb) then
             if(printable) then
                write(nfout,'(2x," - no. of valence or condunction bands at ",i5,"-th k-point is &
           & different from those of 1-st k-point")') nk_in_the_process+ik-1
                write(nfout,'(2x," no. of valence bands at ", i5,"-th k-point =",i5)') nk_in_the_process+ik-1, nvb
                write(nfout,'(2x," no. of valence bands at 1-st k-point =",i5)') num_vb
             end if
             write(nfout,'("!* UVSOR-Epsilon stop at trans_ek_prepare")')
             stop
          end if
       end do
   else
       if(.not.trm_rptrans_allocated) then
          call set_ind_vb_and_cb_default
! ========================= KT_add ============== 13.0S
          if ( sw_corelevel_spectrum == ON ) then
             n1 = num_core_states
          else
             n1 = neg
          endif
! ================================================ 13.0S
          call alloc_rptrans_and_trm(n1,neg)
       end if
   end if
   contains
    subroutine alloc_rptrans_and_trm(nv,nc)
      implicit none
      integer, intent(in) :: nv,nc
      allocate(rtrans(kv3,nc,nv,3,2)); rtrans=0.0d0
      allocate(ptrans(kv3,nc,nv,3,2)); ptrans=0.0d0
! =================================== Added by K. Tagami ======= 0.2
      if ( nonlocal ==1 ) then
         if ( sw_hubbard == ON .and. sw_tm_hubbard_correction == ON ) then
            allocate(rtrans_hub(kv3,nc,nv,3,2)); rtrans_hub = 0.0d0
         endif
      endif
! ==============================================================
      if(icond <=2) then
         allocate(trm(kv3_ek,nv,nc,3,2,2)); trm=0.0d0
         allocate(trm_tmp(kv3,nv,nc,3,2,2)); trm_tmp=0.0d0
      end if
      trm_rptrans_allocated = .true.
      if(printable) then
         write(nfout,'(1x,"!* rptrans(",i2,",",i3,",",i3,", 3, 2) allocated ")') kv3, nv, nc
         write(nfout,'(1x,"!* ptrans(",i2,",",i3,",",i3,", 3, 2) allocated ")') kv3, nv, nc
         if(icond==2) write(nfout,'(1x,"!* trm(",i6,",",i3,",",i3,", 3, 2) allocated ")') kv3_ek, nv, nc
      end if
    end subroutine alloc_rptrans_and_trm

    subroutine set_ind_vb_and_cb(nv,nc,iv,ic,nvb,ik,nk)
      implicit none
      integer, intent(in) :: nv,nc,nk,ik
      integer, intent(inout) :: iv,ic,nvb
      if(ind_vb(nv,nk)==0.and.ind_vb2(n2_mpi(nv,ik),nk)==0) then
         ind_vb(nv,nk)=iv
         ind_vb2(n2_mpi(nv,ik),nk)=iv
      else
         iv=iv-1
         nvb=nvb-1
      end if
      if(ind_cb(nc,nk)==0.and.ind_cb2(n2_mpi(nc,ik),nk)==0) then
         ind_cb(nc,nk)=ic
         ind_cb2(n2_mpi(nc,ik),nk)=ic
      else
         ic=ic-1
      end if
    end subroutine set_ind_vb_and_cb

    subroutine set_ind_vb_and_cb_default
      implicit none
      integer :: ik,i
      do ik=1,kv3_ek
         do i=1,neg
            ind_vb(i,ik)=i
            ind_cb(i,ik)=i
            ind_vb2(i,ik)=i
            ind_cb2(i,ik)=i
         end do
      end do
   end subroutine set_ind_vb_and_cb_default
 end subroutine trans_ek_prepare

 subroutine find_ind_vb_and_cb(ni,nj,iv,ic,ik,ifind)
    implicit none
!
!   find trm and trm2 index of nrd_efermi= 1 case
!
    integer,intent(in)  :: ni,nj,ik
    integer,intent(out) :: iv,ic,ifind
! -->  T. Yamasaki, 2008/02/21 
    integer :: ikt
! <--  T. Yamasaki, 2008/02/21 
    ifind=0
! -->  T. Yamasaki, 2008/02/21 
    ikt = min(ik,kv3_ek)
!!$    iv=ind_vb(ni,ik)
!!$    ic=ind_cb(nj,ik)
    iv=ind_vb(ni,ikt)
    ic=ind_cb(nj,ikt)
! <--  T. Yamasaki, 2008/02/21 
    if(iv/=0.and.ic/=0) then
       ifind=1
    else
       ifind=0
    end if
 end subroutine find_ind_vb_and_cb

 subroutine find_ind_vb_and_cb2(ni,nj,iv,ic,ik,ifind)
!
!   find trm and trm index (energy ordered) of nrd_efrmi=1 case
!
    implicit none
    integer,intent(in)  :: ni,nj,ik
    integer,intent(out) :: iv,ic,ifind
! -->  T. Yamasaki, 2008/02/21 
    integer :: ikt
    ikt = min(ik,kv3_ek)
!!$    iv=ind_vb2(ni,ik)
!!$    ic=ind_cb2(nj,ik)
    iv=ind_vb2(ni,ikt)
    ic=ind_cb2(nj,ikt)
! -->  T. Yamasaki, 2008/02/21 
    if(iv/=0.and.ic/=0) then
       ifind=1
    else
       ifind=0
    end if
 end subroutine find_ind_vb_and_cb2

! ===================== KT_add ===================== 13.0S
 subroutine find_ind_cb_only(nj,ic,ik,ifind)
    implicit none
!
!   find trm and trm2 index of nrd_efermi= 1 case
!
    integer,intent(in)  :: nj,ik
    integer,intent(out) :: ic,ifind

    integer :: ikt

    ifind=0

    ikt = min(ik,kv3_ek)
    ic=ind_cb(nj,ikt)

    if(ic/=0) then
       ifind=1
    else
       ifind=0
    end if
  end subroutine find_ind_cb_only

 subroutine find_ind_cb_only2(nj,ic,ik,ifind)
!
!   find trm and trm index (energy ordered) of nrd_efrmi=1 case
!
    implicit none
    integer,intent(in)  :: nj,ik
    integer,intent(out) :: ic,ifind

    integer :: ikt

    ikt = min(ik,kv3_ek)
    ic = ind_cb2(nj,ikt)

    if(ic/=0) then
       ifind=1
    else
       ifind=0
    end if
  end subroutine find_ind_cb_only2
! ================================================== 13.0S

 subroutine check_ind_vb_and_cb
    implicit none
    integer :: ik
    if(printable) write(nfout,'(1x,"---------- ind_vb and ind_cb list ----------")')
    if(printable) write(nfout,'(1x,"nrd_efermi = ",i3)') nrd_efermi
    if(nrd_efermi == 0) then
       do ik = 1, kv3_ek
          write(nfout,'(1x,"ik = ",i3)') ik
          write(nfout,'(1x,"ind_vb = ")')
          write(nfout,'(1x,10(i3,1x))') ind_vb(1:neg,ik)
          write(nfout,'(1x,"ind_cb = ")')
          write(nfout,'(1x,10(i3,1x))') ind_cb(1:neg,ik)
       end do
    else
       do ik = 1, kv3_ek
          write(nfout,'(1x,"ik = ",i3)') ik
          write(nfout,'(1x,"ind_vb = ")')
          write(nfout,'(1x,10(i3,1x))') ind_vb(1:num_vb,ik)
          write(nfout,'(1x,"ind_cb = ")')
          write(nfout,'(1x,10(i3,1x))') ind_cb(1:num_cb,ik)
       end do
    end if
 end subroutine check_ind_vb_and_cb
 
 subroutine tm_wd_ek(nfout,ieigsta,ieigend,jeigsta,jeigend)
    implicit none
!
!   transition moment write down
!
    integer, intent(in)     :: nfout, ieigsta, ieigend, jeigsta, jeigend
    integer                 :: ik, nbi, nbj, ixyz, i, j
    integer                 :: iv, jc, ifind
    real(DP)                :: ebi, ebj, omega, vtl
    real(DP),dimension(3,2) :: vlocal, vcorr, vtotal
    if(printable) then
       write(nfout,'(/1x," ---------- transition moment <phi2|r|phi1> (phi1 -> phi2) ----------")')
       write(nfout,'(1x,"!* ieigsta = ",i3," ieigend = ",i3)') ieigsta, ieigend
       write(nfout,'(1x,"!* ieigsta = ",i3," ieigend = ",i3)') jeigsta, jeigend
    end if

! ==== KT_mod ==== 2014/09/22
!    do ik=1, kv3_ek, af+1
    do ik=1, kv3_ek, max( af+1, ndim_spinor )
! ================ 2014/09/22
       if(printable) &
       & write(nfout,'(/1x,"ik = ", i4,3x,"(",3f10.5,")")') ik, (vkxyz_ek(ik,ixyz,CARTS),ixyz=1,3)
       do nbi = ieigsta, ieigend
          ebi = eb_ek(ik,nbi)
          do nbj = jeigsta, jeigend
             ebj = eb_ek(ik,nbj)
             if(ebi>efermi.or.ebj<=efermi) cycle
             call find_ind_vb_and_cb(nbi,nbj,iv,jc, ik,ifind)
             if(ifind/=1) then
                if(printable) &
                & write(nfout,'(2x,"- conduction or valence band index is not found   m_Epsilon_ek STOP at tm_wd_ek ")')
                stop
             end if
             omega=ebj-ebi
! ==== KT_add === 2015/01/17
             if ( sw_scissor_renormalization == ON ) omega = omega + scissor
! =============== 2015/01/17

             vlocal(1:3,1:2)=trm(ik,iv,jc,1:3,1:2,1)
             vcorr(1:3,1:2)=trm(ik,iv,jc,1:3,1:2,2)
! ==== KT_add === 2014/09/22
             if ( noncol ) then
                vlocal(1:3,1:2) = vlocal(1:3,1:2) +trm(ik+1,iv,jc,1:3,1:2,1)
                 vcorr(1:3,1:2) =  vcorr(1:3,1:2) +trm(ik+1,iv,jc,1:3,1:2,2)
             endif
! =============== 2014/09/22
             vtotal=vlocal+vcorr

             if(printable) then
                write(nfout,'(/1x,i3," -> ",i3,2x," omega = ",f10.5,3x," real(imaginary)")') nbi,nbj,omega
                if(nonlocal==1) then
                   write(nfout,'(1x,16x," local ",17x," nonlocal ",17x," total ")')
                elseif(nppcorr>1) then
                   write(nfout,'(1x,16x," local ",15x," PP correction",15x," total")')
                else
                   write(nfout,'(1x,16x," local ",18x," total ")')
                end if
                if(nonlocal==1.or.nppcorr>1) then
                   write(nfout,'(1x,"  x  ",3(3x,f10.5,"(",f10.5,")"))') vlocal(1,1:2),vcorr(1,1:2),vtotal(1,1:2)
                   write(nfout,'(1x,"  y  ",3(3x,f10.5,"(",f10.5,")"))') vlocal(2,1:2),vcorr(2,1:2),vtotal(2,1:2)
                   write(nfout,'(1x,"  z  ",3(3x,f10.5,"(",f10.5,")"))') vlocal(3,1:2),vcorr(3,1:2),vtotal(3,1:2)
                else
                   write(nfout,'(1x,"  x  ",2(3x,f10.5,"(",f10.5,")"))') vlocal(1,1:2),vtotal(1,1:2)
                   write(nfout,'(1x,"  y  ",2(3x,f10.5,"(",f10.5,")"))') vlocal(2,1:2),vtotal(2,1:2)
                   write(nfout,'(1x,"  z  ",2(3x,f10.5,"(",f10.5,")"))') vlocal(3,1:2),vtotal(3,1:2)
                end if
             end if
             vtl=0.0d0
             do i = 1, 3
                do j = 1, 2
                   vtl=vtl+vtotal(i,j)**2
                end do
             end do
             vtl=dsqrt(vtl)
             if(printable) then
                write(nfout,'(1x,"|<",i3,"|r|",i3,">| = ",f10.5)') nbi,nbj,vtl
                write(nfout,'(1x,"|<",i3,"|p|",i3,">| = ",f10.5)') nbi,nbj,vtl*omega
             end if
          end do
       end do
    end do
 end subroutine tm_wd_ek
 
 subroutine tmsq_ek
    implicit none
!
! transition moment product calculaton
! coded by T. Hamada (Univ. Tokyo) Aug.12,2003
! MPI parallelized by T. Hamada (Univ. Tokyo) May 28, 2007
!
    integer                            :: ispin, ik, ik2, nbi,nbj, nspin_kt
    integer                            :: ieigsta, ieigend, jeigsta, jeigend
    integer                            :: n_kpt, nk_local, nk_local0
    real(kind=DP)                      :: ebi,ebj
    integer :: iv, ic, ivm, icm
    integer, allocatable,dimension(:) :: iabstrm2
 
! === KT_add ====== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================= 2014/09/22

    call set_map_k_eps_mpi
    call set_ieig_jeig_staend                         ! -> contained here

    if(way_BZintegral==L_TETRAHEDRON) then

       do ispin = 1, nspin_kt
          if(printable) &
          & write(nfout,'(1x,"!* ispin in tmsq = ",i3)') ispin
! -->> T. Yamasaki    25 Feb. 2008
          if(ipri >= 2) then
             do ik = 1, min(3,np0)
                if(map_k_eps(ik) /= mype) cycle                   ! MPI
                write(nfout,'(" ik = ",i8)') ik
                ik2 = nspin*(ip20(ik)-1)+ispin
                write(nfout,'(" eb_ek = ",10f9.6,/,9x,10f9.6)') eb_ek(ik2,1:neg)

! ========== KT_add =========================== 13.0S
                if ( sw_corelevel_spectrum == ON ) then
                   do nbi = 1, num_core_states
                      ebi = ene_core_states(nbi)
                      do nbj = jeigsta, jeigend
                         ebj=eb_ek(ik2,nbj)
                         if(ebj.gt.efermi) then
                            write(nfout,'(" nbi,nbj,ebi,ebj, efermi = ",2i4,3f10.6)') nbi,nbj,ebi,ebj,efermi
                         end if
                      end do
                   end do
                else
! ============================================= 13.0S
                   do nbi = ieigsta, ieigend
                      ebi=eb_ek(ik2,nbi)
                      do nbj = jeigsta, jeigend
                         ebj=eb_ek(ik2,nbj)
                         if(ebj.gt.efermi.and.ebi.le.efermi) then
                            write(nfout,'(" nbi,nbj,ebi,ebj, efermi = ",2i4,3f10.6)') nbi,nbj,ebi,ebj,efermi
                         end if
                      end do
                   end do
! ========= KT_add ========================== 13.0S
                endif
! =========================================== 13.0S                   
             end do
          end if
! <<-- T. Yamasaki  25 Feb. 2008

          do ik = 1, np0
            if(map_k_eps(ik) /= mype) cycle                   ! MPI
             ik2 = nspin*(ip20(ik)-1)+ispin

! ================= KT_add ================= 13.0S
             if ( sw_corelevel_spectrum == ON ) then
                do nbi = 1, num_core_states
                   ebi=ene_core_states(nbi)
                   do nbj = jeigsta, jeigend
                      ebj=eb_ek(ik2,nbj)
                      if(ebj.gt.efermi) then
                         call os_moment_ek(nfout,ispin,ik,ebi,ebj,nbi,nbj,nsym)  ! -> os_str(ik2,*)
                      end if
                   end do
                end do
             else
! ========================================== 13.0S
                do nbi = ieigsta, ieigend
                   ebi=eb_ek(ik2,nbi)
                   do nbj = jeigsta, jeigend
                      ebj=eb_ek(ik2,nbj)
                      if(ebj.gt.efermi.and.ebi.le.efermi) then
                         call os_moment_ek(nfout,ispin,ik,ebi,ebj,nbi,nbj,nsym)  ! -> os_str(ik2,*)
                      end if
                   end do
                end do
! ====================== KT_add ============= 13.0S
             endif
! =========================================== 13.0S
          end do
       end do

    else

       do ispin = 1, nspin_kt
          if(printable) &
          & write(nfout,'(1x,"!* ispin in tmsq = ",i3)') ispin
          do ik = ispin, kv3_ek-nspin+ispin, nspin
             ik2 = (ik-1)/nspin + 1
             if(map_k_eps(ik2) /= mype) cycle                   ! MPI

! ================== KT_add ==================== 13.0S
             if ( sw_corelevel_spectrum == ON ) then
                do nbi = 1, num_core_states
                   ebi= ene_core_states(nbi)
                   do nbj = jeigsta, jeigend
                      ebj=eb_ek(ik,nbj)
                      if(ebj.gt.efermi.and.ebi.le.efermi) then
                         call os_moment_ek(nfout,ispin,ik,ebi,ebj,nbi,nbj,nsym)
                      end if
                   end do
                end do
             else
! =========================================== 13.0S
                do nbi = ieigsta, ieigend
                   ebi=eb_ek(ik,nbi)
                   do nbj = jeigsta, jeigend
                      ebj=eb_ek(ik,nbj)
                      if(ebj.gt.efermi.and.ebi.le.efermi) then
                         call os_moment_ek(nfout,ispin,ik,ebi,ebj,nbi,nbj,nsym)
                      end if
                   end do
                end do
! ====================== KT_add =========== 13.0S
             end if
! ========================================= 13.0S
          end do
       end do
    end if

    call trm2_mpi_comm

! -->>  T. Yamasaki 26 Feb. 2008
    if(ipri >= 2) then
       write(nfout,'(" uvsor <<trm2_mpi_comm>>")')
       write(nfout,'(" n_kpt = ",i8)') n_kpt
       if(nrd_efermi == 0) then
          ivm = neg
          icm = neg
       else
          ivm = num_vb
          icm = num_cb
       end if

! ================= KT_add =================== 13.0S
       if ( sw_corelevel_spectrum == ON ) ivm = num_core_states
! ============================================ 13.0S

       write(nfout,'(" ivm, icm = ",2i8)') ivm, icm
       write(nfout,'("  ik   iv   ic  trm2(:,:,:,1:6,1)")')
       allocate(iabstrm2(ivm))
       do ik=1, n_kpt
          do ic = 1, icm
             iabstrm2 = 0
             do iv = 1, ivm
                if(dabs(trm2(ik,iv,ic,1,1)) .gt. 1.d-7) iabstrm2(iv) = 1
             end do
             do iv = 1, ivm
                if(iabstrm2(iv)==1) write(nfout,'(3i5, 6f12.6)') ik,iv,ic, trm2(ik,iv,ic,1:6,1)
!!$                write(nfout,'(3i5, 6f12.6)') ik,iv,ic, trm2(ik,iv,ic,1:6,1)
             end do
          end do
       end do
       deallocate(iabstrm2)
    end if
! <<--  T. Yamasaki 26 Feb. 2008
    
! write out transition moment square matrix
    if(ipriepsilon>=3) then
! ========================== KT_mod ============ 13.0S
!       call tm_wd_ek(nfout,ieigsta,ieigend,jeigsta,jeigend)
!
       if ( sw_corelevel_spectrum == ON ) then
          call tm_wd_core2val_ek(nfout,jeigsta,jeigend)
       else
          call tm_wd_ek(nfout,ieigsta,ieigend,jeigsta,jeigend)
       endif
! ============================================= 13.0S
    endif

! ======== KT_mod ========== 13.0S
!    call wd_os_str_ek(nfout)
!
    if ( sw_corelevel_spectrum == ON ) then
       call wd_os_str_core2val_ek(nfout)
    else
       call wd_os_str_ek(nfout)
    endif
! ========================== 13.0S

! == KT_add ==== 13.0R
    if ( sw_trm_print_full == ON ) then
       call tm_wd_ek_full
    endif
! ============== 13.0R

  contains

    subroutine set_map_k_eps_mpi
       if(way_BZintegral==L_TETRAHEDRON) then
          n_kpt = np0
       else
          n_kpt = kv3_ek/nspin
       end if

       nk_local  = n_kpt/npes
       nk_local0 = nk_local +(n_kpt - nk_local*npes)

       do ik = 1, n_kpt
         if(ik <= nk_local0) then
            map_k_eps(ik) = 0
         else
            map_k_eps(ik) = (ik-nk_local0-1)/nk_local + 1
         end if
       end do
       if(ipriepsilon>=2.and.printable) then
          write(nfout,'(" --------------- map_k_eps --------------- ")')
          write(nfout,'(1x,7(2x,"ik",2x,"map",1x))')
          write(nfout,'(7(2x,i6,2x,i3))') (ik, map_k_eps(ik), ik = 1, n_kpt)
       end if
     end subroutine set_map_k_eps_mpi

     subroutine set_ieig_jeig_staend
       if(band_i==0.and.band_f==0) then
          ieigsta=1
          ieigend=neg
          jeigsta=1
          jeigend=neg
       else
          ieigsta=band_i
          ieigend=band_i
          jeigsta=band_f
          jeigend=band_f
       end if
     end subroutine set_ieig_jeig_staend

     subroutine trm2_mpi_comm
       integer :: ipes, iksta, ikend
! -->> T. Yamasaki,  26 Feb. 2008
       integer :: ik, iv, ic
! <<--
       integer :: ni, nf
       real(kind=DP), allocatable, dimension(:,:,:,:,:) :: trm2_wk

       if(ipri >= 2) then
          write(nfout,'(1x,"---------- trm2_mpi_com ----------")')
          write(nfout,'(1x," npes = ",i3)') npes
       end if
       if(npes == 1) then
          if(ipri >= 2) then
             write(nfout,'(1x," nk_local0 = ",i4)') nk_local0
             write(nfout,'(1x," trm2_mpi_comm is disabled")')
          end if
          return
       else
          if(ipri >= 2) then
             write(nfout,'(1x," nk_local0 = ",i4)') nk_local0
             write(nfout,'(1x," nk_local  = ",i4)') nk_local
             write(nfout,'(1x," trm2_mpi_comm is processed")')
          end if
       end if

       if(nrd_efermi == 0) then
          ni= neg
          nf = neg
       else
          ni = num_vb
          nf = num_cb
       end if

! ===================== KT_add ================ 13.0S
       if ( sw_corelevel_spectrum == ON ) ni = num_core_states
! ============================================ 13.0S

       allocate(trm2_wk(nk_local,ni,nf,6,nspin_kt)) ; trm2_wk = 0.0d0

       if(mype == 0) then
          do ipes = 2, npes
             iksta = 1 + nk_local0 +(ipes-2)*nk_local
             ikend = iksta + nk_local -1
             call mpi_recv( trm2_wk, nk_local*ni*nf*6 *nspin_kt, &
                  &         mpi_double_precision, ipes-1, 1, &
                  &         mpi_comm_group, istatus, ierr )
             trm2( iksta:ikend, 1:ni, 1:nf, 1:6, 1:nspin_kt ) &
                  &     = trm2_wk( 1:nk_local, 1:ni, 1:nf, 1:6, 1:nspin_kt )

             if ( ipriepsilon >=2 ) then
                write(nfout,'(1x,"!*MPI trm2(",i4,":",i4,") data have been sent from ipes = ",i4)') iksta, ikend, ipes-1
             endif
          end do
       else
          iksta = 1 + nk_local0 +(mype-1)*nk_local
          ikend = iksta + nk_local -1
          trm2_wk( 1:nk_local, 1:ni, 1:nf, 1:6, 1:nspin_kt ) &
               &      = trm2( iksta:ikend, 1:ni, 1:nf, 1:6, 1:nspin_kt )
          call mpi_send( trm2_wk, nk_local*ni*nf*6 *nspin_kt, &
               &         mpi_double_precision, 0, 1, mpi_comm_group, ierr )
!          if(ipri >= 1) then
          if(ipriepsilon >= 2) then
             write(nfout,'(1x,"!*MPI trm2(",i4,":",i4,") data have been sent to ipes = 0")') iksta, ikend
          endif
       end if
       call mpi_bcast( trm2, n_kpt*ni*nf*6 *nspin_kt, mpi_double_precision, &
            &          0, mpi_comm_group, ierr )

       deallocate(trm2_wk)
     end subroutine trm2_mpi_comm

 end subroutine tmsq_ek

! =========== KT_add ============ 13.0R
  subroutine tm_wd_ek_full
    implicit none

    integer                 :: ik, nbi, nbj, ixyz, i
    integer                 :: iv, jc, ifind, lun_id
    real(DP),dimension(3,2) :: vlocal, vcorr, vtotal

    lun_id = 404

    if ( mype == 0 ) then
       Do i=0, 2
          write(lun_id+i,'(A)') '# Transiton moment '
          write(lun_id+i,'(A)') '# ik   ni   nj   xyz           Re                Im'
       End Do
    endif

    do ik=1, kv3_ek, af+1
       do nbi = 1, neg -1
          do nbj = nbi +1, neg
             call find_ind_vb_and_cb(nbi,nbj,iv,jc, ik,ifind)
             if(ifind/=1) then
                if(printable) &
                & write(nfout,'(2A)') "- conduction or valence band index not found ", &
                &                     "m_Epsilon_ek STOP at tm_wd_ek_full "
                stop
             end if

             vlocal(1:3,1:2)=trm(ik,iv,jc,1:3,1:2,1)
             vcorr(1:3,1:2)=trm(ik,iv,jc,1:3,1:2,2)
             vtotal=vlocal+vcorr

             if ( mype == 0 ) then
                Do ixyz=1, 3
                   write(lun_id,'(I4,2I5,I6,2F20.12)') ik, nbi, nbj, ixyz, &
                        &                            vlocal(ixyz,1), vlocal(ixyz,2)
                   write(lun_id+1,'(I4,2I5,I6,2F20.12)') ik, nbi, nbj, ixyz, &
                        &                            vcorr(ixyz,1), vcorr(ixyz,2)
                   write(lun_id+2,'(I4,2I5,I6,2F20.12)') ik, nbi, nbj, ixyz, &
                        &                            vtotal(ixyz,1), vtotal(ixyz,2)
                End do
             endif
          end do
       end do
    end do
    if ( mype == 0 ) then
       Do i=0, 2
          close( lun_id +i )
       End do
    endif

  end subroutine tm_wd_ek_full
! =============================== 13.0R

! ============== KT_add ======================== 13.0S
 subroutine tm_wd_core2val_ek(nfout,jeigsta,jeigend)
    implicit none
!
!   transition moment write down
!
    integer, intent(in)     :: nfout, jeigsta, jeigend
    integer                 :: ik, nbi, nbj, ixyz, i, j
    integer                 :: iv, jc, ifind
    real(DP)                :: ebi, ebj, omega, vtl
    real(DP),dimension(3,2) :: vlocal, vcorr, vtotal
    if(printable) then
       write(nfout,'(/1x," ---------- transition moment <phi2|r|phi1> (phi1 -> phi2) ----------")')
       write(nfout,'(1x,"!* ieigsta = ",i3," ieigend = ",i3)') jeigsta, jeigend
    end if

    do ik=1, kv3_ek, af+1
       if(printable) &
       & write(nfout,'(/1x,"ik = ", i4,3x,"(",3f10.5,")")') ik, (vkxyz_ek(ik,ixyz,CARTS),ixyz=1,3)
       do nbi = 1, num_core_states
          ebi = ene_core_states(nbi)

          do nbj = jeigsta, jeigend
             ebj = eb_ek(ik,nbj)

             if(ebj<=efermi) cycle

             call find_ind_cb_only(nbj, jc, ik,ifind)
             iv = nbi

             if(ifind/=1) then
                if(printable) &
                & write(nfout,'(2x,"- conduction or valence band index is not found   m_Epsilon_ek STOP at tm_wd_ek ")')
                stop
             end if
             omega=ebj-ebi
! === KT_add == 2015/01/17
             if ( sw_scissor_renormalization == ON ) omega = omega + scissor
! ============= 2015/01/17

             vlocal(1:3,1:2)=trm(ik,iv,jc,1:3,1:2,1)
             vcorr(1:3,1:2)=trm(ik,iv,jc,1:3,1:2,2)
             vtotal=vlocal+vcorr
             if(printable) then
                write(nfout,'(/1x,i3," -> ",i3,2x," omega = ",f10.5,3x," real(imaginary)")') nbi,nbj,omega
                if(nonlocal==1) then
                   write(nfout,'(1x,16x," local ",17x," nonlocal ",17x," total ")')
                elseif(nppcorr>1) then
                   write(nfout,'(1x,16x," local ",15x," PP correction",15x," total")')
                else
                   write(nfout,'(1x,16x," local ",18x," total ")')
                end if
                if(nonlocal==1.or.nppcorr>1) then
                   write(nfout,'(1x,"  x  ",3(3x,f10.5,"(",f10.5,")"))') vlocal(1,1:2),vcorr(1,1:2),vtotal(1,1:2)
                   write(nfout,'(1x,"  y  ",3(3x,f10.5,"(",f10.5,")"))') vlocal(2,1:2),vcorr(2,1:2),vtotal(2,1:2)
                   write(nfout,'(1x,"  z  ",3(3x,f10.5,"(",f10.5,")"))') vlocal(3,1:2),vcorr(3,1:2),vtotal(3,1:2)
                else
                   write(nfout,'(1x,"  x  ",2(3x,f10.5,"(",f10.5,")"))') vlocal(1,1:2),vtotal(1,1:2)
                   write(nfout,'(1x,"  y  ",2(3x,f10.5,"(",f10.5,")"))') vlocal(2,1:2),vtotal(2,1:2)
                   write(nfout,'(1x,"  z  ",2(3x,f10.5,"(",f10.5,")"))') vlocal(3,1:2),vtotal(3,1:2)
                end if
             end if
             vtl=0.0d0
             do i = 1, 3
                do j = 1, 2
                   vtl=vtl+vtotal(i,j)**2
                end do
             end do
             vtl=dsqrt(vtl)
             if(printable) then
                write(nfout,'(1x,"|<",i3,"|r|",i3,">| = ",f10.5)') nbi,nbj,vtl
                write(nfout,'(1x,"|<",i3,"|p|",i3,">| = ",f10.5)') nbi,nbj,vtl*omega
             end if
          end do
       end do
    end do
  end subroutine tm_wd_core2val_ek
! ====================================================== 13.0S


 subroutine find_highest_valence_band(nfout,nhighest)
    implicit none
!
!   find highest condunction band index for each k-point
!
    integer, intent(in)  :: nfout
    integer, intent(out) :: nhighest
    integer              :: ik, nbi, nbj, nhighest0
    real(kind=DP)        :: ebi, ebj, e_highest
    do ik=1, kv3,af+1
       do nbi=1, neg
          do nbj=1, neg
             ebi = e2_mpi(n2_mpi(nbi,ik),ik)
             ebj = e2_mpi(n2_mpi(nbj,ik),ik)
             if(ebj.le.efermi.and.ebi.le.efermi) then
                e_highest=ebi
                nhighest0=nbi
                if(ebj.gt.e_highest) then
                   e_highest=ebj
                   nhighest0=nbj
                else
                   if(ebj==e_highest.and.nbj>nbi) then
                      nhighest0=nbj
                   end if
                end if
             end if
          end do
       end do
       nhighest=nhighest0
       if(ik/=1.and.nhighest/=nhighest0.and.printable) then
          write(nfout,'(1x,"!* index of highest valence band differs between k-points   m_Epsilon STOP")')
       end if
    end do
 end subroutine find_highest_valence_band

 subroutine FermiEnergyLevel_ek_here(nrd_efermi)
!
!   calculate fermi energy (efermi) if nrd_efermi=0
!
    integer             :: way_of_smearing
    integer, intent(in) :: nrd_efermi
    real(DP)            :: efermi1
    if(nrd_efermi == 1) then
       efermi1=efermi
       efermi=0.0d0
    end if
    way_of_smearing = m_CtrlP_way_of_smearing()
    if(way_of_smearing == PARABOLIC) then
!!$       write(nfout,'(" way_of_smearing = PARABOLIC <<FermiEnergyLevel_ek>>")')
       call m_ESoc_EPS_fermi_parabolic_ek(nfout)
    else if(way_of_smearing == TETRAHEDRON) then
!!$       write(nfout,'(" way_of_smearing = TETRAHEDRON <<FermiEnergyLevel_ek>>")')
       call m_ESoc_EPS_fermi_tetra_ek(nfout)

! ========================= KT_add ======================== 13.0E
    else if ( way_of_smearing == Fermi_Dirac ) then
       call m_ESoc_EPS_fermi_dirac_ek(nfout)
! ========================================================= 13.0E
    end if
    if(nrd_efermi ==0) then
       if(printable) write(nfout,10) efermi
    else
       if(printable) write(nfout,20) efermi1-efermi 
       efermi=efermi1
    end if
10 format(1x,"!*--- efermi = ",f10.5)
20 format(1x,"!*--- difference between read and calculated efermi = ",f10.5)
  end subroutine FermiEnergyLevel_ek_here

 subroutine occupation_ek(nf)
!
!   generate band index
!   a derivative of m_ESIO_wd_eigenValues
!
    integer, intent(in)                :: nf
    integer                            :: ispin, ik, ie, nb, jk, ib
    integer                            :: nspin_kt
    integer                            :: n_filled1, n_unfilled1, n_half_filled1
    integer                            :: ie_mpi
    real(DP)                           :: occ_mpi
    real(DP),allocatable, dimension(:,:)   :: e_mpi          ! MPI
    integer, allocatable, dimension(:) :: n_filled, n_unfilled, n_half_filled

    real(DP), allocatable :: occ_mpi_ek2(:,:)

! === KT_add ======= 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! =================== 2014/09/22

! === KT_mod ======= 2014/09/22
!    allocate(n_filled(nspin)) ; n_filled = 0
!    allocate(n_unfilled(nspin)); n_unfilled = 0
!    allocate(n_half_filled(nspin)); n_half_filled = 0
!
    allocate(n_filled(nspin_kt)) ; n_filled = 0
    allocate(n_unfilled(nspin_kt)); n_unfilled = 0
    allocate(n_half_filled(nspin_kt)); n_half_filled = 0
! =================== 2014/09/22

    allocate(e_mpi(neg,kv3_ek))                         ! MPI
    e_mpi = 0.d0                                        ! MPI
    do ik = 1, kv3_ek                                   ! MPI
       do ie = 1, neg                                   ! MPI
          if(map_e(ie) /= myrank_e) cycle               ! MPI
          e_mpi(ie,ik) = occup_l_ek(map_z(ie),ik)       ! MPI
       end do                                           ! MPI
    end do                                              ! MPI
    call MPI_Allreduce(e_mpi,occ_mpi_ek,neg*kv3_ek,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)

    call calc_ispin_charge ! -> contained here
    call find_band_type_ek ! -> contained here

! nocc-> number of occupied band
! nhalffilled -> number of half-filled bands
! nunfilled -> number of unfilled bands

! ==== KT_mod =========== 2014/09/22
!    do ispin = 1, nspin
    do ispin = 1, nspin_kt
! ======================= 2014/09/22
       n_filled1=0
       n_half_filled1=0
       do ib=1,neg
          if(band_type(ib,ispin)==HALF_FILLED_BAND) then
             if(system==NON_METALLIC) system=METALLIC
             n_half_filled1 = n_half_filled1 + 1
          else if(band_type(ib,ispin)==FILLED_BAND) then
             n_filled1 = n_filled1 + 1
          end if
       end do
       n_filled(ispin) = n_filled1
       n_half_filled(ispin) = n_half_filled1
       n_unfilled(ispin) = neg - (n_filled1 + n_half_filled1)
    end do
   
    if(printable) then
       write(nfout,'(/2x,"---------- list of band numbers for each spin ----------")')
       write(nfout,'(/20x,"filled",4x,"half-filled",2x,"unfilled",2x,"number of electrons")')
! ========= KT_mod ====== 2014/09/22
!       do ispin = 1, nspin
       do ispin = 1, nspin_kt
! ======================= 2014/09/22
          write(nfout,'(2x,"ispin =",i3,3i12,7x,f10.5)') ispin, n_filled(ispin), n_half_filled(ispin), n_unfilled(ispin), &
         & spin_charge(ispin)
       end do
    end if

    call calc_tot_charge
    if(printable) &
    & write(nf,'(/1x," total number of electron in the system = ",f10.5)') tot_charge

    if(printable) then
       if(system==METALLIC) then 
          write(nf,'(1x," The system is metallic")')
       else
          write(nf,'(1x," The system is insulating or semiconducting")')
       end if

!       if(system==METALLIC) then
!          write(nf,'(/1x," The system is metallic")')
!          write(nf,'(1x," No. of filled bands = ",i5)') nocc
!          write(nf,'(1x," No. of half-filled bands  = ",i5)') nhalffilled
!          nunfilled=neg-(nocc+nhalffilled)
!          write(nfout,'(1x," No. of unfilled bands = ",i5)') nunfilled
!       else
!          write(nf,'(/1x," The system is insulating or semiconducting")')
!          write(nfout,'(1x," No. of occupiled bands = ",i5)') nocc
!          nunfilled=neg-nocc
!          write(nf,'(1x," No. of unfilled bands = ",i5)') nunfilled
!       end if
!       write(nf,'(1x," number of electron in ispin = ",i3,3x," = ",f10.5)') ispin, spin_charge(ispin)
    end if
 
! print occupation of bands
    if(ipriepsilon>=2) then
       if(printable) then
          write(nf,'(/," ======  Occupations ======")')
!
          if ( noncol ) then
             do ik = 1, kv3_ek, ndim_spinor
                call wd_k_points_noncl
                write(nf,'(5f16.8)') (occ_mpi_ek(n2_mpi_ek(nb,ik),ik),nb = 1, neg)
             end do
          else
             do ik = 1, kv3_ek
                call wd_k_points ! -(c.h.)
                write(nf,'(5f16.8)') (occ_mpi_ek(n2_mpi_ek(nb,ik),ik),nb = 1, neg)
             end do
          end if
       endif
    end if

    deallocate(e_mpi)

!!  deallocate(noc)
  contains

     subroutine wd_k_points
       if(nspin == 1) then
          write(nf,'(i6,3f16.8)') ik,(vkxyz_ek(ik,1:3,BUCS))
       else
          if(mod(ik,2) == 1) then
             write(nf,'(i6,"    UP ",3f16.8)') ik,(vkxyz_ek(ik,1:3,BUCS))
          else
             write(nf,'(i6,"  DOWN ",3f16.8)') ik,(vkxyz_ek(ik,1:3,BUCS))
          end if
       end if
     end subroutine wd_k_points

! ==== KT_add ==== 2014/09/22
     subroutine wd_k_points_noncl
       write(nf,'(i6,3f16.8)') ik,(vkxyz_ek(ik,1:3,BUCS))
     end subroutine wd_k_points_noncl
! ================= 2014/09/22

     subroutine calc_ispin_charge
! === KT_mod ================ 2014/09/22
!       do ispin = 1, nspin
       do ispin = 1, nspin_kt
! =========================== 2014/09/22
          do ik = ispin, kv3_ek-nspin+ispin, nspin
             do ib = 1, neg
                spin_charge(ispin)=spin_charge(ispin)+occ_mpi_ek(n2_mpi_ek(ib,ik),ik)
             end do
          end do
          spin_charge(ispin)=spin_charge(ispin)/real(kv3_ek/nspin,kind=DP)
       end do
     end subroutine calc_ispin_charge

     subroutine calc_tot_charge
       tot_charge=0.0d0
! === KT_mod ================ 2014/09/22
!       do ispin= 1, nspin
       do ispin = 1, nspin_kt
! =========================== 2014/09/22
          tot_charge =tot_charge+spin_charge(ispin)
       end do
     end subroutine calc_tot_charge

 end subroutine occupation_ek

 subroutine find_band_type_ek
    implicit none
!
!   set band index
!   parallelized version 
!   by T. Hamada (Univ. Tokyo), May 25, 2007
!
!   FILLED_BAND      : valence band
!   UNFILLED_BAND    : conducntion band
!   HALF_FILLED_BAND : metallic band
!
    integer       :: ispin, ik, ib
    integer       :: nspin_kt
    integer       :: band_type_mpi, ib_mpi
    real(kind=DP) :: eb, occ, band_ch1, band_ch_mpi
    real(kind=DP), allocatable, dimension(:,:)   :: band_ch
    character(len=11), allocatable, dimension(:,:) :: band_type_chr

    real(kind=DP), allocatable, dimension(:,:)   :: band_ch_mpi2
    integer, allocatable, dimension(:,:) :: band_type_mpi2

    logical       :: occupied

! ==== KT_add ==== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================ 2014/09/22

! ==== KT_mod ===== 2014/09/22
!    allocate(band_ch(neg,nspin)) ; band_ch =0.0d0
!    allocate(band_type_chr(neg,nspin))
!
    allocate(band_ch(neg,nspin_kt)) ; band_ch =0.0d0
    allocate(band_type_chr(neg,nspin_kt))
! ================= 2014/09/22

    do ispin = 1, nspin_kt
       
       do ib=1,neg

! === KT_mod ===== 13.1R
!          band_type(ib,ispin)=UNFILLED_BAND
!
          if ( icond == INITIAL .or. icond == CONTINUATION ) then
          else
             band_type(ib,ispin)=UNFILLED_BAND
          endif
! ================ 13.1R

             occupied=.false.
             do ik = ispin, kv3_ek-nspin+ispin, nspin
                occ=occ_mpi_ek(n2_mpi_ek(ib,ik),ik)
                if(occ>0.0d0) then
                   occupied=.true.
                   exit
                end if
             end do
             do ik = ispin, kv3_ek-nspin+ispin, nspin
                eb=eb_ek(ik,ib)
                if(eb<=efermi.and.occupied) band_type(ib,ispin)=FILLED_BAND
                if(eb>efermi.and..not.occupied) band_type(ib,ispin)=UNFILLED_BAND
                if(eb>efermi.and.occupied) band_type(ib,ispin)=HALF_FILLED_BAND
                if(band_type(ib,ispin)==HALF_FILLED_BAND) exit
             end do

             band_ch1 =0.0d0
             do ik = ispin, kv3_ek-nspin+ispin, nspin
                band_ch1=band_ch1+occ_mpi_ek(n2_mpi_ek(ib,ik),ik)
             end do
             band_ch(ib,ispin) = nspin*band_ch1/ real(kv3_ek,kind=DP)
       end do
    end do


    if(printable) then
       write(nfout,'(/1x," --------- list of band type and occupation ---------- ")')
       if(nspin_kt == 1) then
          write(nfout,'(2x,"ispin",4x,"band",4x,"map_e",6x,"type",6x,"occupation")')
       else
          write(nfout,'(2(2x,"ispin",4x,"band",3x,"map_e",6x,"type",6x,"occupation",3x))')
       end if
    end if

    do ispin = 1, nspin_kt
       do ib = 1, neg
          if(band_type(ib,ispin)==FILLED_BAND)      band_type_chr(ib,ispin) = "     filled"
          if(band_type(ib,ispin)==HALF_FILLED_BAND) band_type_chr(ib,ispin) = "half-filled"
          if(band_type(ib,ispin)==UNFILLED_BAND)    band_type_chr(ib,ispin) = "   unfilled"
       end do
    end do
    if(printable) then
       do ib = 1, neg
          write(nfout,'(2(1x,i5,3x,i5,3x,i5,3x,a11,3x,f10.5,3x))') &
          & (ispin, ib, map_e(ib), band_type_chr(ib,ispin), band_ch(ib,ispin), ispin = 1, nspin_kt)
       end do
    end if

    deallocate(band_type_chr)
    deallocate(band_ch)
 end subroutine find_band_type_ek

 subroutine check_band_energy_range(nf)
    implicit none
!
!   find min and max of direct band transition energy
!
    integer, intent(in) :: nf
    integer :: ik, ispin, ispin_max, ispin_min, nspin_kt
    real(kind=DP) :: evb_ik, ect_ik, evt_ik, ecb_ik, edirect, eprange
    real(kind=DP) :: edmax, edmin

    eprange = e_high - e_low
    edmax = 1.0d-10
    edmin = 1.0d10

! ==== KT_add ==== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================ 2014/09/22

! === KT_mod ====== 2014/09/22
!    do ispin = 1, nspin
    do ispin = 1, nspin_kt
! ================= 2014/09/22
       do ik = ispin, kv3_ek-nspin+ispin, nspin
          call find_vb_bottom_energy           ! -> contained here
          call find_cb_top_energy              ! -> conatined here
          call find_vb_top_energy              ! -> contained here
          call find_cb_bottom_energy           ! -> contained here
          edirect = ect_ik - evb_ik
          if(edirect > edmax) edmax = edirect
          edirect = ecb_ik - evt_ik
          if(edirect < edmin) edmin = edirect
       end do
       edmax_nspin(ispin) = edmax
       edmin_nspin(ispin) = edmin
   end do

! === KT_mod ====== 2014/09/22
!   if(nspin == 1) then
   if(nspin_kt == 1) then
! ================= 2014/09/22
      ispin = 1
      edmax = edmax_nspin(ispin)
      edmin = edmin_nspin(ispin)
      ispin_max = 1
      ispin_min = 1
   else
       edmax = 1.0d-10
       edmin = 1.0d10
       do ispin = 1, nspin
          if(edmax_nspin(ispin) > edmax) then
             edmax = edmax_nspin(ispin)
             ispin_max = ispin
          end if
          if(edmin_nspin(ispin) < edmin) then
             edmin = edmin_nspin(ispin)
             ispin_min = ispin
          end if
       end do
   end if

   if(printable) then
       write(nf,'(/,"---------- band transition energy range check ----------")')
   end if
   if(edmax > eprange) then
     if(printable) then
          write(nf,'(/1x,"!* WARNIG!! : photon energy range is insufficient")')
          write(nf,'(1x,"!* photon energy range does not cover all possible excitations")')
          write(nf,'(1x,"!* highest photon energy = ",f10.5)') eprange
       end if
    else
       if(printable) then
          write(nf,'(1x,"!* photon energy range is sufficient")')
       end if
    end if
    if(printable) write(nf,'(1x,"!* lowest direct band transition energy  = ",f10.5,3x," ispin = ",1x,i3)') edmin, ispin_max
    if(printable) write(nf,'(1x,"!* highest direct band transition energy = ",f10.5,3x," ispin = ",1x,i3)') edmax, ispin_min
    contains
     subroutine find_vb_bottom_energy
       evb_ik =eb_ek(ik,1)
     end subroutine find_vb_bottom_energy
     
     subroutine find_cb_top_energy
       ect_ik =eb_ek(ik,neg)
     end subroutine find_cb_top_energy

     subroutine find_vb_top_energy
       integer :: ib
       real(kind=DP) :: eig
       evt_ik = -1.0d10
       do ib = 1, neg
          if(band_type(ib,ispin) == UNFILLED_BAND) cycle
          eig =eb_ek(ik,ib)
          if(eig>evt_ik) evt_ik=eig
       end do
     end subroutine find_vb_top_energy

     subroutine find_cb_bottom_energy
       integer :: ib
       real(kind=DP) :: eig
       ecb_ik = 1.0d10
       do ib = 1, neg
          if(band_type(ib,ispin) == UNFILLED_BAND) then
             eig =eb_ek(ik,ib)
             if(eig<ecb_ik) ecb_ik=eig
          end if
       end do
     end subroutine find_cb_bottom_energy
 end subroutine check_band_energy_range

 subroutine band_gap(nf)
    implicit none
!
!   find band gap energy and type(direct or indirect)
!
!   ik_vb_top    -> valence band top k-point
!   ib_vb_top    -> valence band top ib index
!   ik_cb_bottom -> conduction band bottom k-point
!   ib_cb_bottom -> conduction band bottom ib index
! 
    integer, intent(in) :: nf
! 2005.01.25
!   integer :: ik, ib, ik_vb_top, ib_vb_top, ik_cb_bottom, ib_cb_bottom
    integer :: ik, ib, ndeg
! 2005.01.25
    real(kind=DP) :: eig, emin, emax, vb_top_energy, cb_bottom_energy
    real(kind=DP) :: eps
    eps = 10.0d-8

    if(system == METALLIC) return
    if(printable) write(nf,'(/1x,"---------- band gap ---------")')
!  find valence band top
    ik_vb_top = 0
    ib_vb_top = 1
    emax = -1.0d30

! ==== KT_mod ====== 2014/09/22
!    do ik = 1, kv3_ek
    do ik = 1, kv3_ek, ndim_spinor
! ================== 2014/09/22
       do ib = 1, neg
          eig = eb_ek(ik,ib)
          if(eig>efermi) cycle
          if(eig>emax-eps) then
             ik_vb_top = ik
             ib_vb_top = ib 
             emax = eig
          end if
       end do
    end do
    vb_top_energy = emax
    if(ik_vb_top==0) then
       if(printable) write(nf,'(1x,"!* valence band top is not found.   UVSOR-Epsilon stop")')
    else
       if(printable) write(nf,'(1x,"!* valence band top        : ik = ",i5,1x," ib = ",i5,3x," energy = ",f10.5," Hartree")') &
      & ik_vb_top, ib_vb_top, vb_top_energy
    end if
    call check_eigdeg(ik_vb_top,ib_vb_top,ndeg,eps)
    ndeg_vb_top = ndeg
    if(printable) write(nfout,'(1x,"!* degeneracy = ",i3)') ndeg_vb_top

!  find conduction band top
    ik_cb_bottom = 0
    ib_cb_bottom = neg
    emin = 10.0d17

! ==== KT_mod ====== 2014/09/22
!    do ik = 1, kv3_ek
    do ik = 1, kv3_ek, ndim_spinor
! ================== 2014/09/22
       do ib = 1, neg
          eig = eb_ek(ik,ib)
          if(eig<=efermi) cycle
          if(eig<=emin-eps) then
             ik_cb_bottom = ik
             ib_cb_bottom = ib
             emin = eig
          end if
       end do
    end do
    cb_bottom_energy = emin
    if(ik_cb_bottom==0) then
       if(printable) write(nf,'(1x,"!* conduction band bottom is not found.   UVSOR-Epsilon stop")')
    else
       if(printable) write(nf,'(1x,"!* conduction band bottom  : ik = ",i5,1x," ib = ",i5,3x," energy = ",f10.5," Hartree")') &
      & ik_cb_bottom, ib_cb_bottom, cb_bottom_energy
    end if
    call check_eigdeg(ik_cb_bottom,ib_cb_bottom,ndeg,eps)
    ndeg_cb_bottom = ndeg
    if(printable) write(nfout,'(1x,"!* degeneracy = ",i3)') ndeg_cb_bottom

! find band_gap and its type
    band_gap_energy = cb_bottom_energy - vb_top_energy
    if(ik_vb_top == ik_cb_bottom) then
       if(printable) &
     & write(nf,'(1x,"!* band gap = ",f10.5," Hartree",3x,"type = direct",/)') band_gap_energy
    else
       if(printable) &
     & write(nf,'(1x,"!* band gap = ",f10.5," Hartree",3x,"type = indirect",/)') band_gap_energy
    end if
    if(scissor /= 0.0d0) then
       if(printable) write(nf,'(1x,"!* scissor operater = ",f10.5)') scissor
       if(ik_vb_top == ik_cb_bottom) then
          if(printable) &
        & write(nf,'(1x,"!* corrected band gap = ",f10.5," Hartree",3x,"type = direct",/)') band_gap_energy + scissor
       else
          if(printable) &
        & write(nf,'(1x,"!* corrected band gap = ",f10.5," Hartree",3x,"type = indirect",/)') band_gap_energy + scissor
       end if
    end if
 end subroutine band_gap

 subroutine check_eigdeg(ik,ib,ndeg,eps)
    implicit none
    integer, intent(in)  :: ik, ib
    integer, intent(out) :: ndeg
    integer              :: ib1
    real(DP), intent(in) :: eps
    real(DP)             :: eb, eb1, delta
    ndeg = 0
    eb = eb_ek(ik,ib)
    do ib1 = 1, neg
       eb1 = eb_ek(ik, ib1)
       delta = dabs(eb1-eb)
       if(delta<=eps) ndeg = ndeg+1
    end do
 end subroutine check_eigdeg
 
 subroutine wd_os_str_ek(nfout)
    implicit none
!
!   print transition moment product matrix of each k-point in IBZ
!
!   ind : transition matrix product index -> see comments for major variables
!        
    integer, intent(in)                       :: nfout
    integer                                   :: nk_local0, nk_local
    integer                                   :: ispin, ik, ik2, i, j, ind, id, iv, ic, ifind
    integer                                   :: nvband, ncband, ipes, iksta, ikend
    integer                                   :: nspin_kt
    real(kind=DP),dimension(6)                :: sum_os
    real(kind=DP),allocatable, dimension(:,:) :: dsum_os
    real(kind=DP),allocatable, dimension(:)   :: spin_os
    real(kind=DP),dimension(3)                :: vk0,vk2
    real(kind=DP)                             :: ei, ej, fi, fj, sum_fi, tot_osci
    real(kind=DP)                             :: weight, ctmp
    real(kind=DP),allocatable, dimension(:,:,:,:) :: os_str_wk

! ==== KT_add ====== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================== 2014/09/22

! ==== KT_mod ====== 2014/09/22
!    allocate(dsum_os(3,nspin)); dsum_os=0.0d0
!    allocate(spin_os(nspin)); spin_os=0.0d0

    allocate(dsum_os(3,nspin_kt)); dsum_os=0.0d0
    allocate(spin_os(nspin_kt)); spin_os=0.0d0
! ================== 2014/09/22

!    if(printable) then
    if(printable .and. ipriepsilon>=2 ) then
       write(nfout,10)
       write(nfout,'(1x,"!* nspin = ",i3)') nspin
       write(nfout,'(1x,"!* kv3_ek = ", i4)') kv3_ek
       write(nfout,'(1x,"!* kv3_ek/nspin = ",i3)') kv3_ek/nspin
    end if

! MPI
    nk_local = kv3_ek/npes
    nk_local0 = nk_local + (kv3_ek-nk_local*npes)
    if(nrd_efermi == 0) then
       nvband = neg
       ncband = neg
    else
       nvband = num_vb
       ncband = num_cb
    end if

! CHanged to the original code 2011,12,12, T Hamada
! -->>  T. Yamasaki 26 Feb. 2008
!    if(way_BZintegral==L_TETRAHEDRON) then
!       if(npes >= 2) then
!          allocate(os_str_wk(kv3_ek,nvband,ncband,6))
!          os_str_wk = 0.d0
!          call mpi_allreduce(os_str,os_str_wk,kv3_ek*nvband*ncband*6,mpi_double_precision &
!               & , mpi_sum,mpi_comm_group,ierr)
!          os_str = os_str_wk
!          deallocate(os_str_wk)
!          if(ipri >= 1) write(nfout,'(1x,"!*MPI ostr(",i4,":",i4,") data have been allreduced")') 1, kv3_ek
!       end if
!    else
! === DEBUG by tkato 2013/10/21 ================================================
#if 0
! ==============================================================================
       allocate(os_str_wk(nk_local,nvband,ncband,6))
       if(mype == 0) then
          do ipes = 2, npes
             iksta = 1 + nk_local0 +(ipes-2)*nk_local
             ikend = iksta + nk_local -1
             call mpi_recv(os_str_wk,nk_local*nvband*ncband*6,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
             os_str(iksta:ikend,1:nvband,1:ncband,1:6) = os_str_wk(1:nk_local,1:nvband,1:ncband,1:6)
!             if(ipri >= 1) then
             if(ipriepsilon >= 2) then
                write(nfout,'(1x,"!*MPI ostr(",i4,":",i4,") data have been sent from ipes = ",i4)') iksta, ikend, ipes-1
             endif
          end do
       else
          iksta = 1 + nk_local0 +(mype-1)*nk_local
          ikend = iksta + nk_local -1
          os_str_wk(1:nk_local,1:nvband,1:ncband,1:6) = os_str(iksta:ikend,1:nvband,1:ncband,1:6)
          call mpi_send(os_str_wk,nk_local*nvband*ncband*6,mpi_double_precision,0,1,mpi_comm_group,ierr)
          if ( ipriepsilon >=2 ) then
             write(nfout,'(1x,"!*MPI os_str(",i4,":",i4,") data have been sent to ipes = 0")') iksta, ikend
          endif
       end if
       deallocate(os_str_wk)
! === DEBUG by tkato 2013/10/21 ================================================
#else
       call MPI_Allreduce(MPI_IN_PLACE,os_str,size(os_str),MPI_DOUBLE_PRECISION,MPI_MAX,mpi_comm_group,ierr)
#endif
! ==============================================================================
!    end if
! <<--  T. Yamasaki 26 Feb. 2008
! Changed to the original code 2011,12,12 T Hamada

! ========== KT_mod ======== 2014/09/22
!    do ispin = 1, nspin
    do ispin = 1, nspin_kt
! ========================== 2014/09/22
!       if(printable) then
       if (printable .and. ipriepsilon>=2 ) then
          if(ispin == 1) write(nfout,20)
          write(nfout,'(1x,"ispin = ", i3)') ispin
       end if

       if(way_BZintegral==L_TETRAHEDRON) then
          do ik=ispin, kv3_ek-nspin+ispin, nspin
! === KT_add ===== 2014/09/22
             if ( noncol ) then
                weight = kv3_ek *qwgt_ek(ik) /dble(ndim_spinor)
             else
                weight = kv3_ek *qwgt_ek(ik)
             endif
! ================== 2014/09/22
             sum_os(1:6)=0.0d0
             sum_fi=0.0d0

             call calc_sum_fi(ik)
             do ind=1,6
                do i=1,neg
                   ei=eb_ek(ik,i)
                   do j=1,neg
                      ej=eb_ek(ik,j)
                      if(ei<=efermi.and.ej>efermi) then
                         call find_ind_vb_and_cb(i,j,iv,ic,ik,ifind)
                         if(ifind/=1) then
                            if(printable) &
                           & write(nfout,'(2x,"- conduction or valence band index is not found  &
                           & m_Epsilon_ek STOP at wd_os_str_ek ")')
                            stop
                         end if
                         fi=occ_mpi_ek(n2_mpi_ek(i,ik),ik) /weight
                         fj=occ_mpi_ek(n2_mpi_ek(j,ik),ik) /weight
                         sum_os(ind) = sum_os(ind) &
                              &       +os_str(ik,iv,ic,ind) *fi *(1.0d0-fj) *qwgt_ek(ik)
                      end if
                   end do
                end do
                if(ind<=3) dsum_os(ind,ispin) = dsum_os(ind,ispin) +sum_os(ind)/sum_fi
             end do
!             if(printable) then
             if (printable .and. ipriepsilon>=2 ) then
                write(nfout,30) ik,ispin,(sum_os(i),i=1,6),sum_fi
             endif
          end do

       else
          do ik = ispin, kv3_ek-nspin+ispin, nspin
! === KT_add ===== 2014/09/22
             if ( noncol ) then
                weight = kv3_ek *qwgt_ek(ik) /dble(ndim_spinor)
             else
                weight = kv3_ek *qwgt_ek(ik)
             endif
! ================== 2014/09/22
             sum_os(1:6)=0.0d0
             sum_fi=0.0d0

             call calc_sum_fi(ik)
             do ind=1,6
                do i=1,neg
                   ei=eb_ek(ik,i)
                   do j=1,neg
                      ej=eb_ek(ik,j)
                      if(ei<=efermi.and.ej>efermi) then
                         call find_ind_vb_and_cb(i,j,iv,ic,ik,ifind)
                         if(ifind/=1) then
                            if(printable) &
                           & write(nfout,'(2x,"- conduction or valence band index is not found  &
                           & m_Epsilon_ek STOP at wd_os_str_ek ")')
                            stop
                         end if
                         fi = occ_mpi_ek(n2_mpi_ek(i,ik),ik) /weight
                         fj = occ_mpi_ek(n2_mpi_ek(j,ik),ik) /weight
                         sum_os(ind) = sum_os(ind) &
                              &       +os_str(ik,iv,ic,ind) *fi *(1.0d0-fj) *qwgt_ek(ik)
                      end if
                   end do
                end do
                if(ind<=3) dsum_os(ind,ispin) = dsum_os(ind,ispin) +sum_os(ind)/sum_fi
             end do
!             if(printable) then
             if (printable .and. ipriepsilon>=2 ) then
                write(nfout,30) ik,ispin,(sum_os(i),i=1,6),sum_fi
             endif
          end do
       end if

       spin_os(ispin)=(dsum_os(1,ispin)+dsum_os(2,ispin)+dsum_os(3,ispin))/3.0d0
       if(printable) then
         write(nfout,'(1x,"!* sum of weighted oscillator strength of k-points in irreducible Brillouin zone = ",f10.5)') &
       & spin_os(ispin)
          write(nfout,'(1x,"!* oscillator strength per electron = ",f10.5/)') &
        & spin_os(ispin)
       end if
    end do
   
! ===== KT_mod ===== 2014/09/22
!    if(nspin > 1) then
    if(nspin_kt > 1) then
! ================== 2014/09/22
       call calc_total_osci
       if(printable) then
          write(nfout,'(1x," total oscillator strength = ",f10.5)') tot_osci
          write(nfout,'(1x," total oscillator strength per electron = ",f10.5)') tot_osci
       end if
    end if

! ===== KT_mod ===== 2014/09/22
!    if(nspin > 1) call set_major_and_minor_spin
    if(nspin_kt > 1) call set_major_and_minor_spin
! ================== 2014/09/22
 
    deallocate(dsum_os)
    deallocate(spin_os)

 10 format(/1x,"!* ----- weighted transition moment square of each k-point in irreducible Brillouin zone -----")
 20 format(20x,"ispin",4x,"xx",8x,"yy",8x,"zz",8x,"xy",8x,"xz",8x,"yz",5x,"valence electron")
 30 format(2x,"k-point = ",i4,4x,i3,1x,6f10.5,2x,f10.5)

  contains

     subroutine calc_sum_fi(ik)
       integer, intent(in) :: ik

       integer :: i
       real(kind=DP) :: weight

       if ( noncol ) then
          weight = kv3_ek*qwgt_ek(ik) /dble(ndim_spinor)
       else
          weight = kv3_ek*qwgt_ek(ik)
       endif

       sum_fi=0.0d0
       do i=1, neg
          ei=eb_ek(ik,i)
          if(ei<=efermi) then
             sum_fi = sum_fi +occ_mpi_ek(n2_mpi_ek(i,ik),ik) /weight
          end if
       end do
     end subroutine calc_sum_fi

     subroutine calc_total_osci
       tot_osci=0.0d0
       do ispin  = 1, nspin
           tot_osci=tot_osci+spin_os(ispin)
       end do
     end subroutine calc_total_osci

     subroutine set_major_and_minor_spin
       integer       :: mispin, mjspin
       real(kind=DP) :: chg, chg1
       chg=spin_charge(1)
       mjspin=1
       mispin=2
       do ispin = 2, nspin
          chg1=spin_charge(ispin)
          if(chg1>chg) then
             chg=chg1
             mispin=mjspin
             mjspin=ispin
          end if
       end do
       major_spin=mjspin
       minor_spin=mispin
       if(printable) &
       & write(nfout,'(1x,"!* major spin = ",i3,3x," minor spin = ",i3)') major_spin, minor_spin
     end subroutine set_major_and_minor_spin
 end subroutine wd_os_str_ek

 subroutine gen_vk0xyz_ek(nsym)
    implicit none
!
!   generate k-points of linear tetrahedron calculation
!   a derivative of gen_vkxyz in m_Epsilon.f90.
!   T. Hamada(Univ. Tokyo) 2003.08.11
!
    integer,intent(in)    :: nsym
    integer               :: nxx,nyy,nzz,npx,npy,npz,ni
    integer               :: ip0,ix,iy,iz,kx,ky,kz
    integer               :: i
    real(DP),dimension(3) :: ka,kb,kc
    kv3_in_the_ek_process=kv3
    if(nbztyp/=1) then
        call gen_vk0xyz_Core(nbztyp_spg,altv,nkx,nky,nkz &
                            & ,nfout,ipri &
                            & ,rltv,vk0xyz,nfkpgn,nfspg,ipri_kp,ipri_spg &
                            & ,np0)
        call find_k0_point_op_ek
        if(nsym/=0) call find_k0_point_sym_ek

        if(ipri >= 2.and.printable) then
             write(nfout,'(/,"  << vk0xyz >>")')
             write(nfout,*) ' !np0 = ',np0
             do i = 1, np0
                 write(nfout,'(i4," ",3f12.6)') i,(vk0xyz(i,ni),ni=1,3)
             end do
        end if
    else

       call gen_vk0xyz_nbztyp_eq_1_case
   
    end if
    contains
     subroutine gen_vk0xyz_nbztyp_eq_1_case
       integer :: i

       nxx=nxyz_tetra(1)
       nyy=nxyz_tetra(2)
       nzz=nxyz_tetra(3)
       npx=nxx+1
       npy=nyy+1
       npz=nzz+1
       do i=1,3
          ka(i)=rltv(i,1)/nxx ! kx: unit vector in a direction
          kb(i)=rltv(i,2)/nyy ! ky: unit vector in b direction
          kc(i)=rltv(i,3)/nzz ! kz: unit vector in c direction
       end do
       do iz=0,nzz-1
          do iy=0,nyy-1
             do ix=0,nxx-1
                ni=npx*(npy*iz+iy)+ix
                do kz=1,2
                   do ky=1,2
                      do kx=1,2
                         ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                         vk00xyz(ip0,1)=vkxyz_ek(ip20(1),1,CARTS)&
                        & +(ix+kx-1)*ka(1)+(iy+ky-1)*kb(1)+(iz+kz-1)*kc(1)
                         vk00xyz(ip0,2)=vkxyz_ek(ip20(1),2,CARTS)&
                        & +(ix+kx-1)*ka(2)+(iy+ky-1)*kb(2)+(iz+kz-1)*kc(2)
                         vk00xyz(ip0,3)=vkxyz_ek(ip20(1),3,CARTS)&
                        & +(ix+kx-1)*ka(3)+(iy+ky-1)*kb(3)+(iz+kz-1)*kc(3)
                      end do
                   end do
                end do
             end do
          end do
       end do 
   end subroutine gen_vk0xyz_nbztyp_eq_1_case
 end subroutine gen_vk0xyz_ek
 
 subroutine find_k0_point_op_ek
    implicit none
!   find rotation operation for k-points
!   k-points are generated from their equivalent points in IBZ by the operation
!   vk0xyz :: k-point coordinate array (in cartesian)
!   vxyz   :: coordinate array of k-point in IBZ
!   iopr   :: rotation operation index
!
    integer                     :: ispin, ik, ik2, iopr
    real(kind=DP), dimension(3) :: vk0,vk2
    do ispin = 1, nspin
       do ik=1,np0
! set vk0 and vk2
          vk0(1:3)=vk0xyz(ik,1:3)
          ik2=nspin*(ip20(ik)-1)+ispin
          vk2(1:3)=vkxyz_ek(ik2,1:3,CARTS)
! find iopr by which vk2 is transformed to vk0
          call find_k0_point_op_core(vk0,vk2,iopr)
          if(iopr/=0) then
             vk0_op(ik)=iopr
          else
             if(printable) write(nfout,130) ik
             stop
          end if
       end do
    end do

! print out iopr of each vk0
    if(ipriepsilon >=2.and.printable) then
       write (nfout,100)
       write (nfout,105)
       write (nfout,110)
       write (nfout,120) (ik,ip20(ik),vk0_op(ik),ik = 1, np0)
       write (nfout,140)
    end if

100 format(2x,"----------------------------- k-point mapping table -----------------------------")
105 format(2x," k(ip0)=op(iopr)*k(ip2)",3x," k is k-point vector",3x," op is operation matrix")
110 format(6x,3("ip0",5x,"ip2",4x,"iopr",10x))
120 format(3(2x,3(i6,2x),3x))
130 format(2x,"k-point operation error at",i4,"-th k-point")
140 format(2x,"---------------------------------------------------------------------------------")
 end subroutine find_k0_point_op_ek

 subroutine find_k0_point_op_core(vk0,vk2,iopr)
    implicit none
    integer                    :: iopr,iop,ind
    integer                    :: ia,ib,ic,ig
    real(kind=DP),dimension(3) :: gk,vk0,vk2,wk1,wk2,basis
! initialize iopr
    iopr=0
! obtain gamma point coordinate gk(0,0,0)
    call gamma_basis(0,0,0,gk)
! obtain gamma point coordinate basis(ia,ib,ic)
    do ia=-3,3
       do ib=-3,3
          do ic=-3,3
             call gamma_basis(ia,ib,ic,basis)
! rotate vk2 around gamma point vk2->wk1
             do iop=1,nopr
                call rotate_k_point(vk2,gk,wk1,iop)
! calculate vk2+basis = wk1
                wk1=wk1+basis
! compare wk1 and vk0
                call compare_k_point(vk0,wk1,ind)
! if(wk1==vk0) set iopr and exit
                if(ind/=0) then
                   iopr=iop
                   exit
                end if
             end do
             if(iopr/=0) exit
          end do
          if(iopr/=0) exit
       end do
      if(iopr/=0) exit
   end do
 end subroutine find_k0_point_op_core
  
 subroutine find_k0_point_sym_ek
    implicit none
    integer :: i , j, n, ispin, ik, ik2
    integer :: iopr, iop, nop_ik, ind
!
! find symmetry of k-points in IBZ.   The symmetry opetation does not change k-point position
! ik: k-point index
! nopr_k : number of symmetry operation
! op_k(ik,j) : j-th symmetry matrix of ik-th k-point
!
     ispin = 1
     do ik=1,np2
        nop_ik=0
        ik2=nspin*(ik-1)+ispin
        do iopr=1,nopr
           call find_k0_point_sym_ek_core(ik2,iopr,iop)
           if(iop/=0) then
              nop_ik=nop_ik+1
              op_k(ik,nop_ik)=iop
           end if
        end do
        nopr_k(ik)=nop_ik
     end do

     if(ipriepsilon>=2.and.printable) then
        write(nfout,'(/2x,"------------------------------ symmetry of k-point ------------------------------")')
        do ik=1,np2
           write(nfout,10) ik
           write(nfout,20) nopr_k(ik)
           write(nfout,30) (op_k(ik,j),j=1,nopr_k(ik))
        end do
        write(nfout,'(/2x,"---------------------------------------------------------------------------------")')
     end if
10 format(/2x,"ip2 k-point=",1x,i4)
20 format(2x,"nopr=",1x,i4)
30 format(2x,20i4)
 end subroutine find_k0_point_sym_ek

 subroutine find_k0_point_sym_ek_core(ik,iopr,iop)
    implicit none
!
! subroutine for k-point symmetry analysis
! T. Hamada(Univ. Tokyo) 2003.07.11
! modified for m_Epsilon_ek by T.Hamada 2003.08.11
!
! ik: k-point index(ip2 system)
! iopr: index of roation symmetry under consideration
! iop: k-point symmetry index
!     iop=iopr: k-point has rotation symmetry of iopr
!     iop=0   : k-point has no rotation symmetry of iopr
!
    integer                    :: ik,iopr,iop
    integer                    :: ind,ig
    real(kind=DP),dimension(3) :: vk2,vk2_op,basis,basis_op
    real(kind=DP)              :: vlen
! initialize parameters
    iop=0
! set k-point
    vk2(1:3)=vkxyz_ek(ik,1:3,CARTS)
! set gamma point symmetry
    vlen=vk2(1)**2+vk2(2)**2+vk2(3)**2
    if(vlen==0.0d0) then
       iop=iopr
       return
    else
! find non-gammer point symmetry
       basis=0.0d0
       call rotate_k_point(vk2,basis,vk2_op,iopr)
       call comp_k_point_with_translation(vk2,vk2_op,ind)
       if(ind/=0) then
          iop=iopr
          return
       end if
       call rotate_k_point(basis,vk2,basis_op,iopr)
       call check_gamma_point(basis_op,ind)
       if(ind/=0) then
          iop=iopr
          return
        end if
    end if
 end subroutine find_k0_point_sym_ek_core

 subroutine rotate_k_point(a,b,c,iop)
    implicit none
! subroutine rotating k-point
! T. Hamada (Univ. Tokyo) 2003.07.11
! a: k-point coordinate in cartesian (before rotation)
! c: k-point coordinate in cartesian (after rotation)
! b: coordinate of rotation origin in cartesian
! iop: index of rotation operation
    integer                    :: i,iop
    real(kind=DP),dimension(3) :: a,b,c,wk1,wk2,wk3
    wk1=a-b
    do i=1,3
       wk2(i)=op(i,1,iop)*wk1(1)+op(i,2,iop)*wk1(2)+op(i,3,iop)*wk1(3)
    end do
    wk3=wk2+b
    c=wk3
 end subroutine rotate_k_point

 subroutine compare_k_point(a,b,ind)
    implicit none
! subroutine for k-point comparing
! T. Hamada(Univ. Tokyo) 2003.07.11
! a: k-point 1 coordinate in cartesian
! b: k-point 2 coordinate in cartesian
! ind: comparison index
!       ind=1: k-point 1 = k-point 2
!       ind=0: k-point 1/= k-point 2
    integer :: i,ind
    real(kind=DP),dimension(3) :: a,b
    real(kind=DP)              :: eps,dlen
    ind=0
    eps=10.0d0**(-7)
    dlen=0.0d0
    do i=1,3
       dlen=dlen+(a(i)-b(i))**2
    end do
    if(dlen<=eps) ind=1
 end subroutine compare_k_point

 subroutine comp_k_point_with_translation(a,b,ind)
    implicit none
! subroutine for k-point comparing
! T. Hamada(Univ. Tokyo) 2003.07.11
! a: k-point 1 coordinate in cartesian
! b: k-point 2 coordinate in cartesian
! ind: comparison index
!       ind=1: k-point 1 = k-point 2
!       ind=0: k-point 1/= k-point 2
    integer                    :: i,ind
    integer                    :: ia,ib,ic
    real(kind=DP),dimension(3) :: a,b,basis
    ind=0
    do ia=-2,2
       do ib=-2,2
          do ic=-2,2
             call gamma_basis(ia,ib,ic,basis)
             b=b+basis
             call compare_k_point(a,b,ind)
             if(ind/=0) exit
          end do
          if(ind/=0) exit
       end do
       if(ind/=0) exit
    end do
 end subroutine comp_k_point_with_translation

 subroutine check_gamma_point(a,ind)
    implicit none
! subroutine for k-point analysis
! T. Hamada (Univ. Tokyo)  2003.07.11
! a: k-point coordinate in cartesian
! ind: k-point symmetry index
!       ind=1: k-point is gamma point
!       ind=0: k-point is non-gamma point
    integer                    :: ind
    integer                    :: i,ia,ib,ic
    real(kind=DP),dimension(3) :: a,b
    do ia=-2,2
       do ib=-2,2
          do ic=-2,2
             call gamma_basis(ia,ib,ic,b)
             call compare_k_point(a,b,ind)
             if(ind/=0) exit
          end do
         if(ind/=0) exit
       end do
       if(ind/=0) exit
    end do
 end subroutine check_gamma_point

 subroutine gamma_basis(ia,ib,ic,gamma)
    implicit none
! subroutine for calculating gamma point coordinate
! T. Hamada (Univ. Tokyo) 2003.07.11
! ia,ib,ic: gamma point index
! gamma: gamma point coordinate in cartesian
    integer                    :: ia,ib,ic
    real(kind=DP),dimension(3) :: gamma
    integer :: i
    do i=1,3
       gamma(i)=real(ia,kind=DP)*rltv(i,1)+real(ib,kind=DP)*rltv(i,2)+real(ic,kind=DP)*rltv(i,3)
    end do
 end subroutine gamma_basis

 subroutine os_moment_ek(nfout,ispin,ik,ebi,ebj,ni,nj,nsym)
    implicit none
!
! calculation of transition moment product
! Tomoyuki Hamada
! University of Tokyo, Mar. 12, 2003
!
    integer, intent(in)       :: nfout, ispin, ik, ni, nj, nsym
    integer                   :: ik2, ixyz, iv, ic, ifind
    real(kind=DP), intent(in) :: ebi, ebj
    real(DP)                  :: value(3,2)

    value=0.0d0

    if(way_BZintegral==L_TETRAHEDRON) then
       ik2=nspin*(ip20(ik)-1)+ispin

! ======== KT_mod ============== 13.0S
!       call find_ind_vb_and_cb(ni,nj,iv,ic,ik2,ifind)
!       if(ifind/=1.and.printable) then
!          write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at os_moment_ek")')
!         call find_ind_vb_and_cb(ni,nj,iv,ic,ik2,ifind)
!       endif
!
       if ( sw_corelevel_spectrum == ON ) then
          call find_ind_cb_only(nj,ic,ik2,ifind)
          iv = ni
          if(ifind/=1.and.printable) then
             write(nfout,'(1x,"!!* index of conduction band is not found   UVSOR-Epsilon STOP at os_moment_ek")')
          end if
       else
          call find_ind_vb_and_cb(ni,nj,iv,ic,ik2,ifind)
          if(ifind/=1.and.printable) then
             write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at os_moment_ek")')
          end if
       endif
! =============================== 13.0S

       value(1:3,1:2)=trm(ik2,iv,ic,1:3,1:2,1)+trm(ik2,iv,ic,1:3,1:2,2)
! ==== KT_add === 2014/09/22
       if ( noncol ) then
          value(1:3,1:2) = value(1:3,1:2) +trm(ik2+1,iv,ic,1:3,1:2,1) &
               &                          +trm(ik2+1,iv,ic,1:3,1:2,2)
       endif
! =============== 2014/09/22

! -->> T. Yamasaki 26 Feb. 2008
!!$       if( ik2 == kv3_ek) then
!!$          write(nfout,'(" value(1:3,1:2) for (ik=",i3,",ik2=",i3," iv=",i3,",ic=",i3,") = ",6f9.5," <<os_moment_ek>>")') &
!!$               & ik,ik2,iv,ic,value(1:3,1:2)
!!$       end if
! <<-- T. Yamasaki 26 Feb. 2008

! calculate transition moment square
       trm2(ik,iv,ic,1,ispin)=value(1,1)**2+value(1,2)**2                              ! xx term
       trm2(ik,iv,ic,2,ispin)=value(2,1)**2+value(2,2)**2                              ! yy term
       trm2(ik,iv,ic,3,ispin)=value(3,1)**2+value(3,2)**2                              ! zz term
       trm2(ik,iv,ic,4,ispin)=value(1,1)*value(2,1)+value(1,2)*value(2,2)              ! xy term
       trm2(ik,iv,ic,5,ispin)=value(1,1)*value(3,1)+value(1,2)*value(3,2)              ! xz term
       trm2(ik,iv,ic,6,ispin)=value(2,1)*value(3,1)+value(2,2)*value(3,2)              ! yz term
! calculate oscillator strength
       call os_strength(ispin,ik,iv,ic,ebi,ebj)
    else

! ============ KT_mod =============== 13.0S
!       call find_ind_vb_and_cb(ni,nj,iv,ic,ik,ifind)
!       if(ifind/=1.and.printable) then
!          write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at os_moment_ek")')
!          stop
!       end if
!
       if ( sw_corelevel_spectrum == ON ) then
          call find_ind_cb_only(nj,ic,ik,ifind)
          iv = ni
          if(ifind/=1.and.printable) then
             write(nfout,'(1x,"!!* index of conduction band is not found   UVSOR-Epsilon STOP at os_moment_ek")')
             stop
          end if
       else
          call find_ind_vb_and_cb(ni,nj,iv,ic,ik,ifind)
          if(ifind/=1.and.printable) then
             write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at os_moment_ek")')
             stop
          end if
       endif
! ==================================== 13.0S

       value(1:3,1:2)=trm(ik,iv,ic,1:3,1:2,1)+trm(ik,iv,ic,1:3,1:2,2)
! ==== KT_add === 2014/09/22
       if ( noncol ) then
          value(1:3,1:2) = value(1:3,1:2) +trm(ik+1,iv,ic,1:3,1:2,1) &
               &                          +trm(ik+1,iv,ic,1:3,1:2,2)
       endif
! =============== 2014/09/22

! calculate transition moment square
       ik2 = (ik-1)/nspin + 1
       trm2(ik2,iv,ic,1,ispin)=value(1,1)**2+value(1,2)**2                              ! xx term
       trm2(ik2,iv,ic,2,ispin)=value(2,1)**2+value(2,2)**2                              ! yy term
       trm2(ik2,iv,ic,3,ispin)=value(3,1)**2+value(3,2)**2                              ! zz term
       trm2(ik2,iv,ic,4,ispin)=value(1,1)*value(2,1)+value(1,2)*value(2,2)              ! xy term
       trm2(ik2,iv,ic,5,ispin)=value(1,1)*value(3,1)+value(1,2)*value(3,2)              ! xz term
       trm2(ik2,iv,ic,6,ispin)=value(2,1)*value(3,1)+value(2,2)*value(3,2)              ! yz term
! calculate oscillator strength
       call os_strength(ispin,ik,iv,ic,ebi,ebj)
    end if
! symmetrize transition momennt square matrix
      if(way_BZintegral==L_TETRAHEDRON) call trm2_symm(nfout,ispin,ik,iv,ic,nsym)

    contains
     subroutine os_strength(ispin,ik,i,j,ebi,ebj)
       implicit none
!
!      oscillator strength calculation
!
       integer,intent(in)       :: ispin, ik, i, j
       real(kind=DP),intent(in) :: ebi,ebj
       real(kind=DP)            :: omega,fac

       omega=ebj-ebi
! === KT_add === 2015/01/17
       if ( sw_scissor_renormalization == ON ) omega = omega + scissor
! ============== 2015/01/17

       fac=2.0d0*omega
       if(way_BZintegral==L_TETRAHEDRON) then
          ik2 = nspin*(ip20(ik)-1)+ispin
! -->> T. Yamasaki 26 Feb. 2008
          os_str(ik2,i,j,1:6)=fac*trm2(ik,i,j,1:6,ispin)
!!$          os_str(ik2,i,j,1:6)=os_str(ik2,i,j,1:6)+fac*trm2(ik,i,j,1:6,ispin)
! <<-- T. Yamasaki 26 Feb. 2008
!!$          if(ipri >= 1 .and. ik2==kv3_ek) &
!!$          if(ik2 >= kv3_ek-3) &
!!$               & write(nfout,'(" os_str(",i3,",",i3,",",i3,",1:6) = ",6f9.5)') ik2,i,j,os_str(ik2,i,j,1:6)
       else
          os_str(ik,i,j,1:6)=fac*trm2(ik2,i,j,1:6,ispin)
!!$          if(ipri >= 1) &
!!$               & write(nfout,'(" os_str(",i3,",",i3,",",i3,",1:6) = ",6f9.5)') ik,i,j,os_str(ik,i,j,1:6)
       end if
     end subroutine os_strength
 end subroutine os_moment_ek
   
 subroutine trm2_symm(nfout,ispin,ik,i,j,nsym)
    implicit none
!
!    summetrize transition moment product
!
    integer,intent(in)           :: nfout, ispin, ik, i, j, nsym
    real(kind=DP),dimension(3,3) :: tr2,wk
    real(kind=DP)                :: eps
! set transition moment square matrix of k-point
    tr2(1,1)=trm2(ik,i,j,1,ispin)
    tr2(2,2)=trm2(ik,i,j,2,ispin)
    tr2(3,3)=trm2(ik,i,j,3,ispin)
    tr2(1,2)=trm2(ik,i,j,4,ispin)
    tr2(1,3)=trm2(ik,i,j,5,ispin)
    tr2(2,3)=trm2(ik,i,j,6,ispin)
    tr2(3,1)=tr2(1,3)
    tr2(3,2)=tr2(2,3)
! symmetrize transition moment sqaure matrix
    if(nbztyp/=1.and.nsym/=0) call trm2_symm_core(ik,tr2)
! generate transition moment square of non-ip2 k-points
    if(nbztyp/=1) call rotate_trm2(ik,tr2)
! set trm2
    trm2(ik,i,j,1,ispin)=tr2(1,1)
    trm2(ik,i,j,2,ispin)=tr2(2,2)
    trm2(ik,i,j,3,ispin)=tr2(3,3)
    trm2(ik,i,j,4,ispin)=tr2(1,2)
    trm2(ik,i,j,5,ispin)=tr2(1,3)
    trm2(ik,i,j,6,ispin)=tr2(2,3)
 end subroutine trm2_symm

 subroutine trm2_symm_core(ik,tr2)
    implicit none
    integer                       :: ik,iop,iopr
    real(kind=DP), dimension(3,3) :: tr2,u,tu,wk1,wk2,wk3
! initialize variables
    wk1=0.0d0
! symmetrize transition moment square
    do iopr=1,nopr_k(ip20(ik))
       iop=op_k(ip20(ik),iopr)
       wk2=tr2
       call tenstr(wk2,iop)
       wk1=wk1+wk2
    end do
    tr2=wk1/real(nopr_k(ip20(ik)),kind=DP)
 end subroutine trm2_symm_core

 subroutine rotate_trm2(ik,tr2)
   implicit none
   integer                      :: ik,iopr
   real(kind=DP),dimension(3,3) :: tr2
   iopr=vk0_op(ik)
   call tenstr(tr2,iopr)
 end subroutine rotate_trm2

 subroutine tenstr(a,iop)
   implicit none
   integer, intent(in) :: iop
   real(kind=DP),intent(inout),dimension(3,3) :: a
   real(kind=DP),dimension(3,3)               :: u,tu,wk1,wk2
   u(1:3,1:3)=op(1:3,1:3,iop)
   tu=transpose(u)
   wk1=matmul(a,tu)
   wk2=matmul(u,wk1)
   a=wk2
 end subroutine tenstr

 subroutine scalar(a,b,ab)
    implicit none
    real(kind=DP), intent(in), dimension(3) :: a,b
    real(kind=DP), intent(out)              :: ab
    ab=0.0d0
    ab=a(1)*b(1)+a(2)*b(2)+a(3)*b(3)
 end subroutine scalar

#ifdef NEC_TUNE
 subroutine vl_ek_tune(ik,nbi_s,nbi_e,nbj_s,nbj_e)
!
!   calculates the local part of electronic transition moment
!   between valence and conduction band orbitals
!   Tomoyuki Hamada, Univ. Tokyo, Feb. 4, 2003; July 28.2003; August 12, 2003
!
!   ik: k-point index
!   nbi: valence band index
!   nbj: conduction band index
!   ebi: energy of nbi band
!   ebj: energy of nbj band
!   ixyz: xyz index =1(x); =2(y); =3(z)
!   rtrans: non-local transtion moment of Read and Needs transition moment
!   ptrans: core-repare term of Kageshima and Shiraishi transition moment
!
    implicit none
    integer,intent(in)             :: ik, nbi_s, nbi_e, nbj_s, nbj_e
    integer                        :: nbi, nbj, ikt
    integer                        :: nbi0, nbj0, iv, ic, ifind, iv2, ic2, ixyz, ng
    real(DP)                       :: ebi, ebj
    real(DP)                       :: omega, r_wfij, i_wfij
    real(DP),dimension(3)          :: tsum
    real(DP),allocatable,dimension(:,:,:) :: rsum, isum, rsum_tmp, isum_tmp
    real(DP),dimension(3,2)        :: tlocal,tcorr
    real(DP),allocatable,dimension (:) :: qx, qy, qz
    real(DP),allocatable,dimension(:,:,:) :: wk1, wk2, wk3
    integer                        :: start_i, end_i, start_j, end_j, leni, lenj

    allocate(qx(maxval(np_g1k))); allocate(qy(maxval(np_g1k))); allocate(qz(maxval(np_g1k)))
    ! set_b_and_eb begin
    ikt = nk_in_the_process+ik-1
    if(ikt <= kv3_ek) then
       do nbi = nbi_s, nbi_e
          ebi = e2_mpi(n2_mpi(nbi,ik),ik)
          do nbj = nbj_s, nbj_e
             ebj = e2_mpi(n2_mpi(nbj,ik),ik)
             eb_ek_tmp(ik,nbi)=ebi
             eb_ek_tmp(ik,nbj)=ebj
          end do ! nbj
       end do ! nbi
    end if
    ! set_b_and_eb end

    qx=0.0d0; qy=0.0d0; qz=0.0d0
    call k_plus_G_vectors_m_3D(ik,kg,kg1,kv3,iba,nbase,vkxyz,ngabc,rltv,qx,qy,qz)
    allocate(rsum(neg,neg,3)); rsum = 0.0d0
    allocate(isum(neg,neg,3)); isum = 0.0d0
    allocate(rsum_tmp(np_e,neg,3)); rsum_tmp = 0.0d0
    allocate(isum_tmp(np_e,neg,3)); isum_tmp = 0.0d0

    ! calc_vlocal start
    if(kimg <= 1) then
       allocate(wk1(maxval(np_g1k),np_e,1))
       allocate(wk2(maxval(np_g1k),np_e,1))
       allocate(wk3(maxval(np_g1k),np_e,1))
       do nbi = 1, np_e
          do ng = 1, np_g1k(ik)
             wk1(ng,nbi,1) = qx(ng)*wf_lb(ng,nbi+ista_e-1,1)
             wk2(ng,nbi,1) = qy(ng)*wf_lb(ng,nbi+ista_e-1,1)
             wk3(ng,nbi,1) = qz(ng)*wf_lb(ng,nbi+ista_e-1,1)
          end do
       end do
       call dgemm('T','N',np_e,neg,np_g1k(ik),1.0d0,wk1(1,1,1),maxval(np_g1k),wf_lb(1,1,1), &
                  maxval(np_g1k),0.0d0,rsum_tmp(1,1,1),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik),1.0d0,wk2(1,1,1),maxval(np_g1k),wf_lb(1,1,1), &
                  maxval(np_g1k),0.0d0,rsum_tmp(1,1,2),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik),1.0d0,wk3(1,1,1),maxval(np_g1k),wf_lb(1,1,1), &
                  maxval(np_g1k),0.0d0,rsum_tmp(1,1,3),np_e)
       deallocate(wk1)
       deallocate(wk2)
       deallocate(wk3)
    else
       allocate(wk1(maxval(np_g1k),np_e,2))
       allocate(wk2(maxval(np_g1k),np_e,2))
       allocate(wk3(maxval(np_g1k),np_e,2))
       do nbi = 1, np_e
          do ng = 1, np_g1k(ik)
             wk1(ng,nbi,1) = qx(ng)*wf_lb(ng,nbi+ista_e-1,1)
             wk1(ng,nbi,2) = qx(ng)*wf_lb(ng,nbi+ista_e-1,2)
             wk2(ng,nbi,1) = qy(ng)*wf_lb(ng,nbi+ista_e-1,1)
             wk2(ng,nbi,2) = qy(ng)*wf_lb(ng,nbi+ista_e-1,2)
             wk3(ng,nbi,1) = qz(ng)*wf_lb(ng,nbi+ista_e-1,1)
             wk3(ng,nbi,2) = qz(ng)*wf_lb(ng,nbi+ista_e-1,2)
          end do
       end do
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk1(1,1,1),maxval(np_g1k),wf_lb(1,1,1), &
            maxval(np_g1k),0.0d0,rsum_tmp(1,1,1),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk1(1,1,2),maxval(np_g1k),wf_lb(1,1,2), &
            maxval(np_g1k),1.0d0,rsum_tmp(1,1,1),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk2(1,1,1),maxval(np_g1k),wf_lb(1,1,1), &
            maxval(np_g1k),0.0d0,rsum_tmp(1,1,2),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk2(1,1,2),maxval(np_g1k),wf_lb(1,1,2), &
            maxval(np_g1k),1.0d0,rsum_tmp(1,1,2),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk3(1,1,1),maxval(np_g1k),wf_lb(1,1,1), &
            maxval(np_g1k),0.0d0,rsum_tmp(1,1,3),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk3(1,1,2),maxval(np_g1k),wf_lb(1,1,2), &
            maxval(np_g1k),1.0d0,rsum_tmp(1,1,3),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk1(1,1,1),maxval(np_g1k),wf_lb(1,1,2), &
            maxval(np_g1k),0.0d0,isum_tmp(1,1,1),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik),-1.0d0,wk1(1,1,2),maxval(np_g1k),wf_lb(1,1,1), &
            maxval(np_g1k),1.0d0,isum_tmp(1,1,1),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk2(1,1,1),maxval(np_g1k),wf_lb(1,1,2), &
            maxval(np_g1k),0.0d0,isum_tmp(1,1,2),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik),-1.0d0,wk2(1,1,2),maxval(np_g1k),wf_lb(1,1,1), &
            maxval(np_g1k),1.0d0,isum_tmp(1,1,2),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik), 1.0d0,wk3(1,1,1),maxval(np_g1k),wf_lb(1,1,2), &
            maxval(np_g1k),0.0d0,isum_tmp(1,1,3),np_e)
       call dgemm('T','N',np_e,neg,np_g1k(ik),-1.0d0,wk3(1,1,2),maxval(np_g1k),wf_lb(1,1,1), &
            maxval(np_g1k),1.0d0,isum_tmp(1,1,3),np_e)
       deallocate(wk1)
       deallocate(wk2)
       deallocate(wk3)
    end if
    do nbi = 1, np_e
       rsum(nbi+ista_e-1,:,:) = rsum_tmp(nbi,:,:)
       isum(nbi+ista_e-1,:,:) = isum_tmp(nbi,:,:)
    end do
    call mpi_allreduce(MPI_IN_PLACE,rsum,neg*neg*3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    call mpi_allreduce(MPI_IN_PLACE,isum,neg*neg*3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    ! calc_vlocal end

    nbi = nbi_e - nbi_s + 1
    leni = nbi/nrank_e
    start_i = nbi_s + leni*myrank_e + min(myrank_e,mod(nbi,nrank_e))
    if(mod(nbi,nrank_e) > myrank_e) leni = leni + 1
    end_i = start_i + leni - 1

    nbj = nbj_e - nbj_s + 1
    lenj = nbj/nrank_g
    start_j = nbj_s + lenj*myrank_g + min(myrank_g,mod(nbj,nrank_g))
    if(mod(nbj,nrank_g) > myrank_g) lenj = lenj + 1
    end_j = start_j + lenj - 1

    do nbi = start_i, end_i
       ebi = e2_mpi(n2_mpi(nbi,ik),ik)
       do nbj = start_j, end_j
          ebj = e2_mpi(n2_mpi(nbj,ik),ik)
          if(.not. (nlo == 0 .and. nbi == nbj)) then
             nbi0=n2_mpi(nbi,ik)
             nbj0=n2_mpi(nbj,ik)
             call find_ind_vb_and_cb2(nbi0,nbj0,iv2,ic2,nk_in_the_process+ik-1,ifind)
             if(ifind==0) then
                if(printable) write(nfout,'(1x,"!!* conduction or valence band index is not found&
                   &   UVSOR-Epsilon STOP at vl_ek")')
                stop
             end if

             tlocal(1:3,1:2)=0.0d0; tcorr(1:3,1:2)=0.0d0

             omega=ebj-ebi
! set omega
             if(nbi==nbj) then
                omega = deg_omega            ! -> intra_band case
             else
                if(abs(omega) < 1.d-14) then ! -> degenerate case
                   if(omega >= 0.d0) then
                      omega = 1.d-14
                   else
                      omega = -1.d-14
                   end if
                end if
             end if

! add correction term
             if(kimg<=1) then
! kimg=1 case
                do ixyz=1,3
                   tlocal(ixyz,2)=-1.0d0*rsum(nbj0,nbi0,ixyz)/omega
                   tcorr(ixyz,2)=(-1.0d0*ptrans(ik,ic2,iv2,ixyz,1)+rtrans(ik,ic2,iv2,ixyz,2))/omega
                   tcorr(ixyz,1)=(ptrans(ik,ic2,iv2,ixyz,2)+rtrans(ik,ic2,iv2,ixyz,1))/omega
                end do
             else
! kimg=2 case
                do ixyz=1,3
                   tlocal(ixyz,2)=-1.0d0*rsum(nbj0,nbi0,ixyz)/omega
                   tcorr(ixyz,2)=(-1.0d0*ptrans(ik,ic2,iv2,ixyz,1)+rtrans(ik,ic2,iv2,ixyz,2))/omega
                   tlocal(ixyz,1)=isum(nbj0,nbi0,ixyz)/omega
                   tcorr(ixyz,1)=(ptrans(ik,ic2,iv2,ixyz,2)+rtrans(ik,ic2,iv2,ixyz,1))/omega
                end do
             end if

! set transition moment arrey
             call find_ind_vb_and_cb(nbi,nbj,iv,ic,nk_in_the_process+ik-1,ifind)  ! -> iv, ic, ifind
             if(ifind==1) then
                do ixyz=1,3
                   trm_tmp(ik,iv,ic,ixyz,1,1)=tlocal(ixyz,1)
                   trm_tmp(ik,iv,ic,ixyz,1,2)=tcorr(ixyz,1)
                   trm_tmp(ik,iv,ic,ixyz,2,1)=tlocal(ixyz,2)
                   trm_tmp(ik,iv,ic,ixyz,2,2)=tcorr(ixyz,2)
                end do
                if(ipri >= 2) then
                   write(nfout,'(" ik,nbi,nbj,nbi0,nbj0,ebi,ebj = ",5i8,2f8.4)') &
                      ik,nbi,nbj,nbi0,nbj0,ebi,ebj
                   if(ipri >= 3) then
                      write(nfout,'(" tlocal(1:3,2),rsum(1:3),omega = ",3f10.4,4d11.3)') &
                         tlocal(1:3,2),rsum(nbj0,nbi0,1:3),omega
                      write(nfout,'(" tcorr (1:3,1:2)               = ",6f10.4)') tcorr(1:3,1:2)
                   end if
                   write(nfout,'(" trm(",i4,",",i4,",",i4,",1:3,1:2,1) = ",6f10.4)') &
                      & nk_in_the_process+ik-1,iv,ic,trm_tmp(ik,iv,ic,1:3,1:2,1)
                   write(nfout,'(" trm(",i4,",",i4,",",i4,",1:3,1:2,2) = ",6f10.4)') &
                      & nk_in_the_process+ik-1,iv,ic,trm_tmp(ik,iv,ic,1:3,1:2,2)
                end if
             else
                if(printable) write(nfout,'(1x,"!!* index of valence or conduction band is not found&
                   &   UVSOR-Epsilon STOP at vl_ek")')
                stop
             end if
          end if ! if(.not. (nlo == 0 .and. nbi == nbj)) then
       end do ! nbj
    end do ! nbi

    deallocate(rsum)
    deallocate(isum)
    deallocate(rsum_tmp)
    deallocate(isum_tmp)
    deallocate(qx); deallocate(qy); deallocate(qz)
 end subroutine vl_ek_tune
#endif
 subroutine vl_ek(ik,nbi,nbj,ebi,ebj)
!
!   calculates the local part of electronic transition moment
!   between valence and conduction band orbitals
!   Tomoyuki Hamada, Univ. Tokyo, Feb. 4, 2003; July 28.2003; August 12, 2003
!
!   ik: k-point index
!   nbi: valence band index
!   nbj: conduction band index
!   ebi: energy of nbi band
!   ebj: energy of nbj band
!   ixyz: xyz index =1(x); =2(y); =3(z)
!   rtrans: non-local transtion moment of Read and Needs transition moment
!   ptrans: core-repare term of Kageshima and Shiraishi transition moment
!
    implicit none
    integer,intent(in)             :: ik, nbi, nbj
    integer                        :: nbi0, nbj0, iv, ic, ifind, iv2, ic2, ixyz, ng
    real(DP),intent(in)            :: ebi, ebj
    real(DP)                       :: omega, r_wfij, i_wfij
    real(DP),dimension(3)          :: rsum, isum, tsum
    real(DP),dimension(3,2)        :: tlocal,tcorr
    real(DP),allocatable,dimension (:) :: qx, qy, qz

    allocate(qx(kg1)); allocate(qy(kg1)); allocate(qz(kg1))
! return if nbi = nbj 2005.11.2
    if(nlo==0.and.nbi==nbj) return
! initialize arrays
    rsum(1:3)=0.0d0; isum(1:3)=0.0d0
    tlocal(1:3,1:2)=0.0d0; tcorr(1:3,1:2)=0.0d0
    qx=0.0d0; qy=0.0d0; qz=0.0d0
    omega=ebj-ebi
! set omega
    if(nbi==nbj) then
       omega = deg_omega            ! -> intra_band case
    else
! ============= KT_mod ==== 13.0R
!       if(abs(omega) < 1.d-14) then ! -> degenerate case
!          if(omega >= 0.d0) then
!             omega = 1.d-14
!          else
!             omega = -1.d-14
!          end if
 
       if(abs(omega) < delta_omega) then ! -> degenerate case
          if(omega >= 0.d0) then
             omega = delta_omega
          else
             omega = -delta_omega
          end if
! ========================= 13.0R
! ==== KT_add === 2015/01/17
       else
          if ( sw_scissor_renormalization == ON ) then
             if ( omega > 0.0 ) then
                omega = omega + scissor
             else
                omega = omega - scissor
             endif
          endif
! =============== 2015/01/17
      end if
    end if
! set index
    nbi0=n2_mpi(nbi,ik)
    nbj0=n2_mpi(nbj,ik)
    call find_ind_vb_and_cb2(nbi0,nbj0,iv2,ic2,nk_in_the_process+ik-1,ifind)
    if(ifind==0) then
       if(printable) &
       & write(nfout,'(1x,"!!* conduction or valence band index is not found   UVSOR-Epsilon STOP at vl_ek")')
       stop
    end if

! calculate local transition moment
    call k_plus_G_vectors_m(ik,kg,kg1,kv3,iba,nbase,vkxyz,ngabc,rltv,qx,qy,qz)
    call calc_vlocal  ! -> rsum, isum
! add correction term
! kimg=1 case
    if(kimg<=1) then
       do ixyz=1,3
          tlocal(ixyz,2)=-1.0d0*rsum(ixyz)/omega
          tcorr(ixyz,2)=(-1.0d0*ptrans(ik,ic2,iv2,ixyz,1)+rtrans(ik,ic2,iv2,ixyz,2))/omega
          tcorr(ixyz,1)=(ptrans(ik,ic2,iv2,ixyz,2)+rtrans(ik,ic2,iv2,ixyz,1))/omega
       end do
    else
! kimg=2 case
       do ixyz=1,3
          tlocal(ixyz,2)=-1.0d0*rsum(ixyz)/omega
          tcorr(ixyz,2)=(-1.0d0*ptrans(ik,ic2,iv2,ixyz,1)+rtrans(ik,ic2,iv2,ixyz,2))/omega
          tlocal(ixyz,1)=isum(ixyz)/omega
          tcorr(ixyz,1)=(ptrans(ik,ic2,iv2,ixyz,2)+rtrans(ik,ic2,iv2,ixyz,1))/omega
       end do
    end if

! set transition moment arrey
    call find_ind_vb_and_cb(nbi,nbj,iv,ic,nk_in_the_process+ik-1,ifind)  ! -> iv, ic, ifind
    if(ifind==1) then
       do ixyz=1,3
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,1,1)=tlocal(ixyz,1)
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,1,2)=tcorr(ixyz,1)
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,2,1)=tlocal(ixyz,2)
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,2,2)=tcorr(ixyz,2)
       end do
       if(ipri >= 2) then
          write(nfout,'(" ik,nbi,nbj,nbi0,nbj0,ebi,ebj = ",5i8,2f8.4)') ik,nbi,nbj,nbi0,nbj0,ebi,ebj
          if(ipri >= 3) then
             write(nfout,'(" tlocal(1:3,2),rsum(1:3),omega = ",3f10.4,4d11.3)') tlocal(1:3,2),rsum(1:3),omega
             write(nfout,'(" tcorr (1:3,1:2)               = ",6f10.4)') tcorr(1:3,1:2)
          end if
!!$          if(kimg == 1) then
!!$             write(nfout,'(" rsum = ",3f16.12)') rsum(1:3)
!!$          else
!!$             write(nfout,'(" rsum, isum = ",6f16.12)') rsum(1:3),isum(1:3)
!!$          end if
          write(nfout,'(" trm(",i4,",",i4,",",i4,",1:3,1:2,1) = ",6f10.4)') &
               & nk_in_the_process+ik-1,iv,ic,trm(nk_in_the_process+ik-1,iv,ic,1:3,1:2,1)
          write(nfout,'(" trm(",i4,",",i4,",",i4,",1:3,1:2,2) = ",6f10.4)') &
               & nk_in_the_process+ik-1,iv,ic,trm(nk_in_the_process+ik-1,iv,ic,1:3,1:2,2)
       end if
    else
       if(printable) &
       & write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at vl_ek")')
       stop
    end if
    deallocate(qx); deallocate(qy); deallocate(qz)
    contains
     subroutine calc_vlocal
! This subroutine calculates local transtion moment
       if (kimg<=1) then
! kimg=1 case
          do ng=1, iba(ik)
! only real part
             r_wfij=wf_lb(ng,nbj0,1)*wf_lb(ng,nbi0,1)
             rsum(1)=rsum(1)+r_wfij*qx(ng)
             rsum(2)=rsum(2)+r_wfij*qy(ng)
             rsum(3)=rsum(3)+r_wfij*qz(ng)
          end do
       else
! kimg=2 case
! real part
          do ng=1, iba(ik)
             r_wfij=wf_lb(ng,nbj0,1)*wf_lb(ng,nbi0,1)+wf_lb(ng,nbj0,2)*wf_lb(ng,nbi0,2)
             rsum(1)=rsum(1)+r_wfij*qx(ng)
             rsum(2)=rsum(2)+r_wfij*qy(ng)
             rsum(3)=rsum(3)+r_wfij*qz(ng)
! imaginary part
             i_wfij=wf_lb(ng,nbj0,1)*wf_lb(ng,nbi0,2)-wf_lb(ng,nbj0,2)*wf_lb(ng,nbi0,1)
             isum(1)=isum(1)+i_wfij*qx(ng)
             isum(2)=isum(2)+i_wfij*qy(ng)
             isum(3)=isum(3)+i_wfij*qz(ng)
          end do
       end if
     end subroutine calc_vlocal
 end subroutine vl_ek

 subroutine linear_tetrahedron(nfout,nstep)
!
!   linear tetrahedron integration of square of transition moment
!   The original program is m_ESoc_fermi_tetrahedron in m_ES_occup.F90
!
!   Tomoyuki Hamada, Univ. Tokyo  Feb. 20, 2003; May 3, 2003
!   MPI-Parallelized by T. Hamada, May 30, 2007
!
    implicit none
    integer, intent(in)                   :: nfout, nstep
    integer                               :: nstep_min, nstep_max
    integer                               :: i, n0, i0
    integer                               :: neig,ispin,ip2,ik,ieig,nxx,nyy,nzz,ip
    integer                               :: n_start, n_end, nst, n_start_mpi, n_end_mpi, nstep_l, nstep_l0, ipes, nistep0
    real(DP)                              :: jdos, e0, fac, edmax0
    real(DP), allocatable, dimension(:,:) :: tint
    real(DP), allocatable, dimension(:,:,:)   :: eig2,eig2_mpi
    real(DP), allocatable, dimension(:)       :: deawk, eia, eja
    real(DP), allocatable, dimension(:,:)     :: tra
    real(DP), allocatable, dimension(:)   :: ed
    real(DP), allocatable, dimension(:,:) :: imeps_mpi
    real(DP), dimension(3)                :: a,b,c
    integer                               :: id_sname = -1
    integer                               :: ini
    logical                               :: called

    called = .false.

    allocate(eig2(np2,neg,nspin)); eig2 = 0.d0
    allocate(eig2_mpi(np2,neg,nspin)); eig2_mpi = 0.d0     ! MPI
    allocate(deawk(np0)); deawk = 0.d0
    allocate(eia(np0)); eia=0.0d0
    allocate(eja(np0)); eja=0.0d0
    allocate(tra(np0,6)); tra=0.0d0
    allocate(ed(nistep)); ed = 0.0d0

    if(printable) then
       write(nfout,*) '!* linear tetrahedron method for k-space integration '
       write(nfout,*) '!* G. Lehmann and M. Taut, phys. stat. sol. (b) vol.54, 469 (1972)'
       write(nfout,'(1x," npes = ",i4)') npes
    end if

! set up parameters
    nxx = nxyz_tetra(1)
    nyy = nxyz_tetra(2)
    nzz = nxyz_tetra(3)
!
    neig=neg
    call set_eigenvalues_ek(nfout,eig2) 

    if(ipriepsilon>=2.and.printable) then
       write(nfout,'(1x,"!* eigenvalues used in the linear tetrahedron scheme")')
       do ispin = 1, nspin
          write(nfout,'(1x," ispin = ",i3)') ispin
          do ip2 = 1, np2
             write(nfout,'(1x,"  ik = ",i3)') ip2
             write(nfout,'(5x,5f10.5)') (eig2(ip2,ieig,ispin),ieig=1,neig)
          end do
       end do
    end if

    do ispin=1,nspin
       if(spin==MAJOR.and.ispin/=major_spin) cycle
       if(spin==MINOR.and.ispin/=minor_spin) cycle
       call set_nstaend
       nistep0 = n_end - n_start + 1
       if(printable) then
          write(nfout,'(1x," nstep_l0 = ",i4,3x,"nstep_l =",i4)')  nstep_l0, nstep_l
          write(nfout,'(1x," nst = ",i4)') nst
          write(nfout,'(1x," nstep_min= ",i4,3x," nstep_max = ",i4)') nstep_min, nstep_max
          write(nfout,'(1x," emin for impes = ",f10.5,1x,"eV")') (e_low + (nstep_min - 1)*e_step)*hartree_in_eV
          write(nfout,'(1x," emax for imeps = ",f10.5,1x,"eV")') (e_low + (nstep_max - 1)*e_step)*hartree_in_eV
          write(nfout,'(1x," n_start = ",i4,1x," : photon energy =",f10.5)') n_start, (e_low + (n_start-1)*e_step)*hartree_in_eV
          write(nfout,'(1x," n_end = ",i4,1x," : photon energy =",f10.5)') n_end, (e_low + (n_end-1)*e_step)*hartree_in_eV
       end if
       if(nistep0 < nistep) then
          if(printable) write(nfout,'(1x," nistep is reduced to ",i4)')  nistep0
          nistep = nistep0
       end if
       allocate(tint(nistep,6)) ; tint = 0.0d0
       do i=n_start, n_end, nistep
          ed = 0.0d0
          ed(1:nistep) = e(i:i+nistep-1)

! ========= KT_mod ============== 13.0S
!          call nsdos3_m(nfout,ed,nistep,nxx,nyy,nzz,eig2,ispin,ip20,np0,deawk,tra,eia,eja,ip2cub,tint,jdos,called)
!
          if ( sw_corelevel_spectrum == ON ) then
             call nsdos3_m_core2val(nfout,ed,nistep,nxx,nyy,nzz,eig2,ispin,ip20,np0,deawk,tra,eia,eja,ip2cub,tint,jdos,called)
          else
             call nsdos3_m(nfout,ed,nistep,nxx,nyy,nzz,eig2,ispin,ip20,np0,deawk,tra,eia,eja,ip2cub,tint,jdos,called)
          endif
! =============================== 13.0S

          if(i+nistep-1 <= nst) then
             imeps(i:i+nistep-1,1:6)=imeps(i:i+nistep-1,1:6)+tint(1:nistep,1:6)
          else
             n0 = (nst/nistep)*nistep
             i0 = nst - n0
             imeps(n0+1:n0+i0,1:6)= imeps(n0+1:n0+i0,1:6) + tint(1:i0,1:6)
          end if
       end do
       if(printable) then
          if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
          if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
          write(nfout,*) ' ispin=',ispin
       end if
       deallocate(tint)
    end do
    if(ipri >= 2) then
       write(nfout,'(" ---- imeps(n_start:100) --- <<linear_tetrahedron>>(0)")')
       do i = n_start, min(n_end,100)
          write(nfout,'(i8,6f10.6)') i,imeps(i,1:6)
       end do
    end if
!   do i = n_start, n_end
!      write(nfout,'(1x,i4,1x,6f10.5)') i, imeps(i,1:6)
!   end do

! MPI
    allocate(imeps_mpi(nstep_l,6)) ; imeps_mpi = 0.0d0
    if(mype == 0) then
       if(printable) write(nfout,'(1x,i4,1x,"imeps data have been processed by ipes = 0")') nstep_l0
       do ipes = 2, npes
          n_start_mpi = nstep_min + nstep_l0 + (ipes-2)*nstep_l
          n_end_mpi = n_start_mpi + nstep_l - 1
          call mpi_recv(imeps_mpi,nstep_l*6,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
          imeps(n_start_mpi:n_end_mpi,1:6) = imeps_mpi(1:nstep_l,1:6)
!!$          if(printable) write(nfout,'(1x,"!*MPI",i4,1x,"imeps data have been received from ipes = ",i4)') nstep_l, ipes
       end do
    else
       n_start_mpi = nstep_min + nstep_l0 + (mype-1)*nstep_l
       n_end_mpi = n_start_mpi + nstep_l - 1
       imeps_mpi(1:nstep_l,1:6) = imeps(n_start_mpi:n_end_mpi,1:6)
       call mpi_send(imeps_mpi,nstep_l*6,mpi_double_precision,0,1,mpi_comm_group,istatus,ierr)
!!$       if(printable) write(nfout,'(1x,"!*MPI",i4,1x,"imeps data have been sent to ipes = 0")') nstep_l
      ! do i = n_start_mpi, n_end_mpi
      !    write(nfout,'(1x,i4,1x,6f10.5)') i, imeps(i,1:6)
      ! end do
    end if
    call mpi_bcast(imeps,nstep*6,mpi_double_precision,0,mpi_comm_group,ierr)

    e0 = vacuum_permittivity_in_au
    fac=0.25d0/(e0*PAI**2)

    if(nspin==1) then
       imeps(1:nstep,1:6)=fac*imeps(1:nstep,1:6)
    else
       imeps(1:nstep,1:6)=fac/2.0d0*imeps(1:nstep,1:6)
    end if

! -->> T. Yamasaki 26 Feb.2008
    if(ipri >= 2) then
       write(nfout,'(" --- imeps --- <<linear_Tetrahedron>>")')
       do i = 1, min(100,nstep)
          write(nfout,'(i8,6f10.6)') i,imeps(i,1:6)
       end do
    end if
! <<-- 

    deallocate(imeps_mpi)
    deallocate(deawk)
    deallocate(eia)
    deallocate(eja)
    deallocate(tra)
    deallocate(eig2)
    deallocate(eig2_mpi)
    deallocate(ed)
    contains
     subroutine set_nstaend
!   
!    set linear tetrahedron photon energy steps
!
        edmax0 = edmax_nspin(ispin)
        if(edmax0 > e_high) edmax0 = e_high
        nstep_max = int((edmax0-e_low + tetra_eps*2.0d0+scissor)/e_step)+1
        if(nstep_max>nstep) then
           write(nfout,'(1x,"!* nstep_max is reduced to nstep = ",i4)') nstep
           nstep_max=nstep
        end if
        if(nstep_max<1) then
           if(printable) write(nfout,'(1x,"!* error at nstep_max setting")')
           if(printable) write(nfout,'(1x," nstep_max = ",i4," < 1")') nstep_max
           stop
        end if
        if (system /= METALLIC) then
           nstep_min = int((edmin_nspin(ispin)-e_low - tetra_eps*2.0d0+scissor)/e_step)-1
           if(nstep_min>nstep_max) then
              if(printable) write(nfout,'(1x,"!* error at nstep_min setting")')
              if(printable) write(nfout,'(1x," nstep_min = ",i4," > nstep_max = ",i4)') nstep_min, nstep_max
              stop
           end if
           if(nstep_min>nstep) then
              if(printable) write(nfout,'(1x,"!* error at nstep_min setting")')
              if(printable) write(nfout,'(1x," nstep_min = ",i4," > nstep = ",i4)') nstep_min, nstep
              stop
           end if
        else
           nstep_min = 1
        end if
        nstep_l = (nstep_max-nstep_min+1)/npes 

        if(nstep_l < 1) then
           if(printable) &
          & write(nfout,'(1x,"!* npes is too large. should be less than",i4)') nstep_max-nstep_min
        end if
        nstep_l0 = nstep_l + ((nstep_max-nstep_min+1) - nstep_l*npes)
        if(mype == 0) then
           n_start =nstep_min
           n_end = n_start + nstep_l0 - 1
           nst = n_end
        else
           n_start = nstep_min + nstep_l0 + (mype-1)*nstep_l
           n_end = n_start + nstep_l - 1
           nst = n_end
        end if
     end subroutine set_nstaend
 end subroutine linear_tetrahedron
 
 subroutine set_eigenvalues_ek(nf,eko_tl)
    implicit none
    integer, intent(in)              :: nf
! subroutine for setting eigenvalues_ek
! The original code is m_ESIO_wd_EigenValues_ek
! T. Hamada (Univ. Tokyo) 8.13.2003

    real(kind=DP), parameter                 :: delta = 1.d-12
    real(kind=DP), allocatable, dimension(:) :: eko_t
    real(kind=DP), dimension(:,:,:)          :: eko_tl
    integer, allocatable, dimension(:)       :: neordr_t
    integer                                  :: ik, ib,jb,ibo,jbo
    integer                                  :: ispin,ip2
    allocate(eko_t(neg))
    allocate(neordr_t(neg))
    if(icond==2) then
       if(np2*nspin/=nk_converged) then
          if(printable) then
             write(nfout,*) nspin
             write(nfout,*) nk_converged, np2*nspin
          end if
          stop ' !* - there is not-converged k-point. UVSOR  STOP'
       end if
    end if
!!$    do ik = 1, kv3_ek
!!$    do ik = 1, nk_in_the_process
    do ispin=1,nspin
       do ip2=1,np2
          ik=nspin*(ip2-1)+ispin
          eko_t = eko_ek(1:neg,ik)
          if(nspin == 1 .or. (nspin == 2 .and. mod(ik,2) == 1)) &
            & neordr_t(1:neg) = (/(ib,ib=1,neg)/)
          do ib = 1, neg-1
             do jb = ib+1, neg
                ibo = neordr_t(ib)
                jbo = neordr_t(jb)
                if(eko_t(jbo)  < eko_t(ibo)-delta) then        ! MPI
                   neordr_t(jb) = ibo
                   neordr_t(ib) = jbo
                end if
             end do
          end do

          do ib=1,neg
             eko_tl(ip2,ib,ispin)=eko_t(neordr_t(ib))
          end do
       end do
    end do
    deallocate(neordr_t)
    deallocate(eko_t)
 end subroutine set_eigenvalues_ek
 
 subroutine nsdos3_m(nfout,e,ni,nx,ny,nz,eig2,ispin,ip20,np0,&
        &                  dea,tra,eia,eja,ip2cub,tint,jdos,called)
!
!      k-space integrtation of square of transition moment
!      by the linear tetrahedron method
!      The original program is nsdos3 for dos calculation
!
!      Tomoyuki Hamada, Univ. Tokyo Feb. 20, 2003
!      modifed by T. Hamada Aug. 25, 2003; Sept. 1, 2003
!
!
    implicit none
!
    integer, intent(in)        :: nfout, ni, nx, ny, nz, ispin, np0
    integer, dimension(:)      :: ip20,ip2cub
    integer                    :: k0, ik2, iv, ic, ifind
    integer                    :: ieig, jeig
    integer                    :: ini
    integer, save              :: ieigsta, ieigend, jeigsta, jeigend
    real(DP), dimension(:,:,:) :: eig2
    real(DP), dimension(:)     :: dea, eia, eja
    real(DP), dimension(:,:)   :: tra
    real(DP)                   :: ei, ej, jdos, j0
    real(DP), dimension(:)     :: e
    real(DP), dimension(:,:)   :: tint
    real(DP), dimension(ni,6)  :: t0
    logical, intent(inout)     :: called

    tint=0.0d0
    jdos=0.0d0

    if(.not.called) then
       call set_ieig_jeig_staend    ! --> contained here
       if(printable) then
          if(band_i==0.and.band_f==0) then
             write(nfout,'(1x," integration of all possible band transition")')
          else
             write(nfout,'(1x," integration of band = ",i3," -> ",i3," transition ")') band_i, band_f
          end if
          write(nfout,'(1x," ieigsta = ", i3," ieigend = ",i3)') ieigsta, ieigend
          write(nfout,'(1x," jeigsta = ", i3," jeigend = ",i3)') jeigsta, jeigend
       end if
    end if
    call reset_scissors_if_metallic(nfout)

    if(nbztyp==1) vk0xyz=vk00xyz

    if(ipri >= 1) write(nfout,'(" --- tra ---")')

    do ieig=ieigsta, ieigend
       do jeig=jeigsta, jeigend
          t0=0.0d0
          j0=0.0d0
          if(ieig==jeig) cycle 
          if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
          if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
          if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
          if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
          if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
          !write(nfout,'(1x," ieig = ",i3)') ieig
          !write(nfout,'(1x," jeig = ",i3)') jeig
          do  k0=1,np0
             ei=eig2(ip20(k0),ieig, ispin)
             ej=eig2(ip20(k0),jeig, ispin)
             eia(k0)=ei
             eja(k0)=ej
             dea(k0)=ej-ei+scissor !-> scissor operator
             ik2=nspin*(ip20(k0)-1)+ispin
             call find_ind_vb_and_cb(ieig,jeig,iv,ic,ik2,ifind)
             if(ifind==1) then
                tra(k0,1:6)=trm2(k0,iv,ic,1:6,ispin)
             !write(nfout,'(i4,6f10.5)') k0,tra(k0,1:6)
             else
                if(printable) &
                & write(nfout,'(1x,"!!* index of valence or conduction band is wrong   UVSOR-Epsilon STOP")')
             end if
           end do

           if(ipri >= 3 .and. jeig >= ieig .and. ieig >= 4 .and. jeig <= 12) then
              write(nfout,'(" ieig, jeig = ",2i8)') ieig, jeig
              write(nfout,'(10f10.6)') tra(1:np0,1)
           end if

           call nsdos0_m(nfout,e,ni,nx,ny,nz,vk0xyz,dea,eia,eja,tra,j0,t0,ip2cub,called)
           tint=tint+t0
           jdos=jdos+j0
       end do
    end do
    !do ini = 1, ni
    !   write(nfout,'(1x," e = ",f10.5,1x," tint = ",6f10.5)') e(ini), tint(ini,1:6)
    !end do
    contains
     subroutine set_ieig_jeig_staend
       if(band_i==0.and.band_f==0) then
          ieigsta=1
          ieigend=neg
          jeigsta=1
          jeigend=neg
       else
          ieigsta=band_i
          ieigend=band_i
          jeigsta=band_f
          jeigend=band_f
       end if
     end subroutine set_ieig_jeig_staend
 end subroutine nsdos3_m

! ============ KT_add ============== 13.0S
 subroutine nsdos3_m_core2val(nfout,e,ni,nx,ny,nz,eig2,ispin,ip20,np0,&
        &                     dea,tra,eia,eja,ip2cub,tint,jdos,called)
!
!      k-space integrtation of square of transition moment
!      by the linear tetrahedron method
!      The original program is nsdos3 for dos calculation
!
!      Tomoyuki Hamada, Univ. Tokyo Feb. 20, 2003
!      modifed by T. Hamada Aug. 25, 2003; Sept. 1, 2003
!
!
    implicit none
!
    integer, intent(in)        :: nfout, ni, nx, ny, nz, ispin, np0
    integer, dimension(:)      :: ip20,ip2cub
    integer                    :: k0, ik2, iv, ic, ifind
    integer                    :: ieig, jeig
    integer                    :: ini
    integer, save              :: ieigsta, ieigend, jeigsta, jeigend
    real(DP), dimension(:,:,:) :: eig2
    real(DP), dimension(:)     :: dea, eia, eja
    real(DP), dimension(:,:)   :: tra
    real(DP)                   :: ei, ej, jdos, j0
    real(DP), dimension(:)     :: e
    real(DP), dimension(:,:)   :: tint
    real(DP), dimension(ni,6)  :: t0
    logical, intent(inout)     :: called

    tint=0.0d0
    jdos=0.0d0

    if(.not.called) then
       call set_ieig_jeig_staend    ! --> contained here
       if(printable) then
          if(band_i==0.and.band_f==0) then
             write(nfout,'(1x," integration of all possible band transition")')
          else
             write(nfout,'(1x," integration of band = ",i3," -> ",i3," transition ")') band_i, band_f
          end if
          write(nfout,'(1x," ieigsta = ", i3," ieigend = ",i3)') ieigsta, ieigend
          write(nfout,'(1x," jeigsta = ", i3," jeigend = ",i3)') jeigsta, jeigend
       end if
    end if

!!!!!!!!    call reset_scissors_if_metallic(nfout)

    if(nbztyp==1) vk0xyz=vk00xyz

    if(ipri >= 1) write(nfout,'(" --- tra ---")')

    do ieig=1, num_core_states
       do jeig=jeigsta, jeigend
          t0=0.0d0;  j0=0.0d0

          do  k0=1,np0
             ei=ene_core_states(ieig)
             ej=eig2(ip20(k0),jeig, ispin)

             eia(k0)=ei; eja(k0)=ej

            dea(k0)=ej-ei+scissor !-> scissor operator

             ik2=nspin*(ip20(k0)-1)+ispin

             call find_ind_cb_only(jeig,ic,ik2,ifind)
             iv = ieig

             if(ifind==1) then
                tra(k0,1:6)=trm2(k0,iv,ic,1:6,ispin)
             !write(nfout,'(i4,6f10.5)') k0,tra(k0,1:6)
             else
                if(printable) &
                & write(nfout,'(1x,"!!* index of conduction band is wrong   UVSOR-Epsilon STOP")')
             end if
           end do

           if(ipri >= 3 .and. jeig >= ieig .and. ieig >= 4 .and. jeig <= 12) then
              write(nfout,'(" ieig, jeig = ",2i8)') ieig, jeig
              write(nfout,'(10f10.6)') tra(1:np0,1)
           end if

           call nsdos0_m(nfout,e,ni,nx,ny,nz,vk0xyz,dea,eia,eja,tra,j0,t0,ip2cub,called)
           tint=tint+t0
           jdos=jdos+j0
       end do
    end do
    !do ini = 1, ni
    !   write(nfout,'(1x," e = ",f10.5,1x," tint = ",6f10.5)') e(ini), tint(ini,1:6)
    !end do
    contains
     subroutine set_ieig_jeig_staend
       if(band_i==0.and.band_f==0) then
          ieigsta=1; ieigend=neg;   jeigsta=1;    jeigend=neg
       else
          ieigsta=band_i; ieigend=band_i;  jeigsta=band_f;  jeigend=band_f
       end if
     end subroutine set_ieig_jeig_staend
   end subroutine nsdos3_m_core2val
! ==================================== 13.0S

 subroutine nsdos0_m(jf,e,ne,nxx,nyy,nzz,vk0,ea,ei,ej,tr,dos,tint,ip2cub,called)
!
!      subroutine for joint density of states and calculation
!      and k-space integration of property
!      The original code is nsdos0
!
!      Tomoyuki Hamada, Univ. Tokyo, Feb.19, 2003
!      modified by T. Hamada Aug. 19, 2003
!      modified by T. Hamada Aug. 25 2003
!      modified by T. Hamada Aug. 29 2003
!      modified by T. Hanada Oct. 25 2007
!
!      nxx   number of mesh points in x-direction
!      nyy   number of mesh points in y-direction
!      nzz   number of mesh points in z-direction
!      ea    energy difference between eigenstates
!      ei    energy of initial band
!      ej    energy of final band
!      tr    trm2 at all k-points
!      dos   joint density od state (not used)
!      tint  integrated property
    implicit none
!
    integer, intent(in) :: jf

    real(DP),intent(in), dimension(:)        :: e
    real(DP),intent(in), dimension(:)        :: ea, ei, ej
    real(DP),intent(in), dimension(:,:)      :: tr
    real(DP),intent(in), dimension(:,:)      :: vk0
    real(DP),intent(out),dimension(:,:)      :: tint
    real(DP),allocatable,dimension(:,:,:)    :: tcub
    real(DP),allocatable,dimension(:,:)      :: ttr
    real(DP)                                 :: e1,e2,e3,e4,dos,d
    real(DP),dimension(6)                    :: tr1, tr2, tr3, tr4
    real(DP),save                            :: vtet6,vtet6inv
    real(DP),            dimension(2,2,2)    :: ecub, eicub, efcub
    real(DP),            dimension(8)        :: ec, eci, ecf
    real(DP),            dimension(4)        :: et, eti, etf, eb, ebi, ebf
    real(DP),            dimension(2,2,2,6)  :: trcub
    real(DP),            dimension(8,6)      :: trc
    real(DP),            dimension(4,6)      :: trt,trb
    real(DP),            dimension(3)        :: ka, kb, kc, kai, kbi, kci
    real(DP),allocatable,    dimension(:,:)      :: kip0
    real(DP)                                 :: eps, tvol, vtet
    integer                                  :: icub, ip, ip0, iq, it, ix, iy, iz, kx, ky, kz, i, m
    integer                                  :: ncub, ni, np, npx, npy, npz, ntet
    integer,             dimension(2,2,2)    :: iecub
    integer,             dimension(8)        :: iec
    integer,             dimension(4)        :: iet, ieb
    integer,             dimension(6,2)      :: iqmat
    integer,             intent(in)          :: ne, nxx, nyy, nzz
    integer,             dimension(:)        :: ip2cub
    integer                                  :: tintegral, INCLUDE, EXCLUDE
    logical                                  :: called
    equivalence(ec(1),ecub(1,1,1))
    equivalence(eci(1),eicub(1,1,1))
    equivalence(ecf(1),efcub(1,1,1))
    equivalence(trc(1,1),trcub(1,1,1,1))
    equivalence(iec(1),iecub(1,1,1))
    data iqmat/2,2,5,3,3,5, 4,6,6,4,7,7/
    data INCLUDE, EXCLUDE /1,0/
!   INCLUDE: contribution from a tetrahedron with tintegtal=INCLUDE is included
!   EXCLUDE: that from a tetrahedron with EXCLUDE is neglected

    allocate(kip0(np0,3))
!
    kip0=0.0d0
!  definition of eps  <- must be consistent with <nstts1_m>
    eps=tetra_eps
!
    npx=nxx+1
    npy=nyy+1
    npz=nzz+1
    np=npx*npy*npz
    ncub=nxx*nyy*nzz
    ntet=6*ncub
    if(.not.called) then
       if(printable) then
          write(jf,51) ntet,ncub
       end if
! set tetrahedron volume vtet vtet6, 1/vtet6
       vtet=rvol/real(ntet,DP)
       vtet6=vtet*6.00d0
       vtet6inv=1.0d0/vtet6
       if(printable) then
          write(jf,52) rvol,vtet
       end  if
    end if
 51 format(1x," number of tetrahedron = ",i10,/,1x," number of cube = ",i10)
 52 format(1x," Brillouin zone volume = ",f10.5,/1x," tetrahedron volume =",f10.5)

    allocate(tcub(ne,6,ncub)); tcub=0.0d0
    allocate(ttr(ne,6)); ttr=0.0d0
    tint=0.0d0

!
!     ***  integration over b.z. starts    ***
!
!     ***       sampling over cubes        ***
!
    icub=0
    do iz=0,nzz-1
       do iy=0,nyy-1
          do ix=0,nxx-1
             icub=icub+1
             if(icub.ne.ip2cub(icub)) then
                tint(1:ne,1:6)=tint(1:ne,1:6)+tcub(1:ne,1:6,ip2cub(icub))
             else
                tcub(1:ne,1:6,icub)=0.0d0
!     ***  energies at cube corners  ***
                ni=npx*(npy*iz+iy)+ix
                if(nbztyp==1) then
                   do kz=1,2
                      do ky=1,2
                         do kx=1,2
                            ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                            kip0(ip0,1:3)=vk00xyz(ip0,1:3)
                            ecub(kx,ky,kz)=ea(ip0)
                            eicub(kx,ky,kz)=ei(ip0)
                            efcub(kx,ky,kz)=ej(ip0)
                            trcub(kx,ky,kz,1:6)=tr(ip0,1:6)
                            iecub(kx,ky,kz)=ip0
                         end do
                      end do
                   end do
                end if
                if(nbztyp/=1) then
                   do kz=1,2
                      do ky=1,2
                         do kx=1,2
                            ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                            kip0(ip0,1:3)=vk0(ip0,1:3)
                            ecub(kx,ky,kz)=ea(ip0)
                            eicub(kx,ky,kz)=ei(ip0)
                            efcub(kx,ky,kz)=ej(ip0)
                            trcub(kx,ky,kz,1:6)=tr(ip0,1:6)
                            iecub(kx,ky,kz)=ip0
                         end do
                      end do
                   end do
                end if
!         ***      six tetrahedrons      ***
!         *** sampling over tetrahedrons ***
                et(1)=ec(1)
                eti(1)=eci(1)
                etf(1)=ecf(1)
                et(4)=ec(8)
                eti(4)=eci(8)
                etf(4)=ecf(8)
                trt(1,1:6)=trc(1,1:6)
                trt(4,1:6)=trc(8,1:6)
                iet(1)=iec(1)
                iet(4)=iec(8)
                do it=1,6
                   iq=iqmat(it,1)
                   et(2)=ec(iq)
                   eti(2)=eci(iq)
                   etf(2)=ecf(iq)
                   trt(2,1:6)=trc(iq,1:6)
                   iet(2)=iec(iq)
                   iq=iqmat(it,2)
                   et(3)=ec(iq)
                   eti(3)=eci(iq)
                   etf(3)=ecf(iq)
                   trt(3,1:6)=trc(iq,1:6)
                   iet(3)=iec(iq)
                   eb(1:4)=et(1:4)
                   ebi(1:4)=eti(1:4)
                   ebf(1:4)=etf(1:4)
                   trb(1:4,1:6)=trt(1:4,1:6)
                   ieb(1:4)=iet(1:4)
                   tintegral=INCLUDE
                   if(system==METALLIC) then
                      call check_band_energy(ebi,ebf,tintegral)
                      call set_trb
                   end if
!        ***  eb(1).le.eb(2).le.eb(3).le.eb(4)  ***
                   call nsttod_m(eb,ebi,ebf,trb,ieb)
                   e1=eb(1)
                   e2=eb(2)
                   e3=eb(3)
                   e4=eb(4)
                   tr1(1:6)=trb(1,1:6)
                   tr2(1:6)=trb(2,1:6)
                   tr3(1:6)=trb(3,1:6)
                   tr4(1:6)=trb(4,1:6)
                   call nstts1_m(e1,e2,e3,e4)
!!                 call check_energy_order(e1,e2,e3,e4)
                   call nstrans(e,ne,e1,e2,e3,e4,tr1,tr2,tr3,tr4,ebi,ebf,ieb,kip0,d,ttr, &
                             & vtet6,vtet6inv,tintegral,INCLUDE,EXCLUDE)
                  if(d==0.0d0) cycle
                  tint=tint+ttr
                  tcub(1:ne,1:6,icub)=tcub(1:ne,1:6,icub)+ttr(1:ne,1:6)
                end do
             end if
          end do
       end do
    end do

    if (.not.called) then
       called=.true.
    end if

  contains
     subroutine nsttod_m(eb,ebi,ebf,trb,ieb)
!      energy ordering k-points
!      The original program is nsttod
!      Tomoyuki Hamada, Univ. Tokyo, Feb. 19, 2003
!      modified by T. Hamada Sept. 2, 2003
       implicit none
       integer                   :: ind, ih, i, k
       integer,  dimension(:)    :: ieb
       real(DP)                  :: a, ainit, afin
       real(DP), dimension(:)    :: eb, ebi,ebf
       real(DP), dimension(:,: ) :: trb
       real(DP), dimension(6)    :: b

       do k=1,3
          a=eb(k)
          ainit=ebi(k)
          afin=ebf(k)
          b(1:6)=trb(k,1:6)
          ih=ieb(k)
          ind=k
          do i=k+1,4
             if(eb(i).lt.a) then
                a=eb(i)
                ainit=ebi(i)
                afin=ebf(i)
                b(1:6)=trb(i,1:6)
                ih=ieb(i)
                ind=i
             end if
          end do
          eb(ind)=eb(k)
          ebi(ind)=ebi(k)
          ebf(ind)=ebf(k)
          trb(ind,1:6)=trb(k,1:6)
          ieb(ind)=ieb(k)
          eb(k)=a
          ebi(k)=ainit
          ebf(k)=afin
          trb(k,1:6)=b(1:6)
          ieb(k)=ih
       end do
     end subroutine nsttod_m

     subroutine set_trb
       implicit none
       integer :: i
       do i=1, 4
          if(ebi(i)<=efermi.and.ebf(i)>efermi) then
             cycle
          else
             trb(i,1:6)=0.0d0
          end if
       end do
     end subroutine set_trb

     subroutine check_band_energy(ebi,ebf,tintegral)
       implicit none
!
!      classify tetrahedron
!      ebi, ebf: initial and final band energy at corner of tetrahedron
!      ebi<efermi and ebj > efermi -> tintegral=INCLUDE
!      other case                  -> tintegral=EXCLUDE
!
       integer, intent(inout)                  :: tintegral

       integer :: i
       real(kind=DP), intent(in), dimension(:) :: ebi, ebf
       tintegral = INCLUDE
       do i=1, 4
          if(ebi(i)<=efermi.and.ebf(i)>efermi) then
             cycle
          else
             tintegral=EXCLUDE
             exit
          end if
       end do
     end subroutine check_band_energy
 end subroutine nsdos0_m

 subroutine nstts1_m(e1,e2,e3,e4)
    implicit none
!
!   non-degenarate energy difference at k-points of tetrahedron
!
    real(DP) :: e1,e2,e3,e4
    real(DP) :: eps,eps2,eps3
    real(DP) :: a21,a32,a43,o
!
    eps = tetra_eps
    eps2=eps*0.5d0
    eps3=(eps*3.0d0)*0.5d0
    a21=dabs(e2-e1)
    a32=dabs(e3-e2)
    a43=dabs(e4-e3)
    if(a21.lt.eps) then
       if(a32.lt.eps) then
          if(a43.lt.eps) then
             o=(e1+e2+e3+e4)*0.25d0
             e1=o-eps3
             e2=o-eps2
             e3=o+eps2
             e4=o+eps3
          else
             e1=e3-eps*2.0d0
             e2=e3-eps
          end if
       else
          if(a43.lt.eps) then
             e1=e2-eps
             e4=e3+eps
          else
             e1=e2-eps
          end if
       end if
    else
       if(a32.lt.eps) then
          if(a43.lt.eps) then
             e3=e2+eps
             e4=e2+eps*2.0d0
          else
             o=(e2+e3)*0.5d0
             e2=o-eps2
             e3=o+eps2
          end if
       else
          if(a43.lt.eps) then
             e4=e3+eps
          end if
       end if
    end if
 end subroutine nstts1_m

 subroutine check_energy_order(e1,e2,e3,e4)
    implicit none
    real(DP), intent(in) :: e1,e2,e3,e4
    if(e1<e2.and.e2<e3.and.e3<e4) then
       return
    else
      if(printable) then
         write(nfout,'(1x,"! Energy order of tetrahedron corner is wrong !")')
         write(nfout,'(1x,"e1=",f10.5,"e2=",f10.5,"e3=",f10.5,"e4=",f10.5)') e1,e2,e3,e4
      end if
      stop
    end if
 end subroutine check_energy_order
 
 subroutine nstrans(ed,ne,e1,e2,e3,e4,a1,a2,a3,a4,ebi,ebf,ieb,kip0,jdos,tia, &
                  & vol,volinv,tintegral,INCLUDE,EXCLUDE)
    implicit none
!      Linear Tetrahedron Calculation
!      G. Lehman and M. Taut, Physica Status Solidi (b) vol.54, pp469-477 (1972)
!      This program calculates tetrahedron integration of property a

!      Tomoyuki Hamada, Univ. Tokyo, Feb.18, 2003
!      Updated  by T. Hamada Mar. 31, 2006, Oct, 26, 2007
!
!    * input
!       e: photon energy energy
!       e1: band transition energy at tetrahedron corner0(k-point k0)
!       e2:                        at             corner1(k-point k1)
!       e3:                        at             corner2(k-point k2)
!       e4:                        at             corner3(k-point k3)
!       here, e1<e2<e3<e4
!       ebi(m) : energy of initial band at m-th corner of tetrahedron (m=1,4)
!       ebf(m) : energy of final band at m-th corder of tetrahedron (m=1,4)
!       k1,k2,k3,k4 are the corner vectors of tetrahedron
!       a1: trm2 at k0  trm2 -> see major variable explanation
!       a2: trm2 at k1
!       a3: trm2 at k2
!       a4: tem2 at k3
!       ieb: k-vector index
!       kip    : k-point vector of tetrahedron corners
!       vol    : volume of tetrahedron
!       volinv : 1/vol
!    * output
!       jdos: joint density of state
!       tip:  tetrahedron integral
!    * variables for linear tetrahedon (see the reference for details)
!       f_b : f/|b|
!       k1, k2, k3 : edge vector of tetrahedron
!       r1, r2, r3 : r vector
!       a          : a vector
!       s          : s vector
!
    integer,intent(in)                  :: tintegral
    integer,intent(in)                  :: ne
    integer,intent(in)                  :: INCLUDE, EXCLUDE
    integer,intent(in),  dimension(4)   :: ieb
    real(DP),intent(in)                 :: e1,e2,e3,e4,vol,volinv
    real(DP),intent(in), dimension(6)   :: a1,a2,a3,a4
    real(DP),intent(in), dimension(:)   :: ed
    real(DP),intent(in), dimension(:,:) :: kip0
    real(DP),intent(in), dimension(4)   :: ebi, ebf
    real(DP),intent(out),dimension(:,:) :: tia
    real(DP),dimension(6)               :: a0i0, ai1
    real(DP),dimension(3)               :: k1,k2,k3,s,r1,r2,r3
    real(DP),dimension(6,3)             :: a
    real(DP)                            :: e, f_b, jdos
    integer                             :: i, istep, icount,tintegral0
    integer                             :: ns0, ns1, ns2, ns3, ne1, ne2, ne3
    integer                             :: icount1, icount2, icount3
    logical                             :: deg12, deg23, deg34

    tia=0.0d0
    jdos = 0.0d0


!  set istep ranges
    ns1=1+ceiling((e1-ed(1))/e_step)
    ne1=1+floor((e2-ed(1))/e_step)
    ns2=1+ceiling((e2-ed(1))/e_step)
    ne2=1+floor((e3-ed(1))/e_step)
    ns3=1+ceiling((e3-ed(1))/e_step)
    ne3=1+floor((e4-ed(1))/e_step)

!    if(npes>=2) call istep_range_trimming

!    if(ns1>ne1) then
!       ns1 = 0 ; ne1 = 0
!    end if
!    if(ns2>ne2) then
!       ns2 = 0 ; ne2 = 0
!    end if
!    if(ns3>ne3) then
!       ns3 = 0 ; ne3 = 0
!    end if
    
    if(npes==1) then
       if(ne1>ne) ne1=ne
       if(ne2>ne) ne2=ne
       if(ne3>ne) ne3=ne
       if(ns1>ne1) then
          ns1 = 0 ; ne1 = 0
       end if
       if(ns2>ne2) then
          ns2 = 0 ; ne2 = 0
       end if
       if(ns3>ne3) then
          ns3 = 0 ; ne3 = 0
       end if
    else
       deg12 = .false.
       deg23 = .false.
       deg34 = .false.
       if(ns1>ne1) then
          deg12 = .true. ; ne1 = ns1
       end if
       if(ns2>ne2) then
          deg23 = .true. ; ne2 = ns2
       end if
       if(ns3>ne3) then
          deg34 = .true. ; ne3 = ns3
       end if
       call istep_range_trimming
      if(deg12 .eqv. .true.) then
          ns1 = 0 ; ne1 = 0
       end if
       if(deg23 .eqv. .true.) then
          ns2 = 0 ; ne2 = 0
       end if
       if(deg34 .eqv. .true.) then
          ns3 = 0 ; ne3 = 0
       end if
!       call check_trimming
    end if

!   calculate unit vectors of tetrahedron
    k1(1:3)=kip0(ieb(2),1:3)-kip0(ieb(1),1:3)
    k2(1:3)=kip0(ieb(3),1:3)-kip0(ieb(1),1:3)
    k3(1:3)=kip0(ieb(4),1:3)-kip0(ieb(1),1:3)
!   calculate A vectors of tetrahedron
    call calc_avec(k1,k2,k3,a1,a2,a3,a4,volinv,a,r1,r2,r3)

    if(ne1/=0) then
!     write(nfout,'(1x," ns1 = ",i3," ne1 = ",i3)') ns1, ne1
      if(tintegral/=EXCLUDE) then
         do istep = ns1, ne1
            e = ed(istep)
            call calcf_b1(e,e1,e2,e3,e4,vol,f_b)
            jdos=f_b
            call svec1(k1,k2,k3,e,e1,e2,e3,e4,s)
            ai1(1:6)=a(1:6,1)*s(1)+a(1:6,2)*s(2)+a(1:6,3)*s(3)
            tia(istep,1:6)=(a1+ai1)*f_b
         end do
      else
         do istep = ns1, ne1
            e = ed(istep)
            call calcf_b1(e,e1,e2,e3,e4,vol,f_b)
            jdos=f_b
            call svec1(k1,k2,k3,e,e1,e2,e3,e4,s)
            if(tintegral==EXCLUDE) then
               tintegral0 = EXCLUDE
               call check_band_energy_at_svec(tintegral0)
               if(tintegral0==EXCLUDE) cycle
            end if
            ai1(1:6)=a(1:6,1)*s(1)+a(1:6,2)*s(2)+a(1:6,3)*s(3)
            tia(istep,1:6)=(a1+ai1)*f_b
         end do
      end if
    end if

    if(ne2/=0) then
!     write(nfout,'(1x," ns2 = ",i3," ne2 = ",i3)') ns2, ne2
      if(tintegral/=EXCLUDE) then
         do istep = ns2, ne2
            e = ed(istep)
            call calcf_b2(e,e1,e2,e3,e4,vol,f_b)
            jdos=f_b
            call svec2(k1,k2,k3,e,e1,e2,e3,e4,s)
            ai1(1:6)=a(1:6,1)*s(1)+a(1:6,2)*s(2)+a(1:6,3)*s(3)
            tia(istep,1:6)=(a1+ai1)*f_b
         end do
      else
         do istep = ns2, ne2
            e = ed(istep)
            call calcf_b2(e,e1,e2,e3,e4,vol,f_b)
            jdos=f_b
            call svec2(k1,k2,k3,e,e1,e2,e3,e4,s)
            if(tintegral==EXCLUDE) then
               tintegral0 = EXCLUDE
               call check_band_energy_at_svec(tintegral0)
               if(tintegral0==EXCLUDE) cycle
            end if
            ai1(1:6)=a(1:6,1)*s(1)+a(1:6,2)*s(2)+a(1:6,3)*s(3)
            tia(istep,1:6)=(a1+ai1)*f_b
         end do
      end if
    end if

    if(ne3/=0) then
!     write(nfout,'(1x," ns3 = ",i3," ne3 = ",i3)') ns3, ne3
      if(tintegral/=EXCLUDE) then
         do istep = ns3, ne3
            e = ed(istep)
            call calcf_b3(e,e1,e2,e3,e4,vol,f_b)
            jdos=f_b
            call svec3(k1,k2,k3,e,e1,e2,e3,e4,s)
            ai1(1:6)=a(1:6,1)*s(1)+a(1:6,2)*s(2)+a(1:6,3)*s(3)
            tia(istep,1:6)=(a1+ai1)*f_b
         end do
      else
         do istep = ns3, ne3
            e = ed(istep)
            call calcf_b3(e,e1,e2,e3,e4,vol,f_b)
            jdos=f_b
            call svec3(k1,k2,k3,e,e1,e2,e3,e4,s)
            if(tintegral==EXCLUDE) then
               tintegral0 = EXCLUDE
               call check_band_energy_at_svec(tintegral0)
               if(tintegral0==EXCLUDE) cycle
            end if
            ai1(1:6)=a(1:6,1)*s(1)+a(1:6,2)*s(2)+a(1:6,3)*s(3)
            tia(istep,1:6)=(a1+ai1)*f_b
         end do
      end if
    end if
    contains
     subroutine istep_range_trimming
       implicit none
! out of range
       if(ns1>ne.or.ne3<1) then
          ns1 = 0 ; ne1 = 0
          ns2 = 0 ; ne2 = 0
          ns3 = 0 ; ne3 = 0
          return
       end if
! trimming of lower bound
       if(ns1<=1.and.ne1>=1) then
          ns1 = 1
       else if(ns2<=1.and.ne2>=1) then
          ns1 = 0 ; ne1 = 0
          ns2 = 1
       else if(ns3<=1.and.ne3>=1) then
          ns1 = 0 ; ne1 = 0
          ns2 = 0 ; ne2 = 0
          ns3 = 1
       end if
! trimming of higher bound
       if(ns1<=ne.and.ne1>=ne) then
          ne1 = ne
          ns2 = 0 ; ne2 = 0
          ns3 = 0 ; ne3 = 0
       else if(ns2<=ne.and.ne2>=ne) then
          ne2 = ne
          ns3 = 0 ; ne3 = 0
       else if(ns3<=ne.and.ne3>=ne) then
          ne3 = ne
       end if
     end subroutine istep_range_trimming

     subroutine check_trimming
       implicit none
       if(ns1>ne) then
          write(nfout,'(1x,"!* ns1 = ",i4," > ne = ",i4)') ns1, ne
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ne1>ne) then
          write(nfout,'(1x,"!* ne1 = ",i4," > ne = ",i4)') ne1, ne
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ns1>ne1) then
          write(nfout,'(1x,"!* ns1 = ",i4," > ne1 = ",i4)') ns1, ne1
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ns2>ne) then
          write(nfout,'(1x,"!* ns2 = ",i4," > ne = ",i4)') ns2, ne
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ne2>ne) then
          write(nfout,'(1x,"!* ne2 = ",i4," > ne = ",i4)') ne2, ne
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ns2>ne2) then
          write(nfout,'(1x,"!* ns2 = ",i4," > ne2 = ",i4)') ns2, ne2
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ns3>ne) then
          write(nfout,'(1x,"!* ns3 = ",i4," > ne = ",i4)') ns3, ne
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ne3>ne) then
          write(nfout,'(1x,"!* ne3 = ",i4," > ne = ",i4)') ne3, ne
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
       if(ns3>ne3) then
         write(nfout,'(1x,"!* ns3 = ",i4," > ne3 = ",i4)') ns3, ne3
          write(nfout,*) ns1, ne1
          write(nfout,*) ns2, ne2
          write(nfout,*) ns3, ne3
       end if
     end subroutine check_trimming

     subroutine calc_avec(k1,k2,k3,p1,p2,p3,p4,vinv,a,r1,r2,r3)
       implicit none
       real(DP),intent(in), dimension(3)   :: k1,k2,k3
       real(DP),intent(in), dimension(6)   :: p1,p2,p3,p4
       real(DP),intent(in)                 :: vinv
       real(DP),intent(out),dimension(6,3) :: a
       real(DP),            dimension(6)   :: d1,d2,d3
       real(DP),intent(out),dimension(3)   :: r1,r2,r3
       integer                             :: invtetra
       d1=p2-p1
       d2=p3-p1
       d3=p4-p1
! calculate r vectors
       r1(1)=(k2(2)*k3(3)-k2(3)*k3(2))
       r1(2)=(k2(3)*k3(1)-k2(1)*k3(3))
       r1(3)=(k2(1)*k3(2)-k2(2)*k3(1))
       r2(1)=(k3(2)*k1(3)-k3(3)*k1(2))
       r2(2)=(k3(3)*k1(1)-k3(1)*k1(3))
       r2(3)=(k3(1)*k1(2)-k3(2)*k1(1))
       r3(1)=(k1(2)*k2(3)-k1(3)*k2(2))
       r3(2)=(k1(3)*k2(1)-k1(1)*k2(3))
       r3(3)=(k1(1)*k2(2)-k1(2)*k2(1))
       r1=vinv*r1
       r2=vinv*r2
       r3=vinv*r3
! check r_dot_k
       call check_r_vector(r1,r2,r3,k1,k2,k3,invtetra)
! invtetra=-1 case
       if(invtetra==-1) then
          r1=-1.0d0*r1
          r2=-1.0d0*r2
          r3=-1.0d0*r3
       end if
! calculate a vector
       a(1:6,1)=d1(1:6)*r1(1)+d2(1:6)*r2(1)+d3(1:6)*r3(1)
       a(1:6,2)=d1(1:6)*r1(2)+d2(1:6)*r2(2)+d3(1:6)*r3(2)
       a(1:6,3)=d1(1:6)*r1(3)+d2(1:6)*r2(3)+d3(1:6)*r3(3)
     end subroutine calc_avec

     subroutine check_r_vector(r1,r2,r3,k1,k2,k3,invtetra)
       implicit none
! This subroutine checks r vectors
! see G. Lehmann and M. Taut, phys. stat. sol.(b) vol.54, 469 (1972)
! checks condition ri_dot_kj=1(i=j case),
!                           =0(i/=j case) (Eq.(3.3) of the paper)
! sets invtetra  invtetra= 1 (ri_dot_ki=1 case)
!                invtetra=-1 (rt_dot_ki=-1 case)
!
! T. Hamada(Univ. Tokyo) Nov. 27, 2003
! last modified Oct.25, 2007
       integer,intent(out)               :: invtetra
       integer                           :: i
       real(DP),intent(in), dimension(3) :: r1, r2, r3, k1, k2, k3
       real(DP)                          :: eps, r1_dot_k1, r1_dot_k2, r1_dot_k3, r2_dot_k2, r2_dot_k3, r3_dot_k3
       eps=10.0d0**(-7)
! check ri_dot_ki
! ri_dot_ki must be 1.0d0
        r1_dot_k1=r1(1)*k1(1)+r1(2)*k1(2)+r1(3)*k1(3)
        r2_dot_k2=r2(1)*k2(1)+r2(2)*k2(2)+r2(3)*k2(3)
        r3_dot_k3=r3(1)*k3(1)+r3(2)*k3(2)+r3(3)*k3(3)
!       if(dabs(r1_dot_k1)-1.0d0>=eps) then
!          if(printable) &
!         & write(nfout,'(1x,"!** error in linear tetrahedron.   &
!         &r1 vector is wrong.   r1_dot_k1 = ",f10.5)') r1_dot_k1
!          stop
!       end if
!       if(dabs(r2_dot_k2)-1.0d0>=eps) then
!          if(printable) &
!         & write(nfout,'(1x,"!** error in linear tetrahedron.   &
!         &r2 vector is wrong.   r2_dot_k2 = ",f10.5)') r2_dot_k2
!          stop
!       end if
!       if(dabs(r3_dot_k3)-1.0d0>=eps) then
!          if(printable) &
!         & write(nfout,'(1x,"!** error in linear tetrahedron.   &
!         &r3 vector is wrong.   r3_dot_k3 = ",f10.5)') r3_dot_k3
!          stop
!       end if

! set invtetra
        invtetra=1
        if(r1_dot_k1<0.0d0.and.r2_dot_k2<0.0d0.and.r3_dot_k3<0.0d0) invtetra=-1
     end subroutine check_r_vector

     subroutine calcf_b1(e,e0,e1,e2,e3,v,f_b)
       implicit none
! i0 term calculation
! (e.ge.e0.and.e.lt.e1) case
       real(DP),intent(in)  :: e,e0,e1,e2,e3,v
       real(DP),intent(out) :: f_b
       f_b=(e-e0)**2/((e1-e0)*(e2-e0)*(e3-e0))
       f_b=v*f_b*0.5d0
     end subroutine calcf_b1

     subroutine calcf_b2(e,e0,e1,e2,e3,v,f_b)
       implicit none
! i0 term calculation
! (e.ge.e1.and.e.lt.e2)
       real(DP),intent(in)  :: e,e0,e1,e2,e3,v
       real(DP),intent(out) :: f_b
       f_b=(e-e0)**2/((e1-e0)*(e2-e0)*(e3-e0)) &
      &-(e-e1)**2/((e1-e0)*(e2-e1)*(e3-e1))
       f_b=v*f_b*0.5d0
     end subroutine calcf_b2

     subroutine calcf_b3(e,e0,e1,e2,e3,v,f_b)
       implicit none
! i0 term calculation
! (e.ge.e2.and.e.lt.e3)
       real(DP),intent(in)  :: e,e0,e1,e2,e3,v
       real(DP),intent(out) :: f_b
       f_b=(e-e3)**2/((e3-e0)*(e3-e1)*(e3-e2))
       f_b=v*f_b*0.5d0
     end subroutine calcf_b3

     subroutine svec1(k1,k2,k3,e,e0,e1,e2,e3,s)
       implicit none
! s vector calculation
! (e.ge.e0.and.e.lt.e1) case
       real(DP),intent(in)               :: e, e0, e1, e2, e3
       real(DP),intent(in), dimension(3) :: k1, k2, k3
       real(DP),intent(out),dimension(3) :: s
       real(DP),dimension(3)             :: k0, sum
! clean arrays
       k0=0.0d0
       s=0.0d0
       sum=0.0d0
       sum = (k1-k0)/(e1-e0) + (k2-k0)/(e2-e0) + (k3-k0)/(e3-e0)
       s = k0 + sum*(e-e0)/3.0d0
     end subroutine svec1

     subroutine svec2(k1,k2,k3,e,e0,e1,e2,e3,s)
       implicit none
! s vector calculation
! (e.ge.e1.and.e.lt.e2)
       real(DP),intent(in)               :: e, e0, e1, e2, e3
       real(DP),intent(in), dimension(3) :: k1, k2, k3
       real(DP),intent(out),dimension(3) :: s
       real(DP),dimension(3)             :: k0,s0,s1,s3, sum
       real(DP)                          :: f0,f1
       integer                           :: i
! clean arrays
       k0=0.0d0
       s=0.0d0 ; s0=0.0d0 ; s1 =0.0d0 ; s3 =0.0d0
       sum =0.0d0
       sum = (k1-k0)/(e1-e0) + (k2-k0)/(e2-e0) + (k3-k0)/(e3-e0)
       s0 = k0 +sum*(e1-e0)/3.0d0
       s0 = s0*(e-e2)/(e1-e2)
       sum=0.0d0
       sum = (k0-k3)/(e0-e3) + (k1-k3)/(e1-e3) + (k2-k3)/(e2-e3)
       s3 = k3 + sum*(e2-e3)/3.0d0
       s3 = s3*(e-e1)/(e2-e1)
       s = s0 + s3
     end subroutine svec2

     subroutine svec3(k1,k2,k3,e,e0,e1,e2,e3,s)
       implicit none
! s vector calculation
! (e.ge.e2.and.e.lt.e3) case
       real(DP),intent(in)               :: e, e0, e1, e2, e3
       real(DP),intent(in), dimension(3) :: k1, k2, k3
       real(DP),intent(out),dimension(3) :: s
       real(DP),dimension(3)             :: k0,sum
! clean arrays
       k0=0.0d0
       s=0.0d0
       sum=0.0d0
       sum=(k0-k3)/(e0-e3)+(k1-k3)/(e1-e3) + (k2-k3)/(e2-e3)
       s = k3 + sum*(e-e3)/3.0d0
     end subroutine svec3

     subroutine check_band_energy_at_svec(tintegral)
       implicit none
       integer,intent(inout)      :: tintegral
       integer                    :: i, j
       real(kind=DP),dimension(3) :: bi, bf
       real(kind=DP)              :: ei_at_svec, ef_at_svec, bi_dot_s, bf_dot_s
       if(tintegral/=EXCLUDE) stop
       bi=(ebi(2)-ebi(1))*r1+(ebi(3)-ebi(1))*r2+(ebi(4)-ebi(1))*r3
       bf=(ebf(2)-ebf(1))*r1+(ebf(3)-ebf(1))*r2+(ebf(4)-ebf(1))*r3
       bi_dot_s = bi(1)*s(1)+bi(2)*s(2)+bi(3)*s(3)
       bf_dot_s = bf(1)*s(1)+bf(2)*s(2)+bf(3)*s(3)
       ei_at_svec  = ebi(1) + bi_dot_s
       ef_at_svec  = ebf(1) + bf_dot_s
       if(ei_at_svec<=efermi.and.ef_at_svec>efermi) then
          tintegral=INCLUDE
       end if
     end subroutine check_band_energy_at_svec
 end subroutine nstrans

 subroutine calc_imeps_for_poly
    implicit none
    integer       :: l
    real(kind=DP) :: sum
! subroutine calculating Imaginary part of Dielectric function for polycrystal
! imeps(l,1:3)= (imeps(l,1)+imeps(l,2)+imeps(l,3))/3
! T. Hamada (Univ. Tokyo) 2004.4.5
    do l=1,nstep
       sum=0.0d0
       sum=(imeps(l,1)+imeps(l,2)+imeps(l,3))/3.0d0
       imeps(l,1)=sum
       imeps(l,2)=sum
       imeps(l,3)=sum
       imeps(l,4:6)=0.0d0
    end do
 end subroutine calc_imeps_for_poly

 subroutine kkt_v(e_step,n)

!   kramers-Kronig transformation
!   e:     photon energy
!   delta: photon energy interval
!   imeps: imaginary part of dielectric function
!   reps:  real part of dielectric function
!   T. Hamada, IIS, Univ. Tokyo, June 28, 2003; Aug. 29, 2003
!
    implicit none
    integer               :: i, j, k, l, n
    real(DP)              :: e_step
    real(DP)              :: fac,fac1
    real(DP),dimension(6) :: sum
    fac1=e_step/PAI
! perform KKT
    do  l=1,n
      reps(l,1:6)=0.0d0
      sum=0.0d0
      do  k=1,n
         if(l==k) cycle
         fac=e(k)/(e(k)**2-e(l)**2)
         sum(1:6)=sum(1:6)+fac*imeps(k,1:6)
      end do
      reps(l,1:6)=1.0d0+sum(1:6)*2*fac1
    end do
    if(printable) &
    & write(nfout,'(1x,"!* real part of dielectric function is obtained ")')
 end subroutine kkt_v 

 subroutine kkt_v_mpi(e_step,n)

!   MPI parallelized kramers-Kronig transformation
!   e:     photon energy
!   delta: photon energy interval
!   imeps: imaginary part of dielectric function
!   reps:  real part of dielectric function
!   T. Hamada, IIS, Univ. TOkyo, May 23, 2007
!
    implicit none
    integer                  :: i, j, k, l, n
    integer                  :: ipes
    integer                  :: local_n, local_n0, local_l, lstart, nl, lst, led
    real(DP)                 :: e_step
    real(DP)                 :: fac,fac1
    real(DP),dimension(6)    :: sum
    real(DP), allocatable, dimension (:,:) :: reps_wk

    if(printable) write(nfout,'(1x," npes = ",i4)') npes
    if(printable) write(nfout,'(1x," n = ",i5)') n

    local_n = n/npes 
    local_n0 = local_n + (n - local_n*npes)
  
    allocate(reps_wk(local_n0,6))
    reps_wk = 0.0d0

! perform KKT
    call mpi_bcast(imeps,n*6,mpi_double_precision,0,mpi_comm_group,ierr)
    reps = 0.0d0
    fac1=e_step/PAI

    if(mype == 0) then
      lstart = 1
      nl = local_n0
    else
      lstart = 1 + (mype-1)*local_n + local_n0
      nl = local_n
    end if

    do local_l = 1, nl
       sum=0.0d0
       l= lstart + local_l - 1
       do  k=1,n
          if(l==k) cycle
          fac=e(k)/(e(k)**2-e(l)**2)
          sum(1:6)=sum(1:6)+fac*imeps(k,1:6)
       end do
       reps_wk(local_l,1:6)=1.0d0+sum(1:6)*2*fac1
    end do

    if(mype == 0) then                                                                     
       reps(1:local_n0,1:6) = reps_wk(1:local_n0,1:6)
       if ( ipriepsilon >=2 ) then
          write(nfout,'(1x,"!*MPI",i5," data have been processed by ipes = 0")') local_n0
       endif
       do ipes = 1, npes-1
          call mpi_recv(reps_wk,local_n0*6,mpi_double_precision,ipes,1,mpi_comm_group,istatus,ierr) 
          lst = 1 + (ipes-1)*local_n + local_n0                                           
          led  =  lst + local_n - 1                                                    
          reps(lst:led,1:6) = reps_wk(1:local_n,1:6)                                      
          if ( ipriepsilon >=2 ) then
             write(nfout,'(1x,"!*MPI",i5," data have been received from ipes =",i4)') local_n,ipes
          endif
       end do                                                                             
    else                                                                                 
       call mpi_send(reps_wk,local_n0*6,mpi_double_precision,0,1,mpi_comm_group,ierr)    
    end if
    deallocate(reps_wk)

    if(printable) &
    & write(nfout,'(1x,"!* real part of dielectric function is obtained ")')
 end subroutine kkt_v_mpi

 subroutine optics(nstep,indxy)
    implicit none
!
!   calculate optical properties
!   n:    real part of refractive index
!   k:    imaginary part of refractive index
!   absc: absorption coefficient
!   refl: reflection coefficient

    integer,intent(in) :: nstep
    integer            :: i,indxy
    real(DP)           :: ereal, eimag, eabs, theta, theta_2, n, k
!    real(DP)           :: speed_of_light = 2.99792458d8 ! m/sec
!    real(DP)           :: au_of_velocity = 2.18769126d6 ! m/sec
    real(DP)           :: au_of_speed_of_light
    au_of_speed_of_light = speed_of_light/au_of_velocity
    do i=1, nstep
       ereal=reps(i,indxy)
       eimag=imeps(i,indxy)
       eabs=dsqrt(ereal**2+eimag**2)
       theta = ereal/eabs
       if(theta>=1.d0) theta=1.d0
       if(theta<=-1.d0) theta=-1.d0
       theta=dacos(theta)
       theta_2=theta/2.0d0
       n=dsqrt(eabs)*dcos(theta_2)
       k=dsqrt(eabs)*dsin(theta_2)
       refr(i)=n
       refi(i)=k
       absc(i)=2.0d0*e(i)*k/au_of_speed_of_light
       reflc(i)=((n-1.0d0)**2+k**2)/((n+1.0d0)**2+k**2)
    end do
    if(printable) then
       write(nfout,'(1x,"!* complex refractive index is calculated")')
       write(nfout,'(1x,"!* optical absorption spectra is calculated")')
       write(nfout,'(1x,"!* reflection coefficient is calculated")')
    end if
 end subroutine optics

 subroutine off_diagonal(nstep)
    implicit none
 ! correction for off-diagonal real part of epsilon
 ! Tomoyuki Hamada (Univ. Tokyo) May 24, 2003; Aug. 29, 2003
    integer, intent(in) :: nstep
    reps(1:nstep,4:6)=reps(1:nstep,4:6)-1.0d0
 end subroutine off_diagonal

 subroutine full_bz_int(nstep,nsym)
    implicit none
! generate transition moment integral over full Brillouin zone
! T. Hamada(Univ. Tokyo) June 14, 2003; July 30, 2003
    integer,intent(in)                         :: nstep, nsym
    integer                                    :: i, j
    real(kind=DP), dimension(3,3)              :: tint 

    do i=1, nstep
       tint(1,1)=imeps(i,1)
       tint(2,2)=imeps(i,2)
       tint(3,3)=imeps(i,3)
       tint(1,2)=imeps(i,4)
       tint(1,3)=imeps(i,5)
       tint(2,3)=imeps(i,6)
       tint(3,1)=tint(1,3)
       tint(3,2)=tint(2,3)

       if(magneto_optical==0) call full_bz_int_core(tint,nsym)

       imeps(i,1)=tint(1,1)
       imeps(i,2)=tint(2,2)
       imeps(i,3)=tint(3,3)
       imeps(i,4)=tint(1,2)
       imeps(i,5)=tint(1,3)
       imeps(i,6)=tint(2,3)
    end do     
 
    if(printable) then
       write(nfout,'(1x,"!* Transition moment square integral over full Brillouin zone is created")')
       if(nsym/=0) write(nfout,'(1x,"!* The integral has Brillouin zone symmetry")')
    end if
    contains
     subroutine full_bz_int_core(tint,nsym)
       implicit none
!  symmetrize dielectric matrix by using the rotation matrix os(3,3,nopr)
       integer,intent(in)                         :: nsym
       integer                                    :: iopr
       real(kind=DP),intent(inout),dimension(3,3) :: tint
       real(kind=DP),              dimension(3,3) :: u,tu
       real(kind=DP),              dimension(3,3) :: wk1,wk2,wk3
! initialize working matrices
       wk1=0.0d0
       wk2=0.0d0
       wk3=0.0d0
! symmetrize tint
       if(nsym/=0) then
          do iopr=1,nopr
             u(1:3,1:3)=op(1:3,1:3,iopr)
             tu=transpose(u)
             wk1=matmul(tint,tu)
             wk2=matmul(u,wk1)
             wk3=wk3+wk2
          end do 
          tint=wk3/real(nopr,kind=DP)
       end if
     end subroutine full_bz_int_core
 end subroutine full_bz_int

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!! added by mizouchi@adv 2003/03/11   !!!!!!!!!!!
!!!! The contributions from the non-local potential term !!!
!!!! are calculated.                                     !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 subroutine vnl_prepare_ek(DELQ)
!
!   calculate Read and Needs correction terms
!
    integer                                  :: id_sname = -1
    integer                                  :: ispin, it,lmt1, lmt2, il1, im1, il2, im2, ia
    integer                                  :: ik ,ib,ib1,p,p1,ii
    real(kind=DP),allocatable,dimension(:,:,:)   :: wkfsr, wkfsi
    real(kind=DP),allocatable,dimension(:,:,:,:) :: wkrfsr, wkrfsi
    real(DP),intent(in)                      :: DELQ
    real(kind=DP)                            :: fac, eib, eib1
#ifdef NEC_TUNE
    integer :: iv, ic, ifind, max_ncount, ncount
    real(kind=DP), allocatable, dimension(:,:,:) :: workarray
    real(kind=DP), allocatable, dimension(:,:) :: wkr1, wkr2, wkr3, wkr4, wki1, wki2, wki3, wki4
#endif
    integer :: iadd
    call tstatc0_begin('vnl_prepare_ek ',id_sname)
!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/18 !!!!!!!!!!!
    allocate(rfsr_l(np_e,np_fs,ista_k:iend_k,3))
    allocate(rfsi_l(np_e,np_fs,ista_k:iend_k,3))
    allocate(rfsr_plus_l(np_e,np_fs,ista_k:iend_k,3))
    allocate(rfsi_plus_l(np_e,np_fs,ista_k:iend_k,3))
    allocate(rfsr_minus_l(np_e,np_fs,ista_k:iend_k,3))
    allocate(rfsi_minus_l(np_e,np_fs,ista_k:iend_k,3))
#ifdef NEC_TUNE
    allocate(workarray(np_e,neg,2))
    max_ncount = 0
    do it = 1, ntyp
       do lmt1 = 1, ilmt(it)
          do lmt2 = 1, ilmt(it)
             do ia = ista_atm, iend_atm
                if(ityp(ia) /= it) cycle
                max_ncount = max_ncount + 1
             end do
          end do
       end do
    end do
    allocate(wkr1(np_e,max_ncount))
    allocate(wkr2(neg,max_ncount))
    allocate(wkr3(neg,max_ncount))
    allocate(wkr4(np_e,max_ncount))
    allocate(wki1(neg,max_ncount))
    allocate(wki2(np_e,max_ncount))
    allocate(wki3(neg,max_ncount))
    allocate(wki4(np_e,max_ncount))
#endif

    call get_snl_pm_ek(DELQ)
!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/18 !!!!!!!!!!!


    Loop_spin: do ispin = 1, nspin, af+1
       Loop_kpoints: do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k(ik) /= myrank_k) cycle              ! MPI
          call m_ES_betar_r_WFs_4_each_k(DELQ,nfout,ik)   !  -> rfsr_l,rfsi_l
              end do Loop_kpoints
    end do Loop_spin


!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/18 !!!!!!!!!!!
    deallocate(rfsr_plus_l)
    deallocate(rfsi_plus_l)
    deallocate(rfsr_minus_l)
    deallocate(rfsi_minus_l)
!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/18 !!!!!!!!!!!
    deallocate(snl_plus)
    deallocate(snl_minus)

    allocate(wkfsr(neg,nlmta,ista_k:iend_k));    allocate(wkfsi(neg,nlmta,ista_k:iend_k))
    allocate(wkrfsr(neg,nlmta,ista_k:iend_k,3));    allocate(wkrfsi(neg,nlmta,ista_k:iend_k,3))

    if(npes>=2) call mpi_barrier(mpi_comm_group,ierr)

    wkfsr  = 0.0d0
    wkfsi  = 0.0d0
    wkrfsr = 0.0d0
    wkrfsi = 0.0d0
    do ik = 1, kv3, af+1
       if(map_k(ik) /= myrank_k) cycle
       do ib = 1, np_e
          do it = ista_fs, iend_fs
             iadd = it - ista_fs + 1
             wkfsr(neg_g(ib),it,ik) = fsr_l(ib,iadd,ik)
             wkfsi(neg_g(ib),it,ik) = fsi_l(ib,iadd,ik)
             do ii = 1, 3
                wkrfsr(neg_g(ib),it,ik,ii) = rfsr_l(ib,iadd,ik,ii)
                wkrfsi(neg_g(ib),it,ik,ii) = rfsi_l(ib,iadd,ik,ii)
             end do
          end do
       end do
    end do
    call mpi_allreduce(MPI_IN_PLACE,wkfsr, size(wkfsr), MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    call mpi_allreduce(MPI_IN_PLACE,wkfsi, size(wkfsi), MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    call mpi_allreduce(MPI_IN_PLACE,wkrfsr,size(wkrfsr),MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    call mpi_allreduce(MPI_IN_PLACE,wkrfsi,size(wkrfsi),MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    
    rtrans = 0.d0

#ifndef NEC_TUNE
    do ii = 1,3
       do ispin = 1, nspin, af+1
          do ik = ispin, kv3-nspin+ispin, nspin

! ================ KT_add ============= 13.0S
             if ( sw_corelevel_spectrum == ON ) then
                do ib = 1, neg
                   do ib1 = 1, num_core_states
                      if(nrd_efermi==1) then
                         eib=e2_mpi(ib,ik)
                         eib1 = ene_core_states(ib1)
                         if(eib.gt.efermi) call calc_rtrans_core2val(ib,ib1)
                      else
                         call calc_rtrans_core2val(ib,ib1)
                      end if
                   end do
                end do
             else
! ===================================== 13.0S
                do ib = 1, neg
                   do ib1 = 1, neg
                      if(nrd_efermi==1) then
                         eib=e2_mpi(ib,ik)
                         eib1=e2_mpi(ib1,ik)
                         if(eib.gt.efermi.and.eib1.le.efermi) call calc_rtrans(ib,ib1)
                      else
                         if(ib/=ib1) call calc_rtrans(ib,ib1)
                      end if
                   end do
                end do
! ===================== KT_add ======== 13.0S
             endif
! ===================================== 13.0S
          end do
       end do
    end do
#else
    if(nrd_efermi==1) then
       do ii = 1,3
          do ispin = 1, nspin, af+1
             do ik = ispin, kv3-nspin+ispin, nspin
                if(map_k(ik) /= myrank_k) cycle
                wkr1 = 0.0d0; wkr2 = 0.0d0; wkr3 = 0.0d0; wkr4 = 0.0d0
                wki1 = 0.0d0; wki2 = 0.0d0; wki3 = 0.0d0; wki4 = 0.0d0
                do ib = 1, np_e
                   iadd = ista_e + ib - 1
                   ncount = 0
                   do it = 1, ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               ncount = ncount + 1
                               wkr1(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsr(iadd,p,ik)
                               wkr4(ib,ncount) =                        wkrfsr(iadd,p,ik,ii)
                               wki2(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsi(iadd,p,ik)
                               wki4(ib,ncount) =                        wkrfsi(iadd,p,ik,ii)
                            end do
                         end do
                      end do
                   end do
                end do
                do ib = 1, neg
                   ncount = 0
                   do it = 1, ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               ncount = ncount + 1
                               wkr2(ib,ncount) =                        wkrfsr(ib,p1,ik,ii)
                               wkr3(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsr(ib,p1,ik)
                               wki1(ib,ncount) =                        wkrfsi(ib,p1,ik,ii)
                               wki3(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsi(ib,p1,ik)
                            end do
                         end do
                      end do
                   end do
                end do
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wkr1,np_e,wkr2,neg,0.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wki2,np_e,wki1,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wkr4,np_e,wkr3,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki4,np_e,wki3,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wkr1,np_e,wki1,neg,0.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki2,np_e,wkr2,neg,1.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wkr4,np_e,wki3,neg,1.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wki4,np_e,wkr3,neg,1.0d0,workarray(1,1,2),np_e)
                do ib = 1, np_e
                   iadd = ista_e + ib - 1
                   do ib1 = 1, neg
                      eib=e2_mpi(iadd,ik)
                      eib1=e2_mpi(ib1,ik)
                      if(eib.gt.efermi.and.eib1.le.efermi) then
                         call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                         if(ifind==0.and.printable) then
                            write(nfout,'(1x,"!!* valence or conduction band index is not found&
                               &   UVSOR-Epsilon STOP at calc_rtrans")')
                         end if
                         rtrans(ik,ic,iv,ii,1) = workarray(ib,ib1,1)
                         rtrans(ik,ic,iv,ii,2) = workarray(ib,ib1,2)
                      end if
                   end do
                end do
             end do
          end do
       end do
    else
       do ii = 1,3
          do ispin = 1, nspin, af+1
             do ik = ispin, kv3-nspin+ispin, nspin
                if(map_k(ik) /= myrank_k) cycle
                wkr1 = 0.0d0; wkr2 = 0.0d0; wkr3 = 0.0d0; wkr4 = 0.0d0
                wki1 = 0.0d0; wki2 = 0.0d0; wki3 = 0.0d0; wki4 = 0.0d0
                do ib = 1, np_e
                   iadd = ista_e + ib - 1
                   ncount = 0
                   do it = 1, ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               ncount = ncount + 1
                               wkr1(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsr(iadd,p,ik)
                               wkr4(ib,ncount) =                        wkrfsr(iadd,p,ik,ii)
                               wki2(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsi(iadd,p,ik)
                               wki4(ib,ncount) =                        wkrfsi(iadd,p,ik,ii)
                            end do
                         end do
                      end do
                   end do
                end do
                do ib = 1, neg
                   ncount = 0
                   do it = 1, ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               ncount = ncount + 1
                               wkr2(ib,ncount) =                        wkrfsr(ib,p1,ik,ii)
                               wkr3(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsr(ib,p1,ik)
                               wki1(ib,ncount) =                        wkrfsi(ib,p1,ik,ii)
                               wki3(ib,ncount) = fac*dion(lmt1,lmt2,it)*wkfsi(ib,p1,ik)
                            end do
                         end do
                      end do
                   end do
                end do
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wkr1,np_e,wkr2,neg,0.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wki2,np_e,wki1,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wkr4,np_e,wkr3,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki4,np_e,wki3,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wkr1,np_e,wki1,neg,0.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki2,np_e,wkr2,neg,1.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wkr4,np_e,wki3,neg,1.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wki4,np_e,wkr3,neg,1.0d0,workarray(1,1,2),np_e)
                do ib = 1, np_e
                   iadd = ista_e + ib - 1
                   do ib1 = 1, neg
                      if(iadd/=ib1) then
                         call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                         if(ifind==0.and.printable) then
                            write(nfout,'(1x,"!!* valence or conduction band index is not found&
                               &   UVSOR-Epsilon STOP at calc_rtrans")')
                         end if
                         rtrans(ik,ic,iv,ii,1) = workarray(ib,ib1,1)
                         rtrans(ik,ic,iv,ii,2) = workarray(ib,ib1,2)
                      end if
                   end do
                end do
             end do
          end do
       end do
    end if
#endif

    call mpi_allreduce(MPI_IN_PLACE,rtrans,size(rtrans),MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
    rtrans = rtrans*(af+1)

!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/18 !!!!!!!!!!!
    deallocate(rfsr_l)
    deallocate(rfsi_l)
!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/18 !!!!!!!!!!!


    deallocate(wkfsr); deallocate(wkfsi)
    deallocate(wkrfsr); deallocate(wkrfsi)
#ifdef NEC_TUNE
    deallocate(workarray)
    deallocate(wkr1)
    deallocate(wkr2)
    deallocate(wkr3)
    deallocate(wkr4)
    deallocate(wki1)
    deallocate(wki2)
    deallocate(wki3)
    deallocate(wki4)
#endif
    call tstatc0_end(id_sname)

    contains
     subroutine calc_rtrans(ib,ib1)
       implicit none
       integer,intent(in) :: ib, ib1
       integer            :: iv, ic, ifind
       call find_ind_vb_and_cb2(ib1,ib,iv,ic,nk_in_the_process+ik-1,ifind)
       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_rtrans")')
       end if
       do it = 1, ntyp
          do lmt1 = 1, ilmt(it)
             do lmt2 = 1, ilmt(it)
                do ia = 1, natm
                   if(ityp(ia) /= it) cycle
                   p = lmta(lmt1,ia)
                   p1 = lmta(lmt2,ia)
                   fac=real(iwei(ia),kind=DP)
                   rtrans(ik,ic,iv,ii,1)  = rtrans(ik,ic,iv,ii,1) &
                       & + fac*dion(lmt1,lmt2,it) &
                       &  *(wkfsr(ib,p,ik)*wkrfsr(ib1,p1,ik,ii) + wkfsi(ib,p,ik)*wkrfsi(ib1,p1,ik,ii) &
                       &  - wkfsr(ib1,p1,ik)*wkrfsr(ib,p,ik,ii) - wkfsi(ib1,p1,ik)*wkrfsi(ib,p,ik,ii))
                   rtrans(ik,ic,iv,ii,2) = rtrans(ik,ic,iv,ii,2) &
                       & + fac*dion(lmt1,lmt2,it) &
                       &  *(wkfsr(ib,p,ik)*wkrfsi(ib1,p1,ik,ii) - wkfsi(ib,p,ik)*wkrfsr(ib1,p1,ik,ii) &
                       &  - wkfsi(ib1,p1,ik)*wkrfsr(ib,p,ik,ii) + wkfsr(ib1,p1,ik)*wkrfsi(ib,p,ik,ii))
                end do
             end do
          end do
       end do
     end subroutine calc_rtrans

! ====================== KT_add ================== 13.0S
     subroutine calc_rtrans_core2val(ib,ib1)
       implicit none
       integer,intent(in) :: ib, ib1
       integer            :: iv, ic, ifind

       call find_ind_cb_only2(ib,ic,nk_in_the_process+ik-1,ifind)
       iv = ib1

       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* conduction band index is not found   UVSOR-Epsilon STOP at calc_rtrans")')
       end if

       ia = atom_to_probe
       it = ityp(ia)
       im2 = ib1

       do lmt1 = 1, ilmt(it)
          do lmt2 = 1, ilmt(it)
             p = lmta(lmt1,ia)
             p1 = im2

             fac=real(iwei(ia),kind=DP)
             rtrans(ik,ic,iv,ii,1)  = rtrans(ik,ic,iv,ii,1) &
                  & + fac*dion(lmt1,lmt2,it) &
                  &  *(wkfsr(ib,p,ik)*1.0D0 + wkfsi(ib,p,ik)*0.0d0 &
                  &  - 1.0D0 *wkrfsr(ib,p,ik,ii) -0.d0*wkrfsi(ib,p,ik,ii) )

             rtrans(ik,ic,iv,ii,2) = rtrans(ik,ic,iv,ii,2) &
                  & + fac*dion(lmt1,lmt2,it) &
                  &  *(wkfsr(ib,p,ik)*0.0D0 -wkfsi(ib,p,ik)*1.0D0 &
                  &  -0.0d0*wkrfsr(ib,p,ik,ii) +1.0D0*wkrfsi(ib,p,ik,ii) )
          end do
       end do
     end subroutine calc_rtrans_core2val
! ========================================== 13.0S

 end subroutine vnl_prepare_ek

! ======================== Added by K. Tagami ================ 0.2
 subroutine dhub_prepare_ek(DELQ)
!                                 Reuse of vnl_prepare_ek
!  ==================================================
    integer                                  :: id_sname = -1
    integer                                  :: ispin, it,lmt1, lmt2, il1, im1, il2, im2, ia
    integer                                  :: ik ,ib,ib1,p,p1,ii
    real(kind=DP),allocatable,dimension(:,:,:)   :: wkfsr, wkfsi
    real(kind=DP),allocatable,dimension(:,:,:,:) :: wkrfsr, wkrfsi
    real(DP),intent(in)                      :: DELQ
    real(kind=DP)                            :: fac, eib, eib1
! -----
    call tstatc0_begin('dhub_prepare_ek ',id_sname)
!
    allocate(rfsr_l(np_e,nlmta,ista_k:iend_k,3))
    allocate(rfsi_l(np_e,nlmta,ista_k:iend_k,3))
    allocate(rfsr_plus_l(np_e,nlmta,ista_k:iend_k,3))
    allocate(rfsi_plus_l(np_e,nlmta,ista_k:iend_k,3))
    allocate(rfsr_minus_l(np_e,nlmta,ista_k:iend_k,3))
    allocate(rfsi_minus_l(np_e,nlmta,ista_k:iend_k,3))

    call get_snl_pm_ek(DELQ)

    Loop_spin: do ispin = 1, nspin, af+1
       Loop_kpoints: do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k(ik) /= myrank_k) cycle              ! MPI
          call m_ES_betar_r_WFs_4_each_k(DELQ,nfout,ik)   !  -> rfsr_l,rfsi_l
              end do Loop_kpoints
    end do Loop_spin

    deallocate(rfsr_plus_l)
    deallocate(rfsi_plus_l)
    deallocate(rfsr_minus_l)
    deallocate(rfsi_minus_l)
    deallocate(snl_plus)
    deallocate(snl_minus)

    allocate(wkfsr(neg,nlmta,kv3));    allocate(wkfsi(neg,nlmta,kv3))
    allocate(wkrfsr(neg,nlmta,kv3,3));    allocate(wkrfsi(neg,nlmta,kv3,3))

    if(npes>=2) call mpi_barrier(mpi_comm_group,ierr)

    do ik = 1, kv3, af+1
       do ib = 1, neg
          if(map_ek(ib,ik) == mype) then
               do it = 1, nlmta
                  wkfsr(ib,it,ik) = fsr_l(map_z(ib),it,ik)
                  wkfsi(ib,it,ik) = fsi_l(map_z(ib),it,ik)
                  do ii = 1, 3
                      wkrfsr(ib,it,ik,ii) = rfsr_l(map_z(ib),it,ik,ii)
                      wkrfsi(ib,it,ik,ii) = rfsi_l(map_z(ib),it,ik,ii)
                  end do
               end do
               if(map_ek(ib,ik) /= 0) then
                   call mpi_send(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
                   call mpi_send(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
                   call mpi_send(wkrfsr,neg*nlmta*kv3*3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
                   call mpi_send(wkrfsi,neg*nlmta*kv3*3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
               end if
         else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
            call mpi_recv(wkfsr,neg*nlmta*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
            call mpi_recv(wkfsi,neg*nlmta*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
            call mpi_recv(wkrfsr,neg*nlmta*kv3*3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
            call mpi_recv(wkrfsi,neg*nlmta*kv3*3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
         end if
         if(npes >= 2)  then
              call mpi_bcast(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
              call mpi_bcast(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
              call mpi_bcast(wkrfsr,neg*nlmta*kv3*3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
              call mpi_bcast(wkrfsi,neg*nlmta*kv3*3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
         end if
       end do
    end do
    
    rtrans_hub = 0.d0

    do ii = 1,3
       do ispin = 1, nspin, af+1
          do ik = ispin, kv3-nspin+ispin, nspin
             do ib = 1, neg
                do ib1 = 1, neg
                   if(nrd_efermi==1) then
                      eib=e2_mpi(ib,ik)
                      eib1=e2_mpi(ib1,ik)
                      if(eib.gt.efermi.and.eib1.le.efermi) call calc_rtrans_hub(ib,ib1)
                   else
                      if(ib/=ib1) call calc_rtrans_hub(ib,ib1)
                   end if
                end do
             end do
          end do
       end do
    end do

    rtrans_hub = rtrans_hub *(af+1)

    deallocate(rfsr_l); deallocate(rfsi_l)

    deallocate(wkfsr); deallocate(wkfsi)
    deallocate(wkrfsr); deallocate(wkrfsi)
    call tstatc0_end(id_sname)

    contains
     subroutine calc_rtrans_hub(ib,ib1)
       implicit none
       integer,intent(in) :: ib, ib1
       integer            :: iv, ic, ifind
       call find_ind_vb_and_cb2(ib1,ib,iv,ic,nk_in_the_process+ik-1,ifind)

       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_rtrans_hub")')
       end if

!
       do it = 1, ntyp
          do lmt1 = 1, ilmt(it)
             do lmt2 = 1, ilmt(it)
                do ia = 1, natm
                   if (ityp(ia) /= it) cycle
                   if( ihubbard(ia) == 0) cycle

                   p = lmta(lmt1,ia)
                   p1 = lmta(lmt2,ia)
                   fac=real(iwei(ia),kind=DP)
!
                   rtrans_hub(ik,ic,iv,ii,1)  = rtrans_hub(ik,ic,iv,ii,1) &
                       & + fac *dhub(lmt1,lmt2,ia,ispin) &
                       &  *(wkfsr(ib,p,ik)*wkrfsr(ib1,p1,ik,ii) + wkfsi(ib,p,ik)*wkrfsi(ib1,p1,ik,ii) &
                       &  - wkfsr(ib1,p1,ik)*wkrfsr(ib,p,ik,ii) - wkfsi(ib1,p1,ik)*wkrfsi(ib,p,ik,ii))
                   rtrans_hub(ik,ic,iv,ii,2) = rtrans_hub(ik,ic,iv,ii,2) &
                       & + fac *dhub(lmt1,lmt2,ia,ispin) &
                       &  *(wkfsr(ib,p,ik)*wkrfsi(ib1,p1,ik,ii) - wkfsi(ib,p,ik)*wkrfsr(ib1,p1,ik,ii) &
                       &  - wkfsi(ib1,p1,ik)*wkrfsr(ib,p,ik,ii) + wkfsr(ib1,p1,ik)*wkrfsi(ib,p,ik,ii))
                end do
             end do
          end do
       end do

     end subroutine calc_rtrans_hub

 end subroutine dhub_prepare_ek
! =============================================================== 0.2

!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/11 !!!!!!!!!!!
!!!! added by mizouchi@adv 2003/03/18 !!!!!!!!!!!
!!!! The following subroutines are those modified from !!!
!!!! the subroutines in m_Electronic_Structure .       !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 subroutine m_ES_alloc_zfdqsincos
    allocate(zfdqcos(nbmx))
    allocate(zfdqsin(nbmx))
    allocate(ar(nbmx))
    allocate(ai(nbmx))
 end subroutine m_ES_alloc_zfdqsincos

 subroutine m_ES_dealloc_zfdqsincos
    deallocate(ai)
    deallocate(ar)
    deallocate(zfdqcos)
    deallocate(zfdqsin)
 end subroutine m_ES_dealloc_zfdqsincos

 subroutine m_ES_betar_r_WFs_4_each_k(DELQ,nfout,ik)
!
!   calculate <beta|r|WF> for Read and Needs correction
!
    integer, intent(in) :: nfout, ik
    integer             :: ia, iksnl, idirection
    integer             :: id_sname = -1
    real(DP),intent(in) :: DELQ

    call tstatc0_begin('betar_r_WFs ',id_sname)
    call m_ES_alloc_zfdqsincos

    iksnl = (ik-1)/nspin + 1
#ifndef NEC_TUNE
    do ia = 1, natm
       do idirection = 1,3  ! loop idirection
          if(ipri >= 2.and.printable) write(nfout,'("(m_ES_betar_r_WFs_4_each_k) ia = ", i5)') ia
          call G_dot_R_plus_delq_dot_R(natm,ia,pos,kgp,nbmx,ngabc,cps,DELQ,idirection &
                                  &,zfdqcos,zfdqsin)
          call m_ES_betar_r_WFs_4_lmta_k(ista_k,iend_k,ik,zaj_l,ia,iksnl,snl_plus,idirection &
                                  &,rfsr_plus_l,rfsi_plus_l)
       !                                             ->rfsr_plus_l,rfsi_plus_l
          call G_dot_R_plus_delq_dot_R(natm,ia,pos,kgp,nbmx,ngabc,cps,-DELQ,idirection &
                                  &,zfdqcos,zfdqsin)
          call m_ES_betar_r_WFs_4_lmta_k(ista_k,iend_k,ik,zaj_l,ia,iksnl,snl_minus,idirection &
                                  &,rfsr_minus_l,rfsi_minus_l)
       !                                             ->rfsr_minus_l,rfsi_minus_l
       end do  ! loop idirection
    end do
#else
    do idirection = 1,3  ! loop idirection
       call m_ES_betar_r_WFs_4_lmta_k(ista_k,iend_k,ik,zaj_l,natm,iksnl,snl_plus,idirection &
                                     ,rfsr_plus_l, rfsi_plus_l, kg, nbmx,pos,cps,ngabc,DELQ)
       ! ->rfsr_plus_l,rfsi_plus_l
       call m_ES_betar_r_WFs_4_lmta_k(ista_k,iend_k,ik,zaj_l,natm,iksnl,snl_minus,idirection &
                                     ,rfsr_minus_l,rfsi_minus_l,kg,nbmx,pos,cps,ngabc,-DELQ)
       ! ->rfsr_minus_l,rfsi_minus_l
    end do  ! loop idirection
#endif
    rfsr_l=(rfsi_plus_l-rfsi_minus_l)*1.0d0/(2.0d0*DELQ)
    rfsi_l=(rfsr_plus_l-rfsr_minus_l)*(-1.0d0)/(2.0d0*DELQ)
    if(ipri >= 2.and.printable) call wd_rfsr_rfsi(ik)
    call m_ES_dealloc_zfdqsincos
    call tstatc0_end(id_sname)
 end subroutine m_ES_betar_r_WFs_4_each_k

#ifndef NEC_TUNE
 subroutine m_ES_betar_r_WFs_4_lmta_k(k1,k2,ik,psi_l,ia,iksnl,snl_pm,idirection &
                                      & ,bpr_l,bpi_l)
    integer,intent(in)                               :: k1,k2, ik, ia, iksnl,idirection
    integer                                          :: it, lmt1, lmtt1, lmta1, il1
    real(kind=DP),intent(in), dimension(kg1,np_e,k1:k2,kimg)           :: psi_l        ! MPI
    real(kind=DP),intent(out),dimension(np_e,nlmta,k1:k2,3)            :: bpr_l,bpi_l  ! MPI
    real(kind=DP),intent(in), dimension(kg1,nlmtt,ista_snl:iend_snl,3) :: snl_pm
    integer :: iadd

    it    = ityp(ia)
    do lmt1 = 1, ilmt(it)
       lmtt1 = lmtt(lmt1,it)
       lmta1 = lmta(lmt1,ia)
       il1   = ltp(lmt1,it)
       call G_plus_delq_dot_R_mult_snl_pm   ! exp(i(G+q)R)*snl_pm
       call betar_r_WFs_core  ! sum(c(k+G)exp(i(G+q)R)*snl_pm()
       call multiple_i_l_1                   !   i**l*( )
    end do
#else
 subroutine m_ES_betar_r_WFs_4_lmta_k(k1,k2,ik,psi_l,natm,iksnl,snl_pm,idirection &
                                      & ,bpr_l,bpi_l,kgp,nbmx,pos,cps,ngabc,delq)
    integer,intent(in)                               :: k1,k2, ik, natm, iksnl,idirection
    integer                                          :: it, lmt1, lmtt1, lmta1, il1
    real(kind=DP),intent(in), dimension(maxval(np_g1k),np_e,k1:k2,kimg)           :: psi_l        ! MPI
    real(kind=DP),intent(out),dimension(np_e,np_fs,k1:k2,3)            :: bpr_l,bpi_l  ! MPI
    real(kind=DP),intent(in), dimension(maxval(np_g1k),nlmtt,ista_snl:iend_snl,3) :: snl_pm
    integer :: i, i1, ib, wsize
    integer, intent(in)                        :: kgp, nbmx
    real(kind=DP),intent(in),dimension(natm,3) :: pos,cps
    integer, intent(in),     dimension(kgp,3)  :: ngabc
    real(kind=DP),intent(in)                   :: delq
    real(kind=DP),allocatable,dimension(:,:) :: ar_tmp, ai_tmp
    real(kind=DP),allocatable,dimension(:,:,:) :: bptmp
    real(kind=DP) :: grt,delqvec(3)
    integer :: ia, ncount
    integer :: iadd

    delqvec = 0.0d0
    delqvec(idirection) =delq

    ncount = 0
    do ia = 1, natm
       it    = ityp(ia)
       do lmt1 = 1, ilmt(it)
          ncount = ncount + 1
       end do
    end do

    allocate(ar_tmp(maxval(np_g1k),ncount)); ar_tmp = 0.0d0
    allocate(ai_tmp(maxval(np_g1k),ncount)); ai_tmp = 0.0d0
    allocate(bptmp(np_e,ncount,2))

    ncount = 0
    do ia = 1, natm
       it    = ityp(ia)
       if(ipri >= 2 .and. printable) &
          write(nfout,'("(m_ES_betar_r_WFs_4_lmta_k) ia = ", i5)') ia
       do i = 1, nbmx
          grt = (pos(ia,1)*ngabc(i,1) + pos(ia,2)*ngabc(i,2) + pos(ia,3)*ngabc(i,3))*PAI2 &
              + (cps(ia,1)*delqvec(1)+cps(ia,2)*delqvec(2)+cps(ia,3)*delqvec(3))
          zfdqcos(i) = dcos(grt)
          zfdqsin(i) = dsin(grt)
       end do
       do lmt1 = 1, ilmt(it)
          lmtt1 = lmtt(lmt1,it)
          lmta1 = lmta(lmt1,ia)
          il1   = ltp(lmt1,it)
          ncount = ncount + 1
          do i = ista_g1k(ik), iend_g1k(ik)
             iadd = i - ista_g1k(ik) + 1
             i1    = nbase(i,ik)
             ar_tmp(iadd,ncount) = zfdqcos(i1)*snl_pm(iadd,lmtt1,iksnl,idirection)
             ai_tmp(iadd,ncount) = zfdqsin(i1)*snl_pm(iadd,lmtt1,iksnl,idirection)
          end do
       end do
    end do

    if(kimg == 1) then
       call dgemm('T','N',np_e,ncount,np_g1k(ik), 1.0d0,psi_l(1,1,ik,1),maxval(np_g1k), &
       & ar_tmp,maxval(np_g1k),0.0d0,bptmp(1,1,1),np_e)
       call dgemm('T','N',np_e,ncount,np_g1k(ik), 1.0d0,psi_l(1,1,ik,1),maxval(np_g1k), &
       & ai_tmp,maxval(np_g1k),0.0d0,bptmp(1,1,2),np_e)
    else if(kimg == 2) then
       call dgemm('T','N',np_e,ncount,np_g1k(ik), 1.0d0,psi_l(1,1,ik,1),maxval(np_g1k), &
       & ar_tmp,maxval(np_g1k),0.0d0,bptmp(1,1,1),np_e)
       call dgemm('T','N',np_e,ncount,np_g1k(ik),-1.0d0,psi_l(1,1,ik,2),maxval(np_g1k), &
       & ai_tmp,maxval(np_g1k),1.0d0,bptmp(1,1,1),np_e)
       call dgemm('T','N',np_e,ncount,np_g1k(ik), 1.0d0,psi_l(1,1,ik,1),maxval(np_g1k), &
       & ai_tmp,maxval(np_g1k),0.0d0,bptmp(1,1,2),np_e)
       call dgemm('T','N',np_e,ncount,np_g1k(ik), 1.0d0,psi_l(1,1,ik,2),maxval(np_g1k), &
       & ar_tmp,maxval(np_g1k),1.0d0,bptmp(1,1,2),np_e)
    end if
    call mpi_allreduce(MPI_IN_PLACE,bptmp,np_e*ncount*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)

    deallocate(ar_tmp)
    deallocate(ai_tmp)

    ncount = 0
    do ia = 1, natm
       it    = ityp(ia)
       do lmt1 = 1, ilmt(it)
          lmtt1 = lmtt(lmt1,it)
          lmta1 = lmta(lmt1,ia)
          il1   = ltp(lmt1,it)
          ncount = ncount + 1
          if(lmta1 >= ista_fs .and. lmta1 <= iend_fs) then
          iadd = lmta1 - ista_fs + 1
          do ib = 1, np_e
             bpr_l(ib,iadd,ik,idirection) = bptmp(ib,ncount,1)
             bpi_l(ib,iadd,ik,idirection) = bptmp(ib,ncount,2)
          end do
          call multiple_i_l_1
          end if
       end do
    end do ! ia loop

    deallocate(bptmp)
#endif
    contains
     subroutine G_plus_delq_dot_R_mult_snl_pm
       integer :: i, i1
       do i = 1, iba(ik)
          i1    = nbase(i,ik)
          ar(i) = zfdqcos(i1)*snl_pm(i,lmtt1,iksnl,idirection)
          ai(i) = zfdqsin(i1)*snl_pm(i,lmtt1,iksnl,idirection)
       end do
     end subroutine G_plus_delq_dot_R_mult_snl_pm

     subroutine betar_r_WFs_core
       integer       :: ib, i
       real(kind=DP) :: crt1, cit1

       bpr_l(1:np_e,lmta1,ik,idirection) = 0.d0        ! MPI
       bpi_l(1:np_e,lmta1,ik,idirection) = 0.d0        ! MPI

       if(kimg == 1) then
          do ib = 1, np_e                   ! MPI
             do i = 1, iba(ik)
                crt1 = psi_l(i,ib,ik,1)
                bpr_l(ib,lmta1,ik,idirection) = bpr_l(ib,lmta1,ik,idirection) + ar(i)*crt1
                bpi_l(ib,lmta1,ik,idirection) = bpi_l(ib,lmta1,ik,idirection) + ai(i)*crt1
             end do
          end do
       else if(kimg == 2) then
          do ib = 1, np_e                   ! MPI
             do i = 1, iba(ik)
                crt1     = psi_l(i,ib,ik,1)
                cit1     = psi_l(i,ib,ik,kimg)
                bpr_l(ib,lmta1,ik,idirection) = bpr_l(ib,lmta1,ik,idirection) + ar(i)*crt1-ai(i)*cit1
                bpi_l(ib,lmta1,ik,idirection) = bpi_l(ib,lmta1,ik,idirection) + ai(i)*crt1+ar(i)*cit1
             end do
          end do
       end if
     end subroutine betar_r_WFs_core

     subroutine multiple_i_l_1
       integer       :: mil, ib
       real(kind=DP) :: tempo
       mil = mod(il1,4)
       if(mil == 2) then
          do ib = 1, np_e                 ! MPI
             tempo = bpi_l(ib,iadd,ik,idirection)
             bpi_l(ib,iadd,ik,idirection) = bpr_l(ib,iadd,ik,idirection)
             bpr_l(ib,iadd,ik,idirection) = -tempo
          end do
       else if(mil == 3) then
          do ib = 1, np_e                 ! MPI
             bpr_l(ib,iadd,ik,idirection) = -bpr_l(ib,iadd,ik,idirection)
             bpi_l(ib,iadd,ik,idirection) = -bpi_l(ib,iadd,ik,idirection)
          end do
       else if(mil == 0) then
          do ib = 1, np_e                 ! MPI
             tempo = bpi_l(ib,iadd,ik,idirection)
             bpi_l(ib,iadd,ik,idirection) = -bpr_l(ib,iadd,ik,idirection)
             bpr_l(ib,iadd,ik,idirection) = tempo
          end do
       end if
     end subroutine multiple_i_l_1
 end subroutine m_ES_betar_r_WFs_4_lmta_k

 subroutine wd_rfsr_rfsi(ik)
    integer,intent(in) :: ik
    integer            :: it, ia,idirection

    write(nfout,*) ' --  wd_rfsr_rfsi --'

    do idirection =1,3

    write(nfout,*) ' -- direction ' ,idirection,' --'    

    write(nfout,*) ' -- rfsr_plus, rfsi_plus --'
    do it = 1, neg
       if(map_e(it) /= myrank_e) cycle                  ! MPI
       write(nfout,*) ' ik = ', ik, ' ib = ', it, ' idirection = ', idirection        ! MPI
       write(nfout,'(6d12.4)') (rfsr_plus_l(map_z(it),ia,ik,idirection) &
                    &,rfsi_plus_l(map_z(it),ia,ik,idirection),ia=1,nlmta)!MPI
    end do

    write(nfout,*) ' -- rfsr_minus, rfsi_minus --'
    do it = 1, neg
       if(map_e(it) /= myrank_e) cycle                  ! MPI
       write(nfout,*) ' ik = ', ik, ' ib = ', it, ' idirection = ', idirection        ! MPI
       write(nfout,'(6d12.4)') (rfsr_minus_l(map_z(it),ia,ik,idirection) &
                    &,rfsi_minus_l(map_z(it),ia,ik,idirection),ia=1,nlmta)!MPI
    end do

    write(nfout,*) ' -- rfsr, rfsi --'
    do it = 1, neg
       if(map_e(it) /= myrank_e) cycle                  ! MPI
       write(nfout,*) ' ik = ', ik, ' ib = ', it, ' idirection = ', idirection        ! MPI
       write(nfout,'(6d12.4)') (rfsr_l(map_z(it),ia,ik,idirection) &
                    & ,rfsi_l(map_z(it),ia,ik,idirection),ia=1,nlmta)!MPI
    end do

    end do
 end subroutine wd_rfsr_rfsi


!!!! added by mizouchi@adv 2003/03/18 !!!!!!!!!!!
!!!! The following subroutines are those modified from !!!
!!!! subroutine PseudoPotentiral_Construction .        !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 subroutine get_snl_pm_ek(DELQ)
!
!   calculate <beta|exp(iq_dot_r)WF>
!
    real(DP), intent(in) :: DELQ 
    call m_NLP_betar_r_PWs(DELQ,nfout,kv3,vkxyz) !--> snl_plus,snl_minus
 end subroutine get_snl_pm_ek

!!!! added by mizouchi@adv 2003/03/18 !!!!!!!!!!!
!!!! The following subroutines are those modified from !!!
!!!! a subroutine in m_Nonlocal_Potential .        !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 subroutine m_NLP_betar_r_PWs(DELQ,nfout,kv3,vkxyz)
    integer,      intent(in) :: nfout,kv3
    real(kind=DP),intent(in) :: vkxyz(kv3,3,CRDTYP)
    real(kind=DP),intent(in) :: DELQ
    real(kind=DP)            :: fac, facr
    integer                  :: ik,iksnl,it,n,lmt1,lmtt1,il1,im1,tau1,nspher
    integer                  :: id_sname = -1
    integer                  :: idirection
#ifdef NEC_TUNE
    integer :: ista, iend, ilen
#endif

    call tstatc0_begin('m_NLP_betar_r_PWs ',id_sname)
    if(printable) write(nfout,*) ' <<< m_NLP_betar_dot_PWs >>>'
!    call innerPr_allocate()

    allocate(qx(kg1)); qx = 0.d0
    allocate(qy(kg1)); qy = 0.d0
    allocate(qz(kg1)); qz = 0.d0
    allocate(vlength(kg1)); vlength = 0.d0
    allocate(snl2(kg1)); snl2 = 0.d0
    allocate(wka(kg1));  wka  = 0.d0
    allocate(wkb(kg1));  wkb  = 0.d0
    allocate(ylm(kg1));  ylm  = 0.d0
    allocate(snl_plus(maxval(np_g1k),nlmtt,ista_snl:iend_snl,3))
    allocate(snl_minus(maxval(np_g1k),nlmtt,ista_snl:iend_snl,3))

#ifdef NEC_TUNE
   snl_plus  = 0.0d0
   snl_minus = 0.0d0
#endif
    do idirection = 1,3  !loop idirection

    fac = PAI4/dsqrt(univol)
       do ik = 1, kv3, nspin
          if(map_k(ik) /= myrank_k) cycle                     ! MPI
          call k_G_delq_vectors(ik,kg,kg1,kv3,iba,nbase,vkxyz,ngabc,rltv&
               &,DELQ,idirection,qx,qy,qz,vlength)                            ! ->(bottom_Subr.)
          iksnl = (ik-1)/nspin + 1
          do it=1,ntyp
#ifdef NEC_TUNE
             ilen = (nmesh(it) - 1)/nrank_e + 1
             ista = ilen*myrank_e + 1
             iend = min(ista + ilen - 1, nmesh(it))
#endif
             call new_radr_and_wos(ik,it)                     ! --> radr, wos
             do lmt1 = 1,ilmt(it)
                call m_PP_tell_lmtt_l_m_tau(lmt1,it,lmtt1,il1,im1,tau1,nspher)
                call sphr(iba(ik),nspher,qx,qy,qz,ylm)        ! -(bottom_Subr.)
                if(ipri >= 2) call wd_lmt_l_m_tau_etc &
                                   (nfout,it,lmt1,il1,im1,tau1,lmtt1,nspher)
                snl2 = 0.d0
!xocl spread do/ind_kmesh
#ifndef NEC_TUNE
                do n = 1,nmesh(it)
#else
                do n = ista, iend
#endif
                   facr = fac*wos(n)*radr(n)*betar(n,il1,tau1,it)
                   wka = vlength*radr(n)
                   call dsjnv(il1-1,iba(ik),wka,wkb)          ! -(bottom_Subr.)
                   snl2 = snl2 + facr*wkb*ylm
                end do
!xocl end spread sum(snl2)
                snl_plus(1:np_g1k(ik),lmtt1,iksnl,idirection) = snl2(ista_g1k(ik):iend_g1k(ik))
             end do
          end do

          call k_G_delq_vectors(ik,kg,kg1,kv3,iba,nbase,vkxyz,ngabc,rltv&
               &,-DELQ,idirection,qx,qy,qz,vlength)                            ! ->(bottom_Subr.)
          iksnl = (ik-1)/nspin + 1
          do it=1,ntyp
#ifdef NEC_TUNE
             ilen = (nmesh(it) - 1)/nrank_e + 1
             ista = ilen*myrank_e + 1
             iend = min(ista + ilen - 1, nmesh(it))
#endif
             call new_radr_and_wos(ik,it)                     ! --> radr, wos
             do lmt1 = 1,ilmt(it)
                call m_PP_tell_lmtt_l_m_tau(lmt1,it,lmtt1,il1,im1,tau1,nspher)
                call sphr(iba(ik),nspher,qx,qy,qz,ylm)        ! -(bottom_Subr.)
                if(ipri >= 2) call wd_lmt_l_m_tau_etc &
                                   (nfout,it,lmt1,il1,im1,tau1,lmtt1,nspher)
                snl2 = 0.d0
!xocl spread do/ind_kmesh
#ifndef NEC_TUNE
                do n = 1,nmesh(it)
#else
                do n = ista, iend
#endif
                   facr = fac*wos(n)*radr(n)*betar(n,il1,tau1,it)
                   wka = vlength*radr(n)
                   call dsjnv(il1-1,iba(ik),wka,wkb)          ! -(bottom_Subr.)
                   snl2 = snl2 + facr*wkb*ylm
                end do
!xocl end spread sum(snl2)
                snl_minus(1:np_g1k(ik),lmtt1,iksnl,idirection) = snl2(ista_g1k(ik):iend_g1k(ik))
             end do
          end do

       end do

    end do !loop idirection
#ifdef NEC_TUNE
    call MPI_Allreduce(MPI_IN_PLACE,snl_plus, maxval(np_g1k)*nlmtt*(iend_snl-ista_snl+1)*3, &
    & MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
    call MPI_Allreduce(MPI_IN_PLACE,snl_minus,maxval(np_g1k)*nlmtt*(iend_snl-ista_snl+1)*3, &
    & MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
#endif

   if(ipriepsilon >= 2.and.printable) call wd_snl_pm
!    call innerPr_deallocate

    deallocate(ylm)
    deallocate(wkb)
    deallocate(wka)
    deallocate(snl2)
    deallocate(vlength)
    deallocate(qz)
    deallocate(qy)
    deallocate(qx)

    call tstatc0_end(id_sname)

    contains
     subroutine wd_snl_pm
       integer :: i, ilmtt, ik, j, iksnl
       write(nfout,'(10(''('',3i2,'')''))') ((ngabc(i,j),j=1,3),i=1,30)

       do idirection = 1,3
          do ik = 1, kv3, nspin
             if(map_k(ik) /= myrank_k) cycle        ! MPI
             iksnl = (ik-1)/nspin + 1
             write(nfout,'(" ik = ",i5)') iksnl
             write(nfout,'(8i3)') (nbase(i,ik),i=1,8)
             write(nfout,*) 'snl_plus'
             do ilmtt = 1, nlmtt
                write(nfout,'(8f10.5)') (snl_plus(i,ilmtt,iksnl,idirection),i=1,8)
             end do
             write(nfout,*) 'snl_minus'
             do ilmtt = 1, nlmtt
                write(nfout,'(8f10.5)') (snl_minus(i,ilmtt,iksnl,idirection),i=1,8)
             end do
          end do
       end do

     end subroutine wd_snl_pm
 end subroutine m_NLP_betar_r_PWs

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!! added by mizouchi@adv 2003/03/18 !!!!!!!!!!!
!!!! The following subroutines are those modified from !!!
!!!! subroutines in bottom_Subroutines .        !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 subroutine G_dot_R_plus_delq_dot_R(katm,ia,pos,kgp,nbmx,ngabc,cps,delq&
                                   &,idirection,zfdqcos,zfdqsin)
!  use m_Const_Parameters, only : PAI2, DP
    implicit none
    integer, intent(in)                        :: katm,ia, kgp, nbmx
    integer, intent(in),     dimension(kgp,3)  :: ngabc
    integer, intent(in)                        :: idirection
    integer       :: i
    real(kind=DP),intent(in),dimension(katm,3) :: pos,cps
    real(kind=DP),intent(in)                   :: delq
    real(kind=DP),intent(out),dimension(nbmx)  :: zfdqcos, zfdqsin
    real(kind=DP) :: grt,delqvec(3)

    delqvec = 0.0d0
    delqvec(idirection) =delq
  
    do i = 1, nbmx
       grt = (pos(ia,1)*ngabc(i,1) + pos(ia,2)*ngabc(i,2)&
           & + pos(ia,3)*ngabc(i,3))*PAI2               &
           &+(cps(ia,1)*delqvec(1)+cps(ia,2)*delqvec(2)+cps(ia,3)*delqvec(3))
       zfdqcos(i) = dcos(grt)
       zfdqsin(i) = dsin(grt)
    end do
 end subroutine G_dot_R_plus_delq_dot_R

 subroutine k_G_delq_vectors(ik,kgp,kg1,knv3,iba,nbase,vk,ngabc,rltv&
     &,delq,idirection,qx,qy,qz,vlen)
    use m_Const_Parameters, only : DP, CRDTYP, BUCS
    implicit none
    integer, intent(in)        :: ik, kgp,kg1,knv3,iba(knv3),nbase(kg1,knv3)
    integer, intent(in)        :: ngabc(kgp,3),idirection
    integer                    :: i, ip
    real(kind=DP), intent(in)  :: vk(knv3,3,CRDTYP)
    real(kind=DP), intent(in)  :: rltv(3,3)
    real(kind=DP), intent(in)  :: delq
    real(kind=DP), intent(out) :: qx(kg1),qy(kg1),qz(kg1),vlen(kg1)
    real(kind=DP)              :: delqvec(3)
    real(kind=DP)              :: ga, gb, gc
!!$  write(6,*) ' !! iba(ik) = ', iba(ik)
!!$  write(6,*) ' -- vk --'
!!$  write(6,'(3f20.10)') (vk(ik,i,BUCS),i=1,3)
!!$  write(6,*) ' -- nbase --'
!!$  write(6,'(15i5)') (nbase(i,ik),i=1,100)

    delqvec = 0.0d0
    delqvec(idirection) = delq

    do i = 1, iba(ik)
       ip = nbase(i,ik)
       ga = vk(ik,1,BUCS) + ngabc(ip,1)
       gb = vk(ik,2,BUCS) + ngabc(ip,2)
       gc = vk(ik,3,BUCS) + ngabc(ip,3)
       qx(i)  = rltv(1,1)*ga + rltv(1,2)*gb + rltv(1,3)*gc+delqvec(1)
       qy(i)  = rltv(2,1)*ga + rltv(2,2)*gb + rltv(2,3)*gc+delqvec(2)
       qz(i)  = rltv(3,1)*ga + rltv(3,2)*gb + rltv(3,3)*gc+delqvec(3)
       vlen(i) = dsqrt( qx(i)**2 + qy(i)**2 + qz(i)**2 )
    end do
 end subroutine k_G_delq_vectors

!!!! added by mizouchi@adv 2003/03/18 !!!!!!!!!!!
!!!! The following subroutines are those modified from !!!
!!!! a subroutine in b_Kpoint .        !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 subroutine gen_vk0xyz_Core(nbztyp1,altv,nx,ny,nz &
                   & ,nfout,ipri &
                   & ,rltv,vk0xyz,nfkpgn,nfspg,ipri_kp,ipri_spg &
                   & ,np0)
    implicit none
    integer, intent(in)                    :: nfout, ipri &
         &                                  , nfkpgn,nfspg,ipri_kp,ipri_spg
    integer, intent(in)                    :: np0
    integer                                :: nn,nk,nw,nwei,nv(3)
    integer                                :: np2,np1
    integer                                :: nxx0,nyy0,nzz0,nxx,nyy,nzz
    integer, intent(in)                    :: nbztyp1,nx,ny,nz
    integer                                :: lmnp0, lmnp1, lmnp2
    integer                                :: nx1, ny1, nz1, nd
    integer, allocatable,       dimension(:,:) :: ka0_wk,ka2_wk
    integer, allocatable,       dimension(:)   :: ip10_wk,ip02_wk,ip12_wk &
     &                                       ,ip01_wk,ip21_wk,iu21_wk,iv21_wk,nstar2_wk &
     &                                       ,ip20_wk
    integer                                :: i
    real(kind=DP), intent(in)              :: rltv(3,3),altv(3,3)
    real(kind=DP), intent(out)             :: vk0xyz(np0,3)
    real(kind=DP), allocatable, dimension(:,:) :: trmat,trbp,trpb,mat1,mat2
    real(kind=DP), allocatable, dimension(:,:) :: pa0_wk,pb0_wk,pb_wk

    if(nbztyp1 == GENERAL .or.nbztyp1 == GENERAL_LARGER) then
       nxx0=k_sample_mesh1(1,1); nyy0=k_sample_mesh1(2,1); nzz0=k_sample_mesh1(3,1)
       call nskma0(il,nxx0,nyy0,nzz0,nxx,nyy,nzz,nx1,ny1,nz1,nd)
       lmnp0=(nxx+1)*(nyy+1)*(nzz+1)
       lmnp1=lmnp0
       lmnp2=lmnp0

! === KT_add === 13.1R
        if ( allocated( ip20 ) ) deallocate( ip20 )
! ============== 13.1R

       allocate(ip10_wk(lmnp0))  ; ip10_wk = 0
       allocate(ip20_wk(lmnp0))  ; ip20_wk = 0
       allocate(ip01_wk(lmnp1))  ; ip01_wk = 0
       allocate(ip02_wk(lmnp2))  ; ip02_wk = 0
       allocate(ip21_wk(lmnp1))  ; ip21_wk = 0
       allocate(ip12_wk(lmnp2))  ; ip12_wk = 0
       allocate(iu21_wk(lmnp1))  ; iu21_wk = 0
       allocate(iv21_wk(lmnp1))  ; iv21_wk = 0
       allocate(nstar2_wk(lmnp2)); nstar2_wk = 0
       allocate(pa0_wk(3,lmnp0)) ; pa0_wk = 0
       allocate(pb0_wk(3,lmnp0)) ; pb0_wk = 0
       allocate(pb_wk(3,lmnp2))  ; pb_wk = 0
       allocate(ka0_wk(4,lmnp0)) ; ka0_wk = 0
       allocate(ka2_wk(4,lmnp2)) ; ka2_wk = 0

       call setkp0_n(il,ngen,inv,igen,jgen,a,b,c,ca,cb,cc &
              & ,np2,np1,np0,lmnp0,lmnp1,lmnp2 &
              & ,nxx0,nyy0,nzz0,nxx,nyy,nzz &
              & ,ip10_wk,ip20_wk,ip01_wk,ip02_wk,ip21_wk,ip12_wk,iu21_wk,iv21_wk &
              & ,nstar2_wk,pa0_wk,pb0_wk,pb_wk,ka0_wk,ka2_wk &
              & ,ipri_kp,itrs)
    else
       call nskma0(il,nx,ny,nz,nxx,nyy,nzz,nx1,ny1,nz1,nd)
       lmnp0=(nxx+1)*(nyy+1)*(nzz+1)
       lmnp1=lmnp0
       lmnp2=lmnp0

       allocate(ip10_wk(lmnp0))  ; ip10_wk = 0
       allocate(ip20_wk(lmnp0))  ; ip20_wk = 0
       allocate(ip01_wk(lmnp1))  ; ip01_wk = 0
       allocate(ip02_wk(lmnp2))  ; ip02_wk = 0
       allocate(ip21_wk(lmnp1))  ; ip21_wk = 0
       allocate(ip12_wk(lmnp2))  ; ip12_wk = 0
       allocate(iu21_wk(lmnp1))  ; iu21_wk = 0
       allocate(iv21_wk(lmnp1))  ; iv21_wk = 0
       allocate(nstar2_wk(lmnp2)); nstar2_wk = 0
       allocate(pa0_wk(3,lmnp0)) ; pa0_wk = 0
       allocate(pb0_wk(3,lmnp0)) ; pb0_wk = 0
       allocate(pb_wk(3,lmnp2))  ; pb_wk = 0
       allocate(ka0_wk(4,lmnp0)) ; ka0_wk = 0
       allocate(ka2_wk(4,lmnp2)) ; ka2_wk = 0

       if ( gen_tetramesh_mode == 0 ) then
          call setkp0_default_n(il,ngen,inv,igen,jgen,a,b,c,ca,cb,cc &
               & ,nx,ny,nz &
               & ,np2,np1,np0,lmnp0,lmnp1,lmnp2 &
               & ,nxx,nyy,nzz &
               & ,ip10_wk,ip20_wk,ip01_wk,ip02_wk,ip21_wk,ip12_wk &
               & ,iu21_wk,iv21_wk &
               & ,nstar2_wk,pa0_wk,pb0_wk,pb_wk,ka0_wk &
               & ,ipri_kp,itrs)
       else if ( gen_tetramesh_mode == 1 ) then
          call setkp0_default_n_kt(il,ngen,inv,igen,jgen,a,b,c,ca,cb,cc &
               & ,nx,ny,nz &
               & ,np2,np1,np0,lmnp0,lmnp1,lmnp2 &
               & ,nxx,nyy,nzz &
               & ,ip10_wk,ip20_wk,ip01_wk,ip02_wk,ip21_wk,ip12_wk &
               & ,iu21_wk,iv21_wk &
               & ,nstar2_wk,pa0_wk,pb0_wk,pb_wk,ka0_wk &
               & ,ipri_kp, &
               &  use_altv_rltv, altv, rltv, itrs, &
               &  gen_name_in_carts )
       endif
   end if

   !!$ kv3 = np2
   nwei = np1

   allocate(trmat(3,3))
   allocate(trbp(3,3))
   allocate(trpb(3,3))
   allocate(mat1(3,3))
   allocate(mat2(3,3))

   call get_trmat1  !-(contained here) ->(trmat)

   !! do nn = 1, kv3
   !!   nv(1) = ka0_wk(1,ip02_wk(nn))
   !!   nv(2) = ka0_wk(2,ip02_wk(nn))
   !!   nv(3) = ka0_wk(3,ip02_wk(nn))
   !!   nk = ka0_wk(4,ip02_wk(nn))
   !!   nw = nstar2_wk(nn)
   !!   vkxyz(nn,1:3,CARTS) = matmul(trmat,nv)/dble(nk)
   !!   qwgt(nn) = dble(nw)/dble(nwei)
   !! enddo

! T.Hamada
   do nn = 1, np0
      nv(1) = ka0_wk(1,nn)
      nv(2) = ka0_wk(2,nn)
      nv(3) = ka0_wk(3,nn)
      nk = ka0_wk(4,nn)
      vk0xyz(nn,1:3) = matmul(trmat,nv)/dble(nk)
   enddo
! T.Hamada

   deallocate(trmat)
   deallocate(trbp )
   deallocate(trpb )
   deallocate(mat1 )
   deallocate(mat2 )


   deallocate(ip10_wk)
   deallocate(ip20_wk)
   deallocate(ip01_wk)
   deallocate(ip02_wk)
   deallocate(ip21_wk)
   deallocate(ip12_wk)
   deallocate(iu21_wk)
   deallocate(iv21_wk)
   deallocate(nstar2_wk)
   deallocate(pa0_wk)
   deallocate(pb0_wk)
   deallocate(pb_wk)
   deallocate(ka0_wk)
   deallocate(ka2_wk)

   contains
    subroutine get_trmat1
!    make translation matrix  trpb (P -> B)

      call getspgtab(trbp)  ! spg+tetra

!!$    goto 2

!!$1   trbp = 0.d0
!!$    do i = 1,3
!!$       trbp(i,i) = 1.d0
!!$    enddo

!!$2   continue

      call inver3n(3,trbp,trpb)
      mat1 = transpose(trpb)
      call inver3n(3,mat1,mat2)
      call matpr3(rltv,mat2,trmat)
    end subroutine get_trmat1
 end subroutine gen_vk0xyz_Core

 subroutine read_kmesh_from_nfinp
    implicit none
! subroutine reading k-point mesh information from input file(nfinp)
! This subroutine was originally written by H.Mizouchi(adv) on March 20, 2003
! and was later modified by T. Hamada on Aug. 7, 2003
!
    integer             :: nlines
    integer, parameter  :: NWK = 6
    integer             :: natm0, ntyp0
    integer             :: icond,ipriekzaj

    allocate(work(NWK,1))          
    if(m_CtrlP_check_inputfilestyle(nfinp) == NEW_) then
       if(printable) write(6,'(" !*--- input-file style = NEW")')
       call m_Files_reopen_nfinp(1)
       call m_CtrlP_rd_control(nfout,file_existance_contfiles,file_existance_3contfiles)
       call m_CtrlP_rd_accuracy(nfout)
       call m_Kp_rd_n_eps(nfout)
       call m_Files_reopen_nfinp(2)
    else
       if(printable) then
          write(6,'("!!* --- input-file style = OLD")')
          write(nfout,'("!* old input-file is not supported")')
       end if
       stop
    end if
 end subroutine read_kmesh_from_nfinp

 subroutine m_Kp_rd_n_eps(nfout)
!
!   read k-point data from F_INP
!
    integer, intent(in)       :: nfout
    character(len=FMAXVALLEN) :: rstr
    integer                   :: i, iret, f_selectBlock, f_getStringValue, f_getIntValue, f_getRealValue
    integer                   :: f_selectParentBlock, f_selectTop
    integer                   :: sum_weight
    logical                   :: prealloc = .false.
    real(DP)                  :: dret
    integer,  dimension(3,2)  :: k_sample_mesh1
! --- Ksampling ---
    character(len("ksampling")),parameter :: tag_ksampling = "ksampling"
    character(len("method")),parameter ::    tag_method    = "method"
    character(len("mesh")),parameter ::      tag_mesh      = "mesh"
    character(len("nx")),parameter ::        tag_nx        = "nx"
    character(len("ny")),parameter ::        tag_ny        = "ny"
    character(len("nz")),parameter ::        tag_nz        = "nz"
    character(len("monk")),parameter ::      tag_monkhorst_pack = "monk"
    character(len("mp_index")),parameter ::  tag_mp_index  = "mp_index"
    character(len("n1")),parameter ::        tag_n1        = "n1"
    character(len("n2")),parameter ::        tag_n2        = "n2"
    character(len("n3")),parameter ::        tag_n3        = "n3"
    character(len("kshift")),parameter ::    tag_kshift    = "kshift"
    character(len("k1")),parameter ::        tag_k1        = "k1"
    character(len("k2")),parameter ::        tag_k2        = "k2"
    character(len("k3")),parameter ::        tag_k3        = "k3"
    character(len("file")),parameter ::      tag_file      = "file"
    character(len("gamma")),parameter ::     tag_gamma     = "gamma"
    character(len("directin")),parameter ::  tag_directin  = "directin"
    character(len("num_kpoints")),parameter ::tag_num_kpoints = "num_kpoints"
    character(len("sum_weight")),parameter:: tag_sum_weight = "sum_weight"
    character(len("kpoints")),parameter ::   tag_kpoints   = "kpoints"
    character(len("kx")),parameter ::        tag_kx        = "kx"
    character(len("ky")),parameter ::        tag_ky        = "ky"
    character(len("kz")),parameter ::        tag_kz        = "kz"
    character(len("denom")),parameter ::     tag_denom     = "denom"
    character(len("weight")),parameter ::    tag_weight    = "weight"
! --- temporary ---
    real(kind=DP), allocatable, dimension(:) :: kx_t,ky_t,kz_t
    real(kind=DP), allocatable, dimension(:) :: weight_t
    integer ::                                  kv3_t = -1
    integer :: mp_index1(3)
    real(kind=DP) :: kshift1(3)

    if(ipriinputfile >= 2.and.printable) write(nfout,'(" << m_Kp_rd_n >>")')
    ! --- accuracy ---
    iret = f_selectTop()
    if( f_selectBlock( tag_accuracy) == 0) then
       if(ipriinputfile >= 2.and.printable) write(nfout,'(" !*  tag_accuracy")')
       if( f_selectBlock( tag_ksampling) == 0) then
          if( f_getStringValue( tag_method, rstr, LOWER) == 0) call set_ksamplingmethod_eps(rstr)
          if( way_ksample == MESH ) then
             if( f_selectBlock( tag_mesh) == 0) then
                if( f_getIntValue( tag_nx, iret) == 0) k_sample_mesh1(1,1) = iret
                if( f_getIntValue( tag_ny, iret) == 0) k_sample_mesh1(2,1) = iret
                if( f_getIntValue( tag_nz, iret) == 0) k_sample_mesh1(3,1) = iret
                k_sample_mesh1(:,2) = k_sample_mesh1(:,1)
                iret = f_selectParentBlock()
             else
                k_sample_mesh1(1:3,1) = 4   ! default value
             end if
! Monkhorst_Pack scheme is added March,26, 2004
          else if( way_ksample == MONKHORST_PACK ) then
              if( f_selectBlock( tag_mp_index) == 0) then
                 if( f_getIntValue( tag_n1, iret) == 0) mp_index1(1) = iret
                 if( f_getIntValue( tag_n2, iret) == 0) mp_index1(2) = iret
                 if( f_getIntValue( tag_n3, iret) == 0) mp_index1(3) = iret
                 iret = f_selectParentBlock()
              else if( f_selectBlock( tag_mesh) == 0) then
                 if( f_getIntValue( tag_nx, iret) == 0) mp_index1(1) = iret
                 if( f_getIntValue( tag_ny, iret) == 0) mp_index1(2) = iret
                 if( f_getIntValue( tag_nz, iret) == 0) mp_index1(3) = iret
                 iret = f_selectParentBlock()
              else
                 mp_index1(1:3) = 4   ! default value
              end if
              if( f_selectBlock( tag_kshift) == 0) then
                 if( f_getRealValue( tag_k1, dret, '' ) == 0) kshift1(1) = dret
                 if( f_getRealValue( tag_k2, dret, '' ) == 0) kshift1(2) = dret
                 if( f_getRealValue( tag_k3, dret, '' ) == 0) kshift1(3) = dret
                 iret = f_selectParentBlock()
              else
                 kshift1(1:3) = 0.5d0   ! default value for cubic system
                 if(il == 0) then   ! default value for hexagonal system
                    kshift1(1:2) = 0.0d0
                    kshift1(3) = 0.5d0
                 end if
              end if
              if(printable) then
                 write(nfout,'("<< Monkhorst-Pack scheme")')
                 write(nfout,'(" MP index:",3(1x,i3))') mp_index1(1:3)
                 write(nfout,'(" kp shift:",3(1x,f10.5))') kshift1(1:3)
                 write(nfout,'(" il = ",i3)') il
                 write(nfout,'("   Monkhorst-Pack scheme >>")')
              end if
#if 0
              do i=1,3
                if(mp_index1(i) > 20) then
                   if(printable) then
                      write(nfout,*)  &
                   & 'Monkhorst-Pack: found a bad parameter'
                      write(nfout,*)  &
                   & '#### Use Monknorst-Pack indeces less than 21. ###'
                   end if
                   stop 'Monkhorst-Pack: found a bad parameter'
                end if
              end do
#endif
! Monkhorst_Pack scheme added March, 26, 2004
          else if( way_ksample == SKPS_DIRECT_IN ) then
             sum_weight = 0
             if( f_getIntValue( tag_sum_weight,  iret) == 0) sum_weight = iret
             if( f_getIntValue( tag_num_kpoints, iret) == 0) then
                kv3_t = iret
                if(kv3_t <= 0) stop ' kv3 is not positive value << m_Kp_rd_n >>'
             else
                prealloc = .true.
                call set_kxyz(prealloc,iret)
                kv3_t = iret
                if(ipriinputfile >= 3.and.printable) write(nfout,'(" !* kv3_t = ",i6," <<m_Kp_rd_n>>")') kv3_t
             end if
             prealloc = .false.
             call alloc_kxyzweight(kv3_t)
             call set_kxyz(prealloc,iret)
          end if
          iret = f_selectParentBlock()
       end if
       iret = f_selectParentBlock()
    end if
    if(way_ksample == MESH) then
       if(ipriinputfile >= 1.and.printable) write(nfout,'(" !* k-point sampling method = ",i6)') way_ksample
       if(ipriinputfile >= 1.and.printable) write(nfout,'(" !* mesh = ",3i4)') &
                                            & k_sample_mesh1(1,1),k_sample_mesh1(2,1),k_sample_mesh1(3,1)

       680   format(' ',3i4,'   : nkx, nky, nkz in m_Epsilon ')
       690   format(' ',3i4,'   : nkx2,nky2,nkz2 in m_Epsilon')

       nkx = k_sample_mesh1(1,1)
       nky = k_sample_mesh1(2,1)
       nkz = k_sample_mesh1(3,1)
       nkx2 = k_sample_mesh1(1,2)
       nky2 = k_sample_mesh1(2,2)
       nkz2 = k_sample_mesh1(3,2)
       
       if(printable) then
          write(6,680) nkx,nky,nkz
          if(nbztyp == 1) write(6,690) nkx2,nky2,nkz2
       end if
    endif
    call dealloc_kxyzweight
  contains
    subroutine alloc_kxyzweight(na)
      integer, intent(in) :: na
      allocate(kx_t(na)); allocate(ky_t(na));allocate(kz_t(na));allocate(weight_t(na))
    end subroutine alloc_kxyzweight

    subroutine dealloc_kxyzweight()
      if(allocated(kx_t)) deallocate(kx_t)
      if(allocated(ky_t)) deallocate(ky_t)
      if(allocated(kz_t)) deallocate(kz_t)
      if(allocated(weight_t)) deallocate(weight_t)
    end subroutine dealloc_kxyzweight

    subroutine set_kxyz(prealloc,iret)
      logical, intent(in)  ::  prealloc
      integer, intent(out) ::  iret
      integer :: i, f_readKPoints, f_selectFirstTableLine, f_selectNextTableLine
      real(kind=DP),dimension(3) :: kvec
      integer ::              weight, sum
      if( f_selectBlock(tag_kpoints) == 0) then
         sum = 0
         i = 1
         do while(.true.)
            if( i == 1 ) then
               if( f_selectFirstTableLine() /= 0 ) then
                  exit
               end if
            else
               if( f_selectNextTableLine() /= 0 ) then
                  exit
               end if
            end if
            iret = f_readKPoints( tag_kx,tag_ky,tag_kz,tag_denom,tag_weight &
                 & ,kvec, weight )
!!$         print '(f8.4, f8.4, f8.4, I3)', kvec(1),  kvec(2), kvec(3), weight
            if(.not.prealloc) then
               if(i > kv3_t) exit
               if(weight < 1.d-13) weight = 1
               sum = sum + weight
               kx_t(i) = kvec(1); ky_t(i) = kvec(2); kz_t(i) = kvec(3)
               weight_t(i) = weight
            end if
            i = i+1
         end do
         if(ipriinputfile >= 3.and.printable) write(nfout,'(" !*  weight-sum = ", i6)') sum
         if(.not.prealloc .and. sum_weight /= 0) then
            if(sum_weight /= sum.and.printable) then
               write(nfout,'(" !* Given sum_weight is not equal to summed weight")')
            end if
         end if
         if(.not.prealloc) weight_t = weight_t/dble(sum)
         iret = f_selectParentBlock()
      else
         stop ' ! No kpoints is given in the inputfile <<m_Kp_rd_n>>'
      end if
      if(prealloc) iret = i-1
      if(.not.prealloc) then
         if(ipriinputfile >= 2.and.printable) then
            do i = 1, kv3_t
               write(nfout,'(" !* i = ",i6," kvxyz,weight = ",4f8.4)') i,kx_t(i),ky_t(i),kz_t(i),weight_t(i)
            end do
         end if
      end if
    end subroutine set_kxyz

    subroutine set_ksamplingmethod_eps(rstr)
      character(len=FMAXVALLEN),intent(in) :: rstr
      logical                              :: tf
      way_ksample = MESH
      call strncmp0(tag_mesh, trim(rstr), tf)
      if(tf) then
         way_ksample = MESH
         goto 1001
      end if
      call strncmp0(tag_file, trim(rstr), tf)
      if(tf) then
         way_ksample = FILE
         goto 1001
      end if
      call strncmp0(tag_gamma,trim(rstr),tf)
      if(tf) then
         way_ksample = GAMMA
         goto 1001
      end if
      call strncmp0(tag_directin, trim(rstr),tf)
      if(tf) then
         way_ksample = SKPS_DIRECT_IN
         goto 1001
      end if
      call strncmp0(tag_monkhorst_pack, trim(rstr),tf)
      if(tf) then
         way_ksample = MONKHORST_PACK
         goto 1001
      end if
      stop ' ! tag for ksampling is invalid <<m_Kp_rd_n.set_ksamplingmethod>>'
1001  continue
      if(ipriinputfile >= 2.and.printable) write(nfout,'(" !* ksamping method = ",a10)') trim(rstr)
      call m_CtrlP_set_way_ksample(way_ksample)
    end subroutine set_ksamplingmethod_eps
 end subroutine m_Kp_rd_n_eps

 subroutine get_cub_data
    implicit none
    integer                                :: nn,nk,nw,nwei,nv(3)
    integer                                :: np1
    integer                                :: nxx0,nyy0,nzz0,nxx,nyy,nzz
    integer                                :: lmnp0, lmnp1, lmnp2
    integer                                :: nx1, ny1, nz1, nd
    real(kind=DP), allocatable, dimension(:,:) :: pa0_wk,pb0_wk,pb_wk
    integer,       allocatable, dimension(:,:) :: ka0_wk,ka2_wk
    integer,       allocatable, dimension(:)   :: ip10_wk,ip02_wk,ip12_wk &
     &                                       ,ip01_wk,ip21_wk,iu21_wk,iv21_wk,nstar2_wk &
     &                                       ,iwt
    integer,       allocatable, dimension(:,:) :: ip2cub_wk

    if(printable) write(nfout,*) '-- get_cub_data --'

    if(nbztyp.eq.1) then

        np0 = (nkx2+1)*(nky2+1)*(nkz2+1)
        np2 = nkx2*nky2*nkz2
        np1=np2
        print '(" np0,np1,np2 = ",3i6)',np0,np1,np2

        if(np2 /= kv3/nspin) then
           if(printable) then
              write(nfout,*) ' np0,np1,np2 ',np0,np1,np2
              write(nfout,*) ' kv3/nspin ',kv3/nspin
              write(nfout,*) ' np2  /=  (kv3/nspin)'
           end if
           stop
        endif

        allocate(ip20(np0)) ; ip20 = 0

        il = 1

        lmnp0=np0
        lmnp1=np1

        allocate(ip01_wk(np1)) ; ip01_wk = 0
        allocate(pa0_wk(3,np0)) ; pa0_wk = 0.d0

        call nskma0(il,nkx2,nky2,nkz2,nxx,nyy,nzz,nx1,ny1,nz1,nd)

        call nskp00(nxx,nyy,nzz,nx1,ny1,nz1,nd,lmnp0,np0,pa0_wk)

!! When nbztyp=1, ip10 in FLAPW program should be ip20 in this program !!

        call nskpbm(np0,lmnp0,lmnp1,pa0_wk,np1,ip20,ip01_wk)

        deallocate(pa0_wk)
        deallocate(ip01_wk)


    else if(nbztyp_spg == GENERAL .or.nbztyp_spg == GENERAL_LARGER) then
        nxx0=k_sample_mesh1(1,1); nyy0=k_sample_mesh1(2,1); nzz0=k_sample_mesh1(3,1)

        call nskma0(il,nxx0,nyy0,nzz0,nxx,nyy,nzz,nx1,ny1,nz1,nd)
        lmnp0=(nxx+1)*(nyy+1)*(nzz+1)
        lmnp1=lmnp0
        lmnp2=lmnp0

        allocate(ip10_wk(lmnp0))  ; ip10_wk = 0
        allocate(ip20(lmnp0))  ; ip20 = 0
        allocate(ip01_wk(lmnp1))    ; ip01_wk = 0
        allocate(ip02_wk(lmnp2))    ; ip02_wk = 0
        allocate(ip21_wk(lmnp1))    ; ip21_wk = 0
        allocate(ip12_wk(lmnp2))    ; ip12_wk = 0
        allocate(iu21_wk(lmnp1))    ; iu21_wk = 0
        allocate(iv21_wk(lmnp1))    ; iv21_wk = 0
        allocate(nstar2_wk(lmnp2))  ; nstar2_wk = 0
        allocate(pa0_wk(3,lmnp0))  ; pa0_wk = 0

        call setkp0_n(il,ngen,inv,igen,jgen,a,b,c,ca,cb,cc &
              & ,np2,np1,np0,lmnp0,lmnp1,lmnp2 &
              & ,nxx0,nyy0,nzz0,nxx,nyy,nzz &
              & ,ip10_wk,ip20,ip01_wk,ip02_wk,ip21_wk,ip12_wk,iu21_wk,iv21_wk &
              & ,nstar2_wk,pa0_wk,pb0_wk,pb_wk,ka0_wk,ka2_wk &
              & ,ipri_kp,itrs)

        deallocate(ip10_wk)
        deallocate(ip01_wk)
        deallocate(ip02_wk)
        deallocate(ip21_wk)
        deallocate(ip12_wk)
        deallocate(iu21_wk)
        deallocate(iv21_wk)
        deallocate(nstar2_wk)
        deallocate(pa0_wk)
        deallocate(pb0_wk)
        deallocate(pb_wk)
        deallocate(ka0_wk)
        deallocate(ka2_wk)

     else

        call nskma0(il,nkx,nky,nkz,nxx,nyy,nzz,nx1,ny1,nz1,nd)
        lmnp0=(nxx+1)*(nyy+1)*(nzz+1)
        lmnp1=lmnp0
        lmnp2=lmnp0

        allocate(ip10_wk(lmnp0))  ; ip10_wk = 0
        allocate(ip20(lmnp0))  ; ip20 = 0
        allocate(ip01_wk(lmnp1))    ; ip01_wk = 0
        allocate(ip02_wk(lmnp2))    ; ip02_wk = 0
        allocate(ip21_wk(lmnp1))    ; ip21_wk = 0
        allocate(ip12_wk(lmnp2))    ; ip12_wk = 0
        allocate(iu21_wk(lmnp1))    ; iu21_wk = 0
        allocate(iv21_wk(lmnp1))    ; iv21_wk = 0
        allocate(nstar2_wk(lmnp2))  ; nstar2_wk = 0
        allocate(pa0_wk(3,lmnp0))  ; pa0_wk = 0
        allocate(pb0_wk(3,lmnp0))  ; pb0_wk = 0
        allocate(pb_wk(3,lmnp2))  ; pb_wk = 0
        allocate(ka0_wk(4,lmnp0))  ; ka0_wk = 0
        allocate(ka2_wk(4,lmnp2))  ; ka2_wk = 0

       if ( gen_tetramesh_mode == 0 ) then
          call setkp0_default_n(il,ngen,inv,igen,jgen,a,b,c,ca,cb,cc &
               & ,nkx,nky,nkz &
               & ,np2,np1,np0,lmnp0,lmnp1,lmnp2 &
               & ,nxx,nyy,nzz &
               & ,ip10_wk,ip20,ip01_wk,ip02_wk,ip21_wk,ip12_wk &
               & ,iu21_wk,iv21_wk &
               & ,nstar2_wk,pa0_wk,pb0_wk,pb_wk,ka0_wk &
               & ,ipri_kp,itrs)
       else if ( gen_tetramesh_mode == 1 ) then
          call setkp0_default_n_kt(il,ngen,inv,igen,jgen,a,b,c,ca,cb,cc &
               & ,nkx,nky,nkz &
               & ,np2,np1,np0,lmnp0,lmnp1,lmnp2 &
               & ,nxx,nyy,nzz &
               & ,ip10_wk,ip20,ip01_wk,ip02_wk,ip21_wk,ip12_wk &
               & ,iu21_wk,iv21_wk &
               & ,nstar2_wk,pa0_wk,pb0_wk,pb_wk,ka0_wk &
               & ,ipri_kp, &
               &  use_altv_rltv, altv, rltv, itrs, &
               &  gen_name_in_carts )
       endif

        deallocate(ip10_wk)
        deallocate(ip01_wk)
        deallocate(ip02_wk)
        deallocate(ip21_wk)
        deallocate(ip12_wk)
        deallocate(iu21_wk)
        deallocate(iv21_wk)
        deallocate(nstar2_wk)
        deallocate(pa0_wk)
        deallocate(pb0_wk)
        deallocate(pb_wk)
        deallocate(ka0_wk)
        deallocate(ka2_wk)

    end if

    nxyz_tetra(1) = nxx
    nxyz_tetra(2) = nyy
    nxyz_tetra(3) = nzz

! === KT_add === 13.1R
    if ( allocated( ip2cub ) ) deallocate( ip2cub )
! ============== 13.1R

    allocate(ip2cub(np1)) ; ip2cub = 0
    allocate(iwt(np2)) ; iwt = 0

    allocate(ip2cub_wk(9,nxyz_tetra(1)*nxyz_tetra(2)*nxyz_tetra(3)))
    ip2cub_wk = 0.d0
    call wtetra &
      &  (nxyz_tetra(1),nxyz_tetra(2),nxyz_tetra(3),np0,np2,ip20 &
      &  ,iwt,ip2cub &
      &  ,ip2cub_wk)
    deallocate(ip2cub_wk)

 end subroutine get_cub_data
!!!!!!!!!!!!!!!!! added by mizouchi 2003/03/20 !!!!!!!!!!!

  subroutine check_PP(nfout)
    implicit none
!
!   find pseudopotantial norm and local potential types
!
!   norm type        NC_PP : norm conserving
!                    US_PP : ultrasoft
!   local potential  BHS_POLY : BHS or polynomial type
!                    ORBITAL  : orbital local potential for Troullier-Martin pseudopotentials

    integer, intent(in)                      :: nfout
    integer                                  :: it, ntype, ltype, ilocal
    integer, allocatable, dimension(:)       :: PP_local_orbital
    character(len=1), dimension(7)           :: local_potential
    data local_potential/'s','p','d','f',' ',' ',' '/

    allocate(PP_local_orbital(ntyp)); PP_local_orbital = 0
    do it = 1, ntyp
       call find_norm_type(it,ntype)
       PP_norm_type(it) = ntype
       call find_local_orbital(it,ilocal)
       if(ilocal == 0) then
          PP_local_type(it) = BHS_POLY
       else
          PP_local_type(it) = ORBITAL
       end if
       PP_local_orbital(it) = ilocal
    end do
    if(printable) write(nfout,'(1x,"!* ---------- pseudopotential type ----------")')
    if(printable) write(nfout,'(1x,4x,"it",10x,"norm",12x,"local potential")')
    do it = 1, ntyp
       if(printable) then
          if(PP_norm_type(it)==NC_PP.and.PP_local_type(it)==BHS_POLY) &
         & write(nfout,'(1x,3x,i3,5x,"norm conserving",5x,"BHS or polynomial")') it
          if(PP_norm_type(it)==NC_PP.and.PP_local_type(it)==ORBITAL) &
         & write(nfout,'(1x,3x,i3,5x,"norm conserving",9x,a1,1x,"orbital")') it, local_potential(PP_local_orbital(it))
          if(PP_norm_type(it)==US_PP.and.PP_local_type(it)==BHS_POLY) &
         & write(nfout,'(1x,3x,i3,8x,"ultrasoft",8x,"BHS or polynomial")') it
          if(PP_norm_type(it)==US_PP.and.PP_local_type(it)==ORBITAL) &
         & write(nfout,'(1x,3x,i3,8x,"ultrasoft",12x,a1,1x,"orbital")') it, local_potential(PP_local_orbital(it))
       end if
    end do
!
!   check transition moment option and stop if it is wrong
!
    if(n_check_ts/=0) then
       call check_ts_option
    else
       if(printable) write(nfout,'(/1x,"!* transition moment option is not checked",/)')
    end if

    deallocate(PP_local_orbital)
    contains
     subroutine find_norm_type(it,ntype)
       implicit none
       integer, intent(in)  :: it
       integer, intent(out) :: ntype
       integer              :: lmt1, lmt2
       real(kind=DP)        :: deficit_ch
       ntype = NC_PP
       do lmt1 = 1, ilmt(it)
          do lmt2 = lmt1, ilmt(it)
             deficit_ch=dabs(q(lmt1,lmt2,it))
             if(deficit_ch>0.0d0) then
                ntype=US_PP
                exit
             end if
          end do
          if(ntype==US_PP) exit
       end do
     end subroutine find_norm_type

     subroutine find_local_orbital(it,ilocal)
       implicit none
       integer, intent(in)  :: it
       integer, intent(out) :: ilocal
       integer :: lmt, il1, il2
       ilocal = 0
       do il1=1,lpsmax(it)
          do lmt=1,ilmt(it)
             il2=ltp(lmt,it)
             if(il1==il2) exit
          end do
          if(il1/=il2) then
             ilocal=il1
          end if
       end do
     end subroutine find_local_orbital

     subroutine check_ts_option
       implicit none
       integer :: it
       if(icond==3) return
       if(printable) write(nfout,'(/1x,"!* enter check of transition moment option")')
       if(nonlocal==0.and.nppcorr==0) then
          if(printable) write(nfout,'(1x,"!* local transition moment : check is skipped",/)')
          return
       end if

       if(nonlocal/=0) then
          do it = 1, ntyp
             if(printable.and.PP_norm_type(it)/=NC_PP) then
                write(nfout,'(1x,"!* A pseudopotential in use is the ultra-soft type")')
                write(nfout,'(1x,"!* Read and Needs method cannot be used for the ultra-soft type.   UVSOR-Epsilon stop")')
                stop
             end if
          end do
       end if
       if(nppcorr/=0) then
          do it = 1, ntyp
             if(PP_local_type(it)/=BHS_POLY) then
                if(printable.and.sw_use_add_proj /= ON) then
                   write(nfout,'(1x,"!* A seudopotential in use is the Troullier-Martin type")')
                   write(nfout,'(1x,"!* set use_additional_projecter = on in Control tag.   UVSOR-Epsilon stop")')
                   stop
                end if
             end if
          end do
       end if
       if(printable) write(nfout,'(1x,"!* no problem with transition moment option setting",/)')

    end subroutine check_ts_option
 end subroutine check_PP

 subroutine prepare_for_TM_PP_ek
    implicit none
    integer :: lmax,ilocal,il1,il2,lmt
    integer :: it,lmt1,lmt2

    allocate(ilocal_l(ntyp))

    if(mype == 0) then
       ilocal=0
       do it = 1, ntyp
          lmax=lpsmax(it)
          call check_local_orbital
          if(printable) then
             if(ilocal==0) then
                write(nfout,'(1x,"!* all non-local projectors are given for it =",i3)') it
                write(nfout,'(1x,"!* skip correction for Troullier-Martin pseudopotential for it =",i3)') it
             else
                write(nfout,'(1x,"!* ptrans correction for Troullier-Matrtin pseudopotential for it =",i3)') it
                write(nfout,'(1x,"!* non-local projector for iloc =",i3," is not given")') ilocal
                write(nfout,'(1x,"!* correction for iloc =",i3," is made")') ilocal
             end if
          end if
          ilocal_l(it)=ilocal
       end do
    end if
    call mpi_bcast(ilocal_l,ntyp,mpi_integer,0,mpi_comm_group,ierr)
    contains
     subroutine check_local_orbital
       do il1=1,lmax
          do lmt=1,ilmt(it)
             il2=ltp(lmt,it)
             if(il1==il2) exit
          end do
          if(il1/=il2) then
             ilocal=il1
          end if
       end do
     end subroutine check_local_orbital
 end subroutine prepare_for_TM_PP_ek
 
 subroutine calc_ptrans_ek
    implicit none
!
!   calculate KS correction term
!
    integer                                :: id_sname = -1
    integer                                :: ispin, it, lmt1, lmt2, il1, im1, il2, im2, ia
    integer                                :: ik, ii, ib, ib1, ilmta, p, p1, index, ifact
    integer                                :: nspher1,nspher2
    real(kind=DP) :: fac, eib, eib1
    real(kind=DP),allocatable,dimension(:,:) :: wkfsr, wkfsi
! --> T. Yamasaki 2008/02/21
! <-- T. Yamasaki 2008/02/21
#ifdef NEC_TUNE
    integer :: iv, ic, ifind, max_ncount, ncount
    real(kind=DP), allocatable, dimension(:,:,:) :: workarray
    real(kind=DP), allocatable, dimension(:,:) :: wkr1, wkr2, wki1, wki2
#endif
    integer :: iadd


    call tstatc0_begin('calc_ptans_ek ',id_sname)
    allocate(wkfsr(neg,nlmta)); allocate(wkfsi(neg,nlmta))
#ifdef NEC_TUNE
    allocate(workarray(np_e,neg,2))
    max_ncount = 0
    do it=1,ntyp
       do lmt1 = 1, ilmt(it)
          do lmt2 = 1, ilmt(it)
             il2 = ltp(lmt2,it); im2 = mtp(lmt2,it)
             do ia = ista_atm, iend_atm
                if(ityp(ia) /= it) cycle
                max_ncount = max_ncount + 1
             end do
          end do
       end do
    end do
    allocate(wkr1(np_e,max_ncount))
    allocate(wkr2(neg,max_ncount))
    allocate(wki1(neg,max_ncount))
    allocate(wki2(np_e,max_ncount))
#endif

    if(npes >= 2) call mpi_barrier(mpi_comm_group,ierr)

!   make copy of <beta|WF>
! --> T. Yamasaki 2008/02/21
!!$    do ik = 1, kv3, af+1
!!$       do ib = 1, neg
!!$          if(map_ek(ib,ik) == mype) then
!!$               do ilmta=1, nlmta
!!$                  wkfsr(ib,ilmta,ik) = fsr_l(map_z(ib),ilmta,ik)
!!$                  wkfsi(ib,ilmta,ik) = fsi_l(map_z(ib),ilmta,ik)
!!$               end do
!!$               if(map_ek(ib,ik) /= 0) then
!!$                   call mpi_send(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
!!$                   call mpi_send(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
!!$               end if
!!$          else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
!!$            call mpi_recv(wkfsr,neg*nlmta*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
!!$            call mpi_recv(wkfsi,neg*nlmta*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
!!$         end if
!!$         if(npes >= 2)  then
!!$              call mpi_bcast(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
!!$              call mpi_bcast(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
!!$         end if
!!$       end do
!!$    end do
    wkfsr = 0.0d0
    wkfsi = 0.0d0
    do ik = 1, kv3, af+1
       if(map_k(ik) /= myrank_k) cycle
       do ib = 1, np_e
          do ilmta = ista_fs, iend_fs
             iadd = ilmta - ista_fs + 1
             wkfsr(neg_g(ib),ilmta) = fsr_l(ib,iadd,ik)
             wkfsi(neg_g(ib),ilmta) = fsi_l(ib,iadd,ik)
          end do
       end do
    end do
    call mpi_allreduce(MPI_IN_PLACE,wkfsr,neg*nlmta,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    call mpi_allreduce(MPI_IN_PLACE,wkfsi,neg*nlmta,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)

! <-- T. Yamasaki 2008/02/21

    ptrans=0.0d0

!   calculate sum[<WF1|beta(i)>pij<beta(j)|WF2>
#ifndef NEC_TUNE
    do ii = 1,3
       do ispin = 1, nspin, af+1
          do ik = ispin, kv3-nspin+ispin, nspin
             do ib = 1, neg
                do ib1 = 1, neg
                   if(nrd_efermi==1) then
                      eib=e2_mpi(ib,ik)
                      eib1=e2_mpi(ib1,ik)
                      if(eib.gt.efermi.and.eib1.le.efermi) call calc_ptrans_ek_core(ib,ib1)
                   else
                      if(ib/=ib1) call calc_ptrans_ek_core(ib,ib1)
                   end if
                end do
             end do
          end do
       end do
    end do
#else
    if(nrd_efermi == 1) then
       do ii = 1,3
          do ispin = 1, nspin, af+1
             do ik = ispin, kv3-nspin+ispin, nspin
                if(map_k(ik) /= myrank_k) cycle
                wkr1 = 0.0d0
                wkr2 = 0.0d0
                wki1 = 0.0d0
                wki2 = 0.0d0
                do ib = 1, neg
                   ncount = 0
                   do it=1,ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            il2 = ltp(lmt2,it); im2 = mtp(lmt2,it)
                            call find_ptrans_index_ek(it,lmt1,lmt2,nppc_data(it),index,ifact,nspher1,nspher2)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               fac = real(ifact,kind=DP)*fac
                               ncount = ncount + 1
                               wkr2(ib,ncount) = wkfsr(ib,p1)
                               wki1(ib,ncount) = wkfsi(ib,p1)
                            end do
                         end do
                      end do
                   end do
                end do
                do ib = 1, np_e
                   iadd = ib + ista_e - 1
                   ncount = 0
                   do it=1,ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            il2 = ltp(lmt2,it); im2 = mtp(lmt2,it)
                            call find_ptrans_index_ek(it,lmt1,lmt2,nppc_data(it),index,ifact,nspher1,nspher2)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               fac = real(ifact,kind=DP)*fac
                               ncount = ncount + 1
                               wkr1(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsr(iadd,p)
                               wki2(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsi(iadd,p)
                            end do
                         end do
                      end do
                   end do
                end do
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wkr1,np_e,wki1,neg,0.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki2,np_e,wkr2,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wkr1,np_e,wkr2,neg,0.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki2,np_e,wki1,neg,1.0d0,workarray(1,1,2),np_e)
                call mpi_allreduce(MPI_IN_PLACE,workarray,np_e*neg*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)
                do ib = 1, np_e
                   iadd = ib + ista_e - 1
                   do ib1 = 1, neg
                      eib=e2_mpi(iadd,ik)
                      eib1=e2_mpi(ib1,ik)
                      if(eib.gt.efermi.and.eib1.le.efermi) then
                         call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                         if(ifind==0.and.printable) then
                            write(nfout, &
                         & '(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
                         end if
                         ptrans(ik,ic,iv,ii,1) = workarray(ib,ib1,1)
                         ptrans(ik,ic,iv,ii,2) = workarray(ib,ib1,2)
                      end if
                   end do
                end do
             end do
          end do
       end do
    else
       do ii = 1,3
          do ispin = 1, nspin, af+1
             do ik = ispin, kv3-nspin+ispin, nspin
                if(map_k(ik) /= myrank_k) cycle
                wkr1 = 0.0d0
                wkr2 = 0.0d0
                wki1 = 0.0d0
                wki2 = 0.0d0
                do ib = 1, neg
                   ncount = 0
                   do it=1,ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            il2 = ltp(lmt2,it); im2 = mtp(lmt2,it)
                            call find_ptrans_index_ek(it,lmt1,lmt2,nppc_data(it),index,ifact,nspher1,nspher2)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               fac = real(ifact,kind=DP)*fac
                               ncount = ncount + 1
                               wkr2(ib,ncount) = wkfsr(ib,p1)
                               wki1(ib,ncount) = wkfsi(ib,p1)
                            end do
                         end do
                      end do
                   end do
                end do
                do ib = 1, np_e
                   iadd = ib + ista_e - 1
                   ncount = 0
                   do it=1,ntyp
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt(it)
                            il2 = ltp(lmt2,it); im2 = mtp(lmt2,it)
                            call find_ptrans_index_ek(it,lmt1,lmt2,nppc_data(it),index,ifact,nspher1,nspher2)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               p = lmta(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               fac=real(iwei(ia),kind=DP)
                               fac = real(ifact,kind=DP)*fac
                               ncount = ncount + 1
                               wkr1(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsr(iadd,p)
                               wki2(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsi(iadd,p)
                            end do
                         end do
                      end do
                   end do
                end do
                call dgemm('N','T',np_e,neg,max_ncount, 1.0d0,wkr1,np_e,wki1,neg,0.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki2,np_e,wkr2,neg,1.0d0,workarray(1,1,1),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wkr1,np_e,wkr2,neg,0.0d0,workarray(1,1,2),np_e)
                call dgemm('N','T',np_e,neg,max_ncount,-1.0d0,wki2,np_e,wki1,neg,1.0d0,workarray(1,1,2),np_e)
                call mpi_allreduce(MPI_IN_PLACE,workarray,np_e*neg*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)
                do ib = 1, np_e
                   iadd = ib + ista_e - 1
                   do ib1 = 1, neg
                      if(iadd/=ib1) then
                         call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                         if(ifind==0.and.printable) then
                            write(nfout, &
                         & '(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
                         end if
                         ptrans(ik,ic,iv,ii,1) = workarray(ib,ib1,1)
                         ptrans(ik,ic,iv,ii,2) = workarray(ib,ib1,2)
                      end if
                   end do
                end do
             end do
          end do
       end do
    end if
#endif

!!$    if(nk_in_the_process + kv3-1 >= kv3_ek) stop ' m_Epsilon_ek (1)'

    if(nppcorr /= 2) then
        call mpi_allreduce(MPI_IN_PLACE,ptrans,size(ptrans),MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
    end if
    ptrans = ptrans*(af+1)

    deallocate(wkfsr)
    deallocate(wkfsi)
#ifdef NEC_TUNE
    deallocate(workarray)
    deallocate(wkr1)
    deallocate(wkr2)
    deallocate(wki1)
    deallocate(wki2)
#endif
    call tstatc0_end(id_sname)

    contains
     subroutine find_ptrans_index_ek(it,lmt1,lmt2,nptrans,index,ifact,nspher1,nspher2)
       implicit none
!
!      find core repair term pij for <WF1|beta(i)>pij<beta(j)|WF2>
!      i -> (it,lmt1)   j -> (it,lmt2)
!
       integer, intent(in)  :: it, lmt1, lmt2, nptrans
       integer, intent(out) :: index, ifact
       integer              :: lmtt1, il1, im1, tau1, nspher1
       integer              :: lmtt2, il2, im2, tau2, nspher2
       integer              :: nspher10, nspher20
       integer              :: tau10, tau20
       integer              :: iptrans
       index=0
       ifact=0
       call m_PP_tell_lmtt_l_m_tau(lmt1,it,lmtt1,il1,im1,tau1,nspher1)
       call m_PP_tell_lmtt_l_m_tau(lmt2,it,lmtt2,il2,im2,tau2,nspher2)
       if(nspher1>nspher2) then
          nspher20=nspher1
          nspher10=nspher2
          tau10=tau2
          tau20=tau1
          ifact=-1
       else
          nspher10=nspher1
          nspher20=nspher2
          tau10=tau1
          tau20=tau2
          ifact=1
       end if

!      find core-repair term with dipole_tau = tau, phase_ylm = nspher
       do iptrans=1,nptrans
          if(phase_ylm1(it,iptrans)==nspher10.and.phase_ylm2(it,iptrans)==nspher20) then
             if(dipole_tau1(it,iptrans)==tau10.and.dipole_tau2(it,iptrans)==tau20) then
                index=iptrans
                exit
             end if
          end if
       end do
     end subroutine find_ptrans_index_ek
    
     subroutine calc_ptrans_ek_core(ib,ib1)
!
!      calculate <WF1|beta(i)>pij<beta(j)|WF2>
!
       integer,intent(in) :: ib,ib1
       integer            :: index,iv,ic,ifind

       call find_ind_vb_and_cb2(ib1,ib,iv,ic,nk_in_the_process+ik-1,ifind)
       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
       end if

       do it=1,ntyp
          do lmt1 = 1, ilmt(it)
             do lmt2 = 1, ilmt(it)
                il2 = ltp(lmt2,it); im2 = mtp(lmt2,it)
                call find_ptrans_index_ek(it,lmt1,lmt2,nppc_data(it),index,ifact,nspher1,nspher2)
                if(index==0) cycle
                do ia = 1, natm
                   if(ityp(ia) /= it) cycle
                   p = lmta(lmt1,ia)
                   p1 = lmta(lmt2,ia)
                   fac=real(iwei(ia),kind=DP)
                   fac = real(ifact,kind=DP)*fac
                   ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) &
                & + fac*(dipole_dxyz_us(it,index,ii)) &
                & *(wkfsr(ib,p)*wkfsi(ib1,p1) - wkfsi(ib,p)*wkfsr(ib1,p1))
                   ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) &
                & -1.0d0* fac*(dipole_dxyz_us(it,index,ii)) &
                & *(wkfsr(ib,p)*wkfsr(ib1,p1) + wkfsi(ib,p)*wkfsi(ib1,p1))
                end do
             end do
          end do
       end do
     end subroutine calc_ptrans_ek_core
 end subroutine calc_ptrans_ek

 subroutine calc_ptrans_TM_PP_ek
!
!   calculate KS correction term for Troullier-Martin PP
!
    implicit none
    integer                                :: id_sname = -1
    integer                                :: ispin, it, lmt, lmt1, lmt2, il, il1, il2, ia
    integer                                :: ik, ii, ib, ib1, ilmta, p, p1
    integer                                :: ilmta_add, nspher1, nspher2
    integer                                :: icount, im1
    real(kind=DP)                          :: fac, eib, eib1
    real(kind=DP),allocatable,dimension(:,:)   :: wkfsr, wkfsi, wkfsr_add, wkfsi_add
#ifdef NEC_TUNE
    integer :: iv, ic, ifind, ifact, index, max_ncount0, max_ncount1, ncount
    real(kind=DP), allocatable, dimension(:,:,:) :: workarray
    integer, allocatable, dimension(:) :: icount_work
    real(kind=DP), allocatable, dimension(:,:) :: wkr01, wkr02, wki01, wki02
    real(kind=DP), allocatable, dimension(:,:) :: wkr11, wkr12, wki11, wki12
#endif
    integer :: iadd

    call tstatc0_begin('calc_ptrans_TM_PP_ek ',id_sname)
    allocate(wkfsr(neg,nlmta)); wkfsr=0.0d0
    allocate(wkfsi(neg,nlmta)); wkfsi=0.0d0
    allocate(wkfsr_add(neg,nlmta_add)); wkfsr_add=0.0d0
    allocate(wkfsi_add(neg,nlmta_add)); wkfsi_add=0.0d0

    if(sw_use_add_proj==ON) then
       call m_ES_add_betar_dot_WFs(nfout)
       if(printable) &
       & write(nfout,'(1x,"!* fsr_add_l and fsi_add_l have been calculated")')
    else
       if(printable) then
          write(nfout,'(1x,"!* fsr_add_l and fsi_add_l cannot be callculated")')
          write(nfout,'(1x,"!* skip correction for additional projector ")')
       end if
       return
    end if
    if(npes >= 2) call mpi_barrier(mpi_comm_group,ierr)

#ifdef NEC_TUNE
    allocate(workarray(np_e,neg,2))
    allocate(icount_work(neg))

    max_ncount0 = 0
    do it = 1, ntyp
       ncount = 0
       do lmt1 = 1, ilmt_add(it)
          il1 = ltp_add(lmt1,it)
          if(il1/=ilocal_l(it)) cycle
          do lmt2 = 1, ilmt(it)
             do ia = ista_atm, iend_atm
                if(ityp(ia) /= it) cycle
                ncount = ncount + 1
             end do
          end do
       end do
       max_ncount0 = max(max_ncount0, ncount)
    end do
    allocate(wkr01(np_e,max_ncount0))
    allocate(wkr02(neg,max_ncount0))
    allocate(wki01(neg,max_ncount0))
    allocate(wki02(np_e,max_ncount0))

    max_ncount1 = 0
    do it = 1, ntyp
       ncount = 0
       do lmt1 = 1, ilmt(it)
          do lmt2 = 1, ilmt_add(it)
             il2 = ltp_add(lmt2,it)
             if(il2/=ilocal_l(it)) cycle
             do ia = ista_atm, iend_atm
                if(ityp(ia) /= it) cycle
                ncount = ncount + 1
             end do
          end do
       end do
       max_ncount1 = max(max_ncount1, ncount)
    end do
    allocate(wkr11(np_e,max_ncount1))
    allocate(wkr12(neg,max_ncount1))
    allocate(wki11(neg,max_ncount1))
    allocate(wki12(np_e,max_ncount1))
#endif

! make copy of fsr_l and fsi_l
    do ik = 1, kv3, af+1
       if(map_k(ik) /= myrank_k) cycle
       do ib = 1, np_e
          do ilmta = ista_fs, iend_fs
             iadd = ilmta - ista_fs + 1
             wkfsr(neg_g(ib),ilmta) = fsr_l(ib,iadd,ik)
             wkfsi(neg_g(ib),ilmta) = fsi_l(ib,iadd,ik)
          end do
       end do
    end do
    call mpi_allreduce(MPI_IN_PLACE,wkfsr,neg*nlmta,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)
    call mpi_allreduce(MPI_IN_PLACE,wkfsi,neg*nlmta,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_k_world(myrank_k),ierr)

! make copy of fsr_add_l and fsi_add_l
    do ik = 1, kv3, af+1
       if(map_k(ik) /= myrank_k) cycle
       do ib = 1, np_e
          do ilmta_add = 1, nlmta_add
             wkfsr_add(neg_g(ib),ilmta_add) = fsr_add_l(ib,ilmta_add,ik)
             wkfsi_add(neg_g(ib),ilmta_add) = fsi_add_l(ib,ilmta_add,ik)
          end do
       end do
    end do
    call mpi_allreduce(MPI_IN_PLACE,wkfsr_add,neg*nlmta_add,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
    call mpi_allreduce(MPI_IN_PLACE,wkfsi_add,neg*nlmta_add,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)

! skip if PP has no local orbital
    do it = 1, ntyp
       !!if(printable) then
       !!  write(nfout,*) 'local orbital: it=',it
       !!  write(nfout,*) 'local orbital: l=',ilocal_l(it)
       !!end if
       if(ilocal_l(it)==0) then
          if(printable) then
             write(nfout,'(1x,"!* ilocal =0 for it = ",i3)') it
             write(nfout,'(1x,"!* additional projector correction is skipped")')
          end if
          cycle
       end if

! check <additional projector|WF> for l=ilocal
       do lmt=1,ilmt_add(it)
          il=ltp_add(lmt,it)
          if(il==ilocal_l(it)) then
             exit
          else
             if(lmt== ilmt_add(it)) then
                if(printable) then
                   write(nfout,'(1x,"!* additional projector for for iloc is not given for it = ",i3)') it
                   write(nfout,'(1x,"!* calculate additional projector for iloc")')
                end if
                stop
             end if
          end if
       end do

! calculate correction for local orbital
! add sum[<WF1|additional projector>p<beta|WF2>] term
       !!if(printable) write(nfout,*) 'nrd_efermi=',nrd_efermi
       icount=0
#ifndef NEC_TUNE
       do ii = 1, 3
          do ispin = 1, nspin, af+1
             do ik = ispin, kv3-nspin+ispin, nspin
                do ib = 1, neg
                   do ib1 = 1, neg
                      if(nrd_efermi==1) then
                         eib=e2_mpi(ib,ik)
                         eib1=e2_mpi(ib1,ik)
                         !!if(printable) write(nfout,*) 'calc_ptrans_TM_PP_ek_core_1, eib,eib1=',eib,eib1
                         if(eib.gt.efermi.and.eib1.le.efermi) call calc_ptrans_TM_PP_ek_core_1
                      else
                         !!if(printable) write(nfout,*) 'calc_ptrans_TM_PP_ek_core_1, ib,ib1=',ib,ib1
                         if(ib/=ib1) call calc_ptrans_TM_PP_ek_core_1
                      end if
                   end do
                end do
             end do
          end do
       end do
#else
       if(nrd_efermi==1) then
          do ii = 1, 3
             do ispin = 1, nspin, af+1
                do ik = ispin, kv3-nspin+ispin, nspin
                   if(map_k(ik) /= myrank_k) cycle
                   wkr01 = 0.0d0
                   wkr02 = 0.0d0
                   wki01 = 0.0d0
                   wki02 = 0.0d0
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do lmt1 = 1, ilmt_add(it)
                         il1 = ltp_add(lmt1,it)
                         if(il1/=ilocal_l(it)) cycle
                         do lmt2 = 1, ilmt(it)
                            call find_ptrans_index_add_beta(it,lmt1,lmt2,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p =  lmta_add(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               ncount = ncount + 1
                               wkr01(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsr_add(iadd,p)
                               wki02(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsi_add(iadd,p)
                            end do
                         end do
                      end do
                   end do
                   do ib = 1, neg
                      ncount = 0
                      do lmt1 = 1, ilmt_add(it)
                         il1 = ltp_add(lmt1,it)
                         if(il1/=ilocal_l(it)) cycle
                         do lmt2 = 1, ilmt(it)
                            call find_ptrans_index_add_beta(it,lmt1,lmt2,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p =  lmta_add(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               ncount = ncount + 1
                               wkr02(ib,ncount) = wkfsr(ib,p1)
                               wki01(ib,ncount) = wkfsi(ib,p1)
                            end do
                         end do
                      end do
                      icount_work(ib) = ncount
                   end do
                   call dgemm('N','T',np_e,neg,max_ncount0, 1.0d0,wkr01,np_e,wki01,neg,0.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount0,-1.0d0,wki02,np_e,wkr02,neg,1.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount0,-1.0d0,wkr01,np_e,wkr02,neg,0.0d0,workarray(1,1,2),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount0,-1.0d0,wki02,np_e,wki01,neg,1.0d0,workarray(1,1,2),np_e)
                   call mpi_allreduce(MPI_IN_PLACE,workarray,np_e*neg*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)
                   call mpi_allreduce(MPI_IN_PLACE,icount_work,neg,MPI_INTEGER,MPI_SUM,mpi_ke_world,ierr)
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do ib1 = 1, neg
                         eib=e2_mpi(iadd,ik)
                         eib1=e2_mpi(ib1,ik)
                         if(eib.gt.efermi.and.eib1.le.efermi) then
                            call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                            if(ifind==0.and.printable) then
                               write(nfout, &
                            & '(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
                            end if
                            ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) + workarray(ib,ib1,1)
                            ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) + workarray(ib,ib1,2)
                            ncount = ncount + 1
                         end if
                      end do
                      icount = icount + icount_work(iadd)*ncount
                   end do
                end do
             end do
          end do
       else
          do ii = 1, 3
             do ispin = 1, nspin, af+1
                do ik = ispin, kv3-nspin+ispin, nspin
                   if(map_k(ik) /= myrank_k) cycle
                   wkr01 = 0.0d0
                   wkr02 = 0.0d0
                   wki01 = 0.0d0
                   wki02 = 0.0d0
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do lmt1 = 1, ilmt_add(it)
                         il1 = ltp_add(lmt1,it)
                         if(il1/=ilocal_l(it)) cycle
                         do lmt2 = 1, ilmt(it)
                            call find_ptrans_index_add_beta(it,lmt1,lmt2,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p =  lmta_add(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               ncount = ncount + 1
                               wkr01(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsr_add(iadd,p)
                               wki02(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsi_add(iadd,p)
                            end do
                         end do
                      end do
                   end do
                   do ib = 1, neg
                      ncount = 0
                      do lmt1 = 1, ilmt_add(it)
                         il1 = ltp_add(lmt1,it)
                         if(il1/=ilocal_l(it)) cycle
                         do lmt2 = 1, ilmt(it)
                            call find_ptrans_index_add_beta(it,lmt1,lmt2,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p =  lmta_add(lmt1,ia)
                               p1 = lmta(lmt2,ia)
                               ncount = ncount + 1
                               wkr02(ib,ncount) = wkfsr(ib,p1)
                               wki01(ib,ncount) = wkfsi(ib,p1)
                            end do
                         end do
                      end do
                      icount_work(ib) = ncount
                   end do
                   call dgemm('N','T',np_e,neg,max_ncount0, 1.0d0,wkr01,np_e,wki01,neg,0.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount0,-1.0d0,wki02,np_e,wkr02,neg,1.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount0,-1.0d0,wkr01,np_e,wkr02,neg,0.0d0,workarray(1,1,2),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount0,-1.0d0,wki02,np_e,wki01,neg,1.0d0,workarray(1,1,2),np_e)
                   call mpi_allreduce(MPI_IN_PLACE,workarray,np_e*neg*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)
                   call mpi_allreduce(MPI_IN_PLACE,icount_work,neg,MPI_INTEGER,MPI_SUM,mpi_ke_world,ierr)
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do ib1 = 1, neg
                         if(iadd/=ib1) then
                            call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                            if(ifind==0.and.printable) then
                               write(nfout, &
                            & '(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
                            end if
                            ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) + workarray(ib,ib1,1)
                            ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) + workarray(ib,ib1,2)
                            ncount = ncount + 1
                         end if
                      end do
                      icount = icount + icount_work(iadd)*ncount
                   end do
                end do
             end do
          end do
       end if
#endif
       call mpi_allreduce(MPI_IN_PLACE,icount,1,MPI_INTEGER,MPI_SUM,mpi_kg_world,ierr)
       call mpi_allreduce(MPI_IN_PLACE,icount,1,MPI_INTEGER,MPI_SUM,mpi_ge_world,ierr)
! === Debug by T.Kato 2013/07/02 ===============================================
!      if(printable) write(nfout,'(1x,"!* number of additional ppc1 terms = ", i8)') icount
       if(printable) write(nfout,'(1x,"!* number of additional ppc1 terms = ", i12)') icount
! ==============================================================================
! add sum sum[<WF1|beta>p<additional projector|WF2>] term
       icount=0
#ifndef NEC_TUNE
       do ii = 1, 3
          do ispin = 1, nspin, af+1
             do ik = ispin, kv3-nspin+ispin, nspin
                do ib = 1, neg
                   do ib1 = 1, neg
                      if(nrd_efermi==1) then
                         eib=e2_mpi(ib,ik)
                         eib1=e2_mpi(ib1,ik)
                         if(eib.gt.efermi.and.eib1.le.efermi) call calc_ptrans_TM_PP_ek_core_2
                      else
                         if(ib/=ib1) call calc_ptrans_TM_PP_ek_core_2
                      end if
                   end do
                end do
             end do
          end do
       end do
#else
       if(nrd_efermi==1) then
          do ii = 1, 3
             do ispin = 1, nspin, af+1
                do ik = ispin, kv3-nspin+ispin, nspin
                   if(map_k(ik) /= myrank_k) cycle
                   wkr11 = 0.0d0
                   wkr12 = 0.0d0
                   wki11 = 0.0d0
                   wki12 = 0.0d0
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt_add(it)
                            il2 = ltp_add(lmt2,it)
                            if(il2/=ilocal_l(it)) cycle
                            call find_ptrans_index_add_beta(it,lmt2,lmt1,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            ifact=-1*ifact
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p = lmta(lmt1,ia)
                               p1 = lmta_add(lmt2,ia)
                               ncount = ncount + 1
                               wkr11(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsr(iadd,p)
                               wki12(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsi(iadd,p)
                            end do
                         end do
                      end do
                   end do
                   do ib = 1, neg
                      ncount = 0
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt_add(it)
                            il2 = ltp_add(lmt2,it)
                            if(il2/=ilocal_l(it)) cycle
                            call find_ptrans_index_add_beta(it,lmt2,lmt1,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            ifact=-1*ifact
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p = lmta(lmt1,ia)
                               p1 = lmta_add(lmt2,ia)
                               ncount = ncount + 1
                               wkr12(ib,ncount) = wkfsr_add(ib,p1)
                               wki11(ib,ncount) = wkfsi_add(ib,p1)
                            end do
                         end do
                      end do
                      icount_work(ib) = ncount
                   end do
                   call dgemm('N','T',np_e,neg,max_ncount1, 1.0d0,wkr11,np_e,wki11,neg,0.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount1,-1.0d0,wki12,np_e,wkr12,neg,1.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount1,-1.0d0,wkr11,np_e,wkr12,neg,0.0d0,workarray(1,1,2),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount1,-1.0d0,wki12,np_e,wki11,neg,1.0d0,workarray(1,1,2),np_e)
                   call mpi_allreduce(MPI_IN_PLACE,workarray,np_e*neg*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)
                   call mpi_allreduce(MPI_IN_PLACE,icount_work,neg,MPI_INTEGER,MPI_SUM,mpi_ke_world,ierr)
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do ib1 = 1, neg
                         eib=e2_mpi(iadd,ik)
                         eib1=e2_mpi(ib1,ik)
                         if(eib.gt.efermi.and.eib1.le.efermi) then
                            call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                            if(ifind==0.and.printable) then
                               write(nfout, &
                            & '(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
                            end if
                            ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) + workarray(ib,ib1,1)
                            ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) + workarray(ib,ib1,2)
                            ncount = ncount + 1
                         end if
                      end do
                      icount = icount + icount_work(iadd)*ncount
                   end do
                end do
             end do
          end do
       else
          do ii = 1, 3
             do ispin = 1, nspin, af+1
                do ik = ispin, kv3-nspin+ispin, nspin
                   if(map_k(ik) /= myrank_k) cycle
                   wkr11 = 0.0d0
                   wkr12 = 0.0d0
                   wki11 = 0.0d0
                   wki12 = 0.0d0
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt_add(it)
                            il2 = ltp_add(lmt2,it)
                            if(il2/=ilocal_l(it)) cycle
                            call find_ptrans_index_add_beta(it,lmt2,lmt1,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            ifact=-1*ifact
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p = lmta(lmt1,ia)
                               p1 = lmta_add(lmt2,ia)
                               ncount = ncount + 1
                               wkr11(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsr(iadd,p)
                               wki12(ib,ncount) = fac*dipole_dxyz_us(it,index,ii)*wkfsi(iadd,p)
                            end do
                         end do
                      end do
                   end do
                   do ib = 1, neg
                      ncount = 0
                      do lmt1 = 1, ilmt(it)
                         do lmt2 = 1, ilmt_add(it)
                            il2 = ltp_add(lmt2,it)
                            if(il2/=ilocal_l(it)) cycle
                            call find_ptrans_index_add_beta(it,lmt2,lmt1,nppc_data(it),index,ifact)
                            if(index==0) cycle
                            ifact=-1*ifact
                            do ia = ista_atm, iend_atm
                               if(ityp(ia) /= it) cycle
                               fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                               p = lmta(lmt1,ia)
                               p1 = lmta_add(lmt2,ia)
                               ncount = ncount + 1
                               wkr12(ib,ncount) = wkfsr_add(ib,p1)
                               wki11(ib,ncount) = wkfsi_add(ib,p1)
                            end do
                         end do
                      end do
                      icount_work(ib) = ncount
                   end do
                   call dgemm('N','T',np_e,neg,max_ncount1, 1.0d0,wkr11,np_e,wki11,neg,0.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount1,-1.0d0,wki12,np_e,wkr12,neg,1.0d0,workarray(1,1,1),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount1,-1.0d0,wkr11,np_e,wkr12,neg,0.0d0,workarray(1,1,2),np_e)
                   call dgemm('N','T',np_e,neg,max_ncount1,-1.0d0,wki12,np_e,wki11,neg,1.0d0,workarray(1,1,2),np_e)
                   call mpi_allreduce(MPI_IN_PLACE,workarray,np_e*neg*2,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ke_world,ierr)
                   call mpi_allreduce(MPI_IN_PLACE,icount_work,neg,MPI_INTEGER,MPI_SUM,mpi_ke_world,ierr)
                   do ib = 1, np_e
                      iadd = ib + ista_e - 1
                      ncount = 0
                      do ib1 = 1, neg
                         if(iadd/=ib1) then
                            call find_ind_vb_and_cb2(ib1,iadd,iv,ic,nk_in_the_process+ik-1,ifind)
                            if(ifind==0.and.printable) then
                               write(nfout, &
                            & '(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
                            end if
                            ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) + workarray(ib,ib1,1)
                            ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) + workarray(ib,ib1,2)
                            ncount = ncount + 1
                         end if
                      end do
                      icount = icount + icount_work(iadd)*ncount
                   end do
                end do
             end do
          end do
       end if
#endif
       call mpi_allreduce(MPI_IN_PLACE,icount,1,MPI_INTEGER,MPI_SUM,mpi_kg_world,ierr)
       call mpi_allreduce(MPI_IN_PLACE,icount,1,MPI_INTEGER,MPI_SUM,mpi_ge_world,ierr)
! === Debug by T.Kato 2013/07/02 ===============================================
!      if(printable) write(nfout,'(1x,"!* number of additional ppc2 terms = ", i8)') icount
       if(printable) write(nfout,'(1x,"!* number of additional ppc2 terms = ", i12)') icount
! ==============================================================================
    end do
    call mpi_allreduce(MPI_IN_PLACE,ptrans,size(ptrans),MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
 
    deallocate(wkfsr)
    deallocate(wkfsi)
    deallocate(wkfsr_add)
    deallocate(wkfsi_add)
#ifdef NEC_TUNE
    deallocate(workarray)
    deallocate(icount_work)
    deallocate(wkr01)
    deallocate(wkr02)
    deallocate(wki01)
    deallocate(wki02)
    deallocate(wkr11)
    deallocate(wkr12)
    deallocate(wki11)
    deallocate(wki12)
#endif

    call tstatc0_end(id_sname)

    contains
     subroutine calc_ptrans_TM_PP_ek_core_1
       integer :: iv, ic, ifind
       integer :: index, ifact

       call find_ind_vb_and_cb2(ib1,ib,iv,ic,nk_in_the_process+ik-1,ifind)
       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
       end if

       do lmt1 = 1, ilmt_add(it)
          il1 = ltp_add(lmt1,it)
          if(il1/=ilocal_l(it)) cycle
          do lmt2 = 1, ilmt(it)
! lmt1 -> index for additional projector
! lmt2 -> index for beta
             !!if(printable) &
             !!& write(nfout,*) 'find_ptrans_index_add_beta: it,lmt1,il1,lmt2,nppc_data(it)=',it,lmt1,il1,lmt2,nppc_data(it)
             call find_ptrans_index_add_beta(it,lmt1,lmt2,nppc_data(it),index,ifact)
             !!if(printable) write(nfout,*) 'find_ptrans_index_add_beta: index,ifact=',index,ifact
             if(index==0) cycle
             do ia = 1, natm
                if(ityp(ia) /= it) cycle
                fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                p =  lmta_add(lmt1,ia)
                p1 = lmta(lmt2,ia)
                ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) &
             & + fac*dipole_dxyz_us(it,index,ii) &
             & *(wkfsr_add(ib,p)*wkfsi(ib1,p1) - wkfsi_add(ib,p)*wkfsr(ib1,p1))
                ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) &
             & -1.0d0*fac*dipole_dxyz_us(it,index,ii) &
             & *(wkfsr_add(ib,p)*wkfsr(ib1,p1) + wkfsi_add(ib,p)*wkfsi(ib1,p1))
                icount=icount+1
             end do
          end do
       end do
     end subroutine calc_ptrans_TM_PP_ek_core_1

     subroutine calc_ptrans_TM_PP_ek_core_2
       integer :: iv, ic, ifind
       integer :: index, ifact

       call find_ind_vb_and_cb2(ib1,ib,iv,ic,nk_in_the_process+ik-1,ifind)
       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* valence or conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
       end if

       do lmt1 = 1, ilmt(it)
          do lmt2 = 1, ilmt_add(it)
! lmt1 --> index for beta
! lmt2 --> index for additional projector
             il2 = ltp_add(lmt2,it)
             if(il2/=ilocal_l(it)) cycle
             call find_ptrans_index_add_beta(it,lmt2,lmt1,nppc_data(it),index,ifact)
             if(index==0) cycle
             ifact=-1*ifact
             do ia = 1, natm
                if(ityp(ia) /= it) cycle
                fac = real(ifact,kind=DP)*real(iwei(ia),kind=DP)
                p = lmta(lmt1,ia)
                p1 = lmta_add(lmt2,ia)
! p  --> index for beta
! p1 --> index for addtional projector
                ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) &
             & + fac*dipole_dxyz_us(it,index,ii) &
             & *(wkfsr(ib,p)*wkfsi_add(ib1,p1) - wkfsi(ib,p)*wkfsr_add(ib1,p1))
                ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) &
             & -1.0d0*fac*dipole_dxyz_us(it,index,ii) &
             & *(wkfsr(ib,p)*wkfsr_add(ib1,p1) + wkfsi(ib,p)*wkfsi_add(ib1,p1))
                icount=icount+1
             end do
          end do
       end do
     end subroutine calc_ptrans_TM_PP_ek_core_2

     subroutine find_ptrans_index_add_beta(it,lmt1,lmt2,nptrans,index,ifact)
       implicit none
!
!      find core-repair term pij for <WF|addotional projector(i)>pij<beta(j)|WF2>
!
!      lmt1      : lmt index for additinal projector
!      lmt2      : lmt index for beta
!      nspher1   : spherical harmonics index for lmt1
!      nspher2   : spherical harmonics index for lmt2
!      ifact = 1 : nspher1 < nspher2
!      ifact = -1: nspher1 > nspher2
!      index     : dipole_dxyz_us index
!            = 0 : for nspher1=nspher2 case
!
       integer, intent(in)  :: it,lmt1,lmt2, nptrans
       integer, intent(out) :: index,ifact
       integer              :: lmtt1,il1,im1,tau1,nspher1
       integer              :: lmtt2,il2,im2,tau2,nspher2
       integer              :: nspher10, nspher20
       integer              :: tau10,tau20
       integer              :: iptrans

       index=0
       ifact=0

       !!if(printable) write(nfout,*) 'm_PP_tell_lmtt_l_m_tau_add:lmt1=',lmt1
       call m_PP_tell_lmtt_l_m_tau_add(lmt1,it,lmtt1,il1,im1,tau1,nspher1)
       !!if(printable) then
       !!   write(nfout,*) 'm_PP_tell_lmtt_l_m_tau_add:tau1,nspher1=',tau1,nspher1
       !!   write(nfout,*) 'm_PP_tell_lmtt_l_m_tau:lmt2=',lmt2
       !!end if
       call m_PP_tell_lmtt_l_m_tau(lmt2,it,lmtt2,il2,im2,tau2,nspher2)
       !!if(printable) write(nfout,*) 'm_PP_tell_lmtt_l_m_tau:tau2,nspher2=',tau2,nspher2

       if(nspher1>nspher2) then
          nspher20=nspher1
          nspher10=nspher2
          tau20=tau1
          tau10=tau2
          ifact=-1
       else
          nspher10=nspher1
          nspher20=nspher2
          tau10=tau1
          tau20=tau2
         ifact=1
       end if
       !!if(printable) write(nfout,*) 'loop iptans'
!      find core repair term with dipole_tau=tau, phase_ylm=nspher
       do iptrans=1,nptrans
          if(phase_ylm1(it,iptrans)==nspher10.and.phase_ylm2(it,iptrans)==nspher20) then
             if(dipole_tau1(it,iptrans)==tau10.and.dipole_tau2(it,iptrans)==tau20) then
                index=iptrans
                exit
              end if
          end if
       end do
       !!if(printable) write(nfout,*) 'end loop iptans'
     end subroutine find_ptrans_index_add_beta
 end subroutine calc_ptrans_TM_PP_ek

! ====================== KT_add ====================== 13.0S
 subroutine calc_ptrans_TM_PP_core2val_ek
!
!   calculate KS correction term for Troullier-Martin PP
!
    implicit none
    integer                                :: id_sname = -1
    integer                                :: ispin, it, lmt, lmt1, lmt2, il, il1, il2, ia
    integer                                :: ik, ii, ib, ib1, ilmta, p, p1
    integer                                :: ilmta_add, nspher1, nspher2
    integer                                :: icount, im1
    real(kind=DP)                          :: fac, eib, eib1
    real(kind=DP),allocatable,dimension(:,:,:) :: wkfsr, wkfsi, wkfsr_add, wkfsi_add

    call tstatc0_begin('calc_ptrans_TM_PP_ek ',id_sname)
    allocate(wkfsr(neg,nlmta,kv3)); wkfsr=0.0d0
    allocate(wkfsi(neg,nlmta,kv3)); wkfsi=0.0d0
    allocate(wkfsr_add(neg,nlmta_add,kv3)); wkfsr_add=0.0d0
    allocate(wkfsi_add(neg,nlmta_add,kv3)); wkfsi_add=0.0d0

    if(sw_use_add_proj==ON) then
       call m_ES_add_betar_dot_WFs(nfout)
       if(printable) &
       & write(nfout,'(1x,"!* fsr_add_l and fsi_add_l have been calculated")')
    else
       if(printable) then
          write(nfout,'(1x,"!* fsr_add_l and fsi_add_l cannot be callculated")')
          write(nfout,'(1x,"!* skip correction for additional projector ")')
       end if
       return
    end if

    if(npes >= 2) call mpi_barrier(mpi_comm_group,ierr)

! make copy of fsr_l and fsi_l
#if 1
    do ik = 1, kv3, af+1
       do ib = 1, neg
          if(map_ek(ib,ik) == mype) then
             do ilmta=1, nlmta
                wkfsr(ib,ilmta,ik) = fsr_l(map_z(ib),ilmta,ik)
                wkfsi(ib,ilmta,ik) = fsi_l(map_z(ib),ilmta,ik)
             end do
          end if
       end do
    end do
    call MPI_Allreduce(MPI_IN_PLACE, wkfsr,neg*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
    call MPI_Allreduce(MPI_IN_PLACE, wkfsi,neg*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
#else
    do ik = 1, kv3, af+1
       do ib = 1, neg
          if(map_ek(ib,ik) == mype) then
               do ilmta=1, nlmta
                  wkfsr(ib,ilmta,ik) = fsr_l(map_z(ib),ilmta,ik)
                  wkfsi(ib,ilmta,ik) = fsi_l(map_z(ib),ilmta,ik)
               end do
               if(map_ek(ib,ik) /= 0) then
                   call mpi_send(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
                   call mpi_send(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
               end if
          else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
            call mpi_recv(wkfsr,neg*nlmta*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
            call mpi_recv(wkfsi,neg*nlmta*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
         end if
         if(npes >= 2)  then
              call mpi_bcast(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
              call mpi_bcast(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
         end if
       end do
    end do
#endif
! make copy of fsr_add_l and fsi_add_l
    do ik = 1, kv3, af+1
       do ib = 1, neg
          if(map_ek(ib,ik) == mype) then
               do ilmta_add = 1, nlmta_add
                  wkfsr_add(ib,ilmta_add,ik) = fsr_add_l(map_z(ib),ilmta_add,ik)
                  wkfsi_add(ib,ilmta_add,ik) = fsi_add_l(map_z(ib),ilmta_add,ik)
               end do
               if(map_ek(ib,ik) /= 0) then
                  call mpi_send(wkfsr_add,neg*nlmta_add*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
                  call mpi_send(wkfsi_add,neg*nlmta_add*kv3,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
               end if
          else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
            call mpi_recv(wkfsr_add,neg*nlmta_add*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
            call mpi_recv(wkfsi_add,neg*nlmta_add*kv3,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
         end if
         if(npes >= 2)  then
              call mpi_bcast(wkfsr_add,neg*nlmta_add*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
              call mpi_bcast(wkfsi_add,neg*nlmta_add*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
         end if
       end do
    end do


! ***********************
    ia = atom_to_probe
    it = ityp( atom_to_probe )
! ***********************

! skip if PP has no local orbital
    if(ilocal_l(it)==0) then
       if(printable) then
          write(nfout,'(1x,"!* ilocal =0 for it = ",i3)') it
          write(nfout,'(1x,"!* additional projector correction is skipped")')
       end if
    end if

! check <additional projector|WF> for l=ilocal
    do lmt=1,ilmt_add(it)
       il=ltp_add(lmt,it)
       if(il==ilocal_l(it)) then
          exit
       else
          if(lmt== ilmt_add(it)) then
             if(printable) then
                write(nfout,'(1x,"!* additional projector for for iloc is not given for it = ",i3)') it
                write(nfout,'(1x,"!* calculate additional projector for iloc")')
             end if
             stop
          end if
       end if
    end do

! calculate correction for local orbital
! add sum[<WF1|additional projector>p<beta|WF2>] term

    icount=0
    do ii = 1, 3
       do ispin = 1, nspin, af+1
          do ik = ispin, kv3-nspin+ispin, nspin
             do ib = 1, neg
                do ib1 = 1, num_core_states
                   if(nrd_efermi==1) then
                      eib=e2_mpi(ib,ik)
                      eib1=ene_core_states(ib1)
                      if(eib.gt.efermi) call calc_ptrans_TM_PP_ek_core_1
                   else
                      call calc_ptrans_TM_PP_ek_core_1
                   end if
                end do
             end do
          end do
       end do
    end do

    if(printable) then
       write(nfout,'(1x,"!* number of additional ppc1 terms = ", i12)') icount
    endif

    deallocate(wkfsr); deallocate(wkfsi);  deallocate(wkfsr_add); deallocate(wkfsi_add)

    call tstatc0_end(id_sname)

  contains
     subroutine calc_ptrans_TM_PP_ek_core_1
       integer :: iv, ic, ifind
       integer :: index
       integer :: il1, im1, il2, im2, t1, t2
       integer :: nspher_core_orb, nspher_val_orb
       real(kind=DP) :: c1, cosph, sinph

       call find_ind_cb_only2( ib, ic, nk_in_the_process+ik-1,ifind )
       iv = ib1

       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
       end if

       il2 = qnum_l_to_probe +1

       Do im2=1, 2*qnum_l_to_probe +1
          if ( ndim_spinor_core_states == 1 ) then
             if ( im2 /= iv ) cycle
          endif

          nspher_core_orb = (il2 -1)**2 + im2

          c1 = cps(ia,1)*vkxyz(ik,1,CARTS) &
               & +cps(ia,2)*vkxyz(ik,2,CARTS) &
               & +cps(ia,3)*vkxyz(ik,3,CARTS)!
          cosph = cos(c1);  sinph = -sin(c1)

          do lmt1 = 1, ilmt_add(it)          ! valence orb
             il1 = ltp_add(lmt1,it);  im1 = mtp_add(lmt1,it);  t2 = 1

             nspher_val_orb = (il1 -1)**2 + im1

             if(il1/=ilocal_l(it)) cycle

! lmt1 -> index for additional projector
             call m_CLS_find_ptrans_indx_core2val( qnum_n_to_probe, &
                  &                                nspher_core_orb, nspher_val_orb, &
                  &                                t2, index )
             
             if(index==0) cycle
             
             fac = real(iwei(ia),kind=DP)
             p =  lmta_add(lmt1,ia)
             p1 = im2

             ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) &
                  & + fac*(dipole_dxyz_core2val(index,ii)) &
                  &      *( wkfsr_add(ib,p,ik) *fsi_core_states(ib1,p1,ik) &
                  &        -wkfsi_add(ib,p,ik) *fsr_core_states(ib1,p1,ik) )

             ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) &
                  & -1.0d0 *fac *(dipole_dxyz_core2val(index,ii)) &
                  &        *( wkfsr_add(ib,p,ik) *fsr_core_states(ib1,p1,ik) &
                  &          +wkfsi_add(ib,p,ik) *fsi_core_states(ib1,p1,ik) )

             icount=icount+1
          end do
       end Do

     end subroutine calc_ptrans_TM_PP_ek_core_1

   end subroutine calc_ptrans_TM_PP_core2val_ek
! ============================================================= 13.0S

 subroutine smearing_method(nfout,nstep)
    implicit none
!
!  subroutine for parabolic and gaussian smearing for Brillouin zone integration
!
!  The original program is subroutine m_ESoc_fermi_parabolic
!  T. Hamada(Univ. Tokyo) March 14, 2004
!
    integer, intent(in)                      :: nfout, nstep
    integer                                  :: ispin, ik, ie
    integer                                  :: istep, norder
    integer                                  :: i
    integer                                  :: ieigsta, ieigend, jeigsta, jeigend
    real(kind=DP),allocatable,dimension(:,:) :: temp_mpi, eko_mpi                   ! MPI
    real(kind=DP)                            :: sum_qwgt_ek, fac, e0
    real(kind=DP),dimension(nstep,6)         :: tint
 
    allocate(eko_mpi  (neg,kv3)); eko_mpi = 0.d0

    call set_ieig_jeig_staend

    if(printable) then
       if(way_BZintegral==PARABOLIC_B) then
          write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
       end if
       if(way_BZintegral==GAUSSIAN_B) then
          write(nfout,'(/1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
       end if
       write(nfout,'(1x, " ieigsta = ",i3,3x," ieigend = ",i3)') ieigsta, ieigend
       write(nfout,'(1x, " jeigsta = ",i3,3x," jeigend = ",i3)') jeigsta, jeigend
       write(nfout,'(1x, " width = ",f10.5)') width
    end if
    call reset_scissors_if_metallic(nfout)

!   parabolic smearing
    if(way_BZintegral==PARABOLIC_B) then
       do ispin = 1, nspin
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle
          tint=0.0d0
          call parabolic_smearing              ! -(contained here)
          imeps(1:nstep,1:6)=imeps(1:nstep,1:6)+tint(1:nstep,1:6)
          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,'(2x,"ispin = ",i3)') ispin
          end if
       end do
    end if

!   gaussian smearing
    if(way_BZintegral==GAUSSIAN_B) then
       do ispin = 1, nspin
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle
          tint=0.0d0
          call gaussian_smearing              ! -(contained here)
          imeps(1:nstep,1:6)=imeps(1:nstep,1:6)+tint(1:nstep,1:6)
          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,'(1x,"ispin = ",i3)') ispin
          end if
       end do
    end if

    sum_qwgt_ek=0.0d0
    do ispin = 1, nspin
       do ik=ispin, kv3_ek-nspin+ispin, nspin
          sum_qwgt_ek=sum_qwgt_ek+qwgt_ek(ik)
       end do
    end do
    sum_qwgt_ek=sum_qwgt_ek/nspin
    if(printable) then
       write(nfout,'(1x,"!* sum of qwgt_ek = ",f10.5)') sum_qwgt_ek
       write(nfout,'(1x,"!* unit cell volume = ",f20.12)') univol
    end if

    e0=vacuum_permittivity_in_au 
!   e0=0.079577471544205d0 ! vacuum permittivity in au
    fac=2.0d0*PAI/(e0*univol*sum_qwgt_ek)

    if(nspin==1) then
       imeps(1:nstep,1:6)=fac*imeps(1:nstep,1:6)
    else
       imeps(1:nstep,1:6)=0.5d0*fac*imeps(1:nstep,1:6)
    end if

    deallocate(eko_mpi)

    contains
     subroutine parabolic_smearing
       integer       :: k, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, weight, dea
       real(kind=DP) :: fieig, fjeig
       do k = ispin, kv3_ek-nspin+ispin, nspin
          do ieig = ieigsta, ieigend
             ei = eb_ek(k,ieig)
             do jeig =jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if(ei<=efermi.and.ej>efermi) then
                   call find_ind_vb_and_cb(ieig,jeig,iv,ic,k,ifind)
                   if(ifind/=1) then
                     if(printable) &
                     & write(nfout,'(1x,"!!* index of valence or conduction band is not found &
                     & UVSOR-Epsilon STOP at parabolic_smearing")')
                      stop
                   end if
                   if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
                   dea=ej-ei+scissor
                   fieig= occ_mpi_ek(n2_mpi_ek(ieig,k),k)/(kv3_ek*qwgt_ek(k))
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k)/(kv3_ek*qwgt_ek(k))
                   do istep = 1, nstep
                      call width2(e(istep),dea,width,t,weight)  ! -(b_Fermi)
                      tint(istep,1:6)=tint(istep,1:6)+t*wspin*trm2(k,iv,ic,1:6,ispin) &
                    & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine parabolic_smearing

     subroutine gaussian_smearing
       integer       :: k, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, dea, sigma
       real(kind=DP) :: fieig, fjeig
       do k = ispin, kv3_ek-nspin+ispin, nspin
          do ieig = ieigsta, ieigend
             ei = eb_ek(k,ieig)
             do jeig = jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if(ei<=efermi.and.ej>efermi) then
                   call find_ind_vb_and_cb(ieig,jeig,iv,ic,k,ifind)
                   if(ifind/=1) then
                     if(printable) &
                    & write(nfout,'(1x,"!!* index of valence or conduction band is not found &
                    & UVSOR-Epsilon STOP at gaussian_smearing")')
                      stop
                   end if
                   if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
                   dea=ej-ei+scissor
                   fieig= occ_mpi_ek(n2_mpi_ek(ieig,k),k)/(kv3_ek*qwgt_ek(k))
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k)/(kv3_ek*qwgt_ek(k))
                   do istep = 1, nstep
                      call gaussian_smearing_core(e(istep),dea,width,t,sigma)         ! contained here
                      tint(istep,1:6)=tint(istep,1:6)+t*wspin*trm2(k,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine gaussian_smearing

     subroutine set_ieig_jeig_staend
       if(band_i==0.and.band_f==0) then
          ieigsta=1
          ieigend=neg
          jeigsta=1
          jeigend=neg
       else
          ieigsta=band_i
          ieigend=band_i
          jeigsta=band_f
          jeigend=band_f
       end if
     end subroutine set_ieig_jeig_staend
 end subroutine smearing_method

 subroutine smearing_method_mpi(nfout,nstep)
    implicit none
!
!  subroutine for parabolic and gaussian smearing for Brillouin zone integration
!
!  The parallelized version of smearing method
!  T. Hamada(Univ. Tokyo) May 25, 2007
!
    integer, intent(in)                      :: nfout, nstep
    integer                                  :: ispin, ik, ie, ipes
    integer                                  :: istep, istep_l, norder
    integer                                  :: i, nl, nl0, ista, istend
    integer                                  :: ieigsta, ieigend, jeigsta, jeigend
    real(kind=DP),allocatable,dimension(:,:) :: temp_mpi, eko_mpi                   ! MPI
    real(kind=DP)                            :: sum_qwgt_ek, fac, e0
    real(kind=DP),allocatable, dimension(:,:)    :: tint

    allocate(eko_mpi  (neg,kv3)); eko_mpi = 0.d0

    call set_ieig_jeig_staend

    nl = nstep/npes
    nl0 = nl + (nstep - nl*npes)

    allocate(tint(nl0,6)) 

    if(printable) then
       if(way_BZintegral==PARABOLIC_B) then
          write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
       end if
       if(way_BZintegral==GAUSSIAN_B) then
          write(nfout,'(/1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
       end if
       write(nfout,'(1x, " ieigsta = ",i3,3x," ieigend = ",i3)') ieigsta, ieigend
       write(nfout,'(1x, " jeigsta = ",i3,3x," jeigend = ",i3)') jeigsta, jeigend
       write(nfout,'(1x, " width = ",f10.5)') width
       write(nfout,'(1x, " npes = ",i4)') npes
       write(nfout,'(1x, " nl0 = ",i4,3x," nl = ",i4)') nl0, nl
    end if
    call reset_scissors_if_metallic(nfout)

!   parabolic smearing
    if(way_BZintegral==PARABOLIC_B) then
       do ispin = 1, nspin
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle
          if(mype == 0) then
             tint = 0.0d0
             call set_ista_istend
             if(ipri>=2) write(nfout,'(1x," ipes = ",i4," : ista = ",i4,1x,"istend = ",i4)') mype, ista, istend
             call parabolic_smearing_mpi
             imeps(ista:istend,1:6)=imeps(ista:istend,1:6)+tint(1:nl0,1:6)
             if(ipriepsilon>=2) write(nfout,'(2x,i4,1x,"data have been processed by ipes = 0")') nl0
             do ipes = 1, npes - 1
                call mpi_recv(tint,nl0*6,mpi_double_precision,ipes,1,mpi_comm_group,istatus,ierr)
                ista = 1 + nl0 + (ipes-1)*nl
                istend = ista + nl - 1
                imeps(ista:istend,1:6)=imeps(ista:istend,1:6)+tint(1:nl,1:6) 
                if(ipriepsilon>=2) write(nfout,'(1x,"!*MPI",i4,1x,"imeps data have been received from ipes = ",i3)') nl, ipes
             end do
          else
             tint = 0.0d0
             call set_ista_istend
             if(ipri>=2) write(nfout,'(1x," ipes = ",i4," : ista = ",i4,1x,"istend = ",i4)') mype, ista, istend
             call parabolic_smearing_mpi
             call mpi_send(tint,nl0*6,mpi_double_precision,0,1,mpi_comm_group,ierr)
             if(ipriepsilon>=2) write(nfout,'(1x,"!*MPI",i4,1x,"imeps data have been processed by ipes = " &
                  &                                            ,i3,1x,"and sent to ipes = 0")') nl, mype
          end if

          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,'(2x,"ispin = ",i3)') ispin
          end if
       end do
    end if

!   gaussian smearing
    if(way_BZintegral==GAUSSIAN_B) then
       do ispin = 1, nspin
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle
          if(mype == 0) then
             tint=0.0d0
             call set_ista_istend
             write(nfout,'(1x," ipes = ",i4," : ista = ",i4,1x,"istend = ",i4)') mype, ista, istend
             call gaussian_smearing_mpi              ! -(contained here)
             imeps(ista:istend,1:6)=imeps(ista:istend,1:6)+tint(1:nl0,1:6)
             write(nfout,'(2x,i4,1x,"data have been processed by ipes = 0")') nl0
             do ipes = 1, npes - 1
                call mpi_recv(tint,nl0*6,mpi_double_precision,ipes,1,mpi_comm_group,istatus,ierr)
                ista = 1 + nl0 + (ipes-1)*nl
                istend = ista + nl - 1
                imeps(ista:istend,1:6)=imeps(ista:istend,1:6)+tint(1:nl,1:6)
                if ( ipriepsilon >=2 ) then
                   write(nfout,'(1x,"!*MPI",i4,1x,"imeps data have been received from ipes = ",i3)') nl, ipes
                endif
             end do
          else
             tint = 0.0d0
             call set_ista_istend
             write(nfout,'(1x," ipes = ",i4," : ista = ",i4,1x,"istend = ",i4)') mype, ista, istend
             call gaussian_smearing_mpi
             call mpi_send(tint,nl0*6,mpi_double_precision,0,1,mpi_comm_group,ierr)
             if ( ipriepsilon >=2 ) then
                write(nfout,'(1x,"!*MPI",i4,1x,"imeps data have been processed by ipes = ",i3,1x,"and sent to ipes = 0")') nl, mype
             endif
          end if

          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,'(1x,"ispin = ",i3)') ispin
          end if
       end do
    end if

    sum_qwgt_ek=0.0d0
    do ispin = 1, nspin
       do ik=ispin, kv3_ek-nspin+ispin, nspin
          sum_qwgt_ek=sum_qwgt_ek+qwgt_ek(ik)
       end do
    end do
    sum_qwgt_ek=sum_qwgt_ek/nspin
    if(printable) then
       write(nfout,'(1x,"!* sum of qwgt_ek = ",f10.5)') sum_qwgt_ek
       write(nfout,'(1x,"!* unit cell volume = ",f20.12)') univol
    end if

    e0=vacuum_permittivity_in_au
!   e0=0.079577471544205d0 ! vacuum permittivity in au
    fac=2.0d0*PAI/(e0*univol*sum_qwgt_ek)

    imeps(1:nstep,1:6)=fac*imeps(1:nstep,1:6)

    deallocate(eko_mpi)

    contains
     subroutine parabolic_smearing_mpi
       integer       :: k, k2, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, weight, dea
       real(kind=DP) :: fieig, fjeig

       do k = ispin, kv3_ek-nspin+ispin, nspin
          k2 = (k-1)/nspin + 1
          do ieig = ieigsta, ieigend
             ei = eb_ek(k,ieig)
             do jeig =jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if(ei<=efermi.and.ej>efermi) then
                   call find_ind_vb_and_cb(ieig,jeig,iv,ic,k,ifind)
                   if(ifind/=1) then
                     if(printable) &
                     & write(nfout,'(1x,"!!* index of valence or conduction band is not found &
                     & UVSOR-Epsilon STOP at parabolic_smearing")')
                      stop
                   end if
                   if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
                   dea=ej-ei+scissor
                   fieig= occ_mpi_ek(n2_mpi_ek(ieig,k),k)/(kv3_ek*qwgt_ek(k))
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k)/(kv3_ek*qwgt_ek(k))
                   do istep = ista, istend
                      call width2(e(istep),dea,width,t,weight)  ! -(b_Fermi)
                      if(mype ==0) istep_l = istep
                      if(mype /=0) istep_l = istep - nl0 - (mype-1)*nl
                      tint(istep_l,1:6)=tint(istep_l,1:6)+t*wspin*trm2(k2,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine parabolic_smearing_mpi

     subroutine gaussian_smearing_mpi
       integer       :: k, k2, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, dea, sigma
       real(kind=DP) :: fieig, fjeig

       do k = ispin, kv3_ek-nspin+ispin, nspin
          k2 = (k-1)/nspin + 1
          do ieig = ieigsta, ieigend
             ei = eb_ek(k,ieig)
             do jeig = jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if(ei<=efermi.and.ej>efermi) then
                   call find_ind_vb_and_cb(ieig,jeig,iv,ic,k,ifind)
                   if(ifind/=1) then
                     if(printable) &
                    & write(nfout,'(1x,"!!* index of valence or conduction band is not found &
                    & UVSOR-Epsilon STOP at gaussian_smearing")')
                      stop
                   end if
                   if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
                   dea=ej-ei+scissor
                   fieig= occ_mpi_ek(n2_mpi_ek(ieig,k),k)/(kv3_ek*qwgt_ek(k))
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k)/(kv3_ek*qwgt_ek(k))
                   do istep = ista, istend
                      call gaussian_smearing_core(e(istep),dea,width,t,sigma)         ! contained here
                      if(mype ==0) istep_l = istep
                      if(mype /=0) istep_l = istep - nl0 - (mype-1)*nl
                      tint(istep_l,1:6)=tint(istep_l,1:6)+t*wspin*trm2(k2,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine gaussian_smearing_mpi

     subroutine set_ieig_jeig_staend
       if(band_i==0.and.band_f==0) then
          ieigsta=1
          ieigend=neg
          jeigsta=1
          jeigend=neg
       else
          ieigsta=band_i
          ieigend=band_i
          jeigsta=band_f
          jeigend=band_f
       end if
     end subroutine set_ieig_jeig_staend
    
     subroutine set_ista_istend
        if(mype == 0) then
           ista = 1
           istend = nl0
        else
           ista = 1 + nl0 + (mype-1)*nl
           istend = ista + nl -1
        end if
     end subroutine set_ista_istend
 end subroutine smearing_method_mpi

 subroutine smearing_method_mpi2(nfout,nstep)
    implicit none
!
!  subroutine for parabolic and gaussian smearing for Brillouin zone integration
!
!  The parallelized version of smearing method
!  T. Hamada(Univ. Tokyo) May 25, 2007
!
    integer, intent(in)                      :: nfout, nstep
    integer                                  :: ispin, ik, ie, ipes, nspin_kt
    integer                                  :: istep, norder
    integer                                  :: i 
    integer                                  :: ieigsta, ieigend, jeigsta, jeigend
    real(kind=DP),allocatable,dimension(:,:) :: eko_mpi                   ! MPI
    real(kind=DP)                            :: sum_qwgt_ek, fac, e0
    real(kind=DP),allocatable, dimension(:,:)    :: tint, imeps_wk

    allocate(eko_mpi  (neg,kv3)); eko_mpi = 0.d0
    allocate(imeps_wk(nstep,6)) ; imeps_wk = 0.0d0
    allocate(tint(nstep,6)) ; tint = 0.0d0

! === KT_add ===== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================ 2014/09/22

    call set_ieig_jeig_staend

    if(printable) then
       if(way_BZintegral==PARABOLIC_B) then
          write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
       end if
       if(way_BZintegral==GAUSSIAN_B) then
          write(nfout,'(/1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
       end if
       write(nfout,'(1x, " ieigsta = ",i3,3x," ieigend = ",i3)') ieigsta, ieigend
       write(nfout,'(1x, " jeigsta = ",i3,3x," jeigend = ",i3)') jeigsta, jeigend
       write(nfout,'(1x, " width = ",f10.5)') width
       write(nfout,'(1x, " npes = ",i4)') npes
    end if

! ====== KT_mod ========= 13.0S
!    call reset_scissors_if_metallic(nfout)
!
    if ( sw_corelevel_spectrum == OFF ) then
       call reset_scissors_if_metallic(nfout)
    endif
! ======================= 13.0S

!   parabolic smearing
    if(way_BZintegral==PARABOLIC_B) then
       do ispin = 1, nspin_kt
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle

! ========= KT_mod ============= 13.0S
!          call parabolic_smearing_mpi
!
          if ( sw_corelevel_spectrum == ON ) then
             call parabolic_smearing_core2val_mpi
          else
             call parabolic_smearing_mpi
          endif
! ============================== 13.0S

          imeps(1:nstep,1:6)=imeps(1:nstep,1:6)+tint(1:nstep,1:6)
          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,'(2x,"ispin = ",i3)') ispin
          end if
       end do
    end if

!   gaussian smearing
    if(way_BZintegral==GAUSSIAN_B) then
       do ispin = 1, nspin_kt
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle

! ========= KT_mod ============= 13.0S
!          call gaussian_smearing_mpi              ! -(contained here)
!
          if ( sw_corelevel_spectrum == ON ) then
             call gaussian_smearing_core2val_mpi
          else
             call gaussian_smearing_mpi
          endif
! ============================== 13.0S

          imeps(1:nstep,1:6)=imeps(1:nstep,1:6)+tint(1:nstep,1:6)
          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,'(1x,"ispin = ",i3)') ispin
          end if
       end do
    end if

!   MPI
    if(mype == 0) then
       do ipes = 2, npes
          call mpi_recv(imeps_wk,nstep*6,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
          imeps(1:nstep,1:6) = imeps(1:nstep,1:6) + imeps_wk(1:nstep,1:6)
!          if(printable) then
          if(ipriepsilon>=2 .and. printable) then
             write(nfout,'(1x,"!* MPI",i5,1x,"data have been received from ipes = ",i3)') nstep, ipes
          endif
       end do
    else
       imeps_wk(1:nstep,1:6) = imeps(1:nstep,1:6)
       call mpi_send(imeps_wk,nstep*6,mpi_double_precision,0,1,mpi_comm_group,ierr)
       if ( ipriepsilon >=2 ) then
          write(nfout,'(1x,"!* MPI",i5,1x,"data have been sent to ipes = 0")') nstep
       endif
    end if

    sum_qwgt_ek=0.0d0
    do ispin = 1, nspin_kt
       do ik=ispin, kv3_ek-nspin+ispin, nspin
          sum_qwgt_ek=sum_qwgt_ek+qwgt_ek(ik)
       end do
    end do
!   sum_qwgt_ek=sum_qwgt_ek/nspin
    if(printable) then
       write(nfout,'(1x,"!* sum of qwgt_ek = ",f10.5)') sum_qwgt_ek
       write(nfout,'(1x,"!* unit cell volume = ",f20.12)') univol
    end if

    e0=vacuum_permittivity_in_au
!   e0=0.079577471544205d0 ! vacuum permittivity in au
    fac=2.0d0*PAI/(e0*univol*sum_qwgt_ek)

! === KT_add ==== 2014/09/22
    if ( noncol ) fac = fac /dble(ndim_spinor)
! =============== 2014/09/22

    imeps(1:nstep,1:6)=fac*imeps(1:nstep,1:6)

    deallocate(eko_mpi);  Deallocate(imeps_wk)

  contains

     subroutine parabolic_smearing_mpi
       integer       :: k, k2, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, weight, dea
       real(kind=DP) :: fieig, fjeig, kwtmp

       tint =0.0d0
       do k = ispin, kv3_ek-nspin+ispin, nspin
          k2 = (k-1)/nspin + 1
          if(map_k_eps(k2) /= mype) cycle

! ==== KT_add ==== 2014/09/22
          kwtmp = kv3_ek *qwgt_ek(k) /dble(ndim_spinor)
! ================ 2014/09/22

          do ieig = ieigsta, ieigend
             ei = eb_ek(k,ieig)
             do jeig =jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if(ei<=efermi.and.ej>efermi) then
                   call find_ind_vb_and_cb(ieig,jeig,iv,ic,k,ifind)
                   if(ifind/=1) then
                     if(printable) &
                     & write(nfout,'(1x,"!!* index of valence or conduction band is not found &
                     & UVSOR-Epsilon STOP at parabolic_smearing")')
                      stop
                   end if
                   if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
                   dea=ej-ei+scissor
                   fieig= occ_mpi_ek(n2_mpi_ek(ieig,k),k) /kwtmp
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k) /kwtmp

                   do istep = 1, nstep
                      call width2(e(istep),dea,width,t,weight)  ! -(b_Fermi)
                      tint(istep,1:6)=tint(istep,1:6)+t*wspin*trm2(k2,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine parabolic_smearing_mpi

! ============ KT_add =============== 13.0S
     subroutine parabolic_smearing_core2val_mpi
       integer       :: k, k2, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, weight, dea
       real(kind=DP) :: fieig, fjeig, kwtmp

       tint =0.0d0
       do k = ispin, kv3_ek-nspin+ispin, nspin
          k2 = (k-1)/nspin + 1
          if(map_k_eps(k2) /= mype) cycle

! ==== KT_add ==== 2014/09/22
          kwtmp = kv3_ek *qwgt_ek(k) /dble(ndim_spinor)
! ================ 2014/09/22

          do ieig = 1, num_core_states
             ei = ene_core_states(ieig)
             do jeig =jeigsta, jeigend
                ej = eb_ek(k,jeig)

                if(ej>efermi) then
                   call find_ind_cb_only( jeig,ic,k,ifind )
                   iv = ieig

                   if(ifind/=1) then
                     if(printable) &
                     & write(nfout,'(1x,"!!* index of conduction band is not found &
                     & UVSOR-Epsilon STOP at parabolic_smearing")')
                      stop
                   end if
#if 0
                   if (band_type(jeig,ispin)==FILLED_BAND) cycle
                   if (band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
#endif
                   dea=ej-ei+scissor

                   fieig= 1.0d0
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k) /kwtmp

                   do istep = 1, nstep
                      call width2(e(istep),dea,width,t,weight)  ! -(b_Fermi)
                      if ( t < 1.0D-12 ) t = 0.0d0

                      tint(istep,1:6)=tint(istep,1:6)+t*wspin*trm2(k2,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do

     end subroutine parabolic_smearing_core2val_mpi
! =================================================== 13.0S

     subroutine gaussian_smearing_mpi
       integer       :: k, k2, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, dea, sigma
       real(kind=DP) :: fieig, fjeig, kwtmp

       tint = 0.0d0
       do k = ispin, kv3_ek-nspin+ispin, nspin
          k2 = (k-1)/nspin +1
          if(map_k_eps(k2) /= mype) cycle

! ==== KT_add ==== 2014/09/22
          kwtmp = kv3_ek *qwgt_ek(k) /dble(ndim_spinor)
! ================ 2014/09/22

          do ieig = ieigsta, ieigend
             ei = eb_ek(k,ieig)
             do jeig = jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if(ei<=efermi.and.ej>efermi) then
                   call find_ind_vb_and_cb(ieig,jeig,iv,ic,k,ifind)
                   if(ifind/=1) then
                     if(printable) &
                    & write(nfout,'(1x,"!!* index of valence or conduction band is not found &
                    & UVSOR-Epsilon STOP at gaussian_smearing")')
                      stop
                   end if
                   if(band_type(ieig,ispin)==FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==HALF_FILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
                   if(band_type(ieig,ispin)==UNFILLED_BAND.and.band_type(jeig,ispin)==UNFILLED_BAND) cycle
                   dea=ej-ei+scissor
                   fieig= occ_mpi_ek(n2_mpi_ek(ieig,k),k) /kwtmp
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k) /kwtmp

                   do istep = 1, nstep
                      call gaussian_smearing_core(e(istep),dea,width,t,sigma)         ! contained here
                      tint(istep,1:6)=tint(istep,1:6)+t*wspin*trm2(k2,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine gaussian_smearing_mpi

! =============== KT_add ============== 13.0S
     subroutine gaussian_smearing_core2val_mpi
       integer       :: k, k2, ieig, jeig, iv, ic, ifind
       real(kind=DP) :: wspin = 1.d0, ei, ej, t, dea, sigma
       real(kind=DP) :: fieig, fjeig, kwtmp

       tint = 0.0d0
       do k = ispin, kv3_ek-nspin+ispin, nspin
          k2 = (k-1)/nspin +1
          if(map_k_eps(k2) /= mype) cycle

! ==== KT_add ==== 2014/09/22
          kwtmp = kv3_ek *qwgt_ek(k) /dble(ndim_spinor)
! ================ 2014/09/22

          do ieig = 1, num_core_states
             ei = ene_core_states(ieig)
             do jeig = jeigsta, jeigend
                ej = eb_ek(k,jeig)
                if (ej>efermi) then
                   call find_ind_cb_only(jeig,ic,k,ifind)
                   iv = ieig

                   if(ifind/=1) then
                     if(printable) &
                    & write(nfout,'(1x,"!!* index of conduction band is not found &
                    & UVSOR-Epsilon STOP at gaussian_smearing")')
                      stop
                   end if
#if 0
                   if (band_type(jeig,ispin)==FILLED_BAND) cycle
                   if (band_type(jeig,ispin)==HALF_FILLED_BAND) cycle
#endif
                   dea=ej-ei+scissor

                   fieig= 1.0d0
                   fjeig= occ_mpi_ek(n2_mpi_ek(jeig,k),k) /kwtmp
                   do istep = 1, nstep
                      call gaussian_smearing_core(e(istep),dea,width,t,sigma)         ! contained here
                      if ( t < 1.0D-12 ) t = 0.0d0

                      tint(istep,1:6)=tint(istep,1:6)+t*wspin*trm2(k2,iv,ic,1:6,ispin) &
                   & *fieig*(1.0d0-fjeig)*qwgt_ek(k)
                   end do
                end if
             end do
          end do
       end do
     end subroutine gaussian_smearing_core2val_mpi
! ======================================= 13.0S

     subroutine set_ieig_jeig_staend
       if(band_i==0.and.band_f==0) then
          ieigsta=1
          ieigend=neg
          jeigsta=1
          jeigend=neg
       else
          ieigsta=band_i
          ieigend=band_i
          jeigsta=band_f
          jeigend=band_f
       end if
     end subroutine set_ieig_jeig_staend
 end subroutine smearing_method_mpi2

 subroutine gaussian_smearing_core(e,dea,width,t,sigma)
    implicit none
    real(kind=DP),intent(in)  :: e, dea, width
    real(kind=DP),intent(out) :: t,sigma
    real(kind=DP)             :: ee,d,tsum
    ee=(e-dea)/width
    t=0.0d0
    t=exp(-1.0d0*ee**2)/dsqrt(PAI)
    t=t/width
    sigma = width/dsqrt(2.0d0)
 end subroutine gaussian_smearing_core

 subroutine reset_scissors_if_metallic(nfout)
    implicit none
!
!   reset scissor operator if the system is metallic
!
    integer, intent(in) :: nfout
    if(system/=METALLIC) return
    if(scissor/=0.0d0) then
       scissor = 0.0d0
       if(printable) write(nfout,'(1x," scissor_operator is reset to be 0.0d0 because the system is metallic")')
    end if
 end subroutine reset_scissors_if_metallic

 subroutine drude_term_eps
    implicit none
!
!   Drude term calculation
!
!   nelectron: number of metallic electrons
!   edensity : metellic electron density
!   plasma_f : plasma frequency of metallic electrons
!   sigma    : DC conductivity in atomic unit
!   tau_drude: relaxzation time of metallic electrons
!   i_drude  : imaginary part of Drude term
!   r_eps    : real part of Drude term
    integer :: ib, ik

    real(kind=DP) :: edensity,e0,t0,tau_drude,s0,sigma
    if(printable) write(nfout,'(" ---------- Drude term parameters  ----------")')

!    e0=0.079577471544205d0 ! vacuum permittivity in au
!    t0=2.41884326505d-17   ! time in au
    sigma = 0.d0
    e0 = vacuum_permittivity_in_au
    t0 = au_of_time

    if(icond==2.or.icond==3) call calc_metallic_ch
    if(nspin==1) metallic_ch=metallic_ch*2.0d0

    if(printable) then
       write(nfout,'(1x,"!* number of metallic electrons in unit cell = ", f10.5)') metallic_ch
    end if
    edensity=metallic_ch/univol

    if(plasma_f==0.0d0) then
       if(printable) write(nfout,'(1x,"!* calculate plasma frequency")')
       if(effective_m>0.0d0) then
          if(printable) write(nfout,'(1x,"!* effective mass of ",f10.5," is used ")') effective_m
       else
          if(printable) write(nfout,'(1x,"!* effective mass of electron is assumed to be 1.0d0")')
          effective_m=1.0d0
       end if
       plasma_f=dsqrt(4.0d0*PAI*edensity/effective_m)
       if(printable) then
          write(nfout,'(1x,"!* electron density = ",f10.5," /bohr**3")') edensity
          write(nfout,'(1x,"!* plasma frequency = ",f10.5," Hartree   ",f10.5," eV")') plasma_f, plasma_f*27.21167d0
       end if
       if(plasma_f==0.0d0) then
          if(printable) then
             write(nfout,'(1x,"!* plasma frequency is ZERO ")')
             write(nfout,'(1x,"!* Drude term analysis is skipped")')
          end if
          return
       end if
    else
       if(printable) write(nfout,'(1x,"!* plasma frequency of ",f10.5," is used ")') plasma_f
    end if

! calculate Drude damping factor
    if(dc_conductivity<=0.0d0.and.drude_damping<=0.0d0) then
       if(printable) write(nfout,'(1x, "!* default Drude damping factor of 0.10 eV is used")')
       call set_default_damping_factor
    end if
    if(dc_conductivity>0.0d0.and.drude_damping==0.0d0) then
       s0=0.4599848d7/(4.0d0*PAI) ! conductivity in au
       sigma=dc_conductivity/s0
       tau_drude=sigma*effective_m/edensity
       drude_damping=1.0d0/tau_drude
    else
       tau_drude=1.0d0/drude_damping
    end if

! output calculation parameters
    if(printable) then
       if(sigma/=0.0d0) then
          write(nfout,'(1x,"!* dc conductivity used in analysis = ",d12.5," au   = ",d12.5," ohm-1 m-1")') sigma, dc_conductivity
          write(nfout,'(1x,"!* drude damping factor = ",d12.5," au   ",d12.5," eV")') drude_damping, drude_damping*27.21167d0
          write(nfout,'(1x,"!* relaxzation time of electrons = ",d12.5," au   ",d12.5," sec")') tau_drude, tau_drude*t0
       else
          write(nfout,'(1x,"!* Drude damping factor = ",d12.5," au   ",d12.5," eV")') drude_damping, drude_damping*27.21167d0
          write(nfout,'(1x,"!* relaxzation time of electrons = ",d12.5," au   ",d12.5," sec")') tau_drude, tau_drude*t0
       end if
    end if

  contains

     subroutine calc_metallic_ch
       integer :: ispin, it, ia, lmt, il, ik, ib, nspin_kt

       metallic_ch=0.0d0
       nspin_kt = nspin /dble(ndim_spinor)

       do ispin = 1, nspin_kt
          do ik = ispin, kv3_ek-nspin+ispin, nspin
             do ib = 1, neg
                if(band_type(ib,ispin)==HALF_FILLED_BAND) then
                   metallic_ch = metallic_ch + occ_mpi_ek(n2_mpi_ek(ib,ik),ik)
                end if
             end do
          end do
       end do
       metallic_ch = metallic_ch/real(kv3_ek/ndim_spinor,kind=DP)

     end subroutine calc_metallic_ch

     subroutine set_default_damping_factor
! default Drude damping factor = 0.10eV is set
       drude_damping = 0.0036749d0
     end subroutine set_default_damping_factor
 end subroutine drude_term_eps

 subroutine calc_drude
    integer       :: istep
    real(kind=DP) :: tau_drude2,plasma_f2
    tau_drude=1.0d0/drude_damping
    tau_drude2=tau_drude**2
    plasma_f2=plasma_f**2
    if(ndrude==1) then
! Drude terms are calculated for correction
       istep = 1
       r_drude(1)=-plasma_f2*tau_drude2/(1.0d0+e(istep)**2*tau_drude2)
       i_drude(1)=plasma_f**2*tau_drude*1.d60
       do istep=2,nstep
          r_drude(istep)=-plasma_f2*tau_drude2/(1.0d0+e(istep)**2*tau_drude2)
          i_drude(istep)=plasma_f**2*tau_drude/(e(istep)*(1.0d0+e(istep)**2*tau_drude2))
       end do
    else if(ndrude==2) then
! Pure Drude terms are calculated
       istep = 1
       r_drude(1)=1.0d0-plasma_f2*tau_drude2/(1.0d0+e(istep)**2*tau_drude2)
       i_drude(1)=plasma_f**2*tau_drude*1.d60
       do istep=2,nstep
          r_drude(istep)=1.0d0-plasma_f2*tau_drude2/(1.0d0+e(istep)**2*tau_drude2)
          i_drude(istep)=plasma_f**2*tau_drude/(e(istep)*(1.0d0+e(istep)**2*tau_drude2))
       end do
    end if
 end subroutine calc_drude

 subroutine set_eb_ek_drude
    implicit none
!
!   save band energy for Drude calculation
!
    integer       :: ik, nbi, nbj
    real(kind=DP) :: ebi, ebj
    do ik = 1, kv3, max( af+1, ndim_spinor )
       do nbi = 1, neg
          ebi = e2_mpi(n2_mpi(nbi,ik),ik)
          do nbj = 1, neg
             ebj = e2_mpi(n2_mpi(nbj,ik),ik)
             call set_b_and_eb(nbi,nbj,ebi,ebj)
          end do
       end do
    end do
  contains
     subroutine set_b_and_eb(nbi,nbj,ebi,ebj)
       integer,intent(in) :: nbi,nbj
       real(DP),intent(in) :: ebi,ebj
       eb_ek(nk_in_the_process+ik-1,nbi)=ebi
       eb_ek(nk_in_the_process+ik-1,nbj)=ebj
     end subroutine set_b_and_eb
 end subroutine set_eb_ek_drude

 subroutine sum_eps_plus_drude
    implicit none
!
! calculate sum of eps and drude term
!
    integer :: istep
       do istep = 1, nstep
          imeps(istep,1:3)=imeps(istep,1:3)+i_drude(istep)
          reps(istep,1:3)= reps(istep,1:3)+r_drude(istep)
       end do
 end subroutine sum_eps_plus_drude

 subroutine calc_nlo
    implicit none
!
!   calculate nonlinear optical susceptibilities (SHG and THG)
!
    integer                               :: ispin, ik, istep, ipes
    real(DP)                              :: sum_qwgt_ek
    real(DP), allocatable, dimension(:,:) :: imchi2_wk, imchi3_wk

    if(nrd_efermi/=0) return
    if(printable) write(nfout,'(/1x,"<< UVSOR-Epsilon   NONLINEAR OPTICS CALCULATION START >>")')
    if(system==METALLIC) then
       if(printable) then
          write(nfout,'(1x,"!* The system is metallic")')
          write(nfout,'(1x,"!* Nonlinear optics calculation is skipped")')
       end if
       return
    end if
    if(band_i/=0.and.band_f/=0) then
       if(printable) then
          write(nfout,'(1x,"!* Bband decomposition is not possible in nonlinear optics calculation")')
          write(nfout,'(1x,"!* Calculation is skipped")')
       end if
       return
    end if
    call alloc_nlo_arrays
    call calc_ptrm
    if(printable) write(nfout,'(1x,"!* P transition moment have been generated")')

!
! SHG suceptibility calculation
!
    if(nlo == 1) then
! prepare for P transition moment and {pij,pji} factor
! E. Ghahramani, D. J. Moss, and J. E. Sipe, Phys. Rev. B, vol.43, pp. 8990 (1991).
       if(printable) write(nfout,'(1x,"!* Calculation of SHG susceptibilities")')

! virtual electron term
       if(virt_ex_type == ALL_TYPE .or. virt_ex_type == ELECTRON) then
          call virt_elec_process_SHG
       end if
! virtual hole term
       if(virt_ex_type == ALL_TYPE .or. virt_ex_type == HOLE) then
          call virt_hole_process_SHG
       end if

! MPI
! linear tetrahedron case -> MPI process is doe in linear_tetrahedron_SHG
!
       if(way_BZintegral/= L_TETRAHEDRON) then
          allocate(imchi2_wk(nstep,18)) ; imchi2_wk = 0.0d0
          if(mype == 0) then
             do ipes = 2, npes
                call mpi_recv(imchi2_wk,nstep*18,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
                imchi2(1:nstep,1:18) = imchi2(1:nstep,1:18) + imchi2_wk(1:nstep,1:18)
!                if(printable) then
                if(ipriepsilon>=2 .and. printable) then
                   write(nfout,'(1x,"!* MPI",i5,1x,"data have been received from ipes = ",i3)') nstep, ipes
                endif
             end do
          else
             imchi2_wk(1:nstep,1:18) = imchi2(1:nstep,1:18)
             call mpi_send(imchi2_wk,nstep*18,mpi_double_precision,0,1,mpi_comm_group,ierr)
             if ( ipriepsilon >=2 ) then
                write(nfout,'(1x,"!* MPI",i5,1x,"data have been sent to ipes = 0")') nstep
             endif
          end if
          deallocate(imchi2_wk)
       end if
       
       if(way_BZintegral == L_TETRAHEDRON) then
          if(nspin/=1) imchi2=imchi2/2.0d0
          imchi2 = imchi2/(4.0d0*PAI**3) 
       else
          sum_qwgt_ek=0.0d0
          do ispin = 1, nspin
             do ik=ispin, kv3_ek-nspin+ispin, nspin
                sum_qwgt_ek=sum_qwgt_ek+qwgt_ek(ik)
             end do
          end do
          if(printable) then
             write(nfout,'(1x,"!* sum of qwgt_ek = ",f10.5)') sum_qwgt_ek
             write(nfout,'(1x,"!* unit cell volume = ",f20.12)') univol
          end if
          imchi2 = 2.0d0*imchi2/(sum_qwgt_ek*univol)
       end if

! Symmetrize SHG susceptibility
       if(nsym/=0.or.way_BZintegral == PARABOLIC_B.or.way_BZintegral == GAUSSIAN_B) then
          call sym_chi2(nfout,nstep)
       end if
    end if

!
! THG susceptibility calculation
!
    if(nlo==2) then
! prepare for transition moment factor Re[pij*pjk*pkl*pli]
! D. J. Moss et al., Physical Review B, vol.41, 1452 (1991)

       if(printable) write(nfout,'(1x,"!* Calculation of THG susceptibilities")')
! virtual electron term
       if(virt_ex_type == ALL_TYPE.or.virt_ex_type == ELECTRON) then
          call virt_elec_process_THG
       end if
! virtual hole term
       if(virt_ex_type == ALL_TYPE.or.virt_ex_type == HOLE) then
          call virt_hole_process_THG
       end if
! three-level term
       if(virt_ex_type == ALL_TYPE.or.virt_ex_type == THREE_LEVEL) then
          call three_state_process_THG
       end if

!   MPI
       allocate(imchi3_wk(nstep,30)) ; imchi3_wk =0.0d0
       if(mype == 0) then
          do ipes = 2, npes
             call mpi_recv(imchi3_wk,nstep*30,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
             imchi3(1:nstep,1:30) = imchi3(1:nstep,1:30) + imchi3_wk(1:nstep,1:30)
             if ( ipriepsilon >=2 ) then
                write(nfout,'(1x,"!* MPI",i5,1x,"data have been received from ipes = ",i3)') nstep, ipes
             endif
          end do
       else
          imchi3_wk(1:nstep,1:30) = imchi3(1:nstep,1:30)
          call mpi_send(imchi3_wk,nstep*30,mpi_double_precision,0,1,mpi_comm_group,ierr)
          if ( ipriepsilon >=2 ) then
             write(nfout,'(1x,"!* MPI",i5,1x,"data have been sent to ipes = 0")') nstep
          endif
       end if
       deallocate(imchi3_wk)

! Symmetrize THG susceptibility
       if(nsym/=0.or.way_BZintegral == PARABOLIC_B.or.way_BZintegral == GAUSSIAN_B) then
         call sym_chi3(nfout,nstep)
       end if
       sum_qwgt_ek=0.0d0
       do ispin = 1, nspin
          do ik=ispin, kv3_ek-nspin+ispin, nspin
             sum_qwgt_ek=sum_qwgt_ek+qwgt_ek(ik)
          end do
       end do
!      sum_qwgt_ek=sum_qwgt_ek/nspin
       if(printable) then
          write(nfout,'(1x,"!* sum of qwgt_ek = ",f10.5)') sum_qwgt_ek
          write(nfout,'(1x,"!* unit cell volume = ",f20.12)') univol
       end if
       imchi3 = 2.0d0*imchi3/(sum_qwgt_ek*univol)
    end if

    call kkt_NLO(e_step,nstep)
    call dealloc_nlo_arrays
    call dealloc_trm
    if(printable) write(nfout,'(1x,"<< UVSOR-Epsilon   NONLINEAR OPTICS CALCULATION END >>",/)')
 end subroutine calc_nlo

 subroutine alloc_nlo_arrays
    allocate(ptrm(kv3_ek,neg,neg,3,2)); ptrm = 0.0d0
 end subroutine alloc_nlo_arrays
 
 subroutine calc_ptrm
    implicit none
!
!   calculate P-transition moment from R-transition moment
!
!   rtrm: R-transition moment
!   ptrm: P-transition moment
    integer                       :: ik, ieig1, ieig2, i1, i2, ifind
    real(kind=DP)                 :: e1, e2, e21
    real(kind=DP), dimension(3,2) :: rtrm
! calculate P transition moment
! T. Hamada (Univ. Tokyo), 2005.01.18
    do ik = 1, kv3_ek
       do ieig1 = 1, neg
          e1 = eb_ek(ik,ieig1)
          do ieig2 = 1, neg
             e2 = eb_ek(ik,ieig2)
             e21 = e2 - e1
             if(ieig1 == ieig2) then
                e21 = deg_omega
             else
! === KT_mod ================= 13.0R
!                if(dabs(e21) <= 1.0d-14) then
!                   if(e21>=0.0d0) then
!                       e21 = 1.0d-14
!                   else
!                       e21 = -1.0d-14
!                   end if
!                end if

                if(dabs(e21) <= delta_omega) then
                   if(e21>=0.0d0) then
                       e21 = delta_omega
                   else
                       e21 = -delta_omega
                   end if
                end if
! ========================== 13.0R
             end if
             call find_ind_vb_and_cb(ieig1,ieig2,i1,i2,ik,ifind)
             if(ifind/=1.and.printable) then
                write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at calc_ptrm")')
             end if
! rtrm -> <phi1|r|phi2>
! ptrm -> <phi1|p|phi2>
             rtrm(1:3,1:2) = trm(ik,i1,i2,1:3,1:2,1) + trm(ik,i1,i2,1:3,1:2,2)
             ptrm(ik,i1,i2,1:3,1) = -1.0d0*rtrm(1:3,2)*e21
             ptrm(ik,i1,i2,1:3,2) = rtrm(1:3,1)*e21
          end do
       end do
    end do
 end subroutine calc_ptrm

 subroutine sym_chi2(nfout,nstep)
    implicit none
!
! SHG susceptibility symmetrization
!
    integer, intent(in)      :: nfout, nstep
    integer                  :: istep, iopr, index
    integer                  :: ibkt, i, j, k, l, m, n, index0
    real(DP), dimension(18)  :: wkimchi2
    real(DP), dimension(3,3) :: u, tu
    do istep = 1, nstep
       wkimchi2 = 0.0d0
       do index = 1, 18
          call get_c_and_bkt_index(index,i,ibkt)
          call get_cindex(ibkt,j,k)
          do iopr = 1, nopr
             u(1:3,1:3) = op(1:3,1:3,iopr)
!!           tu(1:3,1:3) = transpose(u)
             do l = 1, 3
                do m = 1, 3
                   do n = 1, 3
                      call get_cind_of_chi2(l,m,n,index0)
                      wkimchi2(index) = wkimchi2(index) + u(i,l)*u(j,m)*u(k,n)*imchi2(istep,index0)
                   end do
                end do
             end do
          end do
       end do
       imchi2(istep,1:18) = wkimchi2(1:18)/real(nopr, kind=DP)
    end do
    write(nfout,'(1x,"!* SHG susceptibility tensor is symmetrized")')
 end subroutine sym_chi2

 subroutine sym_chi3(nfout,nstep)
    implicit none
!
! THG susceptibility symmetrization
!
! coded by T. Hamada 2005.10.19
    integer, intent(in)          :: nfout, nstep
    integer                      :: istep, iopr, index, index0, i, ibkt, j, k, l, m, n, o, p
    real(DP), dimension(30)      :: wkimchi3
    real(DP), dimension(3,3)     :: u
    do istep = 1, nstep
       wkimchi3 = 0.0d0
       do index = 1, 30
          call get_c_and_bkt3_index(index,i,ibkt)
          call get_cindex3(ibkt,j,k,l)
          do iopr = 1, nopr
             u(1:3,1:3) = op(1:3,1:3,iopr)
             do m = 1, 3
                do n = 1, 3
                   do o = 1, 3
                      do p = 1, 3
                         call get_cind_of_chi3(m,n,o,p,index0)
                         wkimchi3(index) = wkimchi3(index) + u(i,m)*u(j,n)*u(k,o)*u(l,p)*imchi3(istep,index0)
                      end do
                   end do
                end do
             end do
          end do
       end do
       imchi3(istep,1:30) = wkimchi3(1:30)/real(nopr, kind=DP)
    end do
    write(nfout,'(1x,"!* THG susceptibility tensor is symmetrized")')
 end subroutine sym_chi3

 subroutine kkt_NLO(delta,n)
    implicit none
!   Kramers-Kronig transformation of SHG susceptibilities chi2
!                                 or THG susceptibilities chi3
!   e:     photon energy
!   delta: photon energy step
!   imchi2: imaginary part of chi2
!   rechi2: real part of chi2
!   imchi3: imaginary part of chi3
!   rechi3: real part of chi3
!
!   modification of subroutine kkt_v   T. Hamada(Univ. Tokyo) 2005.10.19
!
    integer                :: i, j, k, l, n
    integer                :: ipes
    integer                :: local_n, local_n0, local_l, lstart, nl, lst, led
    real(DP), intent(in)   :: delta
    real(DP)               :: fac,fac1
    real(DP),dimension(18) :: sums
    real(DP),dimension(30) :: sumt
    real(DP), allocatable, dimension(:,:) :: rechi2_wk, rechi3_wk

    if(printable) write(nfout,'(1x," npes = ",i4)') npes
    if(printable) write(nfout,'(1x," n = ",i5)') n
    fac1=delta/PAI

! MPI setting
    local_n = n/npes
    local_n0 = local_n + (n-local_n*npes)
    if(mype == 0) then
      lstart = 1
      nl = local_n0
    else
      lstart = 1 + (mype-1)*local_n + local_n0
      nl = local_n
    end if

! perform KKT
! SHG case
    if(nlo==1) then
       call mpi_bcast(imchi2,n*18,mpi_double_precision,0,mpi_comm_group,ierr)
       allocate(rechi2_wk(local_n0,18)) ; rechi2_wk = 0.0d0

       do local_l=1, nl
          sums=0.0d0
          l = lstart + local_l -1
          do  k=1,n
             if(l==k) cycle
             fac=e(k)/(e(k)**2-e(l)**2)
             sums(1:18)=sums(1:18)+fac*imchi2(k,1:18)
          end do
          rechi2_wk(local_l,1:18)=sums(1:18)*2.0d0*fac1
       end do
  
       if(mype == 0) then
          rechi2(1:local_n0,1:18) = rechi2_wk(1:local_n0,1:18)
          if ( ipriepsilon >=2 ) then
             write(nfout,'(1x,"!* MPI",i5," data have been processed by ipes = 0")') local_n0
          endif
          do ipes = 2, npes
             call mpi_recv(rechi2_wk,local_n0*18,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
             lst = 1 + (ipes-2)*local_n + local_n0
             led = lst + local_n -1
             if ( ipriepsilon >=2 ) then
                write(nfout,'(1x,"!* MPI",i5," data have been received from ipes = ",i4)') local_n, ipes
             endif
          end do
       else
          call mpi_send(rechi2_wk,local_n0*18,mpi_double_precision,0,1,mpi_comm_group,ierr)
       end if

       deallocate(rechi2_wk)
       if(printable) &
       & write(nfout,'(1x,"!* real part of SHG susceptibility is obtained ")')
! THG case
    else if(nlo==2) then
       call mpi_bcast(imchi3,n*30,mpi_double_precision,0,mpi_comm_group,ierr)
       allocate(rechi3_wk(local_n0,30)) ; rechi3_wk = 0.0d0

       do local_l=1, nl
          sumt=0.0d0
          l = lstart + local_l -1
          do  k=1,n
             if(l==k) cycle
             fac=e(k)/(e(k)**2-e(l)**2)
             sumt(1:30)=sumt(1:30)+fac*imchi3(k,1:30)
          end do
          rechi3_wk(local_l,1:30)=sumt(1:30)*2.0d0*fac1
       end do

       if(mype == 0) then
          rechi3(1:local_n0,1:30) = rechi3_wk(1:local_n0,1:30)
          if ( ipriepsilon >=2 ) then
             write(nfout,'(1x,"!*MPI",i5," data have been processed by ipes = 0")') local_n0
          endif
          do ipes = 2, npes
             call mpi_recv(rechi3_wk,local_n0*30,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
             lst = 1 + (ipes-2)*local_n + local_n0
             led = lst + local_n -1
             if ( ipriepsilon >=2 ) then
                write(nfout,'(1x,"!* MPI",i5," data have been received from ipes = ",i4)') local_n, ipes
             endif
          end do
       else
          call mpi_send(rechi3_wk,local_n0*30,mpi_double_precision,0,1,mpi_comm_group,ierr)
       end if

       deallocate(rechi3_wk)
       if(printable) &
       & write(nfout,'(1x,"!* real part of THG susceptibility is obtained ")')
    end if
 end subroutine kkt_NLO

 subroutine virt_elec_process_SHG
    implicit none
    if(way_BZintegral == PARABOLIC_B.or.way_BZintegral == GAUSSIAN_B) then
          call smearing_SHG_ve(nfout,nstep)
    end if
    if(way_BZintegral == L_TETRAHEDRON) then
!   for virt_ex_type = ALL_TYPE case, electron and hole term is calculated
       call linear_tetrahedron_SHG(nfout,nstep)
    end if
 end subroutine virt_elec_process_SHG

 subroutine virt_hole_process_SHG
   implicit none
   integer :: istep
    if(way_BZintegral == PARABOLIC_B.or.way_BZintegral == GAUSSIAN_B) then
       call smearing_SHG_vh(nfout,nstep)
    end if
    if(way_BZintegral == L_TETRAHEDRON) then
!   if virt_ex_type == ALL_TYPE) calculation is skipped
       if(virt_ex_type == HOLE) call linear_tetrahedron_SHG(nfout,nstep)
    end if
 end subroutine virt_hole_process_SHG

 subroutine smearing_SHG_ve(nfout,nstep)
    implicit none
!
!   smearing calculation scheme for SHG susceptibility (electron term)
!
!   ref. Ed. Ghahramani, D. J. Moss, J. E. Sipe, Physical Review B vol. 43, 8990 (1991).
!   coded by T. Hamada(Univ. Tokyo) 2005.01.20

    integer, intent(in) :: nfout, nstep
    integer             :: ispin, istep, ipes
    real(kind=DP), dimension(nstep,18) :: chi2intA, chi2intBC

    if(printable) then
       if(way_BZintegral==PARABOLIC_B) then
          write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
       end if
       if(way_BZintegral==GAUSSIAN_B) then
          write(nfout,'(1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
       end if
       write(nfout,'(1x, "  width = ",f10.5, " Hartree")') width
       if(dres_method == OMIT) then
         write(nfout,'(1x, "  contributions from double resonance transitions are omitted")')
         write(nfout,'(1x, "  cut-off for omittion = ",f10.5," Hartree")') dres_cut_off
       endif
       if(dres_method == DAMPING) then
         write(nfout,'(1x, "  contributions from double resonance transitions are damped")')
         write(nfout,'(1x, "  damping factor = ",f10.5," Hartree")') dres_cut_off
       endif
       if(way_BZintegral /= L_TETRAHEDRON.and.smearing_fact == RESONANCE) &
       & write(nfout,'(1x, "  smearing factor type = resonance")')
       if(way_BZintegral /=L_TETRAHEDRON.and.smearing_fact == OFF_RESONANCE) &
       & write(nfout,'(1x, "  smearing factor type = off_resonance")')
       write(nfout,'(1x, "*excitation  = electron ")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA_TERM) write(nfout,'(1x,"  integration of omega SHG moment")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA2_TERM) write(nfout,'(1x,"  integration of omega2 SHG moment")')
    end if

!  gaussian/parabolic smearing
    do ispin = 1, nspin
       if(spin==MAJOR.and.ispin/=major_spin) cycle
       if(spin==MINOR.and.ispin/=minor_spin) cycle
       call smearing_SHG_ve_termA                                    ! -(contained here)
       imchi2(1:nstep,1:18) = imchi2(1:nstep,1:18) - PAI/2.0d0*chi2intA(1:nstep,1:18)
       call smearing_SHG_ve_termBC                                   ! -(contained here)
       imchi2(1:nstep,1:18) = imchi2(1:nstep,1:18) - PAI/2.0d0*chi2intBC(1:nstep,1:18)

       if(printable) then
          if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
          if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
          write(nfout,'(2x,"ispin = ",i3)') ispin
       end if
    end do

  contains
   subroutine smearing_SHG_ve_termA
     implicit none
!
!   calculate virtual electron term A of Im[chi2]
!
!   term A -> Im[pjl{pli,pij}]*delta(Eli-omega)/Eli**3(Eli + Eji) term
!
!   omega : photon energy
!   p     : tansition moment(P)
!   i     : valence band index ; j,l : conduction band index
!   Eab   : energy difference between band ea and eb
!   imbranket : Im[pjl{pli,pij}]
!
!   T. Hamada (Univ. Tokyo) 2006.10.23
!
     integer                        :: ik, ik2, ieig, jeig, leig, iterm
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, el, eli, eji
     real(kind=DP)                  :: omega, c2, sigma, efact
     real(kind=DP), dimension(3,2)  :: p1, p2, p3
     real(kind=DP), dimension(18)   :: impbkt, pefact

     chi2intA = 0.0d0
!    returm if nlo_term = OMEGA2_TERM
     if(nlo_term == OMEGA2_TERM) return

! SHG ve term A calculation
     iterm = 1
     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin + 1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do leig = 1, neg
                 if(band_type(leig,ispin)/=UNFILLED_BAND) cycle
                 el = eb_ek(ik,leig)
                 if(jeig == leig) cycle
! set energy difference
                 eli = el - ei + scissor
                 eji = ej - ei + scissor
! set moment factor
                 call set_ptrans(ik,leig,ieig,jeig,p1,p2,p3)         ! set P transition moment
                 call calc_impbkt(p1,p2,p3,impbkt)
                 if(smearing_fact == OFF_RESONANCE) then
                    if(dres_method == OMIT) call calc_SHG_energy_factor_omit(eli,eji,efact,iterm)
                    if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(eli,eji,efact,iterm)
                    pefact = impbkt*efact
                 end if
                 do istep = 1, nstep
                    omega = e(istep)
                    if(smearing_fact == RESONANCE) then
                       if(omega==0.0d0) omega=1.0d-4
                       if(dres_method == OMIT) call calc_SHG_energy_factor_omit(omega,eji,efact,iterm)
                       if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(omega,eji,efact,iterm)
                       pefact = impbkt*efact
                    end if
                    if(way_BZintegral == PARABOLIC_B) call width2(omega,eli,width,c2,weight)                ! b_Fermi
                    if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,eli,width,c2,sigma)
                    chi2intA(istep,1:18) = chi2intA(istep,1:18) + c2*wspin*pefact*qwgt_ek(ik)
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_SHG_ve_termA

   subroutine smearing_SHG_ve_termBC
     implicit none
!
!   calculate virtual electron terms B and C of Im[chi2]
!
!   term B = Im[pij{pjl,pli}]*delta(Eli-omega)/Eli**3*(2Eli - Eji)
!   term C = 16*Im[pij{pjl,pli}]*delta(Eji-2omega)/Eji**3*(Eji-2omega)
!
!   omega : photon energy
!   p     : tansition moment(P)
!   i     : valence band index ; j,l : conduction band index
!   Eab   : energy difference between band ea and eb
!   imbranket : Im[pjl{pli,pij}]
!
!   T. Hamada (Univ. Tokyo) 2006.10.23
!
     integer                        :: ik, ik2, ieig, jeig, leig, iterm
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, el, eli, eji
     real(kind=DP)                  :: omega, omega2, c2, sigma, efact, efact1
     real(kind=DP), dimension(3,2)  :: p1, p2, p3
     real(kind=DP), dimension(18)   :: impbkt, pefact

     chi2intBC = 0.0d0
! SHG ve term B and C calculation
     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin + 1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do leig = 1, neg
                 if(band_type(leig,ispin)/=UNFILLED_BAND) cycle
                 el = eb_ek(ik,leig)
                 if(jeig == leig) cycle
! set energy difference
                 eli = el - ei + scissor
                 eji = ej - ei + scissor
! set moment factor
                 call set_ptrans(ik,jeig,leig,ieig,p1,p2,p3)          ! set P transition moment
                 call calc_impbkt(p1,p2,p3,impbkt)
! SHG ve term B calculation
                 if(nlo_term /= OMEGA2_TERM) then
                    iterm = 2
                    if(smearing_fact == OFF_RESONANCE) then
                       if(dres_method == OMIT) call calc_SHG_energy_factor_omit(eli,eji,efact,iterm)
                       if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(eli,eji,efact,iterm)
                       pefact = impbkt*efact
                    end if
                    do istep = 1, nstep
                       omega  = e(istep)
                       if(smearing_fact == RESONANCE) then
                          if(omega==0.0d0) omega=1.0d-4
                          if(dres_method == OMIT) call calc_SHG_energy_factor_omit(omega,eji,efact,iterm)
                          if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(omega,eji,efact,iterm)
                          pefact = impbkt*efact
                       end if
                       if(way_BZintegral == PARABOLIC_B) call width2(omega,eli,width,c2,weight)          ! b_Fermi
                       if(way_BZintegral == GAUSSIAN_B) call gaussian_smearing_core(omega,eli,width,c2,sigma)
                       chi2intBC(istep,1:18) = chi2intBC(istep,1:18) + c2*wspin*pefact*qwgt_ek(ik)
                    end do
                 end if
! SHG ve term C calculation
                 if(nlo_term /= OMEGA_TERM) then
                    iterm = 3
                    if(smearing_fact == OFF_RESONANCE) then
                       if(dres_method == OMIT) call calc_SHG_energy_factor_omit(eli,eji,efact,iterm)
                       if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(eli,eji,efact,iterm)
                       pefact = impbkt*efact
                    end if
                    do istep = 1, nstep
                       omega2 = 2.0d0*e(istep)
                       if(smearing_fact == RESONANCE) then
                          if(omega2 == 0.0d0) omega2=1.0d-4
                          if(dres_method == OMIT) call calc_SHG_energy_factor_omit(eli,omega2,efact,iterm)
                          if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(eli,omega2,efact,iterm)
                          pefact = impbkt*efact
                       end if
                       if(way_BZintegral == PARABOLIC_B) call width2(omega2,eji,width,c2,weight)         ! b_Fermi
                       if(way_BZintegral == GAUSSIAN_B) call gaussian_smearing_core(omega2,eji,width,c2,sigma)
                       chi2intBC(istep,1:18) = chi2intBC(istep,1:18) + 16.0d0*c2*wspin*pefact*qwgt_ek(ik)
                    end do
                 end if
              end do
           end do
        end do
     end do
   end subroutine smearing_SHG_ve_termBC
 end subroutine smearing_SHG_ve

 subroutine smearing_SHG_vh(nfout,nstep)
    implicit none
!
!   smearing calculation scheme for SHG susceptibility calculation (virtual hole term)
!
    integer, intent(in) :: nfout, nstep
    integer             :: ispin, istep, ipes
    real(kind=DP), dimension(nstep,18) :: chi2intA, chi2intBC

    if(printable) then
       if(virt_ex_type /= ALL_TYPE) then
          if(way_BZintegral==PARABOLIC_B) then
             write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
          end if
          if(way_BZintegral==GAUSSIAN_B) then
             write(nfout,'(1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
          end if
          write(nfout,'(1x, "  width = ",f10.5)') width
          if(dres_method == OMIT) then
             write(nfout,'(1x, "  contributions from double resonance transitions are omitted")')
             write(nfout,'(1x, "  cut-off for omittion = ",f10.5," Hartree")') dres_cut_off
          endif
          if(dres_method == DAMPING) then
             write(nfout,'(1x, "  contributions from double resonance transitions are damped")')
             write(nfout,'(1x, "  damping factor = ",f10.5," Hartree")') dres_cut_off
          endif
          if(way_BZintegral /= L_TETRAHEDRON.and.smearing_fact == RESONANCE) &
         & write(nfout,'(1x, "  smearing factor type = resonance")')
          if(way_BZintegral /= L_TETRAHEDRON.and.smearing_fact == OFF_RESONANCE) &
         & write(nfout,'(1x, "  smearing factor type = off_resonance")')
         write(nfout,'(1x, "*excitation = hole ")')
      else
         write(nfout,'(1x, "*excitation = hole ")')
      end if
      if(nlo_term == ALL_TERM.or.nlo_term == OMEGA_TERM) write(nfout,'(1x,"  integration of omega SHG moment")')
      if(nlo_term == ALL_TERM.or.nlo_term == OMEGA2_TERM) write(nfout,'(1x,"  integration of omega2 SHG moment")')
    end if

!  gaussian/parabolic smearing
    do ispin = 1, nspin
       if(spin==MAJOR.and.ispin/=major_spin) cycle
       if(spin==MINOR.and.ispin/=minor_spin) cycle
       call smearing_SHG_vh_termA                                    ! -(contained here)
       imchi2(1:nstep,1:18) = imchi2(1:nstep,1:18) + 0.5d0*PAI*chi2intA(1:nstep,1:18)
       call smearing_SHG_vh_termBC                                   ! -(contained here)
       imchi2(1:nstep,1:18) = imchi2(1:nstep,1:18) + 0.5d0*PAI*chi2intBC(1:nstep,1:18)
       if(printable) then
          if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
          if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
          write(nfout,'(2x,"ispin = ",i3)') ispin
       end if
    end do

  contains
   subroutine smearing_SHG_vh_termA
     implicit none
!
!   calculate virtual hole term A of Im[chi2]
!
!   term A = Im[pli{pij,pjl}]*delta(Ejl-omega)/Ejl**3*(Ejl + Eji)
!
!   omega : photon energy
!   p     : tansition moment(P)
!   i,l   : valence band index ; j : conduction band index
!   Eab   : energy difference between band ea and eb
!   imbranket : Im[pjl{pli,pij}]
!
!   T. Hamada (Univ. Tokyo) 2005.11.11
!
     integer                        :: ik, ik2, ieig, jeig, leig, iterm
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, el, ejl, eji
     real(kind=DP)                  :: omega, c2, sigma, efact
     real(kind=DP), dimension(3,2)  :: p1, p2, p3
     real(kind=DP), dimension(18)   :: impbkt, pefact

     chi2intA = 0.0d0
     if(nlo_band == INTRA_BAND) return
     if(nlo_term == OMEGA2_TERM) return

! SHG vh term A calculation
     iterm = 1
     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin + 1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do leig = 1, neg
                 if(band_type(leig,ispin)/=FILLED_BAND) cycle
                 el = eb_ek(ik,leig)
                 if(leig == ieig) cycle
! set energy difference
                 ejl = ej - el + scissor
                 eji = ej - ei + scissor
! set moment factor
                 call set_ptrans(ik,ieig,jeig,leig,p1,p2,p3)   ! set P transition moment
                 call calc_impbkt(p1,p2,p3,impbkt)
                 if(smearing_fact == OFF_RESONANCE) then
                    if(dres_method == OMIT) call calc_SHG_energy_factor_omit(ejl,eji,efact,iterm)
                    if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(ejl,eji,efact,iterm)
                    pefact = impbkt*efact
                 end if
! SHG vh term A calculation
                 do istep = 1, nstep
                    omega = e(istep)
                    if(smearing_fact == RESONANCE) then
                       if(omega==0.0d0) omega=1.0d-4
                       if(dres_method == OMIT) call calc_SHG_energy_factor_omit(omega,eji,efact,iterm)
                       if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(omega,eji,efact,iterm)
                       pefact = impbkt*efact
                    end if
                    if(way_BZintegral == PARABOLIC_B) call width2(omega,ejl,width,c2,weight)               ! b_Fermi
                    if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,ejl,width,c2,sigma)
                    chi2intA(istep,1:18) = chi2intA(istep,1:18) + c2*wspin*pefact*qwgt_ek(ik)
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_SHG_vh_termA

   subroutine smearing_SHG_vh_termBC
     implicit none
!
!   calculate virtual hole terms B and C of Im[chi2]
!
!   term B = Im[pij{pjl,pli}]*delta(Ejl-omega)/Ejl**3*(2.0d0*Ejl - Eji)
!   term C = 16.0d0*Im[pij{pjl,pli}]*delta(Eji-2.0d0*omega)/Eji**3*(Ejl - Eji)
!
!   omega : photon energy
!   p     : tansition moment(P)
!   i,l   : valence band index ; j : conduction band index
!   Eab   : energy difference between band ea and eb
!
!   T. Hamada (Univ. Tokyo) 2005.11.11
!
     integer                        :: ik, ik2, ieig, jeig, leig, iterm
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, el, ejl, eji, ejl2
     real(kind=DP)                  :: omega, omega2, c2, sigma, efact
     real(kind=DP), dimension(3,2)  :: p1, p2, p3
     real(kind=DP), dimension(18)   :: impbkt, pefact

     chi2intBC = 0.0d0
     if(nlo_band == INTRA_BAND) return

     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin + 1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do leig = 1, neg
                 if(band_type(leig,ispin)/=FILLED_BAND) cycle
                 el = eb_ek(ik,leig)
                 if(leig == ieig) cycle
! set energy difference
                 ejl = ej - el + scissor
                 eji = ej - ei + scissor
! set moment factor
                 call set_ptrans(ik,jeig,leig,ieig,p1,p2,p3)         ! set P transition moment
                 call calc_impbkt(p1,p2,p3,impbkt)
! SHG vh term B calculation
                 if(nlo_term /= OMEGA2_TERM) then
                    iterm=2
                    if(smearing_fact == OFF_RESONANCE) then
                       if(dres_method == OMIT) call calc_SHG_energy_factor_omit(ejl,eji,efact,iterm)
                       if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(ejl,eji,efact,iterm)
                       pefact = impbkt*efact
                    end if
                    do istep = 1, nstep
                       omega = e(istep)
                       if(smearing_fact == RESONANCE) then
                          if(omega==0.0d0) omega=1.0d-4
                          if(dres_method == OMIT) call calc_SHG_energy_factor_omit(omega,eji,efact,iterm)
                          if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(omega,eji,efact,iterm)
                          pefact = impbkt*efact
                       end if
                       if(way_BZintegral == PARABOLIC_B) call width2(omega,ejl,width,c2,weight)        ! b_Fermi
                       if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,ejl,width,c2,sigma)
                       chi2intBC(istep,1:18) = chi2intBC(istep,1:18) + c2*wspin*pefact*qwgt_ek(ik)
                    end do
                 end if
! SHG vh term C calculation
                 if(nlo_term /= OMEGA_TERM) then
                    iterm=3
                    if(smearing_fact == OFF_RESONANCE) then
                       if(dres_method == OMIT) call calc_SHG_energy_factor_omit(ejl,eji,efact,iterm)
                       if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(ejl,eji,efact,iterm)
                       pefact=impbkt*efact
                    end if
                    do istep = 1, nstep
                       omega2 = 2.0d0*e(istep)
                       if(smearing_fact == RESONANCE) then
                          if(omega2==0.0d0) omega2=1.0d-4
                          if(dres_method == OMIT) call calc_SHG_energy_factor_omit(ejl,omega2,efact,iterm)
                          if(dres_method == DAMPING) call calc_SHG_energy_factor_damp(ejl,omega2,efact,iterm)
                          pefact=impbkt*efact
                       end if
                       if(way_BZintegral == PARABOLIC_B) call width2(omega2,eji,width,c2,weight)         ! b_Fermi
                       if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega2,eji,width,c2,sigma)
                       chi2intBC(istep,1:18) = chi2intBC(istep,1:18) + 16.0d0*c2*wspin*pefact*qwgt_ek(ik)
                    end do
                  end if
              end do
           end do
        end do
     end do
  end subroutine smearing_SHG_vh_termBC
 end subroutine smearing_SHG_vh

 subroutine linear_tetrahedron_SHG(nfout,nstep)
    implicit none
!
!   linear tetrahedron scheme for SHG susceptibility calculation
!
!   virt_ex_type = ALL_TYPE case : sum of electron and hole terms of chi2
!                = ELECTRON case : electron term of chi2
!                = HOLE     case : hole term of chi2
!
!   Tomoyuki Hamada, Univ. Tokyo  July 7, 2006
!
    integer, intent(in)                   :: nfout,nstep
    integer                               :: nstep_min, nstep_max
    integer                               :: i, n0, i0
    integer                               :: n_start, n_end, nst, n_start_mpi, n_end_mpi, nstep_l, nstep_l0, ipes
    integer                               :: neig,ispin,ip2,ik,ieig,nxx,nyy,nzz,ip,istep, nistep0
    real(DP)                              :: edmax0
    real(DP), allocatable, dimension(:,:,:)   :: eig2,eig2_mpi
    real(DP), allocatable, dimension(:)       :: eia, eja, ela
    real(DP), allocatable, dimension(:)       :: deawk1,deawk2
    real(DP), allocatable, dimension(:,:)     :: impbktawk1, impbktawk2
    real(DP), allocatable, dimension(:)   :: ed
    real(DP), allocatable, dimension(:,:) :: chi2int
    real(DP), allocatable, dimension(:,:) :: imchi2_mpi
    real(DP), dimension(3)                :: a,b,c
    integer                               :: id_sname = -1
    logical                               :: called, called2


    allocate(eig2(np2,neg,nspin)); eig2 = 0.d0
    allocate(eig2_mpi(np2,neg,nspin)); eig2_mpi = 0.d0        ! MPI
    allocate(deawk1(np0)); deawk1 =0.0d0
    allocate(deawk2(np0)); deawk2 = 0.d0
    allocate(eia(np0)); eia=0.0d0
    allocate(eja(np0)); eja=0.0d0
    allocate(ela(np0)); ela=0.0d0
    allocate(impbktawk1(np0,18)) ; impbktawk1=0.0d0
    allocate(impbktawk2(np0,18)) ; impbktawk2=0.0d0
    !allocate(ed(nistep)); ed = 0.0d0

    if(printable) then
       write(nfout,'(1x, " ----------- Linear tetrahedron Brillouin zone integration ----------")')
       write(nfout,*) '!* by T. Hamada (Univ. Tokyo), 2006'
       write(nfout,'(2x,"nistep = ",i3)') nistep
       write(nfout,'(2x,"eps = ",e12.5," hartree")') tetra_eps
    end if

! set up parameters
    nxx = nxyz_tetra(1)
    nyy = nxyz_tetra(2)
    nzz = nxyz_tetra(3)
!
    neig=neg

    call set_eigenvalues_ek(nfout,eig2)

    if(ipriepsilon>=2.and.printable) then
       write(nfout,'(1x,"!* eigenvalues used in the linear tetrahedron scheme")')
       do ispin = 1, nspin
          write(nfout,'(1x," ispin = ",i3)') ispin
          do ip2 = 1, np2
             write(nfout,'(1x,"  ik = ",i3)') ip2
             write(nfout,'(5x,5f10.5)') (eig2(ip2,ieig,ispin),ieig=1,neig)
          end do
       end do
    end if

! electron term
    if(virt_ex_type/=HOLE) then
       called = .false. ; called2 =.false.
       if(printable) write(nfout,'(1x,"* excitation = electron")')
       do ispin=1,nspin
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle
          call set_nstaend
          nistep0 = n_end - n_start + 1
          if(printable) then
             write(nfout,'(1x," nstep_l0 = ",i4,3x,"nstep_l =",i4)')  nstep_l0, nstep_l
             write(nfout,'(1x," n_start = ",i4,1x," n_end = ",i4)') n_start, n_end
             write(nfout,'(1x," nst = ",i4)') nst
             write(nfout,'(1x," nstep_min= ",i4,3x," nstep_max = ",i4)') nstep_min, nstep_max
             write(nfout,'(1x," emin for impes = ",f10.5,1x,"eV")') (e_low + (nstep_min - 1)*e_step)*hartree_in_eV
             write(nfout,'(1x," emax for imeps = ",f10.5,1x,"eV")') (e_low + (nstep_max - 1)*e_step)*hartree_in_eV
          end if
!!$          if(nistep0 < nistep) then
!!$             write(nfout,'(1x," nistep is reduced to ",i4)')  nistep0
!!$          end if
          nistep = nistep0
          if(printable) write(nfout,'(1x," nistep = ",i4)')  nistep
          allocate(ed(nistep)) ; ed =0.0d0
          allocate(chi2int(nistep,18)); chi2int = 0.0d0
          do i=n_start, n_end, nistep
             !ed = 0.0d0
             ed(1:nistep) = e(i:i+nistep-1)
             call nsdos3_m_SHG_ve(nfout,ed,nistep,nxx,nyy,nzz,eig2,ispin,ip20,np0,&
                         & deawk1,deawk2,impbktawk1,impbktawk2,eia,eja,ela,ip2cub,chi2int,called,called2)
             if(i+nistep-1 <= nst) then
                imchi2(i:i+nistep-1,1:18)=imchi2(i:i+nistep-1,1:18) - PAI/2.0d0*chi2int(1:nistep,1:18)
             else
                n0 = (nst/nistep)*nistep
                i0 = nst - n0
                imchi2(n0+1:n0+i0,1:18)= imchi2(n0+1:n0+i0,1:18) - PAI/2.0d0*chi2int(1:i0,1:18)
             end if
          end do
          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,*) ' ispin=',ispin
          end if
          deallocate(chi2int)
          deallocate(ed)
       end do
    end if

! hole term
    if(virt_ex_type/=ELECTRON) then
       called = .false. ; called2 =.false.
       if(printable) write(nfout,'(1x,"* excitation = hole")')
       do ispin=1,nspin
          if(spin==MAJOR.and.ispin/=major_spin) cycle
          if(spin==MINOR.and.ispin/=minor_spin) cycle
          call set_nstaend
          nistep0 = n_end - n_start + 1
          if(printable) then
             write(nfout,'(1x," nstep_l0 = ",i4,3x,"nstep_l =",i4)')  nstep_l0, nstep_l
             write(nfout,'(1x," nstep_min= ",i4,3x," nstep_max = ",i4)') nstep_min, nstep_max
             write(nfout,'(1x," emin for impes = ",f10.5,1x,"eV")') (e_low + (nstep_min - 1)*e_step)*hartree_in_eV
             write(nfout,'(1x," emax for imeps = ",f10.5,1x,"eV")') (e_low + (nstep_max - 1)*e_step)*hartree_in_eV
          end if
!!$          if(nistep0 < nistep) then
!!$             write(nfout,'(1x," nistep is reduced to ",i4)')  nistep0
!!$             nistep = nistep0
!!$          end if
          nistep = nistep0
          !write(nfout,'(1x," nistep = ",i4)')  nistep
          allocate(ed(nistep)) ; ed = 0.0d0
          allocate(chi2int(nistep,18)); chi2int = 0.0d0
          do i=n_start, n_end, nistep
             !ed = 0.0d0
             ed(1:nistep) = e(i:i+nistep-1)
             call nsdos3_m_SHG_vh(nfout,ed,nistep,nxx,nyy,nzz,eig2,ispin,ip20,np0,&
                         & deawk1,deawk2,impbktawk1,impbktawk2,eia,eja,ela,ip2cub,chi2int,called,called2)
             if(i+nistep-1 <= nst) then
                imchi2(i:i+nistep-1,1:18)=imchi2(i:i+nistep-1,1:18) + PAI/2.0d0*chi2int(1:nistep,1:18)
             else
                n0 = (nst/nistep)*nistep
                i0 = nst - n0
                imchi2(n0+1:n0+i0,1:18)= imchi2(n0+1:n0+i0,1:18) + PAI/2.0d0*chi2int(1:i0,1:18)
             end if
          end do
          if(printable) then
             if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
             if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
             write(nfout,*) ' ispin=',ispin
          end if
          deallocate(chi2int)
          deallocate(ed)
       end do
    end if

! MPI
    allocate(imchi2_mpi(nstep_l,18)) ; imchi2_mpi = 0.0d0
    if(mype == 0) then
!       if(printable) then
       if(printable .and. ipriepsilon>=2 ) then
          write(nfout,'(1x,i4,1x,"imchi2 data have been processed by ipes = 0")') nstep_l0
       endif
       do ipes = 2, npes
          n_start_mpi = nstep_min + nstep_l0 + (ipes-2)*nstep_l
          n_end_mpi = n_start_mpi + nstep_l - 1
          call mpi_recv(imchi2_mpi,nstep_l*18,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
          imchi2(n_start_mpi:n_end_mpi,1:18) = imchi2_mpi(1:nstep_l,1:18)
!          if(printable) then
          if(ipriepsilon>=2 .and. printable) then
             write(nfout,'(1x,"!* MPI",i4,1x,"imchi2 data have been received from ipes = ",i4)') nstep_l, ipes
          endif
       end do
    else
       n_start_mpi = nstep_min + nstep_l0 + (mype-1)*nstep_l
       n_end_mpi = n_start_mpi + nstep_l - 1
       imchi2_mpi(1:nstep_l,1:18) = imchi2(n_start_mpi:n_end_mpi,1:18)
       call mpi_send(imchi2_mpi,nstep_l*18,mpi_double_precision,0,1,mpi_comm_group,istatus,ierr)
       if ( ipriepsilon >=2 ) then
          write(nfout,'(1x,"!*MPI",i4,1x,"imchi2 data have been sent to ipes = 0")') nstep_l
       endif
    end if
    call mpi_bcast(imchi2,nstep*18,mpi_double_precision,0,mpi_comm_group,ierr)
    deallocate(imchi2_mpi)

    deallocate(deawk1)
    deallocate(deawk2)
    deallocate(eia)
    deallocate(eja)
    deallocate(ela)
    deallocate(impbktawk1); deallocate(impbktawk2)
    deallocate(eig2)
    deallocate(eig2_mpi)
    !deallocate(ed)
    contains
     subroutine set_nstaend
!
!    set linear tetrahedron photon energy steps
!
        edmax0 = edmax_nspin(ispin)
        if(edmax0 > e_high) edmax0 = e_high
        nstep_max = int((edmax_nspin(ispin)-e_low + tetra_eps*2.0d0+scissor)/e_step)+1
        if (system /= METALLIC) then
           nstep_min = int((edmin_nspin(ispin)-e_low - tetra_eps*2.0d0+scissor)/e_step)/2-1
        else
           nstep_min = 1
        end if

        nstep_l = (nstep_max-nstep_min+1)/npes
        if(nstep_l < 1) then
           if(printable) &
          & write(nfout,'(1x,"!* npes is too large. should be less than",i4)') nstep_max-nstep_min
           stop
        end if
        nstep_l0 = nstep_l + ((nstep_max-nstep_min+1) - nstep_l*npes)
        if(mype == 0) then
           n_start =nstep_min
           n_end = n_start + nstep_l0 - 1
           nst = n_end
        else
           n_start = nstep_min + nstep_l0 + (mype-1)*nstep_l
           n_end = n_start + nstep_l - 1
           nst = n_end
        end if
     end subroutine set_nstaend
 end subroutine linear_tetrahedron_SHG

 subroutine nsdos3_m_SHG_ve(nfout,e,ni,nx,ny,nz,eig2,ispin,ip20,np0,&
        &                   dea1,dea2,impbkta1,impbkta2,eia,eja,ela,ip2cub,chi2int,called,called2)
!
!   calculate SHG susceptibility (electron term) by linear tetrahedron method
!
!   The original program is nsdos3 for dos calculation
!   Tomoyuki Hamada, Univ. Tokyo July, 14, 2006
!
    implicit none
!
    integer, intent(in)        :: nfout, ni, nx, ny, nz, ispin, np0
    integer, dimension(:)      :: ip20,ip2cub
    integer                    :: k0, ik2, termA, termB
    integer                    :: ieig, jeig, leig
    real(DP), dimension(:,:,:) :: eig2
    real(DP), dimension(:)     :: eia, eja, ela
    real(DP), dimension(:)     :: dea1, dea2
    real(DP), dimension(:,:)   :: impbkta1, impbkta2
    real(DP)                   :: ei, ej, el
    real(DP), dimension(:)     :: e
    real(DP), dimension(:,:)   :: chi2int
    real(DP), dimension(ni,18) :: c2t0
    real(DP), dimension(18)    :: impbkt01, impbkt02
    logical, intent(inout)     :: called, called2
    data termA, termB/1,2/

    chi2int=0.0d0

!!   call reset_scissors_if_metallic(nfout)

    if(nbztyp==1) vk0xyz=vk00xyz

    do ieig=1, neg
       if(band_type(ieig,ispin)/=FILLED_BAND) cycle
       do jeig=1, neg
          if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
          do leig = 1, neg
             if(band_type(leig,ispin)/=UNFILLED_BAND) cycle
             if(jeig==leig) cycle
             do  k0=1,np0
                ei=eig2(ip20(k0),ieig,ispin)
                ej=eig2(ip20(k0),jeig,ispin)
                el=eig2(ip20(k0),leig,ispin)
                eia(k0)=ei
                eja(k0)=ej
                ela(k0)=el
! dea1=ej-ei
! dea2=el-ei
                dea1(k0)=ej-ei+scissor !-> scissor operator
                dea2(k0)=el-ei+scissor !-> scossor operator
                ik2=nspin*(ip20(k0)-1)+ispin
                call set_impbkta(k0,ik2,leig,ieig,jeig,impbkt01)
                impbkta1(k0,1:18)=impbkt01(1:18)
                call set_impbkta(k0,ik2,jeig,leig,ieig,impbkt02)
                impbkta2(k0,1:18)=impbkt02(1:18)
             end do
             if(nlo_term/=OMEGA2_TERM) then
                call nsdos0_m_SHG_omega(nfout,e,ni,nx,ny,nz,vk0xyz,&
                                      & dea1,dea2,eia,eja,ela,impbkta1,impbkta2,c2t0,ip2cub,called,called2)
                chi2int=chi2int+c2t0
             end if
             if(nlo_term/=OMEGA_TERM) then
                call nsdos0_m_SHG_omega2(nfout,e,ni,nx,ny,nz,vk0xyz,&
                                      & dea1,dea2,eia,eja,ela,impbkta2,c2t0,ip2cub,called,called2)
                chi2int=chi2int+16.0d0*c2t0
             end if
          end do
       end do
    end do
 end subroutine nsdos3_m_SHG_ve

 subroutine nsdos3_m_SHG_vh(nfout,e,ni,nx,ny,nz,eig2,ispin,ip20,np0,&
        &                   dea1,dea2,impbkta1,impbkta2,eia,eja,ela,ip2cub,chi2int,called,called2)
!
!   SHG suscptibility (hole term) calculation by linear tetrahedron method
!
!   The original program is nsdos3 for dos calculation
!   Tomoyuki Hamada, Univ. Tokyo July, 14, 2006
!
    implicit none
!
    integer, intent(in)        :: nfout, ni, nx, ny, nz, ispin, np0
    integer, dimension(:)      :: ip20,ip2cub
    integer                    :: k0, ik2, termA, termB
    integer                    :: ieig, jeig, leig
    real(DP), dimension(:,:,:) :: eig2
    real(DP), dimension(:)     :: eia, eja, ela
    real(DP), dimension(:)     :: dea1, dea2
    real(DP), dimension(:,:)   :: impbkta1, impbkta2
    real(DP)                   :: ei, ej, el
    real(DP), dimension(:)     :: e
    real(DP), dimension(:,:)   :: chi2int
    real(DP), dimension(ni,18) :: c2t0
    real(DP), dimension(18)    :: impbkt01, impbkt02
    logical, intent(inout)     :: called, called2
    data termA, termB/1,2/

    chi2int=0.0d0

!!   call reset_scissors_if_metallic(nfout)

    if(nbztyp==1) vk0xyz=vk00xyz

    do ieig=1, neg
       if(band_type(ieig,ispin)/=FILLED_BAND) cycle
       do jeig=1, neg
          if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
          do leig = 1, neg
             if(band_type(leig,ispin)/=FILLED_BAND) cycle
             if(ieig==leig) cycle
             do  k0=1,np0
                ei=eig2(ip20(k0),ieig,ispin)
                ej=eig2(ip20(k0),jeig,ispin)
                el=eig2(ip20(k0),leig,ispin)
                eia(k0)=ei
                eja(k0)=ej
                ela(k0)=el
! dea1=ej-ei
! dea2=ej-el
                dea1(k0)=ej-ei+scissor !-> scissor operator
                dea2(k0)=ej-el+scissor !-> scissor operator
                ik2=nspin*(ip20(k0)-1)+ispin
                call set_impbkta(k0,ik2,ieig,jeig,leig,impbkt01)
                impbkta1(k0,1:18)=impbkt01(1:18)
                call set_impbkta(k0,ik2,jeig,leig,ieig,impbkt02)
                impbkta2(k0,1:18)=impbkt02(1:18)
             end do
             if(nlo_term/=OMEGA2_TERM) then
                call nsdos0_m_SHG_omega(nfout,e,ni,nx,ny,nz,vk0xyz,&
                                      & dea1,dea2,eia,eja,ela,impbkta1,impbkta2,c2t0,ip2cub,called,called2)
                chi2int=chi2int+c2t0
             end if
             if(nlo_term/=OMEGA_TERM) then
                call nsdos0_m_SHG_omega2(nfout,e,ni,nx,ny,nz,vk0xyz,&
                                      & dea1,dea2,eia,eja,ela,impbkta2,c2t0,ip2cub,called,called2)
                chi2int=chi2int+16.0d0*c2t0
             end if
          end do
       end do
    end do
 end subroutine nsdos3_m_SHG_vh

 subroutine nsdos0_m_SHG_omega(jf,e,ne,nxx,nyy,nzz,vk0,ea1,ea2,ei,ej,el,tr,tr1,tint,ip2cub,called,called2)
!
!   calculate omega-SHG susceptibility by linear tetrahedron method
!
!   The original code is nsdos0_m for Im[Eps] Calculation
!   T. Hamada July 19, 2006
!
!   nxx   number of mesh points in x-direction
!   nyy   number of mesh points in y-direction
!   nzz   number of mesh points in z-direction
!   ea1   = ej-ei
!   ea2   = el-ei
!   ei    energy of valence band i
!   ej    energy of conduction band j
!   el    energy of conduction band k
!   tr    Im[pjl*{pli,pij}]
!   tint  SHG susceptibility(omega term) at e
    implicit none
!
    integer, intent(in) :: jf
    real(DP),intent(in), dimension(:)        :: e
    real(DP),            dimension(ne)       :: e2
    real(DP),intent(in), dimension(:)        :: ea1, ea2, ei, ej, el
    real(DP),intent(in), dimension(:,:)      :: tr,tr1
    real(DP),intent(in), dimension(:,:)      :: vk0
    real(DP),intent(out),dimension(:,:)      :: tint
    real(DP),save                            :: vtet6,vtet6inv
    real(DP),            dimension(2,2,2)    :: ecub, ecub2, eicub, ejcub, elcub
    real(DP),            dimension(8)        :: ec, ec2, eci, ecj, ecl
    real(DP),            dimension(4)        :: et, et2, eti, etj, etl, eb1, eb2, ebi, ebj, ebl
    real(DP),            dimension(2,2,2,18) :: trcub, trcub1
    real(DP),            dimension(8,18)     :: trc, trc1
    real(DP),            dimension(4,18)     :: trt, trb, trt1, trb1, trb2
    real(DP),allocatable,    dimension(:,:)      :: kip0
    real(DP),allocatable,dimension(:,:,:)    :: tcub
    real(DP),allocatable,dimension(:,:)      :: ttr, ttr1
    real(DP)                                 :: emax, emin, eps, tvol, vtet, eb21, eb22, eb23, eb24, d
    integer                                  :: icub, ip, ip0, iq, it, ix, iy, iz, kx, ky, kz, i, m, iterm
    integer                                  :: ncub, ni, np, npx, npy, npz, ntet, tintegral, INCLUDE, EXCLUDE
    integer,             dimension(2,2,2)    :: iecub
    integer,             dimension(8)        :: iec
    integer,             dimension(4)        :: iet, ieb
    integer,             dimension(6,2)      :: iqmat
    integer,             intent(in)          :: ne, nxx, nyy, nzz
    integer,             dimension(:)        :: ip2cub
    logical                                  :: called, called2
    equivalence(ec(1),ecub(1,1,1))   ; equivalence(ec2(1),ecub2(1,1,1))
    equivalence(eci(1),eicub(1,1,1))
    equivalence(ecj(1),ejcub(1,1,1)) ; equivalence(ecl(1),elcub(1,1,1))
    equivalence(trc(1,1),trcub(1,1,1,1)) ; equivalence(trc1(1,1),trcub1(1,1,1,1))
    equivalence(iec(1),iecub(1,1,1))
    data iqmat/2,2,5,3,3,5, 4,6,6,4,7,7/
    data INCLUDE, EXCLUDE /1,0/
!   INCLUDE: contribution from a tetrahedron with tintegtal=INCLUDE is included
!   EXCLUDE: that from a tetrahedron with EXCLUDE is neglected

    allocate(kip0(np0,3))
!
    kip0=0.0d0
!  definition of eps  <- must be consistent with <nstts1>
    eps=tetra_eps
!
    npx=nxx+1
    npy=nyy+1
    npz=nzz+1
    np=npx*npy*npz
    ncub=nxx*nyy*nzz
    ntet=6*ncub
    if(.not.called) then
! set tetrahedron volume vtet vtet6, 1/vtet6
       vtet=rvol/real(ntet,DP)
       vtet6=vtet*6.00d0
       vtet6inv=1.0d0/vtet6
       if(printable) then
          write(jf,51) ntet,ncub
          write(jf,52) rvol,vtet
          write(jf,50)
       end  if
    end if
 50 format(1x," integration of omega SHG moment")
 51 format(1x," number of tetrahedron = ",i10,/,1x," number of cube = ",i10)
 52 format(1x," Brillouin zone volume = ",f10.5,/1x," tetrahedron volume =",f10.5)

    allocate(tcub(ne,18,ncub)); tcub=0.0d0
    allocate(ttr(ne,18)); ttr=0.0d0
    allocate(ttr1(ne,18)); ttr1=0.0d0
    tint=0.0d0

!
!     ***  integration over b.z. starts    ***
!
!     ***       sampling over cubes        ***
!
    icub=0
    do iz=0,nzz-1
       do iy=0,nyy-1
          do ix=0,nxx-1
             icub=icub+1
             if(icub.ne.ip2cub(icub)) then
                tint(1:ne,1:18)=tint(1:ne,1:18)+tcub(1:ne,1:18,ip2cub(icub))
             else
                tcub(1:ne,1:18,icub)=0.0d0
!     ***  energies at cube corners  ***
                ni=npx*(npy*iz+iy)+ix
                if(nbztyp==1) then
                   do kz=1,2
                      do ky=1,2
                         do kx=1,2
                            ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                            kip0(ip0,1:3)=vk00xyz(ip0,1:3)
                            ecub(kx,ky,kz) =ea1(ip0); ecub2(kx,ky,kz) =ea2(ip0)
                            eicub(kx,ky,kz)=ei(ip0)
                            ejcub(kx,ky,kz)=ej(ip0) ; elcub(kx,ky,kz)=el(ip0)
                            trcub(kx,ky,kz,1:18)=tr(ip0,1:18) ; trcub1(kx,ky,kz,1:18)=tr1(ip0,1:18)
                            iecub(kx,ky,kz)=ip0
                         end do
                      end do
                   end do
                end if
                if(nbztyp/=1) then
                   do kz=1,2
                      do ky=1,2
                         do kx=1,2
                            ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                            kip0(ip0,1:3)=vk0(ip0,1:3)
                            ecub(kx,ky,kz) =ea1(ip0); ecub2(kx,ky,kz) =ea2(ip0)
                            eicub(kx,ky,kz)=ei(ip0)
                            ejcub(kx,ky,kz)=ej(ip0) ; elcub(kx,ky,kz)=el(ip0)
                            trcub(kx,ky,kz,1:18)=tr(ip0,1:18) ; trcub1(kx,ky,kz,1:18)=tr1(ip0,1:18)
                            iecub(kx,ky,kz)=ip0
                         end do
                      end do
                   end do
                end if
!         ***      six tetrahedrons      ***
!         *** sampling over tetrahedrons ***

                et(1)=ec(1)   ; et2(1)=ec2(1)
                eti(1)=eci(1)
                etj(1)=ecj(1) ; etl(1)=ecl(1)
                et(4)=ec(8)   ; et2(4)=ec2(8)
                eti(4)=eci(8)
                etj(4)=ecj(8) ; etl(1)=ecl(8)
                trt(1,1:18)=trc(1,1:18) ; trt1(1,1:18)=trc1(1,1:18)
                trt(4,1:18)=trc(8,1:18) ; trt1(4,1:18)=trc1(8,1:18)
                iet(1)=iec(1)
                iet(4)=iec(8)
                do it=1,6
                   iq=iqmat(it,1)
                   et(2)=ec(iq)   ; et2(2)=ec2(iq)
                   eti(2)=eci(iq)
                   etj(2)=ecj(iq) ; etl(2)=ecl(iq)
                   trt(2,1:18)=trc(iq,1:18) ; trt1(2,1:18)=trc1(iq,1:18)
                   iet(2)=iec(iq)
                   iq=iqmat(it,2)
                   et(3)=ec(iq)   ; et2(3)=ec2(iq)
                   eti(3)=eci(iq)
                   etj(3)=ecj(iq) ; etl(3)=ecl(iq)
                   trt(3,1:18)=trc(iq,1:18) ; trt1(3,1:18)=trc1(iq,1:18)
                   iet(3)=iec(iq)
                   eb1(1:4)=et(1:4)  ; eb2(1:4)=et2(1:4)
                   ebi(1:4)=eti(1:4)
                   ebj(1:4)=etj(1:4) ; ebl(1:4)=etl(1:4)
                   trb(1:4,1:18)=trt(1:4,1:18) ; trb1(1:4,1:18)=trt1(1:4,1:18)
                   ieb(1:4)=iet(1:4)
!        ***  eb2(1).le.eb2(2).le.eb2(3).le.eb2(4)  ***
                   call nsttod_m_SHG(eb2,eb1,ebi,ebj,ebl,trb,trb1,ieb)
                   eb21=eb2(1) ; eb22=eb2(2) ; eb23=eb2(3) ; eb24=eb2(4)
                   call nstts1_m(eb21,eb22,eb23,eb24)
                   call check_energy_order(eb21,eb22,eb23,eb24)
                   eb2(1)=eb21 ; eb2(2)=eb22 ; eb2(3)=eb23 ; eb2(4)=eb24
! first omega term
                   iterm = 1
                   call nstrans_SHG(e,ne,eb2,eb1,trb,ebi,ebj,ebl,ieb,kip0,d,ttr,&
                         & vtet6,vtet6inv,tintegral,INCLUDE,EXCLUDE,iterm)
                   if(d/=0.0d0) then
                      tint=tint+ttr
                      tcub(1:ne,1:18,icub)=tcub(1:ne,1:18,icub)+ttr(1:ne,1:18)
                   end if
! second omega term
                   iterm = 2
                   call nstrans_SHG(e,ne,eb2,eb1,trb1,ebi,ebj,ebl,ieb,kip0,d,ttr,&
                         & vtet6,vtet6inv,tintegral,INCLUDE,EXCLUDE,iterm)
                   if(d/=0.0d0) then
                      tint=tint+ttr
                      tcub(1:ne,1:18,icub)=tcub(1:ne,1:18,icub)+ttr(1:ne,1:18)
                   end if
                end do
             end if
          end do
       end do
    end do
    if (.not.called) then
       called=.true.
    end if
    deallocate(kip0)
    deallocate(tcub)
    deallocate(ttr)
 end subroutine nsdos0_m_SHG_omega

 subroutine nsdos0_m_SHG_omega2(jf,e,ne,nxx,nyy,nzz,vk0,ea1,ea2,ei,ej,el,tr,tint,ip2cub,called,called2)
!
!      calculate omega2-SHG susceptibility(imaginary part) by linear tetrahedron method
!
!      The original code is nsdos0_m for Im[Eps] Calculation
!      T. Hamada July 19, 2006
!
!      e     photon energy
!      ne    number of photon energy gird
!      nxx   number of mesh points in x-direction
!      nyy   number of mesh points in y-direction
!      nzz   number of mesh points in z-direction
!      ea1   = ej-ei
!      ea2   = el-ei
!      ei    energy of valence band i
!      ej    energy of conduction band j
!      el    energy of conduction band k
!      tr    Im[pjl*{pli,pij}]
!      tint  SHG susceptibility(omega2 term) at e
    implicit none
!
    integer, intent(in) :: jf
    real(DP),intent(in), dimension(:)        :: e
    real(DP),            dimension(ne)       :: e2
    real(DP),intent(in), dimension(:)        :: ea1, ea2, ei, ej, el
    real(DP),intent(in), dimension(:,:)      :: tr
    real(DP),intent(in), dimension(:,:)      :: vk0
    real(DP),intent(out),dimension(:,:)      :: tint
    real(DP),save                            :: vtet6,vtet6inv
    real(DP),            dimension(2,2,2)    :: ecub, ecub2, eicub, ejcub, elcub
    real(DP),            dimension(8)        :: ec, ec2, eci, ecj, ecl
    real(DP),            dimension(4)        :: et, et2, eti, etj, etl, eb1, eb2, ebi, ebj, ebl
    real(DP),            dimension(2,2,2,18) :: trcub
    real(DP),            dimension(8,18)     :: trc
    real(DP),            dimension(4,18)     :: trt, trb
    real(DP),allocatable,    dimension(:,:)      :: kip0
    real(DP),allocatable,dimension(:,:,:)    :: tcub
    real(DP),allocatable,dimension(:,:)      :: ttr
    real(DP)                                 :: emax, emin, eps, tvol, vtet, eb11, eb12, eb13, eb14, d
    integer                                  :: icub, ip, ip0, iq, it, ix, iy, iz, kx, ky, kz, i, m, iterm
    integer                                  :: ncub, ni, np, npx, npy, npz, ntet, tintegral, INCLUDE, EXCLUDE
    integer,             dimension(2,2,2)    :: iecub
    integer,             dimension(8)        :: iec
    integer,             dimension(4)        :: iet, ieb
    integer,             dimension(6,2)      :: iqmat
    integer,             intent(in)          :: ne, nxx, nyy, nzz
    integer,             dimension(:)        :: ip2cub
    logical                                  :: called, called2
    equivalence(ec(1),ecub(1,1,1))   ; equivalence(ec2(1),ecub2(1,1,1))
    equivalence(eci(1),eicub(1,1,1))
    equivalence(ecj(1),ejcub(1,1,1)) ; equivalence(ecl(1),elcub(1,1,1))
    equivalence(trc(1,1),trcub(1,1,1,1))
    equivalence(iec(1),iecub(1,1,1))
    data iqmat/2,2,5,3,3,5, 4,6,6,4,7,7/
    data INCLUDE, EXCLUDE /1,0/
!   INCLUDE: contribution from a tetrahedron with tintegtal=INCLUDE is included
!   EXCLUDE: that from a tetrahedron with EXCLUDE is neglected

    allocate(kip0(np0,3))
!
    kip0=0.0d0
!  definition of eps  <- must be consistent with <nstts1_m>
    eps=tetra_eps
!
    npx=nxx+1
    npy=nyy+1
    npz=nzz+1
    np=npx*npy*npz
    ncub=nxx*nyy*nzz
    ntet=6*ncub
    if(.not.called2) then
       write(jf,50)
! set tetrahedron volume vtet vtet6, 1/vtet6
       vtet=rvol/real(ntet,DP)
       vtet6=vtet*6.00d0
       vtet6inv=1.0d0/vtet6
       if(printable.and..not.called) then
          write(jf,51) ntet,ncub
          write(jf,52) rvol,vtet
       end if
    end if
 50 format(1x," integration of omega2 SHG moment")
 51 format(1x," number of tetrahedron = ",i10,/,1x," number of cube = ",i10)
 52 format(1x," Brillouin zone volume = ",f10.5,/1x," tetrahedron volume =",f10.5)

    allocate(tcub(ne,18,ncub)); tcub=0.0d0
    allocate(ttr(ne,18)); ttr=0.0d0
    tint=0.0d0

!
!     ***  integration over b.z. starts    ***
!
!     ***       sampling over cubes        ***
!
    icub=0
    do iz=0,nzz-1
       do iy=0,nyy-1
          do ix=0,nxx-1
             icub=icub+1
             if(icub.ne.ip2cub(icub)) then
                tint(1:ne,1:18)=tint(1:ne,1:18)+tcub(1:ne,1:18,ip2cub(icub))
             else
                tcub(1:ne,1:18,icub)=0.0d0
!     ***  energies at cube corners  ***
                ni=npx*(npy*iz+iy)+ix
                if(nbztyp==1) then
                   do kz=1,2
                      do ky=1,2
                         do kx=1,2
                            ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                            kip0(ip0,1:3)=vk00xyz(ip0,1:3)
                         ecub(kx,ky,kz) =ea1(ip0); ecub2(kx,ky,kz) =ea2(ip0)
                         eicub(kx,ky,kz)=ei(ip0)
                         ejcub(kx,ky,kz)=ej(ip0) ; elcub(kx,ky,kz)=el(ip0)
                         trcub(kx,ky,kz,1:18)=tr(ip0,1:18)
                         iecub(kx,ky,kz)=ip0
                      end do
                   end do
                end do
                end if
                if(nbztyp/=1) then
                   do kz=1,2
                      do ky=1,2
                         do kx=1,2
                            ip0=ni+npx*(npy*(kz-1)+ky-1)+kx
                            kip0(ip0,1:3)=vk0(ip0,1:3)
                            ecub(kx,ky,kz) =ea1(ip0); ecub2(kx,ky,kz) =ea2(ip0)
                            eicub(kx,ky,kz)=ei(ip0)
                            ejcub(kx,ky,kz)=ej(ip0) ; elcub(kx,ky,kz)=el(ip0)
                            trcub(kx,ky,kz,1:18)=tr(ip0,1:18)
                            iecub(kx,ky,kz)=ip0
                         end do
                      end do
                   end do
                end if
!         ***      six tetrahedrons      ***
!         *** sampling over tetrahedrons ***

                et(1)=ec(1)   ; et2(1)=ec2(1)
                eti(1)=eci(1)
                etj(1)=ecj(1) ; etl(1)=ecl(1)
                et(4)=ec(8)   ; et2(4)=ec2(8)
                eti(4)=eci(8)
                etj(4)=ecj(8) ; etl(1)=ecl(8)
                trt(1,1:18)=trc(1,1:18)
                trt(4,1:18)=trc(8,1:18)
                iet(1)=iec(1)
                iet(4)=iec(8)
                do it=1,6
                   iq=iqmat(it,1)
                   et(2)=ec(iq)   ; et2(2)=ec2(iq)
                   eti(2)=eci(iq)
                   etj(2)=ecj(iq) ; etl(2)=ecl(iq)
                   trt(2,1:18)=trc(iq,1:18)
                   iet(2)=iec(iq)
                   iq=iqmat(it,2)
                   et(3)=ec(iq)   ; et2(3)=ec2(iq)
                   eti(3)=eci(iq)
                   etj(2)=ecj(iq) ; etl(3)=ecl(iq)
                   trt(3,1:18)=trc(iq,1:18)
                   iet(3)=iec(iq)
                   eb1(1:4)=et(1:4)  ; eb2(1:4)=et2(1:4)
                   ebi(1:4)=eti(1:4)
                   ebj(1:4)=etj(1:4) ; ebl(1:4)=etl(1:4)
                   trb(1:4,1:18)=trt(1:4,1:18)
                   ieb(1:4)=iet(1:4)
!        ***  eb1(1).le.eb1(2).le.eb1(3).le.eb1(4)  ***
                   call nsttod_m_SHG(eb1,eb2,ebi,ebj,ebl,trb,trb,ieb)
                   eb11=eb1(1) ; eb12=eb1(2) ; eb13=eb1(3) ; eb14=eb1(4)
                   call nstts1_m(eb11,eb12,eb13,eb14)
                   call check_energy_order(eb11,eb12,eb13,eb14)
                   eb1(1)=eb11 ; eb1(2)=eb12 ; eb1(3)=eb13 ; eb1(4)=eb14
! omega2 term
                   iterm=3
                   e2=2.0d0*e
                   call nstrans_SHG(e2,ne,eb1,eb2,trb,ebi,ebj,ebl,ieb,kip0,d,ttr,&
                         & vtet6,vtet6inv,tintegral,INCLUDE,EXCLUDE,iterm)
                   if(d==0.0d0) cycle
                   tint=tint+ttr
                   tcub(1:ne,1:18,icub)=tcub(1:ne,1:18,icub)+ttr(1:ne,1:18)
                end do
             end if
          end do
       end do
    end do
    if (.not.called2) then
       called2=.true.
    end if
    deallocate(kip0)
    deallocate(tcub)
    deallocate(ttr)
 end subroutine nsdos0_m_SHG_omega2

 subroutine nsttod_m_SHG(ed,ed0,ebi,ebj,ebl,trb,trb1,ieb)
!
!   energy ordering k-points
!
!   The original program is nsttod
!   Tomoyuki Hamada, Univ. Tokyo, Feb. 19, 2003
!   modified by T. Hamada Sept. 2, 2003
    implicit none
    integer                   :: ind, ih, i, k
    integer,  dimension(:)    :: ieb
    real(DP)                  :: d, d0, ei, ej, el
    real(DP), dimension(:)    :: ed, ed0, ebi, ebj, ebl
    real(DP), dimension(:,:)  :: trb, trb1
    real(DP), dimension(18)   :: b, b1

    do k=1,3
       d=ed(k); d0=ed0(k)
       ei =ebi(k); ej = ebj(k); el = ebl(k)
       b(1:18)=trb(k,1:18) ; b1(1:18)=trb1(k,1:18)
       ih=ieb(k)
       ind=k
       do i=k+1,4
          if(ed(i).lt.d) then
             d=ed(i) ; d0=ed0(i)
             ei =ebi(i); ej = ebj(i); el = ebl(i)
             b(1:18)=trb(i,1:18); b1(1:18)=trb1(i,1:18)
             ih=ieb(i)
             ind=i
          end if
       end do
       ed(ind)=ed(k)
       ebi(ind)=ebi(k) ; ebj(ind)=ebj(k) ; ebl(ind)=ebl(k)
       trb(ind,1:18)=trb(k,1:18) ; trb1(ind,1:18)=trb1(k,1:18)
       ieb(ind)=ieb(k)
       ed(k)=d ; ed0(k)=d0
       ebi(k)=ei;  ebj(k)=ej ; ebl(k)=el
       trb(k,1:18)=b(1:18) ; trb1(k,1:18)=b1(1:18)
       ieb(k)=ih
    end do
 end subroutine nsttod_m_SHG

 subroutine nstrans_SHG(ep,ni,eb,eb0,tr,ebi,ebj,ebl,ieb,kip0,jdos,tia, &
                      & vol,volinv,tintegral,INCLUDE,EXCLUDE,iterm)
    implicit none
!
!   calculate SHG susceptibility by a modified linear tetrahedron (lt) method
!
!   based on the standard lt shceme:
!   G. Lehman and M. Taut, Physica Status Solidi (b) vol.54, pp469-477 (1972)
!
!   Tomoyuki Hamada, Univ. Tokyo, JAN. 27, 2007
!
!    * input
!       e: photon energy energy
!       eb: band transition energies being resonant with e
!        eb(1): transition energy at tetrahedron corner0
!        eb(2):                   at tetrahedron corner1
!        eb(3):                   at tetrahedron corner2
!        eb(4):                   at tetrahedron corner3
!        here, e1<e2<e3<e4
!       ebi(m) : energy of initial band at m-th corner of tetrahedron (m=1,4)
!       ebf(m) : energy of final band at m-th corder of tetrahedron (m=1,4)
!       eb0: band transition energies not being resonant with e
!        eb0(1): transition energy at tetrahedron corner0
!        eb0(2):                   at tetrahedron corner1
!        eb0(3):                   at tetrahedron corner2
!        eb0(4):                   at tetrahedron corner3
!       tr:Im[p1,{p2,p3}]
!        tr(1):                   at tetrahedron corner0
!        tr(2):                   at tetrahedron corner1
!        tr(3):                   at tetrahedron corner2
!        tr(4):                   at tetrahedron corner3
!       ieb: k-vector index
!       kip    : k-point vector of tetrahedron corners
!       vol    : volume of tetrahedron
!       volinv : 1/vol
!    * output
!       tintegral:  tetrahedron integral
!    * variables for linear tetrahedon (see the reference for details)
!       f_b : f/|b|
!       k1, k2, k3 : edge vector of tetrahedron
!       r1, r2, r3 : r vector
!       a          : a vector
!       s          : s vector
!       e0_at_svec : non-resonant band transition energy at s vector
!
    integer,intent(in)                  :: ni, tintegral, INCLUDE, EXCLUDE
    integer,intent(in)                  :: iterm
    integer,intent(in),  dimension(4)   :: ieb
    real(DP),intent(in)                 :: vol,volinv
    real(DP),intent(in), dimension(:,:) :: tr
    real(DP),intent(in), dimension(:)   :: ep
    real(DP),intent(in), dimension(:,:) :: kip0
    real(DP),intent(in), dimension(:)   :: eb, eb0
    real(DP),intent(in), dimension(:)   :: ebi, ebj, ebl
    real(DP),intent(out)                :: jdos
    real(DP),intent(out),dimension(:,:) :: tia
    real(DP),dimension(18)              :: a0i0, ai1
    real(DP),dimension(3)               :: k1,k2,k3,s,r1,r2,r3
    real(DP),dimension(18)              :: a1, a2, a3, a4
    real(DP),dimension(18,3)            :: a
    real(DP)                            :: e, e1, e2, e3, e4, eb1, eb2, f_b, efact, eb0_at_svec
    real(DP)                            :: escale, estep
    integer                             :: istep, tintegral0
    integer                             :: ns1, ns2, ns3, ne1, ne2, ne3
    !integer                             :: icount1, icount2, icount3
    logical                             :: deg12, deg23, deg34

    tia=0.0d0
    f_b=0.0d0

    e1 = eb(1) ; e2 = eb(2) ; e3 = eb(3) ; e4 =eb(4)

!  set istep ranges
    escale = 1.0d0
    if(iterm==3) escale = 2.0d0
    estep = escale*e_step
    ns1=1+ceiling((e1-ep(1))/estep)
    ne1=1+floor((e2-ep(1))/estep)
    ns2=1+ceiling((e2-ep(1))/estep)
    ne2=1+floor((e3-ep(1))/estep)
    ns3=1+ceiling((e3-ep(1))/estep)
    ne3=1+floor((e4-ep(1))/estep)

    if(npes==1) then
       if(ne1>ni) ne1=ni
       if(ne2>ni) ne2=ni
       if(ne3>ni) ne3=ni
       if(ns1>ne1) then
          ns1 = 0 ; ne1 = 0
       end if
       if(ns2>ne2) then
          ns2 = 0 ; ne2 = 0
       end if
       if(ns3>ne3) then
          ns3 = 0 ; ne3 = 0
       end if
    else
       deg12 = .false.
       deg23 = .false.
       deg34 = .false.
       if(ns1>ne1) then
          deg12 = .true. ; ne1 = ns1
       end if
       if(ns2>ne2) then
          deg23 = .true. ; ne2 = ns2
       end if
       if(ns3>ne3) then
          deg34 = .true. ; ne3 = ns3
       end if
       call istep_range_trimming
       if(deg12 .eqv. .true.) then
          ns1 = 0 ; ne1 = 0
       end if
       if(deg23 .eqv. .true.) then
          ns2 = 0 ; ne2 = 0
       end if
       if(deg34 .eqv. .true.) then
          ns3 = 0 ; ne3 = 0
       end if
    end if
!   write(nfout,'(1x," ns1 = ",i3," ne1 = ",i3)') ns1, ne1
!   write(nfout,'(1x," ns2 = ",i3," ne2 = ",i3)') ns2, ne2
!   write(nfout,'(1x," ns3 = ",i3," ne3 = ",i3)') ns3, ne3
   if(ne1==0.and.ne2==0.and.ne3==0) return

!  calculate unit vectors of tetrahedron
    k1(1:3)=kip0(ieb(2),1:3)-kip0(ieb(1),1:3)
    k2(1:3)=kip0(ieb(3),1:3)-kip0(ieb(1),1:3)
    k3(1:3)=kip0(ieb(4),1:3)-kip0(ieb(1),1:3)

!  calculate A vectors of tetrahedron
    a1=tr(1,1:18) ; a2=tr(2,1:18); a3=tr(3,1:18) ; a4=tr(4,1:18)
    call calc_avec_SHG(k1,k2,k3,a1,a2,a3,a4,volinv,a,r1,r2,r3)

    if(ne1/=0) then
       !ns1 = ne1 - icount1 + 1
       if(dres_method == OMIT) then
          if(iterm/=3) then
             do istep = ns1, ne1
                e = ep(istep)
                call calcf_b1(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec1(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb1 =e ; eb2 = eb0_at_svec
                call calc_SHG_energy_factor_omit(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          else
             do istep = ns1, ne1
                e = ep(istep)
                call calcf_b1(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec1(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb2 =e ; eb1 = eb0_at_svec
                call calc_SHG_energy_factor_omit(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          end if
       else
          if(iterm/=3) then
             do istep = ns1, ne1
                e = ep(istep)
                call calcf_b1(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec1(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb1 =e ; eb2 = eb0_at_svec
                call calc_SHG_energy_factor_damp(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          else
             do istep = ns1, ne1
                e = ep(istep)
                call calcf_b1(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec1(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb2 =e ; eb1 = eb0_at_svec
                call calc_SHG_energy_factor_damp(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          end if
       end if
    end if

    if(ne2/=0) then
       !ns2 = ne2 -icount2 + 1
       if(dres_method == OMIT) then
          if(iterm/=3) then
             do istep = ns2, ne2
                e = ep(istep)
                call calcf_b2(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec2(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb1 =e ; eb2 = eb0_at_svec
                call calc_SHG_energy_factor_omit(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          else
             do istep = ns2, ne2
                e = ep(istep)
                call calcf_b2(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec2(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb2 =e ; eb1 = eb0_at_svec
                call calc_SHG_energy_factor_omit(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          end if
       else
          if(iterm/=3) then
             do istep = ns2, ne2
                e = ep(istep)
                call calcf_b2(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec2(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb1 =e ; eb2 = eb0_at_svec
                call calc_SHG_energy_factor_damp(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          else
             do istep = ns2, ne2
                e = ep(istep)
                call calcf_b2(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec2(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb2 =e ; eb1 = eb0_at_svec
                call calc_SHG_energy_factor_damp(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          end if
       end if
    end if

    if(ns3/=0) then
       !ns3 = ne3 - icount3 + 1
       if(dres_method == OMIT) then
          if(iterm/=3) then
             do istep = ns3, ne3
                e = ep(istep)
                call calcf_b3(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec3(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb1 =e ; eb2 = eb0_at_svec
                call calc_SHG_energy_factor_omit(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          else
             do istep = ns3, ne3
                e = ep(istep)
                call calcf_b3(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec3(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb2 =e ; eb1 = eb0_at_svec
                call calc_SHG_energy_factor_omit(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          end if
       else
          if(iterm/=3) then
             do istep = ns3, ne3
                e = ep(istep)
                call calcf_b3(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec3(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb1 =e ; eb2 = eb0_at_svec
                call calc_SHG_energy_factor_damp(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          else
             do istep = ns3, ne3
                e = ep(istep)
                call calcf_b3(e,e1,e2,e3,e4,vol,f_b)
                jdos=f_b
                call svec3(k1,k2,k3,e,e1,e2,e3,e4,s)
                call calc_eb0_at_svec ! -> calcaulate SHG energy factor at k-point given by s vector
                eb2 =e ; eb1 = eb0_at_svec
                call calc_SHG_energy_factor_damp(eb1,eb2,efact,iterm)
                ai1(1:18)=a(1:18,1)*s(1)+a(1:18,2)*s(2)+a(1:18,3)*s(3)
                tia(istep,1:18)=(a1+ai1)*f_b*efact
             end do
          end if
       end if
    end if
    contains
     subroutine istep_range_trimming
       implicit none
! out of range
       if(ns1>ni.or.ne3<1) then
          ns1 = 0 ; ne1 = 0
          ns2 = 0 ; ne2 = 0
          ns3 = 0 ; ne3 = 0
          return
       end if
! trimming of lower bound
       if(ns1<=1.and.ne1>=1) then
          ns1 = 1
       else if(ns2<=1.and.ne2>=1) then
          ns1 = 0 ; ne1 = 0
          ns2 = 1
       else if(ns3<=1.and.ne3>=1) then
          ns1 = 0 ; ne1 = 0
          ns2 = 0 ; ne2 = 0
          ns3 = 1
       end if
! trimming of higher bound
       if(ns1<=ni.and.ne1>=ni) then
          ne1 = ni
          ns2 = 0 ; ne2 = 0
          ns3 = 0 ; ne3 = 0
       else if(ns2<=ni.and.ne2>=ni) then
          ne2 = ni
          ns3 = 0 ; ne3 = 0
       else if(ns3<=ni.and.ne3>=ni) then
          ne3 = ni
       end if
     end subroutine istep_range_trimming

     subroutine calc_avec_SHG(k1,k2,k3,p1,p2,p3,p4,vinv,a,r1,r2,r3)
       implicit none
       real(DP),intent(in), dimension(3)   :: k1,k2,k3
       real(DP),intent(in), dimension(18)  :: p1,p2,p3,p4
       real(DP),intent(in)                 :: vinv
       real(DP),intent(out),dimension(18,3):: a
       real(DP),            dimension(18)  :: d1,d2,d3
       real(DP),intent(out),dimension(3)   :: r1,r2,r3
       integer                             :: invtetra
       d1=p2-p1
       d2=p3-p1
       d3=p4-p1
! calculate r vectors
       r1(1)=(k2(2)*k3(3)-k2(3)*k3(2))
       r1(2)=(k2(3)*k3(1)-k2(1)*k3(3))
       r1(3)=(k2(1)*k3(2)-k2(2)*k3(1))
       r2(1)=(k3(2)*k1(3)-k3(3)*k1(2))
       r2(2)=(k3(3)*k1(1)-k3(1)*k1(3))
       r2(3)=(k3(1)*k1(2)-k3(2)*k1(1))
       r3(1)=(k1(2)*k2(3)-k1(3)*k2(2))
       r3(2)=(k1(3)*k2(1)-k1(1)*k2(3))
       r3(3)=(k1(1)*k2(2)-k1(2)*k2(1))
       r1=vinv*r1
       r2=vinv*r2
       r3=vinv*r3
! check r_dot_k
       call check_r_vector(r1,r2,r3,k1,k2,k3,invtetra)
! invtetra=-1 case
       if(invtetra==-1) then
          r1=-1.0d0*r1
          r2=-1.0d0*r2
          r3=-1.0d0*r3
       end if
! calculate a vector
       a(1:18,1)=d1(1:18)*r1(1)+d2(1:18)*r2(1)+d3(1:18)*r3(1)
       a(1:18,2)=d1(1:18)*r1(2)+d2(1:18)*r2(2)+d3(1:18)*r3(2)
       a(1:18,3)=d1(1:18)*r1(3)+d2(1:18)*r2(3)+d3(1:18)*r3(3)
     end subroutine calc_avec_SHG

     subroutine check_r_vector(r1,r2,r3,k1,k2,k3,invtetra)
       implicit none
! This subroutine checks r vectors
! see G. Lehmann and M. Taut, phys. stat. sol.(b) vol.54, 469 (1972)
! checks condition ri_dot_kj=1(i=j case),
!                           =0(i/=j case) (Eq.(3.3) of the paper)
! sets invtetra  invtetra= 1 (ri_dot_ki=1 case)
!                invtetra=-1 (rt_dot_ki=-1 case)
!
! T. Hamada(Univ. Tokyo) Nov. 27, 2003
       integer,intent(out)               :: invtetra
       integer                           :: i
       real(DP),intent(in), dimension(3) :: r1, r2, r3, k1, k2, k3
       real(DP)                          :: eps, r1_dot_k1, r1_dot_k2, r1_dot_k3, r2_dot_k2, r2_dot_k3, r3_dot_k3
       eps=10.0d0**(-7)
! check ri_dot_ki
! ri_dot_ki must be 1.0d0
       r1_dot_k1=0.0d0
       r2_dot_k2=0.0d0
       r3_dot_k3=0.0d0
       do i=1,3
          r1_dot_k1=r1_dot_k1+r1(i)*k1(i)
          r2_dot_k2=r2_dot_k2+r2(i)*k2(i)
          r3_dot_k3=r3_dot_k3+r3(i)*k3(i)
       end do

! set invtetra
       invtetra=1
       if(r1_dot_k1<0.0d0.and.r2_dot_k2<0.0d0.and.r3_dot_k3<0.0d0) then
          invtetra=-1
       end if
     end subroutine check_r_vector

     subroutine calcf_b1(e,e0,e1,e2,e3,v,f_b)
       implicit none
! i0 term calculation
! (e.ge.e0.and.e.lt.e1) case
       real(DP),intent(in)  :: e,e0,e1,e2,e3,v
       real(DP),intent(out) :: f_b
       f_b=(e-e0)**2/((e1-e0)*(e2-e0)*(e3-e0))
       f_b=v*f_b*0.5d0
     end subroutine calcf_b1

     subroutine calcf_b2(e,e0,e1,e2,e3,v,f_b)
       implicit none
! i0 term calculation
! (e.ge.e1.and.e.lt.e2)
       real(DP),intent(in)  :: e,e0,e1,e2,e3,v
       real(DP),intent(out) :: f_b
       f_b=(e-e0)**2/((e1-e0)*(e2-e0)*(e3-e0)) &
      &-(e-e1)**2/((e1-e0)*(e2-e1)*(e3-e1))
       f_b=v*f_b*0.5d0
     end subroutine calcf_b2

     subroutine calcf_b3(e,e0,e1,e2,e3,v,f_b)
       implicit none
! i0 term calculation
! (e.ge.e2.and.e.lt.e3)
       real(DP),intent(in)  :: e,e0,e1,e2,e3,v
       real(DP),intent(out) :: f_b
       f_b=(e-e3)**2/((e3-e0)*(e3-e1)*(e3-e2))
       f_b=v*f_b*0.5d0
     end subroutine calcf_b3

     subroutine svec1(k1,k2,k3,e,e0,e1,e2,e3,s)
       implicit none
! s vector calculation
! (e.ge.e0.and.e.lt.e1) case
       real(DP),intent(in)               :: e, e0, e1, e2, e3
       real(DP),intent(in), dimension(3) :: k1, k2, k3
       real(DP),intent(out),dimension(3) :: s
       real(DP),dimension(3)             :: k0, sum
! clean arrays
       k0=0.0d0
       s=0.0d0
       sum=0.0d0
       sum = (k1-k0)/(e1-e0) + (k2-k0)/(e2-e0) + (k3-k0)/(e3-e0)
       s = k0 + sum*(e-e0)/3.0d0
     end subroutine svec1

     subroutine svec2(k1,k2,k3,e,e0,e1,e2,e3,s)
       implicit none
! s vector calculation
! (e.ge.e1.and.e.lt.e2)
       real(DP),intent(in)               :: e, e0, e1, e2, e3
       real(DP),intent(in), dimension(3) :: k1, k2, k3
       real(DP),intent(out),dimension(3) :: s
       real(DP),dimension(3)             :: k0,s0,s1,s3, sum
       real(DP)                          :: f0,f1
       integer                           :: i
! clean arrays
       k0=0.0d0
       s=0.0d0 ; s0=0.0d0 ; s1 =0.0d0 ; s3 =0.0d0
       sum =0.0d0
       sum = (k1-k0)/(e1-e0) + (k2-k0)/(e2-e0) + (k3-k0)/(e3-e0)
       s0 = k0 +sum*(e1-e0)/3.0d0
       s0 = s0*(e-e2)/(e1-e2)
       sum=0.0d0
       sum = (k0-k3)/(e0-e3) + (k1-k3)/(e1-e3) + (k2-k3)/(e2-e3)
       s3 = k3 + sum*(e2-e3)/3.0d0
       s3 = s3*(e-e1)/(e2-e1)
       s = s0 + s3
     end subroutine svec2

     subroutine svec3(k1,k2,k3,e,e0,e1,e2,e3,s)
       implicit none
! s vector calculation
! (e.ge.e2.and.e.lt.e3) case
       real(DP),intent(in)               :: e, e0, e1, e2, e3
       real(DP),intent(in), dimension(3) :: k1, k2, k3
       real(DP),intent(out),dimension(3) :: s
       real(DP),dimension(3)             :: k0,sum
! clean arrays
       k0=0.0d0
       s=0.0d0
       sum=0.0d0
       sum=(k0-k3)/(e0-e3)+(k1-k3)/(e1-e3) + (k2-k3)/(e2-e3)
       s = k3 + sum*(e-e3)/3.0d0
     end subroutine svec3

     subroutine calc_eb0_at_svec
       implicit none
       integer                    :: i
       real(kind=DP)              :: b0_dot_s
       real(kind=DP),dimension(3) :: b0
       b0=(eb0(2)-eb0(1))*r1+(eb0(3)-eb0(1))*r2+(eb0(4)-eb0(1))*r3
       b0_dot_s = b0(1)*s(1) + b0(2)*s(2) + b0(3)*s(3)
       eb0_at_svec  = eb0(1) + b0_dot_s
     end subroutine calc_eb0_at_svec
 end subroutine nstrans_SHG
 
 subroutine calc_SHG_energy_factor_omit(e1,e2,efact,iterm)
    implicit none
!
!   calculate energy factor of SHG moment by omittion
!
    real(kind=DP), intent(in)  :: e1, e2
    real(kind=DP), intent(out) :: efact
    real(kind=DP)              :: edelta, edelta0
    integer, intent(in)        :: iterm
    if(iterm==1) then
       efact = 1.0d0/((e1+e2)*e1**3)
       return
    end if
    edelta=(2.0d0*e1-e2)
    if(iterm==2) then
       if(dabs(edelta)<dres_cut_off) then
          efact =0.0d0
       else
          efact=-1.0d0/(edelta*e1**3)
       end if
    else if(iterm==3) then
       if(dabs(edelta)<dres_cut_off) then
          efact=0.0d0
       else
          efact=1.0d0/(edelta*e2**3)
       end if
    end if
 end subroutine calc_SHG_energy_factor_omit

 subroutine calc_SHG_energy_factor_damp(e1,e2,efact,iterm)
    implicit none
!
!   calcaulte energy factor of SHG moment by damping 
!
!   damping factor is given by dres_cut_off
    real(kind=DP), intent(in)  :: e1, e2
    real(kind=DP), intent(out) :: efact
    real(kind=DP)              :: edelta, edelta0
    integer, intent(in)        :: iterm
    if(iterm==1) then
       efact = 1.0d0/((e1+e2)*e1**3)
       return
    end if
    edelta=(2.0d0*e1-e2)
    if(edelta<0.0d0) edelta=edelta-dres_cut_off
    if(edelta>=0.0d0) edelta =edelta+dres_cut_off
    if(iterm==2) then
       efact=-1.0d0/(edelta*e1**3)
    else if(iterm==3) then
       efact=1.0d0/(edelta*e2**3)
    end if
 end subroutine calc_SHG_energy_factor_damp

 subroutine set_impbkta(ik0,ik2,i1,i2,i3,impbkt0)
    implicit none
    integer, intent(in)      :: ik0, ik2, i1, i2, i3
    real(DP), dimension(:)   :: impbkt0
    real(DP), dimension(3,2) :: p1, p2, p3
    call set_ptrans(ik2,i1,i2,i3,p1,p2,p3)
    call set_p0trans(ik0,p1,p2,p3)
    call calc_impbkt(p1,p2,p3,impbkt0)
 end subroutine set_impbkta

 subroutine set_p0trans(k0,p1,p2,p3)
    implicit none
    integer, intent(in)                     :: k0
    real(DP), intent(inout), dimension(3,2) :: p1, p2, p3
    integer                                 :: iop
    real(DP), dimension(3,3)                :: u
!    iop = vk0_op(k0)
!    u(1:3,1:3)=op(1:3,1:3,iop)
!    call rotate_ptrans(u,p1)
!    call rotate_ptrans(u,p2)
!    call rotate_ptrans(u,p3)
 end subroutine set_p0trans

 subroutine rotate_ptrans(wk,p)
    implicit none
    real(DP), intent(in), dimension(3,3)    :: wk
    real(DP), intent(inout), dimension(3,2) :: p
    real(DP), dimension(3) :: rp, ip, rp0, ip0
    rp(1:3) = p(1:3,1)
    ip(1:3) = p(1:3,2)
    call rotate_ptrans_core
    p(1:3,1) = rp0(1:3)
    p(1:3,2) = ip0(1:3)
    contains
     subroutine rotate_ptrans_core
       implicit none
! real part rotation
       rp0(1) = wk(1,1)*rp(1)+wk(1,2)*rp(2)+wk(1,3)*rp(3)
       rp0(2) = wk(2,1)*rp(1)+wk(2,2)*rp(2)+wk(2,3)*rp(3)
       rp0(3) = wk(3,1)*rp(1)+wk(3,2)*rp(2)+wk(3,3)*rp(3)
! imaginary part rotation
       ip0(1) = wk(1,1)*ip(1)+wk(1,2)*ip(2)+wk(1,3)*ip(3)
       ip0(2) = wk(2,1)*ip(1)+wk(2,2)*ip(2)+wk(2,3)*ip(3)
       ip0(3) = wk(3,1)*ip(1)+wk(3,2)*ip(2)+wk(3,3)*ip(3)
     end subroutine rotate_ptrans_core
 end subroutine rotate_ptrans

 subroutine set_ptrans(ik,i1,i2,i3,p1,p2,p3)
    implicit none
!
! p1=ptrm(ik,i1,i3); p2=ptrm(ik,i2,i1); p3=ptrm(ik,i3,i2)
!
    integer, intent(in)                        :: ik
    integer, intent(in)                        :: i1, i2, i3
    real(kind=DP), intent(out), dimension(3,2) :: p1, p2, p3

    p1(1:3,1:2) = ptrm(ik,i1,i3,1:3,1:2)
    p2(1:3,1:2) = ptrm(ik,i2,i1,1:3,1:2)
    p3(1:3,1:2) = ptrm(ik,i3,i2,1:3,1:2)
 end subroutine set_ptrans

 subroutine calc_impbkt(p1,p2,p3,impbkt)
    implicit none
!
!   calcualte Im[p1{p2,p3}]
!
    real(kind=DP), intent(in), dimension(3,2) :: p1, p2, p3
    real(kind=DP), intent(out),dimension(18)  :: impbkt
    real(kind=DP), dimension(6,2)             :: bkt
    real(kind=DP), dimension(3)               :: rp1, ip1
    real(kind=DP)                             :: rp, ip, rbkt, ibkt, rpbkt, ipbkt
    integer                                   :: index, ic, ibk
    rp1(1:3) = p1(1:3,1)
    ip1(1:3) = p1(1:3,2)
    call calc_bkt(p2,p3,bkt)
    do index = 1, 18
       call get_c_and_bkt_index(index,ic,ibk)
       rp = rp1(ic)
       ip = ip1(ic)
       rbkt = bkt(ibk,1)
       ibkt = bkt(ibk,2)
       call cmpprod(rp,ip,rbkt,ibkt,rpbkt,ipbkt)
       impbkt(index) = ipbkt
    end do
 end subroutine calc_impbkt

 subroutine calc_bkt(p1,p2,bkt)
    implicit none
!
!   calculate branket {p1,p2}(ij) = (p1(i)*p2(j)+p1(j)+p2(i))/2
!
!   i, j -> cartesian index
!   Ref. E. Ghahramani, D. J. Moss, and J. E. Sipe, Phys. Rev. B vol.43, 8990 (1991)
!
    real(kind=DP), intent(in),  dimension(3,2) :: p1, p2
    real(kind=DP), intent(out), dimension(6,2) :: bkt
    real(kind=DP), dimension(3)                :: rp1, ip1, rp2, ip2
    real(kind=DP)                              :: rbkt, ibkt, a, b, c, d, e, f
    integer                                    :: index, ic1, ic2
    bkt =0.0d0
    rp1(1:3) = p1(1:3,1)
    ip1(1:3) = p1(1:3,2)
    rp2(1:3) = p2(1:3,1)
    ip2(1:3) = p2(1:3,2)
    do index = 1, 6
! find cartesian index ic1 and ic2
       call get_cindex(index,ic1,ic2)
       a = rp1(ic1)
       b = ip1(ic1)
       c = rp2(ic2)
       d = ip2(ic2)
       call cmpprod(a,b,c,d,e,f)
       rbkt = e
       ibkt = f
       a = rp1(ic2)
       b = ip1(ic2)
       c = rp2(ic1)
       d = ip2(ic1)
       call cmpprod(a,b,c,d,e,f)
       rbkt = rbkt + e
       ibkt = ibkt + f
       bkt(index,1) = 0.5d0*rbkt
       bkt(index,2) = 0.5d0*ibkt
    end do
 end subroutine calc_bkt

 subroutine get_c_and_bkt_index(index,ic,ibkt)
    implicit none
    integer, intent(in)  :: index
    integer, intent(out) :: ic, ibkt
    integer, dimension(18) :: icd, ibktd
    data icd   /1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3/
    data ibktd /1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6/
    ic = icd(index)
    ibkt = ibktd(index)
!    if(index==1)       then  ! xxx term
!       ic = 1
!       ibkt = 1
!    else if(index==2)  then  ! xxy=xyx term
!       ic = 1
!       ibkt = 2
!    else if(index==3)  then  ! xxz=xzx term
!       ic = 1
!       ibkt = 3
!    else if(index==4)  then  ! xyy term
!       ic = 1
!       ibkt = 4
!    else if(index==5)  then  ! xyz=xzy term
!       ic = 1
!       ibkt = 5
!    else if(index==6)  then  ! xzz term
!       ic = 1
!       ibkt = 6
!    else if(index==7)  then  ! yxx term
!       ic = 2
!       ibkt = 1
!    else if(index==8)  then  ! yxy=yyx term
!       ic = 2
!       ibkt = 2
!    else if(index==9)  then  ! yxz=yzx term
!       ic = 2
!       ibkt = 3
!    else if(index==10) then  ! yyy term
!       ic = 2
!       ibkt = 4
!    else if(index==11) then  ! yyz=yzy term
!       ic = 2
!       ibkt = 5
!    else if(index==12) then  ! yzz term
!       ic = 2
!       ibkt = 6
!    else if(index==13) then  ! zxx term
!       ic = 3
!       ibkt = 1
!    else if(index==14) then  ! zxy=zyx term
!       ic = 3
!       ibkt = 2
!    else if(index==15) then  ! zxz=zzx term
!       ic = 3
!       ibkt = 3
!    else if(index==16) then  ! zyy term
!       ic = 3
!       ibkt = 4
!    else if(index==17) then  ! zyz=zzy term
!       ic = 3
!       ibkt = 5
!    else if(index==18) then  ! zzz term
!       ic = 3
!       ibkt = 6
!    end if
 end subroutine get_c_and_bkt_index

 subroutine get_cindex(index,i,j)
    implicit none
    integer, intent(in)   :: index
    integer, intent(out)  :: i, j
    integer, dimension(6) :: id, jd
    data id /1,1,1,2,2,3/
    data jd /1,2,3,2,3,3/
    i = id(index)
    j = jd(index)
!    if(index==1)      then   ! xx term
!       i = 1
!       j = 1
!    else if(index==2) then   ! xy term
!       i = 1
!       j = 2
!    else if(index==3) then   ! xz term
!       i = 1
!       j = 3
!    else if(index==4) then   ! yy term
!       i = 2
!       j = 2
!    else if(index==5) then   ! yz term
!       i = 2
!       j = 3
!    else if(index==6) then   ! zz term
!       i = 3
!       j = 3
!    end if
 end subroutine get_cindex

 subroutine get_cind_of_chi2(i,j,k,index)
    implicit none
    integer, intent(in)  :: i,j,k
    integer, intent(out) :: index
    integer, dimension(3,3,3) :: id
    data id(1,1,1),id(1,1,2),id(1,2,1),id(1,1,3),id(1,3,1),id(1,2,2),id(1,2,3),id(1,3,2),id(1,3,3) &
   &    ,id(2,1,1),id(2,1,2),id(2,2,1),id(2,1,3),id(2,3,1),id(2,2,2),id(2,2,3),id(2,3,2),id(2,3,3) &
   &    ,id(3,1,1),id(3,1,2),id(3,2,1),id(3,1,3),id(3,3,1),id(3,2,2),id(3,2,3),id(3,3,2),id(3,3,3) &
   &    / 1, 2, 2, 3, 3, 4, 5, 5, 6 &
   &    , 7, 8, 8, 9, 9,10,11,11,12 &
   &    ,13,14,14,15,15,16,17,17,18 /

    index = id(i,j,k)
!   get cartesian index of chi2
!                                             no.  term   index
!    if(i==1.and.j==1.and.k==1) then         !  1   xxx      1
!       index = 1
!    else if(i==1.and.j==1.and.k==2) then    !  2   xxy      2
!       index = 2
!    else if(i==1.and.j==2.and.k==1) then    !  3   xyx      2
!       index = 2
!    else if(i==1.and.j==1.and.k==3) then    !  4   xxz      3
!       index = 3
!    else if(i==1.and.j==3.and.k==1) then    !  5   xzx      3
!       index = 3
!    else if(i==1.and.j==2.and.k==2) then    !  6   xyy      4
!       index = 4
!    else if(i==1.and.j==2.and.k==3) then    !  7   xyz      5
!       index = 5
!    else if(i==1.and.j==3.and.k==2) then    !  8   xzy      5
!       index = 5
!    else if(i==1.and.j==3.and.k==3) then    !  9   xzz      6
!       index = 6
!    else if(i==2.and.j==1.and.k==1) then    ! 10   yxx      7
!       index = 7
!    else if(i==2.and.j==1.and.k==2) then    ! 11   yxy      8
!       index = 8
!    else if(i==2.and.j==2.and.k==1) then    ! 12   yyx      8
!       index = 8
!    else if(i==2.and.j==1.and.k==3) then    ! 13   yxz      9
!       index = 9
!    else if(i==2.and.j==3.and.k==1) then    ! 14   yzx      9
!       index = 9
!    else if(i==2.and.j==2.and.k==2) then    ! 15   yyy     10
!       index = 10
!    else if(i==2.and.j==2.and.k==3) then    ! 16   yyz     11
!       index = 11
!    else if(i==2.and.j==3.and.k==2) then    ! 17   yzy     11
!       index = 11
!    else if(i==2.and.j==3.and.k==3) then    ! 18   yzz     12
!       index = 12
!    else if(i==3.and.j==1.and.k==1) then    ! 19   zxx     13
!       index = 13
!    else if(i==3.and.j==1.and.k==2) then    ! 20   zxy     14
!       index = 14
!    else if(i==3.and.j==2.and.k==1) then    ! 21   zyx     14
!       index = 14
!    else if(i==3.and.j==1.and.k==3) then    ! 22   zxz     15
!       index = 15
!    else if(i==3.and.j==3.and.k==1) then    ! 23   zzx     15
!       index = 15
!    else if(i==3.and.j==2.and.k==2) then    ! 24   zyy     16
!       index = 16
!    else if(i==3.and.j==2.and.k==3) then    ! 25   zyz     17
!       index = 17
!    else if(i==3.and.j==3.and.k==2) then    ! 26   zzy     17
!       index = 17
!    else if(i==3.and.j==3.and.k==3) then    ! 27   zzz     18
!       index = 18
!    end if
 end subroutine get_cind_of_chi2

 subroutine virt_elec_process_THG
    implicit none
    if(way_BZintegral == L_TETRAHEDRON) then
       write(nfout,'(1x,"!* linear tetrahedron method is not available for THG calculation")')
       write(nfout,'(1x,"!* parabolic smearing method is used instead")')
       way_BZintegral = PARABOLIC_B
       call set_default_width
    end if
    call smearing_THG_ve(nfout,nstep)
    contains
     subroutine set_default_width
       width=0.01837451d0
       if(printable) &
       & write(nfout,'(1x,"!* default smearing width of 0.01837451 Hartree (=0.5eV) is set")')
     end subroutine set_default_width
 end subroutine virt_elec_process_THG

 subroutine virt_hole_process_THG
    implicit none
    if(way_BZintegral == L_TETRAHEDRON.and.virt_ex_type /= ALL_TYPE) then
       write(nfout,'(1x,"!* linear tetrahedron method is not available for THG calculation")')
       write(nfout,'(1x,"!* parabolic smearing method is used instead")')
    end if
    call smearing_THG_vh(nfout,nstep)
    contains
     subroutine set_default_width
       width=0.01837451d0
       if(printable) &
       & write(nfout,'(1x,"!* default smearing width of 0.01837451 Hartree (=0.5eV) is set")')
     end subroutine set_default_width
 end subroutine virt_hole_process_THG

 subroutine three_state_process_THG
    implicit none
    if(way_BZintegral == L_TETRAHEDRON.and.virt_ex_type /= ALL_TYPE) then
       write(nfout,'(1x,"!* linear tetrahedron method is not available for THG calculation")')
       write(nfout,'(1x,"!* parabolic smearing method is used instead")')
    end if
    call smearing_three_state_THG(nfout,nstep)
    contains
     subroutine set_default_width
       width=0.01837451d0
       if(printable) &
       & write(nfout,'(1x,"!* default smearing width of 0.01837451 Hartree (=0.5eV) is set")')
     end subroutine set_default_width
 end subroutine three_state_process_THG

 subroutine smearing_THG_ve(nfout,nstep)
    implicit none
!
!   smearing calculation scheme for THG susceptibility calculation (virtual electron term)
!
!   ref. D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel
!   Phys. Rev. B. vol. 41, pp 1542 (1990)

    integer, intent(in) :: nfout, nstep
    integer             :: ispin, istep, ipes

    if(printable) then
       if(way_BZintegral==PARABOLIC_B) &
      &   write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
       if(way_BZintegral==GAUSSIAN_B) &
      &   write(nfout,'(1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
       write(nfout,'(1x, "  width = ",f10.5)') width
       if(dres_method == OMIT) then
          write(nfout,'(1x, "  contributions from double resonance transitions are omitted")')
          write(nfout,'(1x, "  cut-off for omittion = ",f10.5," Hartree")') dres_cut_off
       endif
       if(dres_method == DAMPING) then
          write(nfout,'(1x, "  contributions from double resonance transitions are damped")')
          write(nfout,'(1x, "  damping factor = ",f10.5," Hartree")') dres_cut_off
       endif
       if(way_BZintegral /= L_TETRAHEDRON.and.smearing_fact == RESONANCE) &
     & write(nfout,'(1x, "  smearing factor type = resonance")')
       if(way_BZintegral /=L_TETRAHEDRON.and.smearing_fact == OFF_RESONANCE) &
     & write(nfout,'(1x, "  smearing factor type = off_resonance")')
       write(nfout,'(1x,"*excitation  = electron ")')
       if(nlo_band == ALL_BAND) write(nfout,'(1x,"  transition = inter + intraband ")')
       if(nlo_band == INTER_BAND) write(nfout,'(1x,"  transition = interband ")')
       if(nlo_band == INTRA_BAND) write(nfout,'(1x,"  transition = intraband ")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA_TERM) write(nfout,'(1x,"  integration of omega THG moment")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA2_TERM) write(nfout,'(1x,"  integration of omega2 THG moment")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA3_TERM) write(nfout,'(1x,"  integration of omega3 THG moment")')
    end if

!   gaussian/parabolic smearing
    do ispin = 1, nspin
       if(spin==MAJOR.and.ispin/=major_spin) cycle
       if(spin==MINOR.and.ispin/=minor_spin) cycle
       call smearing_THG_ve_core
       imchi3(1:nstep,1:30) = imchi3(1:nstep,1:30) - PAI/3.0d0*chi3int(1:nstep,1:30)
       if(printable) then
          if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
          if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
          write(nfout,'(2x,"ispin = ",i3)') ispin
       end if
    end do

  contains
   subroutine smearing_THG_ve_core
     implicit none
!
!   calculate virtual electron term of Im[chi3]
!
!   D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel, Phys. Rev. B, vol 41, pp 1542 (1990).
!   Eq.(7)
!   omega   : photon energy
!   p       : tansition moment(P)
!   rep4bkt : Re[p1*(p2,p3,p4)]   (p2,p3,p4) -> symmetrized product of p2*p3*p4
!   Eab     : energy difference between band ea and eb
!
     integer                        :: ik, ik2, ieig, jeig, keig, leig, ideg1, ideg2, iintra, iterm
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, ek, el, eli, eji, eki
     real(kind=DP)                  :: eji2, eli2, eki2, eli3, eki3
     real(kind=DP)                  :: efact, omega, omega2, omega3, c2, sigma
     real(kind=DP), dimension(3,2)  :: p1, p2, p3, p4
     real(kind=DP), dimension(30)   :: rep4bkt, pefact

     chi3int = 0.0d0

     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin +1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do keig = 1, neg
                 if(band_type(keig,ispin)/=UNFILLED_BAND) cycle
                 ek = eb_ek(ik,keig)
                 do leig = 1, neg
                    if(band_type(leig,ispin)/=UNFILLED_BAND) cycle
                    el = eb_ek(ik,leig)
! check intraband excitation
                    if(jeig==keig.or.keig==leig) then
                       iintra = 1
                    else
                       iintra = 0
                    end if
                    if(nlo_band == INTER_BAND.and.iintra /= 0) cycle
                    if(nlo_band == INTRA_BAND.and.iintra == 0) cycle
! set energy difference
                    eli = el - ei + scissor
                    eji = ej - ei + scissor
                    eki = ek - ei + scissor
! set transition moment factor
                    call set_ptrans2(ik,jeig,keig,leig,ieig,p1,p2,p3,p4)               ! set P transition moment
                    call calc_rep4bkt(p1,p2,p3,p4,rep4bkt)
! omega3 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA3_TERM) then
                       eki3 = 3.0d0*eki ; eji2 = 2.0d0*eji ; eli3 = 3.0d0*eli
                       iterm = 1
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,eli,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,eli,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega3 = 3.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega3==0.0d0) omega3=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(omega3,eki,eli,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(omega3,eki,eli,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega3,eji,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega3,eji,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega2 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA2_TERM) then
                       eki3 = 3.0d0*eki; eji2 = 2.0d0*eji ; eli2 = 2.0d0*eli
                       iterm = 2
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,eli,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,eli,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega2 = 2.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega2==0.0d0) omega2=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,omega2,eli,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,omega2,eli,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega2,eki,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega2,eki,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA_TERM) then
                       eli2 = 2.0d0*eli ; eli3 = 3.0d0*eli; eki2 =2.0d0*eki
                       iterm = 3
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,eli,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,eli,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega = e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega==0.0d0) omega=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,omega,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,omega,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega,eli,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,eli,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_THG_ve_core
 end subroutine smearing_THG_ve

 subroutine smearing_THG_vh(nfout,nstep)
    implicit none
!
!   smearing calculation scheme for THG susceptibility calculation (virtual hole term)
!
    integer, intent(in) :: nfout, nstep
    integer             :: ispin, istep, ipes

    if(printable) then
       if(virt_ex_type /= ALL_TYPE) then
          if(way_BZintegral==PARABOLIC_B) &
         &   write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
          if(way_BZintegral==GAUSSIAN_B) &
         &   write(nfout,'(1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
          write(nfout,'(1x, "  width = ",f10.5)') width
          if(dres_method == OMIT) then
             write(nfout,'(1x, "  contributions from double resonance transitions are omitted")')
             write(nfout,'(1x, "  cut-off for omittion = ",f10.5," Hartree")') dres_cut_off
          endif
          if(dres_method == DAMPING) then
             write(nfout,'(1x, "  contributions from double resonance transitions are damped")')
             write(nfout,'(1x, "  damping factor = ",f10.5," Hartree")') dres_cut_off
          endif
          if(way_BZintegral /= L_TETRAHEDRON.and.smearing_fact == RESONANCE) &
        & write(nfout,'(1x, "  smearing factor type = resonance")')
          if(way_BZintegral /=L_TETRAHEDRON.and.smearing_fact == OFF_RESONANCE) &
        & write(nfout,'(1x, "  smearing factor type = off_resonance")')
          write(nfout,'(1x, "*excitation = hole ")')
          if(nlo_band == ALL_BAND) write(nfout,'(1x,"  transition = inter + intraband")')
          if(nlo_band == INTER_BAND) write(nfout,'(1x,"  transition = interband")')
          if(nlo_band == INTRA_BAND) write(nfout,'(1x,"  transition = intraband")')
       else
          write(nfout,'(1x, "*excitation = hole")')
          if(nlo_band == ALL_BAND) write(nfout,'(1x,"  transition = inter + intraband")')
          if(nlo_band == INTER_BAND) write(nfout,'(1x,"  transition = interband")')
          if(nlo_band == INTRA_BAND) write(nfout,'(1x,"  transition = intraband")')
       end if
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA_TERM) write(nfout,'(1x,"  integration of omega THG moment")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA2_TERM) write(nfout,'(1x,"  integration of omega2 THG moment")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA3_TERM) write(nfout,'(1x,"  integration of omega3 THG moment")')
    end if

!   gaussian/parabolic smearing
    do ispin = 1, nspin
       if(spin==MAJOR.and.ispin/=major_spin) cycle
       if(spin==MINOR.and.ispin/=minor_spin) cycle
       call smearing_THG_vh_termA                                    ! -(contained here)
       imchi3(1:nstep,1:30) = imchi3(1:nstep,1:30) - PAI/3.0d0*chi3int(1:nstep,1:30)
       call smearing_THG_vh_termB                                    ! -(contained here)
       imchi3(1:nstep,1:30) = imchi3(1:nstep,1:30) + PAI/3.0d0*chi3int(1:nstep,1:30)
       call smearing_THG_vh_termC                                    ! -(contained here)
       imchi3(1:nstep,1:30) = imchi3(1:nstep,1:30) + PAI/3.0d0*chi3int(1:nstep,1:30)
       if(printable) then
          if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
          if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
          write(nfout,'(2x,"ispin = ",i3)') ispin
       end if
    end do

  contains
   subroutine smearing_THG_vh_termA
     implicit none
!
!   calculate virtual hole term of Im[chi3] {<v|p|c><v|p|v><v|p|v><c|p|v>} term
!
!   D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel, Phys. Rev. B, vol 41, pp 1542 (1990).
!   Eq.(7)
!   omega : photon energy
!   p     : tansition moment(P)
!   rep4bkt : Re[p1*(p2,p3,p4)]   (p2,p3,p4) -> symmetrized product of p2*p3*p4
!   Eab   : energy difference between band ea and eb
!
     integer                        :: ik, ik2, ieig, jeig, keig, leig, iterm, iintra
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, ek, el, ejk, eji, ejl
     real(kind=DP)                  :: efact, omega, omega2, omega3, c2, sigma
     real(kind=DP), dimension(3,2)  :: p1, p2, p3, p4
     real(kind=DP), dimension(30)   :: rep4bkt, pefact

     chi3int = 0.0d0

     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin +1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do keig = 1, neg
                 if(band_type(keig,ispin)/=FILLED_BAND) cycle
                 ek = eb_ek(ik,keig)
                 do leig = 1, neg
                    if(band_type(leig,ispin)/=FILLED_BAND) cycle
                    el = eb_ek(ik,leig)
                    if(leig ==ieig.or.leig == keig) then
                        iintra = 1
                    else
                        iintra = 0
                    end if
                    if(nlo_band == INTER_BAND.and.iintra /= 0) cycle
                    if(nlo_band == INTRA_BAND.and.iintra == 0) cycle
                    if(iintra==1) then
                       if(leig==ieig.and.leig==keig) cycle
                    end if
! set energy difference
                    ejk = ej - ek + scissor
                    eji = ej - ei + scissor
                    ejl = ej - el + scissor
! set transition moment factor
                    call set_ptrans2hA(ik,jeig,ieig,leig,keig,p1,p2,p3,p4)               ! set P transition moment
                    call calc_rep4bkt(p1,p2,p3,p4,rep4bkt)
! omega3 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA3_TERM) then
                       iterm = 1
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,ejk,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,ejk,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega3 = 3.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega3==0.0d0) omega3=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(omega3,ejl,ejk,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(omega3,ejl,ejk,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega3,eji,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega3,eji,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega2 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA2_TERM) then
                       iterm = 2
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,ejk,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,ejk,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega2 = 2.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega2==0.0d0) omega2=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,omega2,ejk,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,omega2,ejk,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega2,ejl,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega2,ejl,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA_TERM) then
                       iterm = 3
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,ejk,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,ejk,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega = e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega==0.0d0) omega=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,omega,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,omega,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega,ejk,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,ejk,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                      end do
                    end if
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_THG_vh_termA

   subroutine smearing_THG_vh_termB
     implicit none
!
!   calculate virtual hole term  of Im[chi3] {<v|p|c><c|p|c><v|p|v><c|p|v>} term
!
!   D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel, Phys. Rev. B, vol 41, pp 1542 (1990).
!   Eq.(7)
!   omega : photon energy
!   p     : tansition moment(P)
!   rep4bkt : Re[p1*(p2,p3,p4)]   (p2,p3,p4) -> symmetrized product of p2*p3*p4
!   Eab   : energy difference between band ea and eb
!
     integer                        :: ik, ik2, ieig, jeig, keig, leig, iterm, iintra
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, ek, el, ekl, eji, eki
     real(kind=DP)                  :: efact, omega, omega2, omega3, c2, sigma
     real(kind=DP), dimension(3,2)  :: p1, p2, p3, p4
     real(kind=DP), dimension(30)   :: rep4bkt, pefact

     chi3int = 0.0d0

     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin +1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do keig = 1, neg
                 if(band_type(keig,ispin)/=UNFILLED_BAND) cycle
                 ek = eb_ek(ik,keig)
                 do leig = 1, neg
                    if(band_type(leig,ispin)/=FILLED_BAND) cycle
                    el = eb_ek(ik,leig)
                    if(leig == ieig.or.jeig == keig) then
                       iintra = 1
                    else
                       iintra = 0
                    end if
                    if(iintra == 1) cycle
!                   if(nlo_band == INTER_BAND.and.iintra /= 0) cycle
!                   if(nlo_band == INTRA_BAND.and.iintra == 0) cycle
! set energy difference
                    ekl = ek - el + scissor
                    eji = ej - ei + scissor
                    eki = ek - ei + scissor
! set transition moment factor
                    call set_ptrans2hB(ik,jeig,keig,ieig,leig,p1,p2,p3,p4)               ! set P transition moment
                    call calc_rep4bkt(p1,p2,p3,p4,rep4bkt)
! omega3 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA3_TERM) then
                       iterm = 1
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,ekl,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,ekl,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega3 = 3.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega3==0.0d0) omega3=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(omega3,eki,ekl,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(omega3,eki,ekl,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega3,eji,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega3,eji,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega2 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA2_TERM) then
                       iterm = 2
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,ekl,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,ekl,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega2 = 2.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega2==0.0d0) omega2=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,omega2,ekl,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,omega2,ekl,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega2,eki,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega2,eki,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA_TERM) then
                       iterm = 3
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,ekl,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,ekl,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega = e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega==0.0d0) omega=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,eki,omega,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,eki,omega,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega,ekl,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,ekl,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_THG_vh_termB

   subroutine smearing_THG_vh_termC
     implicit none
!
!   calculate virtual hole term of Im[chi3] {<v|p|c><v|p|v><c|p|c><c|p|v>} term
!
!   D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel, Phys. Rev. B, vol 41, pp 1542 (1990).
!   Eq.(7)
!   omega : photon energy
!   p     : tansition moment(P)
!   rep4bkt : Re[p1*(p2,p3,p4)]   (p2,p3,p4) -> symmetrized product of p2*p3*p4
!   Eab   : energy difference between band ea and eb
!
     integer                        :: ik, ik2, ieig, jeig, keig, leig, iterm, iintra
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, ek, el, ekl, eji, ejl
     real(kind=DP)                  :: efact, omega, omega2, omega3, c2, sigma
     real(kind=DP), dimension(3,2)  :: p1, p2, p3, p4
     real(kind=DP), dimension(30)   :: rep4bkt, pefact

     chi3int = 0.0d0

     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin +1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do keig = 1, neg
                 if(band_type(keig,ispin)/=UNFILLED_BAND) cycle
                 ek = eb_ek(ik,keig)
                 do leig = 1, neg
                    if(band_type(leig,ispin)/=FILLED_BAND) cycle
                    el = eb_ek(ik,leig)
                    if(leig == ieig.or.jeig == keig) then
                       iintra = 1
                    else
                       iintra = 0
                    end if
                    if(iintra == 1) cycle
!                   if(nlo_band == INTER_BAND.and.iintra /= 0) cycle
!                   if(nlo_band == INTRA_BAND.and.iintra == 0) cycle
! set energy difference
                    ekl = ek - el + scissor
                    eji = ej - ei + scissor
                    ejl = ej - el + scissor
! set transition moment factor
                    call set_ptrans2hC(ik,jeig,ieig,keig,leig,p1,p2,p3,p4)               ! set P transition moment
                    call calc_rep4bkt(p1,p2,p3,p4,rep4bkt)
! omega3 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA3_TERM) then
                       iterm = 1
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,ekl,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,ekl,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega3 = 3.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega3==0.0d0) omega3=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(omega3,ejl,ekl,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(omega3,ejl,ekl,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega3,eji,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega3,eji,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega2 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA2_TERM) then
                       iterm = 2
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,ekl,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,ekl,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega2 = 2.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega2==0.0d0) omega2=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,omega2,ekl,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,omega2,ekl,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega2,ejl,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega2,ejl,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA_TERM) then
                       iterm = 3
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,ekl,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,ekl,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep = 1, nstep
                          omega = e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega==0.0d0) omega=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_eh_omit(eji,ejl,omega,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_eh_damp(eji,ejl,omega,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega,ekl,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,ekl,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_THG_vh_termC
 end subroutine smearing_THG_vh

 subroutine smearing_three_state_THG(nfout,nstep)
    implicit none
!
!   smearing calculation scheme for THG susceptibility calculation (three state term)
!
!   ref. D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel
!   Phys. Rev. B. vol. 41, pp 1542 (1990)

    integer, intent(in) :: nfout, nstep
    integer             :: ispin, istep, ipes

    if(nlo_band == INTRA_BAND) return   ! three level term has no intra-band contribution
    if(nlo_term == OMEGA2_term) return  ! three level term has no omega2 contribution

    if(printable) then
      if(virt_ex_type /= ALL_TYPE) then
          if(way_BZintegral==PARABOLIC_B) &
         &   write(nfout,'(1x, " ---------- Parabolic broadening Brillouin zone integration ----------")')
          if(way_BZintegral==GAUSSIAN_B) &
         &   write(nfout,'(1x, " ---------- Gaussian broadening Brillouin zone integration ----------")')
          write(nfout,'(1x, "  width = ",f10.5)') width
          if(dres_method == OMIT) then
             write(nfout,'(1x, "  contributions from double resonance transitions are omitted")')
             write(nfout,'(1x, "  cut-off for omittion = ",f10.5," Hartree")') dres_cut_off
          endif
          if(dres_method == DAMPING) then
             write(nfout,'(1x, "  contributions from double resonance transitions are damped")')
             write(nfout,'(1x, "  damping factor = ",f10.5," Hartree")') dres_cut_off
          endif
          if(way_BZintegral /= L_TETRAHEDRON.and.smearing_fact == RESONANCE) &
          & write(nfout,'(1x, "  smearing factor type = resonance")')
          if(way_BZintegral /=L_TETRAHEDRON.and.smearing_fact == OFF_RESONANCE) &
          & write(nfout,'(1x, "  smearing factor type = off_resonance")')
           write(nfout,'(1x,"*excitation = three state")')
          if(nlo_band == INTER_BAND.or.nlo_band == ALL_BAND) write(nfout,'(1x,"  transition = interband ")')
       else
          write(nfout,'(1x, "*excitation = three state")')
          if(nlo_band == INTER_BAND.or.nlo_band == ALL_BAND) write(nfout,'(1x,"  transition = interband ")')
       end if
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA_TERM) write(nfout,'(1x,"  integration of omega THG moment")')
       if(nlo_term == ALL_TERM.or.nlo_term == OMEGA3_TERM) write(nfout,'(1x,"  integration of omega3 THG moment")')
    end if

!   gaussian/parabolic smearing
    do ispin = 1, nspin
       if(spin==MAJOR.and.ispin/=major_spin) cycle
       if(spin==MINOR.and.ispin/=minor_spin) cycle
       call smearing_THG_three_state_core
       imchi3(1:nstep,1:30) = imchi3(1:nstep,1:30) + PAI/3.0d0*chi3int(1:nstep,1:30)

       if(printable) then
          if(spin==MAJOR) write(nfout,'(1x," ispin = major spin")')
          if(spin==MINOR) write(nfout,'(1x," ispin = minor spin")')
          write(nfout,'(2x,"ispin = ",i3)') ispin
       end if
    end do

  contains
   subroutine smearing_THG_three_state_core
     implicit none
!
!   calculate three state term of THG Im[chi3]
!
!   D. J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Driel, Phys. Rev. B, vol 41, pp 1542 (1990).
!   Eq.(8)
!   omega : photon energy
!   p     : tansition moment(P)
!   rep4bkt : Re[p1*(p2,p3,p4)]   (p2,p3,p4) -> symmetrized product of p2*p3*p4
!   Eab   : energy difference between band ea and eb
!
     integer                        :: ik, ik2, ieig, jeig, keig, leig, iterm
     real(kind=DP)                  :: wspin = 1.0d0, weight, ei, ej, ek, el, eji, eli, ejk, elk
     real(kind=DP)                  :: efact, omega, omega3, c2, sigma, dummy
     real(kind=DP), dimension(3,2)  :: p1, p2, p3, p4
     real(kind=DP), dimension(30)   :: rep4bkt, pefact

     chi3int = 0.0d0

     do ik = ispin, kv3_ek-nspin+ispin, nspin
        ik2 = (ik-1)/nspin +1
        if(map_k_eps(ik2)/=mype) cycle
        do ieig = 1, neg
           if(band_type(ieig,ispin)/= FILLED_BAND) cycle
           ei = eb_ek(ik,ieig)
           do jeig = 1, neg
              if(band_type(jeig,ispin)/=UNFILLED_BAND) cycle
              ej = eb_ek(ik,jeig)
              do keig = 1, neg
                 if(band_type(keig,ispin)/=FILLED_BAND) cycle
                 ek = eb_ek(ik,keig)
                 do leig = 1, neg
                    if(band_type(leig,ispin)/=UNFILLED_BAND) cycle
                    el = eb_ek(ik,leig)
! set energy difference
                    eji = ej - ei + scissor
                    eli = el - ei + scissor
                    ejk = ej - ek + scissor
                    elk = el - ek + scissor
! set transition moment factor
                    call set_ptrans2(ik,jeig,keig,leig,ieig,p1,p2,p3,p4)               ! set P transition moment
                    call calc_rep4bkt(p1,p2,p3,p4,rep4bkt)
! omega3 term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA3_TERM) then
                       iterm = 1
                       if(smearing_fact == OFF_RESONANCE) then
                          if(dres_method == OMIT) call calc_THG_energy_factor_ts_omit(eji,ejk,eli,dummy,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_ts_damp(eji,ejk,eli,dummy,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep=1, nstep
                          omega3 = 3.0d0*e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega3==0.0d0) omega3=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_ts_omit(omega3,ejk,eli,dummy,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_ts_damp(omega3,ejk,eli,dummy,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega3,eji,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega3,eji,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
! omega term
                    if(nlo_term==ALL_TERM.or.nlo_term==OMEGA_TERM) then
                       iterm=3
                       if(smearing_fact == OFF_RESONANCE) then
                       if(dres_method == OMIT) call calc_THG_energy_factor_ts_omit(elk,ejk,eli,eji,efact,iterm)
                          if(dres_method == DAMPING) call calc_THG_energy_factor_ts_damp(elk,ejk,eli,eji,efact,iterm)
                          pefact = rep4bkt*efact
                       end if
                       do istep=1, nstep
                          omega = e(istep)
                          if(smearing_fact == RESONANCE) then
                             if(omega==0.0d0) omega=1.0d-4
                             if(dres_method == OMIT) call calc_THG_energy_factor_ts_omit(elk,ejk,omega,eji,efact,iterm)
                             if(dres_method == DAMPING) call calc_THG_energy_factor_ts_damp(elk,ejk,omega,eji,efact,iterm)
                             pefact = rep4bkt*efact
                          end if
                          if(way_BZintegral == PARABOLIC_B) call width2(omega,eli,width,c2,weight) ! b_Fermi
                          if(way_BZintegral == GAUSSIAN_B)  call gaussian_smearing_core(omega,eli,width,c2,sigma)
                          chi3int(istep,1:30) = chi3int(istep,1:30) + c2*wspin*pefact*qwgt_ek(ik)
                       end do
                    end if
                 end do
              end do
           end do
        end do
     end do
   end subroutine smearing_THG_three_state_core
 end subroutine smearing_three_state_THG
 
 subroutine set_ptrans2(ik,i1,i2,i3,i4,p1,p2,p3,p4)
    implicit none
!
! p1=ptrm(ik,i1,i4); p2=ptrm(ik,i2,i1); p3=ptrm(ik,i3,i2); p4=ptrm(ik,i4,i3)
!
    integer, intent(in)                        :: ik
    integer, intent(in)                        :: i1, i2, i3, i4
    real(kind=DP), intent(out), dimension(3,2) :: p1, p2, p3, p4

    p1(1:3,1:2) = ptrm(ik,i1,i4,1:3,1:2)
    p2(1:3,1:2) = ptrm(ik,i2,i1,1:3,1:2)
    p3(1:3,1:2) = ptrm(ik,i3,i2,1:3,1:2)
    p4(1:3,1:2) = ptrm(ik,i4,i3,1:3,1:2)
 end subroutine set_ptrans2

 subroutine set_ptrans2hA(ik,i1,i2,i3,i4,p1,p2,p3,p4)
    implicit none
!
! p1=ptrm(ik,i1,i2); p2=ptrm(ik,i2,i3); p3=ptrm(ik,i3,i4); p4=ptrm(ik,i4,i1)
!
    integer, intent(in)                        :: ik
    integer, intent(in)                        :: i1, i2, i3, i4
    real(kind=DP), intent(out), dimension(3,2) :: p1, p2, p3, p4

    p1(1:3,1:2) = ptrm(ik,i1,i2,1:3,1:2)
    p2(1:3,1:2) = ptrm(ik,i2,i3,1:3,1:2)
    p3(1:3,1:2) = ptrm(ik,i3,i4,1:3,1:2)
    p4(1:3,1:2) = ptrm(ik,i4,i1,1:3,1:2)
 end subroutine set_ptrans2hA

 subroutine set_ptrans2hB(ik,i1,i2,i3,i4,p1,p2,p3,p4)
    implicit none
!
! p1=ptrm(ik,i1,i3); p2=ptrm(ik,i2,i1); p3=ptrm(ik,i3,i4); p4=ptrm(ik,i4,i2)
!
    integer, intent(in)                        :: ik
    integer, intent(in)                        :: i1, i2, i3, i4
    real(kind=DP), intent(out), dimension(3,2) :: p1, p2, p3, p4

    p1(1:3,1:2) = ptrm(ik,i1,i3,1:3,1:2)
    p2(1:3,1:2) = ptrm(ik,i2,i1,1:3,1:2)
    p3(1:3,1:2) = ptrm(ik,i3,i4,1:3,1:2)
    p4(1:3,1:2) = ptrm(ik,i4,i2,1:3,1:2)
 end subroutine set_ptrans2hB

 subroutine set_ptrans2hC(ik,i1,i2,i3,i4,p1,p2,p3,p4)
    implicit none
!
! p1=ptrm(ik,i1,i2); p2=ptrm(ik,i2,i4); p3=ptrm(ik,i3,i1); p4=ptrm(ik,i4,i3)
!
    integer, intent(in)                        :: ik
    integer, intent(in)                        :: i1, i2, i3, i4
    real(kind=DP), intent(out), dimension(3,2) :: p1, p2, p3, p4

    p1(1:3,1:2) = ptrm(ik,i1,i2,1:3,1:2)
    p2(1:3,1:2) = ptrm(ik,i2,i4,1:3,1:2)
    p3(1:3,1:2) = ptrm(ik,i3,i1,1:3,1:2)
    p4(1:3,1:2) = ptrm(ik,i4,i3,1:3,1:2)
 end subroutine set_ptrans2hC

 subroutine calc_rep4bkt(p1,p2,p3,p4,repbkt)
    implicit none
!
!   calculate Re[p1*p2*p3*p4]
!
    real(kind=DP), intent(in), dimension(3,2) :: p1, p2, p3, p4
    real(kind=DP), intent(out),dimension(30)  :: repbkt
    real(kind=DP), dimension(10,2)            :: bkt3
    real(kind=DP), dimension(3)               :: rp1, ip1
    real(kind=DP)                             :: rp, ip, rbkt3, ibkt3, rp1bkt3, ip1bkt3
    integer                                   :: index, ic, ibk
! set p1
    rp1(1:3) = p1(1:3,1)
    ip1(1:3) = p1(1:3,2)
! calculate {p2,p3,p4}
    call calc_bkt3(p2,p3,p4,bkt3)               ! bkt3 = {p2,p3,p4}
    do index = 1, 30
       call get_c_and_bkt3_index(index,ic,ibk)
       rp = rp1(ic)
       ip = ip1(ic)
       rbkt3 = bkt3(ibk,1)
       ibkt3 = bkt3(ibk,2)
! calculate p1*{p2,p3,p4}
       call cmpprod(rp,ip,rbkt3,ibkt3,rp1bkt3,ip1bkt3)
       repbkt(index) = rp1bkt3
    end do
 end subroutine calc_rep4bkt

 subroutine calc_bkt3(p1,p2,p3,bkt3)
    implicit none
!
!   calculate branket {p1,p2,p3}ijk  = (p1(i)*p2(j)*p3(k)+p1(j)*p2(k)*p3(i)+p1(k)*p2(i)*p3(j)
!                                      +p1(i)*p2(k)*p3(j)+p1(j)*p2(i)*p3(k)+p1(k)*p2(j)*p3(i))/6.0d0
!
!   i, j, k -> cartesian index
!   Ref. D.J. Moss, E. Ghahramani, J. E. Sipe, and H. M. van Deiwl, Phys. Rev. B vol.41, 1542 (1990)
!
    integer                                     :: index, ip, ic1, ic2, ic3, ipc1, ipc2, ipc3, nperm
    real(kind=DP), intent(in),  dimension(3,2)  :: p1, p2, p3
    real(kind=DP), intent(out), dimension(10,2) :: bkt3
    real(kind=DP), dimension(3)                 :: rp1, ip1, rp2, ip2, rp3, ip3
    real(kind=DP)                               :: rbkt, ibkt, rep1, imp1, rep2, imp2, a, b, c, d, e, f
    bkt3 =0.0d0
    rp1(1:3) = p1(1:3,1) ; ip1(1:3) = p1(1:3,2)
    rp2(1:3) = p2(1:3,1) ; ip2(1:3) = p2(1:3,2)
    rp3(1:3) = p3(1:3,1) ; ip3(1:3) = p3(1:3,2)
    do index = 1, 10
! find cartesian index ic1, ic2, ic3
       call get_cindex3(index,ic1,ic2,ic3)
       rbkt = 0.0d0
       ibkt = 0.0d0
       do ip = 1, 6
          call get_cindex_perm3(ip,ic1,ic2,ic3,ipc1,ipc2,ipc3,nperm)
          a = rp1(ipc1); b = ip1(ipc1)
          c = rp2(ipc2); d = ip2(ipc2)
          e = rp3(ipc3); f = ip3(ipc3)
          call cmpprod(a,b,c,d,rep1,imp1)        ! rep1 = Re[p1*p2]    ; imp1 = Im[p1*p2]
          call cmpprod(rep1,imp1,e,f,rep2,imp2)  ! rep2 = Re[p1*p2*p3] ; imp2 = Im[p1*p2*p3]
          rbkt = rbkt+rep2
          ibkt = ibkt+imp2
       end do
       bkt3(index,1) = rbkt ; bkt3(index,2) = ibkt
    end do
    bkt3 =bkt3/6.0d0
 end subroutine calc_bkt3

 subroutine get_c_and_bkt3_index(index,ic,ibkt)
    implicit none
    integer, intent(in)    :: index
    integer, intent(out)   :: ic, ibkt
    integer, dimension(30) :: icd, ibktd
    integer                :: index0
!  index   ic    ibkt     term
!    1      1      1      xxxx
!    2      1      2      xxxy = xxyx = xyxx
!    3      1      3      xxxz = xxzx = xzxx
!    4      1      4      xxyy = xyxy = xyyx
!    5      1      5      xxyz = xxzy = xyxz = xyzx = xzxy= xzyx
!    6      1      6      xxzz = xzxz = xzzx
!    7      1      7      xyyy
!    8      1      8      xyyz = xyzy=  xzyy
!    9      1      9      xyzz = xzyz = xzzy
!   10      1     10      xzzz
!   11      2      1      yxxx
!   12      2      2      yxxy = yxyx = yyxx
!   13      2      3      yxxz = yxzx = yzxx
!   14      2      4      yxyy = yyxy = yyyx
!   15      2      5      yxyz = yxzy = yyxz = yyzx = yzxy = yzyx
!   16      2      6      yxzz = yzxz = yzzx
!   17      2      7      yyyy
!   18      2      8      yyyz = yyzy = yzyy
!   19      2      9      yyzz = yzyz = yzzy
!   20      2     10      yzzz
!   21      3      1      zxxx
!   22      3      2      zxxy = zxyx = zyxx
!   23      3      3      zxxz = zxzx = zzxx
!   24      3      4      zxyy = zyxy = zyyx
!   25      3      5      zxyz = zxzy = zyxz = zyzx = zzxy = zzyx
!   26      3      6      zxzz = zzxz = zzzx
!   27      3      7      zyyy
!   28      3      8      zyyz = zyzy = zyyz
!   29      3      9      zyzz
!   30      3     10      zzzz
    data   icd /1,1,1,1,1,1,1,1,1,1, &
   &            2,2,2,2,2,2,2,2,2,2, &
   &            3,3,3,3,3,3,3,3,3,3/
    data ibktd /1,2,3,4,5,6,7,8,9,10, &
   &            1,2,3,4,5,6,7,8,9,10, &
   &            1,2,3,4,5,6,7,8,9,10/
!   index0 = index - 1
!   ic = (index0)/10 + 1
!   ibkt = mod(index0,10) +1
    ic = icd(index)
    ibkt = ibktd(index)
 end subroutine get_c_and_bkt3_index

 subroutine get_cindex3(index,i,j,k)
    implicit none
!
!   get jkl index of chi3(i,j,k,l)
!
!   j, k, l : cartesian index
!   index   : compound index of (jkl)
!
    integer, intent(in)  :: index
    integer, intent(out) :: i, j, k
    integer, dimension(10) :: id, jd, kd
    data id /1,1,1,1,1,1,2,2,2,3/
    data jd /1,1,1,2,2,3,2,2,3,3/
    data kd /1,2,3,2,3,3,2,3,3,3/
    i = id(index)
    j = jd(index)
    k = kd(index)
!   if(index==1)      then   ! xxx term
!       i = 1 ; j = 1 ; k = 1
!    else if(index==2) then   ! xxy term
!       i = 1 ; j = 1 ; k = 2
!    else if(index==3) then   ! xxz term
!       i = 1 ; j = 1 ; k = 3
!    else if(index==4) then   ! xyy term
!       i = 1 ; j = 2 ; k = 2
!    else if(index==5) then   ! xyz term
!       i = 1 ; j = 2 ; k = 3
!    else if(index==6) then   ! xzz term
!       i = 1 ; j = 3 ; k = 3
!    else if(index==7) then   ! yyy term
!       i = 2 ; j = 2 ; k = 2
!    else if(index==8) then   ! yyz term
!       i = 2 ; j = 2 ; k = 3
!    else if(index==9) then   ! yzz term
!       i = 2 ; j = 3 ; k = 3
!    else if(index==10) then  ! zzz term
!       i = 3 ; j = 3 ; k = 3
!    end if
 end subroutine get_cindex3

 subroutine get_cindex_perm3(ipm,i,j,k,ip1,ip2,ip3,nperm)
    implicit none
!
! generate permutation of i, j, k
!
! nperm         : permutation number = 6
! ip1, ip2, ip3 : permutation of i, j, k
!
    integer, intent(in)  :: ipm, i, j, k
    integer, intent(out) :: ip1, ip2, ip3, nperm
    integer, dimension(3):: ijkd
    integer, dimension(6):: ip1d, ip2d, ip3d
    data ip1d /1,2,3,1,2,3/
    data ip2d /2,3,1,3,1,2/
    data ip3d /3,1,2,2,3,1/
    nperm = 6
    ijkd(1) = i
    ijkd(2) = j
    ijkd(3) = k
    ip1 = ijkd(ip1d(ipm))
    ip2 = ijkd(ip2d(ipm))
    ip3 = ijkd(ip3d(ipm))
!    if(ipm==1) then
!       ip1 = i ; ip2 = j ; ip3 = k
!    else if(ipm==2) then
!       ip1 = j ; ip2 = k ; ip3 = i
!    else if(ipm==3) then
!       ip1 = k ; ip2 = i ; ip3 = j
!    else if(ipm==4) then
!       ip1 = i ; ip2 = k ; ip3 = j
!    else if(ipm==5) then
!       ip1 = j ; ip2 = i ; ip3 = k
!    else if(ipm==6) then
!       ip1 = k ; ip2 = j ; ip3 = i
!    end if
 end subroutine get_cindex_perm3

 subroutine get_cind_of_chi3(i,j,k,l,index)
    implicit none
!
!   get compound index for chi3 index (ijkl)
!
!   i, j, k, l : cartesian index
!   index      : compound index of (ijkl)
!
    integer, intent(in)  :: i,j,k,l
    integer, intent(out) :: index
    integer              :: index0, ind_i, ind_jkl
    integer, dimension(3) :: id
    integer, dimension(3,3,3) :: d
    data id /0,10,20/
    data d(1,1,1),d(1,1,2),d(1,1,3),d(1,2,1),d(1,2,2),d(1,2,3),d(1,3,1),d(1,3,2),d(1,3,3) &
  &    , d(2,1,1),d(2,1,2),d(2,1,3),d(2,2,1),d(2,2,2),d(2,2,3),d(2,3,1),d(2,3,2),d(2,3,3) &
  &    , d(3,1,1),d(3,1,2),d(3,1,3),d(3,2,1),d(3,2,2),d(3,2,3),d(3,3,1),d(3,3,2),d(3,3,3) &
  &  /1,2,3,2,4,5,3,5,6,2,4,5,4,7,8,5,8,9,3,5,6,5,8,9,6,9,10/
   ind_i = id(i)
   ind_jkl = d(j,k,l)
! set ind_i
!    ind_i = 10*(i-1)
! set_ind_jkl
!                                             no.  jkl    ind_jkl
!    if(j==1.and.k==1.and.l==1) then         !  1   xxx     1
!       ind_jkl = 1
!    else if(j==1.and.k==1.and.l==2) then    !  2   xxy     2
!       ind_jkl = 2
!    else if(j==1.and.k==1.and.l==3) then    !  3   xxz     3
!       ind_jkl = 3
!    else if(j==1.and.k==2.and.l==1) then    !  4   xyx     2
!       ind_jkl = 2
!    else if(j==1.and.k==2.and.l==2) then    !  5   xyy     4
!       ind_jkl = 4
!    else if(j==1.and.k==2.and.l==3) then    !  6   xyz     5
!       ind_jkl = 5
!    else if(j==1.and.k==3.and.l==1) then    !  7   xzx     3
!       ind_jkl = 3
!    else if(j==1.and.k==3.and.l==2) then    !  8   xzy     5
!       ind_jkl = 5
!    else if(j==1.and.k==3.and.l==3) then    !  9   xzz     6
!       ind_jkl = 6
!    else if(j==2.and.k==1.and.l==1) then    ! 10   yxx     2
!       ind_jkl = 2
!    else if(j==2.and.k==1.and.l==2) then    ! 11   yxy     4
!       ind_jkl = 4
!    else if(j==2.and.k==1.and.l==3) then    ! 12   yxz     5
!       ind_jkl = 5
!    else if(j==2.and.k==2.and.l==1) then    ! 13   yyx     4
!       ind_jkl = 4
!    else if(j==2.and.k==2.and.l==2) then    ! 14   yyy     7
!       ind_jkl = 7
!    else if(j==2.and.k==2.and.l==3) then    ! 15   yyz     8
!       ind_jkl = 8
!    else if(j==2.and.k==3.and.l==1) then    ! 16   yzx     5
!       ind_jkl = 5
!    else if(j==2.and.k==3.and.l==2) then    ! 17   yzy     8
!       ind_jkl = 8
!    else if(j==2.and.k==3.and.l==3) then    ! 18   yzz     9
!       ind_jkl = 9
!    else if(j==3.and.k==1.and.l==1) then    ! 19   zxx     3
!       ind_jkl = 3
!    else if(j==3.and.k==1.and.l==2) then    ! 20   zxy     5
!       ind_jkl = 5
!    else if(j==3.and.k==1.and.l==3) then    ! 21   zxz     6
!       ind_jkl = 6
!    else if(j==3.and.k==2.and.l==1) then    ! 22   zyx     5
!       ind_jkl = 5
!    else if(j==3.and.k==2.and.l==2) then    ! 23   zyy     8
!       ind_jkl = 8
!    else if(j==3.and.k==2.and.l==3) then    ! 24   zyz     9
!       ind_jkl = 9
!    else if(j==3.and.k==3.and.l==1) then    ! 25   zzx     6
!       ind_jkl = 6
!    else if(j==3.and.k==3.and.l==2) then    ! 26   zzy     9
!       ind_jkl = 9
!    else if(j==3.and.k==3.and.l==3) then    ! 27   zzz    10
!       ind_jkl = 10
!    end if
    index = ind_i + ind_jkl
 end subroutine get_cind_of_chi3

 subroutine calc_THG_energy_factor_eh_omit(e1,e2,e3,efact,iterm)
   implicit none
!
!  set THG energy factor by omittion method
!
   integer, intent(in)     :: iterm
   integer                 :: ideg1, ideg2
   real(DP), intent(in)    :: e1, e2, e3
   real(DP), intent(out)   :: efact
   real(DP)                :: de1, de2, de3, te2, te3
   de1 = 2.0d0*e1 ; de2 = 2.0d0*e2 ; de3 = 2.0d0*e3
   te2 = 3.0d0*e2 ; te3 = 3.0d0*e3
   efact = 0.0d0
! omega3 term factor
   if(iterm==1) then
      call check_deg(te2,de1,ideg1)
      call check_deg(te3,e1,ideg2)
      if(ideg1==0.and.ideg2==0) efact = 729.0d0/(e1**4*(te2-de1)*(te3-e1))
   end if
! omega2 term factor
   if(iterm==2) then
      call check_deg(de3,e2,ideg1)
      call check_deg(de1,te2,ideg2)
      if(ideg1==0.and.ideg2==0) efact = 128.0d0*(de1-e2)/(e2**4*(de3-e2)*(de1-te2)*(de1+e2))
   end if
! omega1 term factor
   if(iterm==3) then
      call check_deg(e1,te3,ideg1)
      call check_deg(e2,de3,ideg2)
      if(ideg1==0.and.ideg2==0) efact = (1.0d0/(e1-te3)+(de2/((e3+e1)*(e2+de3))))/(e3**4*(e2-de3))
   end if
 end subroutine calc_THG_energy_factor_eh_omit

 subroutine calc_THG_energy_factor_eh_damp(e1,e2,e3,efact,iterm)
   implicit none
!
!  set THG energy factor by damping method
!
   integer, intent(in)     :: iterm
   real(DP), intent(in)    :: e1, e2, e3
   real(DP), intent(out)   :: efact
   real(DP)                :: de1, de2, de3, te2, te3, edelta1, edelta2
   de1 = 2.0d0*e1 ; de2 = 2.0d0*e2 ; de3 = 2.0d0*e3
   te2 = 3.0d0*e2 ; te3 = 3.0d0*e3
   efact = 0.0d0
   if(iterm==1) then
      call edamping(te2,de1,edelta1)
      call edamping(te3,e1,edelta2)
      efact = 729.0d0/(e1**4*edelta1*edelta2)
   end if
   if(iterm==2) then
      call edamping(de3,e2,edelta1)
      call edamping(de1,te2,edelta2)
      efact = 128.0d0*(de1-e2)/(e2**4*edelta1*edelta2*(de1+e2))
   end if
   if(iterm==3) then
      call edamping(e1,te3,edelta1)
      call edamping(e2,de3,edelta2)
      efact = (1.0d0/edelta1+(de2/((e3+e1)*(e2+de3))))/(e3**4*edelta2)
   end if
 end subroutine calc_THG_energy_factor_eh_damp

 subroutine calc_THG_energy_factor_ts_omit(e1,e2,e3,e4,efact,iterm)
   implicit none
!
!  set THG energy factor by omittion method
!
   integer, intent(in)     :: iterm
   integer                 :: ideg1, ideg2
   real(DP), intent(in)    :: e1, e2, e3, e4
   real(DP), intent(out)   :: efact
   real(DP)                :: te2, te3
   te2 = 3.0d0*e2 ; te3 = 3.0d0*e3
   efact = 0.0d0
! omega3 term factor
   if(iterm==1) then
      call check_deg(te2,e1,ideg1)
      call check_deg(te3,e1,ideg2)
      if(ideg1==0.and.ideg2==0) efact = 729.0d0/(e1**4*(te2-e1)*(te3-e1))
   end if
! omega1 term factor
   if(iterm==3) then
      call check_deg(e1,te3,ideg1)
      call check_deg(e4,te3,ideg2)
      if(ideg1==0.and.ideg2==0) efact = ((e1+e2)/((e1-te3)*(e4+e3))+(e4+e2)/((e4-te3)*(e1+e3))) &
                              &         /(e3**4*(e2+te3))
   end if
 end subroutine calc_THG_energy_factor_ts_omit

 subroutine calc_THG_energy_factor_ts_damp(e1,e2,e3,e4,efact,iterm)
   implicit none
!
!  set THG energy factor by omittion method
!
   integer, intent(in)     :: iterm
   real(DP), intent(in)    :: e1, e2, e3, e4
   real(DP), intent(out)   :: efact
   real(DP)                :: te2, te3, edelta1, edelta2
   te2 = 3.0d0*e2 ; te3 = 3.0d0*e3
   efact = 0.0d0
! omega3 term factor
   if(iterm==1) then
      call edamping(te2,e1,edelta1)
      call edamping(te3,e1,edelta2)
      efact = 729.0d0/(e1**4*(te2-e1)*(te3-e1))
   end if
! omega1 term factor
   if(iterm==3) then
      call edamping(e1,te3,edelta1)
      call edamping(e4,te3,edelta2)
      efact = ((e1+e2)/((e1-te3)*(e4+e3))+(e4+e2)/((e4-te3)*(e1+e3))) &
     & /(e3**4*(e2+te3))
   end if
 end subroutine calc_THG_energy_factor_ts_damp
 
 subroutine check_deg(e1,e2,ideg)
    implicit none
! check degeneracy e1 = e2
! ideg = 0 no degeneracy
!      = 1 degeneracy
    integer, intent(out)      :: ideg
    real(kind=DP), intent(in) :: e1,e2
    ideg = 0
    if (dabs(e1-e2)<dres_cut_off) ideg = 1
 end subroutine check_deg

 subroutine edamping(e1,e2,edel)
   implicit none
!
!  add damping factor for e1-e2
!
   real(DP), intent(in) :: e1, e2
   real(DP), intent(out) :: edel
   if((e1-e2)>0.0d0) then
      edel = e1-e2+dres_cut_off
   else
      edel = e1-e2-dres_cut_off
   end if
 end subroutine edamping

 subroutine dealloc_nlo_arrays
    implicit none
    deallocate(ptrm)
 end subroutine dealloc_nlo_arrays

 subroutine dealloc_trm
    implicit none
    deallocate(trm)
 end subroutine dealloc_trm

 subroutine calc_magopt
    implicit none
!
!   magneto-optical calculation(beta version)
!
    integer :: istep
    real(kind=DP) :: sigma_r_xx, sigma_i_xx, sigma_r_xy, sigma_i_xy, pai4
    real(kind=DP) :: abs, arg, wkr1, wki1, wkr2, wki2, wkr3, wki3
    write(nfout,'(/1x,"!* ---------- magneto optical effect calculation ---------- ")')
    do istep = 1, nstep
       optcr_l(istep,1)=e(istep)*imeps(istep,1)/(4.0d0*PAI)
       optcr_l(istep,2)=e(istep)*imeps(istep,4)/(4.0d0*PAI)
       optci_l(istep,1)=-1.0d0*e(istep)*(reps(istep,1)-1)/(4.0d0*PAI)
       optci_l(istep,2)=-1.0d0*e(istep)*reps(istep,4)/(4.0d0*PAI)
    end do
    pai4 = 4.0d0*PAI
    do istep = 1, nstep
       sigma_r_xx=optcr_l(istep,1)
       sigma_i_xx=optci_l(istep,1)
       sigma_r_xy=optcr_l(istep,2)
       sigma_i_xy=optci_l(istep,2)
! calculate 1+i(4*PAI/omega)*sigma_xx
       wkr1=(1.0d0-pai4/e(istep))*sigma_i_xx
       wki1=pai4/e(istep)*sigma_r_xx
       call abs_and_arg_of_complex_number(wkr1,wki1,abs,arg)
       abs=dsqrt(abs)
       arg=arg/2.0d0
       call get_complex_number(abs,arg,wkr1,wki1)
! evaluate -sigma_xy/(sigma_xx*(1+i(4*PAI/omega)*sigma_xx)
       call cmpprod(sigma_r_xx,sigma_i_xx,wkr1,wki1,wkr2,wki2)
       sigma_r_xy=-1.0d0*sigma_r_xy
       sigma_i_xy=-1.0d0*sigma_i_xy
       call cmpdivide(sigma_r_xy,sigma_i_xy,wkr2,wki2,wkr3,wki3)
       kerr_rotation(istep)=wkr3
       kerr_ellipticity(istep)=wki3
    end do
 end subroutine calc_magopt

 subroutine cmpc(a,b,c,d)
    implicit none
! (a+ib)* = c+id
    real(DP), intent(in)  :: a, b
    real(DP), intent(out) :: c, d
    c = a
    d =-b
 end subroutine cmpc

 subroutine cmpprod(a,b,c,d,e,f)
    implicit none
! (a+ib) x (c+di) = e+if
    real(DP), intent(in)  :: a, b, c, d
    real(DP), intent(out)  :: e, f
    e = a*c - b*d
    f = a*d + c*b
 end subroutine cmpprod

 subroutine cmpprodc(a,b,c,d,e,f)
    implicit none
! (a+ib)* x (c+id) = e+if
    real(DP), intent(in) :: a, b, c, d
    real(DP), intent(out) :: e, f
    e = a*c + b*d
    f = a*d - b*c
 end subroutine cmpprodc

 subroutine cmpdivide(a,b,c,d,x,y)
    implicit none
! x+iy = (a+bi)/(c+di)
    real(kind=DP), intent(in) :: a, b, c, d
    real(kind=DP), intent(out) :: x, y
    real(kind=DP) :: x2, y2, abs
    abs=c**2+d**2
    call cmpprod(c,-1.0d0*d,a,b,x,y)
    x=x/abs
    y=y/abs
 end subroutine cmpdivide

 subroutine abs_and_arg_of_complex_number(a,b,abs,arg)
    implicit none
! get absolute value and argument of complex number : a+bi
    real(kind=DP), intent(in) :: a, b
    real(kind=DP), intent(out) :: abs, arg
    abs=dsqrt(a**2+b**2)
    arg=dacos(a/abs)
 end subroutine abs_and_arg_of_complex_number

 subroutine get_complex_number(abs,arg,a,b)
    implicit none
    real(kind=DP), intent(in) :: abs, arg
    real(kind=DP), intent(out) :: a, b
! get complex number from its absolute value and argument
    a=abs*dcos(arg)
    b=abs*dsin(arg)
 end subroutine get_complex_number

! ==================== KT_add ==================== 13.0S
 subroutine wd_os_str_core2val_ek(nfout)
    implicit none
!
!   print transition moment product matrix of each k-point in IBZ
!
!   ind : transition matrix product index -> see comments for major variables
!        
    integer, intent(in)                       :: nfout
    integer                                   :: nk_local0, nk_local
    integer                                   :: ispin, ik, ik2, i, j, ind, id, iv, ic, ifind
    integer                                   :: nspin_kt
    integer                                   :: ni, nf, ipes, iksta, ikend
    real(kind=DP),dimension(6)                :: sum_os
    real(kind=DP),allocatable, dimension(:,:) :: dsum_os
    real(kind=DP),allocatable, dimension(:)   :: spin_os
    real(kind=DP),dimension(3)                :: vk0,vk2
    real(kind=DP)                             :: ei, ej, fi, fj, sum_fi, tot_osci
    real(kind=DP)                             :: weight, ctmp
    real(kind=DP),allocatable, dimension(:,:,:,:) :: os_str_wk

! ==== KT_add ====== 2014/09/22
    if ( noncol ) then
       nspin_kt = 1
    else
       nspin_kt = nspin
    endif
! ================== 2014/09/22

! ==== KT_mod ====== 2014/09/22
!    allocate(dsum_os(3,nspin)); dsum_os=0.0d0
!    allocate(spin_os(nspin)); spin_os=0.0d0

    allocate(dsum_os(3,nspin_kt)); dsum_os=0.0d0
    allocate(spin_os(nspin_kt)); spin_os=0.0d0
! ================== 2014/09/22

!    if(printable) then
    if(printable .and. ipriepsilon>=2 ) then
       write(nfout,10)
       write(nfout,'(1x,"!* nspin = ",i3)') nspin
       write(nfout,'(1x,"!* kv3_ek = ", i4)') kv3_ek
       write(nfout,'(1x,"!* kv3_ek/nspin = ",i3)') kv3_ek/nspin
    end if

! MPI
    nk_local = kv3_ek/npes
    nk_local0 = nk_local + (kv3_ek-nk_local*npes)
    if(nrd_efermi == 0) then
       ni = neg
       nf = neg
    else
       ni = num_vb
       nf = num_cb
    end if

    if ( sw_corelevel_spectrum == ON ) ni = num_core_states

    allocate(os_str_wk(nk_local,ni,nf,6))

    if(mype == 0) then
       do ipes = 2, npes
          iksta = 1 + nk_local0 +(ipes-2)*nk_local
          ikend = iksta + nk_local -1
          call mpi_recv(os_str_wk,nk_local*ni*nf*6,mpi_double_precision,ipes-1,1,mpi_comm_group,istatus,ierr)
          os_str(iksta:ikend,1:ni,1:nf,1:6) = os_str_wk(1:nk_local,1:ni,1:nf,1:6)
!             if(ipri >= 1) then
          if(ipriepsilon >= 2) then
             write(nfout,'(1x,"!*MPI ostr(",i4,":",i4,") data have been sent from ipes = ",i4)') iksta, ikend, ipes-1
          endif
       end do
    else
       iksta = 1 + nk_local0 +(mype-1)*nk_local
       ikend = iksta + nk_local -1
       os_str_wk(1:nk_local,1:ni,1:nf,1:6) = os_str(iksta:ikend,1:ni,1:nf,1:6)
       call mpi_send(os_str_wk,nk_local*ni*nf*6,mpi_double_precision,0,1,mpi_comm_group,ierr)
       if ( ipriepsilon >=2 ) then
          write(nfout,'(1x,"!*MPI os_str(",i4,":",i4,") data have been sent to ipes = 0")') iksta, ikend
       endif
    end if
    deallocate(os_str_wk)

!    end if
! <<--  T. Yamasaki 26 Feb. 2008
! Changed to the original code 2011,12,12 T Hamada

! ========== KT_mod ======== 2014/09/22
!    do ispin = 1, nspin
    do ispin = 1, nspin_kt
! ========================== 2014/09/22
       if(printable) then
          if(ispin == 1) write(nfout,20)
          write(nfout,'(1x,"ispin = ", i3)') ispin
       end if
       if(way_BZintegral==L_TETRAHEDRON) then
          do ik=ispin, kv3_ek-nspin+ispin, nspin
! === KT_add ===== 2014/09/22
             if ( noncol ) then
                weight = kv3_ek *qwgt_ek(ik) /dble(ndim_spinor)
             else
                weight = kv3_ek *qwgt_ek(ik)
             endif
! ================== 2014/09/22
             sum_os(1:6)=0.0d0
             sum_fi=0.0d0

             call calc_sum_fi(ik)
             do ind=1,6

                do i=1,num_core_states
                   ei = ene_core_states(i)

                   do j=1,neg
                      ej=eb_ek(ik,j)

                      if(ej>efermi) then
                         call find_ind_cb_only(j,ic,ik,ifind)
                         iv = i
                         if(ifind/=1) then
                            if(printable) &
                           & write(nfout,'(2x,"- conduction or valence band index is not found  &
                           & m_Epsilon_ek STOP at wd_os_str_ek ")')
                            stop
                         end if
                         fi=1.0D0
                         fj=occ_mpi_ek(n2_mpi_ek(j,ik),ik) /weight
                         sum_os(ind) = sum_os(ind) &
                              &       +os_str(ik,iv,ic,ind) *fi *(1.0d0-fj) *qwgt_ek(ik)
                      end if
                   end do
                end do
                if(ind<=3) dsum_os(ind,ispin) = dsum_os(ind,ispin) +sum_os(ind)/sum_fi

             end do
             if (printable .and. ipriepsilon>=2 ) then
                write(nfout,30) ik,ispin,(sum_os(i),i=1,6),sum_fi
             endif
          end do

       else
          do ik = ispin, kv3_ek-nspin+ispin, nspin
! === KT_add ===== 2014/09/22
             if ( noncol ) then
                weight = kv3_ek *qwgt_ek(ik) /dble(ndim_spinor)
             else
                weight = kv3_ek *qwgt_ek(ik)
             endif
! ================== 2014/09/22
             sum_os(1:6)=0.0d0
             sum_fi=0.0d0
             call calc_sum_fi(ik)

             do ind=1,6

                do i=1,num_core_states
                   ei=ene_core_states(i)

                   do j=1,neg
                      ej=eb_ek(ik,j)

                      if(ej>efermi) then
                         call find_ind_cb_only(j,ic,ik,ifind)
                         iv = i

                         if(ifind/=1) then
                            if(printable) &
                           & write(nfout,'(2x,"- conduction or valence band index is not found  &
                           & m_Epsilon_ek STOP at wd_os_str_ek ")')
                            stop
                         end if
                         fi=1.0D0 
                         fj = occ_mpi_ek(n2_mpi_ek(j,ik),ik) /weight
                         sum_os(ind) = sum_os(ind) &
                              &       +os_str(ik,iv,ic,ind) *fi *(1.0d0-fj) *qwgt_ek(ik)
                      end if
                   end do
                end do
                if(ind<=3) dsum_os(ind,ispin) = dsum_os(ind,ispin) +sum_os(ind)/sum_fi

             end do
             if (printable .and. ipriepsilon>=2 ) then
                write(nfout,30) ik,ispin,(sum_os(i),i=1,6),sum_fi
             endif
          end do
       end if

       spin_os(ispin)=(dsum_os(1,ispin)+dsum_os(2,ispin)+dsum_os(3,ispin))/3.0d0
       if(printable) then
         write(nfout,'(1x,"!* sum of weighted oscillator strength of k-points in irreducible Brillouin zone = ",f10.5)') &
       & spin_os(ispin)
          write(nfout,'(1x,"!* oscillator strength per electron = ",f10.5/)') &
        & spin_os(ispin)
       end if
    end do
   
! ===== KT_mod ===== 2014/09/22
!    if(nspin > 1) then
    if(nspin_kt > 1) then
! ================== 2014/09/22
       call calc_total_osci
       if(printable) then
          write(nfout,'(1x," total oscillator strength = ",f10.5)') tot_osci
          write(nfout,'(1x," total oscillator strength per electron = ",f10.5)') tot_osci
       end if
    end if

! ===== KT_mod ===== 2014/09/22
!    if(nspin > 1) call set_major_and_minor_spin
    if(nspin_kt > 1) call set_major_and_minor_spin
! ================== 2014/09/22
 
    deallocate(dsum_os);  deallocate(spin_os)

 10 format(/1x,"!* ----- weighted transition moment square of each k-point in irreducible Brillouin zone -----")
 20 format(20x,"ispin",4x,"xx",8x,"yy",8x,"zz",8x,"xy",8x,"xz",8x,"yz",5x,"core states")
 30 format(2x,"k-point = ",i4,4x,i3,1x,6f10.5,2x,f10.5)

  contains

     subroutine calc_sum_fi(ik)
       integer, intent(in) :: ik

       integer :: i

       sum_fi=0.0d0
       do i=1, num_core_states
          ei=ene_core_states(i)
          sum_fi=sum_fi + 1.0D0
       end do
     end subroutine calc_sum_fi

     subroutine calc_total_osci
       tot_osci=0.0d0
       do ispin  = 1, nspin
           tot_osci=tot_osci+spin_os(ispin)
       end do
     end subroutine calc_total_osci

     subroutine set_major_and_minor_spin
       integer       :: mispin, mjspin
       real(kind=DP) :: chg, chg1
       chg=spin_charge(1)
       mjspin=1
       mispin=2
       do ispin = 2, nspin
          chg1=spin_charge(ispin)
          if(chg1>chg) then
             chg=chg1
             mispin=mjspin
             mjspin=ispin
          end if
       end do
       major_spin=mjspin
       minor_spin=mispin
       if(printable) &
       & write(nfout,'(1x,"!* major spin = ",i3,3x," minor spin = ",i3)') major_spin, minor_spin
     end subroutine set_major_and_minor_spin
   end subroutine wd_os_str_core2val_ek
! ===================================================== 13.0S

! ======================== KT_add =================== 13.0S
 subroutine vl_core2val_ek(ik,nbi,nbj,ebi,ebj)
!
!   calculates the local part of electronic transition moment
!   between valence and conduction band orbitals
!   Tomoyuki Hamada, Univ. Tokyo, Feb. 4, 2003; July 28.2003; August 12, 2003
!
!   ik: k-point index
!   nbi: valence band index
!   nbj: conduction band index
!   ebi: energy of nbi band
!   ebj: energy of nbj band
!   ixyz: xyz index =1(x); =2(y); =3(z)
!   rtrans: non-local transtion moment of Read and Needs transition moment
!   ptrans: core-repare term of Kageshima and Shiraishi transition moment
!
    implicit none
    integer,intent(in)             :: ik, nbi, nbj
    integer                        :: nbi0, nbj0, iv, ic, ifind, iv2, ic2, ixyz, ng
    real(DP),intent(in)            :: ebi, ebj
    real(DP)                       :: omega, r_wfij, i_wfij
    real(DP),dimension(3)          :: rsum, isum, tsum
    real(DP),dimension(3,2)        :: tlocal,tcorr
    real(DP),allocatable,dimension (:) :: qx, qy, qz

    allocate(qx(kg1)); allocate(qy(kg1)); allocate(qz(kg1))

    rsum(1:3)=0.0d0; isum(1:3)=0.0d0
    tlocal(1:3,1:2)=0.0d0; tcorr(1:3,1:2)=0.0d0
    qx=0.0d0; qy=0.0d0; qz=0.0d0

    omega=ebj-ebi
! === KT_add === 2015/01/17
    if ( sw_scissor_renormalization == ON ) omega = omega +scissor
! ============== 2015/01/17

! set index
    nbi0=nbi
    nbj0=n2_mpi(nbj,ik)

!    write(*,*) 'AAA ', nbj0, nbi0

    call find_ind_cb_only2(nbj0,ic2,nk_in_the_process+ik-1,ifind)
    iv2 = nbi

    if(ifind==0) then
       if(printable) &
       & write(nfout,'(1x,"!!* conduction band index is not found   UVSOR-Epsilon STOP at vl_ek")')
       stop
    end if

! calculate local transition moment
    call k_plus_G_vectors_m(ik,kgp,kg1,kv3,iba,nbase,vkxyz,ngabc,rltv,qx,qy,qz)

    call calc_vlocal  ! -> rsum, isum
! add correction term
! kimg=1 case
    if(kimg<=1) then
       do ixyz=1,3
          tlocal(ixyz,2)=-1.0d0*rsum(ixyz)/omega
          tcorr(ixyz,2)=(-1.0d0*ptrans(ik,ic2,iv2,ixyz,1)+rtrans(ik,ic2,iv2,ixyz,2))/omega
          tlocal(ixyz,1)=isum(ixyz)/omega
          tcorr(ixyz,1)=(ptrans(ik,ic2,iv2,ixyz,2)+rtrans(ik,ic2,iv2,ixyz,1))/omega
       end do
    else
! kimg=2 case
       do ixyz=1,3
          tlocal(ixyz,2)=-1.0d0*rsum(ixyz)/omega
          tcorr(ixyz,2)=(-1.0d0*ptrans(ik,ic2,iv2,ixyz,1)+rtrans(ik,ic2,iv2,ixyz,2))/omega
          tlocal(ixyz,1)=isum(ixyz)/omega
          tcorr(ixyz,1)=(ptrans(ik,ic2,iv2,ixyz,2)+rtrans(ik,ic2,iv2,ixyz,1))/omega
       end do
    end if

! set transition moment arrey
    call find_ind_cb_only( nbj,ic,nk_in_the_process+ik-1,ifind)  ! -> iv, ic, ifind
    iv = nbi

    if(ifind==1) then
       do ixyz=1,3
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,1,1)=tlocal(ixyz,1)
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,1,2)=tcorr(ixyz,1)
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,2,1)=tlocal(ixyz,2)
          trm(nk_in_the_process+ik-1,iv,ic,ixyz,2,2)=tcorr(ixyz,2)
       end do
       if(ipri >= 2) then
          write(nfout,'(" ik,nbi,nbj,nbi0,nbj0,ebi,ebj = ",5i8,2f8.4)') ik,nbi,nbj,nbi0,nbj0,ebi,ebj
          if(ipri >= 3) then
             write(nfout,'(" tlocal(1:3,2),rsum(1:3),omega = ",3f10.4,4d11.3)') tlocal(1:3,2),rsum(1:3),omega
             write(nfout,'(" tcorr (1:3,1:2)               = ",6f10.4)') tcorr(1:3,1:2)
          end if
!!$          if(kimg == 1) then
!!$             write(nfout,'(" rsum = ",3f16.12)') rsum(1:3)
!!$          else
!!$             write(nfout,'(" rsum, isum = ",6f16.12)') rsum(1:3),isum(1:3)
!!$          end if
          write(nfout,'(" trm(",i4,",",i4,",",i4,",1:3,1:2,1) = ",6f10.4)') &
               & nk_in_the_process+ik-1,iv,ic,trm(nk_in_the_process+ik-1,iv,ic,1:3,1:2,1)
          write(nfout,'(" trm(",i4,",",i4,",",i4,",1:3,1:2,2) = ",6f10.4)') &
               & nk_in_the_process+ik-1,iv,ic,trm(nk_in_the_process+ik-1,iv,ic,1:3,1:2,2)
       end if
    else
       if(printable) &
       & write(nfout,'(1x,"!!* index of valence or conduction band is not found   UVSOR-Epsilon STOP at vl_ek")')
       stop
    end if
    deallocate(qx); deallocate(qy); deallocate(qz)

  contains

     subroutine calc_vlocal
! This subroutine calculates local transtion moment
       if (kimg<=1) then
! kimg=1 case
          do ng=1, iba(ik)
! real part
             r_wfij=wf_lb(ng,nbj0,1) *psig_core_states(ng,nbi0,ik,1)
             rsum(1)=rsum(1)+r_wfij*qx(ng)
             rsum(2)=rsum(2)+r_wfij*qy(ng)
             rsum(3)=rsum(3)+r_wfij*qz(ng)
! imaginary part
             i_wfij=wf_lb(ng,nbj0,1) *psig_core_states(ng,nbi0,ik,2)
             isum(1)=isum(1)+i_wfij*qx(ng)
             isum(2)=isum(2)+i_wfij*qy(ng)
             isum(3)=isum(3)+i_wfij*qz(ng)
          end do
       else
! kimg=2 case
! real part
          do ng=1, iba(ik)
             r_wfij = wf_lb(ng,nbj0,1)*psig_core_states(ng,nbi0,ik,1) &
                  & + wf_lb(ng,nbj0,2)*psig_core_states(ng,nbi0,ik,2)
             rsum(1)=rsum(1)+r_wfij*qx(ng)
             rsum(2)=rsum(2)+r_wfij*qy(ng)
             rsum(3)=rsum(3)+r_wfij*qz(ng)
! imaginary part
             i_wfij = wf_lb(ng,nbj0,1)*psig_core_states(ng,nbi0,ik,2) &
                  &  -wf_lb(ng,nbj0,2)*psig_core_states(ng,nbi0,ik,1)
             isum(1)=isum(1)+i_wfij*qx(ng)
             isum(2)=isum(2)+i_wfij*qy(ng)
             isum(3)=isum(3)+i_wfij*qz(ng)
          end do
       end if
     end subroutine calc_vlocal

   end subroutine vl_core2val_ek
! ======================================================= 13.0S

! ====================== KT_add ====================== 13.0S
 subroutine calc_ptrans_core2val_ek
    implicit none
!
!   calculate KS correction term
!
    integer                                :: id_sname = -1
    integer                                :: ispin, it, lmt1, lmt2, il1, im1, il2, im2, ia
    integer                                :: ik, ii, ib, ib1, ilmta, p, p1, index, ifact
    integer                                :: nspher1,nspher2
    real(kind=DP) :: fac, eib, eib1
    real(kind=DP),allocatable,dimension(:,:,:) :: wkfsr, wkfsi
! --> T. Yamasaki 2008/02/21
    real(kind=DP),allocatable,dimension(:)     :: wkfsr_tmp, wkfsi_tmp
! <-- T. Yamasaki 2008/02/21


    call tstatc0_begin('calc_ptans_ek ',id_sname)
    allocate(wkfsr(neg,nlmta,kv3)); allocate(wkfsi(neg,nlmta,kv3))

    if(npes >= 2) call mpi_barrier(mpi_comm_group,ierr)

    allocate(wkfsr_tmp(nlmta),wkfsi_tmp(nlmta))

    do ik = 1, kv3, af+1
       do ib = 1, neg
          if(map_ek(ib,ik) == mype) then
             if(mype == 0) then
                do ilmta=1, nlmta
                   wkfsr(ib,ilmta,ik) = fsr_l(map_z(ib),ilmta,ik)
                   wkfsi(ib,ilmta,ik) = fsi_l(map_z(ib),ilmta,ik)
                end do
             else
                do ilmta=1, nlmta
                   wkfsr_tmp(ilmta) = fsr_l(map_z(ib),ilmta,ik)
                   wkfsi_tmp(ilmta) = fsi_l(map_z(ib),ilmta,ik)
                end do
                call mpi_send(wkfsr_tmp,nlmta,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
                call mpi_send(wkfsi_tmp,nlmta,mpi_double_precision,0,1,mpi_comm_group,ierr) ! MPI
             end if
          else if(mype == 0 .and. map_ek(ib,ik) /= 0) then
             call mpi_recv(wkfsr_tmp,nlmta,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
             call mpi_recv(wkfsi_tmp,nlmta,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,istatus,ierr)!MPI
             do ilmta=1, nlmta
                wkfsr(ib,ilmta,ik) = wkfsr_tmp(ilmta)
                wkfsi(ib,ilmta,ik) = wkfsi_tmp(ilmta)
             end do
         end if
       end do
    end do
    if(npes >= 2)  then
       call mpi_bcast(wkfsr,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
       call mpi_bcast(wkfsi,neg*nlmta*kv3,mpi_double_precision,0,mpi_comm_group,ierr)!MPI
    end if

    deallocate(wkfsr_tmp,wkfsi_tmp)
! <-- T. Yamasaki 2008/02/21

    ptrans=0.0d0

!   calculate sum[<WF1|beta(i)>pij<beta(j)|WF2>
    do ii = 1,3
       do ispin = 1, nspin, af+1
          do ik = ispin, kv3-nspin+ispin, nspin
             do ib = 1, neg
                do ib1 = 1, num_core_states
                   if(nrd_efermi==1) then
                      eib=e2_mpi(ib,ik)
                      eib1 = ene_core_states(ib1)
                      if(eib.gt.efermi) call calc_ptrans_ek_core(ib,ib1)
                   else
                      call calc_ptrans_ek_core(ib,ib1)
                   end if
                end do
             end do
          end do
       end do
    end do

!!$    if(nk_in_the_process + kv3-1 >= kv3_ek) stop ' m_Epsilon_ek (1)'

    ptrans = ptrans*(af+1)

    deallocate(wkfsr);  deallocate(wkfsi)
    call tstatc0_end(id_sname)

    contains

     subroutine calc_ptrans_ek_core(ib,ib1) !ib: cond band,  ib1: core states
!
!      calculate <WF1|beta(i)>pij<beta(j)|WF2>
!
       integer,intent(in) :: ib,ib1
       integer            :: index,iv,ic,ifind, ifact
       integer :: ia, it, il2, im2, t1, nspher_core_orb, nspher_val_orb, lmt1

       call find_ind_cb_only2(ib,ic,nk_in_the_process+ik-1,ifind)
       if(ifind==0.and.printable) then
          write(nfout,'(1x,"!!* conduction band index is not found   UVSOR-Epsilon STOP at calc_ptrans")')
       end if

       iv = ib1

       ia = atom_to_probe
       it = ityp( atom_to_probe )

       il2 = qnum_l_to_probe +1

       Do im2=1, 2*qnum_l_to_probe +1
          if ( ndim_spinor_core_states == 1 ) then
             if ( im2 /= iv ) cycle
          endif

          nspher_core_orb = (il2 -1)**2 + im2
!
          do lmt1 = 1, ilmt(it)            ! valence
             il1= ltp(lmt1,it);  im1 = mtp(lmt1,it);  t1 = taup(lmt1,it)
             nspher_val_orb = (il1 -1)**2 + im1

             call m_CLS_find_ptrans_indx_core2val( qnum_n_to_probe, &
                  &                                nspher_core_orb, nspher_val_orb, &
                  &                                t1, index )

             if(index==0) cycle

             ifact = -1

             p = lmta(lmt1,ia)
             p1 = im2
             fac=real(iwei(ia),kind=DP)*dble(ifact)

             ptrans(ik,ic,iv,ii,1) = ptrans(ik,ic,iv,ii,1) &
                  & + fac*(dipole_dxyz_core2val(index,ii)) &
                  &      *( wkfsr(ib,p,ik) *fsi_core_states(ib1,p1,ik) &
                  &        -wkfsi(ib,p,ik) *fsr_core_states(ib1,p1,ik) )

             ptrans(ik,ic,iv,ii,2) = ptrans(ik,ic,iv,ii,2) &
                  & -1.0d0 *fac *(dipole_dxyz_core2val(index,ii)) &
                  &        *( wkfsr(ib,p,ik) *fsr_core_states(ib1,p1,ik) &
                  &          +wkfsi(ib,p,ik) *fsi_core_states(ib1,p1,ik) )

          end do
       end Do

     end subroutine calc_ptrans_ek_core
   
   end subroutine calc_ptrans_core2val_ek
! ==================================================== 13.0S

end module m_Epsilon_ek
