!
!  Software name : CIAO (Code for Investigating Atomic Orbitals)
!  Subroutine(s) : calc_general_pp_srel, sizeof_lt_us, set_lt_us
!                : sizeof_ltt_us, set_ltt_us, sizeof_ltlt_us
!                : set_ltlt_us, set_param_us, calc_rpsi_us
!                : calc_ncpp_us, calc_nodes_us, set_vloc_us
!                : calc_rphi_us, check_phi_us, write_checked_phi_us
!                : calc_norm_rcl_us, write_norm_rcl_us 
!                : calc_norm_rc_us, write_norm_rc_us
!                : write_eref_rcl_phi_us, calc_rchi_us, calc_bmat_us
!                : calc_rbeta_us, calc_qnm_us, calc_qnm_sum_us
!                : write_qnm_sum_us, calc_dmat_us
!                : write_qnm_bmat_dmat_us, set_rcut_qps_us
!                : write_rcut_qps_us, calc_qps_us, check_qps_us
!                : write_checked_qps_us, calc_qps_vlocqps_us
!                : write_qps_vlocqps_us, calc_dion_us
!                : write_bmat_dmat_dion_us, calc_us_fourier
!                : write_us, calc_qps_fourier, write_qps
!  Author(s)     : Masakuni Okamoto (August 25, 2003)
!
!
!  Contact address :  Phase System Consortium
!                     E-mail: phase_system@nims.go.jp URL https://azuma.nims.go.jp
!
!
!  The license of the code and contact address :
!  See the files, COPYRIGHT and LICENSE (or LICENSE_J.pdf)
!
!=====================================================================

subroutine calc_general_pp_srel(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
  use parameters
  implicit none

  integer,intent(out) :: ier
  
  !+++++++++++++++++++++++++++++++++++++++++++++++++
  allocate(nref_us(0:lmax),is_val_type_us(0:lmax))
  nref_us = 0 ; is_val_type_us = 0
  !+++++++++++++++++++++++++++++++++++++++++++++++++
  
  call sizeof_lt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in sizeof_lt_us' ; go to 99
  end if

   !+++++++++++++++++++++++++++++++++++++++++++++++++
  allocate( &
       lt_n_us(0:lmax,nref_max_us),                &
       ips_lt_us(num_ltx_us),                      &
       n_lt_us(num_ltx_us),                        &
       l_lt_us(num_ltx_us),                        &
       t_lt_us(num_ltx_us),                        &
       ln_lt_us(num_ltx_us),                       &
       tn_lt_us(num_ltx_us)                        &
       )
  lt_n_us  = 0 ; ips_lt_us = 0
  n_lt_us  = 0 ; l_lt_us   = 0 ; t_lt_us = 0
  ln_lt_us = 0 ; tn_lt_us  = 0
  allocate( &
       is_bound_us(num_ltx_us),                    &
       nrcut_phi_us(num_ltx_us),                   &
       nodes_phi_us(num_ltx_us),                   &

! =========================================== modifided by K. T. ======== 4.0
!       vloc_scr_us(nmesh),                         &
!       vloc_ion_us(nmesh),                         &
       vloc_scr_us(nmesh,1),                         &
       vloc_ion_us(nmesh,1),                         &
! ======================================================================== 4.0
       eref_us(num_ltx_us),                        &
       veff_us(nmesh,num_ltx_us),                  &
       rpsi_us(nmesh,num_ltx_us),                  &
       rphi_us(nmesh,num_ltx_us),                  &
       drpsi_us(nmesh,num_ltx_us),                 &
       drphi_us(nmesh,num_ltx_us),                 &
       ddrphi_us(nmesh,num_ltx_us),                &
       rchi_us(nmesh,num_ltx_us),                  &
       rbeta_us(nmesh,num_ltx_us),                 &
       coeff_phi_us(0:ncoeff_phi_us,num_ltx_us),   &
       is_bound_ps(nps),                           &
       norm_psi_ps(nps),                           &
       norm_psi_us(num_ltx_us),                    &
       norm_phi_us(num_ltx_us)                     &
       )
  is_bound_us    = 0 ; nrcut_phi_us    = 0
  nodes_phi_us   = 0
  vloc_scr_us = 0.d0 ; vloc_ion_us  = 0.d0
  eref_us     = 0.d0 ; veff_us      = 0.d0
  rpsi_us     = 0.d0 ; rphi_us      = 0.d0
  drpsi_us    = 0.d0 ; drphi_us     = 0.d0
  ddrphi_us   = 0.d0 ; rchi_us      = 0.d0
  rbeta_us    = 0.d0 ; coeff_phi_us = 0.d0
  is_bound_ps =    0 ; norm_psi_ps  = 0.d0
  norm_psi_us = 0.d0 ; norm_phi_us  = 0.d0
  !+++++++++++++++++++++++++++++++++++++++++++++++++
  call set_lt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_lt_us' ; go to 99
  end if
  call sizeof_ltt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in sizeof_ltt_us' ; go to 99
  end if
  !++++++++++++++++++++++++++++++++++++++++++++++++
  allocate( &
       ltt_nm_us(0:lmax,nref_max_us,nref_max_us), &
       ips1_ltt_us(num_lttx_us),                  &
       ips2_ltt_us(num_lttx_us),                  &
       n1_ltt_us(num_lttx_us),                    &
       n2_ltt_us(num_lttx_us),                    &
       l_ltt_us(num_lttx_us),                     &
       t1_ltt_us(num_lttx_us),                    &
       t2_ltt_us(num_lttx_us),                    &
       tn_ltt_us(num_lttx_us),                    &
       tm_ltt_us(num_lttx_us),                    &
       q_sum_us(num_lttx_us),                     &
       qps_sum_us(num_lttx_us),                   &
       vlocqps_us(num_lttx_us),                   &
       bmat_us(num_lttx_us),                      &
       dmat_us(num_lttx_us),                      &
       dion_us(num_lttx_us)                       &
       )
  ltt_nm_us   = 0
  ips1_ltt_us = 0 ; ips2_ltt_us = 0
  n1_ltt_us   = 0 ; n2_ltt_us   = 0
  l_ltt_us    = 0
  t1_ltt_us   = 0 ; t2_ltt_us   = 0
  tn_ltt_us   = 0 ; tm_ltt_us   = 0
  q_sum_us   = 0.d0 ; qps_sum_us = 0.d0
  vlocqps_us = 0.d0 ; bmat_us    = 0.d0
  dmat_us    = 0.d0 ; dion_us    = 0.d0
  !++++++++++++++++++++++++++++++++++++++++++++++++
  call set_ltt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_ltt_us' ; go to 99
  end if
  call sizeof_ltlt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in sizeof_ltlt_us' ; go to 99
  end if
  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  allocate( &
       ltlt_nm_us(0:lmax,nref_max_us,0:lmax,nref_max_us),      &
       ips1_ltlt_us(num_ltltx_us),                             &
       n1_ltlt_us(num_ltltx_us),                               &
       l1_ltlt_us(num_ltltx_us),                               &
       t1_ltlt_us(num_ltltx_us),                               &
       ln_ltlt_us(num_ltltx_us),                               &
       tn_ltlt_us(num_ltltx_us),                               &
       ips2_ltlt_us(num_ltltx_us),                             &
       n2_ltlt_us(num_ltltx_us),                               &
       l2_ltlt_us(num_ltltx_us),                               &
       t2_ltlt_us(num_ltltx_us),                               &
       lm_ltlt_us(num_ltltx_us),                               &
       tm_ltlt_us(num_ltltx_us),                               &
       nrcut_qps_us(num_ltltx_us,0:l3_max_us),                 &
       q_us(nmesh,num_ltltx_us),                               &
       qps_us(nmesh,num_ltltx_us,0:l3_max_us),                 &
       coeff_qps_us(0:ncoeff_qps_us,num_ltltx_us,0:l3_max_us), &
       rcut_qps_us(num_ltltx_us,0:l3_max_us)                   &
       )
  ltlt_nm_us   = 0
  ips1_ltlt_us = 0 ; n1_ltlt_us = 0 ; l1_ltlt_us = 0
  t1_ltlt_us   = 0 ; ln_ltlt_us = 0 ; tn_ltlt_us = 0
  ips2_ltlt_us = 0 ; n2_ltlt_us = 0 ; l2_ltlt_us = 0
  t2_ltlt_us   = 0 ; lm_ltlt_us = 0 ; tm_ltlt_us = 0
  nrcut_qps_us = 0
  q_us         = 0.d0 ; qps_us      = 0.d0
  coeff_qps_us = 0.d0 ; rcut_qps_us = 0.d0
   !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  call set_ltlt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_ltlt_us' ; go to 99
  end if
  call set_param_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_param_us' ; go to 99
  end if
  call write_title_wave_function(IFLOG)
  call write_title_wave_function(IFSUM)
  call calc_rpsi_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rpsi_us' ; go to 99
  end if
  call calc_ncpp_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_ncpp_us' ; go to 99
  end if
  call set_vloc_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_vloc_us' ; go to 99
  end if
  call calc_rphi_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rphi_us' ; go to 99
  end if
  call calc_norm_rcl_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_norm_rcl_us' ; go to 99
  end if
  call write_norm_rcl_us(IFLOG)
  call write_norm_rcl_us(IFSUM)
  call calc_norm_rc_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_norm_rc_us' ; go to 99
  end if
  call write_norm_rc_us(IFLOG)
  call write_norm_rc_us(IFSUM)
  call write_eref_rcl_phi_us(IFLOG)
  call write_eref_rcl_phi_us(IFSUM)
  call elapse_time(time,icount0)
  call write_etime(IFLOG,time)
  call write_etime(IFSUM,time)
  call write_title_separable(IFLOG)
  call write_title_separable(IFSUM)
  call calc_rchi_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rchi_us' ; go to 99
  end if
  call calc_bmat_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_bmat_us' ; go to 99
  end if
  call calc_rbeta_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rbeta_us' ; go to 99
  end if
  if (is_write_us /= 0) then
     !+++++++++++++++++++++++++++++++++++++++++
     allocate(psi_g_us(ng_mesh,num_ltx_us), &
          phi_g_us(ng_mesh,num_ltx_us), &
          chi_g_us(ng_mesh,num_ltx_us), &
          beta_g_us(ng_mesh,num_ltx_us))
     psi_g_us = 0.d0 ; phi_g_us  = 0.d0
     chi_g_us = 0.d0 ; beta_g_us = 0.d0
     !+++++++++++++++++++++++++++++++++++++++++
     call calc_us_fourier(ier)
     if (ier /= 0) then
        write(IFLOG,*) '### ERROR ### in calc_us_fourier' ; go to 99
     end if
  end if
  call calc_qnm_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qnm_us' ; go to 99
  end if
  call calc_qnm_sum_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qnm_sum_us' ; go to 99
  end if
  call write_qnm_sum_us(IFLOG)
  call calc_dmat_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_dmat_us' ; go to 99
  end if
  if (is_pp_calc_class == NC) then
     write(IFLOG,*)
     write(IFLOG,*) &
          '*** Because this is NC-PP, bypass US-PP subroutines. ***'
     nrcut_qps_us(:,:) = 0
     rcut_qps_us(:,:)  = 0.d0
     qps_us(:,:,:)     = 0.d0
     qps_sum_us(:)     = 0.d0
     vlocqps_us(:)     = 0.d0
     dion_us(:)        = dmat_us(:)
     call write_qps_vlocqps_us(IFLOG)
     call write_qps_vlocqps_us(IFSUM)
     call write_bmat_dmat_dion_us(IFLOG)
     call write_bmat_dmat_dion_us(IFSUM)
     call elapse_time(time,icount0)
     call write_etime(IFLOG,time)
     call write_etime(IFSUM,time)
     call write_title_deliminator(IFLOG)
     call write_title_deliminator(IFSUM)
     ier = 0 ; go to 99
  end if
  call elapse_time(time,icount0)
  call write_etime(IFLOG,time)
  call write_etime(IFSUM,time)
  call write_title_deficit_charge(IFLOG)
  call write_title_deficit_charge(IFSUM)
  call set_rcut_qps_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_rcut_qps_us' ; go to 99
  end if
  call write_rcut_qps_us(IFLOG)
  call calc_qps_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qps_us' ; go to 99
  end if
  call calc_qps_vlocqps_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qps_vlocqps_us' ; go to 99
  end if
  call write_qps_vlocqps_us(IFLOG)
  call calc_dion_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_dion_us' ; go to 99
  end if
  call write_bmat_dmat_dion_us(IFLOG)
  if (is_write_qps /= 0) then
     !+++++++++++++++++++++++++++++++++++++++++++++++++++++++
     allocate(qps_g_us(ng_mesh,num_ltltx_us,0:l3_max_us), &
          q_g_us(ng_mesh,num_ltltx_us))
     qps_g_us = 0.d0 ; q_g_us = 0.d0
     !+++++++++++++++++++++++++++++++++++++++++++++++++++++++
     call calc_qps_fourier(ier)
     if (ier /= 0) then
        write(IFLOG,*) '### ERROR ### in calc_qps_fourier' ; go to 99
     end if
  end if
  call write_rcut_qps_us(IFSUM)
  call write_qps_vlocqps_us(IFSUM)
  call write_bmat_dmat_dion_us(IFSUM)
  call elapse_time(time,icount0)
  call write_etime(IFLOG,time)
  call write_etime(IFSUM,time)
  call write_title_deliminator(IFLOG)
  call write_title_deliminator(IFSUM)
99 continue
end subroutine calc_general_pp_srel

! ============================ added by K. T. ======================== 4.0
subroutine calc_general_pp_srel_kt(ier)
  use parameters
  implicit none
  
  integer,intent(out) :: ier
  
  !+++++++++++++++++++++++++++++++++++++++++++++++++
  allocate(nref_us(0:lmax),is_val_type_us(0:lmax))
  nref_us = 0 ; is_val_type_us = 0
  !+++++++++++++++++++++++++++++++++++++++++++++++++
  
  call sizeof_lt_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in sizeof_lt_us_kt' ; go to 99
  end if
!  write(*,*) ' nspin = ', nspin

   !+++++++++++++++++++++++++++++++++++++++++++++++++
  allocate( &
       lt_n_us(0:lmax,nref_max_us*nspin),                &
       ips_lt_us(num_ltx_us),                      &
       n_lt_us(num_ltx_us),                        &
       l_lt_us(num_ltx_us),                        &
       t_lt_us(num_ltx_us),                        &
       ln_lt_us(num_ltx_us),                       &
       tn_lt_us(num_ltx_us)                        &
       )
  lt_n_us  = 0 ; ips_lt_us = 0
  n_lt_us  = 0 ; l_lt_us   = 0 ; t_lt_us = 0
  ln_lt_us = 0 ; tn_lt_us  = 0

  allocate( spin_index_lt_us(num_ltx_us) )
  spin_index_lt_us = 0

  allocate( &
       is_bound_us(num_ltx_us),                    &
       nrcut_phi_us(num_ltx_us),                   &
       nodes_phi_us(num_ltx_us),                   &
       vloc_scr_us(nmesh,nspin),                         &
       vloc_ion_us(nmesh,nspin),                         &
       eref_us(num_ltx_us),                        &
       veff_us(nmesh,num_ltx_us),                  &
       rpsi_us(nmesh,num_ltx_us),                  &
       rphi_us(nmesh,num_ltx_us),                  &
       drpsi_us(nmesh,num_ltx_us),                 &
       drphi_us(nmesh,num_ltx_us),                 &
       ddrphi_us(nmesh,num_ltx_us),                &
       rchi_us(nmesh,num_ltx_us),                  &
       rbeta_us(nmesh,num_ltx_us),                 &
       coeff_phi_us(0:ncoeff_phi_us,num_ltx_us),   &
       is_bound_ps(nps),                           &
       norm_psi_ps(nps),                           &
       norm_psi_us(num_ltx_us),                    &
       norm_phi_us(num_ltx_us)                     &
       )
  is_bound_us    = 0 ; nrcut_phi_us    = 0
  nodes_phi_us   = 0
  vloc_scr_us = 0.d0 ; vloc_ion_us  = 0.d0
  eref_us     = 0.d0 ; veff_us      = 0.d0
  rpsi_us     = 0.d0 ; rphi_us      = 0.d0
  drpsi_us    = 0.d0 ; drphi_us     = 0.d0
  ddrphi_us   = 0.d0 ; rchi_us      = 0.d0
  rbeta_us    = 0.d0 ; coeff_phi_us = 0.d0
  is_bound_ps =    0 ; norm_psi_ps  = 0.d0
  norm_psi_us = 0.d0 ; norm_phi_us  = 0.d0
  !+++++++++++++++++++++++++++++++++++++++++++++++++
  call set_lt_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_lt_us_kt' ; go to 99
  end if

  call sizeof_ltt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in sizeof_ltt_us' ; go to 99
  end if
  !++++++++++++++++++++++++++++++++++++++++++++++++
  allocate( &
       ltt_nm_us(0:lmax,nref_max_us*nspin,nref_max_us*nspin), &
       ips1_ltt_us(num_lttx_us),                  &
       ips2_ltt_us(num_lttx_us),                  &
       n1_ltt_us(num_lttx_us),                    &
       n2_ltt_us(num_lttx_us),                    &
       l_ltt_us(num_lttx_us),                     &
       t1_ltt_us(num_lttx_us),                    &
       t2_ltt_us(num_lttx_us),                    &
       tn_ltt_us(num_lttx_us),                    &
       tm_ltt_us(num_lttx_us),                    &
       q_sum_us(num_lttx_us),                     &
       qps_sum_us(num_lttx_us),                   &
       vlocqps_us(num_lttx_us),                   &
       bmat_us(num_lttx_us),                      &
       dmat_us(num_lttx_us),                      &
       dion_us(num_lttx_us)                       &
       )
  ltt_nm_us   = 0
  ips1_ltt_us = 0 ; ips2_ltt_us = 0
  n1_ltt_us   = 0 ; n2_ltt_us   = 0
  l_ltt_us    = 0
  t1_ltt_us   = 0 ; t2_ltt_us   = 0
  tn_ltt_us   = 0 ; tm_ltt_us   = 0
  q_sum_us   = 0.d0 ; qps_sum_us = 0.d0
  vlocqps_us = 0.d0 ; bmat_us    = 0.d0
  dmat_us    = 0.d0 ; dion_us    = 0.d0

  allocate( spin1_index_ltt_us(num_lttx_us) )
  allocate( spin2_index_ltt_us(num_lttx_us) )
  spin1_index_ltt_us = 0
  spin2_index_ltt_us = 0

  !++++++++++++++++++++++++++++++++++++++++++++++++
  call set_ltt_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_ltt_us' ; go to 99
  end if

  call sizeof_ltlt_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in sizeof_ltlt_us' ; go to 99
  end if
  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  allocate( &
       ltlt_nm_us(0:lmax,nref_max_us*nspin,0:lmax,nref_max_us*nspin),      &
       ips1_ltlt_us(num_ltltx_us),                             &
       n1_ltlt_us(num_ltltx_us),                               &
       l1_ltlt_us(num_ltltx_us),                               &
       t1_ltlt_us(num_ltltx_us),                               &
       ln_ltlt_us(num_ltltx_us),                               &
       tn_ltlt_us(num_ltltx_us),                               &
       ips2_ltlt_us(num_ltltx_us),                             &
       n2_ltlt_us(num_ltltx_us),                               &
       l2_ltlt_us(num_ltltx_us),                               &
       t2_ltlt_us(num_ltltx_us),                               &
       lm_ltlt_us(num_ltltx_us),                               &
       tm_ltlt_us(num_ltltx_us),                               &
       nrcut_qps_us(num_ltltx_us,0:l3_max_us),                 &
       q_us(nmesh,num_ltltx_us),                               &
       qps_us(nmesh,num_ltltx_us,0:l3_max_us),                 &
       coeff_qps_us(0:ncoeff_qps_us,num_ltltx_us,0:l3_max_us), &
       rcut_qps_us(num_ltltx_us,0:l3_max_us)                   &
       )
  ltlt_nm_us   = 0
  ips1_ltlt_us = 0 ; n1_ltlt_us = 0 ; l1_ltlt_us = 0
  t1_ltlt_us   = 0 ; ln_ltlt_us = 0 ; tn_ltlt_us = 0
  ips2_ltlt_us = 0 ; n2_ltlt_us = 0 ; l2_ltlt_us = 0
  t2_ltlt_us   = 0 ; lm_ltlt_us = 0 ; tm_ltlt_us = 0
  nrcut_qps_us = 0
  q_us         = 0.d0 ; qps_us      = 0.d0
  coeff_qps_us = 0.d0 ; rcut_qps_us = 0.d0

  allocate( spin1_index_ltlt_us(num_ltltx_us) )
  allocate( spin2_index_ltlt_us(num_ltltx_us) )
  spin1_index_ltlt_us = 0
  spin2_index_ltlt_us = 0

   !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

  call set_ltlt_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_ltlt_us_kt' ; go to 99
  end if

  call set_param_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_param_us' ; go to 99
  end if

  call write_title_wave_function(IFLOG)
  call write_title_wave_function(IFSUM)

  call calc_rpsi_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rpsi_us' ; go to 99
  end if

  call calc_ncpp_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_ncpp_us_kt' ; go to 99
  end if

  call set_vloc_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_vloc_us' ; go to 99
  end if

  call calc_rphi_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rphi_us' ; go to 99
  end if

  call calc_norm_rcl_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_norm_rcl_us' ; go to 99
  end if

  call write_norm_rcl_us_kt(IFLOG)
  call write_norm_rcl_us_kt(IFSUM)

  call calc_norm_rc_us(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_norm_rc_us' ; go to 99
  end if

  call write_norm_rc_us_kt(IFLOG)
  call write_norm_rc_us_kt(IFSUM)

  call write_eref_rcl_phi_us_kt(IFLOG)
  call write_eref_rcl_phi_us_kt(IFSUM)

  call elapse_time(time,icount0)
  call write_etime(IFLOG,time)
  call write_etime(IFSUM,time)
  call write_title_separable(IFLOG)
  call write_title_separable(IFSUM)

  call calc_rchi_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rchi_us_kt' ; go to 99
  end if

  call calc_bmat_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_bmat_us_kt' ; go to 99
  end if

  call calc_rbeta_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_rbeta_us_kt' ; go to 99
  end if

  if (is_write_us /= 0) then
     !+++++++++++++++++++++++++++++++++++++++++
     allocate(psi_g_us(ng_mesh,num_ltx_us), &
          phi_g_us(ng_mesh,num_ltx_us), &
          chi_g_us(ng_mesh,num_ltx_us), &
          beta_g_us(ng_mesh,num_ltx_us))
     psi_g_us = 0.d0 ; phi_g_us  = 0.d0
     chi_g_us = 0.d0 ; beta_g_us = 0.d0
     !+++++++++++++++++++++++++++++++++++++++++
     call calc_us_fourier(ier)
     if (ier /= 0) then
        write(IFLOG,*) '### ERROR ### in calc_us_fourier' ; go to 99
     end if
  end if

  call calc_qnm_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qnm_us_kt' ; go to 99
  end if

  call calc_qnm_sum_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qnm_sum_us_kt' ; go to 99
  end if

  call write_qnm_sum_us_kt(IFLOG)

  call calc_dmat_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_dmat_us_kt' ; go to 99
  end if

  if (is_pp_calc_class == NC) then

!     stop "Not supported "

     write(IFLOG,*)
     write(IFLOG,*) &
          '*** Because this is NC-PP, bypass US-PP subroutines. ***'
     nrcut_qps_us(:,:) = 0
     rcut_qps_us(:,:)  = 0.d0
     qps_us(:,:,:)     = 0.d0
     qps_sum_us(:)     = 0.d0
     vlocqps_us(:)     = 0.d0
     dion_us(:)        = dmat_us(:)

     call write_qps_vlocqps_us_kt(IFLOG)
     call write_qps_vlocqps_us_kt(IFSUM)
     call write_bmat_dmat_dion_us_kt(IFLOG)
     call write_bmat_dmat_dion_us_kt(IFSUM)

     call elapse_time(time,icount0)
     call write_etime(IFLOG,time)
     call write_etime(IFSUM,time)
     call write_title_deliminator(IFLOG)
     call write_title_deliminator(IFSUM)
     ier = 0 ; go to 99
  end if

  call elapse_time(time,icount0)
  call write_etime(IFLOG,time)
  call write_etime(IFSUM,time)
  call write_title_deficit_charge(IFLOG)
  call write_title_deficit_charge(IFSUM)

  call set_rcut_qps_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in set_rcut_qps_us_kt' ; go to 99
  end if

  call write_rcut_qps_us_kt(IFLOG)

  call calc_qps_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qps_us_kt' ; go to 99
  end if

  call calc_qps_vlocqps_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_qps_vlocqps_us_kt' ; go to 99
  end if

  call write_qps_vlocqps_us_kt(IFLOG)

  call calc_dion_us_kt(ier)
  if (ier /= 0) then
     write(IFLOG,*) '### ERROR ### in calc_dion_us_kt' ; go to 99
  end if

  call write_bmat_dmat_dion_us_kt(IFLOG)

  if (is_write_qps /= 0) then
     !+++++++++++++++++++++++++++++++++++++++++++++++++++++++
     allocate(qps_g_us(ng_mesh,num_ltltx_us,0:l3_max_us), &
          q_g_us(ng_mesh,num_ltltx_us))
     qps_g_us = 0.d0 ; q_g_us = 0.d0
     !+++++++++++++++++++++++++++++++++++++++++++++++++++++++
     call calc_qps_fourier(ier)
     if (ier /= 0) then
        write(IFLOG,*) '### ERROR ### in calc_qps_fourier' ; go to 99
     end if
  end if

  call write_rcut_qps_us_kt(IFSUM)
  call write_qps_vlocqps_us_kt(IFSUM)

  call write_bmat_dmat_dion_us_kt(IFSUM)

  call elapse_time(time,icount0)
  call write_etime(IFLOG,time)
  call write_etime(IFSUM,time)

  call write_title_deliminator(IFLOG)
  call write_title_deliminator(IFSUM)

99 continue

end subroutine calc_general_pp_srel_kt
! ==================================================================4.0

!=====================================================================
   subroutine sizeof_lt_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: lt, ll, tt, lt_loc, is_debug_here, ishell, ips
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   nref_us(:) = 0
   lt     = 0
   lt_loc = 0
   do ips = 1,nps
      ishell = ishell_ps(ips,1)
      ll = l_qnum(ishell)
      do tt = 1,nref_ps(ips)
         nref_us(ll) = nref_us(ll) + 1
         if (ll /= lloc) then
            lt = lt + 1
         else
            lt_loc = lt_loc + 1
         end if
      end do
   end do
   num_lt_us     = lt
   num_lt_loc_us = lt_loc
   num_ltx_us    = num_lt_us + num_lt_loc_us
   if (is_debug_here /= 0) then
      write(IFLOG,*) 'LT: num_ltx_us .....',num_ltx_us
      write(IFLOG,*) 'LT:    num_lt_us    ',num_lt_us
      write(IFLOG,*) 'LT:    num_lt_loc_us',num_lt_loc_us
   end if
   nref_max_us = 0
   do ll = 0,lmax
      if (nref_max_us < nref_us(ll)) then
         nref_max_us = nref_us(ll)
      end if
   end do
99 continue
   end subroutine sizeof_lt_us
!=====================================================================

! ================================ added by K. T. ========================= 4.0
subroutine sizeof_lt_us_kt(ier)
  use parameters
  implicit none
  
  integer,intent(out) :: ier
  integer :: lt, ll, tt, lt_loc, is_debug_here, ishell, ips
  
  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if
  ier = 0
  nref_us(:) = 0
  lt     = 0
  lt_loc = 0
  
  do ips = 1,nps, nspin
     ishell = ishell_ps(ips,1)
     ll = l_qnum(ishell)
     
     do tt = 1,nref_ps(ips)
        nref_us(ll) = nref_us(ll) + 1
        if (ll /= lloc) then
           lt = lt + 1
        else
           lt_loc = lt_loc + 1
        end if
     end do
  end do
  
  num_lt_us     = lt
  num_lt_loc_us = lt_loc
  num_ltx_us    = num_lt_us + num_lt_loc_us
!
  if ( nspin == 2 ) then
     num_lt_us = num_lt_us *nspin
     num_lt_loc_us = num_lt_loc_us *nspin
     num_ltx_us = num_ltx_us *nspin
  endif
  !
  if (is_debug_here /= 0) then
     write(IFLOG,*) 'LT: num_ltx_us .....',num_ltx_us
     write(IFLOG,*) 'LT:    num_lt_us    ',num_lt_us
     write(IFLOG,*) 'LT:    num_lt_loc_us',num_lt_loc_us
  end if
  
  nref_max_us = 0
  do ll = 0,lmax
     if (nref_max_us < nref_us(ll)) then
        nref_max_us = nref_us(ll)
     end if
  end do
  
99 continue

end subroutine sizeof_lt_us_kt
! ===================================================================== 4.0


!=====================================================================
   subroutine set_lt_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: lt, nn, ll, tt, ltx, lt_loc, ips, ishell, &
              is_debug_here
   integer,allocatable :: tn_tmp(:)
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
  !++++++++++++++++++++++++++++++++++++++
   allocate(tn_tmp(0:lmax)) ; tn_tmp = 0
  !++++++++++++++++++++++++++++++++++++++
   tn_tmp(:) = 0
   lt       = 0
   lt_loc   = 0
   do ips = 1,nps
      ishell = ishell_ps(ips,1)
      nn = n_qnum(ishell)
      ll = l_qnum(ishell)
      is_val_type_us(ll) = is_val_type_ps(ips)

      do tt = 1,nref_ps(ips)
         if (ll /= lloc) then
            lt  = lt + 1
            ltx = lt
         else
            lt_loc = lt_loc + 1
            ltx    = lt_loc + num_lt_us
         end if
         tn_tmp(ll) = tn_tmp(ll) + 1
         lt_n_us(ll,tn_tmp(ll)) = ltx
         ips_lt_us(ltx) = ips
         n_lt_us  (ltx) = nn
         l_lt_us  (ltx) = ll
         t_lt_us  (ltx) = tt
         ln_lt_us (ltx) = ll
         tn_lt_us (ltx) = tn_tmp(ll)
         if (is_debug_here /= 0) then
            write(IFLOG,*) 'LT: n,l,t,ln,tn,lt,val_type ...', &
               nn,ll,tt,ll,tn_tmp(ll),ltx,is_val_type_ps(ips)
         end if
      end do
   end do
   if (lt /= num_lt_us) then
      write(IFLOG,*) '### ERROR ### lt != num_lt_us'
      ier = 1 ; go to 99
   end if
   if (lt_loc /= num_lt_loc_us) then
      write(IFLOG,*) '### ERROR ### lt_loc != num_lt_loc_us'
      ier = 1 ; go to 99
   end if
  !+++++++++++++++++++
   deallocate(tn_tmp)
  !+++++++++++++++++++
99 continue
   end subroutine set_lt_us

!=============================== added by K. T. =================== 4.0
subroutine set_lt_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: lt, nn, ll, tt, ltx, lt_loc, ips, ishell, &
       is_debug_here
  integer :: ispin, itmp
  
  integer,allocatable :: tn_tmp(:,:)
  
  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if
  ier = 0
  !++++++++++++++++++++++++++++++++++++++
  allocate(tn_tmp(0:lmax,nspin)) ; tn_tmp = 0
  !++++++++++++++++++++++++++++++++++++++

  lt       = 0
  lt_loc   = 0
  
  do ips = 1,nps
     ishell = ishell_ps(ips,1)
     nn = n_qnum(ishell)
     ll = l_qnum(ishell)
     is_val_type_us(ll) = is_val_type_ps(ips)
     
     ispin = (1-spin(ishell))/2 + 1
     
     do tt = 1,nref_ps(ips)         
        if (ll /= lloc) then
           lt  = lt + 1
           ltx = lt
        else
           lt_loc = lt_loc + 1
           ltx    = lt_loc + num_lt_us
        end if
        
        tn_tmp(ll,ispin) = tn_tmp(ll,ispin) + 1

        itmp = tn_tmp(ll,ispin) + nref_max_us *(ispin-1 )
        lt_n_us( ll, itmp ) = ltx
        
        ips_lt_us(ltx) = ips
        
        n_lt_us  (ltx) = nn
        l_lt_us  (ltx) = ll
        t_lt_us  (ltx) = tt
        spin_index_lt_us  (ltx) = ispin
        
        ln_lt_us (ltx) = ll
        tn_lt_us (ltx) = tn_tmp(ll,ispin)
        
        if (is_debug_here /= 0) then
           write(IFLOG,*) 'LT: n,l,t, s, ln,tn,lt,val_type ...', &
                nn,ll,tt, ispin, ll,tn_tmp(ll,ispin),ltx,is_val_type_ps(ips)
        end if
     end do
  end do
  if (lt /= num_lt_us) then
     write(IFLOG,*) '### ERROR ### lt != num_lt_us'
     ier = 1 ; go to 99
  end if
  if (lt_loc /= num_lt_loc_us) then
     write(IFLOG,*) '### ERROR ### lt_loc != num_lt_loc_us'
     ier = 1 ; go to 99
  end if
  !+++++++++++++++++++
  deallocate(tn_tmp)
  !+++++++++++++++++++

99 continue

end subroutine set_lt_us_kt
! ======================================================================== 4.0


!=====================================================================
   subroutine sizeof_ltt_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltt, ltt_nc, is_debug_here, &
              n1, l1, t1, n2, l2, t2, ips1, ips2, ll, lt1, lt2
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   ltt    = 0
   ltt_nc = 0

   do lt1 = 1,num_ltx_us
      n1 = n_lt_us(lt1)
      l1 = l_lt_us(lt1)
      t1 = t_lt_us(lt1)
      ips1 = ips_lt_us(lt1)

      do lt2 = 1,num_ltx_us
         n2 = n_lt_us(lt2)
         l2 = l_lt_us(lt2)
         t2 = t_lt_us(lt2)
         ips2 = ips_lt_us(lt2)

         if (l1 == l2) then
            ll = l1
            if (is_val_type_ps(ips1) /= is_val_type_ps(ips2)) then
               write(IFLOG,*) '### ERROR ### val_type are different'
               write(IFLOG,*) '   ips1,(n1,l1,t1)      ...', &
                                  ips1,n1,l1,t1
               write(IFLOG,*) '   ips2,(n2,l2,t2)      ...', &
                                  ips2,n2,l2,t2
               write(IFLOG,*) '   is_val_type_ps(ips1) ...', &
                                  is_val_type_ps(ips1)
               write(IFLOG,*) '   is_val_type_ps(ips2) ...', &
                                  is_val_type_ps(ips2)
               ier = 1 ; go to 99
            end if
            if (is_val_type_ps(ips1) == TM91) then
               ltt_nc = ltt_nc + 1
            else
               ltt = ltt + 1
            end if
         end if
      end do
   end do
   num_ltt_us  = ltt
   num_lttx_us = ltt + ltt_nc
   if (is_debug_here /= 0) then
      write(IFLOG,*) 'LTT: num_lttx_us .....',num_lttx_us
      write(IFLOG,*) 'LTT:    num_ltt_us    ',num_ltt_us
      write(IFLOG,*) 'LTT:    num_ltt_nc_us ',ltt_nc
   end if
99 continue
   end subroutine sizeof_ltt_us

!=====================================================================
   subroutine set_ltt_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltt, ltt_nc, is_debug_here, &
              n1, l1, t1, n2, l2, t2, ips1, ips2, ll, ips, &
              tn, tm, lt1, lt2, lttx
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   ltt_nm_us(:,:,:) = 0
   l_ltt_us (:) = 0
   t1_ltt_us(:) = 0
   t2_ltt_us(:) = 0
   ltt    = 0
   ltt_nc = 0
   do lt1 = 1,num_ltx_us
      n1 = n_lt_us(lt1)
      l1 = l_lt_us(lt1)
      t1 = t_lt_us(lt1)
      tn = tn_lt_us(lt1)
      ips1 = ips_lt_us(lt1)
      do lt2 = 1,num_ltx_us
         n2 = n_lt_us(lt2)
         l2 = l_lt_us(lt2)
         t2 = t_lt_us(lt2)
         tm = tn_lt_us(lt2)
         ips2 = ips_lt_us(lt2)
         if (l1 == l2) then
            ll = l1
            if (is_val_type_ps(ips1) /= is_val_type_ps(ips2)) then
               write(IFLOG,*) '### ERROR ### val_type are different'
               write(IFLOG,*) '   ips1,(n1,l1,t1)      ...', &
                                  ips1,n1,l1,t1
               write(IFLOG,*) '   ips2,(n2,l2,t2)      ...', &
                                  ips2,n2,l2,t2
               write(IFLOG,*) '   is_val_type_ps(ips1) ...', &
                                  is_val_type_ps(ips1)
               write(IFLOG,*) '   is_val_type_ps(ips2) ...', &
                                  is_val_type_ps(ips2)
               ier = 1 ; go to 99
            end if
            ips = ips1
            if (is_val_type_ps(ips) == TM91) then
               ltt_nc = ltt_nc + 1
               lttx   = ltt_nc + num_ltt_us
            else
               ltt  = ltt + 1
               lttx = ltt
            end if
            ltt_nm_us(ll,tn,tm) = lttx
            l_ltt_us   (lttx) = ll
            ips1_ltt_us(lttx) = ips1 ; ips2_ltt_us(lttx) = ips2
            n1_ltt_us  (lttx) = n1   ; n2_ltt_us  (lttx) = n2
            t1_ltt_us  (lttx) = t1   ; t2_ltt_us  (lttx) = t2
            tn_ltt_us  (lttx) = tn   ; tm_ltt_us  (lttx) = tm
            if (is_debug_here /= 0) then
               write(IFLOG,*) 'LTT: l,t1,t2,tn,tm,ltt ...', &
                                    ll,t1,t2,tn,tm,lttx
            end if
         end if
      end do
   end do
   if (ltt_nc + ltt /= num_lttx_us) then
      write(IFLOG,*) '### ERROR ### ltt_nc + ltt != num_ltt_us'
      ier = 1 ; go to 99
   end if
   if (ltt /= num_ltt_us) then
      write(IFLOG,*) '### ERROR ### ltt != num_ltt_us'
      ier = 1 ; go to 99
   end if
99 continue
   end subroutine set_ltt_us

!================================= added by K. T. ======================== 4.0
subroutine set_ltt_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltt, ltt_nc, is_debug_here, &
       n1, l1, t1, n2, l2, t2, ips1, ips2, ll, ips, &
       tn, tm, lt1, lt2, lttx
  integer :: ispin1, ispin2, itmp1, itmp2

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  ltt_nm_us(:,:,:) = 0
  l_ltt_us (:) = 0
  t1_ltt_us(:) = 0
  t2_ltt_us(:) = 0
  ltt    = 0
  ltt_nc = 0

  do lt1 = 1,num_ltx_us
     n1 = n_lt_us(lt1)
     l1 = l_lt_us(lt1)
     t1 = t_lt_us(lt1)
     tn = tn_lt_us(lt1)
     ips1 = ips_lt_us(lt1)

     ispin1 = spin_index_lt_us(lt1)

     do lt2 = 1,num_ltx_us
        n2 = n_lt_us(lt2)
        l2 = l_lt_us(lt2)
        t2 = t_lt_us(lt2)
        tm = tn_lt_us(lt2)
        ips2 = ips_lt_us(lt2)

        ispin2 = spin_index_lt_us(lt2)

        if (l1 == l2) then
           ll = l1
           if (is_val_type_ps(ips1) /= is_val_type_ps(ips2)) then
              write(IFLOG,*) '### ERROR ### val_type are different'
              write(IFLOG,*) '   ips1,(n1,l1,t1)      ...', &
                   ips1,n1,l1,t1
              write(IFLOG,*) '   ips2,(n2,l2,t2)      ...', &
                   ips2,n2,l2,t2
              write(IFLOG,*) '   is_val_type_ps(ips1) ...', &
                   is_val_type_ps(ips1)
              write(IFLOG,*) '   is_val_type_ps(ips2) ...', &
                   is_val_type_ps(ips2)
              ier = 1 ; go to 99
           end if

           ips = ips1
           if (is_val_type_ps(ips) == TM91) then
              ltt_nc = ltt_nc + 1
              lttx   = ltt_nc + num_ltt_us
           else
              ltt  = ltt + 1
              lttx = ltt
           end if

           itmp1 = tn + nref_max_us *(ispin1 -1)
           itmp2 = tm + nref_max_us *(ispin2 -1)

           ltt_nm_us(ll,itmp1,itmp2) = lttx

           l_ltt_us   (lttx) = ll
           ips1_ltt_us(lttx) = ips1 ; ips2_ltt_us(lttx) = ips2
           n1_ltt_us  (lttx) = n1   ; n2_ltt_us  (lttx) = n2
           t1_ltt_us  (lttx) = t1   ; t2_ltt_us  (lttx) = t2
           tn_ltt_us  (lttx) = tn   ; tm_ltt_us  (lttx) = tm

           spin1_index_ltt_us(lttx) = ispin1
           spin2_index_ltt_us(lttx) = ispin2

           if (is_debug_here /= 0) then
              write(IFLOG,*) 'LTT: l,t1,s1,t2,s2,tn,tm,ltt ...', &
                   ll,t1, ispin1, t2, ispin2, tn,tm,lttx
           end if
        end if
     end do
  end do

  if (ltt_nc + ltt /= num_lttx_us) then
     write(IFLOG,*) '### ERROR ### ltt_nc + ltt != num_ltt_us'
     ier = 1 ; go to 99
  end if
  if (ltt /= num_ltt_us) then
     write(IFLOG,*) '### ERROR ### ltt != num_ltt_us'
     ier = 1 ; go to 99
  end if

99 continue

end subroutine set_ltt_us_kt
! ============================================================================ 4.0

!=====================================================================
   subroutine sizeof_ltlt_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltlt, ltlt_nc, ltltx, is_debug_here, &
              l1, t1, l2, t2, l3, ips1, ips2, n1, n2, &
              ln, lm, tn, tm, lt1, lt2
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   ltlt = 0
   do lt1 = 1,num_ltx_us
      ips1 = ips_lt_us(lt1)
      n1   = n_lt_us  (lt1)
      l1   = l_lt_us  (lt1)
      t1   = t_lt_us  (lt1)
      ln   = ln_lt_us (lt1)
      tn   = tn_lt_us (lt1)
      if (ln == lloc) then
         cycle
      end if
      do lt2 = 1,num_ltx_us
         ips2 = ips_lt_us(lt2)
         n2   = n_lt_us  (lt2)
         l2   = l_lt_us  (lt2)
         t2   = t_lt_us  (lt2)
         lm   = ln_lt_us (lt2)
         tm   = tn_lt_us (lt2)
         if (lm == lloc) then
            cycle
         end if
         if (ln > lm) then
            cycle
         end if
         if ((ln == lm).and.(tn > tm)) then
            cycle
         end if
         if ((is_val_type_us(ln) == TM91).and. &
             (is_val_type_us(lm) == TM91).and. &
             (tn == 1).and. &
             (tm == 1)) then
            cycle
         end if
         ltlt = ltlt + 1
      end do
   end do
   num_ltlt_us = ltlt
   ltlt_nc = 0
   ltlt    = 0
   l3_max_us = 0
   do lt1 = 1,num_ltx_us
      ips1 = ips_lt_us(lt1)
      n1   = n_lt_us  (lt1)
      l1   = l_lt_us  (lt1)
      t1   = t_lt_us  (lt1)
      ln   = ln_lt_us (lt1)
      tn   = tn_lt_us (lt1)
      if (ln == lloc) then
         cycle
      end if
      do lt2 = 1,num_ltx_us
         ips2 = ips_lt_us(lt2)
         n2   = n_lt_us  (lt2)
         l2   = l_lt_us  (lt2)
         t2   = t_lt_us  (lt2)
         lm   = ln_lt_us (lt2)
         tm   = tn_lt_us (lt2)
         if (lm == lloc) then
            cycle
         end if
         if (ln > lm) then
            cycle
         end if
         if ((ln == lm).and.(tn > tm)) then
            cycle
         end if
         if ((is_val_type_us(ln) == TM91).and. &
             (is_val_type_us(lm) == TM91).and. &
             (tn == 1).and. &
             (tm == 1)) then
            ltlt_nc = ltlt_nc + 1
            ltltx   = ltlt_nc + num_ltlt_us  
         else
            ltlt  = ltlt + 1    
            ltltx = ltlt
         end if
         do l3 = abs(ln-lm),ln+lm,2
            if (l3_max_us < l3) then
               l3_max_us = l3
            end if
         end do
      end do
   end do
   num_ltlt_us  = ltlt
   num_ltltx_us = ltlt + ltlt_nc
   if (is_debug_here /= 0) then
      write(IFLOG,*) 'LTLT: num_ltltx_us ....',num_ltltx_us
      write(IFLOG,*) 'LTLT:    num_ltlt_us   ',num_ltlt_us
      write(IFLOG,*) 'LTLT:    num_ltlt_nc_us',ltlt_nc
      write(IFLOG,*) 'LTLT: l3_max_us .......',l3_max_us
   end if
99 continue
   end subroutine sizeof_ltlt_us

!=====================================================================
   subroutine set_ltlt_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltlt, ltlt_nc, ltltx, l3_max, is_debug_here, &
              n1, l1, t1, n2, l2, t2, l3, ips1, ips2, &
              ln, lm, tn, tm, lt1, lt2
      
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   ltlt_nm_us(:,:,:,:) = 0
   ips1_ltlt_us(:) = 0 ; ips2_ltlt_us(:) = 0
   n1_ltlt_us(:) = 0   ; n2_ltlt_us(:) = 0
   l1_ltlt_us(:) = 0   ; l2_ltlt_us(:) = 0
   t1_ltlt_us(:) = 0   ; t2_ltlt_us(:) = 0
   ln_ltlt_us(:) = 0   ; lm_ltlt_us(:) = 0
   tn_ltlt_us(:) = 0   ; tm_ltlt_us(:) = 0
   ltlt_nc = 0
   ltlt    = 0
   l3_max  = 0
   do lt1 = 1,num_ltx_us
      ips1 = ips_lt_us(lt1)
      n1   = n_lt_us  (lt1)
      l1   = l_lt_us  (lt1)
      t1   = t_lt_us  (lt1)
      ln   = ln_lt_us (lt1)
      tn   = tn_lt_us (lt1)
      if (ln == lloc) then  
         cycle
      end if
      do lt2 = 1,num_ltx_us
         ips2 = ips_lt_us(lt2)
         n2   = n_lt_us  (lt2)
         l2   = l_lt_us  (lt2)
         t2   = t_lt_us  (lt2)
         lm   = ln_lt_us (lt2)
         tm   = tn_lt_us (lt2)
         if (lm == lloc) then
            cycle
         end if
         if (ln > lm) then
            cycle
         end if
         if ((ln == lm).and.(tn > tm)) then
            cycle
         end if
         if ((is_val_type_us(ln) == TM91).and. &
             (is_val_type_us(lm) == TM91).and. &
             (tn == 1).and. &
             (tm == 1)) then      
            ltlt_nc = ltlt_nc + 1
            ltltx   = ltlt_nc + num_ltlt_us
         else
            ltlt  = ltlt + 1
            ltltx = ltlt
         end if
         ltlt_nm_us(ln,tn,lm,tm) = ltltx
         ltlt_nm_us(lm,tm,ln,tn) = ltltx
         ips1_ltlt_us(ltltx) = ips1 ; ips2_ltlt_us(ltltx) = ips2
         n1_ltlt_us  (ltltx) = n1   ; n2_ltlt_us  (ltltx) = n2
         l1_ltlt_us  (ltltx) = l1   ; l2_ltlt_us  (ltltx) = l2
         t1_ltlt_us  (ltltx) = t1   ; t2_ltlt_us  (ltltx) = t2
         ln_ltlt_us  (ltltx) = ln   ; lm_ltlt_us  (ltltx) = lm
         tn_ltlt_us  (ltltx) = tn   ; tm_ltlt_us  (ltltx) = tm
         do l3 = abs(ln-lm),ln+lm,2
            if (l3_max < l3) then
               l3_max = l3
            end if
         end do
         if (is_debug_here /= 0) then
            write(IFLOG,*) 'LTLT: ln,tn,lm,tm,ltlt ...', &
                                  ln,tn,lm,tm,ltltx
         end if
      end do
   end do
   num_ltltx_us = ltlt + ltlt_nc
   if (is_debug_here /= 0) then
      write(IFLOG,*) 'LTLT: num_ltltx_us',num_ltltx_us
   end if
   if (ltlt /= num_ltlt_us) then
      write(IFLOG,*) '### ERROR ### ltlt != num_ltlt_us'
      ier = 1 ; go to 99
   end if
   if (ltlt + ltlt_nc /= num_ltltx_us) then
      write(IFLOG,*) &
         '### ERROR ### ltlt + ltlt_nc != num_ltltx_us'
      ier = 1 ; go to 99
   end if
   if (l3_max /= l3_max_us) then
      write(IFLOG,*) '### ERROR ### ll3_max_us != l3_max_us'
      ier = 1 ; go to 99
   end if
99 continue
   end subroutine set_ltlt_us

!================================ added by K. T. ======================== 4.0
subroutine set_ltlt_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltlt, ltlt_nc, ltltx, l3_max, is_debug_here, &
       n1, l1, t1, n2, l2, t2, l3, ips1, ips2, &
       ln, lm, tn, tm, lt1, lt2
  integer :: ispin1, ispin2, itmp1, itmp2
  
  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  ltlt_nm_us(:,:,:,:) = 0
  ips1_ltlt_us(:) = 0 ; ips2_ltlt_us(:) = 0
  n1_ltlt_us(:) = 0   ; n2_ltlt_us(:) = 0
  l1_ltlt_us(:) = 0   ; l2_ltlt_us(:) = 0
  t1_ltlt_us(:) = 0   ; t2_ltlt_us(:) = 0
  ln_ltlt_us(:) = 0   ; lm_ltlt_us(:) = 0
  tn_ltlt_us(:) = 0   ; tm_ltlt_us(:) = 0
  ltlt_nc = 0
  ltlt    = 0
  l3_max  = 0

  do lt1 = 1,num_ltx_us
     ips1 = ips_lt_us(lt1)
     n1   = n_lt_us  (lt1)
     l1   = l_lt_us  (lt1)
     t1   = t_lt_us  (lt1)
     ln   = ln_lt_us (lt1)
     tn   = tn_lt_us (lt1)

     ispin1 = spin_index_lt_us(lt1)

     if (ln == lloc) then  
        cycle
     end if

     do lt2 = 1,num_ltx_us
        ips2 = ips_lt_us(lt2)
        n2   = n_lt_us  (lt2)
        l2   = l_lt_us  (lt2)
        t2   = t_lt_us  (lt2)
        lm   = ln_lt_us (lt2)
        tm   = tn_lt_us (lt2)

        ispin2 = spin_index_lt_us(lt2)

        if (lm == lloc) then
           cycle
        end if

        if (ln > lm) then
           cycle
        end if

        if ((ln == lm).and.(tn > tm)) then
           cycle
        end if

        if ((is_val_type_us(ln) == TM91).and. &
             (is_val_type_us(lm) == TM91).and. &
             (tn == 1).and. &
             (tm == 1)) then      
           ltlt_nc = ltlt_nc + 1
           ltltx   = ltlt_nc + num_ltlt_us
        else
           ltlt  = ltlt + 1
           ltltx = ltlt
        end if
        
        itmp1 = tn + nref_max_us *(ispin1 -1)
        itmp2 = tm + nref_max_us *(ispin2 -1)

        ltlt_nm_us(ln,itmp1,lm,itmp2) = ltltx
        ltlt_nm_us(lm,itmp2,ln,itmp1) = ltltx

        ips1_ltlt_us(ltltx) = ips1 ; ips2_ltlt_us(ltltx) = ips2
        n1_ltlt_us  (ltltx) = n1   ; n2_ltlt_us  (ltltx) = n2
        l1_ltlt_us  (ltltx) = l1   ; l2_ltlt_us  (ltltx) = l2
        t1_ltlt_us  (ltltx) = t1   ; t2_ltlt_us  (ltltx) = t2
        ln_ltlt_us  (ltltx) = ln   ; lm_ltlt_us  (ltltx) = lm
        tn_ltlt_us  (ltltx) = tn   ; tm_ltlt_us  (ltltx) = tm

        spin1_index_ltlt_us( ltltx ) = ispin1
        spin2_index_ltlt_us( ltltx ) = ispin2

        do l3 = abs(ln-lm),ln+lm,2
           if (l3_max < l3) then
              l3_max = l3
           end if
        end do
        if (is_debug_here /= 0) then
           write(IFLOG,*) 'LTLT: ln,tn,sn,lm,tm,sm,ltlt ...', &
                ln,tn,ispin1,lm,tm,ispin2,ltltx
        end if
     end do
  end do

  num_ltltx_us = ltlt + ltlt_nc
  if (is_debug_here /= 0) then
     write(IFLOG,*) 'LTLT: num_ltltx_us',num_ltltx_us
  end if
  if (ltlt /= num_ltlt_us) then
     write(IFLOG,*) '### ERROR ### ltlt != num_ltlt_us'
     ier = 1 ; go to 99
  end if
  if (ltlt + ltlt_nc /= num_ltltx_us) then
     write(IFLOG,*) &
          '### ERROR ### ltlt + ltlt_nc != num_ltltx_us'
     ier = 1 ; go to 99
  end if
  if (l3_max /= l3_max_us) then
     write(IFLOG,*) '### ERROR ### ll3_max_us != l3_max_us'
     ier = 1 ; go to 99
  end if

99 continue

end subroutine set_ltlt_us_kt
! ======================================================================== 4.0

!=====================================================================
   subroutine set_param_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: nn, ll, tt, ln, tn, ips, ishell, nrc, ir, &
              is_debug_here, lt
   real(8) :: r, ee, sum
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   is_bound_ps(:) = 0
   do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      tt = t_lt_us(lt)
      ishell = ishell_ps(ips,1)
      if (is_solve(ishell) /= 0) then
         if (abs(deref_ps(ips,tt)) < 1.d-6) then
            is_bound_us(lt) = 1
         else
            is_bound_us(lt) = 0
         end if
      else
         is_bound_us(lt) = 0
      end if
      is_bound_ps(ips) = max(is_bound_ps(ips),is_bound_us(lt))
   end do
   nrcut_phi_max_us = 0
   norm_psi_ps(:) = 0.d0
   do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      nn  = n_lt_us(lt)
      ll  = l_lt_us(lt)
      tt  = t_lt_us(lt)
      ln  = ln_lt_us(lt)
      tn  = tn_lt_us(lt)
      ishell = ishell_ps(ips,1)
      if (abs(engy(ishell)) < 1.d-10) then
         ee = efermi        
      else                  
         ee = engy(ishell)
      end if
      eref_us(lt) = deref_ps(ips,tt) + ee
      if (is_debug_here /= 0) then
         write(IFLOG,*) 'US: l,t,eref_us ......',ll,tt,eref_us(lt)
      end if
      SRCH_RC:do ir = nmesh,10,-1
         r = rpos(ir)
         if (r < rcut_phi_ps(ips,tt)) then
            nrc = ir ; exit SRCH_RC
         end if
         if (ir <= 10) then
            write(IFLOG,*) '### ERROR ### nrc was not found'
            write(IFLOG,*) '   nn ...',nn
            write(IFLOG,*) '   ll ...',ll
            write(IFLOG,*) '   tt ...',tt
            ier = 1 ; go to 99
         end if
      end do SRCH_RC
      nrcut_phi_us(lt) = nrc
      if (nrcut_phi_max_us < nrcut_phi_us(lt)) then
         nrcut_phi_max_us = nrcut_phi_us(lt)
      end if
      if (is_debug_here /= 0) then
         write(IFLOG,*) 'US: l,t,nrcut_phi_us .....', &
                             ll,tt,nrcut_phi_us(lt),l_lt_us(lt),ln_lt_us(lt)
      end if
   end do
   norm_psi_ps(:) = 1.d0
   do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      ishell = ishell_ps(ips,1)                       !AAS 2009
      if (is_bound_us(lt) == 1) then
         nrc = nrcut_phi_us(lt)
         call set_weight_exp(ier,1,nrc,rpos,wt)
         sum = 0.d0
         do ir = 1,nrc
            sum = sum + chi_g(ir,ishell)**2 * wt(ir)
         end do
         norm_psi_ps(ips) = sqrt(sum)
      end if
      write(IFLOG,*) 'US: ips,norm_psi_ps(ips) ...', &
                          ips,norm_psi_ps(ips)
   end do
   select case (lloc)
   case (0:3)
      nrcut_loc_us = nrcut_phi_max_us
   case (5)
      nrcut_loc_us = 0
   case (6)
      nrcut_loc_us = 0
   case default
      write(IFLOG,*) '### ERROR ### lloc is out of range'
      write(IFLOG,*) '   lloc ...',lloc
      ier = 1 ; go to 99
   end select
   nrcut_max_us  = max(nrcut_phi_max_us, nrcut_loc_us)
   nrcut_maxx_us = nrcut_max_us + 15
99 continue
   end subroutine set_param_us

!=====================================================================
   subroutine calc_rpsi_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ishell, ll, nn, kk, tt, ispin, id, ln, tn, &
              ll_core, node, node_sum, nrt, nrc, ips, lt
   real(8) :: ee, g_norm, sum, rcut, sign_mod, g1, eref, deref
   ier = 0
  !++++++++++++++++++++++++++++++++++++++++
   allocate( &
      chi_gl(nmesh),chi_gr(nmesh),       &
      chi_fl(nmesh),chi_fr(nmesh),       &
      dxchi_gl(nmesh),dxchi_gr(nmesh),   &
      dxchi_fl(nmesh),dxchi_fr(nmesh))
      chi_gl   = 0.d0 ; chi_gr   = 0.d0
      chi_fl   = 0.d0 ; chi_fr   = 0.d0
      dxchi_gl = 0.d0 ; dxchi_gr = 0.d0
      dxchi_fl = 0.d0 ; dxchi_fr = 0.d0
  !++++++++++++++++++++++++++++++++++++++++
   call calc_vcoeff(ier)
   if (ier /= 0) then
      write(IFLOG,*) '### ERROR ### in calc_vcoeff' ; go to 99
   end if
   write(IFLOG,*)
   write(IFLOG,*) 'Calculating unbound states ...'
MAIN1:do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      tt = t_lt_us(lt)
      ishell = ishell_ps(ips,1)
      nn = n_qnum(ishell)
      ll = l_qnum(ishell)
      kk = k_qnum(ishell)

      ispin = (1-spin(ishell))/2 + 1

      ln = ln_lt_us(lt)
      tn = tn_lt_us(lt)
      eref  = eref_us(lt)
      deref = deref_ps(ips,tt)
      if (is_bound_us(lt) == 1) then
         rpsi_us(:,lt) = chi_g(:,ishell)
         cycle MAIN1
      else
         rpsi_us(:,lt) = 0.d0
         node = nn - ll - 1
      end if
      write(IFLOG,*)
      write(IFLOG,'(1x,a31,5i5)') &
            '??? ishell,ispin,nn,ll,node ...',ishell,ispin,nn,ll,node
      ee = eref_us(lt)
      if (is_core == PATOM) then
         ll_core = ll
      else
         ll_core = lmax_core
      end if
      nrc  = nrcut_phi_us(lt)
      rcut = rpos(nrc)
      nrt = nrcut_maxx_us 
      id = +1
      call set_initpoints_left(ier,ll,kk,ispin,ee)
      if (ier /= 0) then
         write(IFLOG,*) '### ERROR ### in set_initpoints_left'
         go to 99
      end if
      call int_from_left(ier,node_sum,nrt,id,ll,kk,ee,ispin)
      if (ier /= 0) then
         write(IFLOG,*) '### ERROR ### in int_from_left' ; go to 99
      end if
      if (node_sum > node) then 
         write(IFLOG,*) '### CAUTION ### node_sum > node'
         write(IFLOG,*) '   node_sum ...',node_sum
         write(IFLOG,*) '   node     ...',node
      end if
      call set_weight_exp(ier,1,nrc,rpos,wt)
      select case (is_calc)
      case (NONREL,SREL)
         sum = 0.d0
         do ir = 1,nrc
            sum = sum + chi_gl(ir)**2 * wt(ir)
         end do
      case (REL)
         sum = 0.d0
         do ir = 1,nrc
            sum = sum + (chi_gl(ir)**2 + chi_fl(ir)**2) * wt(ir)
         end do
      end select
      g1 = chi_gl(nrc)
      if (g1 < 0.d0) then
         write(IFLOG,*) '### CAUTION ### g1 < 0'
         write(IFLOG,*) '   g1 (Hamman) ...',g1
      end if
      g_norm = sqrt(sum)
      if (g1 > 0.d0) then
         sign_mod =  1.d0
      else
         sign_mod = -1.d0
      end if
      select case (is_calc)
      case (NONREL,SREL)
         do ir = 1,nrt
            rpsi_us(ir,lt) = chi_gl(ir)/g_norm * sign_mod &
                              * norm_psi_ps(ips)
         end do
         call set_weight_exp(ier,1,nrc,rpos,wt)
         sum = 0.d0
         do ir = 1,nrc
            sum = sum + rpsi_us(ir,lt)**2 * wt(ir)
         end do
         write(IFLOG,*) &
            '[calc_rpsi_us] norm of rpsi_us     ...', sqrt(sum)
         write(IFLOG,*) &
            '[calc_rpsi_us]    norm_psi_ps      ...',norm_psi_ps(ips)
         write(IFLOG,*) &
            '[calc_rpsi_us]    lt, ips, (n,l,t) ...',lt,ips,nn,ll,tt
        !--- MO-100528 [begin]
         !if (is_val_type_ps(ips) == US90) then
         if (is_bound_us(lt) == 0) then
            call opt_ub_to_bound(ier,IFLOG,IFSUM,nn,ll,tt,eref,deref, &
               nmesh,rpos,nrc,rpsi_us(1,lt),rpsi_us(1,lt))
            if (ier /= 0) then
               write(IFLOG,*) '### ERROR ### in opt_ub_to_bound'
               go to 99
            end if
         end if
        !--- MO-100528 [end]
      case (REL) 
         do ir = 1,nrt
            rpsi_us(ir,lt) = chi_gl(ir)/g_norm * sign_mod &
                              * norm_psi_ps(ips)
         end do
      end select
end do MAIN1
   do lt = 1,num_ltx_us
   do ir = 1,nmesh
      if (abs(rpsi_us(ir,lt)) < 1.d-99) then
         rpsi_us(ir,lt) = 0.d0
      end if
   end do
   end do
99 continue
  !++++++++++++++++++++++++++++++++++++++++++
   deallocate(chi_gl,chi_gr,chi_fl,chi_fr, &
      dxchi_gl,dxchi_gr,dxchi_fl,dxchi_fr)
  !++++++++++++++++++++++++++++++++++++++++++
   end subroutine calc_rpsi_us

!=====================================================================
   subroutine calc_ncpp_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, nn, ll, tt, tn, ips, ishell, nrc, ll_core, &
              is_bound_state, nk_tm, n1, n2, isdiff, icoeff, lt, nodes
   real(8) :: eref, deref, gcut0, gcut1, gcut2, gcut3, gcut4, rcut, &
              sumpsi_exact, sumphi_num
   real(8),allocatable :: drpsi_tmp(:), rphi_tmp(:)
   ier = 0
   nk_tm = 4
   ncoeff_phi_tm = 6
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   allocate(coeff_tm(0:12,nps,2))
   allocate(coeff_phi_tm(0:ncoeff_phi_tm,nps,2))
   allocate(drpsi_tmp(0:nk_tm),rphi_tmp(nmesh))
      coeff_tm  = 0.d0 ; coeff_phi_tm = 0.d0
      drpsi_tmp = 0.d0 ; rphi_tmp     = 0.d0
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   coeff_phi_tm(:,:,:) = 0.d0
LOOP_TM:do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      tt = t_lt_us(lt)
      tn = tn_lt_us(lt)
      ishell = ishell_ps(ips,1)
      nn = n_qnum(ishell)
      ll = l_qnum(ishell)
      if (is_val_type_ps(ips) /= TM91) then
         coeff_tm(:,ips,1) = 0.d0
         cycle LOOP_TM
      end if
      write(IFLOG,*)
      write(IFLOG,'(1x,a23,6i5)') &
            '??? lt,ips,nn,ll,tt ...',lt,ips,nn,ll,tt

      if ((ll == lloc).and.(tn /= 1)) then
         write(IFLOG,*) '### ERROR ### tn != 1 for NC-PP local orbital'
         write(IFLOG,*) '   (n,l,t) ...',nn,ll,tt
         write(IFLOG,*) '   tn      ...',tn
         ier = 1 ; go to 99
      end if

      nrc   = nrcut_phi_us(lt)
      rcut  = rpos(nrc)
      eref  = eref_us(lt)
      deref = deref_ps(ips,tt)
      if (is_core == PATOM) then
         ll_core = ll
      else
         ll_core = lmax_core
      end if
      if (is_solve(ishell) /= 0) then
         if (abs(eref-engy(ishell)) < 1.d-6) then
            is_bound_state = 1
!--- MO-110228 [begin]
            is_bound_us(lt) = 1
            is_bound_us(ips) = 1
!--- MO-110228 [end]
         else
            is_bound_state = 0
!--- MO-110228 [begin]
            is_bound_us(lt) = 0
            is_bound_us(ips) = 0
!--- MO-110228 [end]
         end if
      else
         is_bound_state = 0
!--- MO-110228 [begin]
         is_bound_us(lt) = 0
         is_bound_us(ips) = 0
!--- MO-110228 [end]
      end if
      n1 = nrc - 10 ; n2 = nrc + 10
      if (n1 < 1) then
         n1 = 1 ; n2 = 1 + 10*2
      else if (n2 > nmesh) then
         n1 = nmesh - 10*2 ; n2 = nmesh
      end if
      isdiff = 4
      call diff4_exp(ier,isdiff,n1,n2,rpos,rpsi_us(1,lt),rcut, &
                     gcut0,gcut1,gcut2,gcut3,gcut4)
      if (gcut0 < 0.d0) then
         write(IFLOG,*) '### ERROR ### gcut0 < 0'
         write(IFLOG,*) '   gcut0 ...',gcut0
         write(IFLOG,*) '   gcut1 ...',gcut1
         write(IFLOG,*) '   gcut2 ...',gcut2
         write(IFLOG,*) '   gcut3 ...',gcut3
         write(IFLOG,*) '   gcut4 ...',gcut4
         ier = 1 ; go to 99
      end if
      call calc_tmpp_us(ier,IFLOG,is_bound_state, &
           nmesh,rpos,wr,wt, &
           rpsi_us(1,lt),veff(1,1,ll_core), &
           rphi_us(1,lt),veff_us(1,lt), &
           ll,eref,nrc,gcut0,gcut1,gcut2,gcut3,gcut4, &
           coeff_tm(0,ips,1))
      if (ier /= 0) then
         write(IFLOG,*) '### ERROR ### in calc_tmpp_us'
         go to 99
      end if
      chi_ps (:,ips,1) = rphi_us(:,lt)
      veff_ps(:,ips,1) = veff_us(:,lt)
      call calc_nodes_us(nmesh,rphi_us(1,lt),nrc,nodes)
      nodes_phi_us(lt) = nodes
      call set_weight_exp(ier,1,nrc,rpos,wt)
      sumpsi_exact = 0.d0
      sumphi_num   = 0.d0
      do ir = 1,nrc
         sumpsi_exact = sumpsi_exact + rpsi_us(ir,lt)**2 * wt(ir)
         sumphi_num   = sumphi_num   + rphi_us(ir,lt)**2 * wt(ir)
      end do
      norm_psi_us(lt) = sqrt(sumpsi_exact)
      norm_phi_us(lt) = sqrt(sumphi_num)
      drpsi_tmp(0) = gcut0 ; drpsi_tmp(1) = gcut1
      drpsi_tmp(2) = gcut2 ; drpsi_tmp(3) = gcut3
      drpsi_tmp(4) = gcut4
      rphi_tmp(:) = rphi_us(:,lt) 
      do icoeff = 0,ncoeff_phi_tm
         coeff_phi_tm(icoeff,ips,1) = coeff_tm(2*icoeff,ips,1)
      end do
      call check_rphi_tm(ier,ips,nn,ll,tt,eref,deref,nrc,nk_tm, &
                         drpsi_tmp,rphi_tmp, &
                         sumpsi_exact,sumphi_num)
end do LOOP_TM
   do lt = 1,num_ltx_us
   do ir = 1,nmesh
      if (abs(rpsi_us(ir,lt)) < 1.d-99) then
         rpsi_us(ir,lt) = 0.d0
      end if
      if (abs(rphi_us(ir,lt)) < 1.d-99) then
         rphi_us(ir,lt) = 0.d0
      end if
      if (abs(veff_us(ir,lt)) < 1.d-99) then
         veff_us(ir,lt) = 0.d0
      end if
   end do
   end do
99 continue
  !++++++++++++++++++++++++++++++++++++++++
   deallocate(coeff_tm,drpsi_tmp,rphi_tmp)
  !++++++++++++++++++++++++++++++++++++++++
   end subroutine calc_ncpp_us

!============================= added by K. T. =========================== 4.0
subroutine calc_ncpp_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ir, nn, ll, tt, tn, ips, ishell, nrc, ll_core, &
       is_bound_state, nk_tm, n1, n2, isdiff, icoeff, lt, nodes

  integer :: ispin

  real(8) :: eref, deref, gcut0, gcut1, gcut2, gcut3, gcut4, rcut, &
       sumpsi_exact, sumphi_num
  real(8),allocatable :: drpsi_tmp(:), rphi_tmp(:)

  ier = 0
  nk_tm = 4
  ncoeff_phi_tm = 6

  !++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  allocate(coeff_tm(0:12,nps,2))
  allocate(coeff_phi_tm(0:ncoeff_phi_tm,nps,2))
  allocate(drpsi_tmp(0:nk_tm),rphi_tmp(nmesh))
  coeff_tm  = 0.d0 ; coeff_phi_tm = 0.d0
  drpsi_tmp = 0.d0 ; rphi_tmp     = 0.d0

  !++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  coeff_phi_tm(:,:,:) = 0.d0

  LOOP_TM:do lt = 1,num_ltx_us
     ips = ips_lt_us(lt)
     tt = t_lt_us(lt)
     tn = tn_lt_us(lt)

     ishell = ishell_ps(ips,1)

     nn = n_qnum(ishell)
     ll = l_qnum(ishell)

     ispin = (1-spin(ishell))/2 + 1

     if (is_val_type_ps(ips) /= TM91) then
        coeff_tm(:,ips,1) = 0.d0
        cycle LOOP_TM
     end if
     
     write(IFLOG,*)
     write(IFLOG,'(1x,a23,6i5)') &
          '??? lt,ips,nn,ll,tt ...',lt,ips,nn,ll,tt
     
     if ( nspin == 1 ) then
        if ((ll == lloc).and.(tn /= 1)) then
           write(IFLOG,*) '### ERROR ### tn != 1 for NC-PP local orbital'
           write(IFLOG,*) '   (n,l,t) ...',nn,ll,tt
           write(IFLOG,*) '   tn      ...',tn
           ier = 1 ; go to 99
        end if
     else
        if ((ll == lloc).and.(tn >2 .or. tn <1) ) then
           write(IFLOG,*) '### ERROR ### tn != 1 for NC-PP local orbital'
           write(IFLOG,*) '   (n,l,t) ...',nn,ll,tt
           write(IFLOG,*) '   tn      ...',tn
           ier = 1 ; go to 99
        end if
     endif
      
     nrc   = nrcut_phi_us(lt)
     rcut  = rpos(nrc)
     eref  = eref_us(lt)
     deref = deref_ps(ips,tt)

     if (is_core == PATOM) then
        ll_core = ll
     else
        ll_core = lmax_core
     end if

     if (is_solve(ishell) /= 0) then
        
        if (abs(eref-engy(ishell)) < 1.d-6) then
           is_bound_state = 1
!--- MO-110228 [begin]
           is_bound_us(lt) = 1
           is_bound_us(ips) = 1
!--- MO-110228 [end]
        else
           is_bound_state = 0
!--- MO-110228 [begin]
           is_bound_us(lt) = 0
           is_bound_us(ips) = 0
!--- MO-110228 [end]
        end if
     else
        is_bound_state = 0
!--- MO-110228 [begin]
        is_bound_us(lt) = 0
        is_bound_us(ips) = 0
!--- MO-110228 [end]
     end if

     n1 = nrc - 10 ; n2 = nrc + 10

     if (n1 < 1) then
        n1 = 1 ; n2 = 1 + 10*2
     else if (n2 > nmesh) then
        n1 = nmesh - 10*2 ; n2 = nmesh
     end if

     isdiff = 4
     call diff4_exp(ier,isdiff,n1,n2,rpos,rpsi_us(1,lt),rcut, &
          gcut0,gcut1,gcut2,gcut3,gcut4)

     if (gcut0 < 0.d0) then
        write(IFLOG,*) '### ERROR ### gcut0 < 0'
        write(IFLOG,*) '   gcut0 ...',gcut0
        write(IFLOG,*) '   gcut1 ...',gcut1
        write(IFLOG,*) '   gcut2 ...',gcut2
        write(IFLOG,*) '   gcut3 ...',gcut3
        write(IFLOG,*) '   gcut4 ...',gcut4
        ier = 1 ; go to 99
     end if

     call calc_tmpp_us(ier,IFLOG,is_bound_state, &
          nmesh,rpos,wr,wt, &
          rpsi_us(1,lt),veff(1,ispin,ll_core), &
          rphi_us(1,lt),veff_us(1,lt), &
          ll,eref,nrc,gcut0,gcut1,gcut2,gcut3,gcut4, &
          coeff_tm(0,ips,1))

     if (ier /= 0) then
        write(IFLOG,*) '### ERROR ### in calc_tmpp_us'
        go to 99
     end if

     chi_ps (:,ips,1) = rphi_us(:,lt)
     veff_ps(:,ips,1) = veff_us(:,lt)

     call calc_nodes_us(nmesh,rphi_us(1,lt),nrc,nodes)
     nodes_phi_us(lt) = nodes

     call set_weight_exp(ier,1,nrc,rpos,wt)
     sumpsi_exact = 0.d0
     sumphi_num   = 0.d0

     do ir = 1,nrc
        sumpsi_exact = sumpsi_exact + rpsi_us(ir,lt)**2 * wt(ir)
        sumphi_num   = sumphi_num   + rphi_us(ir,lt)**2 * wt(ir)
     end do

     norm_psi_us(lt) = sqrt(sumpsi_exact)
     norm_phi_us(lt) = sqrt(sumphi_num)
     drpsi_tmp(0) = gcut0 ; drpsi_tmp(1) = gcut1
     drpsi_tmp(2) = gcut2 ; drpsi_tmp(3) = gcut3
     drpsi_tmp(4) = gcut4
     rphi_tmp(:) = rphi_us(:,lt) 

     do icoeff = 0,ncoeff_phi_tm
        coeff_phi_tm(icoeff,ips,1) = coeff_tm(2*icoeff,ips,1)
     end do
     call check_rphi_tm(ier,ips,nn,ll,tt,eref,deref,nrc,nk_tm, &
          drpsi_tmp,rphi_tmp, &
          sumpsi_exact,sumphi_num)
  end do LOOP_TM

  do lt = 1,num_ltx_us
     do ir = 1,nmesh
        if (abs(rpsi_us(ir,lt)) < 1.d-99) then
           rpsi_us(ir,lt) = 0.d0
        end if
        if (abs(rphi_us(ir,lt)) < 1.d-99) then
           rphi_us(ir,lt) = 0.d0
        end if
        if (abs(veff_us(ir,lt)) < 1.d-99) then
           veff_us(ir,lt) = 0.d0
        end if
     end do
  end do

99 continue
  !++++++++++++++++++++++++++++++++++++++++
  deallocate(coeff_tm,drpsi_tmp,rphi_tmp)
  !++++++++++++++++++++++++++++++++++++++++

end subroutine calc_ncpp_us_kt

!=====================================================================
   subroutine calc_nodes_us(nmesh,func,nrc,nodes)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   implicit none
   integer,intent(in)  :: nmesh, nrc
   real(8),intent(in)  :: func(nmesh)
   integer,intent(out) :: nodes
   integer :: ir
   nodes = 0
   do ir = 2,nrc
      if (func(ir-1)*func(ir) < 0.d0) then
         nodes = nodes + 1
      end if
   end do
   end subroutine calc_nodes_us

!=====================================================================
   subroutine set_vloc_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ll, ips, ishell, is_found, ir, ll_core
   real(8) :: eps
   ier = 0
   select case (lloc)
   case (0:3)
      write(IFLOG,*) 'VLOC: Orbital ... lloc =',lloc
      if (is_core == PATOM) then
         ll_core = lloc
      else
         ll_core = lmax_core
      end if
      is_found = 0
      do ips = 1,nps
         ishell = ishell_ps(ips,1)
         ll = l_qnum(ishell)
         if (ll == lloc) then
! ======================================== modified by K. T. ============ 4.0
!            vloc_scr_us(:) = veff_ps(:,ips,1)
            vloc_scr_us(:,1) = veff_ps(:,ips,1)
! ======================================================================= 4.0
            is_found = 1
            exit
         end if
      end do
      if (is_found == 0) then
         write(IFLOG,*) '### ERROR ### lloc was not found'
         write(IFLOG,*) '   lloc ...',lloc
         ier = 1 ; go to 99
      end if
      nrcut_loc_us = nrcut_phi_max_us
   case (5)
      write(IFLOG,*) 'VLOC: BHS'
      if (is_core == PATOM) then
         write(IFLOG,*) '### ERROR ### Vloc cannot be constructed'
         write(IFLOG,*) '              for a pseudo-atom. (lloc=5)'
         ier = 1 ; go to 99
      else
         ll_core = lmax_core
      end if
      do ir = 1,nmesh
         eps = exp(-(rpos(ir)/rcut_loc_bhs)**lambda_loc_bhs)

! ================================= modified by K. T. =============== 4.0
!         vloc_scr_us(ir) = eps*vcut_loc_bhs + (1.d0-eps)*veff(ir,1,ll_core) 
         vloc_scr_us(ir,1) = eps*vcut_loc_bhs + (1.d0-eps)*veff(ir,1,ll_core) 
! ===================================================================== 4.0

      end do
      write(IFLOG,*) 'VLOC: lambda ...',lambda_loc_bhs
      write(IFLOG,*) 'VLOC: rcut   ...',rcut_loc_bhs
      write(IFLOG,*) 'VLOC: vcut   ...',vcut_loc_bhs
   case (6)
      write(IFLOG,*) 'VLOC: Polynomial'
      if (is_core == PATOM) then
         write(IFLOG,*) '### ERROR ### Vloc cannot be constructed'
         write(IFLOG,*) '              for a pseudo-atom. (lloc=6)'
         ier = 1 ; go to 99
      else
         ll_core = lmax_core
      end if
      call set_vloc_poly10(nmesh,rpos,veff(1,1,ll_core), &
                         rcut_loc_poly,vcut_loc_poly,vloc_scr_us)
      write(IFLOG,*) 'VLOC: rcut ...',rcut_loc_poly
      write(IFLOG,*) 'VLOC: vcut ...',vcut_loc_poly
   case default
      write(IFLOG,*) '### ERROR ### lloc is out of range'
      write(IFLOG,*) '   lloc ...',lloc
      ier = 2 ; go to 99
   end select
   write(IFLOG,*) '      ir, rpos, vloc_scr'
   do ir = 1,nmesh,100
      write(IFLOG,'(i5,8(1pe15.5))') &
! ========================================== modiifed by K. T. ========== 4.0
!         ir,rpos(ir),vloc_scr_us(ir),veff(ir,1,ll_core)
         ir,rpos(ir),vloc_scr_us(ir,1),veff(ir,1,ll_core)
! ======================================================================== 4.0
   end do
99 continue
   end subroutine set_vloc_us

!============================ added by K. T. =============================== 4.0
subroutine set_vloc_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ll, ips, ishell, is_found, ir, ll_core
  real(8) :: eps
  ier = 0

  select case (lloc)

  case (0:3)
     write(IFLOG,*) 'VLOC: Orbital ... lloc =',lloc
     if (is_core == PATOM) then
        ll_core = lloc
     else
        ll_core = lmax_core
     end if
     is_found = 0
     
     do ips = 1,nps, nspin
        ishell = ishell_ps(ips,1)
        ll = l_qnum(ishell)

        if (ll == lloc) then
           if ( nspin == 1 ) then
              vloc_scr_us(:,1) = veff_ps(:,ips,1)
              is_found = 1
              exit
           else if ( nspin == 2 ) then
              vloc_scr_us(1:nmesh,1) = veff_ps(1:nmesh,ips,  1)
              vloc_scr_us(1:nmesh,2) = veff_ps(1:nmesh,ips+1,1)
              is_found = 1
              exit
           endif
        endif

     end do
     
     if (is_found == 0) then
        write(IFLOG,*) '### ERROR ### lloc was not found'
        write(IFLOG,*) '   lloc ...',lloc
        ier = 1 ; go to 99
     end if
     nrcut_loc_us = nrcut_phi_max_us
     
  case (5)
     write(IFLOG,*) 'VLOC: BHS'
     if (is_core == PATOM) then
        write(IFLOG,*) '### ERROR ### Vloc cannot be constructed'
        write(IFLOG,*) '              for a pseudo-atom. (lloc=5)'
        ier = 1 ; go to 99
     else
        ll_core = lmax_core
     end if

     do ir = 1,nmesh
        eps = exp(-(rpos(ir)/rcut_loc_bhs)**lambda_loc_bhs)
        vloc_scr_us(ir,1) = eps*vcut_loc_bhs + (1.d0-eps)*veff(ir,1,ll_core) 
     end do

     if ( nspin == 2 ) then
        do ir = 1,nmesh
           eps = exp(-(rpos(ir)/rcut_loc_bhs)**lambda_loc_bhs)
           vloc_scr_us(ir,2) = eps*vcut_loc_bhs + (1.d0-eps)*veff(ir,nspin,ll_core) 
        end do
     endif

     write(IFLOG,*) 'VLOC: lambda ...',lambda_loc_bhs
     write(IFLOG,*) 'VLOC: rcut   ...',rcut_loc_bhs
     write(IFLOG,*) 'VLOC: vcut   ...',vcut_loc_bhs
     
  case (6)
     write(IFLOG,*) 'VLOC: Polynomial'
     if (is_core == PATOM) then
        write(IFLOG,*) '### ERROR ### Vloc cannot be constructed'
        write(IFLOG,*) '              for a pseudo-atom. (lloc=6)'
        ier = 1 ; go to 99
     else
        ll_core = lmax_core
     end if

     
     call set_vloc_poly10(nmesh,rpos,veff(1,1,ll_core), &
          rcut_loc_poly,vcut_loc_poly,vloc_scr_us(1:nmesh,1) )

     if ( nspin == 2 ) then
        call set_vloc_poly10( nmesh, rpos, veff(1,nspin,ll_core), &
             rcut_loc_poly,vcut_loc_poly,vloc_scr_us(1:nmesh,2) )
     endif

     write(IFLOG,*) 'VLOC: rcut ...',rcut_loc_poly
     write(IFLOG,*) 'VLOC: vcut ...',vcut_loc_poly

  case default
     write(IFLOG,*) '### ERROR ### lloc is out of range'
     write(IFLOG,*) '   lloc ...',lloc
     ier = 2 ; go to 99

  end select

  write(IFLOG,*) '      ir, rpos, vloc_scr ( ispin == 1 )'
  do ir = 1,nmesh,100
     write(IFLOG,'(i5,8(1pe15.5))') &
          ir,rpos(ir),vloc_scr_us(ir,1),veff(ir,1,ll_core)
  end do

  if ( nspin==2 ) then
     write(IFLOG,*) '      ir, rpos, vloc_scr ( ispin == 2 )'
     do ir = 1,nmesh,100
        write(IFLOG,'(i5,8(1pe15.5))') &
             ir,rpos(ir),vloc_scr_us(ir,2),veff(ir,nspin,ll_core)
     end do
  endif

99 continue

end subroutine set_vloc_us_kt
! ========================================================================== 4.0

!=====================================================================
   subroutine calc_rphi_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ishell, ll, nn, kk, tt, ispin, ln, tn, &
              nrc, ips, n1, n2, nk_us, isdiff, lt, nodes, ir
   real(8) :: rcut, eref, deref
   real(8),allocatable :: dpsi_tmp(:), psi_tmp(:), phi_tmp(:)
   ier = 0
   nk_us = 4
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   allocate(dpsi_tmp(0:nk_us),psi_tmp(nmesh),phi_tmp(nmesh))
      dpsi_tmp = 0.d0 ; psi_tmp = 0.d0 ; phi_tmp = 0.d0
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
MAIN1:do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      tt = t_lt_us(lt)
      ishell = ishell_ps(ips,1)
      nn = n_qnum(ishell)
      ll = l_qnum(ishell)
      kk = k_qnum(ishell)
      ispin = (1-spin(ishell))/2 + 1
      ln = ln_lt_us(lt)
      tn = tn_lt_us(lt)
      if (is_val_type_ps(ips) /= US90) then
         cycle MAIN1
      end if
      write(IFLOG,*)
      write(IFLOG,'(1x,a29,7i5)') &
            '??? lt,ips,ispin,nn,ll,tt ...', &
                 lt,ips,ispin,nn,ll,tt
      psi_tmp(:) = rpsi_us(:,lt) / rpos(:)
      nrc   = nrcut_phi_us(lt)
      rcut  = rpos(nrc)
      eref  = eref_us(lt)
      deref = deref_ps(ips,tt)
      n1 = nrc - 10 ; n2 = nrc + 10
      if (n1 < 1) then
         n1 = 1 ; n2 = 1 + 10*2
      else if (n2 > nmesh) then
         n1 = nmesh - 10*2 ; n2 = nmesh
      end if
      isdiff = nk_us
      select case (nk_us)
      case (2)
         call diff_exp(ier,isdiff,n1,n2,rpos,psi_tmp,rcut, &
                       dpsi_tmp(0),dpsi_tmp(1),dpsi_tmp(2))
      case (3)
         call diff3_exp(ier,isdiff,n1,n2,rpos,psi_tmp,rcut, &
                 dpsi_tmp(0),dpsi_tmp(1),dpsi_tmp(2),dpsi_tmp(3))
      case (4)
         call diff4_exp(ier,isdiff,n1,n2,rpos,psi_tmp,rcut, &
                 dpsi_tmp(0),dpsi_tmp(1),dpsi_tmp(2),dpsi_tmp(3), &
                 dpsi_tmp(4))
      end select
      call opt_poly_fn(ier,IFLOG,ll,2, &
              nmesh,rpos,psi_tmp,nk_us,nrc,dpsi_tmp(0), &
              ng_phi_us,gmin_phi_us,gmax_phi_us,ncoeff_phi_us, &
              coeff_phi_us(0,lt))
      if (ier /= 0) then
         write(IFLOG,*) '### ERROR ### in opt_poly_fn'
         go to 99
      end if
      phi_tmp(:) = psi_tmp(:)
      call calc_poly_fn(1,nrc,rpos,ll,ncoeff_phi_us, &
              coeff_phi_us(0,lt),phi_tmp)
      rphi_us(:,lt) = phi_tmp(:) * rpos(:)
      call calc_nodes_us(nmesh,rphi_us(1,lt),nmesh,nodes)
      nodes_phi_us(lt) = nodes
      call check_phi_us(ier,lt,nn,ll,tt,ln,tn,eref,deref,nrc,nk_us, &
                        dpsi_tmp,phi_tmp)
end do MAIN1
   do lt = 1,num_ltx_us
   do ir = 1,nmesh
      if (abs(rphi_us(ir,lt)) < 1.d-99) then
         rphi_us(ir,lt) = 0.d0
      end if
   end do
   end do
99 continue
  !+++++++++++++++++++++++++++++++++++++
   deallocate(dpsi_tmp,psi_tmp,phi_tmp)
  !+++++++++++++++++++++++++++++++++++++
   end subroutine calc_rphi_us

!=====================================================================
   subroutine check_phi_us(ier,lt,nn,ll,tt,ln,tn,eref,deref,nrc, &
                           nk_us,dpsi_tmp,phi_tmp)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir1, ir2, lt, nn, ll, tt, ln, tn, n1, n2, &
              isdiff, nrc, ik, nk_us, ncoeff
   real(8) :: dpsi_tmp(0:nk_us), phi_tmp(nmesh), rcut, eref, deref
   real(8),allocatable :: &
      f_tmp(:), f0_exact(:), f0_anal(:), f0_num(:)
  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   allocate(f_tmp(nmesh), &
      f0_exact(0:nk_us), f0_anal(0:nk_us), f0_num(0:nk_us))
      f_tmp    = 0.d0
      f0_exact = 0.d0 ; f0_anal = 0.d0 ; f0_num = 0.d0
  !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   ier = 0
   do ik = 0,nk_us
      f0_exact(ik) = dpsi_tmp(ik)
   end do
   f0_anal(:) = 0.d0
   nrc = nrcut_phi_us(lt)
   ncoeff = ncoeff_phi_us
   ir1 = nrc ; ir2 = nrc
   call calc_poly_fn( &
      ir1,ir2,rpos,ll,ncoeff_phi_us,coeff_phi_us(0,lt),f_tmp)
   f0_anal(0) = f_tmp(nrc)
   call calc_dpoly_fn( &
      ir1,ir2,rpos,ll,ncoeff_phi_us,coeff_phi_us(0,lt),f_tmp)
   f0_anal(1) = f_tmp(nrc)
   if (nk_us >= 2) then
      call calc_ddpoly_fn( &
         ir1,ir2,rpos,ll,ncoeff_phi_us,coeff_phi_us(0,lt),f_tmp)
      f0_anal(2) = f_tmp(nrc)
   end if
   if (nk_us >= 3) then
      call calc_dddpoly_fn( &
         ir1,ir2,rpos,ll,ncoeff_phi_us,coeff_phi_us(0,lt),f_tmp)
      f0_anal(3) = f_tmp(nrc)
   end if
   if (nk_us >= 4) then
      call calc_ddddpoly_fn( &
         ir1,ir2,rpos,ll,ncoeff_phi_us,coeff_phi_us(0,lt),f_tmp)
      f0_anal(4) = f_tmp(nrc)
   end if
   f0_num(:) = 0.d0
   isdiff = nk_us
   n1 = nrc - 10 ; n2 = nrc + 10
   rcut = rpos(nrc)
   select case (nk_us)
   case (2)
      call diff_exp(ier,isdiff,n1,n2,rpos,phi_tmp,rcut, &
              f0_num(0),f0_num(1),f0_num(2))
   case (3)
      call diff3_exp(ier,isdiff,n1,n2,rpos,phi_tmp,rcut, &
              f0_num(0),f0_num(1),f0_num(2),f0_num(3))
   case (4)
      call diff4_exp(ier,isdiff,n1,n2,rpos,phi_tmp,rcut, &
              f0_num(0),f0_num(1),f0_num(2),f0_num(3),f0_num(4))
   end select
   f0_num(0) = phi_tmp(nrc)
   call write_checked_phi_us(IFLOG,nn,ll,tt,ln,tn, &
           nrc,rcut,eref,deref,ncoeff_phi_us,coeff_phi_us(0,lt), &
           nk_us,f0_exact,f0_anal,f0_num)
   call write_checked_phi_us(IFSUM,nn,ll,tt,ln,tn, &
           nrc,rcut,eref,deref,ncoeff_phi_us,coeff_phi_us(0,lt), &
           nk_us,f0_exact,f0_anal,f0_num)
99 continue
  !++++++++++++++++++++++++++++++++++++++++++
   deallocate(f_tmp,f0_exact,f0_anal,f0_num)
  !++++++++++++++++++++++++++++++++++++++++++
   end subroutine check_phi_us

!=====================================================================
   subroutine write_checked_phi_us(ifile,nn,ll,tt,ln,tn, &
      nrc,rcut,eref,deref,ncoeff,coeff,nk,f0_exact,f0_anal,f0_num)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   implicit none
   integer,intent(in) :: ifile, nn, ll, tt, ln, tn, nrc, ncoeff, nk
   real(8),intent(in) :: &
      rcut, eref, deref, coeff(0:ncoeff), &
      f0_exact(0:nk), f0_anal(0:nk), f0_num(0:nk)
   integer :: icoeff, ik
   write(ifile,*)
   write(ifile,*)  &
      'Vanderbilt ultrasoft phi[n](r) (polynomial) was optimized.'
   write(ifile,15) ' (n,l,t) :', nn,ll,tt,'US'
   write(ifile,16) ' (ln,tn) :', ln,tn
   write(ifile,10) ' rc, nrc :', rcut, nrc
   write(ifile,14) 'eref, de :', eref, deref
   write(ifile,11) 'ncoeff,nk:', ncoeff, nk
   do icoeff=0,ncoeff
   write(ifile,12) 'coeff(',2*icoeff,'):', coeff(icoeff)
   end do
   write(ifile,13) 'phi(rc)','phi''(rc)','phi"(rc)'
   write(ifile,14) 'Exact    :',(f0_exact(ik),ik=0,2)
   write(ifile,14) 'Analytic :',(f0_anal (ik),ik=0,2)
   write(ifile,14) 'Numerical:',(f0_num  (ik),ik=0,2)
   if (nk >= 3) then
   select case (nk)
   case (3)
      write(ifile,17) 'phi"''(rc)'
   case (4)
      write(ifile,17) 'phi"''(rc)','phi""(rc)'
   case (5)
      write(ifile,17) 'phi"''(rc)','phi""(rc)','phi""''(rc)'
   end select
   write(ifile,14) 'Exact    :',(f0_exact(ik),ik=3,nk)
   write(ifile,14) 'Analytic :',(f0_anal (ik),ik=3,nk)
   write(ifile,14) 'Numerical:',(f0_num  (ik),ik=3,nk)
   end if
10 format(1x,a10,f20.10,i20)
11 format(1x,a10,2(i20))
12 format(1x,(a6,i2,a2),f20.10)
13 format(1x,10x,(8x,a7,5x),(8x,a8,4x),(8x,a8,4x))
14 format(1x,a10,5(f20.10))
15 format(1x,a10,(10x,(i2,2x,i2,2x,i2)),(18x,a2))
16 format(1x,a10,(10x,(2x,2x,i2,2x,i2)))
17 format(1x,10x,(8x,a9,3x),(8x,a9,3x),(8x,a10,2x))
   end subroutine write_checked_phi_us

!=====================================================================
   subroutine calc_norm_rcl_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: lt, nrc, ir
   real(8) :: sum
   ier = 0
MAIN1:do lt = 1,num_ltx_us
      nrc = nrcut_phi_us(lt)
      call set_weight_exp(ier,1,nrc,rpos,wt)
      sum = 0.d0
      do ir = 1,nrc
         sum = sum + rpsi_us(ir,lt)**2 * wt(ir)
      end do
      norm_psi_us(lt) = sqrt(sum)
      sum = 0.d0
      do ir = 1,nrc
         sum = sum + rphi_us(ir,lt)**2 * wt(ir)
      end do
      norm_phi_us(lt) = sqrt(sum)
end do MAIN1
99 continue
   end subroutine calc_norm_rcl_us

!=====================================================================
   subroutine write_norm_rcl_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: lt, nn, ll, tt, ln, tn
   character(1) :: fn_label_orbital
   write(ifile,*)
   write(ifile,*) &
      'Norms of psi[n] & phi[n] (0 < r < rcl) : n = (n,l,t) = (ln,tn)'
   write(ifile,10) &
      'n','l','t','ln','tn','<psi|psi>rcl','<phi|phi>rcl','nrcl'
MAIN1:do lt = 1,num_ltx_us
      nn = n_lt_us(lt)
      ll = l_lt_us(lt)
      tt = t_lt_us(lt)
      ln = ln_lt_us(lt)
      tn = tn_lt_us(lt)
   write(ifile,11) nn,ll,tt,ln,tn,fn_label_orbital(ln),tn, &
      norm_psi_us(lt),norm_phi_us(lt),nrcut_phi_us(lt)
end do MAIN1
10 format(1x,3(1x,a1,2x),(a2,2x,a2),(4x),2(8x,a12),(11x,a4))
11 format(1x,3(i2,2x),(i2,2x,i2),(3x,a1,i1),f19.10,f20.10,i15)
   end subroutine write_norm_rcl_us

!====================== added by K. T. =============================== 4.0
subroutine write_norm_rcl_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: lt, nn, ll, tt, ln,  tn, ispin
  character(1) :: fn_label_orbital

  write(ifile,*)
  write(ifile,*) &
       'Norms of psi[n] & phi[n] (0 < r < rcl) : n = (n,l,t,s) = (ln,tn,s)'

  write(ifile,10) &
       'n','l','t', 's', 'ln','tn','<psi|psi>rcl','<phi|phi>rcl','nrcl'

  MAIN1:do lt = 1,num_ltx_us
     nn = n_lt_us(lt)
     ll = l_lt_us(lt)
     tt = t_lt_us(lt)
     ln = ln_lt_us(lt)
     tn = tn_lt_us(lt)

     ispin = spin_index_lt_us(lt)

     write(ifile,11) nn,ll,tt, ispin, ln,tn,fn_label_orbital(ln),tn, &
          norm_psi_us(lt),norm_phi_us(lt),nrcut_phi_us(lt)
  end do MAIN1

10 format(1x,4(1x,a1,2x),(a2,2x,a2),(4x),2(8x,a12),(11x,a4))
11 format(1x,4(i2,2x),(i2,2x,i2),(3x,a1,i1),f19.10,f20.10,i15)

end subroutine write_norm_rcl_us_kt
! ============================================================================ 4.0

!=====================================================================
   subroutine calc_norm_rc_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: lt, nrc, ir
   real(8) :: sum
   ier = 0
MAIN1:do lt = 1,num_ltx_us
      nrc = nrcut_max_us
      call set_weight_exp(ier,1,nrc,rpos,wt)
      sum = 0.d0
      do ir = 1,nrc
         sum = sum + rpsi_us(ir,lt)**2 * wt(ir)
      end do
      norm_psi_us(lt) = sqrt(sum)
      sum = 0.d0
      do ir = 1,nrc
         sum = sum + rphi_us(ir,lt)**2 * wt(ir)
      end do
      norm_phi_us(lt) = sqrt(sum)
end do MAIN1
99 continue
   end subroutine calc_norm_rc_us

!=====================================================================
   subroutine write_norm_rc_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: lt, nn, ll, tt, ln, tn
   character(1) :: fn_label_orbital
   write(ifile,*)
   write(ifile,*) &
      'Norms of psi[n] & phi[n] (0 < r < R) : n = (n,l,t) = (ln,tn)'
   write(ifile,10) &
      'n','l','t','ln','tn','<psi|psi>R','<phi|phi>R','nR'
MAIN1:do lt = 1,num_ltx_us
      nn = n_lt_us(lt)
      ll = l_lt_us(lt)
      tt = t_lt_us(lt)
      ln = ln_lt_us(lt)
      tn = tn_lt_us(lt)
   write(ifile,11) nn,ll,tt,ln,tn,fn_label_orbital(ln),tn, &
      norm_psi_us(lt),norm_phi_us(lt),nrcut_max_us
end do MAIN1
10 format(1x,3(1x,a1,2x),(a2,2x,a2),(4x),2(8x,a10,2x),(13x,a2))
11 format(1x,3(i2,2x),(i2,2x,i2),(3x,a1,i1),f19.10,f20.10,i15)
   end subroutine write_norm_rc_us

!========================== added by K. T. =============================== 4.0
subroutine write_norm_rc_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: lt, nn, ll, tt, ln, tn, ispin
  character(1) :: fn_label_orbital

  write(ifile,*)
  write(ifile,*) &
       'Norms of psi[n] & phi[n] (0 < r < R) : n = (n,l,t,s) = (ln,tn)'
  write(ifile,10) &
       'n','l','t','s', 'ln','tn','<psi|psi>R','<phi|phi>R','nR'

  MAIN1:do lt = 1,num_ltx_us
     nn = n_lt_us(lt)
     ll = l_lt_us(lt)
     tt = t_lt_us(lt)
     ln = ln_lt_us(lt)
     tn = tn_lt_us(lt)

     ispin = spin_index_lt_us(lt)

     write(ifile,11) nn,ll,tt, ispin, ln,tn,fn_label_orbital(ln),tn, &
          norm_psi_us(lt),norm_phi_us(lt),nrcut_max_us

  end do MAIN1

10 format(1x,4(1x,a1,2x),(a2,2x,a2),(4x),2(8x,a10,2x),(13x,a2))
11 format(1x,4(i2,2x),(i2,2x,i2),(3x,a1,i1),f19.10,f20.10,i15)

end subroutine write_norm_rc_us_kt
! ====================================================================== 4.0

!=====================================================================
   subroutine write_eref_rcl_phi_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: ips, nn, ll, tt, ln, tn, lt, nrc
   real(8) :: rcut, eref
   character(2) :: pp_type_tmp
   write(ifile,*)
   write(ifile,*) &
   'Reference energies E[n] & cutoff radii rcl[n] : n = (n,l,t) = (ln,tn)'
   write(ifile,10) 'n','l','t','ln','tn','E[n]','rcl[n]','nrcl[n]'
   do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      nn  = n_lt_us(lt)
      ll  = l_lt_us(lt)
      tt  = t_lt_us(lt)
      ln  = ln_lt_us(lt)
      tn  = tn_lt_us(lt)
      nrc = nrcut_phi_us(lt)
      rcut = rpos(nrc)
      eref = eref_us(lt)
      select case (is_val_type_ps(ips))
      case (TM91)
         pp_type_tmp = 'NC'
      case (US90)
         pp_type_tmp = 'US'
      end select
      write(ifile,11) nn,ll,tt,ln,tn,pp_type_tmp,eref,rcut,nrc
   end do
10 format(1x,3(1x,a1,2x),2(a2,2x),2x, &
             (8x,a4,8x),(8x,a6,6x),(8x,a7))
11 format(1x,3(i2,2x),2(i2,2x),(1x,a2),f19.10,f20.10,i15)
   end subroutine write_eref_rcl_phi_us

!=========================== added by K. T. =============================== 4.0
subroutine write_eref_rcl_phi_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: ips, nn, ll, tt, ln, tn, lt, nrc, ispin
  real(8) :: rcut, eref
  character(2) :: pp_type_tmp

  write(ifile,*)
  write(ifile,*) &
       'Reference energies E[n] & cutoff radii rcl[n] : n = (n,l,t,s) = (ln,tn)'
  write(ifile,10) 'n','l','t','s', 'ln','tn','E[n]','rcl[n]','nrcl[n]'

  do lt = 1,num_ltx_us
     ips = ips_lt_us(lt)
     nn  = n_lt_us(lt)
     ll  = l_lt_us(lt)
     tt  = t_lt_us(lt)
     ln  = ln_lt_us(lt)
     tn  = tn_lt_us(lt)

     ispin = spin_index_lt_us(lt)

     nrc = nrcut_phi_us(lt)
     rcut = rpos(nrc)
     eref = eref_us(lt)

     select case (is_val_type_ps(ips))
     case (TM91)
        pp_type_tmp = 'NC'
     case (US90)
        pp_type_tmp = 'US'
     end select

     write(ifile,11) nn,ll,tt, ispin, ln,tn,pp_type_tmp,eref,rcut,nrc

  end do

10 format(1x,4(1x,a1,2x),2(a2,2x),2x, &
        (8x,a4,8x),(8x,a6,6x),(8x,a7))
11 format(1x,4(i2,2x),2(i2,2x),(1x,a2),f19.10,f20.10,i15)

end subroutine write_eref_rcl_phi_us_kt
! ============================================================================= 4.0

!=====================================================================
   subroutine calc_rchi_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: lt, ln, tn, ir, is_direct_diff, ispin, icoeff, &
              is_debug_here, nrc, nrc2, iord_diff_tmp, ll_core, ips, tt
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   is_direct_diff = 0
   select case (is_direct_diff)
   case (0) 
      rchi_us(:,:) = 0.d0
      do lt = 1,num_ltx_us
         ips = ips_lt_us(lt)
         tt  = t_lt_us(lt)
         ln  = ln_lt_us(lt)
         tn  = tn_lt_us(lt)
         select case (is_val_type_ps(ips))
         case (TM91) 
            do ir = 1,nmesh
               rchi_us(ir,lt) = &
! ============================================= modiifed by K. T. ======= 4.0
!                  (veff_ps(ir,ips,1) - vloc_scr_us(ir)) &
                  (veff_ps(ir,ips,1) - vloc_scr_us(ir,1)) &
! ======================================================================== 4.0
                     * rphi_us(ir,lt)
            end do
         case (US90) 
            nrc = nrcut_phi_us(lt)
            call calc_poly_rchi(1,nrc,rpos,ln, &
               ncoeff_phi_us,coeff_phi_us(0,lt), &
               eref_us(lt),rphi_us(1,lt),vloc_scr_us, &
               rchi_us(1,lt))
            do ir = nrc+1,nmesh
               ispin = 1
               if (is_core == PATOM) then
                  ll_core = ln
               else
                  ll_core = lmax_core
               end if
               rchi_us(ir,lt) = &
! ================================================ modified by K. T. ======== 4.0
!                  (veff(ir,ispin,ll_core) - vloc_scr_us(ir)) &
                  (veff(ir,ispin,ll_core) - vloc_scr_us(ir,ispin)) &
! =========================================================================== 4.0
                     * rphi_us(ir,lt)
            end do
            if (is_debug_here /= 0) then
               write(IFLOG,*) 'RCHI: ln,tn,ncoeff ...',&
                                     ln,tn,ncoeff_phi_us
               write(IFLOG,'(8(1pe15.5))') &
                  (coeff_phi_us(icoeff,lt),icoeff=0,ncoeff_phi_us)
            end if
         end select
      end do
   case (1)
      drphi_us(:,:)  = 0.d0
      ddrphi_us(:,:) = 0.d0
      iord_diff_tmp = 10
      do lt = 1,num_ltx_us
         call calc_ddiff_exp(ier,iord_diff_tmp, &
               nrcut_phi_max_us,rpos(1), &
               rphi_us(1,lt),drphi_us(1,lt), &
               ddrphi_us(1,lt))
      end do
      rchi_us(:,:) = 0.d0
      do lt = 1,num_ltx_us
         ln = l_lt_us(lt)
         do ir = 1,nrcut_phi_max_us
            rchi_us(ir,lt) = &
                + 0.5d0 * ddrphi_us(ir,lt) &
                + ( + eref_us(lt) &
                    - 0.5d0*dble(ln*(ln+1))/rpos(ir)**2 &
! ============================================== modified by K. T. ======== 4.0
!                    - vloc_scr_us(ir) ) * rphi_us(ir,lt)
                    - vloc_scr_us(ir,1) ) * rphi_us(ir,lt)
! ========================================================================== 4.0
         end do
      end do
   case (2) 
      rchi_us(:,:) = 0.d0
      do lt = 1,num_ltx_us
         ips = ips_lt_us(lt)
         tt  = t_lt_us(lt)
         ln  = ln_lt_us(lt)
         tn  = tn_lt_us(lt)
         select case (is_val_type_ps(ips))
         case (TM91) 
            do ir = 1,nmesh
               rchi_us(ir,lt) = &
! ============================================= modified by K. T. ============ 4.0
!                  (veff_ps(ir,ips,1) - vloc_scr_us(ir)) &
                  (veff_ps(ir,ips,1) - vloc_scr_us(ir,1)) &
! ============================================================================ 4.0
                     * rphi_us(ir,lt)
            end do
         case (US90)
            nrc = nrcut_phi_us(lt)
            nrc2 = nrcut_max_us
            call calc_poly_rchi(1,nrc,rpos,ln, &
               ncoeff_phi_us,coeff_phi_us(0,lt), &
               eref_us(lt),rphi_us(1,lt),vloc_scr_us, &
               rchi_us(1,lt))
            if (nrc2 > nrc) then
               drphi_us(:,:)  = 0.d0
               ddrphi_us(:,:) = 0.d0
               iord_diff_tmp = 10
               call calc_ddiff_exp(ier,iord_diff_tmp, &
                     nrcut_phi_max_us,rpos(1), &
                     rphi_us(1,lt),drphi_us(1,lt), &
                     ddrphi_us(1,lt))
               do ir = nrc+1,nrc2
                  ispin = 1
                  if (is_core == PATOM) then
                     ll_core = ln
                  else
                     ll_core = lmax_core
                  end if
                  rchi_us(ir,lt) = &
                      + 0.5d0 * ddrphi_us(ir,lt) &
! ======================================================== modified by K. T. ====== 4.0
!                      + ( + eref_us(lt) - vloc_scr_us(ir) &
                      + ( + eref_us(lt) - vloc_scr_us(ir,1) &
! ================================================================================ 4.0
                          - 0.5d0*dble(ln*(ln+1))/rpos(ir)**2 &
                        ) * rphi_us(ir,lt)
               end do
            end if
            if (is_debug_here /= 0) then
               write(IFLOG,*) 'RCHI: ln,tn,ncoeff ...',&
                                     ln,tn,ncoeff_phi_us
               write(IFLOG,'(8(1pe15.5))') &
                  (coeff_phi_us(icoeff,lt),icoeff=0,ncoeff_phi_us)
            end if
         end select
      end do
   end select
   write(IFLOG,*) 'RCHI: nref_ps       ',(nref_ps(ips),ips=1,nps)
   write(IFLOG,*) 'RCHI: is_val_type_ps',(is_val_type_ps(ips),ips=1,nps)
   write(IFLOG,*) '      ir, rpos, rchi'
   do ir = 1,nmesh,100
      write(IFLOG,'(i5,8(1pe15.5))') &
         ir,rpos(ir), &
         (rchi_us(ir,lt),lt=1,num_ltx_us)
   end do
   do lt = 1,num_ltx_us
   do ir = 1,nmesh
      if (abs(rchi_us(ir,lt)) < 1.d-99) then
         rchi_us(ir,lt) = 0.d0
      end if
   end do
   end do
99 continue
   end subroutine calc_rchi_us
!=====================================================================

! ================================== addd by K. T. ===================== 4.0
subroutine calc_rchi_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ishell
  
  integer :: lt, ln, tn, ir, is_direct_diff, ispin, icoeff, &
       is_debug_here, nrc, nrc2, iord_diff_tmp, ll_core, ips, tt

  real(8) :: vloc_tmp(nmesh)

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if
  ier = 0
  is_direct_diff = 0

  select case (is_direct_diff)

  case (0) 
     rchi_us(:,:) = 0.d0
     do lt = 1,num_ltx_us
        ips = ips_lt_us(lt)
        tt  = t_lt_us(lt)
        ln  = ln_lt_us(lt)
        tn  = tn_lt_us(lt)

        ishell = ishell_ps(ips,1)
        ispin = (1-spin(ishell))/2 + 1

        select case (is_val_type_ps(ips))

        case (TM91) 
           if ( ispin == 1 ) then
              do ir = 1,nmesh
                 rchi_us(ir,lt) = &
                      (veff_ps(ir,ips,1) - vloc_scr_us(ir,1)) &
                      * rphi_us(ir,lt)
              end do
           else if ( ispin ==2 ) then
              do ir = 1,nmesh
                 rchi_us(ir,lt) = &
                      (veff_ps(ir,ips,1) - vloc_scr_us(ir,2)) &
                      * rphi_us(ir,lt)
              end do
           endif

        case (US90) 
           nrc = nrcut_phi_us(lt)

           if ( ispin == 1 ) then
              vloc_tmp(1:nmesh) = vloc_scr_us(1:nmesh,1)
           else if ( ispin == 2 ) then
              vloc_tmp(1:nmesh) = vloc_scr_us(1:nmesh,2)
           endif

           call calc_poly_rchi(1,nrc,rpos,ln, &
                ncoeff_phi_us,coeff_phi_us(0,lt), &
                eref_us(lt),rphi_us(1,lt),vloc_tmp, &
                rchi_us(1,lt))

           do ir = nrc+1,nmesh
              if (is_core == PATOM) then
                 ll_core = ln
              else
                 ll_core = lmax_core
              end if
              rchi_us(ir,lt) = &
                   (veff(ir,ispin,ll_core) - vloc_tmp(ir)) &
                   * rphi_us(ir,lt)
           end do

           if (is_debug_here /= 0) then
              write(IFLOG,*) 'RCHI: ln,tn, sn, ncoeff ...',&
                   ln,tn,ispin, ncoeff_phi_us
              write(IFLOG,'(8(1pe15.5))') &
                   (coeff_phi_us(icoeff,lt),icoeff=0,ncoeff_phi_us)
           end if
        end select
     end do

  case (1)
     drphi_us(:,:)  = 0.d0
     ddrphi_us(:,:) = 0.d0
     iord_diff_tmp = 10
     do lt = 1,num_ltx_us
        call calc_ddiff_exp(ier,iord_diff_tmp, &
             nrcut_phi_max_us,rpos(1), &
             rphi_us(1,lt),drphi_us(1,lt), &
             ddrphi_us(1,lt))
     end do
     rchi_us(:,:) = 0.d0
     do lt = 1,num_ltx_us
        ln = l_lt_us(lt)

        ishell = ishell_ps(ips,1)
        ispin = (1-spin(ishell))/2 + 1

        if ( ispin == 1 ) then
           do ir = 1,nrcut_phi_max_us
              rchi_us(ir,lt) = &
                   + 0.5d0 * ddrphi_us(ir,lt) &
                   + ( + eref_us(lt) &
                   - 0.5d0*dble(ln*(ln+1))/rpos(ir)**2 &
                   - vloc_scr_us(ir,ispin) ) * rphi_us(ir,lt)
           end do
        else if ( ispin == 2 ) then
           do ir = 1,nrcut_phi_max_us
              rchi_us(ir,lt) = &
                   + 0.5d0 * ddrphi_us(ir,lt) &
                   + ( + eref_us(lt) &
                   - 0.5d0*dble(ln*(ln+1))/rpos(ir)**2 &
                   - vloc_scr_us(ir,ispin) ) * rphi_us(ir,lt)
           end do
        endif

     end do

  case (2) 
     rchi_us(:,:) = 0.d0
     do lt = 1,num_ltx_us
        ips = ips_lt_us(lt)
        tt  = t_lt_us(lt)
        ln  = ln_lt_us(lt)
        tn  = tn_lt_us(lt)

        ishell = ishell_ps(ips,1)
        ispin = (1-spin(ishell))/2 + 1

        select case (is_val_type_ps(ips))

        case (TM91) 
           if ( ispin == 1 ) then
              do ir = 1,nmesh
                 rchi_us(ir,lt) = &
                      (veff_ps(ir,ips,1) - vloc_scr_us(ir,1)) &
                      * rphi_us(ir,lt)
              end do
           else if ( ispin==2 ) then
              do ir = 1,nmesh
                 rchi_us(ir,lt) = &
                      (veff_ps(ir,ips,1) - vloc_scr_us(ir,2)) &
                      * rphi_us(ir,lt)
              end do
           endif

        case (US90)
           nrc = nrcut_phi_us(lt)
           nrc2 = nrcut_max_us

           if ( ispin == 1 ) then
              vloc_tmp(1:nmesh) = vloc_scr_us(1:nmesh,1)
           else if ( ispin == 2 ) then
              vloc_tmp(1:nmesh) = vloc_scr_us(1:nmesh,2)
           endif

           call calc_poly_rchi(1,nrc,rpos,ln, &
                ncoeff_phi_us,coeff_phi_us(0,lt), &
                eref_us(lt),rphi_us(1,lt),vloc_tmp, &
                rchi_us(1,lt))

           if (nrc2 > nrc) then
              drphi_us(:,:)  = 0.d0
              ddrphi_us(:,:) = 0.d0
              iord_diff_tmp = 10
              call calc_ddiff_exp(ier,iord_diff_tmp, &
                   nrcut_phi_max_us,rpos(1), &
                   rphi_us(1,lt),drphi_us(1,lt), &
                   ddrphi_us(1,lt))

              do ir = nrc+1,nrc2
                 if (is_core == PATOM) then
                    ll_core = ln
                 else
                    ll_core = lmax_core
                 end if
                 rchi_us(ir,lt) = &
                      + 0.5d0 * ddrphi_us(ir,lt) &
                      + ( + eref_us(lt) - vloc_tmp(ir) &
                      - 0.5d0*dble(ln*(ln+1))/rpos(ir)**2 &
                      ) * rphi_us(ir,lt)
              end do
           end if

           if (is_debug_here /= 0) then
              write(IFLOG,*) 'RCHI: ln,tn, sn,ncoeff ...',&
                   ln,tn, ispin, ncoeff_phi_us
              write(IFLOG,'(8(1pe15.5))') &
                   (coeff_phi_us(icoeff,lt),icoeff=0,ncoeff_phi_us)
           end if

        end select
     end do
  end select

  write(IFLOG,*) 'RCHI: nref_ps       ',(nref_ps(ips),ips=1,nps)
  write(IFLOG,*) 'RCHI: is_val_type_ps',(is_val_type_ps(ips),ips=1,nps)
  write(IFLOG,*) '      ir, rpos, rchi'

  do ir = 1,nmesh,100
     write(IFLOG,'(i5,8(1pe15.5))') &
          ir,rpos(ir), &
          (rchi_us(ir,lt),lt=1,num_ltx_us)
  end do

  do lt = 1,num_ltx_us
     do ir = 1,nmesh
        if (abs(rchi_us(ir,lt)) < 1.d-99) then
           rchi_us(ir,lt) = 0.d0
        end if
     end do
  end do

99 continue

end subroutine calc_rchi_us_kt
! ============================================================================ 4.0

!=====================================================================
   subroutine calc_bmat_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ll, tn, tm, lt1, lt2, ltt, ir, is_debug_here
   real(8) :: sum
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   bmat_us(:) = 0.d0
   do ltt = 1,num_lttx_us
      ll  = l_ltt_us(ltt)
      tn  = tn_ltt_us(ltt)
      tm  = tm_ltt_us(ltt)
      lt1 = lt_n_us(ll,tn)
      lt2 = lt_n_us(ll,tm)
      sum = 0.d0
      do ir = 1,nmesh
         sum = sum &
            + wr(ir) * rphi_us(ir,lt1) * rchi_us(ir,lt2)
      end do
      bmat_us(ltt) = sum
      if (is_debug_here /= 0) then
         write(IFLOG,*) 'BMAT: l,tn,tm,bmat_us ...',&
                               ll,tn,tm,sum
      end if
   end do
99 continue
   end subroutine calc_bmat_us
!=====================================================================

! ==================================== added by K. T. ================== 4.0
subroutine calc_bmat_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ll, tn, tm, lt1, lt2, ltt, ir, is_debug_here
  integer :: ispin1, ispin2, itmp1, itmp2
  real(8) :: sum

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  bmat_us(:) = 0.d0

  do ltt = 1,num_lttx_us
     ll  = l_ltt_us(ltt)
     tn  = tn_ltt_us(ltt)
     tm  = tm_ltt_us(ltt)
!
     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)
!
     itmp1 = tn + nref_max_us *(ispin1 -1)
     itmp2 = tm + nref_max_us *(ispin2 -1)
!
     lt1 = lt_n_us(ll,itmp1)
     lt2 = lt_n_us(ll,itmp2)
!
     if ( ispin1 /= ispin2 ) cycle

     sum = 0.d0
     do ir = 1,nmesh
        sum = sum &
             + wr(ir) * rphi_us(ir,lt1) * rchi_us(ir,lt2)
     end do
     bmat_us(ltt) = sum
     if (is_debug_here /= 0) then
        write(IFLOG,*) 'BMAT: l,tn,sn,tm,sm,bmat_us ...',&
             ll,tn, ispin1, tm, ispin2, sum
     end if
  end do

99 continue

end subroutine calc_bmat_us_kt
! =========================================================================== 4.0

!=====================================================================
   subroutine calc_rbeta_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ll, tn, tm, lt1, lt2, ir, is_debug_here, &
              nref, ltt, lt, ips
   real(8) :: sum, det, arg, fac
   real(8),allocatable :: bmat(:,:), binv(:,:), bchk(:,:)
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   rbeta_us(:,:) = 0.d0
   do ll = 0,lmax
      nref = nref_us(ll)
      if (ll /= lloc) then
         if (nref == 0) then
            cycle
         end if
        !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
         allocate(bmat(nref,nref),binv(nref,nref),bchk(nref,nref))
            bmat = 0.d0 ; binv = 0.d0 ; bchk = 0.d0
        !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
         bmat(:,:) = 0.d0
         do tn = 1,nref
            do tm = 1,nref
               ltt = ltt_nm_us(ll,tn,tm)
               bmat(tn,tm) = bmat_us(ltt)
               write(IFLOG,*) '[calc_rbeta_us] l,tn,tm,bmat ...', &
                    ll,tn,tm,bmat(tn,tm)
            end do
         end do

         call det_real_matrix(ier,nref,bmat,det)
         call inv_real_matrix(ier,nref,bmat,binv)
         write(IFLOG,*) '### CHECK ### det(Bnm) ...',det

         do ir = 1,nmesh
            do tn = 1,nref
               do tm = 1,nref
                  lt1 = lt_n_us(ll,tn)
                  lt2 = lt_n_us(ll,tm)
                  rbeta_us(ir,lt1) = rbeta_us(ir,lt1) &
                       + binv(tm,tn) * rchi_us(ir,lt2)
               end do
            end do
         end do

         do tn = 1,nref
            lt1 = lt_n_us(ll,tn)
            sum = 0.d0
            do ir = 1,nmesh
               sum = sum + wr(ir) * rbeta_us(ir,lt1)**2
            end do
            if (is_debug_here /= 0) then
               write(IFLOG,*) 'BETA: ln,tn,<beta|beta>',ll,tn,sum
            end if
         end do
         bchk = matmul(bmat,binv)
         write(IFLOG,*) 'BCHK: ll, nref ...',ll,nref
         write(IFLOG,*) 'bmat:'
         do tn = 1,nref
            write(IFLOG,10) (bmat(tn,tm),tm=1,nref)
         end do
         write(IFLOG,*) 'binv:'
         do tn = 1,nref
            write(IFLOG,10) (binv(tn,tm),tm=1,nref)
         end do
         write(IFLOG,*) 'bchk:'
         do tn = 1,nref
            write(IFLOG,10) (bchk(tn,tm),tm=1,nref)
         end do
      10 format(5(1pe15.5))
         do tn = 1,nref
         do tm = 1,nref
            if (tn == tm) then
               if (abs(bchk(tn,tm) - 1.d0) > 1.d-6) then
                  write(IFLOG,*) &
                     '### ERROR ### bmat * binv (diag) != 1'
                  write(IFLOG,*) '   ll ...',ll
                  ier = 1 ; go to 99
               end if
            else
               if (abs(bchk(tn,tm)) > 1.d-6) then
                  write(IFLOG,*) &
                     '### ERROR ### bmat * binv (off-diag) != 1'
                  write(IFLOG,*) '   ll ...',ll
                  ier = 2 ; go to 99
               end if
            end if
         end do
         end do
        !+++++++++++++++++++++++++++
         deallocate(bmat,binv,bchk)
        !+++++++++++++++++++++++++++
      else if (ll == lloc) then
         if (nref /= 1) then
            write(IFLOG,*) '### ERROR ### nref != 1 for local orbital'
            write(IFLOG,*) '   ll   ...',ll
            write(IFLOG,*) '   nref ...',nref
            ier = 1 ; go to 99
         end if
         lt = lt_n_us(ll,1)
         select case (is_projector_type)
         case (MANUAL)
            do ir = nmesh,1,-1
               if (rpos(ir) < rcut_beta) then
                  nrcut_beta = ir ; exit
               end if
               if (ir < 10) then
                  write(IFLOG,*) '### ERROR ### nrcut_beta was not found.'
                  ier = 1 ; go to 99
               end if
            end do
            call set_weight_exp(ier,1,nrcut_beta,rpos,wt)
            rbeta_us(:,lt) = 0.d0
            do ir=1,nrcut_beta
               arg = min(kappa_beta*(rpos(ir)-rcut_beta)**2,ARGMAX)
               fac = 1.d0 - exp(-arg)
               rbeta_us(ir,lt) = fac*rphi_us(ir,lt)
            end do
            sum = 0.d0
            do ir = 1,nmesh
               sum = sum + wr(ir) * rbeta_us(ir,lt) * rphi_us(ir,lt)
            end do
            rbeta_us(:,lt) = rbeta_us(:,lt)/sum
         case default
            rbeta_us(:,lt) = 0.d0
         end select            
      end if
   end do
   write(IFLOG,*) 'DUAL: l, tn, tm, <phi[l,tn] | beta[l,tm]>'
   do ll = 0,lmax
      nref = nref_us(ll)
      if ((ll /= lloc).or.(is_projector_type /= NONE)) then
         do tn = 1,nref
         do tm = 1,nref
            lt1 = lt_n_us(ll,tn)
            lt2 = lt_n_us(ll,tm)
            sum = 0.d0
            do ir = 1,nmesh
               sum = sum + wr(ir) &
                   * rphi_us(ir,lt1) * rbeta_us(ir,lt2)
            end do
            write(IFLOG,*) ll,tn,tm,sum
            if (((tn == tm).and.(abs(1.d0-sum) > 1.d-6)).or. &
                ((tn /= tm).and.(abs(sum) > 1.d-6))) then
               write(IFLOG,*) &
                  '### ERROR ### <phi[n] | beta[m]> != delta[nm]'
               write(IFLOG,*) '   tn,tm ...',tn,tm
               write(IFLOG,*) '   sum   ...',sum
               ier = 1 ; go to 99
            end if
         end do
         end do
      end if
   end do
   write(IFLOG,*) 'RBETA: nref_us       ',(nref_us(ll),ll=0,lmax)
   write(IFLOG,*) 'RBETA: nref_ps       ',(nref_ps(ips),ips=1,nps)
   write(IFLOG,*) 'RBETA: is_val_type_ps',(is_val_type_ps(ips),ips=1,nps)
   write(IFLOG,*) '       ir, rpos, rbeta'
   do ir = 1,nmesh,100
      write(IFLOG,'(i5,8(1pe15.5))') &
         ir,rpos(ir), &
         (rbeta_us(ir,lt),lt=1,num_ltx_us)
   end do
   do lt = 1,num_ltx_us
   do ir = 1,nmesh
      if (abs(rbeta_us(ir,lt)) < 1.d-99) then
         rbeta_us(ir,lt) = 0.d0
      end if
   end do
   end do
99 continue
   end subroutine calc_rbeta_us

!=========================== added by K. T. ========================== 4.0
subroutine calc_rbeta_us_kt(ier)
  use parameters
  implicit none
  
  integer,intent(out) :: ier
  integer :: ll, tn, tm, lt1, lt2, ir, is_debug_here, &
       nref, ltt, lt, ips
  integer :: ispin, nref_s
  integer :: ispin1, ispin2, itmp1, itmp2

  real(8) :: sum, det, arg, fac
  real(8),allocatable :: bmat(:,:), binv(:,:), bchk(:,:)

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  rbeta_us(:,:) = 0.d0

  do ll = 0,lmax
     nref = nref_us(ll)
     nref_s = nref *nspin

     if (ll /= lloc) then
        if (nref == 0) then
           cycle
        end if

        !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        allocate(bmat(nref_s,nref_s), &
             &   binv(nref_s,nref_s), &
             &   bchk(nref_s,nref_s))
        bmat = 0.d0 ; binv = 0.d0 ; bchk = 0.d0
        !++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

        do tn = 1,nref_s
           do tm = 1,nref_s

              if ( tn <= nref ) then
                 itmp1 = tn
              else
                 itmp1 = nref_max_us +( tn -nref )
              endif
              if ( tm <= nref ) then
                 itmp2 = tm
              else
                 itmp2 = nref_max_us +( tm -nref )
              endif

              ltt = ltt_nm_us(ll,itmp1,itmp2)

              bmat(tn,tm) = bmat_us(ltt)
              write(IFLOG,*) '[calc_rbeta_us_kt] l,tns,tms,bmat ...', &
                   ll,tn,tm,bmat(tn,tm)
           end do
        end do
        
        call det_real_matrix(ier,nref_s,bmat,det)
        call inv_real_matrix(ier,nref_s,bmat,binv)
        write(IFLOG,*) '### CHECK ### det(Bnm) ...',det
        
        do ir = 1,nmesh
           do tn = 1,nref_s
              do tm = 1,nref_s

                 if ( tn <= nref ) then
                    itmp1 = tn
                 else
                    itmp1 = nref_max_us +( tn -nref )
                 endif
                 if ( tm <= nref ) then
                    itmp2 = tm
                 else
                    itmp2 = nref_max_us +( tm -nref )
                 endif

                 lt1 = lt_n_us(ll,itmp1)
                 lt2 = lt_n_us(ll,itmp2)

                 ispin1 =  spin_index_lt_us(lt1)
                 ispin2 =  spin_index_lt_us(lt2)

                 if ( ispin1 == ispin2 ) then
                    rbeta_us(ir,lt1) = rbeta_us(ir,lt1) &
                         + binv(tm,tn) * rchi_us(ir,lt2)
                 endif

              end do
           end do
        end do
        
        do tn = 1,nref_s

           if ( tn <= nref ) then
              itmp1 = tn
           else
              itmp1 = nref_max_us +( tn -nref )
           endif

           lt1 = lt_n_us(ll,itmp1)

           sum = 0.d0
           do ir = 1,nmesh
              sum = sum + wr(ir) * rbeta_us(ir,lt1)**2
           end do
           if (is_debug_here /= 0) then
              write(IFLOG,*) 'BETA: ln,tns,<beta|beta>',ll,tn,sum
           end if
        end do

        bchk = matmul(bmat,binv)
        write(IFLOG,*) 'BCHK: ll, nref, nspin  ...',ll,nref, nspin
        
        write(IFLOG,*) 'bmat:'
        do tn = 1,nref_s
           write(IFLOG,10) (bmat(tn,tm),tm=1,nref_s)
        end do
        
        write(IFLOG,*) 'binv:'
        do tn = 1,nref_s
           write(IFLOG,10) (binv(tn,tm),tm=1,nref_s)
        end do
        
        write(IFLOG,*) 'bchk:'
        do tn = 1,nref_s
           write(IFLOG,10) (bchk(tn,tm),tm=1,nref_s)
        end do

      10 format(5(1pe15.5))

        do tn = 1,nref_s
           do tm = 1,nref_s
              if (tn == tm) then
                 if (abs(bchk(tn,tm) - 1.d0) > 1.d-6) then
                    write(IFLOG,*) &
                         '### ERROR ### bmat * binv (diag) != 1'
                    write(IFLOG,*) '   ll ...',ll
                    ier = 1 ; go to 99
                 end if
              else
                 if (abs(bchk(tn,tm)) > 1.d-6) then
                    write(IFLOG,*) &
                         '### ERROR ### bmat * binv (off-diag) != 1'
                    write(IFLOG,*) '   ll ...',ll
                    ier = 2 ; go to 99
                 end if
              end if
           end do
        end do
        !+++++++++++++++++++++++++++
        deallocate(bmat,binv,bchk)
        !+++++++++++++++++++++++++++

     else if (ll == lloc) then

        if (nref /= 1) then
           write(IFLOG,*) '### ERROR ### nref != 1 for local orbital'
           write(IFLOG,*) '   ll   ...',ll
           write(IFLOG,*) '   nref ...',nref
           ier = 1 ; go to 99
        end if

        Do ispin=1, nspin
           if ( ispin == 1 ) then
              itmp1 = ispin
           else
              itmp1 = nref_max_us + 1
           endif

           lt = lt_n_us(ll,itmp1)

           select case (is_projector_type)

           case (MANUAL)
              do ir = nmesh,1,-1
                 if (rpos(ir) < rcut_beta) then
                    nrcut_beta = ir ; exit
                 end if
                 if (ir < 10) then
                    write(IFLOG,*) '### ERROR ### nrcut_beta was not found.'
                    ier = 1 ; go to 99
                 end if
              end do
           
              call set_weight_exp(ier,1,nrcut_beta,rpos,wt)
              rbeta_us(:,lt) = 0.d0

              do ir=1,nrcut_beta
                 arg = min(kappa_beta*(rpos(ir)-rcut_beta)**2,ARGMAX)
                 fac = 1.d0 - exp(-arg)
                 rbeta_us(ir,lt) = fac*rphi_us(ir,lt)
              end do
              
              sum = 0.d0
              do ir = 1,nmesh
                 sum = sum + wr(ir) * rbeta_us(ir,lt) * rphi_us(ir,lt)
              end do
              rbeta_us(:,lt) = rbeta_us(:,lt)/sum

           case default
              rbeta_us(:,lt) = 0.d0
           end select

        End Do

     end if
  end do

  write(IFLOG,*) 'DUAL: l, tns, tms, <phi[l,tns] | beta[l,tms]>'

  do ll = 0,lmax
     nref = nref_us(ll)

     nref_s = nref *nspin

     if ((ll /= lloc).or.(is_projector_type /= NONE)) then
        do tn = 1,nref_s
           do tm = 1,nref_s

              if ( tn <= nref ) then
                 itmp1 = tn
              else
                 itmp1 = nref_max_us +( tn -nref )
              endif
              if ( tm <= nref ) then
                 itmp2 = tm
              else
                 itmp2 = nref_max_us +( tm -nref )
              endif

              lt1 = lt_n_us(ll,itmp1)
              lt2 = lt_n_us(ll,itmp2)

              ispin1 =  spin_index_lt_us(lt1)
              ispin2 =  spin_index_lt_us(lt2)

              sum = 0.d0

              if ( ispin1 == ispin2 ) then
                 do ir = 1,nmesh
                    sum = sum + wr(ir) &
                         * rphi_us(ir,lt1) * rbeta_us(ir,lt2)
                 end do
              endif

              write(IFLOG,*) ll,tn,tm,sum

              if (((tn == tm).and.(abs(1.d0-sum) > 1.d-6)).or. &
                   ((tn /= tm).and.(abs(sum) > 1.d-6))) then
                 write(IFLOG,*) &
                      '### ERROR ### <phi[n] | beta[m]> != delta[nm]'
                 write(IFLOG,*) '   tns,tms ...',tn,tm
                 write(IFLOG,*) '   sum   ...',sum
                 ier = 1 ; go to 99
              end if
           end do
        end do
     end if
  end do

  write(IFLOG,*) 'RBETA: nref_us       ',(nref_us(ll),ll=0,lmax)
  write(IFLOG,*) 'RBETA: nref_ps       ',(nref_ps(ips),ips=1,nps)
  write(IFLOG,*) 'RBETA: is_val_type_ps',(is_val_type_ps(ips),ips=1,nps)
  write(IFLOG,*) '       ir, rpos, rbeta'

  do ir = 1,nmesh,100
     write(IFLOG,'(i5,8(1pe15.5))') &
          ir,rpos(ir), &
          (rbeta_us(ir,lt),lt=1,num_ltx_us)
  end do

  do lt = 1,num_ltx_us
     do ir = 1,nmesh
        if (abs(rbeta_us(ir,lt)) < 1.d-99) then
           rbeta_us(ir,lt) = 0.d0
        end if
     end do
  end do

99 continue
  
end subroutine calc_rbeta_us_kt
! ============================================================================== 4.0

!=====================================================================
   subroutine calc_qnm_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltlt, ln, tn, lm, tm, ir, is_debug_here, lt1, lt2
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   q_us(:,:) = 0.d0
   do ltlt = 1,num_ltltx_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      lt1 = lt_n_us(ln,tn)
      lt2 = lt_n_us(lm,tm)
      do ir = 1,nrcut_maxx_us
         q_us(ir,ltlt) = &
             ( rpsi_us(ir,lt1)*rpsi_us(ir,lt2) &
              -rphi_us(ir,lt1)*rphi_us(ir,lt2)) &
              / (rpos(ir)**2)
      end do
   end do
99 continue
   end subroutine calc_qnm_us

!============================ added by K. T. ============================== 4.0
subroutine calc_qnm_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltlt, ln, tn, lm, tm, ir, is_debug_here, lt1, lt2
  integer :: ispin1, ispin2, itmp1, itmp2

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  q_us(:,:) = 0.d0

  do ltlt = 1,num_ltltx_us
     ln = ln_ltlt_us(ltlt)
     tn = tn_ltlt_us(ltlt)
     lm = lm_ltlt_us(ltlt)
     tm = tm_ltlt_us(ltlt)
!
     ispin1 = spin1_index_ltlt_us(ltlt)
     ispin2 = spin2_index_ltlt_us(ltlt)
!
     itmp1 = tn + nref_max_us *(ispin1 -1)
     itmp2 = tm + nref_max_us *(ispin2 -1)
!
     lt1 = lt_n_us(ln,itmp1)
     lt2 = lt_n_us(lm,itmp2)
!
     if ( ispin1 /= ispin2 ) cycle
!
     do ir = 1,nrcut_maxx_us
        q_us(ir,ltlt) = &
             ( rpsi_us(ir,lt1)*rpsi_us(ir,lt2) &
             -rphi_us(ir,lt1)*rphi_us(ir,lt2)) &
             / (rpos(ir)**2)
     end do
  end do

99 continue

end subroutine calc_qnm_us_kt
! ======================================================================== 4.0

!=====================================================================
   subroutine calc_qnm_sum_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ltlt, ltt, ll, tn, tm, is_debug_here
   real(8) :: r
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   q_sum_us(:) = 0.d0
   do ltt = 1,num_lttx_us
      ll   = l_ltt_us(ltt)
      tn   = tn_ltt_us(ltt)
      tm   = tm_ltt_us(ltt)
      ltlt = ltlt_nm_us(ll,tn,ll,tm)
      if (ltlt /= 0) then
         do ir = 1,nmesh
            r = rpos(ir)
            q_sum_us(ltt) = q_sum_us(ltt) &
               + r*r*wr(ir) * q_us(ir,ltlt)
         end do
      else
         q_sum_us(ltt) = 0.d0
      end if
   end do
   write(IFLOG,*) 'q[nm]: l, tn, tm, q[nm]'
   do ltt = 1,num_lttx_us
      ll = l_ltt_us(ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
      write(IFLOG,'(3i5,2(1pe20.10))') &
         ll,tn,tm,q_sum_us(ltt)
   end do
99 continue
   end subroutine calc_qnm_sum_us

!============================ added by K. T. ===================== 4.0
subroutine calc_qnm_sum_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ir, ltlt, ltt, ll, tn, tm, is_debug_here
  integer :: ispin1, ispin2, itmp1, itmp2

  real(8) :: r

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  q_sum_us(:) = 0.d0

  do ltt = 1,num_lttx_us
     ll   = l_ltt_us(ltt)
     tn   = tn_ltt_us(ltt)
     tm   = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)
!
     itmp1 = tn + nref_max_us *(ispin1 -1)
     itmp2 = tm + nref_max_us *(ispin2 -1)

     ltlt = ltlt_nm_us(ll,itmp1,ll,itmp2)

     if (ltlt /= 0) then
        do ir = 1,nmesh
           r = rpos(ir)
           q_sum_us(ltt) = q_sum_us(ltt) &
                + r*r*wr(ir) * q_us(ir,ltlt)
        end do
     else
        q_sum_us(ltt) = 0.d0
     end if
  end do

  write(IFLOG,*) 'q[nm]:'
  write(IFLOG,*) '   l,  tn,  sn,  tm,  sm, q[nm]'
  do ltt = 1,num_lttx_us
     ll = l_ltt_us(ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)

     write(IFLOG,'(5i5,2(1pe20.10))') &
          ll,tn, ispin1, tm, ispin2, q_sum_us(ltt)
  end do

99 continue

end subroutine calc_qnm_sum_us_kt
! =========================================================================== 4.0

!=====================================================================
   subroutine write_qnm_sum_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: ltt, ll, tn, tm
   write(ifile,*)
   write(ifile,*) &
    'Integrals of deficit charges : nm = (l,tn,tm)'
   write(ifile,10) 'l','tn','tm','q[nm]'
   do ltt = 1,num_lttx_us
      ll = l_ltt_us (ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
      write(ifile,11) ll,tn,tm,q_sum_us(ltt)
   end do
10 format(1x,((1x,a1),2x,(a2),2x,(a2)), &
             (8x,a5,7x))
11 format(1x,(i2,2x,i2,2x,i2),3f20.10)
   end subroutine write_qnm_sum_us


!======================= added by K. T. ================================= 4.0
subroutine write_qnm_sum_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: ltt, ll, tn, tm
  integer :: ispin1, ispin2

  write(ifile,*)
  write(ifile,*) &
       'Integrals of deficit charges : nm = (l,tn,sn,tm,sm)'
  write(ifile,10) 'l','tn','sn','tm', 'sm', 'q[nm]'

  do ltt = 1,num_lttx_us
     ll = l_ltt_us (ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)
!
     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)

     write(ifile,11) ll,tn, ispin1, tm, ispin2, q_sum_us(ltt)
  end do

10 format(1x,((1x,a1),2x,(a2),2x,(a2),2x,a2,2x,a2), &
        (8x,a5,7x))
11 format(1x,(i2,2x,i2,2x,i2,2x,i2,2x,i2),f20.10)

end subroutine write_qnm_sum_us_kt
! ========================================================================= 4.0

!=====================================================================
   subroutine calc_dmat_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltt, ll, tn, tm, is_debug_here
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   do ltt = 1,num_lttx_us
      ll = l_ltt_us(ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
      dmat_us(ltt) = bmat_us(ltt) &
                     + eref_us(lt_n_us(ll,tm)) * q_sum_us(ltt)
      if (is_debug_here /= 0) then
         write(IFLOG,*) 'D[nm]: l,tn,tm,dmat_us', &
                             ll,tn,tm,dmat_us(ltt)
      end if
   end do
99 continue
   end subroutine calc_dmat_us

!========================== added by K. T. ==================================== 4.0
subroutine calc_dmat_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltt, ll, tn, tm, is_debug_here
  integer :: ispin1, ispin2, itmp1, itmp2

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  do ltt = 1,num_lttx_us
     ll = l_ltt_us(ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)
!
     itmp1 = tn + nref_max_us *(ispin1 -1)
     itmp2 = tm + nref_max_us *(ispin2 -1)
!
     dmat_us(ltt) = bmat_us(ltt) &
          + eref_us(lt_n_us(ll,itmp2)) * q_sum_us(ltt)

     if (is_debug_here /= 0) then
        write(IFLOG,*) 'D[nm]: l,tn,sn,tm,sm, dmat_us', &
             ll,tn, ispin1, tm, ispin2, dmat_us(ltt)
     end if

  end do

99 continue

end subroutine calc_dmat_us_kt
! =============================================================================== 4.0

!=====================================================================
   subroutine write_qnm_bmat_dmat_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: ll, tn, tm, ltt
   write(ifile,*)
   write(ifile,*)  'Matrices q[nm], B[nm] & D[nm] : nm = (l,tn,tm)'
   write(ifile,10) 'l','tn','tm','q[nm]','B[nm]','D[nm]'
   do ltt = 1,num_lttx_us
      ll = l_ltt_us(ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
         write(ifile,11) ll,tn,tm,q_sum_us(ltt), &
                      bmat_us(ltt),dmat_us(ltt)
   end do
10 format(1x,((1x,a1),2x,(a2),2x,(a2)),3(8x,a5,7x))
11 format(1x,(i2,2x,i2,2x,i2),3f20.10)
   end subroutine write_qnm_bmat_dmat_us

!=====================================================================
   subroutine set_rcut_qps_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltlt, ln, tn, lm, tm, t1, t2, ips1, ips2, l3, &
              nrc, ir, is_debug_here
   real(8) :: rcut1, rcut2, rcut, r
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   do ltlt = 1,num_ltltx_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      t1 = t1_ltlt_us(ltlt)
      t2 = t2_ltlt_us(ltlt)
      ips1 = ips1_ltlt_us(ltlt)
      ips2 = ips2_ltlt_us(ltlt)
      rcut1 = rin_qps_ps(ips1,t1)
      rcut2 = rin_qps_ps(ips2,t2)
      rcut  = min(rcut1,rcut2)
      SRCH_RC:do ir = nmesh,10,-1
         r = rpos(ir)
         if (r < rcut) then
            nrc = ir ; exit SRCH_RC
         end if
         if (ir <= 10) then
            write(IFLOG,*) '### ERROR ### nrc was not found'
            write(IFLOG,*) '   ln,tn,lm,tm ...',ln,tn,lm,tm
            ier = 1 ; go to 99
         end if
      end do SRCH_RC
      nrcut_qps_us(ltlt,:) = nrc        
      rcut_qps_us (ltlt,:) = rpos(nrc)  
      if (is_debug_here /= 0) then
         write(IFLOG,*) 'QPS: ln,tn,lm,tm,nrcut_qps_us .....', &
                              ln,tn,lm,tm,nrcut_qps_us(ltlt,l3_max_us)
      end if
   end do
   if (allocated(rcl_qps)) then
      do l3 = 0,min(lmax_qps,l3_max_us)
         rcut  = rcl_qps(l3)
         SRCH_RC2:do ir = nmesh,10,-1
            r = rpos(ir)
            if (r < rcut) then
               nrc = ir ; exit SRCH_RC2
            end if
            if (ir <= 10) then
               write(IFLOG,*) '### ERROR ### nrc was not found'
               write(IFLOG,*) '   l3 ...',l3
               ier = 1 ; go to 99
            end if
         end do SRCH_RC2
         nrcut_qps_us(:,l3) = nrc
         rcut_qps_us (:,l3) = rpos(nrc)
         if (is_debug_here /= 0) then
            write(IFLOG,*) 'QPS: l3,nrcut_qps_us .....',l3,nrc
         end if
      end do
     !++++++++++++++++++++
      deallocate(rcl_qps)
     !++++++++++++++++++++
   end if
99 continue
   end subroutine set_rcut_qps_us

! ====================================== added by K. T. ================= 4.0
subroutine set_rcut_qps_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltlt, ln, tn, lm, tm, t1, t2, ips1, ips2, l3, &
              nrc, ir, is_debug_here
  integer :: ispin1, ispin2

  real(8) :: rcut1, rcut2, rcut, r

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  do ltlt = 1,num_ltltx_us
     ln = ln_ltlt_us(ltlt)
     tn = tn_ltlt_us(ltlt)
     lm = lm_ltlt_us(ltlt)
     tm = tm_ltlt_us(ltlt)
     t1 = t1_ltlt_us(ltlt)
     t2 = t2_ltlt_us(ltlt)
!
     ispin1 = spin1_index_ltlt_us(ltlt)
     ispin2 = spin2_index_ltlt_us(ltlt)
!
     ips1 = ips1_ltlt_us(ltlt)
     ips2 = ips2_ltlt_us(ltlt)

     rcut1 = rin_qps_ps(ips1,t1)
     rcut2 = rin_qps_ps(ips2,t2)
     rcut  = min(rcut1,rcut2)

     SRCH_RC:do ir = nmesh,10,-1
        r = rpos(ir)
        if (r < rcut) then
           nrc = ir ; exit SRCH_RC
        end if
        if (ir <= 10) then
           write(IFLOG,*) '### ERROR ### nrc was not found'
           write(IFLOG,*) '   ln,tn,sn, lm,tm, sm...',ln,tn,ispin1, lm,tm, ispin2
           ier = 1 ; go to 99
        end if
     end do SRCH_RC

     nrcut_qps_us(ltlt,:) = nrc        
     rcut_qps_us (ltlt,:) = rpos(nrc)  
     
     if (is_debug_here /= 0) then
        write(IFLOG,*) 'QPS: ln,tn,sn, lm,tm, sm, nrcut_qps_us .....', &
             ln,tn,ispin1, lm,tm, ispin2, nrcut_qps_us(ltlt,l3_max_us)
     end if

  end do

  if (allocated(rcl_qps)) then

     do l3 = 0,min(lmax_qps,l3_max_us)
        rcut  = rcl_qps(l3)

        SRCH_RC2:do ir = nmesh,10,-1
           r = rpos(ir)
           if (r < rcut) then
              nrc = ir ; exit SRCH_RC2
           end if
           if (ir <= 10) then
              write(IFLOG,*) '### ERROR ### nrc was not found'
              write(IFLOG,*) '   l3 ...',l3
              ier = 1 ; go to 99
           end if
        end do SRCH_RC2

        nrcut_qps_us(:,l3) = nrc
        rcut_qps_us (:,l3) = rpos(nrc)

        if (is_debug_here /= 0) then
           write(IFLOG,*) 'QPS: l3,nrcut_qps_us .....',l3,nrc
        end if

     end do

     !++++++++++++++++++++
     deallocate(rcl_qps)
     !++++++++++++++++++++

  end if

99 continue

end subroutine set_rcut_qps_us_kt
! ============================================================================== 4.0


!=====================================================================
   subroutine write_rcut_qps_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: ltlt, ln, tn, lm, tm, l3
   character(1) :: fn_label_orbital, fn_label_orbital_capital
   write(ifile,*)
   write(ifile,*) &
      'Cutoff radii rin[nmL] : nmL = (ln,tn,lm,tm,L)'
   write(ifile,10) 'ln','tn','lm','tm','L','rin[nmL]','nrin[nmL]'
   !do ltlt = 1,num_ltltx_us
   do ltlt = 1,num_ltlt_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      do l3 = abs(ln-lm),ln+lm,2
         write(ifile,11) ln,tn,lm,tm,l3, &
                         fn_label_orbital(ln),tn, &
                         fn_label_orbital(lm),tm, &
                         fn_label_orbital_capital(l3), &
                         rcut_qps_us(ltlt,l3), &
                         nrcut_qps_us(ltlt,l3)
      end do
   end do
10 format(1x,((a2),2x,(a2),2x,(a2),2x,(a2),2x,(1x,a1)),12x, &
             (8x,a8,4x),(6x,a9))
11 format(1x,(i2,2x,i2,2x,i2,2x,i2,2x,i2), &
             (5x,a1,i1,'-',a1,i1,'-',a1), &
              f20.10,i15)
   end subroutine write_rcut_qps_us

!================================== added by K. T. ======================= 4.0
subroutine write_rcut_qps_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: ltlt, ln, tn, lm, tm, l3
  integer :: ispin1, ispin2

  character(1) :: fn_label_orbital, fn_label_orbital_capital

  write(ifile,*)
  write(ifile,*) &
       'Cutoff radii rin[nmL] : nmL = (ln,tn,sn,lm,tm,sn,L)'
  write(ifile,10) 'ln','tn','sn', 'lm','tm', 'sn','L','rin[nmL]','nrin[nmL]'

  !do ltlt = 1,num_ltltx_us
  do ltlt = 1,num_ltlt_us
     ln = ln_ltlt_us(ltlt)
     tn = tn_ltlt_us(ltlt)
     lm = lm_ltlt_us(ltlt)
     tm = tm_ltlt_us(ltlt)
!
     ispin1 = spin1_index_ltlt_us(ltlt)
     ispin2 = spin2_index_ltlt_us(ltlt)

     do l3 = abs(ln-lm),ln+lm,2
        write(ifile,11) ln,tn, ispin1, lm,tm, ispin2, l3, &
             fn_label_orbital(ln),tn, &
             fn_label_orbital(lm),tm, &
             fn_label_orbital_capital(l3), &
             rcut_qps_us(ltlt,l3), &
             nrcut_qps_us(ltlt,l3)
     end do
  end do

10 format(1x,((a2),2x,(a2),2x,a5,2x,(a2),2x,(a2),2x,a5,2x,(1x,a1)),12x, &
        (8x,a8,4x),(6x,a9))
11 format(1x,(i2,2x,i2,2x,i4,3x,i2,2x,i2,2x,i4,3x,i2), &
        (5x,a1,i1,'-',a1,i1,'-',a1), &
        f20.10,i15)

end subroutine write_rcut_qps_us_kt
! ============================================================================== 4.0

!=====================================================================
   subroutine calc_qps_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltlt, ln, tn, lm, tm, l3, nrc, ir, &
              is_debug_here, nk_us, isdiff, n1, n2
   real(8) :: rcut, qint
   real(8),allocatable :: dq_us(:)
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   nk_us = 2
  !++++++++++++++++++++++++++++++++++++++++
   allocate(dq_us(0:nk_us)) ; dq_us = 0.d0
  !++++++++++++++++++++++++++++++++++++++++
   qps_us(:,:,:) = 0.d0
   !do ltlt = 1,num_ltltx_us
   do ltlt = 1,num_ltlt_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      do l3 = abs(ln-lm),ln+lm,2
         write(IFLOG,*)
         write(IFLOG,*)  &
            'Optimizing Qps[nmL](r) (polynomial) ...'
         write(IFLOG,15) 'ltlt,  L :', ln, tn, lm, tm, l3
         nrc = nrcut_qps_us(ltlt,l3)
         rcut = rpos(nrc)
         n1 = nrc - 10 ; n2 = nrc + 10
         if (n1 < 1) then
            n1 = 1 ; n2 = 1 + 10*2
         else if (n2 > nmesh) then
            n1 = nmesh - 10*2 ; n2 = nmesh
         end if
         isdiff = 2
         call diff_exp(ier,isdiff,n1,n2,rpos,q_us(1,ltlt),rcut, &
                       dq_us(0),dq_us(1),dq_us(2))
         call set_weight_exp(ier,1,nrc,rpos,wt)
         qint = 0.d0
         do ir = 1,nrc
            qint = qint + q_us(ir,ltlt) * rpos(ir)**(l3+2) * wt(ir)
         end do
         if (is_qps_type /= NOOPT) then
            call opt_poly_qps(ier,IFLOG,l3,0, &
               nmesh,rpos,q_us(1,ltlt),nk_us,nrc,dq_us(0),qint, &
               ng_qps_us,gmin_qps_us,gmax_qps_us,ncoeff_qps_us, &
               coeff_qps_us(0,ltlt,l3))
            if (ier /= 0) then
               write(IFLOG,*) '### ERROR ### in opt_poly_qps'
               go to 99
            end if
         end if
         qps_us(:,ltlt,l3) = q_us(:,ltlt)
         if (is_qps_type /= NOOPT) then
            call calc_poly_fn(1,nrc,rpos,l3,ncoeff_qps_us, &
                    coeff_qps_us(0,ltlt,l3),qps_us(1,ltlt,l3))
            call check_qps_us(ier,ltlt,l3,nk_us,dq_us,qint)
         end if
      end do
   end do
15 format(1x,a10,6x,(i2,2x,i2,2x,i2,2x,i2),i20)
   do ltlt = 1,num_ltlt_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      do l3 = abs(ln-lm),ln+lm,2
         do ir = 1,nmesh
         if (abs(qps_us(ir,ltlt,l3)) < 1.d-99) then
            qps_us(ir,ltlt,l3) = 0.d0
         end if
         end do
      end do
   end do
99 continue
  !++++++++++++++++++
   deallocate(dq_us)
  !++++++++++++++++++
   end subroutine calc_qps_us


!================================== added by K. T. ==================== 4.0
subroutine calc_qps_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltlt, ln, tn, lm, tm, l3, nrc, ir, &
       is_debug_here, nk_us, isdiff, n1, n2
  integer :: ispin1, ispin2

  real(8) :: rcut, qint
  real(8),allocatable :: dq_us(:)

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  nk_us = 2

  !++++++++++++++++++++++++++++++++++++++++
  allocate(dq_us(0:nk_us)) ; dq_us = 0.d0
  !++++++++++++++++++++++++++++++++++++++++

  qps_us(:,:,:) = 0.d0
  !do ltlt = 1,num_ltltx_us
  do ltlt = 1,num_ltlt_us
     ln = ln_ltlt_us(ltlt)
     tn = tn_ltlt_us(ltlt)
     lm = lm_ltlt_us(ltlt)
     tm = tm_ltlt_us(ltlt)

     ispin1 = spin1_index_ltlt_us(ltlt)
     ispin2 = spin2_index_ltlt_us(ltlt)

     do l3 = abs(ln-lm),ln+lm,2
        write(IFLOG,*)
        write(IFLOG,*)  &
             'Optimizing Qps[nmL](r) (polynomial) ...'
        write(IFLOG,15) 'ltlt,  L :', ln, tn, ispin1, lm, tm, ispin2, l3


        nrc = nrcut_qps_us(ltlt,l3)
        rcut = rpos(nrc)
        n1 = nrc - 10 ; n2 = nrc + 10

        if (n1 < 1) then
           n1 = 1 ; n2 = 1 + 10*2
        else if (n2 > nmesh) then
           n1 = nmesh - 10*2 ; n2 = nmesh
        end if

        isdiff = 2
        call diff_exp(ier,isdiff,n1,n2,rpos,q_us(1,ltlt),rcut, &
             dq_us(0),dq_us(1),dq_us(2))
        call set_weight_exp(ier,1,nrc,rpos,wt)

        qint = 0.d0
        do ir = 1,nrc
           qint = qint + q_us(ir,ltlt) * rpos(ir)**(l3+2) * wt(ir)
        end do

        if (is_qps_type /= NOOPT) then
           call opt_poly_qps(ier,IFLOG,l3,0, &
                nmesh,rpos,q_us(1,ltlt),nk_us,nrc,dq_us(0),qint, &
                ng_qps_us,gmin_qps_us,gmax_qps_us,ncoeff_qps_us, &
                coeff_qps_us(0,ltlt,l3))
           if (ier /= 0) then
              write(IFLOG,*) '### ERROR ### in opt_poly_qps'
              go to 99
           end if
        end if
        qps_us(:,ltlt,l3) = q_us(:,ltlt)
        if (is_qps_type /= NOOPT) then
           call calc_poly_fn(1,nrc,rpos,l3,ncoeff_qps_us, &
                coeff_qps_us(0,ltlt,l3),qps_us(1,ltlt,l3))
           call check_qps_us_kt(ier,ltlt,l3,nk_us,dq_us,qint)
        end if
     end do
  end do

15 format(1x,a10,6x,(i2,2x,i2,2x,i2,2x,i2,2x,i2,2x,i2,2x,i2),i20)

  do ltlt = 1,num_ltlt_us
     ln = ln_ltlt_us(ltlt)
     tn = tn_ltlt_us(ltlt)
     lm = lm_ltlt_us(ltlt)
     tm = tm_ltlt_us(ltlt)

     do l3 = abs(ln-lm),ln+lm,2
        do ir = 1,nmesh
           if (abs(qps_us(ir,ltlt,l3)) < 1.d-99) then
              qps_us(ir,ltlt,l3) = 0.d0
           end if
        end do
     end do
  end do

99 continue

  !++++++++++++++++++
  deallocate(dq_us)
  !++++++++++++++++++

end subroutine calc_qps_us_kt
! ============================================================================= 4.0

!=====================================================================
   subroutine check_qps_us(ier,ltlt,l3,nk_us,dq_us,qint)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer,intent(in)  :: ltlt, l3, nk_us
   real(8),intent(in)  :: dq_us(0:nk_us), qint
   integer :: nrc, ir1, ir2, ir, n1, n2, isdiff, &
              ln, tn, lm, tm, ncoeff, itmp, icoeff
   real(8) :: f0_anal(0:2), f0_num(0:2), &
              fint_anal, fint_num, f_tmp(nmesh), rcut
   ier = 0
   ln = ln_ltlt_us(ltlt)
   tn = tn_ltlt_us(ltlt)
   lm = lm_ltlt_us(ltlt)
   tm = tm_ltlt_us(ltlt)
   f0_anal(:) = 0.d0
   nrc = nrcut_qps_us(ltlt,l3)
   ncoeff = ncoeff_qps_us
   ir1 = nrc
   ir2 = nrc
   call calc_poly_fn( &
      ir1,ir2,rpos,l3,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3),f_tmp)
   f0_anal(0) = f_tmp(nrc)
   call calc_dpoly_fn( &
      ir1,ir2,rpos,l3,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3),f_tmp)
   f0_anal(1) = f_tmp(nrc)
   call calc_ddpoly_fn( &
      ir1,ir2,rpos,l3,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3),f_tmp)
   f0_anal(2) = f_tmp(nrc)
   f0_num(:) = 0.d0
   isdiff = nk_us
   n1 = nrc - 10
   n2 = nrc + 10
   rcut = rpos(nrc)
   call diff_exp(ier,isdiff,n1,n2,rpos,qps_us(1,ltlt,l3),rcut, &
           f0_num(0),f0_num(1),f0_num(2))
   f0_num(0) = qps_us(nrc,ltlt,l3)
   fint_anal = 0.d0
   nrc = nrcut_qps_us(ltlt,l3)
   rcut = rpos(nrc)
   ncoeff = ncoeff_qps_us
   do icoeff = 0,ncoeff_qps_us
      itmp = 2*l3 + 2*icoeff + 3
      fint_anal = fint_anal &
         + coeff_qps_us(icoeff,ltlt,l3) / dble(itmp) &
            * rcut**(itmp)
   end do
   nrc = nrcut_qps_us(ltlt,l3)
   call set_weight_exp(ier,1,nrc,rpos,wt)
   fint_num = 0.d0
   do ir = 1,nrc
      fint_num = fint_num &
         + qps_us(ir,ltlt,l3) * rpos(ir)**(l3+2) * wt(ir)
   end do
   call write_checked_qps_us(IFLOG,ln,tn,lm,tm,l3, &
           nrc,rcut,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3), &
           nk_us,dq_us,f0_anal,f0_num,qint,fint_anal,fint_num)
   call write_checked_qps_us(IFSUM,ln,tn,lm,tm,l3, &
           nrc,rcut,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3), &
           nk_us,dq_us,f0_anal,f0_num,qint,fint_anal,fint_num)
99 continue
   end subroutine check_qps_us

!============================== added by K. T. ===================== 4.0
subroutine check_qps_us_kt(ier,ltlt,l3,nk_us,dq_us,qint)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer,intent(in)  :: ltlt, l3, nk_us
  real(8),intent(in)  :: dq_us(0:nk_us), qint
  integer :: nrc, ir1, ir2, ir, n1, n2, isdiff, &
       ln, tn, lm, tm, ncoeff, itmp, icoeff
  real(8) :: f0_anal(0:2), f0_num(0:2), &
       fint_anal, fint_num, f_tmp(nmesh), rcut
  integer :: ispin1, ispin2

  ier = 0
  ln = ln_ltlt_us(ltlt)
  tn = tn_ltlt_us(ltlt)
  lm = lm_ltlt_us(ltlt)
  tm = tm_ltlt_us(ltlt)

  ispin1 = spin1_index_ltlt_us(ltlt)
  ispin2 = spin2_index_ltlt_us(ltlt)
  
  f0_anal(:) = 0.d0

  nrc = nrcut_qps_us(ltlt,l3)
  ncoeff = ncoeff_qps_us

  ir1 = nrc
  ir2 = nrc

  call calc_poly_fn( &
       ir1,ir2,rpos,l3,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3),f_tmp)
  f0_anal(0) = f_tmp(nrc)

  call calc_dpoly_fn( &
       ir1,ir2,rpos,l3,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3),f_tmp)
  f0_anal(1) = f_tmp(nrc)

  call calc_ddpoly_fn( &
       ir1,ir2,rpos,l3,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3),f_tmp)
  f0_anal(2) = f_tmp(nrc)

  f0_num(:) = 0.d0
  isdiff = nk_us

  n1 = nrc - 10
  n2 = nrc + 10
  rcut = rpos(nrc)

  call diff_exp(ier,isdiff,n1,n2,rpos,qps_us(1,ltlt,l3),rcut, &
       f0_num(0),f0_num(1),f0_num(2))
  f0_num(0) = qps_us(nrc,ltlt,l3)

  fint_anal = 0.d0
  nrc = nrcut_qps_us(ltlt,l3)
  rcut = rpos(nrc)

  ncoeff = ncoeff_qps_us

  do icoeff = 0,ncoeff_qps_us
     itmp = 2*l3 + 2*icoeff + 3
     fint_anal = fint_anal &
          + coeff_qps_us(icoeff,ltlt,l3) / dble(itmp) &
          * rcut**(itmp)
  end do

  nrc = nrcut_qps_us(ltlt,l3)
  call set_weight_exp(ier,1,nrc,rpos,wt)

  fint_num = 0.d0
  do ir = 1,nrc
     fint_num = fint_num &
          + qps_us(ir,ltlt,l3) * rpos(ir)**(l3+2) * wt(ir)
  end do

  call write_checked_qps_us_kt(IFLOG,ln,tn,ispin1,lm,tm,ispin2,l3, &
       nrc,rcut,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3), &
       nk_us,dq_us,f0_anal,f0_num,qint,fint_anal,fint_num)

  call write_checked_qps_us_kt(IFSUM,ln,tn,ispin1,lm,tm,ispin2,l3, &
       nrc,rcut,ncoeff_qps_us,coeff_qps_us(0,ltlt,l3), &
       nk_us,dq_us,f0_anal,f0_num,qint,fint_anal,fint_num)

99 continue

end subroutine check_qps_us_kt
! ======================================================================== 4.0

!=====================================================================
   subroutine write_checked_qps_us(ifile,ln,tn,lm,tm,l3, &
      nrc,rcut,ncoeff,coeff,nk,f0_exact,f0_anal,f0_num, &
      fint_exact,fint_anal,fint_num)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   implicit none
   integer,intent(in) :: ifile, ln, tn, lm, tm, l3, nrc, ncoeff, nk
   real(8),intent(in) :: &
      rcut,coeff(0:ncoeff), f0_exact(0:nk), f0_anal(0:nk), &
      f0_num(0:nk), fint_exact, fint_anal, fint_num
   integer :: icoeff, ik               
   write(ifile,*)
   write(ifile,*)  &
      'Vanderbilt ultrasoft Qps[nmL](r) (polynomial) was optimized.'
   write(ifile,15) 'ltlt,  L :', ln, tn, lm, tm, l3
   write(ifile,10) ' rc,  nrc:', rcut, nrc
   write(ifile,11) 'ncoeff,nk:', ncoeff, nk
   do icoeff=0,ncoeff
   write(ifile,12) 'coeff(',2*icoeff,'):', coeff(icoeff)
   end do
   write(ifile,13) 'Qps(rc)','Qps''(rc)','Qps"(rc)'
   write(ifile,14) 'Exact    :',(f0_exact(ik),ik=0,nk)
   write(ifile,14) 'Analytic :',(f0_anal (ik),ik=0,nk)
   write(ifile,14) 'Numerical:',(f0_num  (ik),ik=0,nk)
   write(ifile,16) '<r^L*Qps>'
   write(ifile,14) 'Exact    :',fint_exact
   write(ifile,14) 'Analytic :',fint_anal
   write(ifile,14) 'Numerical:',fint_num
10 format(1x,a10,f20.10,i20)
11 format(1x,a10,2(i20))
12 format(1x,(a6,i2,a2),f20.10)
13 format(1x,10x,(8x,a7,5x),(8x,a8,4x),(8x,a8,4x))
14 format(1x,a10,5(f20.10))
15 format(1x,a10,6x,(i2,2x,i2,2x,i2,2x,i2),i20)
16 format(1x,10x,(8x,a9,3x))
   end subroutine write_checked_qps_us

!================================== added by K. T. ====================== 4.0
subroutine write_checked_qps_us_kt(ifile,ln,tn,ispin1, lm,tm,ispin2, l3, &
     nrc,rcut,ncoeff,coeff,nk,f0_exact,f0_anal,f0_num, &
     fint_exact,fint_anal,fint_num)
  implicit none

  integer,intent(in) :: ifile, ln, tn, lm, tm, l3, nrc, ncoeff, nk
  integer, intent(in) :: ispin1, ispin2

  real(8),intent(in) :: &
       rcut,coeff(0:ncoeff), f0_exact(0:nk), f0_anal(0:nk), &
       f0_num(0:nk), fint_exact, fint_anal, fint_num

  integer :: icoeff, ik               

  write(ifile,*)
  write(ifile,*)  &
       'Vanderbilt ultrasoft Qps[nmL](r) (polynomial) was optimized.'
  write(ifile,15) 'ltlt,  L :', ln, tn, ispin1, lm, tm, ispin2, l3
  write(ifile,10) ' rc,  nrc:', rcut, nrc
  write(ifile,11) 'ncoeff,nk:', ncoeff, nk

  do icoeff=0,ncoeff
     write(ifile,12) 'coeff(',2*icoeff,'):', coeff(icoeff)
  end do

  write(ifile,13) 'Qps(rc)','Qps''(rc)','Qps"(rc)'
  write(ifile,14) 'Exact    :',(f0_exact(ik),ik=0,nk)
  write(ifile,14) 'Analytic :',(f0_anal (ik),ik=0,nk)
  write(ifile,14) 'Numerical:',(f0_num  (ik),ik=0,nk)
  write(ifile,16) '<r^L*Qps>'
  write(ifile,14) 'Exact    :',fint_exact
  write(ifile,14) 'Analytic :',fint_anal
  write(ifile,14) 'Numerical:',fint_num

10 format(1x,a10,f20.10,i20)
11 format(1x,a10,2(i20))
12 format(1x,(a6,i2,a2),f20.10)
13 format(1x,10x,(8x,a7,5x),(8x,a8,4x),(8x,a8,4x))
14 format(1x,a10,5(f20.10))
15 format(1x,a10,6x,(i2,2x,i2,2x,i2,2x,i2,2x,i2,2x,i2,2x,i2),i20)
16 format(1x,10x,(8x,a9,3x))
  
end subroutine write_checked_qps_us_kt
! =========================================================================== 4.0

!=====================================================================
   subroutine calc_qps_vlocqps_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ltlt, ltt, ll, tn, tm, is_debug_here
   real(8) :: r
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   qps_sum_us(:) = 0.d0
   vlocqps_us(:) = 0.d0
   do ltt = 1,num_lttx_us
      ll   = l_ltt_us(ltt)
      tn   = tn_ltt_us(ltt)
      tm   = tm_ltt_us(ltt)
      ltlt = ltlt_nm_us(ll,tn,ll,tm)
      if (ltlt /= 0) then
         do ir = 1,nmesh
            r = rpos(ir)
            qps_sum_us(ltt) = qps_sum_us(ltt) &
               + r*r*wr(ir) * qps_us(ir,ltlt,0)
            vlocqps_us(ltt) = vlocqps_us(ltt) &
! =============================================== modified by K. T. ====== 4.0
!               + r*r*wr(ir) * qps_us(ir,ltlt,0) * vloc_scr_us(ir)
               + r*r*wr(ir) * qps_us(ir,ltlt,0) * vloc_scr_us(ir,1)
! ========================================================================= 4.0
         end do
      else
         qps_sum_us(ltt) = 0.d0
         vlocqps_us(ltt) = 0.d0
      end if
      if (is_debug_here /= 0) then
         write(IFLOG,*) 'q[nm]: l,tn,tm,qps_sum,vlocqps', &
                         ll,tn,tm,qps_sum_us(ltt),vlocqps_us(ltt)
      end if
   end do
   call symmetrize_ltt(num_lttx_us,num_lttx_us, &
      l_ltt_us,tn_ltt_us,tm_ltt_us,qps_sum_us)
   call symmetrize_ltt(num_lttx_us,num_lttx_us, &
      l_ltt_us,tn_ltt_us,tm_ltt_us,vlocqps_us)
   write(IFLOG,*) 'q[nm]: l, tn, tm, q[nm], qps[nm]'
   do ltt = 1,num_lttx_us
      ll = l_ltt_us(ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
      write(IFLOG,'(3i5,2(1pe20.10))') &
         ll,tn,tm,q_sum_us(ltt),qps_sum_us(ltt)
   end do
99 continue
   end subroutine calc_qps_vlocqps_us

!==================================== added by K. T. ======================== 4.0
subroutine calc_qps_vlocqps_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ir, ltlt, ltt, ll, tn, tm, is_debug_here
  integer :: ispin1, ispin2, itmp1, itmp2
  real(8) :: r

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  qps_sum_us(:) = 0.d0
  vlocqps_us(:) = 0.d0

  do ltt = 1,num_lttx_us
     ll   = l_ltt_us(ltt)
     tn   = tn_ltt_us(ltt)
     tm   = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)
!
     itmp1 = tn + nref_max_us *(ispin1 -1)
     itmp2 = tm + nref_max_us *(ispin2 -1)

     ltlt = ltlt_nm_us(ll,itmp1,ll,itmp2)

     if ( ispin1 == ispin2 ) then

        if (ltlt /= 0) then
           do ir = 1,nmesh
              r = rpos(ir)
              qps_sum_us(ltt) = qps_sum_us(ltt) &
                   + r*r*wr(ir) * qps_us(ir,ltlt,0)
              vlocqps_us(ltt) = vlocqps_us(ltt) &
                   + r*r*wr(ir) * qps_us(ir,ltlt,0) * vloc_scr_us(ir,ispin2)
           end do
        else
           qps_sum_us(ltt) = 0.d0
           vlocqps_us(ltt) = 0.d0
        end if
     endif

     if (is_debug_here /= 0) then
        write(IFLOG,*) 'q[nm]: l,tn,sn,tm,sm,qps_sum,vlocqps', &
             ll,tn, ispin1, tm, ispin2, qps_sum_us(ltt),vlocqps_us(ltt)
     end if

  end do

  call symmetrize_ltt_kt( num_lttx_us, num_lttx_us, &
       &                 l_ltt_us, tn_ltt_us, spin1_index_ltt_us, &
       &                           tm_ltt_us, spin2_index_ltt_us, qps_sum_us )

  call symmetrize_ltt_kt( num_lttx_us, num_lttx_us, &
       &                 l_ltt_us, tn_ltt_us, spin1_index_ltt_us, &
       &                           tm_ltt_us, spin2_index_ltt_us, vlocqps_us)

  write(IFLOG,*) 'q[nm]: l, tn, tm, q[nm], qps[nm]'

  do ltt = 1,num_lttx_us
     ll = l_ltt_us(ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)

     write(IFLOG,'(5i5,2(1pe20.10))') &
          ll,tn, ispin1, tm, ispin2, q_sum_us(ltt),qps_sum_us(ltt)
  end do

99 continue

end subroutine calc_qps_vlocqps_us_kt
! ============================================================================= 4.0

!=====================================================================
   subroutine write_qps_vlocqps_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: ltt, ll, tn, tm
   write(ifile,*)
   write(ifile,*) &
    'Integrals of deficit charges & <Vloc*Qps[nm]> : nm = (l,tn,tm)'
   write(ifile,10) 'l','tn','tm','q[nm]','qps[nm]','<Vloc*Qps[nm]>'
   do ltt = 1,num_lttx_us
      ll = l_ltt_us (ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
      write(ifile,11) ll,tn,tm,q_sum_us(ltt), &
                      qps_sum_us(ltt),vlocqps_us(ltt)
   end do
10 format(1x,((1x,a1),2x,(a2),2x,(a2)), &
             (8x,a5,7x),(8x,a7,5x),(6x,a14))
11 format(1x,(i2,2x,i2,2x,i2),3f20.10)
   end subroutine write_qps_vlocqps_us


!==================================== added by K. T. ============== 4.0
subroutine write_qps_vlocqps_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: ltt, ll, tn, tm
  integer :: ispin1, ispin2

  write(ifile,*)
  write(ifile,*) &
       'Integrals of deficit charges & <Vloc*Qps[nm]> : nm = (l,tn,sn,tm,sm)'
  write(ifile,10) 'l','tn','sn', 'tm','sm', 'q[nm]','qps[nm]','<Vloc*Qps[nm]>'

  do ltt = 1,num_lttx_us
     ll = l_ltt_us (ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)

     write(ifile,11) ll,tn, ispin1, tm, ispin2, q_sum_us(ltt), &
          qps_sum_us(ltt),vlocqps_us(ltt)
  end do

10 format(1x,((1x,a1),2x,(a2),2x,(a2),2x,a2,2x,a2), &
        (8x,a5,7x),(8x,a7,5x),(6x,a14))
11 format(1x,(i2,2x,i2,2x,i2,2x,i2,2x,i2),3f20.10)

end subroutine write_qps_vlocqps_us_kt
!============================================================================ 4.0

   subroutine calc_dion_us(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ltt, ll, tn, tm, is_debug_here
   is_debug_here = 1
   if (is_debug /= 0) then
      is_debug_here = is_debug
   end if
   ier = 0
   do ltt = 1,num_lttx_us
      ll = l_ltt_us(ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
      dion_us(ltt) = dmat_us(ltt) - vlocqps_us(ltt)
   end do
   call symmetrize_ltt(num_lttx_us,num_lttx_us, &
      l_ltt_us,tn_ltt_us,tm_ltt_us,dion_us)
   if (is_debug_here /= 0) then
      do ltt = 1,num_lttx_us
         ll = l_ltt_us(ltt)
         tn = tn_ltt_us(ltt)
         tm = tm_ltt_us(ltt)
            write(IFLOG,*) 'Dion[nm]: l,tn,tm,dion_us', &
                                   ll,tn,tm,dion_us(ltt)
      end do
   end if
99 continue
   end subroutine calc_dion_us

! ===================================== added by K. T. ====================== 4.0
subroutine calc_dion_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier
  integer :: ltt, ll, tn, tm, is_debug_here
  integer :: ispin1, ispin2

  is_debug_here = 1
  if (is_debug /= 0) then
     is_debug_here = is_debug
  end if

  ier = 0
  do ltt = 1,num_lttx_us
     ll = l_ltt_us(ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)
     dion_us(ltt) = dmat_us(ltt) - vlocqps_us(ltt)
  end do

  call symmetrize_ltt_kt( num_lttx_us, num_lttx_us, &
       &                  l_ltt_us, tn_ltt_us, spin1_index_ltt_us, &
       &                            tm_ltt_us, spin2_index_ltt_us, dion_us )

  if (is_debug_here /= 0) then
     do ltt = 1,num_lttx_us
        ll = l_ltt_us(ltt)
        tn = tn_ltt_us(ltt)
        tm = tm_ltt_us(ltt)

        ispin1 = spin1_index_ltt_us(ltt)
        ispin2 = spin2_index_ltt_us(ltt)

        write(IFLOG,*) 'Dion[nm]: l,tn,sn,tm,sm,dion_us', &
             ll,tn, ispin1, tm, ispin2, dion_us(ltt)

     end do
  end if

99 continue

end subroutine calc_dion_us_kt
! ========================================================================== 4.0

!=====================================================================
   subroutine symmetrize_ltt(na,n,ln,tn,tm,val_ltt)
!=====================================================================
!
!  Symmetrize val_ltt(1:n)
!
!  2007.10.25  M. Okamoto
!
!---------------------------------------------------------------------
   implicit none
   integer,intent(in)    :: na, n, ln(na), tn(na), tm(na)
   real(8),intent(inout) :: val_ltt(na)
   real(8),allocatable :: varray(:,:,:)
   integer :: lmin, lmax, tmax, i
   lmax = maxval(ln(1:n))
   lmin = minval(ln(1:n))
   tmax = max(maxval(tn(1:n)),maxval(tm(1:n)))
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++
   allocate(varray(lmin:lmax,tmax,tmax)) ; varray = 0.d0
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++
   do i = 1,n
      varray(ln(i),tn(i),tm(i)) = val_ltt(i)
   end do
   do i = 1,n
      val_ltt(i) = 0.5d0 * ( &
         varray(ln(i),tn(i),tm(i)) + varray(ln(i),tm(i),tn(i)) )
   end do
  !+++++++++++++++++++
   deallocate(varray)
  !+++++++++++++++++++
   end subroutine symmetrize_ltt

!====================================== added by K. T. ====================== 4.0
subroutine symmetrize_ltt_kt(na,n,ln,tn,sn,tm,sm,val_ltt)
  implicit none

  integer,intent(in)    :: na, n, ln(na), tn(na), tm(na)
  integer, intent(in) :: sn(na), sm(na)

  real(8),intent(inout) :: val_ltt(na)
  real(8),allocatable :: varray(:,:,:,:,:)
  integer :: lmin, lmax, tmax, smax, i

  lmax = maxval(ln(1:n))
  lmin = minval(ln(1:n))
  tmax = max(maxval(tn(1:n)),maxval(tm(1:n)))

  smax = max(maxval(sn(1:n)),maxval(sm(1:n)))

  !++++++++++++++++++++++++++++++++++++++++++++++++++++++
  allocate(varray(lmin:lmax,tmax,smax,tmax,smax)) ; varray = 0.d0
  !++++++++++++++++++++++++++++++++++++++++++++++++++++++

  do i = 1,n
     varray(ln(i),tn(i),sn(i),tm(i),sm(i)) = val_ltt(i)
  end do
  do i = 1,n
     val_ltt(i) = 0.5d0 * ( &
          &       varray(ln(i),tn(i),sn(i),tm(i),sm(i)) &
          &     + varray(ln(i),tm(i),sm(i),tn(i),sn(i)) )
  end do
  !+++++++++++++++++++
  deallocate(varray)
  !+++++++++++++++++++
end subroutine symmetrize_ltt_kt
! ============================================================================ 4.0

!=====================================================================
   subroutine write_bmat_dmat_dion_us(ifile)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(in) :: ifile
   integer :: ll, tn, tm, ltt
   write(ifile,*)
   write(ifile,*)  'Matrices B[nm], D[nm] & Dion[nm] : nm = (l,tn,tm)'
   write(ifile,10) 'l','tn','tm','B[nm]','D[nm]','Dion[nm]'
   do ltt = 1,num_lttx_us
      ll = l_ltt_us(ltt)
      tn = tn_ltt_us(ltt)
      tm = tm_ltt_us(ltt)
         write(ifile,11) ll,tn,tm,bmat_us(ltt), &
                      dmat_us(ltt),dion_us(ltt)
   end do
10 format(1x,((1x,a1),2x,(a2),2x,(a2)),2(8x,a5,7x),(8x,a8,4x))
11 format(1x,(i2,2x,i2,2x,i2),3f20.10)
   end subroutine write_bmat_dmat_dion_us

!============================= added by K. T. =========================== 4.0
subroutine write_bmat_dmat_dion_us_kt(ifile)
  use parameters
  implicit none

  integer,intent(in) :: ifile
  integer :: ll, tn, tm, ltt
  integer :: ispin1, ispin2

  write(ifile,*)
  write(ifile,*)  'Matrices B[nm], D[nm] & Dion[nm] : nm = (l,tn,sn,tm,sm)'
  write(ifile,10) 'l','tn','sn', 'tm','sm', 'B[nm]','D[nm]','Dion[nm]'

  do ltt = 1,num_lttx_us
     ll = l_ltt_us(ltt)
     tn = tn_ltt_us(ltt)
     tm = tm_ltt_us(ltt)

     ispin1 = spin1_index_ltt_us(ltt)
     ispin2 = spin2_index_ltt_us(ltt)

     write(ifile,11) ll,tn, ispin1, tm, ispin2, bmat_us(ltt), &
          dmat_us(ltt),dion_us(ltt)
  end do

10 format(1x,((1x,a1),2x,(a2),2x,(a2),2x,a2,2x,a2),2(8x,a5,7x),(8x,a8,4x))
11 format(1x,(i2,2x,i2,2x,i2,2x,i2,2x,i2),3f20.10)

end subroutine write_bmat_dmat_dion_us_kt
! ========================================================================== 4.0
   
!=====================================================================
   subroutine calc_us_fourier(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ig, ll, lt, nrc
   real(8) :: r
   ier = 0
   psi_g_us(:,:) = 0.d0
   phi_g_us(:,:) = 0.d0
   chi_g_us(:,:) = 0.d0
   beta_g_us(:,:) = 0.d0
   do lt = 1,num_ltx_us
      ll = l_lt_us(lt)
     !--- MO-100528 [begin]
      !if (is_bound_us(lt) == 0) then
      !   nrc = nrcut_phi_us(lt)
      !   rphi_us(nrc:nmesh,lt) = rpsi_us(nrc:nmesh,lt)
      !end if
     !--- MO-100528 [end]
      do ir = 1,nmesh
         r = rpos(ir)
         psi_g_us(1,lt) = psi_g_us(1,lt)  & 
            + r*wr(ir) * rpsi_us(ir,lt)
         phi_g_us(1,lt) = phi_g_us(1,lt)  &
            + r*wr(ir) * rphi_us(ir,lt)
         chi_g_us(1,lt) = chi_g_us(1,lt)  &
            + r*wr(ir) * rchi_us(ir,lt)
         beta_g_us(1,lt) = beta_g_us(1,lt)  &
            + r*wr(ir) * rbeta_us(ir,lt)
      end do
      do ig = 2,ng_mesh
         gr_wk(:) = gpos(ig) * rpos(:)
         call bessel_js(ll,nmesh,gr_wk,js_wk)
         do ir = 1,nmesh
            r = rpos(ir)
            psi_g_us(ig,lt) = psi_g_us(ig,lt) &
               + r*wr(ir) * rpsi_us(ir,lt)  * js_wk(ir)
            phi_g_us(ig,lt) = phi_g_us(ig,lt) &
               + r*wr(ir) * rphi_us(ir,lt)  * js_wk(ir)
            chi_g_us(ig,lt) = chi_g_us(ig,lt) &
               + r*wr(ir) * rchi_us(ir,lt)  * js_wk(ir)
            beta_g_us(ig,lt) = beta_g_us(ig,lt) &
               + r*wr(ir) * rbeta_us(ir,lt) * js_wk(ir)
         end do
      end do
   end do
99 continue
   end subroutine calc_us_fourier

!=====================================================================
   subroutine write_us(ier)
!=====================================================================
!
!  M. Okamoto
!     
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ig, nn, ll, tt, ln, tn, lt, ips
   real(8) :: g
   character(1) :: ch_ln, ch_tn, fn_label_orbital
   character(2) :: ch_del, pp_type_tmp
   character(3) :: ch3
   ier = 0
   ch_del = ','//' '
   open(IFUS,file=trim(usfile),status='unknown')
   write(IFUS,*) 'US wave functions and their fourier transforms'
   call write_file_header(IFUS)
   do lt = 1,num_ltx_us
      ips = ips_lt_us(lt)
      nn  = n_lt_us(lt)
      ll  = l_lt_us(lt)
      tt  = t_lt_us(lt)
      ln  = ln_lt_us(lt)
      tn  = tn_lt_us(lt)
      select case (is_val_type_ps(ips))
      case (TM91)
         pp_type_tmp = 'NC'
      case (US90)
         pp_type_tmp = 'US'
      end select
      ch_ln = fn_label_orbital(ll)
      write(ch_tn,'(i1)') tn
      ch3(1:3) = '_'//ch_ln(1:1)//ch_tn(1:1)
      write(IFUS,*)
      write(IFUS,*)  '(n,l,t) --->',nn,ll,tt
      write(IFUS,*)  '(ln,tn) --->',ln,tn
      write(IFUS,*)  'pp_type --->  ',pp_type_tmp
      write(IFUS,20) 'eref    --->',eref_us(lt)
      write(IFUS,20) 'rcut    --->',rpos(nrcut_phi_us(lt))
      write(IFUS,*) &
         'rpos'//ch3//ch_del//'rpsi'//ch3//ch_del//'rphi'//ch3 &
         //ch_del//'rchi'//ch3//ch_del//'rbeta'//ch3
      do ir = 1,nmesh
         write(IFUS,10) rpos(ir), &
            rpsi_us(ir,lt),rphi_us(ir,lt), &
            rchi_us(ir,lt),rbeta_us(ir,lt)
      end do
      write(IFUS,*)
      write(IFUS,*) '(n,l,t) --->',nn,ll,tt
      write(IFUS,*) '(ln,tn) --->',ln,tn
      write(IFUS,*) 'pp_type --->  ',pp_type_tmp
      write(IFUS,*) &
         'gpos'//ch3//ch_del//'gpsi'//ch3//ch_del//'gphi'//ch3 &
         //ch_del//'gchi'//ch3//ch_del//'gbeta'//ch3
      do ig = 1,ng_mesh
         g = gpos(ig)
         write(IFUS,10) g, &
            g*psi_g_us(ig,lt),g*phi_g_us(ig,lt), &
            g*chi_g_us(ig,lt),g*beta_g_us(ig,lt)
      end do
   end do
10 format(10(1pe20.10))
20 format(1x,a12,f20.10)
   close(IFUS)
   end subroutine write_us

! ======================================== added by K. T. ============== 4.0
subroutine write_us_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier

  integer :: ir, ig, nn, ll, tt, ln, tn, lt, ips
  real(8) :: g
  character(1) :: ch_ln, ch_tn, fn_label_orbital
  character(2) :: ch_del, pp_type_tmp
  character(3) :: ch3

  integer :: ispin

  ier = 0
  ch_del = ','//' '

  open(IFUS,file=trim(usfile),status='unknown')
  write(IFUS,*) 'US wave functions and their fourier transforms'

  call write_file_header(IFUS)

  do lt = 1,num_ltx_us
     ips = ips_lt_us(lt)
     nn  = n_lt_us(lt)
     ll  = l_lt_us(lt)
     tt  = t_lt_us(lt)
     ln  = ln_lt_us(lt)
     tn  = tn_lt_us(lt)

     ispin = spin_index_lt_us(lt)

     select case (is_val_type_ps(ips))
     case (TM91)
        pp_type_tmp = 'NC'
     case (US90)
        pp_type_tmp = 'US'
     end select

     ch_ln = fn_label_orbital(ll)
     write(ch_tn,'(i1)') tn

     ch3(1:3) = '_'//ch_ln(1:1)//ch_tn(1:1)
     write(IFUS,*)
     write(IFUS,*)  '(n,l,t,s) --->',nn,ll,tt, ispin
     write(IFUS,*)  '(ln,tn) --->',ln,tn
     write(IFUS,*)  'pp_type --->  ',pp_type_tmp
     write(IFUS,20) 'eref    --->',eref_us(lt)
     write(IFUS,20) 'rcut    --->',rpos(nrcut_phi_us(lt))

     write(IFUS,*) &
          'rpos'//ch3//ch_del//'rpsi'//ch3//ch_del//'rphi'//ch3 &
          //ch_del//'rchi'//ch3//ch_del//'rbeta'//ch3

     do ir = 1,nmesh
        write(IFUS,10) rpos(ir), &
             rpsi_us(ir,lt),rphi_us(ir,lt), &
             rchi_us(ir,lt),rbeta_us(ir,lt)
     end do

     write(IFUS,*)
     write(IFUS,*) '(n,l,t,s) --->',nn,ll,tt, ispin
     write(IFUS,*) '(ln,tn) --->',ln,tn
     write(IFUS,*) 'pp_type --->  ',pp_type_tmp
     write(IFUS,*) &
          'gpos'//ch3//ch_del//'gpsi'//ch3//ch_del//'gphi'//ch3 &
          //ch_del//'gchi'//ch3//ch_del//'gbeta'//ch3

     do ig = 1,ng_mesh
        g = gpos(ig)
        write(IFUS,10) g, &
             g*psi_g_us(ig,lt),g*phi_g_us(ig,lt), &
             g*chi_g_us(ig,lt),g*beta_g_us(ig,lt)
     end do
  end do
10 format(10(1pe20.10))
20 format(1x,a12,f20.10)

  close(IFUS)

end subroutine write_us_kt
! ============================================================================= 4.0

!=====================================================================
   subroutine calc_qps_fourier(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ig, ln, tn, lm, tm, l3, ltlt
   real(8) :: r
   ier = 0
   qps_g_us(:,:,:) = 0.d0
   q_g_us(:,:) = 0.d0
   !do ltlt = 1,num_ltltx_us
   do ltlt = 1,num_ltlt_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      do l3 = abs(ln-lm),ln+lm,2
         do ir = 1,nmesh
            r = rpos(ir)
            qps_g_us(1,ltlt,l3) = qps_g_us(1,ltlt,l3)  &
               + r*r*wr(ir) * qps_us(ir,ltlt,l3)
         end do
         do ig = 2,ng_mesh
            gr_wk(:) = gpos(ig) * rpos(:)
            call bessel_js(l3,nmesh,gr_wk,js_wk)
            do ir = 1,nmesh
               r = rpos(ir)
               qps_g_us(ig,ltlt,l3) = qps_g_us(ig,ltlt,l3) &
                  + r*r*wr(ir) * qps_us(ir,ltlt,l3) * js_wk(ir)
            end do
         end do
      end do
      do ir = 1,nmesh
         r = rpos(ir)
         q_g_us(1,ltlt) = q_g_us(1,ltlt)  &
            + r*r*wr(ir) * q_us(ir,ltlt)
      end do
      do ig = 2,ng_mesh
         gr_wk(:) = gpos(ig) * rpos(:)
         call bessel_js(0,nmesh,gr_wk,js_wk)
         do ir = 1,nmesh
            r = rpos(ir)
            q_g_us(ig,ltlt) = q_g_us(ig,ltlt) &
               + r*r*wr(ir) * q_us(ir,ltlt) * js_wk(ir) 
         end do
      end do
   end do
99 continue
   end subroutine calc_qps_fourier

!=====================================================================

   subroutine write_qps(ier)
!=====================================================================
!
!  M. Okamoto
!     
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ir, ig, ln, tn, lm, tm, l3, ltlt
   real(8) :: r, g
   character(1) :: ch_ln, ch_tn, ch_lm, ch_tm, ch_l3, &
                   fn_label_orbital, fn_label_orbital_capital
   character(2) :: ch_del
   character(5) :: ch5
   character(6) :: ch6
   ier = 0
   ch_del = ','//' '
   open(IFQPS,file=trim(qpsfile),status='unknown')
   write(IFQPS,*) 'Qps[nmL](r) and their fourier transforms'
   call write_file_header(IFQPS)
   !do ltlt = 1,num_ltltx_us
   do ltlt = 1,num_ltlt_us
      ln = ln_ltlt_us(ltlt)
      tn = tn_ltlt_us(ltlt)
      lm = lm_ltlt_us(ltlt)
      tm = tm_ltlt_us(ltlt)
      ch_ln = fn_label_orbital(ln)
      ch_lm = fn_label_orbital(lm)
      write(ch_tn,'(i1)') tn
      write(ch_tm,'(i1)') tm
      ch5(1:5) = '_'//ch_ln(1:1)//ch_tn(1:1) &
                    //ch_lm(1:1)//ch_tm(1:1)
      write(IFQPS,*)
      write(IFQPS,*) 'r*r*Q[nm](r): (ln,tn,lm,tm) --->',ln,tn,lm,tm
      write(IFQPS,*) 'rpos'//ch5//ch_del//'rr_qnm'//ch5
      do ir = 1,nmesh
         r = rpos(ir)
         write(IFQPS,10) r,r*r*q_us(ir,ltlt)
      end do
      write(IFQPS,*)
      write(IFQPS,*) 'G*G*Q[nm](G): (ln,tn,lm,tm) --->',ln,tn,lm,tm
      write(IFQPS,*) 'gpos'//ch5//ch_del//'gg_qnm'//ch5
      do ig = 1,ng_mesh
         g = gpos(ig)
         write(IFQPS,10) g,g*g*q_g_us(ig,ltlt)
      end do
      do l3 = abs(ln-lm),ln+lm,2
         ch_l3 = fn_label_orbital_capital(l3)
         ch6(1:6) = ch5(1:5)//ch_l3(1:1)
         write(IFQPS,*)
         write(IFQPS,*) 'r*r*Qps[nmL](r): (ln,tn,lm,tm,L) --->', &
                                       ln,tn,lm,tm,l3
         write(IFQPS,*) 'rpos'//ch6//ch_del//'rr_qps'//ch6
         do ir = 1,nmesh
            r = rpos(ir)
            write(IFQPS,10) r,r*r*qps_us(ir,ltlt,l3)
         end do
         write(IFQPS,*)
         write(IFQPS,*) 'G*G*Qps[nmL](G): (ln,tn,lm,tm,L) --->', &
                                       ln,tn,lm,tm,l3
         write(IFQPS,*) 'gpos'//ch6//ch_del//'gg_qps'//ch6
         do ig = 1,ng_mesh
            g = gpos(ig)
            write(IFQPS,10) g,g*g*qps_g_us(ig,ltlt,l3)
         end do
      end do
   end do
10 format(10(1pe20.10))
   close(IFQPS)
   end subroutine write_qps

! ==================================== added by K. T. ========================= 4.0
subroutine write_qps_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier

  integer :: ir, ig, ln, tn, lm, tm, l3, ltlt
  integer :: ispin1, ispin2

  real(8) :: r, g
  character(1) :: ch_ln, ch_tn, ch_lm, ch_tm, ch_l3, &
       fn_label_orbital, fn_label_orbital_capital
  character(2) :: ch_del
  character(5) :: ch5
  character(6) :: ch6

  ier = 0
  ch_del = ','//' '

  open(IFQPS,file=trim(qpsfile),status='unknown')
  write(IFQPS,*) 'Qps[nmL](r) and their fourier transforms'

  call write_file_header(IFQPS)
   !do ltlt = 1,num_ltltx_us
  do ltlt = 1,num_ltlt_us
     ln = ln_ltlt_us(ltlt)
     tn = tn_ltlt_us(ltlt)
     lm = lm_ltlt_us(ltlt)
     tm = tm_ltlt_us(ltlt)

     ispin1 = spin1_index_ltlt_us(ltlt)
     ispin2 = spin2_index_ltlt_us(ltlt)

     ch_ln = fn_label_orbital(ln)
     ch_lm = fn_label_orbital(lm)

     write(ch_tn,'(i1)') tn
     write(ch_tm,'(i1)') tm

     ch5(1:5) = '_'//ch_ln(1:1)//ch_tn(1:1) &
          //ch_lm(1:1)//ch_tm(1:1)

     write(IFQPS,*)
     write(IFQPS,*) 'r*r*Q[nm](r): (ln,tn,sn,lm,tm,sm) --->',ln,tn,ispin1, lm,tm, ispin2
     write(IFQPS,*) 'rpos'//ch5//ch_del//'rr_qnm'//ch5

     do ir = 1,nmesh
        r = rpos(ir)
        write(IFQPS,10) r,r*r*q_us(ir,ltlt)
     end do

     write(IFQPS,*)
     write(IFQPS,*) 'G*G*Q[nm](G): (ln,tn,sn,lm,tm,sm) --->',ln,tn,ispin1,lm,tm, ispin2
     write(IFQPS,*) 'gpos'//ch5//ch_del//'gg_qnm'//ch5

     do ig = 1,ng_mesh
        g = gpos(ig)
        write(IFQPS,10) g,g*g*q_g_us(ig,ltlt)
     end do

     do l3 = abs(ln-lm),ln+lm,2
        ch_l3 = fn_label_orbital_capital(l3)
        ch6(1:6) = ch5(1:5)//ch_l3(1:1)

        write(IFQPS,*)
        write(IFQPS,*) 'r*r*Qps[nmL](r): (ln,tn,sn,lm,tm,sm,L) --->', &
             ln,tn,ispin1, lm,tm, ispin2, l3
        write(IFQPS,*) 'rpos'//ch6//ch_del//'rr_qps'//ch6

        do ir = 1,nmesh
           r = rpos(ir)
           write(IFQPS,10) r,r*r*qps_us(ir,ltlt,l3)
        end do

        write(IFQPS,*)
        write(IFQPS,*) 'G*G*Qps[nmL](G): (ln,tn,sn,lm,tm,sm,L) --->', &
             ln,tn, ispin1, lm,tm, ispin2, l3
        write(IFQPS,*) 'gpos'//ch6//ch_del//'gg_qps'//ch6

        do ig = 1,ng_mesh
           g = gpos(ig)
           write(IFQPS,10) g,g*g*qps_g_us(ig,ltlt,l3)
        end do
     end do
  end do

10 format(10(1pe20.10))

  close(IFQPS)

end subroutine write_qps_kt
! ============================================================================ 4.0

!=====================================================================
   subroutine calc_dekin_tkc(ier)
!=====================================================================
!
!  M. Okamoto
!
!---------------------------------------------------------------------
   use parameters
   implicit none
   integer,intent(out) :: ier
   integer :: ig, lt, nn, ll, tt, ips, ln, tn, it, ifound
   character(1) :: ch_ln, ch_tn, fn_label_orbital
   character(3) :: ch3
   real(8) :: arg, dekin_tmp, dekin_inf
   integer,allocatable :: igc(:)
   real(8),allocatable :: wgt(:)
   ier = 0
  !++++++++++++++++++++++++++++++++++++
   allocate(igc(ntkc))    ; igc = 0
   allocate(wgt(ng_mesh)) ; wgt = 0.d0
  !++++++++++++++++++++++++++++++++++++
   igc(:) = 0
   do it = 1,ntkc
      GLOOP: do ig = 1,ng_mesh
         if (gpos(ig) > tkc(it)) then
            igc(it) = ig - 1
            exit GLOOP
         end if
      end do GLOOP
   end do
   dekin_tkc(:,:) = 0.d0
   do lt = 1,num_ltx_us
      nn  = n_lt_us(lt)
      ll  = l_lt_us(lt)
      tt  = t_lt_us(lt)
      ln  = ln_lt_us(lt)
      tn  = tn_lt_us(lt)
      ips = ips_lt_us(lt)
      ch_ln = fn_label_orbital(ll)
      write(ch_tn,'(i1)') tn
      ch3(1:3) = '_'//ch_ln(1:1)//ch_tn(1:1)
      write(IFLOG,'(1x,a,5i3,2x,a,a)') &
         'dEkin: n = (n,l,t,ln,tn) -->',nn,ll,tt,ln,tn,'dEkin',ch3
      dekin_tmp = 0.d0
      do ig = 1,ng_mesh
         arg = gpos(ig)*gpos(ig)*phi_g_us(ig,lt)
         dekin_tmp = dekin_tmp + arg*arg*wg(ig)
      end do
      dekin_inf = dekin_tmp / PI
      do it = 1,ntkc
         call set_weight_unif(ier,1,ng_mesh-igc(it)+1,gpos,wgt)
         dekin_tmp = 0.d0
         do ig = igc(it),ng_mesh
            arg = gpos(ig)*gpos(ig)*phi_g_us(ig,lt)
            dekin_tmp = dekin_tmp + arg*arg*wgt(ig-igc(it)+1)
         end do
         dekin_tkc(lt,it) = dekin_tmp / PI
      end do
   end do
99 continue
  !++++++++++++++++++++
   deallocate(igc,wgt)
  !++++++++++++++++++++
   end subroutine calc_dekin_tkc

!================================= added by K. T. =========================== 4.0
subroutine calc_dekin_tkc_kt(ier)
  use parameters
  implicit none

  integer,intent(out) :: ier

  integer :: ig, lt, nn, ll, tt, ips, ln, tn, it, ifound
  character(1) :: ch_ln, ch_tn, fn_label_orbital
  character(3) :: ch3
  real(8) :: arg, dekin_tmp, dekin_inf

  integer :: ispin

  integer,allocatable :: igc(:)
  real(8),allocatable :: wgt(:)

  ier = 0

  !++++++++++++++++++++++++++++++++++++
  allocate(igc(ntkc))    ; igc = 0
  allocate(wgt(ng_mesh)) ; wgt = 0.d0
  !++++++++++++++++++++++++++++++++++++
  igc(:) = 0

  do it = 1,ntkc
     GLOOP: do ig = 1,ng_mesh
        if (gpos(ig) > tkc(it)) then
           igc(it) = ig - 1
           exit GLOOP
        end if
     end do GLOOP
  end do

  dekin_tkc(:,:) = 0.d0

  do lt = 1,num_ltx_us
     nn  = n_lt_us(lt)
     ll  = l_lt_us(lt)
     tt  = t_lt_us(lt)
     ln  = ln_lt_us(lt)
     tn  = tn_lt_us(lt)
     ips = ips_lt_us(lt)

     ispin = spin_index_lt_us(lt)

     ch_ln = fn_label_orbital(ll)

     write(ch_tn,'(i1)') tn
     ch3(1:3) = '_'//ch_ln(1:1)//ch_tn(1:1)

     write(IFLOG,'(1x,a,6i3,2x,a,a)') &
          'dEkin: n = (n,l,t,s,ln,tn) -->',nn,ll,tt, ispin, ln,tn,'dEkin',ch3

     dekin_tmp = 0.d0
     do ig = 1,ng_mesh
        arg = gpos(ig)*gpos(ig)*phi_g_us(ig,lt)
        dekin_tmp = dekin_tmp + arg*arg*wg(ig)
     end do

     dekin_inf = dekin_tmp / PI

     do it = 1,ntkc
        call set_weight_unif(ier,1,ng_mesh-igc(it)+1,gpos,wgt)

        dekin_tmp = 0.d0
        do ig = igc(it),ng_mesh
           arg = gpos(ig)*gpos(ig)*phi_g_us(ig,lt)
           dekin_tmp = dekin_tmp + arg*arg*wgt(ig-igc(it)+1)
        end do

        dekin_tkc(lt,it) = dekin_tmp / PI
     end do
  end do
99 continue

  !++++++++++++++++++++
  deallocate(igc,wgt)
  !++++++++++++++++++++

end subroutine calc_dekin_tkc_kt
! ============================================================================= 4.0
