!#define RCUT_SMOOTH

module m_Crystal_Field
  use m_Control_Parameters,   only : nspin, kimg, noncol
  use m_Const_Parameters, only : DP, PAI4, PAI
  use m_Crystal_Structure, only : univol, rltv
  use m_Ionic_System,     only : natm, ityp, pos, iatomn, ntyp, ivan, ival
  use m_PseudoPotential,  only : lpsmax, flg_paw, nmesh, mmesh, radr_paw, vlocr_pw, rhcorpw
  use m_Parallelization,   only : ista_kngp, iend_kngp, mpi_comm_group, ierr, ista_k, mype, npes, mype

  use m_Charge_Density,   only : chgq_l
  use m_PlaneWaveBasisSet,  only : kgp, ngabc, gr_l
  use m_Electronic_Structure,  only : vlhxc_l
  use m_XC_Potential,   only : vxc_l
!  use m_Nonlocal_Potential,  only : new_radr_and_wos
  use m_Files,  only : nfpot, nfout, m_Files_open_ps_files, m_Files_close_ps_file

  use m_CD_Mag_Moment,  only : rad_cov_default

  implicit none
  include 'mpif.h'

contains


  subroutine set_psir_f( has_open_forb, psir_f )
    integer, intent(out) :: has_open_forb(ntyp)
    real(kind=DP), intent(out) :: psir_f( mmesh, ntyp )
!    real(kind=DP), intent(out) :: rho_f( mmesh, ntyp )

    integer :: it, ierr, nfp, i, ir
    integer :: num_core_ae_wfns
    integer, allocatable :: qnum_n_core_ae_wfns(:)
    integer, allocatable :: qnum_l_core_ae_wfns(:)
    real(kind=DP), allocatable :: psir_core_ae_wfns(:,:)
    real(kind=DP), allocatable :: enelevel_core_ae_wfns(:)
    real(kind=DP), allocatable :: focc_core_ae_wfns(:)
    real(kind=DP) :: weight

! == KT_DEBUG === 2015/06/15 == 
!    call m_Files_open_ps_file(ivan,iatomn,ntyp,it,ierr)  
!                                      ! this does not work properly by unknown reason.
    call m_Files_open_ps_files(ivan,iatomn,ntyp,ierr)
! =============== 2015/06/15

    if (ierr/=0) call mpi_stop(nfout)

    has_open_forb = 0

    Do it=1, ntyp
       nfp = nfpot(it)

       if ( mype == 0 ) call read_num_core_ae_wfns( nfp, num_core_ae_wfns, it )
       if ( npes > 1 ) then
          call mpi_bcast(  num_core_ae_wfns, 1, mpi_integer, 0, mpi_comm_world, ierr )
       endif
       
       if ( num_core_ae_wfns == 0 ) cycle
       
       allocate( qnum_n_core_ae_wfns( num_core_ae_wfns ) )
       allocate( qnum_l_core_ae_wfns( num_core_ae_wfns ) )
       allocate( psir_core_ae_wfns( nmesh(it),  num_core_ae_wfns ) )
       allocate( enelevel_core_ae_wfns( num_core_ae_wfns ) )
       allocate( focc_core_ae_wfns( num_core_ae_wfns ) )
       
       if ( mype == 0 ) then
          call read_data_core_ae_wfns( nfp, num_core_ae_wfns, nmesh(it), &
               &                       qnum_n_core_ae_wfns, qnum_l_core_ae_wfns, &
               &                       psir_core_ae_wfns, &
               &                       enelevel_core_ae_wfns, focc_core_ae_wfns )
       endif
       
       if ( npes > 1 ) then
          call mpi_bcast( qnum_n_core_ae_wfns, num_core_ae_wfns, &
               &          mpi_integer, 0, mpi_comm_world, ierr )
          call mpi_bcast( qnum_l_core_ae_wfns, num_core_ae_wfns, &
               &          mpi_integer, 0, mpi_comm_world, ierr )
          call mpi_bcast( enelevel_core_ae_wfns, num_core_ae_wfns, &
               &          mpi_double_precision, 0, mpi_comm_world, ierr )
          call mpi_bcast( focc_core_ae_wfns, num_core_ae_wfns, &
               &          mpi_double_precision, 0, mpi_comm_world, ierr )
          call mpi_bcast( psir_core_ae_wfns, nmesh(it)*num_core_ae_wfns, &
               &          mpi_double_precision, 0, mpi_comm_world, ierr )
       endif

!
       Do i=1, num_core_ae_wfns
          if ( qnum_l_core_ae_wfns(i) == 3 ) then
             if ( focc_core_ae_wfns(i) > 0.1 .and. focc_core_ae_wfns(i) < 13.9 ) then
                has_open_forb(it) = 1
                Do ir=1, nmesh(it)
                   psir_f(ir,it) = psir_core_ae_wfns(ir,i)
                End Do
             endif
          endif
       End Do

       deallocate( qnum_n_core_ae_wfns )
       deallocate( qnum_l_core_ae_wfns )
       deallocate( psir_core_ae_wfns )
       deallocate( enelevel_core_ae_wfns )
       deallocate( focc_core_ae_wfns )

       call m_Files_close_ps_file(it)
    End Do
  end subroutine set_psir_f

  subroutine read_num_core_ae_wfns( nfp, nums, it )
    integer, intent(in) :: nfp, it
    integer, intent(out) :: nums

    integer :: length, ierr
    character(30) :: search_key

    nums = 0

    search_key = "CORE STATES";  length = len(search_key)
    call read_size_of_array_from_pp(nfp, nums, length, search_key, ierr)

    if ( ierr /= 0 ) then
       write(nfout,*) '----------------------'
       write(nfout,'(A,I2)') '!!! Keyword CORE STATES is not found in the PP', it
       write(nfout,*) '----------------------'
    endif

  end subroutine read_num_core_ae_wfns

  subroutine read_data_core_ae_wfns( nfp, num_core_ae_wfns, nmesh, &
       &                             qnum_n, qnum_l, psir_core, ene_level, focc )
    implicit none

    integer, intent(in) :: num_core_ae_wfns
    integer, intent(in) :: nmesh, nfp
    integer, intent(out) :: qnum_l(num_core_ae_wfns)
    integer, intent(out) :: qnum_n(num_core_ae_wfns)
    real(kind=8), intent(out) :: psir_core(nmesh, num_core_ae_wfns), &
         &                       ene_level(num_core_ae_wfns), focc(num_core_ae_wfns)

    integer :: i, n1, l1, k
    real(kind=DP) :: ene1, f1

    Do i=1, num_core_ae_wfns
       read(nfp,*) n1, l1, ene1, f1
       read(nfp,*) (psir_core(k,i),k=1,nmesh)
       !
       qnum_n(i) = n1;  qnum_l(i) = l1;  ene_level(i) = ene1;   focc(i) = f1
    End Do
  end subroutine read_data_core_ae_wfns

  subroutine read_size_of_array_from_pp( nfp, size_of_array, length, search_tag, ierr )
    implicit none

    integer, intent(in) :: nfp, length
    integer, intent(out) :: size_of_array, ierr
    character(length), intent(in) :: search_tag

    integer :: ifound
    character(30) :: line1

    size_of_array = 0;  ierr = 0

    Do while (.true.)
       read(nfp,'(a30)',end=10) line1
       ifound = index( line1, search_tag )
       if ( ifound /= 0 ) goto 20
    End do

10  ierr = 1; return

20  read(nfp,*) size_of_array

  end subroutine read_size_of_array_from_pp

  subroutine calc_vh( nmesh, rpos, rho, vh )
    integer, intent(in) :: nmesh
    real(kind=DP), intent(in) :: rpos(mmesh), rho(mmesh)
    real(kind=DP), intent(out) :: vh(mmesh)
    
    integer :: ir, ii, i0, is, j, jr, ier
    real(kind=DP) :: sum1, sum2
    real(kind=DP), allocatable :: wt(:)
    
    allocate( wt (nmesh) ); wt = 0.0d0
    
    do ir = 1,nmesh
       sum1 = 0.d0;  sum2 = 0.d0
       if (ir == 1) then
          sum1 = 0.d0
       else if ((ir >= 2).and.(ir <= 5)) then
          do ii = 2,ir
             i0 = ii-1; is = 1
             call set_open_weight_exp(ier,i0,is,rpos,wt)
             
             do j = 1,4
                sum1 = sum1 +rpos(i0+j*is)**2 *rho(i0+j*is) *wt(i0+j*is)
             end do
          end do
       else
          call set_weight_exp(ier,1,ir,rpos,wt)
          do jr = 1,ir
             sum1 = sum1 + rpos(jr)**2*rho(jr)*wt(jr)
          end do
       end if
       sum1 = sum1 *PAI4 /rpos(ir)
       
       if (ir == nmesh) then
          sum2 = 0.d0
       else if ((ir <= nmesh-1).and.(ir >= nmesh-4)) then
          do ii = ir,nmesh-1
             i0 = ii+1; is = -1
             call set_open_weight_exp(ier,i0,is,rpos,wt)
             do j = 1,4
                sum2 = sum2 -rpos(i0+j*is)**2 *rho(i0+j*is) *wt(i0+j*is)
             end do
          end do
       else
          call set_weight_exp(ier,ir,nmesh,rpos,wt)
          do jr = ir,nmesh
             sum2 = sum2 + rpos(jr)*rho(jr)*wt(jr)
          end do
       end if
       sum2 = sum2 *PAI4
       vh(ir) = sum1 + sum2
    end do
    
    deallocate( wt )
  end subroutine calc_vh

  subroutine m_CF_calc_CF_param
    integer if, ia, it, ik, is, ir, ig, ist
    integer nspher, il1, tau1, ismax
    integer ier

    real(kind=DP) :: fac, facr, csum, csum_mpi, c1, c2, csum2
    real(kind=DP) :: rcut

    integer, allocatable :: has_open_forb(:)
    real(kind=DP), allocatable :: qx(:), qy(:), qz(:), vlength(:), ylm(:), psir_f(:,:)
    real(kind=DP), allocatable :: wka(:), wkb(:), snl2(:), zfcos(:), zfsin(:), wos(:)
    real(kind=DP), allocatable :: vtot(:), work(:)

    if ( .not. flg_paw ) return

    ! ========= Assuming that f-electrons are core. ===
    allocate( psir_f(mmesh,ntyp) ); psir_f = 0.0d0
    allocate( has_open_forb(ntyp) );  has_open_forb = 0

    call set_psir_f( has_open_forb, psir_f )
!    write(2000+mype,*) has_open_forb
!    stop

#ifdef RCUT_SMOOTH
    call contract_f_orbital( has_open_forb, psir_f )
#endif

    allocate( qx(ista_kngp:iend_kngp) )
    allocate( qy(ista_kngp:iend_kngp) )
    allocate( qz(ista_kngp:iend_kngp) )
    allocate( vlength(ista_kngp:iend_kngp) )

    allocate( wka(ista_kngp:iend_kngp) )
    allocate( wkb(ista_kngp:iend_kngp) )
    allocate( ylm(ista_kngp:iend_kngp) )
    allocate( snl2(ista_kngp:iend_kngp) )
    allocate( zfcos(ista_kngp:iend_kngp) )
    allocate( zfsin(ista_kngp:iend_kngp) )
!    allocate( radr(mmesh) ); 
    allocate( wos(mmesh) )

!    fac = PAI4 /dsqrt(univol)
    fac = PAI4 

    Do ig=ista_kngp, iend_kngp
       qx(ig) = rltv(1,1)*ngabc(ig,1) +rltv(1,2)*ngabc(ig,2) +rltv(1,3)*ngabc(ig,3)
       qy(ig) = rltv(2,1)*ngabc(ig,1) +rltv(2,2)*ngabc(ig,2) +rltv(2,3)*ngabc(ig,3)
       qz(ig) = rltv(3,1)*ngabc(ig,1) +rltv(3,2)*ngabc(ig,2) +rltv(3,3)*ngabc(ig,3)
       vlength(ig) = sqrt( qx(ig)**2 + qy(ig)**2 + qz(ig)**2 )
    End do

! -----------
    Do ia=1, natm
       it = ityp(ia)
       if ( has_open_forb(it) == 0 ) cycle
       
       call set_weight_exp( ier, 1, nmesh(it), radr_paw(:,it), wos )

! ---
       nspher = 5           ! Y_20
       call sphr( iend_kngp -ista_kngp +1, nspher, &
            &     qx(ista_kngp:iend_kngp), &
            &     qy(ista_kngp:iend_kngp), &
            &     qz(ista_kngp:iend_kngp), ylm(ista_kngp:iend_kngp) )
!
       rcut = rad_cov_default( nint(iatomn(it)) )

       csum2 = 0.0d0
       do ir = 1,nmesh(it)
#ifdef RCUT_SMOOTH
          if ( radr_paw(ir,it) > rcut ) cycle
#endif
          facr = wos(ir) *psir_f(ir,it)**2 *radr_paw(ir,it)**2
          csum2 = csum2 +facr
       End do

       snl2 = 0.0d0
       do ir = 1,nmesh(it)
#ifdef RCUT_SMOOTH
          if ( radr_paw(ir,it) > rcut ) cycle
#endif
          facr = wos(ir) *psir_f(ir,it) *psir_f(ir,it) 

          Do ig=ista_kngp, iend_kngp
             wka(ig) = vlength(ig) *radr_paw(ir,it)
          End do

          call dsjnv( 2, iend_kngp -ista_kngp +1, &        
               &      wka(ista_kngp:iend_kngp), &
               &      wkb(ista_kngp:iend_kngp) ) 
                                                     ! -(bottom_Subr.)
          Do ig=ista_kngp, iend_kngp
             snl2(ig) = snl2(ig) + fac *facr *wkb(ig) *ylm(ig)
          End do
       end do
       snl2 = -snl2        ! i^2 = -1; where 2 <= Y_20

       call calc_phase2( natm, pos, ia, kgp, ngabc, ista_kngp, iend_kngp, zfcos, zfsin )

       csum = 0.0d0

       ist = ista_kngp
!       if(ist == 1) ist = 2

       if ( noncol ) then
          ismax = 1
          Do is=1, ismax
             Do ig=ist, iend_kngp
                c1 = vlhxc_l(ig,1,is)*zfcos(ig) -vlhxc_l(ig,2,is)*zfsin(ig)
                !                c1 = PAI4 *( chgq_l(ig,1,1) *zfcos(ig) -chgq_l(ig,2,1) *zfsin(ig) ) &
                !!                    & /gr_l(ig)**2
                csum = csum +c1 *snl2(ig)
             End do
          End do
       else
          ismax = nspin
          Do is=1, ismax
             if ( kimg ==2 ) then
                Do ig=ist, iend_kngp
!                   c1 = vlhxc_l(ig,1,is)*zfcos(ig) -vlhxc_l(ig,2,is)*zfsin(ig)
                   c1 = ( vlhxc_l(ig,1,is) -vxc_l(ig,1,is) ) *zfcos(ig) &
                        & -( vlhxc_l(ig,2,is) -vxc_l(ig,2,is) ) *zfsin(ig)
                   csum = csum +c1 *snl2(ig)
                End Do
             else
                Do ig=ist, iend_kngp
                   c1 = vlhxc_l(ig,1,is)*zfcos(ig)
                   csum = csum +c1 *snl2(ig)
                End Do
             endif
          End do
          csum = csum /dble(nspin)
       endif

       call mpi_allreduce( csum, csum_mpi, 1, mpi_double_precision, mpi_sum, &
            &              mpi_comm_group, ierr )


       if ( mype == 0 ) then
          write(*,*) "atom, A20<r2>[K]: ", ia, csum_mpi*sqrt(5./16./PAI)*3.1577513*1.0E5, &
               &     " <r2>[bohr2]: ", csum2
       endif
    End do

!    deallocate( radr ); 
    deallocate( wos )

    call mpi_barrier( mpi_comm_group, ierr )

  end subroutine m_CF_calc_CF_param

  subroutine contract_f_orbital( has_open_forb, psir )
    integer, intent(in) :: has_open_forb(ntyp)
    real(kind=DP), intent(inout) :: psir(mmesh,ntyp)

    integer :: ik, ilmt1, il1, tau1, ir, ier, it
    real(kind=DP) :: csum, factor, rcut, beta
    real(kind=DP), allocatable :: tmp_fn(:), wos(:)

    allocate( tmp_fn(mmesh) );  allocate( wos(mmesh) ); 
    beta = 0.5d0

    Do it=1, ntyp
       if ( has_open_forb(it) == 0 ) cycle

       rcut = rad_cov_default( nint(iatomn(it)) )
                    ! Revised according to a report from ASMS Co.ltd, 10 March 2016.

       call set_weight_exp( ier, 1, nmesh(it), radr_paw(:,it), wos )

       tmp_fn = 0.0d0
       Do ir=1, nmesh(it)
          tmp_fn(ir) = psir(ir,it) *exp( -radr_paw(ir,it)/rcut)
!          tmp_fn(ir) = psir(ir,it) /( 1.0d0 +exp( beta *(radr_paw(ir,it)-rcut) ) )
       End do
       
       csum = 0.0d0
       Do ir=1, nmesh(it)
          csum = csum + wos(ir) *tmp_fn(ir)**2
       End do
       
       factor = 1.0d0 /sqrt(csum)
       psir(:,it) = 0.0d0

       do ir = 1,nmesh(it)
          psir(ir,it) = tmp_fn(ir) *factor
       end do
    End Do
    call mpi_barrier( mpi_comm_group, ierr )
    deallocate(tmp_fn); deallocate(wos)

  end subroutine contract_f_orbital

#if 0
R: 4f atom no aru site
vh_lm(|r-R|) = int vh(r) Ylm(|r-R|)  
             = int sum_G vh(G)exp(iGr) Ylm(|r-R|)      ( r-R=r'; r = r'+R )
             =                exp(iGr') exp(iGR) Ylm(|r'|) 
             = sum_G vh(G) exp(iGR) Ylm(G) j_l(Gr') ?? 

vh(r) R4f(r-R)^2 (r-R)^2 dr
#endif

end module m_Crystal_Field
