module m_KineticEnergy_Density
  use m_Control_Parameters,  only : nspin, kimg, noncol, &
       &                            use_symm_ekin_density, use_asymm_ekin_density, &
       &                            g0_wf_precon, amin_wf_precon, af

  use m_KPoints,             only : kv3, vkxyz
  use m_Const_Parameters,    only : DP, DIRECT, OFF, ELECTRON, DELTA, BUCS, PAI2, NO, &
       &                            ANTIFERRO
  use m_PlaneWaveBasisSet,    only : kgp, kg1, ngabc, iba, nbase, igf, kg, gr_l, &
       &                             ngpt_l, ista_k, iend_k
  use m_Crystal_Structure,    only : rltv, univol, tau, nopr
  use m_Electronic_Structure, only : zaj_l, occup_l, m_ES_WF_in_Rspace_3D1
  use m_FFT,                  only : m_FFT_alloc_WF_work, m_FFT_dealloc_WF_work, &
       &                             nfft, m_FFT_WF, fft_box_size_WF
  use m_Parallelization,      only : np_e, map_z, map_k, myrank_k, mpi_comm_group, &
       &                             npes, ista_kngp, iend_kngp, mype
  use m_Files,                only : nfout
  
  implicit none
  include 'mpif.h'

  real(kind=DP), allocatable :: ekins_l(:,:,:)  ! symmetric, positive definite
  real(kind=DP), allocatable :: ekina_l(:,:,:)  ! asymmetric

  real(kind=DP), allocatable :: ekins_old(:,:,:) 
  real(kind=DP), allocatable :: ekina_old(:,:,:) 

contains

  subroutine m_KE_alloc_ekin_density
    if ( use_symm_ekin_density ) then
       if ( noncol ) then
       else
          allocate( ekins_l(ista_kngp:iend_kngp,kimg,nspin) )
          allocate( ekins_old(ista_kngp:iend_kngp,kimg,nspin) )
       endif
       ekins_l = 0.0d0;   ekins_old = 0.0d0
    endif
    if ( use_asymm_ekin_density ) then
       if ( noncol ) then
       else
          allocate( ekina_l(ista_kngp:iend_kngp,kimg,nspin) )
          allocate( ekina_old(ista_kngp:iend_kngp,kimg,nspin) )
       endif
       ekina_l = 0.0d0;   ekina_old = 0.0d0
    endif
  end subroutine m_KE_alloc_ekin_density

  subroutine m_KE_dealloc_ekin_density
    if ( allocated( ekins_l ) ) deallocate( ekins_l )
    if ( allocated( ekina_l ) ) deallocate( ekina_l )
    if ( allocated( ekins_old ) ) deallocate( ekins_old )
    if ( allocated( ekina_old ) ) deallocate( ekina_old )
  end subroutine m_KE_dealloc_ekin_density

  subroutine m_KE_cp_ekin_density_to_old
    if ( use_symm_ekin_density )  ekins_old = ekins_l
    if ( use_asymm_ekin_density ) ekina_old = ekina_l
  end subroutine m_KE_cp_ekin_density_to_old

  subroutine m_KE_simple_mixing( rmxt )
    real(kind=DP), intent(in) :: rmxt

    if ( use_symm_ekin_density ) then
       ekins_l = ekins_old + rmxt*( ekins_l -ekins_old )
    endif
    if ( use_asymm_ekin_density ) then
       ekina_l = ekina_old + rmxt*( ekina_l -ekina_old )
    endif
  end subroutine m_KE_simple_mixing

  subroutine m_KE_kerker_mixing
    integer :: i, is, ri
    real(kind=DP) :: gg, q0, fac_min
    real(kind=DP) :: rmxtrc(nspin)
!
    real(kind=DP), allocatable :: c_p_ekin(:,:), dekin(:,:,:)

    allocate( c_p_ekin( ista_kngp:iend_kngp,nspin) );  c_p_ekin = 0.0d0
    allocate( dekin( ista_kngp:iend_kngp,kimg,nspin) );     dekin = 0.0d0

!    q0 = 1.0d0
!    q0 = 5.0d0
!    q0 = 2.0d0

    q0 = g0_wf_precon **2
    fac_min = amin_wf_precon

    Do i=ista_kngp, iend_kngp
       gg = gr_l(i)*gr_l(i)
       Do is=1, nspin
          c_p_ekin(i,is) = max( gg/(gg+q0), fac_min )
          write(1000+mype,*) i, is, c_p_ekin(i,is)
       End do
    End Do
    stop

    if ( use_symm_ekin_density ) then
       dekin = ekins_l - ekins_old
       Do is=1, nspin
          Do ri=1, kimg
             Do i=ista_kngp, iend_kngp
                ekins_l(i,ri,is) = ekins_old(i,ri,is) &
                     &  +c_p_ekin(i,is) *dekin(i,ri,is) 
             End do
          End do
       End do
    endif
    if ( use_asymm_ekin_density ) then
       dekin = ekina_l - ekina_old
       Do is=1, nspin
          Do ri=1, kimg
             Do i=ista_kngp, iend_kngp
                ekina_l(i,ri,is) = ekina_old(i,ri,is) &
                     &  +c_p_ekin(i,is) *dekin(i,ri,is)
             End do
          End do
       End do
    endif

    deallocate( c_p_ekin)
    deallocate( dekin )

  end subroutine m_KE_kerker_mixing

  subroutine m_KE_calc_asymm_ekin_density
    ekina_l = 0.0d0

    call calc_local_part

  contains

    subroutine calc_local_part
      integer :: ispin, ik, ib, ixyz, ierr
      integer :: i, ig, ri, iend, i1
      real(kind=DP) :: fac, occupation, c1
      real(kind=DP), allocatable :: qxyz(:,:), psi_l(:,:,:,:)
      real(kind=DP), allocatable :: afft(:), bfft(:), cfft(:)
      
      allocate(afft(nfft)); afft =0.0d0; 
      allocate(bfft(nfft)); bfft= 0.0d0
      allocate(cfft(nfft)); cfft= 0.0d0
      allocate( qxyz(kg1,3 ) ); qxyz = 0.0d0

      call m_FFT_alloc_WF_work()
    
      fac = 2.d0/(univol*kv3*product(fft_box_size_WF(1:3,1)))
      
      Do ispin=1, nspin
         afft = 0.0d0

         Do ik=1, kv3, nspin
            if ( map_k(ik) /= myrank_k ) cycle
            
            call k_plus_G_vectors_m( ik, kgp, kg1, kv3, iba, nbase, ngabc, rltv, &
                 &                   qxyz(:,1), qxyz(:,2), qxyz(:,3) )
            
            allocate( psi_l( kg1, np_e, ik:ik, kimg ) ); psi_l = 0.0d0
            
            Do ib=1, np_e
               occupation = occup_l( map_z(ib),ik )
               if ( occupation < DELTA ) cycle
               
!!!!BRANCH_P 3D_Parallel
!!!               call m_ES_WF_in_Rspace_3D1( ista_k, iend_k, ik, ib, zaj_l, cfft )
!!!!BRANCH_P_END 3D_Parallel
               
               Do ig=1, iba(ik)
                  c1 = qxyz(ig,1)**2 + qxyz(ig,2)**2 + qxyz(ig,3)**2
                  psi_l(ig,ib,ik,2) = -zaj_l(ig,ib,ik,1) *c1
               End Do
!!!BRANCH_P 3D_Parallel
!!               call m_ES_WF_in_Rspace_3D1( ik, ik, ik, ib, psi_l, bfft )
!!!BRANCH_P_END 3D_Parallel
               
               Do i = 1, nfft-1, 2
                  c1 = bfft(i)*cfft(i) +bfft(i+1)*cfft(i+1) 
                  afft(i) = afft(i) + occupation *c1
               End do
               
            End do
            deallocate( psi_l )
         End do
         !
         if ( npes > 1 ) then
            call mpi_allreduce( afft, bfft, nfft, mpi_double_precision, &
                 &              mpi_sum, mpi_comm_group, ierr )
            afft = bfft     
         end if
         call m_FFT_WF(ELECTRON,nfout,afft,DIRECT,OFF)
         !
         do ri = 1, kimg
            iend = iend_kngp
            if( iend_kngp > kg ) iend = kg
            if( ista_kngp <= iend ) then
               do i = ista_kngp, iend  !for mpi
                  i1 = kimg*igf(i) + (ri - kimg)
                  ekina_l(i,ri,ispin) = afft(i1)*fac
               end do
            endif
         end do
         
      End Do
      
      ekina_l = ekina_l /2.0d0
 
      call symmetrize_ekin( NO, ekina_l )

      deallocate( afft ); deallocate(bfft ); deallocate( cfft ); deallocate( qxyz )
      call m_FFT_dealloc_WF_work()

    end subroutine calc_local_part

  end subroutine m_KE_calc_asymm_ekin_density

  subroutine m_KE_calc_symm_ekin_density
    ekins_l = 0.0d0
    call calc_local_part

  contains

    subroutine calc_local_part
      integer :: ispin, ik, ib, ixyz, ierr
      integer :: i, ig, ri, iend, i1
      real(kind=DP) :: fac, occupation
      real(kind=DP), allocatable :: qxyz(:,:), psi_l(:,:,:,:)
      real(kind=DP), allocatable :: afft(:), bfft(:)
      
      allocate(afft(nfft)); afft = 0.0d0
      allocate(bfft(nfft)); bfft= 0.0d0
      allocate( qxyz(kg1,3 ) ); qxyz = 0.0d0
    
      call m_FFT_alloc_WF_work()
    
      fac = 2.d0/(univol*kv3*product(fft_box_size_WF(1:3,1)))

      Do ispin=1, nspin
         afft = 0.0d0
       
         Do ik=1, kv3, nspin
            if ( map_k(ik) /= myrank_k ) cycle
            
            call k_plus_G_vectors_m( ik, kgp, kg1, kv3, iba, nbase, ngabc, rltv, &
                 &                   qxyz(:,1), qxyz(:,2), qxyz(:,3) )
          
            allocate( psi_l( kg1, np_e, ik:ik, kimg ) ); psi_l = 0.0d0

            Do ib=1, np_e
               occupation = occup_l( map_z(ib),ik )
               if ( occupation < DELTA ) cycle
             
               Do ixyz=1, 3
                  psi_l = 0.0d0
                  if ( kimg == 1 ) then
                     stop "Not supported"
                  else
                     Do ig=1, iba(ik)
                        psi_l(ig,ib,ik,2) =  zaj_l(ig,ib,ik,1) *qxyz(ig,ixyz)
                        psi_l(ig,ib,ik,1) = -zaj_l(ig,ib,ik,2) *qxyz(ig,ixyz)
                     End do
                  endif
                
!!!BRANCH_P 3D_Parallel
!!                  call m_ES_WF_in_Rspace_3D1( ik, ik, ik, ib, psi_l, bfft )
!!!BRANCH_P_END 3D_Parallel
                
                  Do i = 1, nfft-1, 2
                     afft(i) = afft(i) + occupation*(bfft(i)**2+bfft(i+1)**2) ! MPI
                  End do
                  
               End Do
            End do
            deallocate( psi_l )
         End do
       !
         if ( npes > 1 ) then
            call mpi_allreduce( afft, bfft, nfft, mpi_double_precision, &
                 &              mpi_sum, mpi_comm_group, ierr )
            afft = bfft   
         end if
         call m_FFT_WF(ELECTRON,nfout,afft,DIRECT,OFF)
         !
         do ri = 1, kimg
            iend = iend_kngp
            if( iend_kngp > kg ) iend = kg
            if( ista_kngp <= iend ) then
               do i = ista_kngp, iend  !for mpi
                  i1 = kimg*igf(i) + (ri - kimg)
                  ekins_l(i,ri,ispin) = afft(i1)*fac
               end do
            endif
         end do

      End Do

      ekins_l = ekins_l /2.0d0

      call symmetrize_ekin( NO, ekins_l )

      deallocate( afft ); deallocate(bfft ); deallocate( qxyz )
      call m_FFT_dealloc_WF_work()

    end subroutine calc_local_part

  end subroutine m_KE_calc_symm_ekin_density

  subroutine k_plus_G_vectors_m(ik,kgp,kg1,knv3,iba,nbase,ngabc,rltv, &
       &                        qx,qy,qz)
    integer, intent(in)        :: ik, kgp,kg1,knv3,iba(knv3),nbase(kg1,knv3)
    integer, intent(in)        :: ngabc(kgp,3)
    real(kind=DP), intent(in)  :: rltv(3,3)
    real(kind=DP), intent(out) :: qx(kg1),qy(kg1),qz(kg1)

    integer :: i, ip
    real(kind=DP) :: ga, gb, gc

    do i = 1, iba(ik)
       ip = nbase(i,ik)
       ga = vkxyz(ik,1,BUCS) + real(ngabc(ip,1),kind=DP)
       gb = vkxyz(ik,2,BUCS) + real(ngabc(ip,2),kind=DP)
       gc = vkxyz(ik,3,BUCS) + real(ngabc(ip,3),kind=DP)
       qx(i)  = rltv(1,1)*ga + rltv(1,2)*gb + rltv(1,3)*gc
       qy(i)  = rltv(2,1)*ga + rltv(2,2)*gb + rltv(2,3)*gc
       qz(i)  = rltv(3,1)*ga + rltv(3,2)*gb + rltv(3,3)*gc
    end do
  end subroutine k_plus_G_vectors_m

  subroutine symmetrize_ekin(mode,ekin)
    integer,intent(in)           :: mode 
    real(kind=DP), intent(inout) :: ekin(ista_kngp:iend_kngp,kimg,nspin)

    integer ::       ispin, ng, no, ngp, no1, no2
    real(kind=DP) :: fi, tx,ty,tz, fp, fc, fs, zcr, zci
    real(kind=DP), allocatable :: work(:,:), work2(:,:)

    allocate(work(kgp,kimg)); work = 0.d0
    allocate(work2(ista_kngp:iend_kngp,kimg)); work2 = 0.d0

    if(mode == ANTIFERRO) then
       fi = 1.d0/af
       no1 = nopr + 1; no2 = nopr + af
    else
       fi = 1.d0/nopr
       no1 = 1; no2 = nopr
    end if

    do ispin = 1, nspin, af+1
       work = 0.0d0
       call cp_ekin_to_work(ispin,ekin)     ! ekin -> work
       work2 = 0.d0                          ! initialization

       do no = no1, no2
          tx = tau(1,no,BUCS)*PAI2
          ty = tau(2,no,BUCS)*PAI2
          tz = tau(3,no,BUCS)*PAI2
          if(kimg == 1) then
             do ng = ista_kngp, iend_kngp !for mpi
                ngp = ngpt_l(ng,no)
                fp = ngabc(ngp,1)*tx + ngabc(ngp,2)*ty + ngabc(ngp,3)*tz
                work2(ng,1)        = work2(ng,1) + dcos(fp)*work(ngp,1)
             end do
          else if(kimg == 2) then
             do ng = ista_kngp, iend_kngp !for mpi
                ngp= ngpt_l(ng,no)
                fp = ngabc(ngp,1)*tx + ngabc(ngp,2)*ty + ngabc(ngp,3)*tz
                fc = dcos(fp);     fs = dsin(fp)
                zcr= work(ngp,1);  zci= work(ngp,kimg)
                work2(ng,1)        = work2(ng,1) + fc*zcr - fs*zci
                work2(ng,2)        = work2(ng,2) + fc*zci + fs*zcr
             end do
          end if
       end do
       if(mode /= ANTIFERRO) ekin(:,:,ispin) = work2(:,:)*fi
    end do

    if(mode == ANTIFERRO) ekin(:,:,nspin) = work2(:,:)*fi

    deallocate( work )
    deallocate(work2)

  contains

    subroutine cp_ekin_to_work(ispin,ekin)
      integer, intent(in) :: ispin
      real(DP),intent(in),dimension(ista_kngp:iend_kngp,kimg,nspin) :: ekin
      
      integer :: ng,ri, ierr
      real(kind=DP), allocatable, dimension(:,:) :: work_mpi
      
      do ri = 1, kimg
         do ng = ista_kngp, iend_kngp  !for mpi
            work(ng,ri) = ekin(ng,ri,ispin)
         end do
      end do
      
      if(npes >= 2) then
         allocate(work_mpi(kgp,kimg)); work_mpi = 0.d0
         call mpi_allreduce( work, work_mpi, kgp*kimg, mpi_double_precision, &
              &              mpi_sum, mpi_comm_group, ierr )
         work = work_mpi
         deallocate(work_mpi)
      end if
      
    end subroutine cp_ekin_to_work
    
  end subroutine symmetrize_ekin

#if 0
    subroutine calc_TF_vWeiz_ekin_density( rho, grad_rho, grad2_rho, &
         &                                 weight_TF, weight_vWeiz, ekin_dens )
      real(kind=DP), intent(in) :: weight_TF,  weight_vWeiz
      real(kind=DP), intent(in) :: rho( ista_fftph:iend_fftph, nspin )
      real(kind=DP), intent(in) :: grad_rho( ista_fftph:iend_fftph, nspin )
      real(kind=DP), intent(in) :: grad2_rho( ista_fftph:iend_fftph, nspin )
      real(kind=DP), intent(out) :: ekin_dens( ista_fftph:iend_fftph, nspin )

      integer :: is, i
      real(kind=DP) :: c1, coeff, term1, term2, term3
      real(kind=DP), parameter :: delta1 = 1.0E-12

      coeff = 3.0d0/10.0d0 *(3.0D0*PAI**2)**( 2.0d0/3.0d0 )

      ekin_dens = 0.0d0

      Do is=1, nspin
         Do i=ista_fftph, iend_fftph
            c1 = rho(i,is)
            term1 = coeff *c1**(5.0d0/3.0d0)
            if ( c1 > delta1 ) then
               term2 = grad_rho(i,is)**2 /c1 /8.0d0
            else
               term2 = 0.0d0
            endif
            ekin_dens(i,is) = weight_TF *term1 + weight_vWeiz *term2
         End do
      End Do
    end subroutine calc_TF_vWeiz_ekin_density

    subroutine calc_Abramov_ekin_density( rho, grad_rho, grad2_rho, ekin_dens )
      real(kind=DP), intent(in) :: rho( ista_fftph:iend_fftph, nspin )
      real(kind=DP), intent(in) :: grad_rho( ista_fftph:iend_fftph, nspin )
      real(kind=DP), intent(in) :: grad2_rho( ista_fftph:iend_fftph, nspin )
      real(kind=DP), intent(out) :: ekin_dens( ista_fftph:iend_fftph, nspin )

      integer :: is, i
      real(kind=DP) :: c1, c2, coeff, term1, term2, term3
      real(kind=DP), parameter :: delta1 = 1.0E-15

      coeff = 3.0d0/10.0d0 *(3.0D0*PAI**2)**( 2.0d0/3.0d0 )

      ekin_dens = 0.0d0

      Do is=1, nspin
         Do i=ista_fftph, iend_fftph
            c1 = rho(i,is) *nspin
            term1 = coeff *c1**(5.0d0/3.0d0) /nspin

            if ( c1 > delta1 ) then
               c2 = grad_rho(i,is) *nspin
               term2 = c2**2 /c1 /8.0 /nspin

               term2 = term2 /9.0d0
            else
               term2 = 0.0d0
            endif

            term3 = grad2_rho(i,is) /6.0d0
            term3 = term3 *1.05

            ekin_dens(i,is) = term1 + term2 + term3
         End do
      End Do
    end subroutine calc_Abramov_ekin_density
#endif

end module m_KineticEnergy_Density
