!=======================================================================
!
!  PROGRAM  PHASE/0 2019.01 ($Rev: 581 $)
!
!  MODULE: m_FiniteElectricField
!
!  AUTHOR(S): T. Yamamoto   Oct/01/2007
!
!  Contact address :  IIS,The University of Tokyo RSS21 project
!
!
!
!=======================================================================
!
!     The original version of this set of the computer programs "PHASE"
!  was developed by the members of the Theory Group of Joint Research
!  Center for Atom Technology (JRCAT), based in Tsukuba, in the period
!  1993-2001.
!
!     Since 2002, this set has been tuned and new functions have been
!  added to it as a part of the national project "Frontier Simulation
!  Software for Industrial Science (FSIS)",  which is supported by
!  the IT program of the Ministry of Education, Culture, Sports,
!  Science and Technology (MEXT) of Japan.
!     Since 2006, this program set has been developed as a part of the
!  national project "Revolutionary Simulation Software (RSS21)", which
!  is supported by the next-generation IT program of MEXT of Japan.
!   Since 2013, this program set has been further developed centering on PHASE System
!  Consortium.
!   The activity of development of this program set has been supervised by Takahisa Ohno.
!
!
module m_FiniteElectricField
! $ID: $
  use m_Const_Parameters,     only : DP, PAI, PAI2, PAI4, ON, OFF, CARTS, BUCS &
                                 & , MONKHORST_PACK, PARA, EXECUT, SKIP
  use m_Control_Parameters,   only : iprifef, nspin, printable, elec_field &
                                 & , kimg, way_ksample, sw_check_polar
  use m_Files,                only : nfout
  use m_Crystal_Structure,    only : altv, rltv, univol, nopr, op, imag
  use m_PlaneWaveBasisSet,    only : kg1, ngabc, nbase, iba, kgp
  use m_Electronic_Structure, only : totch, zaj_l, m_ES_add_it_to_vnlph, nrvf_ordr, neordr &
                                 & , fsr_l,fsi_l
  use m_Kpoints,              only : kv3, k_symmetry, vkxyz, mp_index, kshift
  use m_Ionic_System,         only : ntyp,natm,natm2,ityp,cps,iwei,pos
  use m_PseudoPotential,      only : ival,modnrm,dk_fef,qitg_fef &
                                 & , m_PP_include_vanderbilt_pot &
                                 & , n_non0_lmtxlmt,index_lmt1_lmt2,lmta,nlmt,ilmt &
                                 & , ltp,taup,il2p,isph,iqitg,dl2p &
                                 & , m_PP_find_maximum_l
  use m_NonLocal_Potential,   only : snl
  use m_Parallelization,      only : npes,mype,map_ek,map_z
  implicit none
 
  integer :: ns ! the number of valence states
  integer :: msize ! maxval(mp_index)
  integer :: numef ! the number of conponents of the electric filed
  integer, allocatable :: kgbpp(:,:,:,:) !d(0:n1-1,0:n2-1,0:n3-1,numef)
  integer, allocatable :: kgbpm(:,:,:,:) !d(0:n1-1,0:n2-1,0:n3-1,numef)
  integer, allocatable :: indgbp(:,:,:,:,:,:) !d(kg1,2,0:n1-1,0:n2-1,0:n3-1,numef)
  integer, allocatable :: indgbm(:,:,:,:,:,:) !d(kg1,2,0:n1-1,0:n2-1,0:n3-1,numef)
  integer :: elec_id(3)
  real(kind=DP) :: efa(3)
  complex(kind=DP), allocatable :: smat(:,:) !d(ns,ns)
  real(kind=DP), allocatable :: berry(:,:,:) !d(0:msize-1,0:msize-1,numef)
  complex(kind=DP), allocatable :: berry_det(:,:,:,:) !d(0:n1-1,0:n2-1,0:n3-1,numef)

  integer :: nopr_trs
  integer, allocatable :: op_trs(:,:,:) !d(3,3,nopr_trs)

  integer, allocatable :: map_nnn2ik(:,:,:) !d(0:n1-1,0:n2-1,0:n3-1)
  integer, allocatable :: map_nnn2op(:,:,:) !d(0:n1-1,0:n2-1,0:n3-1)
  integer, allocatable :: map_nnn2gs(:,:,:,:) !d(3,0:n1-1,0:n2-1,0:n3-1)
  integer, allocatable :: map_ik2nnn(:,:,:) !d(3,nopr_trs,kv3) = {[n1,n2,n3]}
  logical, allocatable :: flag_op(:,:) !d(nopr_trs,kv3)

  integer :: ngmax
  integer, allocatable :: ngpt(:,:) !d(ngmax,nopr_trs)

  real(kind=DP), allocatable :: grad_fef(:,:,:,:) !d(kg1,ns,kv3,kimg)

  complex(kind=DP), private, allocatable :: ftqe(:,:,:,:) !d(nlmt,nlmt,natm,numef)

contains

  function modp(k,m) result(ks)
    integer :: ks
    integer, intent(in) :: k,m
    if(k<0) then
       ks = k + m*(1-int(dble(k)/m-1.d-8))
    else
       ks = k - m*int(dble(k)/m+1.d-8)
    end if
  end function modp

  subroutine m_FEF_init(nfout)
    integer, intent(in) :: nfout
    integer :: ig,ik
    integer :: jj1,jj2,i1,i2,i3,j,jp,ik1,iopr1,ik2,iopr2
    integer :: ipm,ii(3)
    integer :: iopr,jg,ng(3)
    integer :: irot(3,3)
    integer :: m1,m2,m3
    integer :: igs(3)

    call check_elec_field

    call check_kpoints
    
    write(nfout,*) 'efa=',efa(1:3)
    write(nfout,*) 'elec_id=',elec_id

    call alloc_ngpt !-> ngmax

    do iopr=1,nopr_trs
       irot(:,:) = nint(matmul(transpose(altv),matmul(op_trs(:,:,iopr),rltv))/PAI2)
       write(nfout,'("irot=",9(1x,i3))') irot(1:3,1:3)
       IG_LOOP: do ig=1,ngmax
          ng(1:3) = matmul(irot,ngabc(ig,1:3))
          do jg=1,kgp
             if(ngabc(jg,1)==ng(1).and.ngabc(jg,2)==ng(2).and.ngabc(jg,3)==ng(3)) then
                ngpt(ig,iopr) = jg
                cycle IG_LOOP
             end if
          end do
          write(nfout,*) 'Error: NOT found the rotated G'
          stop 'Error: NOT found the rotated G'
       end do IG_LOOP
    end do

    call alloc_indgb

    m1 = mp_index(1)-1
    m2 = mp_index(2)-1
    m3 = mp_index(3)-1
    do ig=1,3
       if(elec_id(ig)==0) cycle
       do i1=0,mp_index(1)-1
          do i2=0,mp_index(2)-1
             do i3=0,mp_index(3)-1
                write(nfout,*) 'i1,i2,i3,ig=',i1,i2,i3,ig
                ik1 = map_nnn2ik(i1,i2,i3)
                iopr1 = map_nnn2op(i1,i2,i3)
                ii(1) = i1
                ii(2) = i2
                ii(3) = i3
                jp = ii(ig)+1
                ipm = 0
                if(jp>=mp_index(ig)) then
                   ipm = +1
                   jp = 0
                end if
                ii(ig) = jp
                ik2 = map_nnn2ik(ii(1),ii(2),ii(3))
                iopr2 = map_nnn2op(ii(1),ii(2),ii(3))
                igs(1:3) = map_nnn2gs(1:3,ii(1),ii(2),ii(3)) - map_nnn2gs(1:3,i1,i2,i3)
                igs(ig) = igs(ig) + ipm
                write(nfout,*) 'igs=',igs
                call get_ind_of_g_plus_b(igs,i1,i2,i3,ig,elec_id(ig),ik1,iopr1,ik2,iopr2 &
                                      & ,m1,m2,m3,indgbp,kgbpp)

                ii(1) = i1
                ii(2) = i2
                ii(3) = i3
                jp = ii(ig)-1
                ipm = 0
                if(jp<0) then
                   ipm = -1
                   jp = mp_index(ig)-1
                end if
                ii(ig) = jp
                ik2 = map_nnn2ik(ii(1),ii(2),ii(3))
                iopr2 = map_nnn2op(ii(1),ii(2),ii(3))
                igs(1:3) = map_nnn2gs(1:3,ii(1),ii(2),ii(3)) - map_nnn2gs(1:3,i1,i2,i3)
                igs(ig) = igs(ig) + ipm
                write(nfout,*) 'igs=',igs
                call get_ind_of_g_plus_b(igs,i1,i2,i3,ig,elec_id(ig),ik1,iopr1,ik2,iopr2 &
                                      & ,m1,m2,m3,indgbm,kgbpm)
             end do
          end do
       end do
    end do
    call dealloc_ngpt

  end subroutine m_FEF_init

  subroutine check_elec_field
    integer :: ig
    real(kind=8), parameter :: eps = 1.d-12
 
    elec_id(:) = 0
    numef = 0
    do ig=1,3
       efa(ig) =  dot_product(elec_field(:),altv(:,ig))
       if(abs(efa(ig))>eps) then
          numef = numef + 1
          elec_id(ig) = numef
       end if
    end do
  end subroutine check_elec_field

  subroutine check_kpoints
    integer :: i,j,jj,ik,jk
    integer :: i1,i2,i3,jj1,jj2
    integer :: iopr
    integer :: n1,n2,n3
    integer :: kn(3),kns(3)
    real(kind=8) :: rotk(3),dn
    logical :: flag_trs

    if(way_ksample /= MONKHORST_PACK) then
       write(nfout,*) 'set way_ksample = MONKHORST_PACK.'
       stop 'way_ksample /= MONKHORST_PACK.'
    end if
    do i=1,3
       if(abs(kshift(i))>1.d-12) then
          write(nfout,*) 'set kshift = 0.'
          stop 'kshit > 0'
       end if
    end do

    if(imag==PARA) then
       flag_trs = .true.
       do i=1,nopr
          if(abs(op(1,1,i)+1)<1.d-12.and. &
           & abs(op(2,2,i)+1)<1.d-12.and. &
           & abs(op(3,3,i)+1)<1.d-12) then
             flag_trs = .false.
             exit
          end if
       end do
    else
       flag_trs = .false.
    end if
    if(flag_trs) then
       nopr_trs = 2*nopr
       allocate(op_trs(3,3,nopr_trs))
       op_trs(1:3,1:3,1:nopr) = op(1:3,1:3,1:nopr)
       op_trs(1:3,1:3,nopr+1:nopr_trs) = -op(1:3,1:3,1:nopr)
    else
       nopr_trs = nopr
       allocate(op_trs(3,3,nopr_trs))
       op_trs = op
    end if
    write(nfout,*) 'nopr=',nopr
    write(nfout,*) 'nopr_trs=',nopr_trs

    msize = maxval(mp_index)
    n1 = mp_index(1)
    n2 = mp_index(2)
    n3 = mp_index(3)
    allocate(map_nnn2ik(0:n1-1,0:n2-1,0:n3-1)); map_nnn2ik=0
    allocate(map_nnn2op(0:n1-1,0:n2-1,0:n3-1)); map_nnn2op=0
    allocate(map_nnn2gs(3,0:n1-1,0:n2-1,0:n3-1)); map_nnn2gs=0
    allocate(map_ik2nnn(3,nopr_trs,kv3)); map_ik2nnn=-1
    allocate(flag_op(nopr_trs,kv3)); flag_op=.false.

    write(nfout,'("mp_index=",3i6)') mp_index(1:3)
    write(nfout,'("i,mp_index=",2i6)') i,mp_index(i)
    do ik=1,kv3
       LOOP1: do iopr=1,nopr_trs
          rotk = matmul(transpose(altv),matmul(op_trs(:,:,iopr),vkxyz(ik,:,CARTS)))/PAI2
          write(nfout,'("rotk=",3f20.10)') rotk
          do j=1,3
             dn = mp_index(j)*rotk(j)
             kn(j) = nint(dn)
             if(abs(dn-kn(j)) > 1.d-8) cycle LOOP1
             if(kn(j)<0) then
                kns(j) = kn(j)+mp_index(j)*(1-int(dble(kn(j))/mp_index(j)-1.d-8))
             else
                kns(j) = kn(j)-mp_index(j)*int(dble(kn(j))/mp_index(j)+1.d-8)
             end if
          end do
          write(nfout,'("kn=",3(1x,i4)," kns=",3(1x,i4)," iopr=",i4)') kn(1:3),kns(1:3),iopr
          if(map_nnn2ik(kns(1),kns(2),kns(3))>0) cycle LOOP1
          map_nnn2ik(kns(1),kns(2),kns(3)) = ik
          map_nnn2op(kns(1),kns(2),kns(3)) = iopr
          map_nnn2gs(1:3,kns(1),kns(2),kns(3)) = (kns(1:3) - kn(1:3))/mp_index(1:3)
          map_ik2nnn(1:3,iopr,ik) = kns(1:3)
          flag_op(iopr,ik) = .true.
       end do LOOP1
    end do

    ! Write map func.
    do ik=1,kv3
       do iopr=1,nopr_trs
          if(flag_op(iopr,ik)) then
             write(nfout,'("ik=",i3," iopr=",i3," n1=",i3," n2=",i3," n3=",i3)') ik,iopr,map_ik2nnn(1:3,iopr,ik)
          end if
       end do
    end do
    do i1=0,mp_index(1)-1
       do i2=0,mp_index(2)-1
          do i3=0,mp_index(3)-1
             ik=map_nnn2ik(i1,i2,i3)
             iopr=map_nnn2op(i1,i2,i3)
             write(nfout,'(" n1=",i3," n2=",i3," n3=",i3,"ik=",i3," iopr=",i3," gs=",3(1x,i3))') &
            &  i1,i2,i3,ik,iopr,map_nnn2gs(1:3,i1,i2,i3)
          end do
       end do
    end do

  end subroutine check_kpoints

  subroutine alloc_ngpt
    integer :: ik
    ngmax = nbase(iba(1),1)
    do ik=2,kv3
       ngmax = max(ngmax,nbase(iba(ik),ik))
    end do
    write(nfout,*) 'ngmax=',ngmax
    write(nfout,*) 'nopr_trs=',nopr_trs
    allocate(ngpt(ngmax,nopr_trs))
  end subroutine alloc_ngpt

  subroutine dealloc_ngpt
    deallocate(ngpt)
  end subroutine dealloc_ngpt

  subroutine alloc_indgb
    integer :: m1,m2,m3
    m1 = mp_index(1)-1
    m2 = mp_index(2)-1
    m3 = mp_index(3)-1
    allocate(indgbp(kg1,2,0:m1,0:m2,0:m3,numef)); indgbp = 0
    allocate(indgbm(kg1,2,0:m1,0:m2,0:m3,numef)); indgbm = 0
    allocate(kgbpp(0:m1,0:m2,0:m3,numef))
    allocate(kgbpm(0:m1,0:m2,0:m3,numef))
    allocate(berry_det(0:m1,0:m2,0:m3,numef))
    allocate(berry(0:msize-1,0:msize-1,numef))
  end subroutine alloc_indgb

  subroutine alloc_smat
    logical, save :: first = .true.
    if(first) then
       ns = int(totch + 1.d-13)/2
       !!$write(nfout,*) 'debug FEF: ns=',ns
       write(nfout,*) 'm_FEF detected the number of valence bands; ns=',ns
       first = .false.
       call alloc_grad
    end if
    allocate(smat(ns,ns))
    grad_fef = 0.d0
  end subroutine alloc_smat

  subroutine dealloc_smat
    deallocate(smat)
  end subroutine dealloc_smat

  subroutine get_ind_of_g_plus_b(igs,n1,n2,n3,ig,id,ik1,iopr1,ik2,iopr2,m1,m2,m3,indgb,kgbp)
    implicit none
    integer, intent(in) :: igs(3),n1,n2,n3,ig,id,ik1,iopr1,ik2,iopr2,m1,m2,m3
    integer, intent(inout) :: indgb(kg1,2,0:m1,0:m2,0:m3,numef)
    integer, intent(inout) :: kgbp(0:m1,0:m2,0:m3,numef)

    ! local variables
    integer :: jj,i,j,k,l
    integer :: iga,igb,igc,jga,jgb,jgc

    jj = 0
    loop_i: do i=1,iba(ik1)
       k=ngpt(nbase(i,ik1),iopr1)
       iga = ngabc(k,1)+igs(1)
       igb = ngabc(k,2)+igs(2)
       igc = ngabc(k,3)+igs(3)
       loop_j: do j=1,iba(ik2)
          l=ngpt(nbase(j,ik2),iopr2)
          jga = ngabc(l,1)
          jgb = ngabc(l,2)
          jgc = ngabc(l,3)
          if(iga == jga .and. igb == jgb .and. igc == jgc) then
             jj = jj +1
             indgb(jj,2,n1,n2,n3,id) = i
             indgb(jj,1,n1,n2,n3,id) = j
             exit loop_j
          end if
       end do loop_j
    end do loop_i
    kgbp(n1,n2,n3,id) = jj
    write(nfout,*) 'ind_of_g_plus_b:ik1,ik2,id,kgbp,kg1=',ik1,ik2,id,jj,kg1
  end subroutine get_ind_of_g_plus_b

  subroutine calc_overlap(n1,n2,n3,ik0,iopr0,gs0,ik1,iopr1,gs1,ig,id)
    integer, intent(in) :: n1,n2,n3,ik0,iopr0,ik1,iopr1,ig,id
    integer, intent(in) :: gs0(3),gs1(3)
    integer :: n,m,i,ii,jj,mm,nn
    integer :: ipm0,ipm1
    integer :: ip,it,ia,lmt1,lmt2,u,v,mdvdb
    real(kind=8) :: fnr,fni,fmr,fmi,sr,si,ph,dk(3),vk0(3),vk1(3)
    complex(kind=8) :: defchg,fac,fs1u,fs1v,fs2u,fs2v
    complex(kind=8) :: expkt(natm),csum

    if(iopr0>nopr) then
       ipm0=-1
    else
       ipm0=1
    end if
    if(iopr1>nopr) then
       ipm1=-1
    else
       ipm1=1
    end if

    do m=1,ns
       mm = neordr(m,ik1)
       do n=1,ns
          nn = neordr(n,ik0)
          sr = 0.d0
          si = 0.d0
          do i=1,kgbpp(n1,n2,n3,id)
             ii=indgbp(i,2,n1,n2,n3,id)
             jj=indgbp(i,1,n1,n2,n3,id)
             fnr = zaj_l(ii,nn,ik0,1)
             fni = zaj_l(ii,nn,ik0,2) * ipm0
             fmr = zaj_l(jj,mm,ik1,1)
             fmi = zaj_l(jj,mm,ik1,2) * ipm1
             sr = sr + fnr*fmr + fni*fmi
             si = si + fnr*fmi - fni*fmr
          end do
          !!d$write(nfout,*) 'n,m,sr,si=',n,m,sr,si
          smat(n,m) = dcmplx(sr,si)
       end do
    end do

    if(modnrm /= EXECUT) return

    do ia=1,natm
       ph = PAI2*dot_product(gs0-gs1,pos(ia,1:3))
       if(iopr0>1.and.iopr0<=nopr) then
          vk0 = vkxyz(ik0,1:3,CARTS) 
          dk = matmul(op_trs(:,:,iopr0),vk0) - vk0 
          ph = ph + dot_product(dk,cps(ia,1:3))
       else if(iopr0>nopr+1) then
          vk0 = vkxyz(ik0,1:3,CARTS) 
          dk = vk0 + matmul(op_trs(:,:,iopr0),vk0)
          ph = ph + dot_product(dk,cps(ia,1:3))
       end if
       if(iopr1>1.and.iopr1<=nopr) then
          vk1 = vkxyz(ik1,1:3,CARTS) 
          dk = vk1 - matmul(op_trs(:,:,iopr1),vk1)
          ph = ph + dot_product(dk,cps(ia,1:3))
       else if(iopr1>nopr+1) then
          vk1 = vkxyz(ik1,1:3,CARTS) 
          dk = -(vk1 + matmul(op_trs(:,:,iopr1),vk1))
          ph = ph + dot_product(dk,cps(ia,1:3))
       end if
       expkt(ia) = dcmplx(cos(ph),sin(ph))
    end do

    do m=1,ns
       mm = neordr(m,ik1)
       do n=1,ns
          nn = neordr(n,ik0)
          defchg = dcmplx(0.d0,0.d0)
          do ia=1,natm
             it=ityp(ia)
             mdvdb = m_PP_include_vanderbilt_pot(it)
             if(mdvdb == SKIP) cycle
             csum = dcmplx(0.d0,0.d0)
             do lmt1=1,ilmt(it)
                do lmt2=1,ilmt(it)
                   u = lmta(lmt1,ia)
                   v = lmta(lmt2,ia)
                   fac   = ftqe(lmt1,lmt2,it,id)*iwei(ia)
                   fs1u  = dcmplx(fsr_l(nn,u,ik0),-ipm0*fsi_l(nn,u,ik0))
                   fs2v  = dcmplx(fsr_l(mm,v,ik1),ipm1*fsi_l(mm,v,ik1))
                   csum = csum + fac*fs1u*fs2v
                end do
             end do
             defchg = defchg +  csum * expkt(ia) * iwei(ia)
          end do
          smat(n,m) = smat(n,m) + defchg
       end do
    end do
  end subroutine calc_overlap

  subroutine calc_berry_phase(n1,n2,n3,id)
    integer, intent(in) :: n1,n2,n3,id

    integer :: i, info, ipiv(ns), lwork, ip
    real(kind=8) :: ph
    complex(kind=DP) :: p
    complex(kind=DP), allocatable :: work(:)

    ! LU-facterize the overlap matrix
    call zgetf2(ns,ns,smat,ns,ipiv,info)
    if(info/=0) then
       write(nfout,*) 'Error(ZGETF2): INFO=',info
       stop 'Error(ZGETF2)'
    end if

    ! Get the determinant of the overlap matrix
    p = dcmplx(1.d0,0.d0)
    do i=1,ns
       p = p * smat(i,i)
    end do
    berry_det(n1,n2,n3,id) = p
!debug
!!    write(nfout,*) 'n1,n2,n3,id,berry_det=',n1,n2,n3,id,p
!end debug

    ! Inverte the overlap matrix
    lwork = -1
    allocate(work(ns))
    call zgetri(ns,smat,ns,ipiv,work,lwork,info)
    lwork = work(1) ! optimal size of the work array
    deallocate(work)
    allocate(work(lwork))
    call zgetri(ns,smat,ns,ipiv,work,lwork,info)
    if(info/=0) then
       write(nfout,*) 'Error(ZGETRI): INFO=',info
       stop 'Error(ZGETRI)'
    end if
    deallocate(work)
  end subroutine calc_berry_phase

  subroutine  calc_grad(n1,n2,n3,ik0,iopr0,gs0,m1,m2,m3,ik1,iopr1,gs1,ig,id)
    integer, intent(in) :: n1,n2,n3,ik0,iopr0,gs0(3)
    integer, intent(in) :: m1,m2,m3,ik1,iopr1,gs1(3)
    integer, intent(in) :: ig,id

    integer :: m, n, i, ii, jj, mm, nn
    integer :: ipm0,ipm1
    integer :: ia,it,nb,mdvdb
    integer :: lmt1,lmt2,v,l1
    real(kind=DP) :: fac, sr, si, fmr, fmi
    real(kind=DP) :: grad(kg1,kimg),gsmat(kg1,kimg)
    real(kind=DP) :: ph,ph0,ph1
    real(kind=DP) :: dk(3),vk0(3),vk1(3)
    complex(kind=DP) :: exp0,exp1,qf,ctmp,expgr(kg1)
    complex(kind=DP), parameter :: zi=(0.d0,1.d0)

    fac = efa(ig)/PAI4*mp_index(ig)

    if(iopr0>nopr) then
       ipm0=-1
    else
       ipm0=1
    end if
    if(iopr1>nopr) then
       ipm1=-1
    else
       ipm1=1
    end if

    if(iopr0==1) then
       do n=1,ns
          nn = neordr(n,ik0)
          grad = 0.d0
          do m=1,ns
             sr = dble(smat(m,n))
             si = dimag(smat(m,n))
             mm = neordr(m,ik1)
             if(modnrm /= EXECUT) then
                do i=1,kgbpp(n1,n2,n3,id)
                   ii=indgbp(i,2,n1,n2,n3,id)
                   jj=indgbp(i,1,n1,n2,n3,id)
                   fmr = zaj_l(jj,mm,ik1,1)
                   fmi = zaj_l(jj,mm,ik1,2) * ipm1
                   grad(ii,1) = grad(ii,1) - sr*fmi - si*fmr
                   grad(ii,2) = grad(ii,2) + sr*fmr - si*fmi
                end do
             else
                gsmat = dcmplx(0.d0,0.d0)
                do ia=1,natm
                   it=ityp(ia)
                   mdvdb = m_PP_include_vanderbilt_pot(it)
                   if(mdvdb == SKIP) cycle
                   ph1 = -PAI2*dot_product(gs1,pos(ia,1:3))
                   if(iopr1>1.and.iopr1<=nopr) then
                      vk1 = vkxyz(ik1,1:3,CARTS)
                      dk = vk1 - matmul(op_trs(:,:,iopr1),vk1)
                      ph1 = ph1 + dot_product(dk,cps(ia,1:3))
                   else if(iopr1>nopr+1) then
                      vk1 = vkxyz(ik1,1:3,CARTS)
                      dk = -(vk1 + matmul(op_trs(:,:,iopr1),vk1))
                      ph1 = ph1 + dot_product(dk,cps(ia,1:3))
                   end if
                   exp1 = exp(dcmplx(0.d0,ph1))
                   do i=1,iba(ik0)
                      nb = nbase(i,ik0)
                      ph = -PAI2*dot_product(ngabc(nb,1:3),pos(ia,1:3))
                      expgr(i) = dcmplx(cos(ph),sin(ph))
                   end do
                   do lmt1=1,ilmt(it)
                      l1 = ltp(lmt1,it)
                      qf = dcmplx(0.d0,0.d0)
                      do lmt2=1,ilmt(it)
                         v = lmta(lmt2,ia)
                         qf = qf + ftqe(lmt1,lmt2,it,id)*iwei(ia) &
                             & * dcmplx(fsr_l(mm,v,ik1),ipm1*fsi_l(mm,v,ik1))*exp1
                      end do
                      qf = qf * zi**(-l1)
                      do i=1,iba(ik0)
                         ctmp = expgr(i)*snl(i,lmt1,ik0)*qf
                         gsmat(i,1) = gsmat(i,1) + dble(ctmp)
                         gsmat(i,2) = gsmat(i,2) + dimag(ctmp)
                      end do
                   end do
                end do
                do i=1,kgbpp(n1,n2,n3,id)
                   ii=indgbp(i,2,n1,n2,n3,id)
                   jj=indgbp(i,1,n1,n2,n3,id)
                   fmr = zaj_l(jj,mm,ik1,1) + gsmat(ii,1)
                   fmi = zaj_l(jj,mm,ik1,2) * ipm1 + gsmat(ii,2)
                   grad(ii,1) = grad(ii,1) - sr*fmi - si*fmr
                   grad(ii,2) = grad(ii,2) + sr*fmr - si*fmi
                end do
             end if
          end do
          do i=1,iba(ik0)
             grad_fef(i,n,ik0,1) = grad_fef(i,n,ik0,1) + fac * grad(i,1)
             grad_fef(i,n,ik0,2) = grad_fef(i,n,ik0,2) + fac * grad(i,2)
          end do
       end do
    end if

    if(iopr1==1) then
       do n=1,ns
          nn = neordr(n,ik1)
          grad = 0.d0
          do m=1,ns
             sr =  dble(smat(n,m))
             si = -dimag(smat(n,m))
             mm = neordr(m,ik0)
             if(modnrm /= EXECUT) then
                do i=1,kgbpm(m1,m2,m3,id)
                   ii=indgbm(i,2,m1,m2,m3,id)
                   jj=indgbm(i,1,m1,m2,m3,id)
                   fmr = zaj_l(jj,mm,ik0,1)
                   fmi = zaj_l(jj,mm,ik0,2) * ipm0
                   grad(ii,1) = grad(ii,1) + sr*fmi + si*fmr
                   grad(ii,2) = grad(ii,2) - sr*fmr + si*fmi
                end do
             else
                gsmat = dcmplx(0.d0,0.d0)
                do ia=1,natm
                   it=ityp(ia)
                   mdvdb = m_PP_include_vanderbilt_pot(it)
                   if(mdvdb == SKIP) cycle
                   ph0 = -PAI2*dot_product(gs0,pos(ia,1:3))
                   if(iopr0>1.and.iopr0<=nopr) then
                      vk0 = vkxyz(ik0,1:3,CARTS)
                      dk = vk0 - matmul(op_trs(:,:,iopr0),vk0)
                      ph0 = ph0 + dot_product(dk,cps(ia,1:3))
                   else if(iopr0>nopr+1) then
                      vk0 = vkxyz(ik0,1:3,CARTS)
                      dk = -(vk0 + matmul(op_trs(:,:,iopr0),vk0))
                      ph0 = ph0 + dot_product(dk,cps(ia,1:3))
                   end if
                   exp0 = exp(dcmplx(0.d0,ph0))
                   do i=1,iba(ik1)
                      nb = nbase(i,ik1)
                      ph = -PAI2*dot_product(ngabc(nb,1:3),pos(ia,1:3))
                      expgr(i) = dcmplx(cos(ph),sin(ph))
                   end do
                   do lmt1=1,ilmt(it)
                      l1 = ltp(lmt1,it)
                      qf = dcmplx(0.d0,0.d0)
                      do lmt2=1,ilmt(it)
                         v = lmta(lmt2,ia)
                         qf = qf + dconjg(ftqe(lmt1,lmt2,it,id))*iwei(ia) &
                             & * dcmplx(fsr_l(mm,v,ik0),ipm0*fsi_l(mm,v,ik0))*exp0
                      end do
                      qf = qf * zi**(-l1)
                      do i=1,iba(ik1)
                         ctmp = expgr(i)*snl(i,lmt1,ik1)*qf
                         gsmat(i,1) = gsmat(i,1) + dble(ctmp)
                         gsmat(i,2) = gsmat(i,2) + dimag(ctmp)
                      end do
                   end do
                end do
                do i=1,kgbpm(m1,m2,m3,id)
                   ii=indgbm(i,2,m1,m2,m3,id)
                   jj=indgbm(i,1,m1,m2,m3,id)
                   fmr = zaj_l(jj,mm,ik0,1) + gsmat(ii,1)
                   fmi = zaj_l(jj,mm,ik0,2) * ipm0 + gsmat(ii,2)
                   grad(ii,1) = grad(ii,1) + sr*fmi + si*fmr
                   grad(ii,2) = grad(ii,2) - sr*fmr + si*fmi
                end do
             end if
          end do
          do i=1,iba(ik1)
             grad_fef(i,n,ik1,1) = grad_fef(i,n,ik1,1) + fac * grad(i,1)
             grad_fef(i,n,ik1,2) = grad_fef(i,n,ik1,2) + fac * grad(i,2)
          end do
       end do
    end if
  end subroutine  calc_grad

  subroutine m_FEF_polarization(nfout,eplr)
    integer, intent(in) :: nfout
    real(kind=DP), intent(out) :: eplr

    integer :: ig,i,ia,id,ii(3)
    integer :: jj1,jj2,m1,m2,i1,i2,j
    real(kind=DP) :: bp(3),pel(3),pion(3)
    real(kind=DP) :: epel,epion
    real(kind=DP) :: fpi,ph
    complex(kind=DP) :: p
    real(kind=8), parameter :: paid2 = PAI/2


    bp(1:3) = 0.d0
    epel = 0.d0
    do ig=1,3
       id = elec_id(ig)
       if(id==0) cycle
       jj1 = mod(ig+1-1,3)+1
       jj2 = mod(ig+2-1,3)+1
       m1 = mp_index(jj1)
       m2 = mp_index(jj2)
       do i1=0,m1-1
          do i2=0,m2-1
             p = dcmplx(1.d0,0.d0)
             do j=0,mp_index(ig)-1
                ii(jj1) = i1
                ii(jj2) = i2
                ii(ig) = j
                p = p * berry_det(ii(1),ii(2),ii(3),id)
!debug
!!    write(nfout,*) 'n1,n2,n3,ig,berry_det=',ii(1),ii(2),ii(3),ig,berry_det(ii(1),ii(2),ii(3),id)
!end debug
             end do
!debug
!!    write(nfout,*) 'p=',p
!end debug
             ph = dimag(log(p))
             if(ph>paid2) then
                ph = ph - PAI
             else if(ph<-paid2) then
                ph = ph + PAI
             end if
             berry(i1,i2,id) = ph
! debug
!!             write(nfout,*) 'i1,i2,ig,ph=',i1,i2,ig,ph
! end debug
          end do
       end do
       bp(ig) = sum(berry(0:m1-1,0:m2-1,id))/(m1*m2)
       epel = epel + efa(ig)*bp(ig)
    end do
    if(nspin==1) then
       fpi = PAI
    else
       fpi = PAI2
    end if
    epel = -epel/fpi
    pel = matmul(altv,bp)/(univol*fpi)

    do i=1,3
       pion(i) = 0.d0
       do ia=1,natm
          pion(i) = pion(i) + ival(ityp(ia)) * cps(ia,i)
       end do
       epion = -dot_product(elec_field,pion)
       pion(i) = pion(i)/univol
    end do

    if(sw_check_polar==ON) then
       eplr = 0.d0
    else
       eplr = epel + epion
    end if

    write(nfout,'("BP   =",3(1x,f20.9))') bp(1:3)
    write(nfout,'("Pel  =",3(1x,f20.9))') pel(1:3)
    write(nfout,'("Pion =",3(1x,f20.9))') pion(1:3)
    write(nfout,'("Pmac =",3(1x,f20.9))') pel(1:3)+pion(1:3)
  end subroutine m_FEF_polarization

  subroutine m_FEF_build_grad
    integer :: ig,id,ik,iopr
    integer :: n1,n2,n3,ip(3)
    integer :: ik0,ik1,iopr0,iopr1
    integer :: gs0(3),gs1(3)


    call alloc_smat
    do ig=1,3
       id = elec_id(ig)
       if(id==0) cycle
       do ik=1,kv3
          do iopr=1,nopr_trs
             if(.not.flag_op(iopr,ik)) cycle
             n1=map_ik2nnn(1,iopr,ik)
             n2=map_ik2nnn(2,iopr,ik)
             n3=map_ik2nnn(3,iopr,ik)
             ip(1) = n1
             ip(2) = n2
             ip(3) = n3
             ip(ig) = ip(ig) + 1
             gs1 = 0
             if(ip(ig)>=mp_index(ig)) then
                ip(ig)=0
                gs1(ig) = 1
             end if
             ik0 = map_nnn2ik(n1,n2,n3)
             ik1 = map_nnn2ik(ip(1),ip(2),ip(3))
             iopr0 = map_nnn2op(n1,n2,n3)
             iopr1 = map_nnn2op(ip(1),ip(2),ip(3))
             gs0 = map_nnn2gs(1:3,n1,n2,n3)
             gs1 = gs1 + map_nnn2gs(1:3,ip(1),ip(2),ip(3))
             call calc_overlap(n1,n2,n3,ik0,iopr0,gs0,ik1,iopr1,gs1,ig,id) !=> smat
             call calc_berry_phase(n1,n2,n3,id) !=> smat,berry_det
             if(sw_check_polar==OFF) &
              & call calc_grad(n1,n2,n3,ik0,iopr0,gs0,ip(1),ip(2),ip(3),ik1,iopr1,gs1,ig,id) !=> grad_fef
          end do
       end do
    end do
    call dealloc_smat
  end subroutine m_FEF_build_grad
  
  subroutine m_FEF_add_grad_to_vnlph(ik)
    integer, intent(in) :: ik

    integer :: ib,jb

    if(sw_check_polar==ON) return

    do ib=1,ns
       jb = neordr(ib,ik)
       if(map_ek(jb,ik) /= mype) cycle
       call m_ES_add_it_to_vnlph(ik,jb,grad_fef(:,ib,ik,:))
    end do
  end subroutine m_FEF_add_grad_to_vnlph

  subroutine m_FEF_add_grad_to_vnlph_RMM(ik,jb)
    integer, intent(in) :: ik,jb

    integer :: ib

    if(sw_check_polar==ON) return

    if(map_ek(jb,ik) == mype) then
       ib = nrvf_ordr(jb,ik)
       if(ib>ns) return
       call m_ES_add_it_to_vnlph(ik,jb,grad_fef(:,ib,ik,:))
    end if
  end subroutine m_FEF_add_grad_to_vnlph_RMM

  subroutine alloc_grad
    allocate(grad_fef(kg1,ns,kv3,kimg))
  end subroutine alloc_grad

  subroutine dealloc_grad
    deallocate(grad_fef)
  end subroutine dealloc_grad

  subroutine m_FEF_Constract_of_ftq()
    ! local varialbes
    integer :: it,ik,ip,lmt1,lmt2,u,v
    integer :: il1,il2,tau1,tau2,l3
    integer :: n,ilm3,iiqitg,ig,id
    integer :: mdvdb
    integer, allocatable :: il3(:)
    real(kind=DP) :: fac
    real(kind=DP) :: dk(3),ylm,dga
    real(kind=DP) :: ftqr(numef),ftqi(numef),ftqb
    complex(kind=DP) :: zi = (0.d0,1.d0)

    if(modnrm /= EXECUT) return 

    call m_PP_find_maximum_l(n)   !  n-1: maximum l
    n = (n-1) + (n-1) + 1
    allocate(il3(n**2)); call substitute_il3(n**2,il3) ! -(b_Elec..)
    allocate(ftqe(nlmt,nlmt,ntyp,numef)); ftqe=(0.d0,0.d0)
    TYPE: do it = 1, ntyp
       mdvdb = m_PP_include_vanderbilt_pot(it)
       if(mdvdb == SKIP) cycle TYPE
       LMT_1: do lmt1=1,ilmt(it)
          il1 = ltp(lmt1,it)
          tau1 = taup(lmt1,it)
          LMT_2: do lmt2=lmt1,ilmt(it)
             il2 = ltp(lmt2,it)
             tau2 = taup(lmt2,it)
             ftqr(1:numef) = 0.d0
             ftqi(1:numef) = 0.d0
             LM3: do n=1,il2p(lmt1,lmt2,it)
                ilm3 = isph(lmt1,lmt2,n,it); l3=il3(ilm3)
                iiqitg = iqitg(il1,tau1,il2,tau2,l3+1,it)
                if(iiqitg == 0) cycle LM3
                do ig=1,3
                   id = elec_id(ig)
                   if(id==0) cycle
                   dk(1:3)=-rltv(1:3,ig)/mp_index(ig)
                   call sphrp2_for_Berry(ilm3,dk,ylm)
                   ftqb = qitg_fef(iiqitg,id)*dl2p(lmt1,lmt2,n,it)*ylm
                   !!$ write(6,*) 'it,lmt1,lmt2,n,id,ftqb=',it,lmt1,lmt2,n,id,ftqb
                   if(mod(l3,2)==0) then
                      ftqr(id)=ftqr(id)+real(zi**(-l3))*ftqb
                   else
                      ftqi(id)=ftqi(id)+dimag(zi**(-l3))*ftqb
                   end if
                end do
             end do LM3
             do id=1,numef
                ftqe(lmt1,lmt2,it,id) = dcmplx(ftqr(id),ftqi(id))
                ftqe(lmt2,lmt1,it,id) = ftqe(lmt1,lmt2,it,id)
             end do
          end do LMT_2
       end do LMT_1
    end do TYPE

    deallocate(il3)
  end subroutine m_FEF_Constract_of_ftq

end module m_FiniteElectricField
