! @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ !
! @@                                                                @@ !
! @@       PROGRAM  ASCOT 2014.440 (ver.4.40)                       @@ !
! @@     "Abinitio Simulation Code for Quantum Transport"           @@ !
! @@                                                                @@ !
! @@                                                                @@ !
! @@  AUTHOR(S): Naoki WATANABE, Nobutaka NISHIKAWA (Mizuho I.R.)   @@ !
! @@             Hisashi KONDO (Univ. Tokyo)                        @@ !
! @@                                                09/May/2014     @@ !
! @@                                                                @@ !
! @@  Contact address: Phase System Consortium                      @@ !
! @@                                                                @@ !
! @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ !

subroutine Force__calcSK
  use ac_misc_module
  use ac_mpi_module
  implicit none

  integer :: l
  integer :: a, i, i1
  integer :: b, j, j1
  real(8) :: force(3)
  real(8) :: dS(3), dK(3)
  real(8) :: CDM, EDM

  do a=MPI%isatom, MPI%ieatom
     i=Base%vipao(a)

     do l=0, Param%Cell%nL-1
        do b=1, Param%Data%natom
           if( a==b ) cycle
           if( l==0 .and. a<b ) cycle

           if( .not. associated(Hamiltonian%vAtomMatrix(b,a,l)%Ptr) ) then
              cycle
           end if

           j=Base%vipao(b)

           force = 0.d0
           do i1=MPI%ispao(a), MPI%iepao(a)
              do j1=1, Base%vnpao(b)
                 call Base__bracketdSK( dS, dK, &
                      Base%vpao(i+i1-1), Base%vpao(j+j1-1), Param%Cell%vL(:,l) )

                 if( Param%Option%spin_orbit ) then
                    CDM = dreal(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,1)) &
                         +dreal(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,4))
                    EDM = Hamiltonian%vAtomMatrix(b,a,l)%Ptr%EDMLS(j1,i1)
                 else
                    CDM = sum(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDM(j1,i1,:))
                    EDM = Hamiltonian%vAtomMatrix(b,a,l)%Ptr%EDM(j1,i1)
                 end if

                 force = force + CDM*2.d0 * dK - EDM*2.d0 * dS
              end do
           end do

           Param%Data%vatom(a)%force = &
                Param%Data%vatom(a)%force + force
           Param%Data%vatom(b)%force = &
                Param%Data%vatom(b)%force - force
        end do
     end do
  end do

  return
end subroutine Force__calcSK

subroutine Force__calcT
  use ac_misc_module
  use ac_mpi_module
  implicit none

  integer :: spin
  integer :: l
  integer :: a, i, i1
  integer :: b, j1
  integer :: ia, ib, ic

  type(Element_type), pointer :: elem
  type(PPloc_type) :: Vpsloc, Vpsval
  real(8), allocatable :: gVext(:,:,:,:)

  real(8), pointer :: dVA(:,:,:), dVB(:,:,:)
  real(8) :: force(3), forceA(3), forceB(3)
  real(8) :: CDM, rho

  do a=MPI%isatom, MPI%ieatom
     i=Base%vipao(a)

     do spin=1, Param%Option%nspin
        do i1=MPI%ispao(a), MPI%iepao(a)
           call PAO__calcgWaveV( Base%vpao(i+i1-1), spin )
           call PAO__calcWaveV ( Base%vpao(i+i1-1), spin )
        end do

        do l=0, Param%Cell%nL-1
           do b=1, Param%Data%natom
              if( .not. associated(Hamiltonian%vAtomMatrix(b,a,l)%Ptr) ) then
                 cycle
              end if

              allocate( dVA(3,Base%vnpao(b),Base%vnpao(a)) )
              allocate( dVB(3,Base%vnpao(b),Base%vnpao(a)) )

              call Base__bracketdV( dVA, a, b, l )
              call Base__bracketVd( dVB, a, b, l )

              forceA = 0.d0
              forceB = 0.d0
              do i1=MPI%ispao(a), MPI%iepao(a)
                 do j1=1, Base%vnpao(b)
                    if( Param%Option%spin_orbit ) then
                       select case(spin)
                       case(1)
                          CDM = dreal(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,1))
                       case(2)
                          CDM = +2.d0*dreal(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,2))
                       case(3)
                          CDM = -2.d0*dimag(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,2))
                       case(4)
                          CDM = dreal(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,4))
                       end select
                    else
                       CDM = Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDM(j1,i1,spin)
                    end if

                    forceA = forceA + CDM*2.d0 * dVA(:,j1,i1)
                    forceB = forceB + CDM*2.d0 * dVB(:,j1,i1)
                 end do
              end do

              if( l==0 ) then
                 forceA = forceA * 0.5d0
                 forceB = forceB * 0.5d0
              end if
              Param%Data%vatom(a)%force = &
                   Param%Data%vatom(a)%force + forceA
              Param%Data%vatom(b)%force = &
                   Param%Data%vatom(b)%force + forceB

              if(associated(dVA)) deallocate(dVA)
              if(associated(dVB)) deallocate(dVB)
           end do
        end do

        do i1=MPI%ispao(a), MPI%iepao(a)
           call PAO__freegWaveV( Base%vpao(i+i1-1) )
           call PAO__freeWaveV ( Base%vpao(i+i1-1) )
        end do

     end do
  end do

  if( .not. Param%Option%projection ) then
     allocate( gVext(3,Param%Cell%Na,Param%Cell%Nb,Param%Cell%Nc) )

     do a=MPI%isatom, MPI%ieatom
        if ( MPI%ispao(a) /= 1 ) cycle
        elem => Param__Data__getElement( Param%Data%vatom(a)%name )

        gVext = 0.d0
        if( Param%Option%na .and. (.not. Param%Option%nohar) ) then
           call PPloc__set( Vpsloc, Param%Data%vatom(a)%Ro, elem%Vloc )
           call PPloc__set( Vpsval, Param%Data%vatom(a)%Ro, elem%Vval )
           call Potential__setGradient2( gVext, Vpsloc, Vpsval )
        else
           call PPloc__set( Vpsloc, Param%Data%vatom(a)%Ro, elem%Vloc )
           call Potential__setGradient1( gVext, Vpsloc )
        end if

        force = 0.d0
        do ic = 1, Param%Cell%Nc
           do ib = 1, Param%Cell%Nb
              do ia = 1, Param%Cell%Na
                 if( Param%Option%spin_orbit ) then
                    rho =  dreal(Density%rhoLS(1,ia,ib,ic)) &
                         + dreal(Density%rhoLS(4,ia,ib,ic))
                 else
                    rho = sum(Density%rho(:,ia,ib,ic))
                 end if
                 force = force + rho * gVext(:,ia,ib,ic)
              end do
           end do
        end do

        Param%Data%vatom(a)%force = &
             Param%Data%vatom(a)%force + force * Param%Cell%dV
     end do

     deallocate( gVext )
  end if

  return
end subroutine Force__calcT

subroutine Force__calcN
  use ac_misc_module
  use ac_mpi_module
  implicit none

  integer :: l, a, i, i1
  integer :: b, j, j1
  integer :: c, k, k1
  integer :: lc, lbc
  real(8) :: forceA(3), forceB(3)
  real(8) :: CDM
  complex(8) :: CDMLS(4)

  type UVMatrix_type
     real(8), pointer :: uv(:,:)
     complex(8), pointer :: uvLS(:,:,:)
     real(8), pointer :: duv(:,:,:)
     complex(8), pointer :: duvLS(:,:,:,:)
     real(8), pointer ::  E(:,:)
  end type UVMatrix_type
  type UVMatrix_Ptr_type
     type(UVMatrix_type), pointer :: Ptr
  end type UVMatrix_Ptr_type
  type(UVMatrix_Ptr_type), pointer :: vUV(:,:,:)

  real(8) :: uv0, uvm
  integer :: psm, psl, spin
  real(8) :: duv0(3), duvm(3)
  complex(8), parameter :: CI = ( 0.0d0, 1.0d0 )
  complex(8) :: dVnonLS(3)
  real(8) :: gVnon(3)

  allocate( vUV(Param%Data%natom, 1-Param%Cell%nL:Param%Cell%nL-1, Param%Data%natom ) )

  do a=1, Param%Data%natom
     do l=1-Param%Cell%nL, Param%Cell%nL-1
        do c=1, Param%Data%natom
           vUV(a,l,c)%Ptr => null()
        end do
     end do
  end do

  do l=0, Param%Cell%nL-1
     do a=MPI%isatom, MPI%ieatom
        i=Base%vipao(a)

        do b=1, Param%Data%natom
           j=Base%vipao(b)

           if( .not. associated(Hamiltonian%vAtomMatrix(b,a,l)%Ptr) ) then
              cycle
           end if

           do lc=1-Param%Cell%nL, Param%Cell%nL-1
              lbc = Param__Cell__relative( l, lc )
              if( lbc == -100 ) cycle

              do c=1, Param%Data%natom
                 if( Potential%vnVpsnon(c) == 0 ) cycle

                 k=Potential%viVpsnon(c)

                 if( .not. Potential__intersect( Base%vpao(i), &
                      Potential%vVpsnon(k,1),  Param%Cell%vL(:,lc) ) ) then
                    cycle
                 end if

                 if( .not. Potential__intersect( Base%vpao(j), &
                      Potential%vVpsnon(k,1),  Param%Cell%vL(:,lbc) ) ) then
                    cycle
                 end if

                 if( .not. associated( vUV(c,lc,a)%Ptr ) ) then
                    if( Param%Option%spin_orbit ) then
                       allocate( vUV(c,lc,a)%Ptr )
                       allocate( vUV(c,lc,a)%Ptr%uvLS  ( Potential%vnVpsnon(c), Base%vnpao(a), 2 ) )
                       allocate( vUV(c,lc,a)%Ptr%duvLS( 3, Potential%vnVpsnon(c), Base%vnpao(a), 2 ) )
                       allocate( vUV(c,lc,a)%Ptr%E( Potential%vnVpsnon(c), 2 ) )

                       do k1=1, Potential%vnVpsnon(c)
                          psm = Potential%vVpsnon(k+k1-1,1)%m
                          do i1=1, Base%vnpao(a)
                             do spin=1, 2
                                call Potential__bracketS (  uvm, &
                                     Base%vpao(i+i1-1), Potential%vVpsnon(k+k1-1-2*psm,spin), Param%Cell%vL(:,lc) )
                                call Potential__bracketS (  uv0, &
                                     Base%vpao(i+i1-1), Potential%vVpsnon(k+k1-1,spin), Param%Cell%vL(:,lc) )
                                call Potential__bracketdS( duvm, &
                                     Base%vpao(i+i1-1), Potential%vVpsnon(k+k1-1-2*psm,spin), Param%Cell%vL(:,lc) )
                                call Potential__bracketdS( duv0, &
                                     Base%vpao(i+i1-1), Potential%vVpsnon(k+k1-1,spin), Param%Cell%vL(:,lc) )

                                if( psm > 0 ) then
                                   vUV(c,lc,a)%Ptr%uvLS  (k1,i1,spin) = (-M_SQRT1_2)*(uv0 + CI*uvm)
                                   vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,spin) = (-M_SQRT1_2)*(duv0(:) + CI*duvm(:))
                                else if( psm < 0 ) then
                                   vUV(c,lc,a)%Ptr%uvLS  (k1,i1,spin) = (+M_SQRT1_2)*(uvm - CI*uv0)
                                   vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,spin) = (+M_SQRT1_2)*(duvm(:) - CI*duv0(:))
                                else if( psm == 0 ) then
                                   vUV(c,lc,a)%Ptr%uvLS  (k1,i1,spin) = uv0
                                   vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,spin) = duv0(:)
                                end if
                             end do
                          end do
                          vUV(c,lc,a)%Ptr%E(k1,1) = Potential%vVpsnon(k+k1-1,1)%E
                          vUV(c,lc,a)%Ptr%E(k1,2) = Potential%vVpsnon(k+k1-1,2)%E
                       end do
                    else
                       allocate( vUV(c,lc,a)%Ptr )
                       allocate( vUV(c,lc,a)%Ptr%uv ( Potential%vnVpsnon(c), Base%vnpao(a) ) )
                       allocate( vUV(c,lc,a)%Ptr%duv( 3, Potential%vnVpsnon(c), Base%vnpao(a) ) )
                       allocate( vUV(c,lc,a)%Ptr%E  ( Potential%vnVpsnon(c), 1 ) )

                       do k1=1, Potential%vnVpsnon(c)
                          do i1=1, Base%vnpao(a)
                             call Potential__bracketS (  uv0, Base%vpao(i+i1-1), Potential%vVpsnon(k+k1-1,1), Param%Cell%vL(:,lc) )
                             call Potential__bracketdS( duv0, Base%vpao(i+i1-1), Potential%vVpsnon(k+k1-1,1), Param%Cell%vL(:,lc) )
                             vUV(c,lc,a)%Ptr%uv (k1,i1) = uv0
                             vUV(c,lc,a)%Ptr%duv(:,k1,i1) = duv0
                          end do
                          vUV(c,lc,a)%Ptr%E(k1,1) = Potential%vVpsnon(k+k1-1,1)%E
                       end do
                    end if
                 end if

                 if( .not. associated( vUV(c,lbc,b)%Ptr ) ) then
                    if( Param%Option%spin_orbit ) then
                       allocate( vUV(c,lbc,b)%Ptr )
                       allocate( vUV(c,lbc,b)%Ptr%uvLS  ( Potential%vnVpsnon(c), Base%vnpao(b), 2 ) )
                       allocate( vUV(c,lbc,b)%Ptr%duvLS( 3,Potential%vnVpsnon(c), Base%vnpao(b), 2 ) )
                       allocate( vUV(c,lbc,b)%Ptr%E    ( Potential%vnVpsnon(c), 2 ) )

                       do k1=1, Potential%vnVpsnon(c)
                          psm = Potential%vVpsnon(k+k1-1,1)%m
                          do j1=1, Base%vnpao(b)
                             do spin=1, 2
                                call Potential__bracketS (  uvm, &
                                     Base%vpao(j+j1-1), Potential%vVpsnon(k+k1-1-2*psm,spin), Param%Cell%vL(:,lbc) )
                                call Potential__bracketS (  uv0, &
                                     Base%vpao(j+j1-1), Potential%vVpsnon(k+k1-1,spin), Param%Cell%vL(:,lbc) )
                                call Potential__bracketdS( duvm, &
                                     Base%vpao(j+j1-1), Potential%vVpsnon(k+k1-1-2*psm,spin), Param%Cell%vL(:,lbc) )
                                call Potential__bracketdS( duv0, &
                                     Base%vpao(j+j1-1), Potential%vVpsnon(k+k1-1,spin), Param%Cell%vL(:,lbc) )

                                if( psm > 0 ) then
                                   vUV(c,lbc,b)%Ptr%uvLS  (k1,j1,spin) = (-M_SQRT1_2)*(uv0 + CI*uvm)
                                   vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,spin) = (-M_SQRT1_2)*(duv0(:) + CI*duvm(:))
                                else if( psm < 0 ) then
                                   vUV(c,lbc,b)%Ptr%uvLS  (k1,j1,spin) = (+M_SQRT1_2)*(uvm - CI*uv0)
                                   vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,spin) = (+M_SQRT1_2)*(duvm(:) - CI*duv0(:))
                                else if( psm == 0 ) then
                                   vUV(c,lbc,b)%Ptr%uvLS  (k1,j1,spin) = uv0
                                   vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,spin) = duv0(:)
                                end if
                             end do
                          end do
                          vUV(c,lbc,b)%Ptr%E(k1,1) = Potential%vVpsnon(k+k1-1,1)%E
                          vUV(c,lbc,b)%Ptr%E(k1,2) = Potential%vVpsnon(k+k1-1,2)%E
                       end do
                    else
                       allocate( vUV(c,lbc,b)%Ptr )
                       allocate( vUV(c,lbc,b)%Ptr%uv ( Potential%vnVpsnon(c), Base%vnpao(b) ) )
                       allocate( vUV(c,lbc,b)%Ptr%duv( 3, Potential%vnVpsnon(c), Base%vnpao(b) ) )
                       allocate( vUV(c,lbc,b)%Ptr%E  ( Potential%vnVpsnon(c), 1 ) )

                       do k1=1, Potential%vnVpsnon(c)
                          do j1=1, Base%vnpao(b)
                             call Potential__bracketS (  uv0, Base%vpao(j+j1-1), Potential%vVpsnon(k+k1-1,1), Param%Cell%vL(:,lbc) )
                             call Potential__bracketdS( duv0, Base%vpao(j+j1-1), Potential%vVpsnon(k+k1-1,1), Param%Cell%vL(:,lbc) )
                             vUV(c,lbc,b)%Ptr%uv (k1,j1) = uv0
                             vUV(c,lbc,b)%Ptr%duv(:,k1,j1) = duv0
                          end do
                          vUV(c,lbc,b)%Ptr%E(k1,1) = Potential%vVpsnon(k+k1-1,1)%E
                       end do
                    end if
                 end if

                 forceA = 0.d0
                 forceB = 0.d0

                 if( Param%Option%spin_orbit ) then
                    do i1=MPI%ispao(a), MPI%iepao(a)
                       do j1=1, Base%vnpao(b)
                          CDMLS(:) = Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,:)
                          do k1=1, Potential%vnVpsnon(c)
                             psl = Potential%vVpsnon(k+k1-1,1)%l
                             psm = Potential%vVpsnon(k+k1-1,1)%m

                             dVnonLS = &
                                  + vUV(c,lbc,b)%Ptr%E(k1,1) * (dble(psl+psm+1)/dble(2*psl+1)) &
                                  * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,1) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,1))
                             forceA(:) = forceA(:) + 2.d0*dreal(CDMLS(1)*dVnonLS(:))

                             dVnonLS = &
                                  + vUV(c,lc, a)%Ptr%E(k1,1) * (dble(psl+psm+1)/dble(2*psl+1)) &
                                  * vUV(c,lc, a)%Ptr%uvLS(k1,i1,1) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,1))
                             forceB(:) = forceB(:) + 2.d0*dreal(CDMLS(1)*dVnonLS(:))

                             dVnonLS = &
                                  + vUV(c,lbc,b)%Ptr%E(k1,2) * (dble(psl-psm)/dble(2*psl+1)) &
                                  * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,2) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,2))

                             forceA(:) = forceA(:) + 2.d0*dreal(CDMLS(1)*dVnonLS(:))

                             dVnonLS = &
                                  + vUV(c,lc, a)%Ptr%E(k1,2) * (dble(psl-psm)/dble(2*psl+1)) &
                                  * vUV(c,lc, a)%Ptr%uvLS(k1,i1,2) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,2))

                             forceB(:) = forceB(:) + 2.d0*dreal(CDMLS(1)*dVnonLS(:))

                             if( psl > psm ) then
                                dVnonLS = &
                                     + vUV(c,lbc,b)%Ptr%E(k1,1) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,1) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,1))
                                forceA(:) = forceA(:) + 2.d0*dreal(CDMLS(2)*dVnonLS(:))

                                dVnonLS = &
                                     + vUV(c,lc, a)%Ptr%E(k1,1) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lc, a)%Ptr%uvLS(k1,i1,1) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,1))

                                forceB(:) = forceB(:) + 2.d0*dreal(CDMLS(2)*dVnonLS(:))

                                dVnonLS = &
                                     + vUV(c,lbc,b)%Ptr%E(k1,2) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,2) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,2))
                                forceA(:) = forceA(:) - 2.d0*dreal(CDMLS(2)*dVnonLS(:))

                                dVnonLS = &
                                     + vUV(c,lc, a)%Ptr%E(k1,2) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lc, a)%Ptr%uvLS(k1,i1,2) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,2))
                                forceB(:) = forceB(:) - 2.d0*dreal(CDMLS(2)*dVnonLS(:))
                             end if

                             if( psl > psm ) then
                                dVnonLS = &
                                     + vUV(c,lbc,b)%Ptr%E(k1,1) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,1) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,1))
                                forceA(:) = forceA(:) + 2.d0*dreal(CDMLS(3)*dVnonLS(:))

                                dVnonLS = &
                                     + vUV(c,lc, a)%Ptr%E(k1,1) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lc, a)%Ptr%uvLS(k1,i1,1) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,1))
                                forceB(:) = forceB(:) + 2.d0*dreal(CDMLS(3)*dVnonLS(:))

                                dVnonLS = &
                                     + vUV(c,lbc,b)%Ptr%E(k1,2) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,2) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,2))
                                forceA(:) = forceA(:) - 2.d0*dreal(CDMLS(3)*dVnonLS(:))

                                dVnonLS = &
                                     + vUV(c,lc, a)%Ptr%E(k1,2) &
                                     * sqrt(dble(psl+psm+1)/dble(2*psl+1)) * sqrt(dble(psl-psm)/dble(2*psl+1)) &
                                     * vUV(c,lc, a)%Ptr%uvLS(k1,i1,2) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,2))
                                forceB(:) = forceB(:) - 2.d0*dreal(CDMLS(3)*dVnonLS(:))
                             end if

                             dVnonLS = &
                                  + vUV(c,lbc,b)%Ptr%E(k1,1) &
                                  * (dble(psl-psm+1)/dble(2*psl+1)) &
                                  * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,1) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,1))
                             forceA(:) = forceA(:) + 2.d0*dreal(CDMLS(4)*dVnonLS(:))

                             dVnonLS = &
                                  + vUV(c,lc, a)%Ptr%E(k1,1) &
                                  * (dble(psl-psm+1)/dble(2*psl+1)) &
                                  * vUV(c,lc, a)%Ptr%uvLS(k1,i1,1) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,1))
                             forceB(:) = forceB(:) + 2.d0*dreal(CDMLS(4)*dVnonLS(:))

                             dVnonLS = &
                                  + vUV(c,lbc,b)%Ptr%E(k1,2) &
                                  * (dble(psl+psm)/dble(2*psl+1)) &
                                  * vUV(c,lbc,b)%Ptr%uvLS(k1,j1,2) * dconjg(vUV(c,lc,a)%Ptr%duvLS(:,k1,i1,2))
                             forceA(:) = forceA(:) + 2.d0*dreal(CDMLS(4)*dVnonLS(:))

                             dVnonLS = &
                                  + vUV(c,lc, a)%Ptr%E(k1,2) &
                                  * (dble(psl+psm)/dble(2*psl+1)) &
                                  * vUV(c,lc, a)%Ptr%uvLS(k1,i1,2) * dconjg(vUV(c,lbc,b)%Ptr%duvLS(:,k1,j1,2))
                             forceB(:) = forceB(:) + 2.d0*dreal(CDMLS(4)*dVnonLS(:))

                          end do
                       end do
                    end do
                 else
                    do i1=MPI%ispao(a), MPI%iepao(a)
                       do j1=1, Base%vnpao(b)
                          CDM = sum(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDM(j1,i1,:))
                          do k1=1, Potential%vnVpsnon(c)

                             gVnon = &
                                  vUV(c,lbc,b)%Ptr%E(k1,1) &
                                  * vUV(c,lbc,b)%Ptr%uv(k1,j1) * vUV(c,lc,a)%Ptr%duv(:,k1,i1)
                             forceA = forceA + CDM * 2.d0 * gVnon

                             gVnon = &
                                  vUV(c,lc, a)%Ptr%E(k1,1) &
                                  * vUV(c,lc, a)%Ptr%uv(k1,i1) * vUV(c,lbc,b)%Ptr%duv(:,k1,j1)
                             forceB = forceB + CDM * 2.d0 * gVnon

                          end do
                       end do
                    end do
                 end if

                 if( l==0 ) then
                    forceA = forceA * 0.5d0
                    forceB = forceB * 0.5d0
                 end if

                 Param%Data%vatom(a)%force = &
                      Param%Data%vatom(a)%force + forceA
                 Param%Data%vatom(b)%force = &
                      Param%Data%vatom(b)%force + forceB
                 Param%Data%vatom(c)%force = &
                      Param%Data%vatom(c)%force - forceA - forceB

              end do
           end do
        end do
     end do
  end do

  do a=1, Param%Data%natom
     do l=1-Param%Cell%nL, Param%Cell%nL-1
        do c=1, Param%Data%natom
           if( associated( vUV(c,l,a)%Ptr ) ) then
              if( Param%Option%spin_orbit ) then
                 deallocate( vUV(c,l,a)%Ptr%uvLS )
                 deallocate( vUV(c,l,a)%Ptr%duvLS )
              else
                 deallocate( vUV(c,l,a)%Ptr%uv )
                 deallocate( vUV(c,l,a)%Ptr%duv )
              end if
              deallocate( vUV(c,l,a)%Ptr%E )
              deallocate( vUV(c,l,a)%Ptr )
           end if
        end do
     end do
  end do

  if( associated(vUV) ) deallocate(vUV)

  return
end subroutine Force__calcN

subroutine Force__calcL
  use ac_misc_module
  use ac_mpi_module
  implicit none

  integer :: l, a, i, i1
  integer :: b, j, j1
  integer :: c, k, k1
  integer :: lc, lbc
  real(8) :: forceA(3), forceB(3)
  real(8) :: CDM

  type UVMatrix_type
     real(8), pointer :: uv(:,:)
     real(8), pointer :: duv(:,:,:)
     real(8), pointer ::  E(:)
  end type UVMatrix_type
  type UVMatrix_Ptr_type
     type(UVMatrix_type), pointer :: Ptr
  end type UVMatrix_Ptr_type
  type(UVMatrix_Ptr_type), pointer :: vUV(:,:,:)

  allocate( vUV(Param%Data%natom, 1-Param%Cell%nL:Param%Cell%nL-1, Param%Data%natom ) )

  do a=1, Param%Data%natom
     do l=1-Param%Cell%nL, Param%Cell%nL-1
        do c=1, Param%Data%natom
           vUV(a,l,c)%Ptr => null()
        end do
     end do
  end do

  do l=0, Param%Cell%nL-1
     do a=MPI%isatom, MPI%ieatom
        i=Base%vipao(a)

        do b=1, Param%Data%natom
           j=Base%vipao(b)

           if( .not. associated(Hamiltonian%vAtomMatrix(b,a,l)%Ptr) ) then
              cycle
           end if

           do lc=1-Param%Cell%nL, Param%Cell%nL-1
              lbc = Param__Cell__relative( l, lc )
              if( lbc == -100 ) cycle

              do c=1, Param%Data%natom
                 if( Potential%vnVpsloc(c) == 0 ) cycle

                 k=Potential%viVpsloc(c)

                 if( .not. Potential__intersect( Base%vpao(i), &
                      Potential%vVpsloc(k),  Param%Cell%vL(:,lc) ) ) then
                    cycle
                 end if

                 if( .not. Potential__intersect( Base%vpao(j), &
                      Potential%vVpsloc(k),  Param%Cell%vL(:,lbc) ) ) then
                    cycle
                 end if

                 if( .not. associated( vUV(c,lc,a)%Ptr ) ) then
                    allocate( vUV(c,lc,a)%Ptr )
                    allocate( vUV(c,lc,a)%Ptr%uv(  Potential%vnVpsloc(c), Base%vnpao(a) ) )
                    allocate( vUV(c,lc,a)%Ptr%duv( 3, Potential%vnVpsloc(c), Base%vnpao(a) ) )
                    allocate( vUV(c,lc,a)%Ptr%E( Potential%vnVpsloc(c) ) )

                    do k1=1, Potential%vnVpsloc(c)
                       do i1=1, Base%vnpao(a)
                          call Potential__bracketS ( vUV(c,lc,a)%Ptr%uv(k1,i1), &
                               Base%vpao(i+i1-1), Potential%vVpsloc(k+k1-1), Param%Cell%vL(:,lc) )
                          call Potential__bracketdS( vUV(c,lc,a)%Ptr%duv(:,k1,i1), &
                               Base%vpao(i+i1-1), Potential%vVpsloc(k+k1-1), Param%Cell%vL(:,lc) )
                       end do
                       vUV(c,lc,a)%Ptr%E(k1) = Potential%vVpsloc(k+k1-1)%E
                    end do
                 end if

                 if( .not. associated( vUV(c,lbc,b)%Ptr ) ) then
                    allocate( vUV(c,lbc,b)%Ptr )
                    allocate( vUV(c,lbc,b)%Ptr%uv(  Potential%vnVpsloc(c), Base%vnpao(b) ) )
                    allocate( vUV(c,lbc,b)%Ptr%duv( 3, Potential%vnVpsloc(c), Base%vnpao(b) ) )
                    allocate( vUV(c,lbc,b)%Ptr%E( Potential%vnVpsloc(c) ) )

                    do k1=1, Potential%vnVpsloc(c)
                       do j1=1, Base%vnpao(b)
                          call Potential__bracketS ( vUV(c,lbc,b)%Ptr%uv(k1,j1), &
                               Base%vpao(j+j1-1), Potential%vVpsloc(k+k1-1), Param%Cell%vL(:,lbc) )
                          call Potential__bracketdS( vUV(c,lbc,b)%Ptr%duv(:,k1,j1), &
                               Base%vpao(j+j1-1), Potential%vVpsloc(k+k1-1), Param%Cell%vL(:,lbc) )
                       end do
                       vUV(c,lbc,b)%Ptr%E(k1) = Potential%vVpsloc(k+k1-1)%E
                    end do
                 end if

                 forceA = 0.d0
                 forceB = 0.d0

                 do i1=MPI%ispao(a), MPI%iepao(a)
                    do j1=1, Base%vnpao(b)
                       if( Param%Option%spin_orbit ) then
                          CDM = dreal(Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,1) &
                               +Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDMLS(j1,i1,4))
                       else
                          if( Param%Option%spin_polar ) then
                             CDM = Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDM(j1,i1,1) &
                                  +Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDM(j1,i1,2)
                          else
                             CDM = Hamiltonian%vAtomMatrix(b,a,l)%Ptr%CDM(j1,i1,1)
                          end if
                       end if

                       do k1=1, Potential%vnVpsloc(c)
                          forceA = forceA + CDM * 2.d0 * &
                               vUV(c,lbc,b)%Ptr%E (k1) * &
                               vUV(c,lbc,b)%Ptr%uv(k1,j1) * vUV(c,lc,a)%Ptr%duv(:,k1,i1)
                          forceB = forceB + CDM * 2.d0 * &
                               vUV(c,lc,a )%Ptr%E (k1) * &
                               vUV(c,lc,a )%Ptr%uv(k1,i1) * vUV(c,lbc,b)%Ptr%duv(:,k1,j1)
                       end do

                    end do
                 end do

                 if( l==0 ) then
                    forceA = forceA * 0.5d0
                    forceB = forceB * 0.5d0
                 end if

                 Param%Data%vatom(a)%force = &
                      Param%Data%vatom(a)%force + forceA
                 Param%Data%vatom(b)%force = &
                      Param%Data%vatom(b)%force + forceB
                 Param%Data%vatom(c)%force = &
                      Param%Data%vatom(c)%force - forceA - forceB

              end do
           end do
        end do
     end do
  end do

  do a=1, Param%Data%natom
     do l=1-Param%Cell%nL, Param%Cell%nL-1
        do c=1, Param%Data%natom
           if( associated( vUV(c,l,a)%Ptr ) ) then
              deallocate( vUV(c,l,a)%Ptr%uv )
              deallocate( vUV(c,l,a)%Ptr%duv )
              deallocate( vUV(c,l,a)%Ptr%E  )
              deallocate( vUV(c,l,a)%Ptr )
           end if
        end do
     end do
  end do

  if( associated(vUV) ) deallocate(vUV)

  return
end subroutine Force__calcL

subroutine Force__calcI
  use ac_misc_module
  use ac_mpi_module
  implicit none

  integer :: a
  integer :: ia, ib, ic
  integer :: ia0, ib0, ic0
  type(Element_type), pointer :: elem
  type(PPcharge_type) :: rrhoval
  type(PPcharge_type) :: rrhopcc
  real(8) :: R(3)
  integer        :: range(6)
  real(8) :: grhovalat(3), grhopccat(3)
  real(8) :: force(3)
  real(8)        :: Vexc

  do a=MPI%isatom, MPI%ieatom
     if ( MPI%ispao(a) /= 1 ) cycle
     elem => Param__Data__getElement( Param%Data%vatom(a)%name )
     force = 0.d0

     if( Param%Option%na ) then
        call PPcharge__set( rrhoval, Param%Data%vatom(a)%Ro, elem%rhoval )
        call Param__Cell__getRange( range, rrhoval%Ro, rrhoval%Rc )

        do ic = range(5), range(6)
           ic0 = modp(ic,Param%Cell%Nc)
           do ib = range(3), range(4)
              ib0 = modp(ib,Param%Cell%Nb)
              do ia = range(1), range(2)
                 ia0 = modp(ia,Param%Cell%Na)

                 call Param__Cell__R(R,ia,ib,ic)
                 call PPcharge__gW(grhovalat,rrhoval,R)

                 force = force - Potential%dVhar(ia0,ib0,ic0) * grhovalat
              end do
           end do
        end do
     end if

     if( elem%pcc ) then
        call PPcharge__set( rrhopcc, Param%Data%vatom(a)%Ro, elem%rhopcc )
        call Param__Cell__getRange( range, rrhopcc%Ro, rrhopcc%Rc )

        do ic = range(5), range(6)
           ic0 = modp(ic,Param%Cell%Nc)
           do ib = range(3), range(4)
              ib0 = modp(ib,Param%Cell%Nb)
              do ia = range(1), range(2)
                 ia0 = modp(ia,Param%Cell%Na)

                 call Param__Cell__R(R,ia,ib,ic)
                 call PPcharge__gW(grhopccat,rrhopcc,R)

                 if( Param%Option%spin_orbit ) then
                    grhopccat = grhopccat*0.5d0
                 else if( Param%Option%spin_polar ) then
                    grhopccat = grhopccat*0.5d0
                 else
                 end if


                 if( Param%Option%spin_orbit ) then
                    Vexc = Potential%Vexc(1,ia0,ib0,ic0) + Potential%Vexc(4,ia0,ib0,ic0)
                 else if( Param%Option%spin_polar ) then
                    Vexc = Potential%Vexc(1,ia0,ib0,ic0) + Potential%Vexc(2,ia0,ib0,ic0)
                 else
                    Vexc = Potential%Vexc(1,ia0,ib0,ic0)
                 end if

                 force = force + Vexc * grhopccat
              end do
           end do
        end do
     end if

     Param%Data%vatom(a)%force = &
          Param%Data%vatom(a)%force + force * Param%Cell%dV

  end do

  return
end subroutine Force__calcI

subroutine Force__calcQ
  use ac_misc_module
  use ac_mpi_module
  implicit none

  integer        :: l, a, b
  type(Element_type), pointer :: elema
  type(Element_type), pointer :: elemb
  real(8) :: R(3)
  real(8)        :: dR
  type(PPcharge_type) :: rrhovalA, rrhovalB
  type(PPloc_type)    :: VvalA, VvalB
  real(8) :: force(3)

  if( Param%Option%na ) then

     do a=MPI%isatom, MPI%ieatom
        if ( MPI%ispao(a) /= 1 ) cycle
        elema => Param__Data__getElement( Param%Data%vatom(a)%name )
        call PPcharge__set( rrhovalA, Param%Data%vatom(a)%Ro, elema%rhoval )
        call PPloc__set( VvalA, Param%Data%vatom(a)%Ro, elema%Vval )

        do b=1, Param%Data%natom 
           if( a == b ) cycle

           elemb => Param__Data__getElement( Param%Data%vatom(b)%name )
           call PPloc__set( VvalB, Param%Data%vatom(b)%Ro, elemb%Vval )
           call PPcharge__set( rrhovalB, Param%Data%vatom(b)%Ro, elemb%rhoval )

           do l=0, Param%Cell%nL-1 
              if( l==0 .and. a<b ) cycle

              R = Param%Data%vatom(a)%Ro - Param%Data%vatom(b)%Ro &
                   - Param%Cell%vL(:,l)
              dR = sqrt(dot_product(R,R))

              if( dR > rrhovalA%Rc + VvalB%Rc ) cycle

              force = Param%Data%vatom(a)%Q*Param%Data%vatom(b)%Q/dR**3*R

              Param%Data%vatom(a)%force = &
                   Param%Data%vatom(a)%force + force
              Param%Data%vatom(b)%force = &
                   Param%Data%vatom(b)%force - force

              call Density__bracketdS( force, rrhovalA, VvalB, Param%Cell%vL(:,l) )

              Param%Data%vatom(a)%force = &
                   Param%Data%vatom(a)%force - force

              call Density__bracketdS( force, rrhovalB, VvalA, Param%Cell%vL(:,l)*(-1.d0) )

              Param%Data%vatom(b)%force = &
                   Param%Data%vatom(b)%force - force
           end do
        end do
     end do

  else

     do a=MPI%isatom, MPI%ieatom
        if ( MPI%ispao(a) /= 1 ) cycle
        do b=1, Param%Data%natom
           if( a == b ) cycle

           R = Param%Data%vatom(a)%Ro - Param%Data%vatom(b)%Ro
           dR = sqrt(dot_product(R,R))

           force = Param%Data%vatom(a)%Q * Param%Data%vatom(b)%Q / dR**3 * R

           Param%Data%vatom(a)%force = &
                Param%Data%vatom(a)%force + force
        end do
     end do

     if( .not. Param%Option%cluster ) then
        do a=MPI%isatom, MPI%ieatom
           if ( MPI%ispao(a) /= 1 ) cycle
           do b=1, Param%Data%natom 
              call Ewald__calcForce( force, Param%Data%vatom(b)%Q, &
                   Param%Data%vatom(b)%Ro, Param%Data%vatom(a)%Ro )

              Param%Data%vatom(a)%force = &
                   Param%Data%vatom(a)%force + force * Param%Data%vatom(a)%Q
           end do
        end do
     end if

  end if

  return
end subroutine Force__calcQ

subroutine Force__calc
  use ac_misc_module
  use ac_mpi_module
  implicit none
  integer :: a

  open(unit=16,file=Param%Option%file_ac_tempout,position='append')
  write(16,*)
  write(16,*) '**************************************************************'
  write(16,*) '************ start calculating forces on each atom'
  write(16,*) '**************************************************************'
  close(16)

  do a=1, Param%Data%natom
     Param%Data%vatom(a)%force = 0.d0
  end do

  call Force__calcSK
  call Force__calcT
  call Force__calcN
  call Force__calcI
  call Force__calcQ
  if( Param%Option%projection ) then
     call Force__calcL
  end if

  call MPI__Allreduce_Force( Param%Data%vatom )

  return
end subroutine Force__calc

subroutine Force__calcNorm( norm )
  use ac_misc_module
  use ac_mpi_module
  implicit none

  real(8), intent(out) :: norm
  integer :: a
  integer :: natom

  norm = 0.d0
  natom = 0
  if( .not. Param%Option%optimize ) return

  do a=1, Param%Data%natom
     if( .not. Param%Data%vatom(a)%optimize ) cycle

     norm = norm + dot_product( Param%Data%vatom(a)%force,Param%Data%vatom(a)%force )
     natom = natom + 1
  end do

  if( natom > 0 ) then
     norm = norm/natom
  end if

  return
end subroutine Force__calcNorm

subroutine Force__show
  use ac_misc_module
  use ac_mpi_module

  implicit none
  integer :: a

  if( MPI%root ) then
     open(unit=60,file='force.dat')
     write(60,*) '--------------------------------------------------------------'
     write(60,*) '------------   Atoms:'
     write(60,*) '--------------------------------------------------------------'

     write(60,*) ' atom    Force'
     do a=1, Param%Data%natom
        write(60,933) a,Param%Data%vatom(a)%name,                              &
             Param%Data%vatom(a)%force
     end do

     write(60,*) '--------------------------------------------------------------'
     write(60,*)
     close(60)
  end if
933 format(i4,'  ',a2,6f20.15)

  open(unit=16,file=Param%Option%file_ac_tempout,position='append')
  write(16,*) '--------------------------------------------------------------'
  write(16,*) '------------   Atoms:'
  write(16,*) '--------------------------------------------------------------'

  write(16,*) ' atom   Ro                            Force'
  do a=1, Param%Data%natom
     write(16,932) a,Param%Data%vatom(a)%name,                              &
          Param%Data%vatom(a)%Ro*AU_TO_AA, Param%Data%vatom(a)%force
  end do

  write(16,*) '--------------------------------------------------------------'
  write(16,*)
932 format(i4,'  ',a2,6f10.5)
  close(16)


  return
end subroutine Force__show
