!=======================================================================
!
!  PROGRAM  PHASE/0 2014.01 (rev.375)
!
!  "First-principles Electronic Structure Calculation Program"
!
!  PROGRAM: vdW
!
!  AUTHOR(S): Y. Ono
!  
!  Contact address :  Phase System Consortium
!                     E-mail: phase_system@nims.go.jp URL https://azuma.nims.go.jp
!  
!
!
!=======================================================================
!
!   The original version of this set of the computer programs "PHASE" was developed by 
!  the members of the Theory Group of Joint Research Center for Atom Technology 
!  (JRCAT), based in Tsukuba, in the period 1993-2001.  
!   Since 2002, this program set had been intensively developed as a part of the following 
!  national projects supported by the Ministry of Education, Culture, Sports, Science and 
!  Technology (MEXT) of Japan; "Frontier Simulation Software for Industrial Science 
!  (FSIS)" from 2002 to 2005, "Revolutionary Simulation Software (RSS21)" from 2006 to 
!  2008. "Research and Development of Innovative Simulation Software (RISS)" from 2008 
!  to 2013. These projects is lead by the Center for Research on Innovative Simulation 
!  Software (CISS), the Institute of Industrial Science (IIS), the University of Tokyo.
!   Since 2013, this program set has been further developed centering on PHASE System 
!  Consortium. 
!   The activity of development of this program set has been supervised by Takahisa Ohno.
!

!********************************** Note ******************************************
! This program calculates non-local exchange-correlation energy (Ecnl).
! This Algorism follows Dion's 1-shot method.
!            (Reference; M.Dion et al. PRL 92 (2004) 246401.)
! E_total = E_total(GGAx) + EcLDA + Ecnl
!
! Periodic systems are assumed.
! Only orthogonal corrdinates are applicable.
! Atomic unit (Hartree) is used.
!
!
!
! +++++++++++ Algorithm ++++++++++
! Ecnl  ---> gauleg
!            Coef
!            piDphi  ---> gauleg
!            cLDA
!
!
!
! ++++++++ Input & Output ++++++++
! Input file
!   nfchr.cube             : Electron density distribution given by
!                             GGA(exchange only) calculation
!   nfefn.data             : Total energy given by GGA(exchange only) calculation
!
! Output
!   E_total(vdW-DF)
!     = E_total(GGAx) + EcLDA + Ecnl
!
!         E_total(GGAx)    : Total energy given by GGA(exchange only) calculation
!         Ecnl             : Non-local Correlation energy
!         EcLDA            : LDA Correlation energy
!
!
!
! ++++++ Internal parameters +++++
! Each internal parameter is set to restrict the error less than 10meV.
!
! na(=nb) ( 30    )        : For Gauss-Legendre integration
! a2(=b2) ( 60    )        : Upper limit of the integral of a, b
! etai    ( 1.3   )        : Radius of analytical integrating sphere (a.u.)
! eta1    ( 8     )        : Radius of numerical integrating sphere  (a.u.)
! eta2    ( 40    )        : Cutoff of asymptotic function      (unit cell)
!
! Calculation scale is Order(da * db * eta1**3).
!
!
!
! ++++ Calculation procedure +++++
! 1. Call gauleg to get parameters for Gauss-Legendre integral.
!
! 2. 7 points formula is used for differentiation of total density.
!
! 3. Call Coef and obtain the coefficients of the asymptotic function.
!
! 4. Calculate the local correlation energy using LDA.
!
! 5. Output total correlation energy.
!     --- Ec = Ecnl + EcLDA
!
!
!
!                                              Written by Youky Ono in 2010/June
!**********************************************************************************
 

 
      Program vdW
      Implicit None
#ifdef _MPI_
      include 'mpif.h'
#endif
      Integer, Parameter :: DP = kind(1.d0)

 
!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
!...Translated by Crescent Bay Software VAST      7.1S0 15:22:42   2/ 4/2010  
      doubleprecision ni1u,nk1u,di1u,dk1u,gamm1u,phi1u,ni2u,nk2u,q0i1u,q0k1u
      Integer  nx,ny,nz,nspin,ispin
      Real(8) a1,a2
      Integer  ci,cj,ck,ca,cb,na,nb
      Parameter (na=30,a1=0,a2=60)

      Real(8) ddel,dphiD,del,phiD,tmp,phix,phiy
      Integer ndel,cdel,nphiD,cphiD
      Parameter (ndel=200,nphiD=1000)
      Real(8) phidD(0:ndel,-1:nphiD+1)

! Gauss-Legendre integration
      Real(8) xi(na),wi(na)
 
      Integer  cir,cix,ciy,ciz&
&             ,cjr,cjx,cjy,cjz&
&             ,ckr,ckx,cky,ckz&
&             ,cr ,cx ,cy ,cz,lcx,hcx,lcy,hcy,lcz,hcz
      Real(8)  di,dj,dk,dx,dy,dz,dv,n,ni,nj,nk
      Integer  i,j,k,zxp,zxm,zyp,zym,zzp,zzm

      Real(8)  eta,etai,eta1,eta2,maxq0,minq0
      Parameter (eta=0.0000000001,etai=1.3,eta1=8,eta2=20)
 
! Unit cell information from 'TAPP'
      Real(8) ax,aa,bb,detr
      Dimension aa(3,3),bb(3,3)
 
      Real(8), Allocatable  ::  C6(:,:,:)
 
      character*4   ::  acube,bcube
      integer       ::  ncube,ix,iy,iz
      real(8) n1,n2,n3,dcube(3,3)
      real(kind=8),allocatable :: atom(:,:)


      Real(8) rs,x,nnx,nny,nnz,nn2,nn2i,nn2k,r,zo
      Real(8) nxp,nxm,nyp,nym,nzp,nzm,rxb,rxa,ryb,rya,rzb,rza
      Real(8) zx(-3:3),zy(-3:3),zz(-3:3),rn(3,-3:3)
      Real(8),  Allocatable  ::  rho(:,:,:),Eii(:,:,:) &
&        ,dxrho(:,:,:),dyrho(:,:,:),dzrho(:,:,:) &
&        ,darho(:,:,:),dbrho(:,:,:),dcrho(:,:,:)
 
      Real(8) rhomin
      Parameter (rhomin=0.0001)
 
      Real(8)  kF,exc0,exLDA,excLDA,pi,Zab,wp,wq,q,m,h,e,GxcLDA
      Real(8)  gamma,a,b,C,d,q0,q0i,q0k
      Real(8)  v1,v2,v3,v4
      Parameter (pi=3.1415926535897932)
      Parameter (e=1,m=1)
      Parameter (Zab=-0.8491)
 
      Real(8) T,W,phi,psi,Ecnl,Ecik,Ecik1,Ecik2,Ecii,EcLDA,ExGGA
      doubleprecision h1
 
! Estimate running time
      Real(8) time
      Integer hh,mm,ss
 
! Time estimation
      Real(8) estime,escpu,eseta1,esnxyz,esna,adj
      Parameter (adj=1,escpu=0.000261)
#ifdef _MPI_
      Integer :: mype,npes,ierr
#endif
!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++



!---------------------- Calculation Start ----------------------
      Call CPU_TIME(time)
#ifdef _MPI_
  call initialize_mpi(npes,mype)
#endif

  open(6,file='nfefn.data')
  read(6,*) acube
  read(6,*) n1,n2,n3
  ExGGA = n3
  close(6)

  nspin=1
  open(5,file='nfchr.cube')
  read(5,*) acube
  read(5,*) bcube

  read(5,*) ncube,n1,n2,n3
  read(5,*) nx,aa(1,1),aa(1,2),aa(1,3)
  read(5,*) ny,aa(2,1),aa(2,2),aa(2,3)
  read(5,*) nz,aa(3,1),aa(3,2),aa(3,3)
  allocate(atom(ncube,5))

  read(5,*) ((atom(i,j),j=1,5),i=1,ncube)
  allocate(rho(nx,ny,nz))
     read(5,*) (((rho(i,j,k),k=1,nz),j=1,ny),i=1,nx)
  close(5)

  ax = 1


      Allocate(dxrho(nx,ny,nz))
      Allocate(dyrho(nx,ny,nz))
      Allocate(dzrho(nx,ny,nz))
      Allocate(darho(nx,ny,nz))
      Allocate(dbrho(nx,ny,nz))
      Allocate(dcrho(nx,ny,nz))
      Allocate(Eii(nx,ny,nz))
      Allocate(C6(-(nx-1):nx-1,-(ny-1):ny-1,-(nz-1):nz-1))


        
      nb = na
      dx = ax*((aa(1,1)**2+aa(1,2)**2+aa(1,3)**2)**0.5)
      dy = ax*((aa(2,1)**2+aa(2,2)**2+aa(2,3)**2)**0.5)
      dz = ax*((aa(3,1)**2+aa(3,2)**2+aa(3,3)**2)**0.5)
      dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
&        + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
&        + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))

 
 
      Do cjx = 1,nx
      Do cjy = 1,ny
      Do cjz = 1,nz
 
         Do j = -3,3
            zx(j) = MOD(2*nx+(cjx+j)-1,nx)+1
            zy(j) = MOD(2*ny+(cjy+j)-1,ny)+1
            zz(j) = MOD(2*nz+(cjz+j)-1,nz)+1
 
            rn(1,j) = rho(zx(j),cjy,cjz)
            rn(2,j) = rho(cjx,zy(j),cjz)
            rn(3,j) = rho(cjx,cjy,zz(j))
         End Do
 
         darho(cjx,cjy,cjz) = &
&           (rn(1,3)-9*rn(1,2)+45*rn(1,1)-45*rn(1,-1)+9*rn(1,-2)-rn(1,-3))/(60)
         dbrho(cjx,cjy,cjz) = &
&           (rn(2,3)-9*rn(2,2)+45*rn(2,1)-45*rn(2,-1)+9*rn(2,-2)-rn(2,-3))/(60)
         dcrho(cjx,cjy,cjz) = &
&           (rn(3,3)-9*rn(3,2)+45*rn(3,1)-45*rn(3,-1)+9*rn(3,-2)-rn(3,-3))/(60)
      End Do
      End Do
      End Do

      detr = (aa(1,1)*aa(2,2)*aa(3,3)+aa(1,2)*aa(2,3)*aa(3,1)+aa(1,3)*aa(2,1)*aa(3,2)) &
&          - (aa(1,1)*aa(2,3)*aa(3,2)+aa(1,2)*aa(2,1)*aa(3,3)+aa(1,3)*aa(2,2)*aa(3,1))

      bb(1,1) =  (aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2))/detr
      bb(2,1) = -(aa(2,1)*aa(3,3)-aa(2,3)*aa(3,1))/detr
      bb(3,1) =  (aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))/detr
      bb(1,2) = -(aa(1,2)*aa(3,3)-aa(1,3)*aa(3,2))/detr
      bb(2,2) =  (aa(1,1)*aa(3,3)-aa(1,3)*aa(3,1))/detr
      bb(3,2) = -(aa(1,1)*aa(3,2)-aa(1,2)*aa(3,1))/detr
      bb(1,3) =  (aa(1,2)*aa(2,3)-aa(1,3)*aa(2,2))/detr
      bb(2,3) = -(aa(1,1)*aa(2,3)-aa(1,3)*aa(2,1))/detr
      bb(3,3) =  (aa(1,1)*aa(2,2)-aa(1,2)*aa(2,1))/detr

      Do cjx = 1,nx
      Do cjy = 1,ny
      Do cjz = 1,nz

        dxrho(cjx,cjy,cjz) = &
&       (bb(1,1)*darho(cjx,cjy,cjz) + bb(1,2)*dbrho(cjx,cjy,cjz) + bb(1,3)*dcrho(cjx,cjy,cjz))
        dyrho(cjx,cjy,cjz) = &
&       (bb(2,1)*darho(cjx,cjy,cjz) + bb(2,2)*dbrho(cjx,cjy,cjz) + bb(2,3)*dcrho(cjx,cjy,cjz))
        dzrho(cjx,cjy,cjz) = &
&       (bb(3,1)*darho(cjx,cjy,cjz) + bb(3,2)*dbrho(cjx,cjy,cjz) + bb(3,3)*dcrho(cjx,cjy,cjz))

      End Do
      End Do
      End Do

 

!############## Call gauleg for Gauss-Legendre integral ##########
      Call gauleg(a1,a2,na,xi,wi)
!#################################################################
 
      maxq0 = 0
      minq0 = 100
      Do cir = 1,nx*ny*nz 
         cix = 1 + (cir - 1 - mod(cir - 1 + ny*nz,ny*nz))/(ny*nz) 
         ciy = 1 + (cir - ny*nz*(cix - 1) - 1 - mod(cir - 1 + nz,nz))/nz 
         ciz = cir - nz*(ny*(cix - 1) - 1 + ciy) 

         n = rho(cix,ciy,ciz)
         If (n .ge. rhomin) then

         nnx = dxrho(cix,ciy,ciz) 
         nny = dyrho(cix,ciy,ciz) 
         nnz = dzrho(cix,ciy,ciz) 
         nn2 = nnx**2 + nny**2 + nnz**2 
!         write(14,*) nn2**0.5
         rs = (3/((4*pi)*n))**0.3333333333333333 
         x = rs/11.4 
         gxclda = 0.5*((1 + x**3)*log(1 + 1/x) - x**2 + x/2)
         exclda = (-0.458/rs) - 0.0666*gxclda 
         kf = (((3*pi)*pi)*n)**0.3333333333333333 
         exlda = (-3*e)*kf/(4*pi) 
         exc0 = exclda - exlda*zab*nn2/(((18)*2)*kf*kf*n*n) 
         q0 = exc0*kf/exlda 

         If(q0.GE.maxq0) Then
            maxq0 = q0
         Endif
         If(q0.LE.minq0) Then
            minq0 = q0
         Endif
!         write(11,*) q0,n,nn2
!         write(11,*) gxclda,exclda,kf
!         write(11,*) exlda,exc0

         Endif

      End do
!      write(*,*) maxq0,minq0
!      maxq0 = 10

! phi(del,phiD)
      ddel = 1/Real(ndel)
      gamma=(4*pi)/9
      dphiD = (maxq0*eta1-minq0*etai)/(Real(nphiD))
!      write(11,*) dphiD,nphiD
   do cdel = 0,ndel
   do cphiD   = -1,nphiD+1
      del = Real(cdel)*ddel
      phiD = minq0*etai + Real(cphiD)*dphiD
!      write(11,*) minq0,etai,cphiD,dphiD,phiD
      di = phiD*(1+del)
      dk = phiD*(1-del)

      tmp = 0
   do ca = 1, na 
       a = xi(ca) 
   do cb = 1, na
       b = xi(cb) 

      h = 1 - exp((-gamma*(a/di)**2)) 
      v1 = (a**2)/(2*h) 
 
      h = 1 - exp((-gamma*(b/di)**2)) 
      v2 = (b**2)/(2*h) 
 
      h = 1 - exp((-gamma*(a/dk)**2)) 
      v3 = (a**2)/(2*h) 
 
      h = 1 - exp((-gamma*(b/dk)**2)) 
      v4 = (b**2)/(2*h) 
 
      w = 2*((3-(a*a))*b*(cos(b))*(sin(a)) + &
&         (3-(b*b))*a*(cos(a))*(sin(b)) +    &
&         ((a*a)+(b*b)-3)*(sin(a))*(sin(b))  &
&         -3*a*b*(cos(a))*(cos(b)))/(a*b)**3 

      t = 0.5*(1/(v1 + v2) + 1/(v3 + v4))*(1/((v1 + &
&         v3)*(v2 + v4)) + 1/((v1 + v4)*(v2 + v3))) 

      tmp = tmp + wi(ca)*wi(cb)*(a*b)**2*w*t 

   end do
   end do
      tmp = tmp * (2*m*(e**4)/(pi**2))

   phidD(cdel,cphiD) = tmp
!   write(11,*) del,phiD,tmp !4*pi*(D**2)*tmp

   end do
!   write(11,*) ' '
   end do
      tmp = 0
 
 
      Ecik1=0
#ifndef _MPI_
!$OMP PARALLEL DO &
!$OMP& DEFAULT(SHARED) &
!$OMP& PRIVATE(cix, ciy, ciz, ni, nnx, nny, nnz, nn2i, lcx, hcx, lcy &
!$OMP&   , hcy, lcz, hcz, ckr, r, cx, cy, cz, nk, nn2k, di, dk, n, nn2, rs, x,  &
!$OMP&   gxclda, exclda, kf, exlda, exc0, q0, d, gamma, phi, a, h1, v1, b, h,   &
!$OMP&   v2, v3, v4, w, t, cb, ni1u, nk1u, di1u, dk1u, gamm1u, phi1u,i,j,k,     &
!$OMP&   del,cdel,phiD,cphiD,phix,phiy)                                         &
!$OMP&    REDUCTION(+:ecik1) 
#endif
      do cir = 1,nx*ny*nz 
#ifdef _MPI_
         if(mod(cir,npes)/=mype) cycle
#endif
         cix = 1 + (cir - 1 - mod(cir - 1 + ny*nz,ny*nz))/(ny*nz) 
         ciy = 1 + (cir - ny*nz*(cix - 1) - 1 - mod(cir - 1 + nz,nz))/nz 
         ciz = cir - nz*(ny*(cix - 1) - 1 + ciy) 
 
         ni1u = rho(cix,ciy,ciz) 
         if (ni1u .ge. rhomin) then 
            nnx = dxrho(cix,ciy,ciz) 
            nny = dyrho(cix,ciy,ciz) 
            nnz = dzrho(cix,ciy,ciz) 
            nn2i = nnx**2 + nny**2 + nnz**2 
 
            lcx = cix - 4*(aint((eta1/dx))) 
            hcx = cix + 4*(aint((eta1/dx))) 
            lcy = ciy - 4*(aint((eta1/dy))) 
            hcy = ciy + 4*(aint((eta1/dy))) 
            lcz = ciz - 4*(aint((eta1/dz))) 
            hcz = ciz + 4*(aint((eta1/dz))) 
 
            do ckx = 1, hcx - lcx + 1 
               do cky = 1, hcy - lcy + 1 
                  do ckz = 1, hcz - lcz + 1 

!                     r = (((cix + 1 - ckx - lcx)*dx)**2 + ((ciy + 1 - cky - lcy&
!     &                  )*dy)**2 + ((ciz + 1 - ckz - lcz)*dz)**2)**0.5 

               i = cix+1-ckx-lcx
               j = ciy+1-cky-lcy
               k = ciz+1-ckz-lcz
               r = ((i*aa(1,1)+j*aa(2,1)+k*aa(3,1))**2 &
&                 + (i*aa(1,2)+j*aa(2,2)+k*aa(3,2))**2 &
&                 + (i*aa(1,3)+j*aa(2,3)+k*aa(3,3))**2)**0.5

               if (r.le.eta1 .and. r.gt.etai) then
!               if (cir.eq.43.and.k.eq.0) then
!                  write(*,*) i*aa(1,1)+j*aa(2,1),i*aa(1,2)+j*aa(2,2),r
!               end if
                        cx = mod(20*nx + lcx + ckx - 2,nx) + 1 
                        cy = mod(20*ny + lcy + cky - 2,ny) + 1 
                        cz = mod(20*nz + lcz + ckz - 2,nz) + 1 
                        nk1u = rho(cx,cy,cz) 
                        if (nk1u .ge. rhomin) then 
                           nnx = dxrho(cx,cy,cz) 
                           nny = dyrho(cx,cy,cz) 
                           nnz = dzrho(cx,cy,cz) 
                           nn2k = nnx**2 + nny**2 + nnz**2 
 
                           di1u = 0 
                           dk1u = 0 
                           n = ni1u + 0.D0 
                           nn2 = nn2i + 0.D0 
                           rs = (3/((4*pi)*n))**0.3333333333333333 
                           x = rs/11.4 
                           gxclda = 0.5*((1 + x**3)*log(1 + 1/x) - x**2 + x/2&
     &                         - (0)) 
                           exclda = (-0.458/rs) - 0.0666*gxclda 
                           kf = (((3*pi)*pi)*n)**0.3333333333333333 
                           exlda = (-3*e)*kf/(4*pi) 
                           exc0 = exclda - exlda*zab*nn2/(((18)*2)*kf*kf*n*n) 
                           q0 = exc0*kf/exlda 
                           d = r*q0 
                           di1u = di1u + d 
                           dk1u = dk1u + 0.D0 
                           n = 0.D0 + nk1u 
                           nn2 = 0.D0 + nn2k 
                           rs = (3/((4*pi)*n))**0.3333333333333333 
                           x = rs/11.4 
                           gxclda = 0.5*((1 + x**3)*log(1 + 1/x) - x**2 + x/2&
     &                         - (0)) 
                           exclda = (-0.458/rs) - 0.0666*gxclda 
                           kf = (((3*pi)*pi)*n)**0.3333333333333333 
                           exlda = (-3*e)*kf/(4*pi) 
                           exc0 = exclda - exlda*zab*nn2/(((18)*2)*kf*kf*n*n) 
                           q0 = exc0*kf/exlda 
                           d = r*q0 
                           di1u = di1u + 0.D0 
                           dk1u = dk1u + d 
 
                           di1u = di1u + eta 
                           dk1u = dk1u + eta 
 
! Dispersion law of wq.
                           gamm1u = (4*pi)/9 
 
                           del = ABS((di1u-dk1u)/(di1u+dk1u))
                           cdel = AINT(del/ddel)
!                        write(11,*) del,ddel,cdel
                           phiD = (di1u+dk1u)/2
                           cphiD = AINT(REAL(nphiD)*(phiD-minq0*etai)/(maxq0*eta1-minq0*etai))

!                           write(11,*) cphiD,phiD,di1u/r,dk1u/r
!                         write(11,*) phidD(cdel,cphiD)

                           phix = del/ddel - REAL(cdel)
                           phiy = REAL(nphiD)*(phiD-minq0*etai)/(maxq0*eta1-minq0*etai) - REAL(cphiD)

                           phi1u =  (1-phix)*(1-phiy) * phidD(cdel  ,cphiD  ) &
&                                  +   phix *(1-phiy) * phidD(cdel+1,cphiD  ) &
&                                  +(1-phix)*   phiy  * phidD(cdel  ,cphiD+1) &
&                                  +   phix *   phiy  * phidD(cdel+1,cphiD+1)

                           ecik1 = ecik1 + dv**2*0.5*ni1u*phi1u*nk1u 
                           
                        endif 
 
                     endif 
                  end do 
               end do 
            end do 
         endif 
      end do 
#ifdef _MPI_ 
      if(npes>1)then
         call mpi_allreduce(MPI_IN_PLACE,ecik1,1,mpi_double_precision,mpi_sum,mpi_comm_world,ierr)
      endif
#endif
! Ecik1 is the non-local correlation energy E_c^nl summated for i not= k.
 
!      write(*,*) 'test'
 
!########  Call Coef and obtain the coefficients of the ##########
!########  asymptotic function.                         ##########
   Call Coef(nx,ny,nz,aa,eta1,eta2,C6)
!#################################################################
 
 
 
   Ecik2 = 0
#ifndef _MPI_
!$OMP PARALLEL DO PRIVATE(ni, nk, q0i, q0k, psi, c, cjx, cjy, cjz, n, rs, x, &
!$OMP&   gxclda, exclda, kf, exlda, nnx, nny, nnz, nn2, exc0, q0, ni2u, nk2u, &
!$OMP&   q0i1u, q0k1u, cix, ciy, ciz, ckx, cky, ckz) REDUCTION(+:ecik2) 
#endif
      do cir = 1, nx*ny*nz
#ifdef _MPI_
         if(mod(cir,npes)/=mype) cycle
#endif
         cix = 1 + (cir - 1 - mod(cir - 1 + ny*nz,ny*nz))/(ny*nz) 
         ciy = 1 + (cir - ny*nz*(cix - 1) - 1 - mod(cir - 1 + nz,nz))/nz 
         ciz = cir - nz*(ny*(cix - 1) - 1 + ciy) 

!      do cix = 1, nx 
!         do ciy = 1, ny 
!            do ciz = 1, nz 

               ni2u = rho(cix,ciy,ciz) 
               if (ni2u .ge. rhomin) then 
 
      do ckr = 1, nx*ny*nz
         ckx = 1 + (ckr - 1 - mod(ckr - 1 + ny*nz,ny*nz))/(ny*nz)
         cky = 1 + (ckr - ny*nz*(ckx - 1) - 1 - mod(ckr - 1 + nz,nz))/nz
         ckz = ckr - nz*(ny*(ckx - 1) - 1 + cky)

!                  do ckx = 1, nx 
!                     do cky = 1, ny 
!                        do ckz = 1, nz 

                           nk2u = rho(ckx,cky,ckz) 
                           if (nk2u .ge. rhomin) then 
 
                              q0i1u = 0 
                              q0k1u = 0 
                              do j = 1, 2 
                                 cjx = cix*mod(j,2) + ckx*mod(j - 1,2) 
                                 cjy = ciy*mod(j,2) + cky*mod(j - 1,2) 
                                 cjz = ciz*mod(j,2) + ckz*mod(j - 1,2) 
                                 n = ni2u*(mod(j,2)) + nk2u*(mod(j - 1,2)) 
 
                                 rs = (3/((4*pi)*n))**0.3333333333333333 
                                 x = rs/11.4 
                                 gxclda = 0.5*((1 + x**3)*log(1 + 1/x) - x**2&
     &                               + x/2 - 0) 
                                 exclda = (-0.458/rs) - 0.0666*gxclda 
 
                                 kf = (3*pi*pi*n)**0.3333333333333333 
                                 exlda = (-3)*e*kf/(4*pi) 
                                 nnx = dxrho(cjx,cjy,cjz) 
                                 nny = dyrho(cjx,cjy,cjz) 
                                 nnz = dzrho(cjx,cjy,cjz) 
                                 nn2 = nnx**2 + nny**2 + nnz**2 
 
! Eq.(12) of Dion PRL92,246401
                                 exc0 = exclda - exlda*zab*nn2/(36*kf*kf*n*n) 
 
! Eq.(11) of Dion PRL92,246401
                                 q0 = exc0*kf/exlda 
                                 q0i1u = q0i1u + q0*(mod(j,2)) 
                                 q0k1u = q0k1u + q0*(mod(j - 1,2)) 
                              end do 
 
                              psi = ((q0i1u*q0k1u)**2*(q0i1u**2 + q0k1u**2))**(&
     &                           -1) 
                              c = 12*m*e**4*((4*pi)/9)**3 
!                              write(11,*) q0 
                              ecik2 = ecik2 - dv**2*c*ni2u*psi*nk2u&
     &                                 *0.5*c6(cix-ckx,ciy-cky,ciz-ckz)

!                              write(*,*) dv**2*c*ni2u*psi*nk2u,&
!     &                                 c6(cix-ckx,ciy-cky,ciz-ckz),cix-ckx,ciy-cky,ciz-ckz

                           endif 
!                        end do 
!                     end do 
                  end do 

               endif 
!            end do 
!         end do 
      end do 
#ifdef _MPI_
      if(npes>1)then
         call mpi_allreduce(MPI_IN_PLACE,ecik2,1,mpi_double_precision,mpi_sum,mpi_comm_world,ierr)
      endif
#endif
 
   Ecik = Ecik1 + Ecik2
 

 
!######  Call piDphi and obtain the matrix elements of Eii #######
   Call piDphi(nx,ny,nz,rho,dxrho,dyrho,dzrho,dv,etai,na,xi,wi,Eii)
!#################################################################
 
 
 
   Ecii=0
#ifndef _MPI_
!$OMP PARALLEL DO PRIVATE(n, rs, x, gxclda, exclda, kf, exlda, nnx, nny, nnz, &
!$OMP&   nn2, exc0, q0, cjx) REDUCTION(+:ecii) 
#endif
      do cjy = 1, ny 
         do cjz = 1, nz 
            do cjx = 1, nx 
#ifdef _MPI_
               if(mod(cjy*cjz*cjx,npes)/=mype) cycle
#endif
               n = rho(cjx,cjy,cjz) 
               if (n .ge. rhomin) then 
 
                  rs = (3/(4*pi*n))**0.3333333333333333 
                  x = rs/11.4 
                  gxclda = 0.5*((1 + x**3)*log(1 + 1/x) - x**2 + x/2 - 1/3) 
                  exclda = (-0.458/rs) - 0.0666*gxclda 
 
                  kf = (3*pi*pi*n)**0.3333333333333333 
                  exlda = (-3)*e*kf/(4*pi) 
                  nn2 = dxrho(cjx,cjy,cjz)**2 + dyrho(cjx,cjy,cjz)**2 + dzrho(&
     &               cjx,cjy,cjz)**2 
 
! Eq.(12) of Dion PRL92,246401
                  exc0 = exclda - exlda*zab*nn2/(9*2*2*kf*kf*n*n) 
 
                  ecii = ecii + dv*0.5*n**2*eii(cjx,cjy,cjz)/(exc0*kf/exlda)**3 
 
               endif 
            end do 
         end do 
      end do 
 
#ifdef _MPI_
      if(npes>1) call mpi_allreduce(MPI_IN_PLACE,ecii,1,mpi_double_precision,mpi_sum,mpi_comm_world,ierr)
#endif
! Ecii is the non-local correlation energy E_c^nl summated for i = k.
!   write(*,*) '#',Ecii
 
   Ecnl = Ecik + Ecii
 
!   write(*,*) Ecik1,Ecik2
!   write(*,*) Ecik,Ecii,Ecnl
 
 
 
!####  Call cLDA and obtain the local correlation energy EcLDA ###
   Call cLDA(nx,ny,nz,rho,rhomin,dv,EcLDA)
!#################################################################
#ifdef _MPI_ 
   if (mype==0) then
#endif
   Write(*,*)  ' '
   Write(*,11) ExGGA
   Write(*,*)  ' '
   Write(*,12) EcLDA
   Write(*,13) Ecnl
   Write(*,14) EcLDA + Ecnl
   Write(*,*)  ' '
   Write(*,15) EcLDA + Ecnl + ExGGA
   Write(*,*)  ' '
 11    Format('E_total(GGA exchange)      = ',F19.13)

 12    Format('Ec(LDA)                    = ',F19.13)
 13    Format('Ec(nl)                     = ',F19.13)
 14    Format('Ec (= Ec(LDA) + Ec(nl) )   = ',F19.13)

 15    Format('E_total(vdW-DF)            = ',F19.13)

   Write(*,*)  '                  Given in Hartree atomic units'
   Write(*,*)  ' '
 
      Call CPU_TIME(time)
      ss=time
      hh=time/3600
      ss=Mod(ss,3600)
      mm=ss/60
      ss=Mod(ss,60)
      time=time-3600*hh-60*mm-ss
      Write(*,16) hh,mm,ss+time
 16    Format('# Calculation time ',I5,':',I2,':',F7.4)
#ifdef _MPI_
   endif
   call mpi_finalize(ierr)
#endif
 
! End of Dion_main
      End Program vdW

 
 
!** SUBROUTINE CPU_TIME ************************
      Subroutine CPU_TIME(time)
      Real(8) time
      Integer count,count_rate,hh,mm,ss
 
      Call System_clock(count,count_rate)
      If(count_rate.Eq.0) Then
         time=Real(count)/count_rate
      Else
         time=-1
      End If
 
      End Subroutine CPU_TIME
!** End SUBROUTINE CPU_TIME ********************




!** SUBROUTINE gauleg **************************
      Subroutine gauleg(x1,x2,n,xi,wi)
      Implicit none


!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
      Integer  m,j,i,n
      Real(8)  x1,x2,z1,z,xm,xl,pp,p3,p2,p1,pi,eta
      Parameter (pi=3.1415926535897932 , eta=0.0000000001)

      Real(8)  ::  xi(n),wi(n)
!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++



!---------------------- Calculation Start ----------------------
      m=(n+1)/2
      xm=0.5*(x2+x1)
      xl=0.5*(x2-x1)

      Do i=1,m
         z=COS(pi*(i-0.25)/(n+0.5))

         Do While (ABS(z-z1).GT.eta)
            p1=1.0
            p2=0.0

            Do j=1,n
               p3=p2
               P2=p1
               p1=((2.0*j-1.0)*z*p2-(j-1.0)*p3)/j
            End Do

            pp=n*(z*p1-p2)/(z*z-1.0)
            z1=z
            z=z1-p1/pp
            
         End Do

         xi(i) = xm-xl*z
         xi(n+1-i) = xm+xl*z
         wi(i) = 2.0*xl/((1.0-z*z)*pp*pp)
         wi(n+1-i) = wi(i)

      End Do
      End Subroutine gauleg
!** End SUBROUTINE gauleg **********************




!** SUBROUTINE Coef ****************************
      Subroutine Coef(nx,ny,nz,aa,eta1,eta2,C6)
      Implicit None

!************************ Note *********************************
! This Algorism calculates the coefficient of the asymptotic 
! function phi.
!
! Input
!   nx,ny,nz,ax,ay,az,eta1,eta2 : Information of the unit cell
!
! Output
!   C6                          : Coefficient of the asymptotic
! function of the core function phi.
!
!
!                            Written by Youky Ono in 2009/Sep.
!***************************************************************



!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
      Integer  nx,ny,nz,nspin
      Real(8) f1,f2,f3,rxyz

      Integer  cir,cix,ciy,ciz&
&             ,ckr,ckx,cky,ckz
      Real(8)  di,dj,dk,dx,dy,dz,dv,aa(3,3)
      Integer  i,j,k,tx,ty,tz

      Real(8)  eta1,eta2

      Real(8) :: C6(-(nx-1):nx-1,-(ny-1):ny-1,-(nz-1):nz-1)
!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++



!---------------------- Calculation Start ----------------------
! As Main program !
      Do cix = -(nx-1),nx-1
      Do ciy = -(ny-1),ny-1
      Do ciz = -(nz-1),nz-1
         C6(cix,ciy,ciz) = 0
      End Do
      End Do
      End Do

      Do ckx = -(nx-1),nx-1
      Do cky = -(ny-1),ny-1
      Do ckz = -(nz-1),nz-1
         Do tx=-1*eta2,eta2
         Do ty=-1*eta2,eta2
         Do tz=-1*eta2,eta2

            f1 = (REAL(ckx)/REAL(nx))+tx
            f2 = (REAL(cky)/REAL(ny))+ty
            f3 = (REAL(ckz)/REAL(nz))+tz

            rxyz = &
&             ((nx*f1*aa(1,1)+ny*f2*aa(2,1)+nz*f3*aa(3,1))**2 &
&             +(nx*f1*aa(1,2)+ny*f2*aa(2,2)+nz*f3*aa(3,2))**2 &
&             +(nx*f1*aa(1,3)+ny*f2*aa(2,3)+nz*f3*aa(3,3))**2)**0.5

         If(rxyz.GT.eta1) Then
            C6(ckx,cky,ckz) = C6(ckx,cky,ckz) + (rxyz**(-6))
         End If

         End Do
         End Do
         End Do
      End Do
      End Do
      End Do


    End Subroutine Coef
!** End SUBROUTINE Coef ************************



!** SUBROUTINE piDphi **************************
      Subroutine piDphi(nx,ny,nz,rho,dxrho,dyrho,dzrho,dv,etai,na,xi,wi,Eii)
      implicit none

!************************ Note *********************************
! This Algorism follows Dion's 1-shot method.
!
! This program is a subroutine.
! This program calculates the matrix elements of Eii(cjx,cjy,cjz).
!
! Input
!   rho(nrxyz,nsipn) : Total density
!
! Output
!   Eii(cjx,cjy,cjz) : Result of the numerical integral. 
!                      The integrand is 4*pi*(D**2)*phi.
!
!
!                            Written by Youky Ono in 2009/June.
!***************************************************************



!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
      integer  nx,ny,nz,nspin
      real(8) da,db,a1,a2
      integer  ci,cj,ck,ca,cb,na

! Gauss-Legendre integration
      Real(8) xi(na),wi(na)

      integer cD,nD
      parameter (nD=75)
      real(8) maxD,minD,dD,LD,PLD,LDxi(nD),LDwi(nD)
      parameter (minD=0)

      integer  cjx,cjy,cjz
      real(8)  di,dj,dk,eta,etai,dx,dy,dz,dv,n,ni,nj,nk
      integer  i,j,k,zxp,zxm,zyp,zym,zzp,zzm
      parameter (eta=0.00000001)

! Unit cell information from 'TAPP'
      real(8) tappax,tappaa
      dimension tappaa(3,3)

      real(8) rs,x,nnx,nny,nnz,nn2,r
      real(8) nxp,nxm,nyp,nym,nzp,nzm
      real(8) zx(-3:3),zy(-3:3),zz(-3:3),rn(3,-3:3)
      real(8)  ::  rho(nx,ny,nz),Eii(nx,ny,nz) &
&        ,dxrho(nx,ny,nz),dyrho(nx,ny,nz),dzrho(nx,ny,nz) 

      real(8)  kF,exc0,exLDA,excLDA,q0,pi,Zab,wp,wq,q,m,h,e,GxcLDA
      real(8)  gamma,a,b,d,px
      real(8)  v1,v2,v3,v4
      parameter (pi=3.1415926535897932)
      parameter (e=1,m=1)
      parameter (Zab=-0.8491)

      real(8) T,W,phi,pphi

! Estimate running time
      real(8) time
      integer hh,mm,ss


!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++



!---------------------- Calculation Start ----------------------
!      dv = dx*dy*dz
      gamma = 4*pi/9


!      Do cjx = 1,nx
!      Do cjy = 1,ny
!      Do cjz = 1,nz

!         Do j = -3,3
!            zx(j) = MOD(2*nx+(cjx+j)-1,nx)+1
!            zy(j) = MOD(2*ny+(cjy+j)-1,ny)+1
!            zz(j) = MOD(2*nz+(cjz+j)-1,nz)+1

!            rn(1,j) = rho(zx(j),cjy,cjz)
!            rn(2,j) = rho(cjx,zy(j),cjz)
!            rn(3,j) = rho(cjx,cjy,zz(j))
!         End Do

!         dxrho(cjx,cjy,cjz) = &
!&           (rn(1,3)-9*rn(1,2)+45*rn(1,1)-45*rn(1,-1)+9*rn(1,-2)-rn(1,-3))/(60*dx)
!         dyrho(cjx,cjy,cjz) = &
!&           (rn(2,3)-9*rn(2,2)+45*rn(2,1)-45*rn(2,-1)+9*rn(2,-2)-rn(2,-3))/(60*dy)
!         dzrho(cjx,cjy,cjz) = &
!&           (rn(3,3)-9*rn(3,2)+45*rn(3,1)-45*rn(3,-1)+9*rn(3,-2)-rn(3,-3))/(60*dz)

!      End Do
!      End Do
!      End Do


   do cjx=1,nx
   do cjy=1,ny
   do cjz=1,nz      

      Eii(cjx,cjy,cjz)=0
      n = rho(cjx,cjy,cjz)

      rs = (3/(4*pi*n))**0.3333333333333333
      x = rs/11.4
      GxcLDA = 0.5*((1+x**3)*LOG(1+1/x)-x**2+x/2-1/3)
      excLDA = -0.458/rs-0.0666*GxcLDA

      kF = (3*pi*pi*n)**0.3333333333333333
      exLDA = -3*e*kF/(4*pi)
      nnx = dxrho(cjx,cjy,cjz) 
      nny = dyrho(cjx,cjy,cjz) 
      nnz = dzrho(cjx,cjy,cjz) 
      nn2 = (nnx**2)+(nny**2)+(nnz**2)

! Eq.(12) of Dion PRL92,246401
      exc0 = excLDA - exLDA*Zab*nn2/(9*2*2*kF*kF*n*n)

! Eq.(11) of Dion PRL92,246401
      q0 = exc0*kF/exLDA

!      nD=NINT(q0*etai/dD)
      maxD=q0*etai



!############## Call gauleg for Gauss-Legendre integral ##########
      Call gauleg(minD,maxD,nD,LDxi,LDwi)
!#################################################################


      
      Do cD=1,nD
         LD = LDxi(cD)

      phi = 0
      Do ca=1,na
      Do cb=1,ca

         a = xi(ca)
         b = xi(cb)

         h = 1-EXP(-gamma*((a/LD)**2))
         v1 = (a**2)/(2*h)

         h = 1-EXP(-gamma*((b/LD)**2))
         v2 = (b**2)/(2*h)

         h = 1-EXP(-gamma*((a/LD)**2))
         v3 = (a**2)/(2*h)

         h = 1-EXP(-gamma*((b/LD)**2))
         v4 = (b**2)/(2*h)

         W = 2*((3-a*a)*b*COS(b)*SIN(a) + (3-b*b)*a*COS(a)*SIN(b) &
&            + (a*a+b*b-3)*SIN(a)*SIN(b) &
&            -3*a*b*COS(a)*COS(b))/((a*b)**3)

! ((a*b)**2)*T
         T = ((a*b)**2) &
&            * 0.5*(1/(v1+v2) + 1/(v3+v4) ) &
&            * (1/((v1+v3)*(v2+v4)) + 1/((v1+v4)*(v2+v3)))

         phi = phi + wi(ca)*wi(cb)*W*T

      End Do
      End Do

      phi = phi * 2

      Do ca=1,na
         cb=ca
         a = xi(ca)
         b = xi(cb)

         h = 1-EXP(-gamma*((a/LD)**2))
         v1 = (a**2)/(2*h)

         h = 1-EXP(-gamma*((b/LD)**2))
         v2 = (b**2)/(2*h)

         h = 1-EXP(-gamma*((a/LD)**2))
         v3 = (a**2)/(2*h)

         h = 1-EXP(-gamma*((b/LD)**2))
         v4 = (b**2)/(2*h)

         W = 2*((3-a*a)*b*COS(b)*SIN(a) + (3-b*b)*a*COS(a)*SIN(b) &
&            + (a*a+b*b-3)*SIN(a)*SIN(b) &
&            -3*a*b*COS(a)*COS(b))/((a*b)**3)

! ((a*b)**2)*T
         T = ((a*b)**2) &
&            * 0.5*(1/(v1+v2) + 1/(v3+v4) ) &
&            * (1/((v1+v3)*(v2+v4)) + 1/((v1+v4)*(v2+v3)))

         phi = phi - wi(ca)*wi(cb)*W*T

      End Do
      

      phi = phi * 2*m*(e**4)/(pi**2)

      Eii(cjx,cjy,cjz) = Eii(cjx,cjy,cjz) &
&                        + LDwi(cD)*4*pi*(LD**2)*phi

      End Do

   end do
   end do
   end do

      end Subroutine piDphi
!** End SUBROUTINE piphi ***********************




!** SUBROUTINE cLDA ****************************
      Subroutine cLDA(nx,ny,nz,rho,rhomin,dv,EcLDA)
      implicit none

!************************ Note *********************************
! This Algorism follows Dion's 1-shot method.
!
! This program is a subroutine.
! This program calculates the correlation energy from LDA.
! The formula is given at Eq.(58) (p.93) of 'Theory of the 
!   Inhomogeneous Electron Gas' Lundqvist, March.
! 
!
! Input
!   rho(nrxyz,nsipn) : Total density
!
! Output
!   EcLDA : Corrilation energy from LDA.
!
!
!                            Written by Youky Ono in 2009/Jul.
!***************************************************************



!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
      integer  nx,ny,nz,nspin
      integer  ci,cj,ck,cjx,cjy,cjz
      real(8)  dx,dy,dz,dv,eta
      integer  i,j,k
      parameter (eta=0.00000001)

      real(8) rs,x,aB,ec,EcLDA,n,A,C,rhomin
      parameter (A=21,C=0.045)
      real(8)  ::  rho(nx,ny,nz)
      parameter (aB=1) ! aB=1[a.u.]

      real(8)  pi,e,m
      parameter (pi=3.1415926535897932)
      parameter (e=1,m=1) ! Hatree atomic unit

!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++



!---------------------- Calculation Start ----------------------
!      dv = dx*dy*dz

      EcLDA=0
   do cjx=1,nx
   do cjy=1,ny
   do cjz=1,nz      
      n = rho(cjx,cjy,cjz)

   if(n.GE.rhomin) then

      rs = ((3/(4*pi*n))**0.3333333333333333)/aB
      x = rs/11.4
      ec = -0.0666*0.5*((1+x**3)*LOG(1+1/x)-x**2+x/2-1/3)

      EcLDA = EcLDA + dv*n*ec

   end if
   end do
   end do
   end do


      end Subroutine cLDA
!** End SUBROUTINE cLDA **********************

#ifdef _MPI_
    SUBROUTINE initialize_mpi(npes,mype)
       implicit none
       integer,intent(out) :: npes
       integer,intent(out) :: mype
       include 'mpif.h'
       integer :: ierr
       call mpi_init(ierr)
       call mpi_comm_size(mpi_comm_world,npes,ierr)
       call mpi_comm_rank(mpi_comm_world,mype,ierr)
       if(ierr/=0) then
          write(6,'(a,i5)') 'failed initialization of MPI; error code : ',ierr
          stop
       endif
    end SUBROUTINE initialize_mpi
#endif

