phase0_2015.01.01.patch

differences between files icon phase0_2015.01.01.patch — differences between files, 1.03 MB (1081551 bytes)
ファイルコンテンツ

diff -uprN phase0_2015.01/src_phase/ChargeDensity_Construction.F90 phase0_2015.01.01/src_phase/ChargeDensity_Construction.F90
--- phase0_2015.01/src_phase/ChargeDensity_Construction.F90	2015-09-14 15:16:01.000000000 +0900
+++ phase0_2015.01.01/src_phase/ChargeDensity_Construction.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 455 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 494 $)
 !
 !  SUBROUINE: ChargeDensity_Construction, FermiEnergyLevel, 
 !             CD_Softpart_plus_Hardpart
@@ -32,7 +32,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine ChargeDensity_Construction(ic)
-! $Id: ChargeDensity_Construction.F90 455 2015-09-07 08:04:26Z yamasaki $
+! $Id: ChargeDensity_Construction.F90 494 2016-06-02 00:54:16Z jkoga $
   use m_Total_Energy,         only : m_TE_total_energy, ehartr
   use m_Charge_Density,       only : m_CD_convergence_check &
        &                           , m_CD_softpart, m_CD_hardpart &
@@ -73,8 +73,7 @@ subroutine ChargeDensity_Construction(ic
 ! ========================================================================= 11.0
 
 
-  use m_PlaneWaveBasisSet,       only : kgp,ngabc,igfp_l
-  use m_Electronic_Structure,    only : vlhxc_l 
+  use m_PlaneWaveBasisSet,       only : kgp
   use m_FFT,                     only : fft_box_size_CD
 
 
@@ -111,6 +110,12 @@ subroutine ChargeDensity_Construction(ic
        &                          m_OP_calc_OrbMagMom_method2
 ! ============== 2014/08/26
 
+! ==== Positron SCF === 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, positron_method
+  use m_Const_Parameters,    only : positron_CONV
+  use m_Positron_Wave_Functions, only : m_pWF_construct_pcharge, m_pWF_charge_rspace
+! ===================== 2015/11/28
+
   implicit none
   include 'mpif.h'
 
@@ -138,12 +143,14 @@ subroutine ChargeDensity_Construction(ic
 ! =================== added by K. Tagami ============ 5.0
   if ( sw_eval_energy_before_charge == ON ) then
 ! ================================= modified by K.Tagami ====== 11.0
+!     call m_TE_total_energy(nfout,display_on,kv3)
+!
      if ( noncol ) then
         call m_TE_total_energy_noncl(nfout,display_on,kv3)
-! ============================================================= 11.0
      else
         call m_TE_total_energy(nfout,display_on,kv3)
      endif
+! ============================================================= 11.0
   endif
 ! =================================================== 5.0
 
@@ -202,16 +209,20 @@ subroutine ChargeDensity_Construction(ic
   if(sw_esm==ON)then
      nfftcd = fft_box_size_CD(1,0)*fft_box_size_CD(2,0)*fft_box_size_CD(3,0)
      allocate(vhar(nfftcd));vhar=(0.d0,0.d0)
-     allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+!     allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+     allocate(chgc(1:kgp,nspin));chgc=(0.d0,0.d0)
      if(kimg==1)then
         do ig=ista_kngp,iend_kngp
-           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),0.d0)
+!           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),0.d0)
+           chgc(ig,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),0.d0)
         enddo
      else
         do ig=ista_kngp,iend_kngp
-           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),chgq_l(ig,2,1:nspin))
+!           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),chgq_l(ig,2,1:nspin))
+           chgc(ig,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),chgq_l(ig,2,1:nspin))
         enddo
      endif
+     call mpi_allreduce(mpi_in_place,chgc,kgp*nspin,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
      call esm_hartree(chgc,ehartr,vhar)
      ehartr  = 0.5d0*ehartr  !Ry -> Ha
      deallocate(chgc)
@@ -249,14 +260,21 @@ subroutine ChargeDensity_Construction(ic
   endif
 ! ============== 2014/08/26
 
+  if ( sw_positron /= OFF ) then
+     if ( positron_method /= Positron_CONV ) then
+        call m_pWF_construct_pcharge
+        call m_pWF_charge_rspace
+     endif
+  endif
+
   if ( sw_eval_energy_before_charge == OFF ) then
 ! ================================== modified by K. Tagami ================ 11.0
      if ( noncol ) then
         call m_TE_total_energy_noncl(nfout,display_on,kv3)
      else
-! ========================================================================== 11.0
         call m_TE_total_energy(nfout,display_on,kv3)
      endif
+! ========================================================================== 11.0
   endif
 ! ======================================================== 5.0
 
@@ -296,7 +314,7 @@ contains
   end subroutine FermiEnergyLevel
 
   subroutine CD_Softpart_plus_Hardpart
-! $Id: ChargeDensity_Construction.F90 455 2015-09-07 08:04:26Z yamasaki $
+! $Id: ChargeDensity_Construction.F90 494 2016-06-02 00:54:16Z jkoga $
 !fj#ifdef __TIMER_SUB__
 !fj  call timer_sta(716)
 !fj#endif
diff -uprN phase0_2015.01/src_phase/ChargeDensity_Mixing.F90 phase0_2015.01.01/src_phase/ChargeDensity_Mixing.F90
--- phase0_2015.01/src_phase/ChargeDensity_Mixing.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/ChargeDensity_Mixing.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 492 $)
 !
 !  SUBROUINE: ChargeDensity_Mixing
 !
@@ -31,7 +31,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine ChargeDensity_Mixing
-! $Id: ChargeDensity_Mixing.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: ChargeDensity_Mixing.F90 492 2016-05-31 03:06:04Z jkoga $
   use m_Const_Parameters,    only : DP,SIMPLE,BROYD1,BROYD2,DFP,PULAY,RMM2P,ON &
        &                          , FIXED_CHARGE, FIXED_CHARGE_CONTINUATION, CONTINUATION, SKIP
   use m_Charge_Density,      only : m_CD_check
@@ -49,7 +49,7 @@ subroutine ChargeDensity_Mixing
        &                          , m_CtrlP_set_rmx &
        &                          , m_CtrlP_waymix_now &
        &                          , m_CtrlP_set_mix_parameter &
-       &                          , sw_hubbard
+       &                          , sw_hubbard, sw_mix_occ_matrix
   use m_Files,               only : nfout
   use m_IterationNumbers,    only : iteration_electronic, iteration_ionic &
        &                          , m_Iter_cmix_reset
@@ -305,10 +305,10 @@ contains
     case (SIMPLE)
        call m_CD_simple_mixing( nfout,rmxt_tot )
        call m_CD_simple_mixing_hard( nfout, rmxt_hard )
-       if ( sw_hubbard == ON ) then
+       if ( sw_hubbard == ON .and.sw_mix_occ_matrix==OFF ) then
           call Renewal_of_OccMat( .false., ON )           ! hsr --> om 
-          call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
+       if(sw_hubbard==ON) call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
 
     case (BROYD1)
 !       call m_CD_mix_broyden1_with_hsr(rmxt_tot)
@@ -316,22 +316,21 @@ contains
        stop
 
     case (BROYD2)
-       call m_CD_mix_broyden2_with_hsr(nfout,rmxt_tot)
-       if ( sw_hubbard == ON ) then
+       call m_CD_mix_broyden2_with_hsr(nfout,rmxt_tot,sw_mix_occ_matrix==ON)
+       if ( sw_hubbard == ON .and. sw_mix_occ_matrix==OFF ) then
           call Renewal_of_OccMat(.false., ON )           ! hsr --> om 
-          call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
-
+       if(sw_hubbard==ON) call m_OP_cp_om_to_ommix( nfout, rmxt_hard )      ! om --> ommix
     case (DFP)
 !       call m_CD_mix_DFP(rmxt_tot)
        write(*,*) 'Not supported '
        stop
     case (PULAY)
-       call m_CD_mix_pulay_with_hsr(nfout,rmxt_tot)
-       if ( sw_hubbard == ON ) then
+       call m_CD_mix_pulay_with_hsr(nfout,rmxt_tot,sw_mix_occ_matrix==ON)
+       if ( sw_hubbard == ON .and. sw_mix_occ_matrix==OFF ) then
           call Renewal_of_OccMat(.false., ON )           ! hsr --> om 
-          call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
+       if(sw_hubbard==ON) call m_OP_cp_om_to_ommix( nfout, rmxt_hard )      ! om --> ommix
     case default
        stop ' ! waymix is invalid'
     end select mixing_way
@@ -349,13 +348,13 @@ contains
        endif
 
     case (BROYD2)
-       call m_CD_mix_broyden2_intg(nfout,rmxt_tot)
+       call m_CD_mix_broyden2_intg(nfout,rmxt_tot,sw_mix_occ_matrix==ON)
        if ( sw_hubbard == ON ) then
           call Renewal_of_OccMat(.false., ON )           ! hsr --> om
           call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
     case (PULAY)
-       call m_CD_mix_pulay_intg(nfout,rmxt_tot)
+       call m_CD_mix_pulay_intg(nfout,rmxt_tot,sw_mix_occ_matrix==ON)
        if ( sw_hubbard == ON ) then
           call Renewal_of_OccMat(.false., ON )           ! hsr --> om
           call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
diff -uprN phase0_2015.01/src_phase/Convergence_Check.F90 phase0_2015.01.01/src_phase/Convergence_Check.F90
--- phase0_2015.01/src_phase/Convergence_Check.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Convergence_Check.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  FUNCTION:  Ending_Time(), ckiter(), ChargeDensity_is_Converged(), 
 !             TotalEnergy_is_Divergent(), Forces_are_Converged(), 
@@ -37,7 +37,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 logical function Ending_Time()
-! $Id: Convergence_Check.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Convergence_Check.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Const_Parameters,   only : INITIAL, CONTINUATION, FIXED_CHARGE, FIXED_CHARGE_CONTINUATION
   use m_Files,              only : nfstop,nfout,m_Files_open_nfstop,m_Files_close_nfstop
   use m_IterationNumbers,   only : iteration, iteration_ionic, iteration_electronic
@@ -45,6 +45,7 @@ logical function Ending_Time()
        &                         , max_TS_iteration_is_given, max_mdstep_is_given, printable &
        &                         , m_CtrlP_ckcput, m_CtrlP_rd_istop
   use m_Parallelization,    only : mype,mpi_comm_group
+
   implicit none
   include 'mpif.h'                                      ! MPI
 
@@ -209,6 +210,12 @@ logical function ChargeDensity_is_Conver
        &                             truncate_vxw_updating, sw_update_vxw, oneshot
 ! ======================================================================== 12.5Exp
 
+! === Postitron SCF === 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, positron_method
+  use m_Const_Parameters,   only : positron_GGGC
+  use m_Positron_Wave_Functions,  only : m_pWF_update_lifetime
+! ===================== 2015/11/28
+
   implicit none
   logical, save :: renew_wf_again = .false.
   logical :: EigenValues_are_Converged
@@ -320,6 +327,10 @@ logical function ChargeDensity_is_Conver
 ! <--
      end if
 
+     if ( sw_positron /= OFF ) then
+        if ( positron_method == positron_GGGC ) call m_pWF_update_lifetime
+     endif
+
 ! =========================== KT_Test ================= 12.5Exp
      if ( sw_hybrid_functional == ON ) then
         if ( truncate_vxw_updating .and. sw_update_vxw == OFF ) then
@@ -1136,6 +1147,38 @@ logical function Positron_Defect()
   end if
 end function Positron_Defect
 
+! ==== Positron SCF === 2015/11/28
+logical function Positron_scf()
+  use m_Const_Parameters, only :   OFF, Positron_CONV
+  use m_Control_Parameters, only : sw_positron, positron_method
+
+  if ( sw_positron /= OFF ) then
+     if ( positron_method == Positron_CONV ) then
+        Positron_scf = .false.
+     else
+        Positron_scf = .true.
+     end if
+  else
+     Positron_scf = .false.
+  endif
+end function Positron_scf
+
+logical function Positron_nonscf()
+  use m_Const_Parameters, only :   OFF, Positron_CONV
+  use m_Control_Parameters, only : sw_positron, positron_method
+
+  if ( sw_positron /= OFF ) then
+     if ( positron_method == Positron_CONV ) then
+        Positron_nonscf = .true.
+     else
+        Positron_nonscf = .false.
+     end if
+  else
+     Positron_nonscf = .false.
+  endif
+end function Positron_nonscf
+! ========== 2015/11/28
+
 logical function Structure_is_fixed()
   ! Coded by T. Yamasaki, 25 Jul. 2008
   use m_Const_Parameters, only :   ON, VERLET
diff -uprN phase0_2015.01/src_phase/EsmPack/Esm.F90 phase0_2015.01.01/src_phase/EsmPack/Esm.F90
--- phase0_2015.01/src_phase/EsmPack/Esm.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/EsmPack/Esm.F90	2016-07-12 12:51:19.000000000 +0900
@@ -227,9 +227,9 @@ END SUBROUTINE esm_ggen_2d
 SUBROUTINE esm_hartree (rhog, ehart, aux )
   Use ESM_VARS
   IMPLICIT NONE
-#ifdef __MPI__
-  include 'mpif.h'
-#endif
+!#ifdef __MPI__
+!  include 'mpif.h'
+!#endif
   !
   COMPLEX(8) :: rhog(ngm,nspin)   !  n(G)
   REAL(8),    Intent(Out) :: ehart             !  Hartree energy
@@ -339,22 +339,22 @@ SUBROUTINE esm_hartree (rhog, ehart, aux
         
      vg3(1:nr3x,ng_2d)=vg2(1:nr3x)*2.d0
   enddo
-#ifdef __MPI__
-  n1h=nr1x/2;n2h=nr2x/2
-  allocate(vg3_mpi(1:nr3x,-n2h:n2h,-n1h:n1h));vg3_mpi(:,:,:)=(0.d0,0.d0)
-  do ng_2d=1,ngm_2d
-     k1 = mill_2d(1,ng_2d)
-     k2 = mill_2d(2,ng_2d)
-     vg3_mpi(:,k2,k1) = vg3(:,ng_2d)
-  enddo
-  call mpi_allreduce(MPI_IN_PLACE,vg3_mpi,nr3x*nr2x*nr1x,mpi_double_complex,mpi_sum,communicator,ierr)
-  do ng_2d=1,ngm_2d
-     k1 = mill_2d(1,ng_2d)
-     k2 = mill_2d(2,ng_2d)
-     vg3(:,ng_2d) = vg3_mpi(:,k2,k1)
-  enddo
-  deallocate(vg3_mpi)
-#endif
+!#ifdef __MPI__
+!  n1h=nr1x/2;n2h=nr2x/2
+!  allocate(vg3_mpi(1:nr3x,-n2h:n2h,-n1h:n1h));vg3_mpi(:,:,:)=(0.d0,0.d0)
+!  do ng_2d=1,ngm_2d
+!     k1 = mill_2d(1,ng_2d)
+!     k2 = mill_2d(2,ng_2d)
+!     vg3_mpi(:,k2,k1) = vg3(:,ng_2d)
+!  enddo
+!  call mpi_allreduce(MPI_IN_PLACE,vg3_mpi,nr3x*nr2x*nr1x,mpi_double_complex,mpi_sum,communicator,ierr)
+!  do ng_2d=1,ngm_2d
+!     k1 = mill_2d(1,ng_2d)
+!     k2 = mill_2d(2,ng_2d)
+!     vg3(:,ng_2d) = vg3_mpi(:,k2,k1)
+!  enddo
+!  deallocate(vg3_mpi)
+!#endif
   deallocate(vg2,vg2_in)
 !$omp end parallel  
 
@@ -507,9 +507,9 @@ SUBROUTINE esm_hartree (rhog, ehart, aux
      
      vg3(1:nr3x,ng_2d)=vg2(1:nr3x)*2.d0
      
-#ifdef __MPI__
-  call mpi_allreduce(MPI_IN_PLACE,vg3(1,ng_2d),nr3x,mpi_double_complex,mpi_sum,communicator,ierr)
-#endif
+!#ifdef __MPI__
+!  call mpi_allreduce(MPI_IN_PLACE,vg3(1,ng_2d),nr3x,mpi_double_complex,mpi_sum,communicator,ierr)
+!#endif
      deallocate(vg2,vg2_in)
   endif ! if( ng_2d > 0 )
   
@@ -537,9 +537,9 @@ SUBROUTINE esm_hartree (rhog, ehart, aux
 #ifdef __PARA
   call mp_sum( ehart, intra_pool_comm )
 #endif
-#ifdef __MPI__
-  call mpi_allreduce(MPI_IN_PLACE,ehart,1,mpi_double_precision,mpi_sum,communicator,ierr)
-#endif
+!#ifdef __MPI__
+!  call mpi_allreduce(MPI_IN_PLACE,ehart,1,mpi_double_precision,mpi_sum,communicator,ierr)
+!#endif
 
 ! Map to FFT mesh (nrxx)
   aux=0.0d0
@@ -594,7 +594,6 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
   !
   !    here the local variables
   !
-  real(8), external      :: qe_erfc, qe_erf
   real(8)                :: gp2, t(2), gp, sa, z1, z0, L
   integer                 :: k1, k2, k3, ipol, it1, it2, ng_2d
   real(8) :: tt, z, zp, kk1, kk2, g, cc1, cc2, arg1, arg2, t1, t2, ff, argmax, ew
@@ -640,7 +639,7 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
      
      tt=upf_zp(it1)*upf_zp(it2)*2.0*pi/sa
      
-     kk1=0.5d0*(-(z-zp)*qe_erf(g*(z-zp))-exp(-g**2*(z-zp)**2)/g/sqrt(pi))
+     kk1=0.5d0*(-(z-zp)*erf(g*(z-zp))-exp(-g**2*(z-zp)**2)/g/sqrt(pi))
      
      if (esm_bc.eq.'bc1') then
         kk2=0.d0
@@ -668,8 +667,8 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            cc1=cc1+(t1+t2)/4.d0/gp
            
            if (esm_bc.eq.'bc1') then
@@ -708,8 +707,8 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            cc1=cc1+cos(ff)*(t1+t2)/4.d0/gp
            
            if (esm_bc.eq.'bc1') then
@@ -791,8 +790,6 @@ subroutine esm_local_(nrx,aux,natm,ngaus
   !    here the local variables
   !
   complex(8),allocatable :: vloc3(:,:),vg2(:),vg2_in(:)
-!  real(8), external      :: qe_erf, qe_erfc
-  real(8)      :: qe_erf, qe_erfc
   real(8)                :: t(3),tt,gp,gp2,sa,z1,z0,pp,cc,ss,t1,t2, &
                             z,zp,arg11,arg12,arg21,arg22,v0,tmp,L,argmax, &
                             z_l,z_r
@@ -849,8 +846,8 @@ subroutine esm_local_(nrx,aux,natm,ngaus
               arg21= gp*(z-zp)
               arg21=min(arg21,argmax)
               arg22= gp/2.d0/tmp+tmp*(z-zp)
-              t1=exp(arg11)*qe_erfc(arg12)
-              t2=exp(arg21)*qe_erfc(arg22)
+              t1=exp(arg11)*erfc(arg12)
+              t2=exp(arg21)*erfc(arg22)
               cc1=cc1+bgauss(it,ig)*cs*(t1+t2)/4.d0/gp
            enddo
            if (esm_bc.eq.'bc1') then
@@ -909,7 +906,7 @@ subroutine esm_local_(nrx,aux,natm,ngaus
            cc1=(0.d0,0.d0) 
            do ig=1,ngauss
               tmp=sqrt(agauss(it,ig))
-              cc1=cc1+bgauss(it,ig)*0.5d0*(-(z-zp)*qe_erf(tmp*(z-zp)) &
+              cc1=cc1+bgauss(it,ig)*0.5d0*(-(z-zp)*erf(tmp*(z-zp)) &
                    -exp(-tmp**2*(z-zp)**2)/tmp/sqrt(pi))
            enddo
            if (esm_bc.eq.'bc1') then
@@ -924,12 +921,12 @@ subroutine esm_local_(nrx,aux,natm,ngaus
      ! smoothing cell edge potential (avoiding unphysical oscillation)
         do ig=1,ngauss
            tmp=sqrt(agauss(it,ig))
-           f1=f1+tt*bgauss(it,ig)*0.5d0*(-(z_r-zp)*qe_erf(tmp*(z_r-zp)) &
+           f1=f1+tt*bgauss(it,ig)*0.5d0*(-(z_r-zp)*erf(tmp*(z_r-zp)) &
                 -exp(-tmp**2*(z_r-zp)**2)/tmp/sqrt(pi))
-           f2=f2+tt*bgauss(it,ig)*0.5d0*(-(z_l-zp)*qe_erf(tmp*(z_l-zp)) &
+           f2=f2+tt*bgauss(it,ig)*0.5d0*(-(z_l-zp)*erf(tmp*(z_l-zp)) &
                 -exp(-tmp**2*(z_l-zp)**2)/tmp/sqrt(pi))
-           f3=f3-tt*bgauss(it,ig)*0.5d0*qe_erf(tmp*(z_r-zp))
-           f4=f4-tt*bgauss(it,ig)*0.5d0*qe_erf(tmp*(z_l-zp))
+           f3=f3-tt*bgauss(it,ig)*0.5d0*erf(tmp*(z_r-zp))
+           f4=f4-tt*bgauss(it,ig)*0.5d0*erf(tmp*(z_l-zp))
         enddo
         if(esm_bc.eq.'bc1')then
            f1=f1+tt*0.d0
@@ -1004,7 +1001,6 @@ subroutine esm_force_ew ( alpha, forceio
   !
   !    here the local variables
   !
-  real(8), external      :: qe_erfc, qe_erf
   integer  :: it1, it2, ipol, k1, k2, k3, ng_2d
   integer  :: nth, ith, omp_get_num_threads, omp_get_thread_num
   real(8) :: t1_for, t2_for, z, zp, kk1_for, kk2_for, g, gp2, gp, z1, t(2), L
@@ -1052,7 +1048,7 @@ subroutine esm_force_ew ( alpha, forceio
      endif
      t2_for=upf_zp(it1)*upf_zp(it2)*fpi/sa
 
-     kk1_for=0.5d0*qe_erf(g*(z-zp))
+     kk1_for=0.5d0*erf(g*(z-zp))
      if (esm_bc.eq.'bc1') then
         kk2_for=0.d0
      else if (esm_bc.eq.'bc2') then
@@ -1078,8 +1074,8 @@ subroutine esm_force_ew ( alpha, forceio
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            cz1_for=0.d0
            if (esm_bc.eq.'bc1') then      
               cz2_for=0.d0
@@ -1119,8 +1115,8 @@ subroutine esm_force_ew ( alpha, forceio
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            
            cx1_for=cx1_for+sin(ff)*(t1+t2)/4.d0/gp*k1
            cy1_for=cy1_for+sin(ff)*(t1+t2)/4.d0/gp*k2
@@ -1175,8 +1171,8 @@ subroutine esm_force_ew ( alpha, forceio
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            
            cx1_for=cx1_for+sin(ff)*(t1+t2)/4.d0/gp*k1
            cy1_for=cy1_for+sin(ff)*(t1+t2)/4.d0/gp*k2
@@ -1276,7 +1272,6 @@ subroutine esm_force_lc_ (nrx, aux, natm
   !
   complex(8),allocatable :: vlocx(:), vlocy(:), vlocdz(:)
   real(8),allocatable    :: for(:,:),for_g(:,:)
-  real(8), external      :: qe_erf, qe_erfc
   real(8)                :: t(3),tt,gp,gp2,sa,z1,z0,pp,cc,ss,t1,t2,z,zp,L,forcelc2(3,nat)
   real(8)                :: arg11,arg12,arg21,arg22,tmp,r1,r2,fx1,fy1,fz1,fx2,fy2,fz2,argmax
   integer                 :: iz,ig,it,ipol,k1,k2,k3,ng,n1,n2,n3,ng_2d
@@ -1357,8 +1352,8 @@ subroutine esm_force_lc_ (nrx, aux, natm
               arg21= gp*(z-zp)
               arg21=min(arg21,argmax)
               arg22= gp/2.d0/tmp+tmp*(z-zp)
-              t1=exp(arg11)*qe_erfc(arg12)
-              t2=exp(arg21)*qe_erfc(arg22)
+              t1=exp(arg11)*erfc(arg12)
+              t2=exp(arg21)*erfc(arg22)
               cx1=cx1+bgauss(it,ig)*CMPLX(ss, -cc, kind=8) &
                    *(t1+t2)/4.d0/gp*k1
               cy1=cy1+bgauss(it,ig)*CMPLX(ss, -cc, kind=8) &
@@ -1440,7 +1435,7 @@ subroutine esm_force_lc_ (nrx, aux, natm
            cc1=(0.d0,0.d0)
            do ig=1,ngauss
               tmp=sqrt(agauss(it,ig))
-              cc1=cc1+bgauss(it,ig)*(0.5d0*qe_erf(tmp*(z-zp)))
+              cc1=cc1+bgauss(it,ig)*(0.5d0*erf(tmp*(z-zp)))
            enddo
            if (esm_bc.eq.'bc1') then
               cc2=(0.d0,0.d0)
diff -uprN phase0_2015.01/src_phase/EsmPack/Makefile phase0_2015.01.01/src_phase/EsmPack/Makefile
--- phase0_2015.01/src_phase/EsmPack/Makefile	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/EsmPack/Makefile	2016-07-12 12:51:19.000000000 +0900
@@ -2,8 +2,8 @@
 
 FFLAG   = $(INCLUDE) $(OMPFLAG) $(MPIFLAG)
 
-OBJ = qe_erf.o fft.o vector.o Ewald.o  Esm.o EsmInterface.o
-OBJ_P = qe_erf.o fft.o vector.o Ewald.o  Esm.o EsmPack.o
+OBJ =  fft.o vector.o Ewald.o  Esm.o EsmInterface.o
+OBJ_P =  fft.o vector.o Ewald.o  Esm.o EsmPack.o
 LIBFLAG = -L/usr/local/lib -lfftw3 -lblas -llapack
 
 .f90.o:
diff -uprN phase0_2015.01/src_phase/EsmPack/qe_erf.f90 phase0_2015.01.01/src_phase/EsmPack/qe_erf.f90
--- phase0_2015.01/src_phase/EsmPack/qe_erf.f90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/EsmPack/qe_erf.f90	1970-01-01 09:00:00.000000000 +0900
@@ -1,35 +0,0 @@
-! Copyright (c) 2012, Minoru Otani <minoru.otani@aist.go.jp> 
-! 
-! Permission is hereby granted, free of charge, to any person 
-! obtaining a copy of this software and associated documentation 
-! files (the "Software"), to deal in the Software without restriction, 
-! including without limitation the rights to use, copy, modify, merge, 
-! publish, distribute, sublicense, and/or sell copies of the Software, 
-! and to permit persons to whom the Software is furnished to do so, 
-! subject to the following conditions:
- 
-! The above copyright notice and this permission notice shall be 
-! included in all copies or substantial portions of the Software.
- 
-! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
-! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
-! OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-! NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-! HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-! WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-! DEALINGS IN THE SOFTWARE.
-
-Function qe_erf(x)
-  Implicit none
-  Real(8), Intent(In)  :: x
-  Real(8) :: qe_erf
-  qe_erf = erf(x)
-End Function qe_erf
-
-Function qe_erfc(x)
-  Implicit none
-  Real(8), Intent(In)  :: x
-  Real(8) :: qe_erfc
-  qe_erfc = erfc(x)
-End Function qe_erfc
diff -uprN phase0_2015.01/src_phase/Finalization_of_mpi.F90 phase0_2015.01.01/src_phase/Finalization_of_mpi.F90
--- phase0_2015.01/src_phase/Finalization_of_mpi.F90	2015-09-14 15:16:30.000000000 +0900
+++ phase0_2015.01.01/src_phase/Finalization_of_mpi.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 449 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  SUBROUINE: Finalization_of_mpi
 !
@@ -14,7 +14,7 @@
 !
 !=======================================================================
 subroutine Finalization_of_mpi
-! $Id: Finalization_of_mpi.F90 449 2015-08-06 04:37:59Z jkoga $
+! $Id: Finalization_of_mpi.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Parallelization, only :       m_Parallel_end_mpi
   use m_PlaneWaveBasisSet, only :     m_pwBS_dealloc_ngpt_igfp_gr &
        &                            , m_pwBS_dealloc_ylm_l
@@ -30,7 +30,7 @@ subroutine Finalization_of_mpi
   use m_PAW_ChargeDensity,    only : m_PAWCD_dealloc
 #endif
 #ifdef _POSITRON_
-  use m_epc_potential,        only : m_epc_dealloc
+  use m_epc_potential,        only : m_epc_dealloc, m_epc_dealloc_vlhxc_p
   use m_Positron_Wave_Functions,only:m_pWF_deallocate_pzaj_etc
 #endif
 
@@ -80,6 +80,7 @@ subroutine Finalization_of_mpi
 #endif
 #ifdef _POSITRON_
   call m_epc_dealloc()
+  call m_epc_dealloc_vlhxc_p
   call m_pWF_deallocate_pzaj_etc()
 #endif
   call m_ES_dealloc_Dhub()
diff -uprN phase0_2015.01/src_phase/Initial_Electronic_Structure.F90 phase0_2015.01.01/src_phase/Initial_Electronic_Structure.F90
--- phase0_2015.01/src_phase/Initial_Electronic_Structure.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Initial_Electronic_Structure.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 481 $)
 !
 !  SUBROUINE: Initial_Electronic_Structure, Initial_WaveFunctions_ek
 !
@@ -31,7 +31,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine Initial_Electronic_Structure
-! $Id: Initial_Electronic_Structure.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: Initial_Electronic_Structure.F90 481 2016-03-25 02:51:57Z jkoga $
   use m_Const_Parameters,   only : Gauss_distrib_func, from_wave_functions&
        &                         , INITIAL, CONTINUATION, FIXED_CHARGE &
        &                         , FIXED_CHARGE_CONTINUATION, ON, OFF, EXECUT &
@@ -53,7 +53,9 @@ subroutine Initial_Electronic_Structure
        &                         , nfcntn_bin_paw &
        &                         , file_existance_contfiles &
        &                         , m_Files_check_nfzaj_existance, m_Files_check_nfchgt_existance &
-       &                         , m_Files_check_file_existance
+       &                         , m_Files_check_file_existance &
+       &                         , m_Files_open_nfcntn_bin_paw,m_Files_close_nfcntn_bin_paw &
+       &                         , m_Files_nfcntn_bin_paw_exists
   use m_Control_Parameters, only : ipri, iprichargedensity, initial_chg, icond, nspin, intzaj &
        &                         , evaluation_eko_diff &
        &                         , skip_alloc_phonon, sw_phonon, sw_calc_force, neg, neg_previous &
@@ -69,7 +71,7 @@ subroutine Initial_Electronic_Structure
   use m_Kpoints,            only : kv3
   use m_PlaneWaveBasisSet,  only : kg1, m_pwBS_alloc_ylm_l,kgp,ngabc
   use m_PlaneWaveBasisSet,  only : m_pwBS_sphrp_l
-  use m_PseudoPotential,    only : modnrm, m_PP_gfqwei, flg_paw, epc
+  use m_PseudoPotential,    only : modnrm, m_PP_gfqwei, flg_paw, epc, m_PP_rd_PAW_parameters
   use m_Total_Energy,      only  : m_TE_set_etotal_old
   use m_Electronic_Structure,only: totch, m_ES_gtotch &
        &                         , m_ES_energy_eigen_values_ext &
@@ -234,6 +236,11 @@ subroutine Initial_Electronic_Structure
 ! ================= 2014/09/19
 
 
+! ==== EXP_CELLOPT === 2015/09/24
+  use m_IterationNumbers,     only : iteration_unit_cell
+  use m_Control_Parameters,   only :  sw_read_nfchgt_prev_cell, sw_read_nfzaj_prev_cell
+! ==================== 2015/09/24
+
   implicit none
   integer :: iloop
 ! <--
@@ -525,18 +532,24 @@ subroutine Initial_Electronic_Structure
         call read_efermi()
         if(icond==FIXED_CHARGE .and. ekmode == OFF) call copy_chgq_to_chgqo()
      else
-        call m_CD_initial_CD_by_Gauss_kt(nfout)   ! (intchg) -> chgq_l
+        if (iteration_unit_cell > 1 .and. sw_read_nfchgt_prev_cell == ON ) then
+           call read_charge_density(condition=-4)
+        else
+           call m_CD_initial_CD_by_Gauss_kt(nfout)   ! (intchg) -> chgq_l
+        endif
      end if
 
      !---- set wave functions ----
-     if(intzaj == by_random_numbers) then
+     if ( iteration_unit_cell > 1 .and. sw_read_nfzaj_prev_cell == ON ) then
+        call read_zaj( condition =-4 )
+     else if(intzaj == by_random_numbers) then
         call m_ESIW_by_randomnumbers(nfout,kv3,1,neg)      ! (rndzaj) -> zaj_l
      else if(intzaj == by_pseudo_atomic_orbitals) then
         call m_ESIW_by_atomic_orbitals(nfout,kv3,1,neg)    ! (paozaj) -> zaj_l
      else if(intzaj == by_matrix_diagon) then
 
      else if(intzaj == FILE) then
-        call read_zaj()
+        call read_zaj( condition = 1 )
         call m_ES_betar_dot_WFs(nfout)         ! (fsrfsi)
         if(initial_chg == FILE .or. (icond==FIXED_CHARGE.and.ekmode==OFF)) then
         else
@@ -663,7 +676,8 @@ contains
     use m_Charge_Density,     only : m_CD_rd_chgq_noncl &
          &                         , m_CD_rd_chgq_import_frm_collin &
          &                         , m_CD_initCD_by_file_rsp_noncl &
-         &                         , m_CD_adjust_spindensity_noncl
+         &                         , m_CD_adjust_spindensity_noncl &
+         &                         , m_CD_import_chgq_prev_cell
     integer, intent(in) :: condition
     integer :: ispin
     integer :: iloop
@@ -703,8 +717,11 @@ contains
           else
              call m_CD_rd_chgq(nfout,nfchgt,F_CHGT_in_partitioned)
           endif
-          if ( flg_paw .and. read_charge_hardpart == YES ) then
+          if ( flg_paw .and. read_charge_hardpart == YES .and. m_Files_nfcntn_bin_paw_exists()) then
+             call m_Files_open_nfcntn_bin_paw()
+             call m_PP_rd_PAW_parameters(nfout,nfcntn_bin_paw)
              call m_CD_rd_hsr(nfcntn_bin_paw)
+             call m_Files_close_nfcntn_bin_paw()
           endif
        end if
 
@@ -723,7 +740,24 @@ contains
 !!$       if ( flg_paw .and. ) then
 !!$          call m_CD_rd_hsr(nfcntn_bin_paw)
 !!$       endif
+
+    else if(condition == -4) then      ! coordinate-continuation
+!
+       call m_CD_import_chgq_prev_cell(nfout,nfchgt, F_CHGT_in_partitioned)
+       if ( flg_paw .and. read_charge_hardpart == YES .and.  m_Files_nfcntn_bin_paw_exists()) then
+          call m_Files_open_nfcntn_bin_paw()
+          call m_PP_rd_PAW_parameters(nfout,nfcntn_bin_paw)
+          call m_CD_rd_hsr(nfcntn_bin_paw)
+          call m_Files_close_nfcntn_bin_paw()
+       endif
+
+       if ( noncol ) then
+          call m_CD_adjust_spindensity_noncl(nfout)
+       else
+          call m_CD_adjust_spindensity(nfout)
+       endif
     end if
+
   end subroutine read_charge_density
  
   subroutine read_efermi()
@@ -769,8 +803,8 @@ contains
 
   subroutine EXX()
     use m_ES_ExactExchange,  only  : m_ES_EXX_gather_valence_states, m_ES_EXX_kernel &
-         &                         , m_ES_EXX_occup, sw_rspace_hyb &
-         &                         , m_ES_EXX_init0 &
+         &                         , m_ES_EXX_occup  &
+         &                         , sw_rspace_hyb, m_ES_EXX_init0 &
 #ifndef MEMORY_SAVE_EXX
          &                         , m_ES_EXX_ylm, m_ES_EXX_crotylm
 #else
@@ -802,14 +836,24 @@ contains
     endif
   end subroutine read_occ_mat
 
-  subroutine read_zaj()
-    use m_ES_IO,                   only : m_ESIO_rd_WFs_import_frm_collin
+  subroutine read_zaj( condition )
+    use m_ES_IO,                   only : m_ESIO_rd_WFs_import_frm_collin, &
+         &                                m_ESIO_import_WFs_prev_cell
+    integer, intent(in) :: condition
+
     call m_Files_open_nfzaj()
-    if ( noncol .and. import_collinear_wavefunctions == ON ) then
-       call m_ESIO_rd_WFs_import_frm_collin( nfout,nfzaj,F_ZAJ_in_partitioned )
-    else
-       call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned)
+
+    if ( condition == 1 ) then
+       if ( noncol .and. import_collinear_wavefunctions == ON ) then
+          call m_ESIO_rd_WFs_import_frm_collin( nfout,nfzaj,F_ZAJ_in_partitioned )
+       else
+          call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned)
+       endif
+
+    else if ( condition == -4 ) then       ! coordinate-continuation
+       call m_ESIO_import_WFs_prev_cell(nfout,nfzaj,F_ZAJ_in_partitioned)
     endif
+
   end subroutine read_zaj
 
   subroutine check_neg()
diff -uprN phase0_2015.01/src_phase/Initialization.F90 phase0_2015.01.01/src_phase/Initialization.F90
--- phase0_2015.01/src_phase/Initialization.F90	2015-08-05 14:49:51.000000000 +0900
+++ phase0_2015.01.01/src_phase/Initialization.F90	2016-07-12 12:51:19.000000000 +0900
@@ -160,7 +160,7 @@ contains
   subroutine aavers
     include 'version.h' ! svn_revision
     character(len=72) :: vers, system, codename
-    write(vers,'("phase/0 2015.01 Revision:",i5," -- ORG_Parallel --")') svn_revision
+    write(vers,'("phase/0 2015.01.01 Revision:",i5," -- ORG_Parallel --")') svn_revision
     codename = 'phaseUnif'
     system = ''
 
diff -uprN phase0_2015.01/src_phase/InputData_Analysis.F90 phase0_2015.01.01/src_phase/InputData_Analysis.F90
--- phase0_2015.01/src_phase/InputData_Analysis.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/InputData_Analysis.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 475 $)
 !
 !  SUBROUINE: cnstr_fcvect_work_alloc, cnstr_fcvect_work_dealloc, 
 !             get_CS_and_ionic_system_data, read_ntyp_natm_natm2, read_altv, 
@@ -34,7 +34,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine InputData_Analysis()
-! $Id: InputData_Analysis.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: InputData_Analysis.F90 475 2016-02-23 05:22:18Z jkoga $
   use m_Files, only :             nfinp, nfout, nfcntn, file_existance_contfiles, file_existance_3contfiles &
        &                        , m_Files_open_nfcntn &
        &                        , m_Files_check_file_names &
@@ -176,10 +176,6 @@ subroutine InputData_Analysis()
      call m_Files_open_nfdynm_cif_initially()
      if(first_call) call m_CtrlP_rd_accuracy(nfout)
      call m_CS_rd_n(nfout)
-#ifndef _EMPIRICAL_
-     call m_CtrlP_rd_wfsolver(nfout)
-     call m_CtrlP_rd_chargemix(nfout)
-#endif
      call m_CtrlP_rd_struc_evol(nfout)
      call m_CtrlP_rd_postproc(nfout)
 #ifndef _EMPIRICAL_
@@ -190,6 +186,11 @@ subroutine InputData_Analysis()
      call m_IS_rd_n(nfout)
      call m_IS_set_ionic_mass(nfout)
 
+#ifndef _EMPIRICAL_
+     call m_CtrlP_rd_wfsolver(nfout,natm)
+     call m_CtrlP_rd_chargemix(nfout)
+#endif
+
 ! === KT_add ==== 2014/12/29
 #ifdef INIT_CHARGE_ATOM_BY_ATOM
      call m_IS_set_ionic_charge_atoms(nfout)
@@ -589,7 +590,7 @@ contains
 end subroutine InputData_Analysis
 
 subroutine InputData_Analysis_neb()
-! $Id: InputData_Analysis.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: InputData_Analysis.F90 475 2016-02-23 05:22:18Z jkoga $
   use m_Files, only :             nfout, nfcntn &
        &                        , m_Files_reopen_nfcntn
   use m_Ionic_System, only :      m_IS_rd_pos_and_v
diff -uprN phase0_2015.01/src_phase/Makefile phase0_2015.01.01/src_phase/Makefile
--- phase0_2015.01/src_phase/Makefile	1970-01-01 09:00:00.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile	2016-07-12 12:52:45.000000000 +0900
@@ -0,0 +1,580 @@
+.SUFFIXES:
+.SUFFIXES: .o .F .f .F90 .f90 .c .mod
+
+# Platform    : GNU Linux (EM64T/AMD64)
+# Prog. model : MPI parallel
+# Compiler    : Intel Fortran compiler
+# BLAS/LAPACK : System-installed MKL
+# FFT         : System-installed FFTW3 library
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+F90 = mpif90
+CC  = gcc -m64
+CPP = 
+AR  = ar -vq
+LINK = mpif90
+F90FLAGS = -traceback
+F77FLAGS = -traceback
+CFLAGS = -O -DINTEL
+
+ESM = yes
+ifdef ESM
+CPPESM=-DENABLE_ESM_PACK
+LESM=-lesm
+else
+CPPESM=
+endif
+
+CPPFLAGS = -DLinux -DFFTW3 -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM}
+LFLAGS = 
+F90FLAGS_FIXED = -extend_source -Fl -fixed
+F90FLAGS_FREE = -extend_source -Fl
+MKLHOME=
+INCLUDE=
+LIBS = -L./ ${LESM}  -Wl,--start-group -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -Bdynamic -lfftw3  -lpthread
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+
+ifdef ESM
+ESM_LIB = libesm.a
+else
+ESM_LIB =
+endif
+LAPACK = 
+FFTOBJECT = 
+OBJ_INPUTPARSE = input_parse.o
+
+
+PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+PHASE_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_ES_occup.o \
+m_ES_RSB.o \
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
+m_CD_Mag_Moment.o \
+m_epc_potential.o        \
+m_Positron_Wave_Functions.o \
+m_ES_WF_mixing.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_OP_Moment.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_Force.o                m_Stress.o               m_ES_WF_by_submat.o \
+m_UnitCell.o \
+m_constraints.o \
+m_ELF.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o            m_Phonon.o \
+m_Wannier.o m_Wannier90.o m_Replica.o Renewal_of_ChgCtrlParam.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o  \
+m_Excitation.o \
+m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Qpt.o  \
+m_rttddft.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o  \
+m_SpinOrbit_ForceTheorem.o
+
+PHASE_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o 
+
+PHASE_F_OTHERSUBS  = mdmain0.o    constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o              \
+spline.o \
+b_Crystal_Structure.o             b_Electronic_Structure.o \
+b_Words.o                         b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                         b_Kpoints.o \
+b_PlaneWaveBasisSet.o             b_Ionic_System.o \
+b_XC_Potential.o                  b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o\
+input_interface.o                 Initialization.o \
+WriteDownData_onto_Files.o        Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o \
+Renewal_of_WaveFunctions.o \
+Renewal_of_pWaveFunctions.o \
+IterationNumbers_Setting.o        ChargeDensity_Construction.o \
+ChargeDensity_Mixing.o            Renewal_of_Potential.o \
+Renewal_of_Hubbard_Potential.o    Renewal_of_OccMat.o \
+Renewal_of_pPotential.o           Renewal_of_Hubbard_Parameters.o \
+Convergence_Check.o               Forces.o \
+Move_Ions.o                       Initial_MD_Condition.o \
+Stress.o                          Postprocessing.o \
+Finalization_of_mpi.o             miscellaneous.o\
+b_BerryPhase.o \
+heap_sort.o                       real_spherical_harmonics.o \
+Real_space_integ.o                crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+mpi_dummy.o                            WriteDownData_onto_Files_ek.o \
+GaussLeg.o lib_int_deri_add.o  \
+rttddft_main.o \
+Potential_Construction.o        Potential_Mixing.o             ThomasFermiWeiz.o \
+Epsilon_postscf.o vdW.o
+
+# for vc_nl
+NLOBJ = vc_nl.o
+
+ifndef SX_DGEMM
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o \
+spg+tetra.o
+else
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+spg+tetra.o \
+dgemm__.o 
+endif
+
+EKCAL_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EKCAL_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_epc_potential.o \
+m_Positron_Wave_Functions.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
+m_CD_Mag_Moment.o \
+m_ES_WF_mixing.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_constraints.o \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o\
+m_UnitCell.o \
+m_ELF.o                  m_Wannier.o \
+m_Wannier90.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o               m_Phonon.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o \
+m_Excitation.o \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o
+
+
+EKCAL_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EKCAL_F_OTHERSUBS  = ekmain.o    constraint_main.o            Preparation_for_mpi.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+vdW.o
+
+ifndef SX_DGEMM
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o
+else
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o 
+endif
+
+
+EPS_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EPS_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
+m_CD_Mag_Moment.o \
+m_ES_WF_mixing.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o          m_Stress.o      m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Wannier.o m_Wannier90.o \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_Excitation.o \
+m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o 
+
+EPS_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EPS_F_OTHERSUBS  = epsmain.o           constraint_main.o Preparation_for_mpi.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+ThomasFermiWeiz.o         Renewal_of_ChgCtrlParam.o  \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+vdW.o
+
+ifndef SX_DGEMM
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+TDLR_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+TDLR_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
+m_CD_Mag_Moment.o \
+m_ES_WF_mixing.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
+m_Wannier.o m_Wannier90.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Excitation.o \
+m_ES_occup_EPS.o        \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o  m_CoreLevel_Spectrum.o   \
+m_LinearResponse_Qpt.o \
+m_LinearResponse_Tools.o     m_LinearResponse_Density.o  \
+m_LinearResponse_NonInt.o    m_LinearResponse_ALDA.o \
+m_LinearResponse_Kernel.o    m_LinearResponse_BS.o \
+m_LinearResponse_Spectrum.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o 
+
+TDLR_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o 
+
+TDLR_F_OTHERSUBS  = tdlrmain.o         constraint_main.o  Preparation_for_mpi.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o \
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+PseudoPotential_ek_Epsilon.o           \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+b_LinearResponse_Kernel.o  b_LinearResponse_exc.o \
+LinearResponse_Proc.o     LinearResponse_Spec.o \
+vdW.o
+
+ifndef SX_DGEMM
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+
+PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+all : phase ekcal epsmain tdlrmain
+
+ifdef ESM
+phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+else
+phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+endif
+
+ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ)
+	$(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ)
+	$(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ)
+	$(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+ifdef NO_MPI
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)"
+else
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)"
+endif
+
+liblapack.a:
+	cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+libblas.a:
+	cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h)
+	$(CC) -c $(CFLAGS) $(@:.o=.c)
+
+.f.o:
+	$(F90) -c $(F77FLAGS) $*.f
+
+.f90.o:
+	$(F90) -c $(F90FLAGS) $*.f90
+
+.F.o:
+	$(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F
+
+.F90.o:
+	$(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90
+
+clean:
+	\rm -f *.o *.mod *.a *.lib *.L *.list phase ekcal epsmain tdlrmain
+	\cd LAPACK; make clean
+	\cd BLAS; make clean
+	\cd EsmPack; make clean
+
+install: phase ekcal epsmain tdlrmain
+	\mv -f phase ../bin/
+	\mv -f ekcal ../bin/
+	\mv -f epsmain ../bin/
+	\mv -f tdlrmain ../bin/
diff -uprN phase0_2015.01/src_phase/Makefile.K phase0_2015.01.01/src_phase/Makefile.K
--- phase0_2015.01/src_phase/Makefile.K	2015-10-18 02:56:57.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.K	2016-07-28 03:57:30.708737196 +0900
@@ -1,6 +1,9 @@
 .SUFFIXES:
 .SUFFIXES: .o .F .f .F90 .f90 .c .mod
 
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
 F90 = mpifrtpx
 CC  = fccpx
 CPP = 
@@ -10,7 +13,7 @@ F90FLAGS = -Nsetvalue -Kfast,parallel,op
 F77FLAGS = -Nsetvalue -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 -c -V -Qa,d,i,p,t,x -Koptmsg=2
 CFLAGS   = -DINTEL -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2
 
-ESM=YES
+ESM = yes
 ifdef ESM
 CPPESM=-DENABLE_ESM_PACK
 LESM=-lesm
@@ -18,15 +21,15 @@ else
 CPPESM=
 endif
 
-INCLUDE=-I/home/apps/fftw/3.3/include
-
 CPPFLAGS = -DLinux -DFFTW3 -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW -D_USE_SCALAPACK_ -DMULT_PHASE_RSPACE ${CPPESM}
-LFLAGS   = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
-LIBS      = -L./ ${LESM} -Kopenmp -lm -SSL2MPI -SSL2BLAMP -SCALAPACK \
+LFLAGS = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
+INCLUDE=-I/home/apps/fftw/3.3/include
+LIBS = -L./ ${LESM} -Kopenmp -lm -SSL2MPI -SSL2BLAMP -SCALAPACK \
  -L/home/apps/fftw/3.3/lib64 -lfftw3 -lfftw3_omp -lfftw3_mpi $(KLINK) $(EIGEN_LIBS) #/opt/FJSVtclang/GM-1.2.0-13/lib64/libtofupa.o
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
 ###########################################################################
+
 ifdef ESM
 ESM_LIB = libesm.a
 else
@@ -34,9 +37,9 @@ ESM_LIB =
 endif
 LAPACK = 
 FFTOBJECT = 
-
 OBJ_INPUTPARSE = input_parse.o
 
+
 PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
 m_IterationNumbers.o
 
@@ -57,7 +60,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -181,7 +184,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -294,7 +297,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -312,7 +315,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -409,7 +411,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -427,7 +429,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
@@ -501,6 +503,7 @@ dgemm__.o
 endif
 
 
+
 PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
 
 EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
diff -uprN phase0_2015.01/src_phase/Makefile.frtpx phase0_2015.01.01/src_phase/Makefile.frtpx
--- phase0_2015.01/src_phase/Makefile.frtpx	2015-10-18 02:57:27.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.frtpx	2016-07-28 03:57:43.681118981 +0900
@@ -1,6 +1,9 @@
 .SUFFIXES:
 .SUFFIXES: .o .F .f .F90 .f90 .c .mod
 
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
 F90 = mpifrtpx
 CC  = fccpx
 CPP = 
@@ -10,7 +13,7 @@ F90FLAGS = -Nsetvalue -Kfast,parallel,op
 F77FLAGS = -Nsetvalue -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 -c -V -Qa,d,i,p,t,x -Koptmsg=2
 CFLAGS   = -DINTEL -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2
 
-ESM=YES
+ESM = yes
 ifdef ESM
 CPPESM=-DENABLE_ESM_PACK
 LESM=-lesm
@@ -18,14 +21,14 @@ else
 CPPESM=
 endif
 
-INCLUDE=-I/usr/local/fftw/3.3/include
-
 CPPFLAGS = -DLinux -DFFTW3 -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW -D_USE_SCALAPACK_ -DMULT_PHASE_RSPACE ${CPPESM}
-LFLAGS   = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
-LIBS      = -L./ ${LESM} -Kopenmp -SSL2BLAMP -SCALAPACK -L/usr/local/fftw/3.3/lib64 -lfftw3 -lfftw3_omp -lfftw3_mpi $(KLINK) #ScaLAPACK+FFTW3.3
+LFLAGS = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
+INCLUDE=-I/usr/local/fftw/3.3/include
+LIBS = -L./ ${LESM} -Kopenmp -SSL2BLAMP -SCALAPACK -L/usr/local/fftw/3.3/lib64 -lfftw3 -lfftw3_omp -lfftw3_mpi $(KLINK) #ScaLAPACK+FFTW3.3
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
 ###########################################################################
+
 ifdef ESM
 ESM_LIB = libesm.a
 else
@@ -33,9 +36,9 @@ ESM_LIB =
 endif
 LAPACK = 
 FFTOBJECT = 
-
 OBJ_INPUTPARSE = input_parse.o
 
+
 PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
 m_IterationNumbers.o
 
@@ -56,7 +59,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -180,7 +183,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -293,7 +296,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -311,7 +314,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -408,7 +410,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -426,7 +428,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
@@ -500,6 +502,7 @@ dgemm__.o
 endif
 
 
+
 PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
 
 EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
diff -uprN phase0_2015.01/src_phase/Makefile.gfortran+lapack+fftw+mpi phase0_2015.01.01/src_phase/Makefile.gfortran+lapack+fftw+mpi
--- phase0_2015.01/src_phase/Makefile.gfortran+lapack+fftw+mpi	2015-10-18 03:12:59.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.gfortran+lapack+fftw+mpi	2016-07-28 02:33:49.911020145 +0900
@@ -66,7 +66,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -190,7 +190,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -303,7 +303,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -321,7 +321,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -418,7 +417,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -436,7 +435,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Makefile.gfortran.seq phase0_2015.01.01/src_phase/Makefile.gfortran.seq
--- phase0_2015.01/src_phase/Makefile.gfortran.seq	2015-10-18 04:20:08.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.gfortran.seq	2016-07-28 03:15:55.879420869 +0900
@@ -28,7 +28,6 @@ endif
 NO_MPI=YES
 CPPFLAGS = -DLinux -D_GNU_FORTRAN_ -DJRCATFFT_WS -DCD_JRCATFFT_WS -D_NO_MPI_ -DDISABLE_VDWDF -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM}
 LFLAGS = 
-INCLUDE=-I
 LIBS =  -L./ -llapack -lblas  
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
@@ -64,7 +63,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -188,7 +187,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -301,7 +300,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -319,7 +318,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -416,7 +414,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -434,7 +432,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Makefile.ifort+mkl phase0_2015.01.01/src_phase/Makefile.ifort+mkl
--- phase0_2015.01/src_phase/Makefile.ifort+mkl	1970-01-01 09:00:00.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.ifort+mkl	2016-07-12 12:51:19.000000000 +0900
@@ -0,0 +1,579 @@
+.SUFFIXES:
+.SUFFIXES: .o .F .f .F90 .f90 .c .mod
+
+# Platform    : GNU Linux (EM64T/AMD64)
+# Prog. model : Serial
+# Compiler    : Intel Fortran compiler
+# BLAS/LAPACK : Intel Math Kernel Library (MKL)
+# FFT         : Intel MKL with FFTW3 interface
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+F90 = ifort
+CC  = gcc -m64
+CPP = 
+AR  = ar -vq
+LINK = ifort
+F90FLAGS = -traceback -I./no_mpi/include
+F77FLAGS = -traceback -I./no_mpi/include
+CFLAGS = -O -DINTEL
+
+ESM = yes
+ifdef ESM
+CPPESM=-DENABLE_ESM_PACK
+LESM=-lesm
+else
+CPPESM=
+endif
+NO_MPI=YES
+CPPFLAGS = -DLinux -DFFTW3 -D_NO_MPI_ -D_NO_ARG_DUMMY_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM}
+LFLAGS = 
+F90FLAGS_FIXED = -extend_source -Fl -fixed
+F90FLAGS_FREE = -extend_source -Fl
+MKLHOME=/opt/intel/mkl/lib/intel64
+INCLUDE=-I/opt/intel/mkl/lib/intel64/../../include/fftw
+LIBS = -L./ ${LESM}  -L/opt/intel/mkl/lib/intel64 -Wl,--start-group ${MKLHOME}/libmkl_scalapack_lp64.a ${MKLHOME}/libmkl_blacs_intelmpi_lp64.a ${MKLHOME}/libmkl_intel_lp64.a  ${MKLHOME}/libmkl_sequential.a  ${MKLHOME}/libmkl_core.a -Wl,--end-group -Bdynamic   -lpthread
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+
+ifdef ESM
+ESM_LIB = libesm.a
+else
+ESM_LIB =
+endif
+LAPACK = 
+FFTOBJECT = 
+OBJ_INPUTPARSE = input_parse.o
+
+
+PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+PHASE_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_ES_occup.o \
+m_ES_RSB.o \
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
+m_CD_Mag_Moment.o \
+m_epc_potential.o        \
+m_Positron_Wave_Functions.o \
+m_ES_WF_mixing.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_OP_Moment.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_Force.o                m_Stress.o               m_ES_WF_by_submat.o \
+m_UnitCell.o \
+m_constraints.o \
+m_ELF.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o            m_Phonon.o \
+m_Wannier.o m_Wannier90.o m_Replica.o Renewal_of_ChgCtrlParam.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o  \
+m_Excitation.o \
+m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Qpt.o  \
+m_rttddft.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o  \
+m_SpinOrbit_ForceTheorem.o
+
+PHASE_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o 
+
+PHASE_F_OTHERSUBS  = mdmain0.o    constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o              \
+spline.o \
+b_Crystal_Structure.o             b_Electronic_Structure.o \
+b_Words.o                         b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                         b_Kpoints.o \
+b_PlaneWaveBasisSet.o             b_Ionic_System.o \
+b_XC_Potential.o                  b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o\
+input_interface.o                 Initialization.o \
+WriteDownData_onto_Files.o        Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o \
+Renewal_of_WaveFunctions.o \
+Renewal_of_pWaveFunctions.o \
+IterationNumbers_Setting.o        ChargeDensity_Construction.o \
+ChargeDensity_Mixing.o            Renewal_of_Potential.o \
+Renewal_of_Hubbard_Potential.o    Renewal_of_OccMat.o \
+Renewal_of_pPotential.o           Renewal_of_Hubbard_Parameters.o \
+Convergence_Check.o               Forces.o \
+Move_Ions.o                       Initial_MD_Condition.o \
+Stress.o                          Postprocessing.o \
+Finalization_of_mpi.o             miscellaneous.o\
+b_BerryPhase.o \
+heap_sort.o                       real_spherical_harmonics.o \
+Real_space_integ.o                crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+mpi_dummy.o                            WriteDownData_onto_Files_ek.o \
+GaussLeg.o lib_int_deri_add.o  \
+rttddft_main.o \
+Potential_Construction.o        Potential_Mixing.o             ThomasFermiWeiz.o \
+Epsilon_postscf.o vdW.o
+
+# for vc_nl
+NLOBJ = vc_nl.o
+
+ifndef SX_DGEMM
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o \
+spg+tetra.o
+else
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+spg+tetra.o \
+dgemm__.o 
+endif
+
+EKCAL_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EKCAL_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_epc_potential.o \
+m_Positron_Wave_Functions.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
+m_CD_Mag_Moment.o \
+m_ES_WF_mixing.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_constraints.o \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o\
+m_UnitCell.o \
+m_ELF.o                  m_Wannier.o \
+m_Wannier90.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o               m_Phonon.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o \
+m_Excitation.o \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o
+
+
+EKCAL_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EKCAL_F_OTHERSUBS  = ekmain.o    constraint_main.o            Preparation_for_mpi.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+vdW.o
+
+ifndef SX_DGEMM
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o
+else
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o 
+endif
+
+
+EPS_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EPS_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
+m_CD_Mag_Moment.o \
+m_ES_WF_mixing.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o          m_Stress.o      m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Wannier.o m_Wannier90.o \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_Excitation.o \
+m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o 
+
+EPS_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EPS_F_OTHERSUBS  = epsmain.o           constraint_main.o Preparation_for_mpi.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+ThomasFermiWeiz.o         Renewal_of_ChgCtrlParam.o  \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+vdW.o
+
+ifndef SX_DGEMM
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+TDLR_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+TDLR_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
+m_CD_Mag_Moment.o \
+m_ES_WF_mixing.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Excitation.o \
+m_ES_occup_EPS.o        \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o  m_CoreLevel_Spectrum.o   \
+m_LinearResponse_Qpt.o \
+m_LinearResponse_Tools.o     m_LinearResponse_Density.o  \
+m_LinearResponse_NonInt.o    m_LinearResponse_ALDA.o \
+m_LinearResponse_Kernel.o    m_LinearResponse_BS.o \
+m_LinearResponse_Spectrum.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o 
+
+TDLR_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o 
+
+TDLR_F_OTHERSUBS  = tdlrmain.o         constraint_main.o  Preparation_for_mpi.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o \
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+PseudoPotential_ek_Epsilon.o           \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+b_LinearResponse_Kernel.o  b_LinearResponse_exc.o \
+LinearResponse_Proc.o     LinearResponse_Spec.o \
+vdW.o
+
+ifndef SX_DGEMM
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+
+PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+all : phase ekcal epsmain tdlrmain
+
+ifdef ESM
+phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+else
+phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+endif
+
+ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ)
+	$(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ)
+	$(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ)
+	$(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+ifdef NO_MPI
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)"
+else
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)"
+endif
+
+liblapack.a:
+	cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+libblas.a:
+	cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h)
+	$(CC) -c $(CFLAGS) $(@:.o=.c)
+
+.f.o:
+	$(F90) -c $(F77FLAGS) $*.f
+
+.f90.o:
+	$(F90) -c $(F90FLAGS) $*.f90
+
+.F.o:
+	$(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F
+
+.F90.o:
+	$(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90
+
+clean:
+	\rm -f *.o *.mod *.a *.lib *.L *.list phase ekcal epsmain tdlrmain
+	\cd LAPACK; make clean
+	\cd BLAS; make clean
+	\cd EsmPack; make clean
+
+install: phase ekcal epsmain tdlrmain
+	\mv -f phase ../bin/
+	\mv -f ekcal ../bin/
+	\mv -f epsmain ../bin/
+	\mv -f tdlrmain ../bin/
diff -uprN phase0_2015.01/src_phase/Makefile.ifort+mpi phase0_2015.01.01/src_phase/Makefile.ifort+mpi
--- phase0_2015.01/src_phase/Makefile.ifort+mpi	2015-10-18 13:05:00.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.ifort+mpi	2016-07-28 01:44:48.646331806 +0900
@@ -72,7 +72,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -196,7 +196,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -309,7 +309,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -327,7 +327,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -424,7 +423,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -442,7 +441,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Makefile.ifort+omp phase0_2015.01.01/src_phase/Makefile.ifort+omp
--- phase0_2015.01/src_phase/Makefile.ifort+omp	2015-10-18 13:34:57.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.ifort+omp	2016-07-28 02:02:00.574829082 +0900
@@ -72,7 +72,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -196,7 +196,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -309,7 +309,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -327,7 +327,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -424,7 +423,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -442,7 +441,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Makefile.ifort.seq phase0_2015.01.01/src_phase/Makefile.ifort.seq
--- phase0_2015.01/src_phase/Makefile.ifort.seq	2015-10-18 13:06:47.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.ifort.seq	2016-07-28 02:05:06.200309150 +0900
@@ -72,7 +72,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -196,7 +196,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -309,7 +309,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -327,7 +327,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -424,7 +423,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -442,7 +441,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Makefile.mpiifort+mkl phase0_2015.01.01/src_phase/Makefile.mpiifort+mkl
--- phase0_2015.01/src_phase/Makefile.mpiifort+mkl	2015-08-05 16:29:15.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.mpiifort+mkl	2016-07-12 12:51:19.000000000 +0900
@@ -67,7 +67,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -191,7 +191,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -304,7 +304,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -322,7 +322,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -419,7 +418,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -437,7 +436,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Makefile.tmpl phase0_2015.01.01/src_phase/Makefile.tmpl
--- phase0_2015.01/src_phase/Makefile.tmpl	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Makefile.tmpl	2016-07-12 12:51:19.000000000 +0900
@@ -19,7 +19,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -143,7 +143,7 @@ m_ES_RSB.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
@@ -256,7 +256,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o   m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -274,7 +274,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -371,7 +370,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o  m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 m_ES_WF_mixing.o \
 string.o m_db.o \
@@ -389,7 +388,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase/Move_Ions.F90 phase0_2015.01.01/src_phase/Move_Ions.F90
--- phase0_2015.01/src_phase/Move_Ions.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Move_Ions.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 482 $)
 !
 !  SUBROUINE:  Move_Ions, wd_cps_and_forces
 !
@@ -33,11 +33,11 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine Move_Ions
-! $Id: Move_Ions.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Move_Ions.F90 482 2016-04-08 08:40:45Z jkoga $
   use m_Control_Parameters, only : iprimd, c_iteration2GDIIS &
        &                         , m_CtrlP_what_is_mdalg &
        &                         , m_CtrlP_set_gdiisoptmode &
-       &                         , sw_charge_predictor,sw_wf_predictor,sw_rspace
+       &                         , sw_charge_predictor,sw_wf_predictor,sw_rspace,af
   use m_Const_Parameters, only   : DP, TEMPERATURE_CONTROL, VERLET &
        &, BLUEMOON, QUENCHED_CONSTRAINT, QUENCHED_MD, NORMAL_MODE_ANALYSIS &
        &, HYPERPLANE_ADAPTIVE_COORDINATE, HAC, T_CONTROL, GDIIS, ORDINA &
@@ -50,7 +50,8 @@ subroutine Move_Ions
        &                   , m_IS_cp_cps2cpo,m_IS_wd_pos_and_v &
        &                   , m_IS_phonon_force, m_IS_cg, m_IS_cg2 &
        &                   , m_IS_evaluate_v_verlet &
-       &                   , m_IS_update_cps_history
+       &                   , m_IS_update_cps_history &
+       &                   , m_IS_force_af_symmetry
 !!$       &                   , forcmx_constraint_quench, almda, mdmode &
   use m_Force,        only : forc_l, forcmx
   use m_IterationNumbers, only : iteration_ionic,iteration
@@ -150,6 +151,8 @@ subroutine Move_Ions
   endif
 ! ============================================================== 13.0B
 
+  if (af/=0) call m_IS_force_af_symmetry(nfout)
+
   if(iprimd >= 2) call m_IS_wd_forc(forc_l)
 
   if(sw_rspace==ON)then
diff -uprN phase0_2015.01/src_phase/NEB.F90 phase0_2015.01.01/src_phase/NEB.F90
--- phase0_2015.01/src_phase/NEB.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/NEB.F90	2016-07-12 12:51:19.000000000 +0900
@@ -106,6 +106,9 @@ subroutine do_neb()
         if (.not.pp_generated) then
            call PseudoPotential_Construction
            pp_generated = .true.
+#ifdef ENABLE_ESM_PACK
+           call Preparation_for_ESM()
+#endif
         else
            if ( flg_paw ) then
               if ( itr > 1 .or. neb%cond%condition == 1 ) then
diff -uprN phase0_2015.01/src_phase/Postprocessing.F90 phase0_2015.01.01/src_phase/Postprocessing.F90
--- phase0_2015.01/src_phase/Postprocessing.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Postprocessing.F90	2016-07-12 12:51:19.000000000 +0900
@@ -50,7 +50,7 @@
 !
 !$$#ifndef PARA3D
 subroutine Postprocessing(ignore_convergence)
-! $Id: Postprocessing.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Postprocessing.F90 489 2016-05-24 04:15:54Z ktagami $
   use m_Const_Parameters, only :   DP, ON, OFF, FORCE_CONVERGED, INITIAL, CONTINUATION &
        &                         , FIXED_CHARGE, FIXED_CHARGE_CONTINUATION &
        &                         , Gauss_distrib_func, EK, SCF &
@@ -86,7 +86,7 @@ subroutine Postprocessing(ignore_converg
   use m_Ionic_System,       only : natm, natm2, ityp, iwei, iatomn, pos &
        &                         , m_IS_pack_all_ions_in_uc &
        &                         , m_IS_set_natm_prim,m_IS_set_napt_prim
-  use m_PseudoPotential,    only : ival
+  use m_PseudoPotential,    only : ival, flg_paw
   use m_Files, only :              nfout,nfdos,nfchr,nfwfk,nfldos,nfelf &
        &                         , nfvlc, nfcntn_bin_stm &
        &                         , m_Files_open_nfdos, m_Files_open_nfchr &
@@ -165,7 +165,8 @@ subroutine Postprocessing(ignore_converg
   use m_Wannier90, only          : m_Wan90_gen_amat, m_Wan90_gen_mmat, m_Wan90_wd_eig, &
        &                           m_Wan90_write_unk, &
        &                           m_Wan90_gen_mmat_mpi, &
-       &                           m_Wan90_wd_eig_mpi, m_Wan90_write_unk_mpi
+       &                           m_Wan90_wd_eig_mpi, m_Wan90_write_unk_mpi, &
+       &                           m_Wan90_gen_mat_spn
 
 !!$  use m_Files, only :              m_Files_open_nfdos, m_Files_close_nfdos
 
@@ -241,9 +242,10 @@ subroutine Postprocessing(ignore_converg
 ! ============== 13.2S
 
   use m_Control_Parameters, only : sw_wf_squared_rspace, charge_filetype, &
-       &                           sw_wf_integ_moment
+       &                           sw_wf_integ_moment,  sw_calc_contact_density
   use m_ES_IO,              only : m_ESIO_wd_Wfn_squared_noncl, &
        &                           m_ESIO_wd_Wfn_integ_magmom
+  use m_PAW_ChargeDensity,  only : m_PAWCD_calc_contact_density
 
   implicit none
 
@@ -312,7 +314,6 @@ subroutine Postprocessing(ignore_converg
         end if
      end if
 
-! ============== KT_add ====================== 13.0E
      if ( m_CtrlP_way_of_smearing() == Fermi_Dirac ) then
         if ( ekmode == ON ) then
            call m_ESoc_count_charge_belowEF_ek( nfout )
@@ -320,7 +321,6 @@ subroutine Postprocessing(ignore_converg
            call m_ESoc_count_charge_belowEF( nfout )
         endif
      endif
-! ============================================ 13.0E
 
 ! -------- Population ----
 ! ==== KT_add ===== 2014/09/01
@@ -333,34 +333,19 @@ subroutine Postprocessing(ignore_converg
 ! ================ 2014/09/01
 
      if(sw_orb_popu == ON) then
-! ==========================  Modified by K. Tagami =========  0.4 and 0.6
-!        call m_ES_phir_dot_WFs(nfout)
-!        call m_ES_sym_comp(nfout)
-!        call m_ES_orbital_population(nfout)
-! 
         if ( ekmode == OFF .and. sw_calc_force == OFF ) then
            call m_ES_phir_dot_WFs(nfout)
            call m_ES_sym_comp(nfout)
-! ================== modified by K. Tagami ===================== 11.0
-!           call m_ES_orbital_population(nfout)
-!
            if ( noncol ) then
               call m_ES_orbital_population_noncl( nfout )
            else
               call m_ES_orbital_population(nfout)
            endif
-! ============================================================== 11.0
-
         endif
-! =============================================================
+
      end if
 
 ! --------- DOS ------
-!
-! ================================ modified by K. Tagami ============ 11.0
-!!     if(sw_dos == ON) call calc_totaldos()
-                       ! contained in this file, calculate total dos
-!
      if (sw_dos == ON) then
         if ( noncol ) then
            call m_ESdos_alloc_dos_wght_noncl()
@@ -371,18 +356,15 @@ subroutine Postprocessing(ignore_converg
            call calc_totaldos()
         endif
      end if
-! =================================================================== 11.0
 
 ! ---- LDOS ----------
 !
      if(sw_ldos == ON) then
-! =================== added by K. Tagami =============== 11.0 
         if ( noncol ) then
            call calc_localdos_noncl()
         else
            call calc_localdos()
         endif
-! ======================================================= 11.0
      end if
 
 ! ------ Charge Distrib ----
@@ -470,10 +452,8 @@ subroutine Postprocessing(ignore_converg
 !
      if (sw_fine_STM_simulation == ON) then
         if ( noncol ) then
-!!$           call write_potential_for_STM
            call write_potential_for_STM_noncl
         else
-!!$           call write_potential_for_STM_noncl
            call write_potential_for_STM
         endif
      endif
@@ -488,13 +468,15 @@ subroutine Postprocessing(ignore_converg
      end if
 
 ! =============================== added by K. Tagami ============= 11.0
-     if ( noncol ) goto 650
+     if ( noncol ) goto 620
 ! ================================================================ 11.0
 
      if (sw_wannier == ON) then
         call m_Wan_opt_Omega(nfout)
      end if
 
+620  continue
+
      if (sw_wannier90 == ON) then
         call m_Wan90_gen_amat(nfout)
 #ifndef USE_MPI_WANNIER90
@@ -505,6 +487,7 @@ subroutine Postprocessing(ignore_converg
         call m_Wan90_gen_mmat_mpi(nfout)
         call m_Wan90_wd_eig_mpi(nfout)
         if ( sw_write_unk_file == ON ) call m_Wan90_write_unk_mpi(nfout)
+        if ( noncol ) call m_Wan90_gen_mat_spn
 #endif
      end if
 
@@ -607,6 +590,10 @@ subroutine Postprocessing(ignore_converg
         call m_ESIO_wd_Wfn_integ_magmom
      endif
   endif
+  if ( sw_calc_contact_density == ON ) then
+     if ( flg_paw ) call m_PAWCD_calc_contact_density
+  endif
+
 
   call tstatc_wd0()
 
@@ -717,7 +704,6 @@ contains
 !!$     call m_Files_close_nfdos()
   end subroutine calc_totaldos
 
-! =============================== added by K. Tagami ===================== 11.0
   subroutine calc_totaldos_noncl()
 
     if(.not.check_if_metalic_flag.or.ignore_convergence) call m_ESoc_check_if_metalic(nfout)
@@ -762,9 +748,7 @@ contains
     end if
 
   end subroutine calc_totaldos_noncl
-! ========================================================================= 11.0
 
-! ===================== added by K. Tagami ====================== 11.0
   subroutine calc_localdos()
 
     if (ekmode == OFF) then
@@ -915,9 +899,7 @@ contains
     call m_Ldos_dealloc_weiwsc_etc()
 
   end subroutine calc_localdos_noncl
-! ============================================================== 11.0
 
-! ========================= added by K. Tagami =============== 11.0
   subroutine calc_spatial_chg_distrib
     integer :: iloop, iloop2
 
@@ -1134,9 +1116,8 @@ contains
     end if
 
   end subroutine calc_partial_charge_noncl
-! ============================================================ 11.0
 
-! ============================ added by K. Tagami ================= 11.0
+
   subroutine write_potential_for_STM
     integer :: ismax
 
@@ -1223,14 +1204,9 @@ contains
 
   end subroutine write_potential_for_STM_noncl
 
-! ================================================================= 11.0
 
-!================================== modified by K. Tagami ========== 11.0
-!  subroutine wd_fine_STM_parameters()
-!
   subroutine wd_fine_STM_parameters( ismax )
     integer, intent(in) :: ismax
-! ================================================================= 11.0
 
     integer :: ik
     write(nfout,'(" !!STM:    kg(kng)   = ",i8)') kg
@@ -1273,12 +1249,8 @@ contains
     end do
   end subroutine check_neordr_nrvf_ordr
 
-! ====================================== modified by K. Tagami ========== 11.0
-!  subroutine wd_ArraySize_Parameters_For_STM(nf_bin)
-!
   subroutine wd_ArraySize_Parameters_For_STM( nf_bin, ismax )
     integer, intent(in) :: ismax
-! ======================================================================== 11.0
 
     integer, intent(in) :: nf_bin
     if(mype == 0) then
@@ -1370,9 +1342,15 @@ subroutine Postprocessing_k
   use m_Electronic_Structure,   only :  m_ES_cp_eko_l_to_eko_ek2
 ! ============== 13.2S
 
-  use m_Control_Parameters,    only :  sw_wf_squared_rspace, sw_wf_integ_moment
+  use m_Control_Parameters,    only :  sw_wf_squared_rspace, sw_wf_integ_moment, &
+       &                               sw_orb_popu, &
+       &                               sw_print_wf_orb_projection
   use m_Files, only : nfout
-  use m_ES_IO, only : m_ESIO_wd_Wfn_squared_noncl, m_ESIO_wd_Wfn_integ_magmom
+  use m_ES_IO, only : m_ESIO_wd_Wfn_squared_noncl, m_ESIO_wd_Wfn_integ_magmom, &
+       &              m_ESIO_wd_WFn_orb_proj
+  use m_ES_nonlocal,  only : m_ES_phir_dot_WFs
+  use m_Nonlocal_Potential,  only : m_NLP_phir_dot_PWs
+  use m_Kpoints,  only : kv3, vkxyz
 
   implicit none
 
@@ -1380,15 +1358,12 @@ subroutine Postprocessing_k
      if(nk_in_the_process == 1)  call m_Ldos_wd_natm2_and_totch()
      call m_Ldos_alloc_weiwsc_etc()
 
-! ========================== modified by K. Tagami =================== 11.0
-!     call m_Ldos_cal()
-!
      if ( noncol ) then
         call m_Ldos_cal_noncl()
      else
         call m_Ldos_cal()
      endif
-! =================================================================== 11.0
+
      call m_Ldos_dealloc_weiwsc_etc()
   end if
 ! === KT_add === 13.2S
@@ -1405,5 +1380,10 @@ subroutine Postprocessing_k
      if ( sw_wf_integ_moment == ON ) call m_ESIO_wd_Wfn_integ_magmom
   endif
 
+  if ( sw_orb_popu == ON .and. sw_print_wf_orb_projection == ON ) then
+     call m_ES_phir_dot_WFs( nfout )
+     call m_ESIO_wd_WFn_orb_proj
+  endif
+
 !$$#endif
 end subroutine Postprocessing_k
diff -uprN phase0_2015.01/src_phase/Preparation.F90 phase0_2015.01.01/src_phase/Preparation.F90
--- phase0_2015.01/src_phase/Preparation.F90	2015-09-14 15:16:57.000000000 +0900
+++ phase0_2015.01.01/src_phase/Preparation.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 458 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 474 $)
 !
 !  SUBROUINE:  fft_box_finding_way, Preparation, Preparation_ek
 !
@@ -36,8 +36,17 @@
 #define _INCLUDE_EXX_
 #endif
 
+#ifdef FJ_TIMER
+#   define __TIMER_FJ_START_w_BARRIER(str,a)   call mpi_barrier(str,ierr) ;   call timer_sta(a)
+#   define __TIMER_FJ_START(a)   call timer_sta(a)
+#   define __TIMER_FJ_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_FJ_START(a)
+#   define __TIMER_FJ_STOP(a)
+#endif
+
 subroutine Preparation()
-! $Id: Preparation.F90 458 2015-09-09 06:05:42Z ktagami $
+! $Id: Preparation.F90 474 2016-02-09 09:32:53Z yamasaki $
   use m_Const_Parameters,  only:DP,FILE,GENERAL,OUTER,INNER,SIMPLE_CUBIC &
        &                       ,HEXAGONAL,TETRAHEDRON &
        &                       ,INITIAL,CONTINUATION,FIXED_CHARGE &
@@ -204,6 +213,10 @@ subroutine Preparation()
 
   use m_ES_WF_by_MatDiagon, only : m_ESmat_set_reduced_basis_mode
 
+! ==== EXP_CELLOPT === 2015/09/24
+  use m_PlaneWaveBasisSet, only : m_pwBS_store_prev_kg1_kgp
+! ==================== 2015/09/24
+
   implicit none
   integer :: outer_or_inner
   include 'mpif.h'
@@ -240,6 +253,7 @@ subroutine Preparation()
         call m_CD_force_dealloc()
         call m_ESrmm_dealloc_r_norm_flag()
         call m_ESsubmat_dealloc()
+        call m_pwBS_store_prev_kg1_kgp
      endif
      call m_IS_gdiis_reset()
      call m_IS_CG_reset()
@@ -312,15 +326,10 @@ subroutine Preparation()
   if(symmetry_method == AUTOMATIC) then
      call m_CS_SG_auto_gnrt_sym_op(.true.,nfout) ! -(m_CS_SpaceGroup) -> nopr,af
   else
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(31)
-#endif
+                                                     __TIMER_FJ_START(31)
      call m_CS_gnrt_symmetry_operations(.true.,nfout) ! -(m_Crystal_Structure) -> nopr,af
 !!$     call m_CS_gnrt_symm_operators_tl(.true.,nfout) ! -(m_Crystal_Structure) -> nopr,af
-#ifdef FJ_TIMER
-                    call timer_end(31)
-#endif
+                                                     __TIMER_FJ_STOP(31)
   end if
   call m_CS_alloc_op_tau(nfout)
   call m_CS_alloc_op_tau_tl(nfout)
@@ -328,15 +337,10 @@ subroutine Preparation()
      call m_CS_SG_auto_gnrt_sym_op(paramset,nfout) ! paramset == .false.
      call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos
   else
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(31)
-#endif
+                                                     __TIMER_FJ_START(31)
      call m_CS_gnrt_symmetry_operations(paramset,nfout) ! paramset == .false.
      call m_CS_gnrt_symm_operators_tl(paramset,nfout) ! -(m_Crystal_Structure) -> nopr,af
-#ifdef FJ_TIMER
-                    call timer_end(31)
-#endif
+                                                     __TIMER_FJ_STOP(31)
   end if
 
   call m_CS_SG_print_space_group_name(nfout)
@@ -365,15 +369,10 @@ subroutine Preparation()
      call m_IS_inv_sym_off(nfout) ! -> inversion_symmetry
   end if
   call m_IS_alloc_napt()
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(31)
-#endif
+                                                     __TIMER_FJ_START(31)
   call m_CS_wd_op_and_tau(nfout)
   call m_IS_symm_check_of_pos()
-#ifdef FJ_TIMER
-                    call timer_end(31)
-#endif
+                                                     __TIMER_FJ_STOP(31)
 
   if(ekmode /= GRID .and. sw_phonon == ON) then
      call m_Phonon_alloc_qvec()
@@ -464,37 +463,22 @@ subroutine Preparation()
 
      if(icond == PREPARATION_ONLY .or. icond == INITIAL .or. icond == CONTINUATION .or. &
         & icond==COORDINATE_CONTINUATION) then
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
         call m_pwBS_for_each_WF(preallocation=paramset) ! -> kg1, nbase,iba (when paramset==.false.)
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
      else if(icond == FIXED_CHARGE .or. icond == FIXED_CHARGE_CONTINUATION) then
 !!$        if(ekmode == OFF)&
 !!$             & stop ' ! combination of ekmode and icond is illegal (Preparation)'
         call m_Files_close_files_initial0()
         call m_Files_open_nfeng(icond)
         if(ekmode == OFF .and. fixed_charge_k_parallel == ALL_AT_ONCE) then
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
            call m_pwBS_for_each_WF(preallocation=paramset) ! -> kg1, nbase,iba (when paramset==.false.)
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
         else 
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
            call m_pwBS_for_each_WF(preallocation=.true.) ! -> kg1, iba
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
            call m_Kp_alloc_kpoints_ek()  ! -> kv3_ek (=kv3), allocate(vkxyz_ek,qwgt_ek)
            call m_Kp_cp_vkxyz_to_vkxyz_ek()
            if(fixed_charge_k_parallel == ONE_BY_ONE) then
@@ -509,14 +493,9 @@ subroutine Preparation()
            call m_pwBS_cp_iba_to_iba_ek()
            if(icond == FIXED_CHARGE_CONTINUATION) &
                 & call m_ES_cp_iconv(numk_tmp,iconv_ek_tmp)
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
            call m_pwBS_for_each_WF(preallocation=.false.) ! -> kg1, iba
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
         end if
      else
         stop ' icond is illegal (Preparation)'
@@ -532,6 +511,9 @@ subroutine Preparation()
         call m_pwBS_exxCD()
      endif
 ! ========== ======================================= 13.0F
+     if(ipri>=1) write(nfout,'(" _INCLUDE_EXX_ is defined")')
+#else
+     if(ipri>=1) write(nfout,'(" _INCLUDE_EXX_ is not defined")')
 #endif
 
      if(sw_ldos == ON) then
diff -uprN phase0_2015.01/src_phase/Preparation_for_ESM.F90 phase0_2015.01.01/src_phase/Preparation_for_ESM.F90
--- phase0_2015.01/src_phase/Preparation_for_ESM.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Preparation_for_ESM.F90	2016-07-12 12:51:19.000000000 +0900
@@ -9,7 +9,7 @@ subroutine Preparation_for_ESM()
     use m_FFT, only : fft_box_size_CD
     use m_Crystal_Structure, only : altv
     use m_Parallelization, only : npes,mype, ista_kngp, iend_kngp, mpi_comm_group
-    use m_PlaneWaveBasisSet, only : ngabc,igfp_l
+    use m_PlaneWaveBasisSet, only : ngabc,igfp_l,kgp
 
     implicit none
     include 'mpif.h'
@@ -32,25 +32,29 @@ subroutine Preparation_for_ESM()
     do i=1,natm
        ival_at(i) = ival(ityp(i))
     enddo
-    allocate(ngabc_esm(3,1:iend_kngp-ista_kngp+1));ngabc_esm=0
+!    allocate(ngabc_esm(3,1:iend_kngp-ista_kngp+1));ngabc_esm=0
+    allocate(ngabc_esm(3,1:kgp));ngabc_esm=0
     do i=1,3
        do j=ista_kngp,iend_kngp
-          ngabc_esm(i,j-ista_kngp+1) = ngabc(j,i)
+          ngabc_esm(i,j) = ngabc(j,i)
        enddo
     enddo
+    call mpi_allreduce(mpi_in_place,ngabc_esm,3*kgp,mpi_integer,mpi_sum,mpi_comm_group,ierr)
     allocate(cps_tmp(3,natm))
     do i=1,3
        do j=1,natm
           cps_tmp(i,j) = cps(j,i)
        enddo
     enddo
-    allocate(igfp_l_esm(1:iend_kngp-ista_kngp+1));igfp_l_esm=0
+    allocate(igfp_l_esm(1:kgp));igfp_l_esm=0
     do i=ista_kngp,iend_kngp
-       igfp_l_esm(i-ista_kngp+1) = igfp_l(i)
+       igfp_l_esm(i) = igfp_l(i)
     enddo
+    call mpi_allreduce(mpi_in_place,igfp_l_esm,kgp,mpi_integer,mpi_sum,mpi_comm_group,ierr)
+
     call Esm_interface_map_parameters(natm,ival_at,cps_tmp,1.0d0,altv, &
     & fft_box_size_CD(1,0),fft_box_size_CD(2,0),fft_box_size_CD(3,0),  &
-    & esm_bc_c,.false.,iend_kngp-ista_kngp+1,nspin,ngabc_esm,          &
+    & esm_bc_c,.false.,kgp,nspin,ngabc_esm,                            &
     & igfp_l_esm,igfp_l_esm,esm_w,2.0d0*esm_e_field, esm_izwall,       &
     & esm_z_wall, esm_bar_height,esm_bar_width)
     call Esm_interface_set_communicator(mpi_comm_group)
diff -uprN phase0_2015.01/src_phase/PseudoPotential_Construction.F90 phase0_2015.01.01/src_phase/PseudoPotential_Construction.F90
--- phase0_2015.01/src_phase/PseudoPotential_Construction.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/PseudoPotential_Construction.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 483 $)
 !
 !  SUBROUINE:  check_gncpp_type, PP_construction_paramset
 !             PseudoPotential_Construction, PseudoPotential_ek
@@ -38,7 +38,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine PseudoPotential_Construction
-! $Id: PseudoPotential_Construction.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: PseudoPotential_Construction.F90 483 2016-04-27 04:02:48Z ktagami $
   use m_PseudoPotential,      only : m_PP_alloc0_ps_ntyp,   m_PP_alloc_ps_ntyp &
        &                           , m_PP_dealloc_ps_ntyp  &
        &                           , m_PP_set_mmesh,        m_PP_set_nloc &
@@ -89,7 +89,7 @@ subroutine PseudoPotential_Construction
        &                           , sw_wannier, sw_berry_phase, corecharge_cntnbin, sw_fef &
        &                           , ekmode, fixed_charge_k_parallel, continuation_using_ppdata &
        &                           , m_CtrlP_set_ppprinted,sw_rspace, sw_rspace_hyb, sw_hybrid_functional &
-       &                           , sw_ldos, sw_rspace_ldos, m_CtrlP_rspace_integ_all_OK
+       &                           , sw_ldos, sw_rspace_ldos, m_CtrlP_rspace_integ_all_OK,nmax_G_hyb
   use m_PlaneWaveBasisSet,    only : kgp,gr_l,ngshell,ngshell_range
   use m_Const_Parameters,     only : OLD,ON, SPHERICAL_HARMONICS, NO
   use m_Files,                only : m_Files_open_ps_files,m_Files_close_ps_files,nfcntn_bin &
@@ -360,26 +360,26 @@ subroutine PseudoPotential_Construction
           &  .and. ekmode==ON ) then
 ! =================13.0S
 
-! ========================================== modified by K. Tagami ========= 0.2
-!        call m_NLP_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl
-! ==========================================================================
+#ifdef EXP_WFN_PROJ
+        call m_NLP_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl
+#endif
         if(sw_orb_popu == ON) then
            call m_PP_make_qorb(nfout)
-! ========================================== modified by K. Tagami ========= 0.2
-!           call m_NLP_phir_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> phig
-! ===========================================================================
+#ifdef EXP_WFN_PROJ
+           call m_NLP_phir_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> phig
+#endif
            call m_PP_cnstrct_crotylm(nfout) !-> crotylm
         end if
 
         if(sw_use_add_proj == ON) then
-! ========================================== modified by K. Tagami ========= 0.2
-!           call m_NLP_add_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl_add
-! ===========================================================================
+#ifdef EXP_WFN_PROJ
+           call m_NLP_add_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl_add
+#endif
         end if
         if(intzaj == by_pseudo_atomic_orbitals) then
-! ========================================== modified by K. Tagami ========= 0.2
-!           call m_NLP_paor_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> paog
-! ===========================================================================
+#ifdef EXP_WFN_PROJ
+           call m_NLP_paor_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> paog
+#endif
         end if
 
         if(num_projectors > 0) then
diff -uprN phase0_2015.01/src_phase/Renewal_of_ChgCtrlParam.f90 phase0_2015.01.01/src_phase/Renewal_of_ChgCtrlParam.f90
--- phase0_2015.01/src_phase/Renewal_of_ChgCtrlParam.f90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Renewal_of_ChgCtrlParam.f90	2016-07-12 12:51:19.000000000 +0900
@@ -10,7 +10,8 @@ subroutine Renewal_of_Chg_Ctrl_Param
   use m_Const_Parameters,  only : MSD, CG, SD, LMSD, LMCG,lmeazyCG, lmmsd
   use m_Control_Parameters,  only :  m_CtrlP_solver_for_WFs_now, intzaj
 
-  use  m_IterationNumbers,    only : iteration, iteration_ionic, iteration_electronic
+  use  m_IterationNumbers,    only : iteration, iteration_ionic, iteration_electronic, &
+       &                             iteration_unit_cell
 
   use m_Control_Parameters,       only : sw_update_charge_hsr, eval_energy_before_charge
   use m_Control_Parameters,     only : sw_recomposing, sw_force_simple_mixing
@@ -41,7 +42,10 @@ subroutine Renewal_of_Chg_Ctrl_Param
        &                           nmax_intermid_lambda, &
        &                           edelta_change_lambda_first, &
        &                           edelta_change_lambda_last, &
-       &                           max_iterations_mag_constraint
+       &                           max_iter_elec_mag_constraint, &
+       &                           max_iter_ion_mag_constraint, &
+       &                           max_iter_cell_mag_constraint, &
+       &                           sw_fix_charge_after_constraint
 
 ! ================= KT_add =================== 13.0XX
   use m_Control_Parameters,   only : sw_calc_ekin_density, &
@@ -59,17 +63,19 @@ subroutine Renewal_of_Chg_Ctrl_Param
        &                             edelta_start_wf_mixing
 ! =========================== 13.0U3
 
+  use m_Parallelization,  only : mype
+
   implicit none
 
   real(kind=DP) :: edeltab_per_atom, edeltb_per_atom, edelta
 
-! ----------------------- UUU
-  if ( sw_monitor_atomcharge == ON ) then
-     if ( iteration_ionic >1 .and. iteration_electronic ==1 ) then
-        call m_CD_set_rad_cov_default
-        call m_CD_set_rad_cov_now
-     endif
-  endif
+! -----------------------
+!  if ( sw_monitor_atomcharge == ON ) then
+!     if ( iteration_ionic >1 .and. iteration_electronic ==1 ) then
+!        call m_CD_set_rad_cov_default
+!        call m_CD_set_rad_cov_now
+!     endif
+!  endif
 
 ! ------------------
   edelta = m_TE_what_is_edeltb_now()
@@ -290,25 +296,50 @@ contains
     logical, save :: First = .true.
     logical, save :: mag_constraint_is_over = .false.
     logical, save :: lambda_is_changed 
-    real(kind=DP), save :: lambda_org, lambda_old
+    integer, save :: sw_fix_charge_after_constr_org
+
+    real(kind=DP), save :: lambda_org, lambda_old, lambda_00
 
     integer :: i, nn
     real(kind=DP) :: c1, ratio
 
+    if ( First ) then
+       lambda_org = mag_constraint_lambda; 
+       lambda_old = mag_constraint_lambda
+       sw_fix_charge_after_constr_org = sw_fix_charge_after_constraint
+       istep = 1
+       First = .false.
+    else 
+!       if ( iteration_unit_cell > 1 .and. iteration_ionic == 1 &
+!            &                       .and. iteration_electronic == 1 ) then
+!          mag_constraint_is_over = .false.
+!          mag_constraint_lambda = lambda_org
+!!          istep = 1;  count = 0
+!       endif
+       if ( iteration_electronic == 1 ) then
+          mag_constraint_is_over = .false.
+          mag_constraint_lambda = lambda_org
+          sw_fix_charge_after_constraint = sw_fix_charge_after_constr_org
+          istep = 1;  count = 0
+       endif
+    endif
+
     if ( damping_method_mag_constraint == 0 )  return
-    if ( mag_constraint_is_over ) return
 
-    if ( iteration_ionic > 1 ) then
+    if ( iteration_ionic > max_iter_ion_mag_constraint ) then
+       mag_constraint_is_over = .true.
+       mag_constraint_lambda = 0.0d0;  
+    endif
+    if ( iteration_unit_cell > max_iter_cell_mag_constraint ) then
        mag_constraint_is_over = .true.
        mag_constraint_lambda = 0.0d0;  
-       return
     endif
 
-    if ( First ) then
-       lambda_org = mag_constraint_lambda; 
-       lambda_old = mag_constraint_lambda
-       istep = 1
-       First = .false.
+    if ( mag_constraint_is_over ) then
+       if ( sw_fix_charge_after_constraint == ON ) then
+          sw_update_charge_total = OFF
+       endif
+       return
     endif
 
     lambda_is_changed = .false.
@@ -346,17 +377,28 @@ contains
        endif
 
     case (ABRUPT)
-       if ( iteration_electronic > max_iterations_mag_constraint ) then
+       if ( iteration_electronic > max_iter_elec_mag_constraint ) then
           mag_constraint_lambda = 0.0d0
           lambda_is_changed = .true.
           mag_constraint_is_over = .true.
        endif
 
+       c1 = abs( edeltab_per_atom )
+
+       if ( c1 <  edelta_change_lambda_last ) then
+          count = count + 1
+          if ( count == succession ) then
+             mag_constraint_lambda = 0.0d0
+             mag_constraint_is_over = .true.
+             lambda_is_changed = .true.
+          endif
+       endif
+
     case (LINEAR)
-       ratio = dble( iteration_electronic ) / dble(max_iterations_mag_constraint)
+       ratio = dble( iteration_electronic ) / dble(max_iter_elec_mag_constraint)
        ratio = 1.0D0 -ratio
 
-       if ( iteration_electronic <= max_iterations_mag_constraint ) then
+       if ( iteration_electronic <= max_iter_elec_mag_constraint ) then
           mag_constraint_lambda = lambda_org *ratio
           lambda_is_changed = .true.
        else
@@ -364,6 +406,14 @@ contains
           mag_constraint_is_over = .true.
        endif
 
+       c1 = abs( edeltab_per_atom )
+       if ( c1 <  edelta_change_lambda_first ) then
+          count = count + 1
+          if ( count == succession ) then
+             mag_constraint_is_over = .true.
+             lambda_is_changed = .true.
+          endif
+       endif
     end select
 
     if ( lambda_is_changed ) then
@@ -373,7 +423,7 @@ contains
     endif
 
     lambda_old = mag_constraint_lambda
-
+!
   end subroutine update_lambda_mag_constraint
 
 ! === KT_add === 13.0U3
diff -uprN phase0_2015.01/src_phase/Renewal_of_pPotential.f90 phase0_2015.01.01/src_phase/Renewal_of_pPotential.f90
--- phase0_2015.01/src_phase/Renewal_of_pPotential.f90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Renewal_of_pPotential.f90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  SUBROUINE: Renewal_of_pPotential
 !
@@ -23,17 +23,28 @@
 !  Science and Technology (MEXT) of Japan. 
 !
 subroutine Renewal_of_pPotential()
-! $Id: Renewal_of_pPotential.f90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Renewal_of_pPotential.f90 472 2015-11-28 09:01:17Z ktagami $
   use m_Files,               only : nfout
-  use m_ES_Intgr_VlhxcQlm,   only : m_ESiVQ_integrate_VlhxcQlm
+!!  use m_ES_Intgr_VlhxcQlm,   only : m_ESiVQ_integrate_VlhxcQlm
   use m_Charge_Density,      only : chgq_l
-  use m_epc_potential,       only : m_epc_alloc, m_epc_cal_potential,m_epc_ESlhxc_potential,vepc_l
+  use m_epc_potential,       only : m_epc_alloc, m_epc_cal_potential, &
+       &                            m_epc_ESlhxc_potential, vepc_l, &
+       &                            m_epc_ESlhxc_potential_mod
+  use m_Control_Parameters,  only : positron_method
+  use m_Const_Parameters,    only : Positron_CONV
+
   implicit none
 
   call m_epc_alloc()
   call m_epc_cal_potential(nfout,chgq_l) ! chgq_l -> vepc_l, tchgq_l
                                               ! (xcfft) -> vxc_l
-  call m_epc_ESlhxc_potential(nfout)     ! vepc_l, tchgq_l ->  vlhxc_l
-  call m_ESiVQ_integrate_VlhxcQlm(nfout) ! (lclchh) -> vlhxcQ
+
+  if ( positron_method == Positron_CONV ) then
+     call m_epc_ESlhxc_potential(nfout)     ! vepc_l, tchgq_l ->  vlhxc_l
+  else
+     call m_epc_ESlhxc_potential_mod(nfout)
+  endif
+
+!!  call m_ESiVQ_integrate_VlhxcQlm(nfout) ! (lclchh) -> vlhxcQ
 
 end subroutine Renewal_of_pPotential
diff -uprN phase0_2015.01/src_phase/Renewal_of_pWaveFunctions.F90 phase0_2015.01.01/src_phase/Renewal_of_pWaveFunctions.F90
--- phase0_2015.01/src_phase/Renewal_of_pWaveFunctions.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/Renewal_of_pWaveFunctions.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  SUBROUINE: Renewal_of_pWaveFunctions
 !
@@ -25,7 +25,7 @@
 !  Science and Technology (MEXT) of Japan. 
 !
 subroutine Renewal_of_pWaveFunctions()
-! $Id: Renewal_of_pWaveFunctions.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Renewal_of_pWaveFunctions.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Const_Parameters, only     :  ON, OFF, DP, ORTHONORMALIZATION &
        &                            , CG, SD, MSD, SUBMAT, MATRIXDIAGON, lmMSD, lmSD, lmCG 
   use m_IterationNumbers,     only :  iteration, iteration_positron_wf
@@ -43,6 +43,14 @@ subroutine Renewal_of_pWaveFunctions()
        &                            , m_pWF_wd_pzaj &
        &                            , m_pWF_submat &
        &                            , m_pWF_evolve_wfs_again
+
+! === POSITRON SCF ===
+  use m_Const_Parameters,     only : Positron_CONV, Positron_GGGC, BAND_Energy, TOTAL_Energy
+  use m_Control_Parameters,   only : positron_method
+  use m_Total_Energy,  only : m_TE_tell_total_energy
+  use m_Positron_Wave_Functions, only : pchg_l, pchgo_l
+! =====
+
   implicit none 
   integer :: isolver, precon, sw_submat
   real(kind=DP) :: dtim
@@ -79,6 +87,8 @@ subroutine Renewal_of_pWaveFunctions()
 #endif
    end if
 
+   pchgo_l = pchg_l
+
 contains
   subroutine Renew_pWF_by_lmSDorlmCG(isolver,precon,dtim)
     integer, intent(in)         :: isolver,precon
@@ -88,13 +98,27 @@ contains
     real(kind=DP)               :: dtim_new = 0.d0, dtim_msdv
     integer                     :: isolver_core, mode, what_is_the_pcore_solver
 
+    integer ::   energy_evaluation_lmm
+
+    if ( positron_method == Positron_CONV ) then
+       energy_evaluation_lmm = BAND_ENERGY
+    else if ( positron_method == Positron_GGGC ) then
+       energy_evaluation_lmm = TOTAL_ENERGY
+       energy_evaluation_lmm = BAND_ENERGY
+    endif
+
     isolver_core = what_is_the_pcore_solver(isolver) ! -(in this file)
 
     mode = ORTHONORMALIZATION
 
     call m_pWF_copy_pzaj_to_pzaj_old()
 
-    etotal(1) = m_pWF_tell_band_energy()
+    if ( energy_evaluation_lmm == TOTAL_ENERGY ) then
+       etotal(1) = m_TE_tell_total_energy()
+    else if ( energy_evaluation_lmm == BAND_ENERGY ) then
+       etotal(1) = m_pWF_tell_band_energy()
+    endif
+
     if(ipripositron >= 2) write(nfout,'(" !! etotal(1) = ",f10.6)') etotal(1)
     dtim_msdv = m_CtrlP_dtim_1Dsearch_now(dtim)
     if(ipripositron >= 2) write(nfout,'(" !! dtim_msdv = ",f8.4)') dtim_msdv
@@ -108,13 +132,23 @@ contains
        !    ~~~~~~~~~~~~~~~~~~~~~~~~~
 !!$    end if
 
-    etotal(2) = m_pWF_tell_band_energy()
+    if ( energy_evaluation_lmm == TOTAL_ENERGY ) then
+       etotal(2) = m_TE_tell_total_energy()
+    else if(energy_evaluation_lmm == BAND_ENERGY) then
+       etotal(2) = m_pWF_tell_band_energy()
+    endif
+
     if(ipripositron >= 2) write(nfout,'(" !! etotal(2) = ",f10.6)') etotal(2)
 
 !!$    if(isolver_core == CG .or. isolver_core == eazyCG) mode = NORMALIZATION
     call m_pWF_evolve_WFs_again(nfout,mode,dtim_msdv,factor*dtim_msdv)
 
-    etotal(3) = m_pWF_tell_band_energy()
+    if ( energy_evaluation_lmm == TOTAL_ENERGY ) then
+       etotal(3) = m_TE_tell_total_energy()
+    else if(energy_evaluation_lmm == BAND_ENERGY) then
+       etotal(3) = m_pWF_tell_band_energy()
+    endif
+
     if(ipripositron >= 2) write(nfout,'(" !! etotal(3) = ",f10.6)') etotal(3)
     dtim_new = m_CtrlP_decide_dtim_1Dsearch(nfout,etotal,dtim_msdv,factor)
     mode = ORTHONORMALIZATION
@@ -125,19 +159,20 @@ contains
 end subroutine Renewal_of_pWaveFunctions
 
 subroutine Solve_pWaveFunctions()
-! $Id: Renewal_of_pWaveFunctions.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Renewal_of_pWaveFunctions.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Control_parameters, only :      pev_max_iteration, ipripositron
   use m_IterationNumbers, only :        iteration_positron_wf, m_Iter_positron_incre
   use m_Files, only :                   nfout
   use m_Positron_Wave_Functions, only : m_pWF_construct_pcharge &
        &                              , m_pWF_charge_rspace, m_pWF_wd_pev &
        &                              , m_pWF_wlifetime
-  use m_Electronic_Structure, only :    m_ES_wd_vlhxcQ
+!!  use m_Electronic_Structure, only :    m_ES_wd_vlhxcQ
   implicit none
   logical :: pEigenValues_are_Converged
 
-  call Initial_pWaveFunctions()
-  if(ipripositron >= 2) call m_ES_wd_vlhxcQ()
+!!  call Initial_pWaveFunctions()
+!!  if(ipripositron >= 2) call m_ES_wd_vlhxcQ()
+
   Solve_pWaveFunction: do
      call m_Iter_positron_incre()
      call Renewal_of_pWaveFunctions()
@@ -198,8 +233,16 @@ subroutine Initial_pWaveFunctions()
        &                              , m_pWF_alloc_afft_etc &
        &                              , m_pWF_dealloc_afft_etc &
        &                              , m_pWF_wd_pzaj
+
+! == POSITRON SCF === 2015/11/28
+  use m_epc_Potential,  only : m_epc_alloc, m_epc_alloc_vlhxc_p
+! =================== 2015/11/28
+
   implicit none
 
+  call m_epc_alloc
+  call m_epc_alloc_vlhxc_p
+
   call m_pWF_allocate_pzaj_etc()
 
   if(intpzaj == by_random_numbers) then
diff -uprN phase0_2015.01/src_phase/b_PseudoPotential_EXX.F90 phase0_2015.01.01/src_phase/b_PseudoPotential_EXX.F90
--- phase0_2015.01/src_phase/b_PseudoPotential_EXX.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/b_PseudoPotential_EXX.F90	2016-07-12 12:51:19.000000000 +0900
@@ -3,6 +3,7 @@ subroutine alloc_qitg_exx()
   use m_PlaneWaveBasisSet,  only : kgp
   use m_ES_ExactExchange,   only : nqmk, qitg_exx
   use m_Control_Parameters, only : nmax_G_hyb
+  use m_Const_Parameters,   only : ON
   implicit none
 #if defined(MEMORY_SAVE_EXX) && defined(MEMORY_SAVE_MORE_EXX)
   if(.not.allocated(qitg_exx)) allocate(qitg_exx(nmax_G_hyb,nqitg))
@@ -28,12 +29,12 @@ subroutine qitgft_qmk(it,nmm_il3,mm_il3,
 end subroutine qitgft_qmk
 #else
 subroutine qitgft_qmk(it,nmm_il3,mm_il3,qrsps_mm,lcmax,h)
-  use m_Const_Parameters,   only : DP, PAI4, DELTA
+  use m_Const_Parameters,   only : DP, PAI4, DELTA, ON
   use m_Control_Parameters, only : nmax_G_hyb
   use m_Crystal_Structure,  only : rltv
   use m_PlaneWaveBasisSet,  only : ngabc,kgp
   use m_PseudoPotential,    only : mmesh,nmesh,rmax,radr,wos,nqitg_sp
-  use m_Parallelization,    only : mpi_comm_group,ista_kngp,iend_kngp,npes,ierr
+  use m_Parallelization,    only : mpi_comm_group,ista_kngp,iend_kngp,npes,ierr,ista_kngp_exx,iend_kngp_exx
   use m_ES_ExactExchange,   only : nqmk, qmk, qitg_exx
   use m_Timing,             only : tstatc0_begin, tstatc0_end
   implicit none
@@ -70,6 +71,7 @@ subroutine qitgft_qmk(it,nmm_il3,mm_il3,
   end do
   iend_kngp0 = iend_kngp
   if(iend_kngp0.gt.nmax_G_hyb) iend_kngp0 = nmax_G_hyb
+
   do ik=1,nqmk
      do i = ista_kngp, iend_kngp0
         kg(1:3) = qmk(ik,1:3) + ngabc(i,1:3)
@@ -111,16 +113,22 @@ subroutine qitgft_qmk(it,nmm_il3,mm_il3,
               qitg_t(i,iq) = qitg_exx_l(i,iq)
            end do
         end do
-        call mpi_allreduce(qitg_t,qitg_exx(1,mm0+1,ik),nmax_G_hyb*nqitg_sp(it),mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+        call mpi_allreduce(qitg_t,qitg_exx(1,mm0+1,ik),nmax_G_hyb*nqitg_sp(it) &
+         &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+        do iq=1,nqitg_sp(it)
+           do i=ista_kngp,iend_kngp0
+              qitg_exx(i,mm0+iq,ik) = qitg_t(i,iq)
+           enddo
+        enddo
         deallocate(qitg_t)
      else
         do iq=1,nqitg_sp(it)
-
            do i=1,nmax_G_hyb
               qitg_exx(i,mm0+iq,ik) = qitg_exx_l(i,iq)
            end do
         end do
      end if
+
   end do ! ik
 
   deallocate(qitg_exx_l)
diff -uprN phase0_2015.01/src_phase/configure phase0_2015.01.01/src_phase/configure
--- phase0_2015.01/src_phase/configure	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/configure	2016-07-12 12:51:19.000000000 +0900
@@ -343,6 +343,7 @@ fi
 bllib_netlib="Netlib BLAS/LAPACK"
 bllib_acml="AMD Core Math Library (ACML)"
 bllib_mkl="Intel Math Kernel Library (MKL)"
+bllib_mkl_sys="System-installed MKL"
 bllib_sunperf="Sun Performance Library"
 bllib_scsl="SGI Cray Scientific Library (SCSL)"
 bllib_essl="IBM Engineering and Scientific Subroutine Library (ESSL)"
@@ -357,6 +358,7 @@ add_option "${bllib_netlib}"
 case ${sel_comp} in
 "${gnu_compiler}"|"${g95_compiler}"|"${intel_compiler}")
 add_option "${bllib_mkl}"
+add_option "${bllib_mkl_sys}"
 	;;
 esac
 	;;
@@ -477,6 +479,20 @@ esac
 	;;
 esac
 	;;
+"${bllib_mkl_sys}")
+case ${sel_plat} in
+"${gnu_linux_ia32}")
+bldir=""
+bllib="-Wl,--start-group -lmkl_scalapack_core -lmkl_blacs_intelmpi -lmkl_intel -lmkl_sequential -lmkl_core -Wl,--end-group -Bdynamic"
+use_pthread="yes"
+	;;
+"${gnu_linux_amd64}")
+bldir=""
+bllib="-Wl,--start-group -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -Bdynamic"
+use_pthread="yes"
+	;;
+esac
+	;;
 "${bllib_mkl}")
 case ${sel_plat} in
 "${gnu_linux_ia32}")
@@ -609,6 +625,7 @@ esac
 # FFT library selection
 fftlib_jrcat="Built-in FFT subroutnes"
 fftlib_fftw3="FFTW3 library"
+fftlib_fftw3_sys="System-installed FFTW3 library"
 fftlib_mkl_fftw3="Intel MKL with FFTW3 interface"
 #fftlib_mkl_fftw3_gnu="Intel MKL with FFTW3 interface library built by gcc"
 fftlib_acml="AMD Core Math Library (ACML)"
@@ -625,10 +642,11 @@ add_option "${fftlib_jrcat}"
 case ${sel_plat} in
 "${gnu_linux_ia32}"|"${gnu_linux_amd64}"|"${windows_mingw_ia32}"|"${windows_mingw_amd64}"|"${windows_sua_ia32}"|"${windows_sua_amd64}")
 add_option "${fftlib_fftw3}"
+add_option "${fftlib_fftw3_sys}"
 #add_option "${fftlib_acml}"
 case ${sel_comp} in
 "${gnu_compiler}"|"${g95_compiler}"|"${intel_compiler}")
-if [ "${sel_bllib}" = "${bllib_mkl}" ];then
+if [ "${sel_bllib}" = "${bllib_mkl}" -o "${sel_bllib}" = "${bllib_mkl_sys}" ];then
 add_option "${fftlib_mkl_fftw3}"
 #add_option "${fftlib_mkl_fftw3_gnu}"
 fi
@@ -679,127 +697,132 @@ fftdir="/usr/local/lib"
 	;;
 "${fftlib_acml}")
 fftlib="-lacml"
-case ${sel_comp} in
-"${gnu_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/gfortran32/lib"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/gfortran64/lib"
-	;;
-esac
-	;;
-"${intel_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/ifort32/lib"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/ifort64/lib"
-	;;
-"${windows_mingw_ia32}")
-fftlib="libacml_dll.lib"
-fftdir="/c/progra~1/AMD/acml5.3.0/ifort32/lib"
-	;;
-"${windows_mingw_amd64}")
-fftlib="libacml_dll.lib"
-fftdir="/c/acml5.3.0/ifort64/lib"
-	;;
-esac
-	;;
-"${pgi_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/pgi32/lib"
 	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/pgi64/lib"
-	;;
-"${windows_mingw_ia32}")
-fftlib="libacml_dll.lib"
-fftdir="/c/progra~1/AMD/acml5.3.0/pgi32/lib"
-	;;
-"${windows_mingw_amd64}")
-fftlib="libacml_dll.lib"
-fftdir="/c/acml5.3.0/win64/lib"
-	;;
-esac
-	;;
-"${pathscale_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/pathscale32/lib"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/pathscale64/lib"
-	;;
-esac
-	;;
-"${sun_studio}")
-case ${sel_plat} in
-"${sun_solaris_ia32}")
-fftdir="/opt/acml5.3.0/sun32/lib"
-	;;
-"${sun_solaris_amd64}")
-fftdir="/opt/acml5.3.0/sun64/lib"
-	;;
-esac
-	;;
-esac
-	;;
-"${fftlib_mkl}")
-fftlib=""
-fftdir="/opt/intel/mkl/interfaces/fftw3xf"
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/intel/mkl/9.1/lib/32"
-fftlib="${fftlib} -lmkl_ia32 -lguide"
-use_pthread="yes"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/intel/mkl/9.1/lib/em64t"
-fftlib="${fftlib} -lmkl_em64t -lguide"
-use_pthread="yes"
-	;;
-"${gnu_linux_ipf}")
-fftdir="/opt/intel/mkl/9.1/lib/64"
-fftlib="${fftlib} -lmkl_ipf -lguide"
-use_pthread="yes"
-	;;
-"${intel_mac_32bit}")
-fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/32"
-fftlib="${fftlib} -lmkl_ia32"
-	;;
-"${intel_mac_64bit}")
-fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/em64t"
-fftlib="${fftlib} -lmkl_em64t"
-	;;
-"${windows_mingw_ia32}")
-fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/ia32/lib"
-fftlib="fftw3xf_ms.lib mkl_c_dll.lib"
-	;;
-"${windows_mingw_amd64}")
-fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/em64t/lib"
-fftlib="fftw3xf_ms.lib mkl_dll.lib"
-	;;
-esac
-	;;
-"${fftlib_scsl}")
-fftdir=""
-fftlib="-lscs"
-	;;
-"${fftlib_matrix}")
-fftdir=""
-fftlib="-lmatmpp_sc"
-	;;
-"${fftlib_ssl2}")
-fftdir=""
-fftlib="-lssl2vp"
-	;;
-"${fftlib_asl}")
+"${fftlib_fftw3_sys}")
+fftlib="-lfftw3"
 fftdir=""
-fftlib="-lasl"
+
+#case ${sel_comp} in
+#"${gnu_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/gfortran32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/gfortran64/lib"
+#	;;
+#esac
+#	;;
+#"${intel_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/ifort32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/ifort64/lib"
+#	;;
+#"${windows_mingw_ia32}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/progra~1/AMD/acml5.3.0/ifort32/lib"
+#	;;
+#"${windows_mingw_amd64}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/acml5.3.0/ifort64/lib"
+#	;;
+#esac
+#	;;
+#"${pgi_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/pgi32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/pgi64/lib"
+#	;;
+#"${windows_mingw_ia32}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/progra~1/AMD/acml5.3.0/pgi32/lib"
+#	;;
+#"${windows_mingw_amd64}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/acml5.3.0/win64/lib"
+#	;;
+#esac
+#	;;
+#"${pathscale_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/pathscale32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/pathscale64/lib"
+#	;;
+#esac
+#	;;
+#"${sun_studio}")
+#case ${sel_plat} in
+#"${sun_solaris_ia32}")
+#fftdir="/opt/acml5.3.0/sun32/lib"
+#	;;
+#"${sun_solaris_amd64}")
+#fftdir="/opt/acml5.3.0/sun64/lib"
+#	;;
+#esac
+#	;;
+#esac
+#	;;
+#"${fftlib_mkl}")
+#fftlib=""
+#fftdir="/opt/intel/mkl/interfaces/fftw3xf"
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/intel/mkl/9.1/lib/32"
+#fftlib="${fftlib} -lmkl_ia32 -lguide"
+#use_pthread="yes"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/intel/mkl/9.1/lib/em64t"
+#fftlib="${fftlib} -lmkl_em64t -lguide"
+#use_pthread="yes"
+#	;;
+#"${gnu_linux_ipf}")
+#fftdir="/opt/intel/mkl/9.1/lib/64"
+#fftlib="${fftlib} -lmkl_ipf -lguide"
+#use_pthread="yes"
+#	;;
+#"${intel_mac_32bit}")
+#fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/32"
+#fftlib="${fftlib} -lmkl_ia32"
+#	;;
+#"${intel_mac_64bit}")
+#fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/em64t"
+#fftlib="${fftlib} -lmkl_em64t"
+#	;;
+#"${windows_mingw_ia32}")
+#fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/ia32/lib"
+#fftlib="fftw3xf_ms.lib mkl_c_dll.lib"
+#	;;
+#"${windows_mingw_amd64}")
+#fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/em64t/lib"
+#fftlib="fftw3xf_ms.lib mkl_dll.lib"
+#	;;
+#esac
+#	;;
+#"${fftlib_scsl}")
+#fftdir=""
+#fftlib="-lscs"
+#	;;
+#"${fftlib_matrix}")
+#fftdir=""
+#fftlib="-lmatmpp_sc"
+#	;;
+#"${fftlib_ssl2}")
+#fftdir=""
+#fftlib="-lssl2vp"
+#	;;
+#"${fftlib_asl}")
+#fftdir=""
+#fftlib="-lasl"
 	;;
 esac
 
@@ -867,7 +890,7 @@ use_dgemm="yes"
 
 #ESM
 use_esm=""
-if [ "${sel_fftlib}" = "${fftlib_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" ];then
+if [ "${sel_fftlib}" = "${fftlib_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" -o "${sel_fftlib}" = "${fftlib_fftw3_sys}" ];then
 echo "Do you want to enable the ESM feature? (yes/no) [yes]"
 read use_esm
 #echo "${use_esm}" >> config
@@ -1172,7 +1195,7 @@ cppflags="$cppflags -DJRCATFFT_WS -DCD_J
 	;;
 esac
 	;;
-"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}")
+"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}"|"${fftlib_fftw3_sys}")
 cppflags="$cppflags -DFFTW3"
 esm="YES"
 	;;
@@ -1220,7 +1243,7 @@ cppflags="$cppflags -D_NO_MPI_"
 esac
 fi
 
-if [ "${sel_fftlib}" != "${fftlib_fftw3}" -a "${sel_fftlib}" != "${fftlib_mkl_fftw3}" -a "${sel_fftlib}" != "${fftlib_mkl_fftw3_gnu}" ];then
+if [ "${sel_fftlib}" != "${fftlib_fftw3}" -a "${sel_fftlib}" != "${fftlib_mkl_fftw3}" -a "${sel_fftlib}" != "${fftlib_mkl_fftw3_gnu}" -a "${sel_fftlib}" != "${fftlib_fftw3_sys}" ];then
 cppflags="$cppflags -DDISABLE_VDWDF"
 fi
 
@@ -1455,17 +1478,20 @@ esac
 ########################################
 
 case ${sel_fftlib} in
-"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}")
+"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}"|"${fftlib_fftw3_sys}")
 libs="-L./ \${LESM} ${libs}"
 esac
 
-if [ "${sel_fftlib}" = "${fftlib_fftw3}" ];then
-fftincludedir="${fftdir}/../include"
-fi
-if [ "${sel_fftlib}" = "${fftlib_mkl_fftw3}" ];then
-fftincludedir="${bldir}/../../include/fftw"
-elif [ "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" ];then
-fftincludedir="${bldir}/../../include/fftw"
+fftincludedir="INCLUDE="
+if [ "${sel_fftlib}" = "${fftlib_fftw3}" ] && [ -n "${fftdir}" ];then
+fftincludedir="INCLUDE=-I${fftdir}/../include"
+fi
+if [ "${sel_fftlib}" = "${fftlib_mkl_fftw3}" ] && [ -n "${bldir}" ];then
+fftincludedir="INCLUDE=-I${bldir}/../../include/fftw"
+elif [ "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" ] && [ -n "${bldir}" ];then
+fftincludedir="INCLUDE=-I${bldir}/../../include/fftw"
+elif [ "${sel_fftlib}" = "${fftlib_fftw3_sys}" ];then
+fftincludedir="INCLUDE="
 fi
 
 cat <<here > Makefile
@@ -1502,7 +1528,7 @@ LFLAGS = ${lflags}
 F90FLAGS_FIXED = ${f90flags_fixed}
 F90FLAGS_FREE = ${f90flags_free}
 MKLHOME=${bldir}
-INCLUDE=-I${fftincludedir}
+${fftincludedir}
 LIBS = ${libs}
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
diff -uprN phase0_2015.01/src_phase/constraint_main.F90 phase0_2015.01.01/src_phase/constraint_main.F90
--- phase0_2015.01/src_phase/constraint_main.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/constraint_main.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 494 $)
 !
 !  SUBROUINE: constrained_dynamics
 !
@@ -45,6 +45,11 @@ subroutine constrained_dynamics()
   use m_Control_Parameters, only : printable, icond, icond_org
   use m_Files, only : nfout
   use m_Parallelization, only : nrank_conf, mype_conf, conf_para
+#ifdef PAW3D
+  use m_PseudoPotential, only : mmesh
+  use m_Ionic_System,    only : natm
+  use m_Parallelization, only : m_Parallel_init_mpi_paw_3D
+#endif
 
   implicit none
 
diff -uprN phase0_2015.01/src_phase/input_interface.F90 phase0_2015.01.01/src_phase/input_interface.F90
--- phase0_2015.01/src_phase/input_interface.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/input_interface.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 475 $)
 !
 !  FUNCTION:  getUnitId, setUnit, updateUnits, clearUnitFlag, setDefaultUnits,
 !            realConvByUnit, f_openInputFile, f_closeInputFile, f_selectTop,
@@ -17,7 +17,7 @@
 !
 !
 !=======================================================================
-! $Id: input_interface.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: input_interface.F90 475 2016-02-23 05:22:18Z jkoga $
 !
 !!$!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!$! interface functions
@@ -376,7 +376,8 @@ integer function f_selectBlock( blocktag
 	    if( iret < 0 ) then
 	        f_selectBlock  = iret
                 iret = f_selectParentBlock()
-                print '( "A unit with the same dimension as [", a, "] has been already given in the block [", a, "]." )', trim(unit), trim(blocktag)
+                print '( "A unit with the same dimension as [", a, "] has been already given in the block [", a, "]." )'&
+              & , trim(unit), trim(blocktag)
  		return
 	    end if
 	end do
diff -uprN phase0_2015.01/src_phase/m_CD_Mag_Moment.f90 phase0_2015.01.01/src_phase/m_CD_Mag_Moment.f90
--- phase0_2015.01/src_phase/m_CD_Mag_Moment.f90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_CD_Mag_Moment.f90	2016-07-12 12:51:19.000000000 +0900
@@ -1,5 +1,5 @@
 module m_CD_Mag_Moment
-! $Id: m_CD_Mag_Moment.f90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_CD_Mag_Moment.f90 476 2016-03-10 08:30:50Z yamasaki $
   use m_Control_Parameters,    only : noncol, ndim_magmom, kimg, iprimagmom, ON, OFF
   use m_Const_Parameters,     only : DP, PAI4, Bohr
 
@@ -514,7 +514,7 @@ contains
        ik = ista_k
        call new_radr_and_wos(ik,it)                 ! --> radr, wos
 
-       rcut = rad_cov_default( iatomn(it) )
+       rcut = rad_cov_default( nint(iatomn(it) ))  ! Revised according to a report from ASMS Co.ltd, 10 March 2016.
 
        Do il1=1, lpsmax(it)
 !          if ( il1 == iloc(it) ) cycle
diff -uprN phase0_2015.01/src_phase/m_CD_mixing.F90 phase0_2015.01.01/src_phase/m_CD_mixing.F90
--- phase0_2015.01/src_phase/m_CD_mixing.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_CD_mixing.F90	2016-07-12 12:51:19.000000000 +0900
@@ -62,7 +62,7 @@
 #endif
 
 module m_CD_mixing
-! $Id: m_CD_mixing.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_CD_mixing.F90 492 2016-05-31 03:06:04Z jkoga $
   use m_Const_Parameters,    only : BUCS, DP, OFF &
        &                          , EXECUT,SIMPLE_CUBIC,BOHR,NO,ANTIFERRO &
        &                          , ANEW,RENEW,ON, SIMPLE,BROYD1,BROYD2,DFP,PULAY &
@@ -94,8 +94,10 @@ module m_CD_mixing
 ! ==============================================================================
 ! === Added by tkato 2011/11/09 ================================================
   use m_Control_Parameters,  only : sw_mix_bothspins_sametime &
-                                  , sw_recomposing_hsr, sw_force_simple_mixing_hsr
-  use m_Ionic_System,        only : ityp, natm
+                                  , sw_recomposing_hsr, sw_force_simple_mixing_hsr &
+                                  , num_proj_elems, proj_group, proj_attribute, num_projectors &
+                                  , max_projs
+  use m_Ionic_System,        only : ityp, natm,iproj_group
   use m_PseudoPotential,     only : ilmt, nlmt
   use m_Charge_Density,      only : hsr, hsro
 !===============================================================================
@@ -115,6 +117,8 @@ module m_CD_mixing
   use m_Control_Parameters,    only : sw_mix_charge_hardpart, sw_mix_charge_with_ekindens
 ! =============== 2014/09/16
 
+  use m_Orbital_Population, only : om,omold,ommix
+
   implicit none
 ! --> T. Yamasaki  03 Aug. 2009
     real(kind=DP), allocatable, dimension(:):: rmxtrc ! d(nspin_m)
@@ -185,7 +189,10 @@ module m_CD_mixing
 
 ! ========================== adde by K. Tagami ========================== 5.0
   integer :: nsize_rho_hsr
+  integer :: nsize_rho_hsr0
+  integer :: nsize_rho_om
   integer, private, allocatable :: imap_hsr(:)    ! d(nsize_rho_hsr)
+  integer, private, allocatable :: imap_om(:)    ! d(nsize_rho_hsr)
   real(kind=DP),private,allocatable, dimension(:,:) ::   rho_hsr, rhoo_hsr
                                                   ! d(nsize_rho_hsr,nspin)
 
@@ -198,6 +205,7 @@ module m_CD_mixing
   real(DP),private,allocatable,target,dimension(:,:,:,:) :: urec_hsr
 
   real(DP),private,allocatable,dimension(:,:,:) :: d0_hsr_h
+  logical,allocatable,dimension(:) :: diag_elem
 
   logical, save :: first = .true.
 
@@ -245,6 +253,16 @@ module m_CD_mixing
   real(kind=DP), pointer :: ekinq_l(:,:,:), ekinqo_l(:,:,:)
 ! ============== 2014/09/19
 
+  integer, public, allocatable :: i2lp(:) ! d(num_projectors)
+
+! ================================ modified by K. Tagami ================ 11.0
+!  integer, private :: max2lp ! max. of i2lp
+  integer, public :: max2lp ! max. of i2lp
+! ======================================================================= 11.0
+
+  integer, private :: l1max ! max. of l1
+  integer, private :: nyymax
+
 ! --- contained subroutines ---
 !   7. m_CD_prepare_precon       <-(ChargeDensity_Mixing)
 !  10. m_CD_simple_mixing        <-(ChargeDensity_Mixing)
@@ -646,10 +664,22 @@ contains
           tmp = max(gg/(gg+q0),amin)
 !!$             agg = amix_cprec*gg/(gg+q0)
           agg = amix_cprec*tmp
-          c_p(i,:) = agg * pmix(:)
+
+! === 2015/09/29
+!!!        c_p(i,:) = agg * pmix(:)
+
+          c_p(i,1) = agg * pmix(1)
+          c_p(i,2:nspin_m) = amix_cprec * pmix(2:nspin_m)
+! === 2015/09/29
+
        end do
        if (mype == 0) then
-          c_p(1,:) = amix_cprec*fg2 * pmix(:)
+! === 2015/09/29
+!          c_p(1,:) = amix_cprec*fg2 * pmix(:)
+
+          c_p(1,1) = amix_cprec*fg2 * pmix(1)
+          c_p(1,2:nspin_m) = amix_cprec * pmix(2:nspin_m)
+! === 2015/09/29
        end if
     else
        do is = 1, ndim_magmom
@@ -2618,9 +2648,10 @@ contains
   end subroutine m_CD_mix_broyden2
 
 ! ===================== added by K. Tagami ============================== 5.0
-  subroutine m_CD_mix_broyden2_with_hsr(nfout,rmx)
+  subroutine m_CD_mix_broyden2_with_hsr(nfout,rmx,mixocc)
     integer, intent(in) :: nfout
     real(DP),intent(in) :: rmx
+    logical, intent(in) :: mixocc
 
     integer   :: iter,j,mxiter,icr,jcr
 !!$    real(DP)  :: v_dF(nspin),vF(nspin)
@@ -2634,9 +2665,10 @@ contains
     if (previous_waymix /= BROYD2.or.force_dealloc) then
        force_dealloc = .false.
        if ( first ) then
-          call create_map_func(.true.)
-          call alloc_rho_hsr
-          call create_map_func(.false.)
+          if(mixocc) call set_i2lp_max2lp()
+          call create_map_func(.true.,mixocc)
+          call alloc_rho_hsr(mixocc)
+          call create_map_func(.false.,mixocc)
           first = .false.
        endif
        call mix_dealloc_previous()
@@ -2655,6 +2687,10 @@ contains
     else
        call map_hsr_to_rho( hsr, rho_hsr )
        call map_hsr_to_rho( hsro,rhoo_hsr )
+       if(mixocc)then
+          call map_om_to_rho( om, rho_hsr )
+          call map_om_to_rho( omold,rhoo_hsr )
+       endif
     endif
 ! ========================================================================= 11.0
 
@@ -2752,7 +2788,6 @@ contains
 
        call mix_broyden_dealloc2                      !-(m_CD)
        call mix_broyden_dealloc2_hsr
-
     endif
 
 ! ============================== modified by K. Tagami ================= 11.0
@@ -2780,6 +2815,7 @@ contains
        call map_rho_to_hsr_noncl( hsr, hsi, rho_hsr )
     else
        call map_rho_to_hsr( hsr, rho_hsr )
+       if(mixocc) call map_rho_to_om( om, rho_hsr )
     endif
 
     deallocate(rmxtrc)
@@ -3760,10 +3796,11 @@ contains
   end subroutine m_CD_mix_pulay
 
 !!$ 11.07 AS Pulay version of 'sw_mix_charge_hardpart'
-  subroutine m_CD_mix_pulay_with_hsr(nfout,rmx)
+  subroutine m_CD_mix_pulay_with_hsr(nfout,rmx,mixocc)
     integer, parameter  :: iRho = 1, iResid = 2
     integer, intent(in) :: nfout
     real(DP),intent(in) :: rmx
+    logical, intent(in) :: mixocc
     integer   :: iter, mxiter
     real(DP),pointer,dimension(:)  :: e_wk, f_wk, ww1, finv
     integer, pointer,dimension(:)  :: ip
@@ -3777,9 +3814,10 @@ contains
     if(previous_waymix /= PULAY.or.force_dealloc) then
        force_dealloc = .false.
        if ( first ) then
-          call create_map_func(.true.)
-          call alloc_rho_hsr
-          call create_map_func(.false.)
+          if(mixocc) call set_i2lp_max2lp()
+          call create_map_func(.true.,mixocc)
+          call alloc_rho_hsr(mixocc)
+          call create_map_func(.false.,mixocc)
           first = .false.
        endif
        call mix_dealloc_previous()
@@ -3798,6 +3836,10 @@ contains
     else
        call map_hsr_to_rho( hsr, rho_hsr )
        call map_hsr_to_rho( hsro,rhoo_hsr )
+       if(mixocc)then
+          call map_om_to_rho( om, rho_hsr )
+          call map_om_to_rho( omold,rhoo_hsr )
+       endif
     endif
 ! ========================================================================= 11.0
 
@@ -3920,6 +3962,7 @@ contains
        call map_rho_to_hsr_noncl( hsr, hsi, rho_hsr )
     else
        call map_rho_to_hsr( hsr, rho_hsr )
+       if(mixocc) call map_rho_to_om( om, rho_hsr )
     endif
 
     deallocate(rmxtrc)
@@ -4342,10 +4385,11 @@ contains
 !!$ 11.07 AS Pulay version of 'sw_mix_charge_hardpart'
 
 ! ========================= added by K. Tagami ===================== 5.0
-  subroutine create_map_func(paramset)
-    logical :: paramset
+  subroutine create_map_func(paramset,mixocc)
+    logical :: paramset,mixocc
     integer :: n, ia, it
     integer :: lmt1, lmt2
+    integer :: ig,m2,m1,i,ip
 
     n=0
     do ia=1,natm
@@ -4355,13 +4399,36 @@ contains
           do lmt2 = lmt1, ilmt(it)
              n=n+1
 
-             if(.not.paramset) &
-                  & imap_hsr(n) = ia + natm *(lmt1-1) + natm*nlmt*( lmt2 -1 )
+             if(.not.paramset) then
+                imap_hsr(n) = ia + natm *(lmt1-1) + natm*nlmt*( lmt2 -1 )
+                diag_elem(n) = lmt1.eq.lmt2
+             endif
           end do
        end do
     end do
-    nsize_rho_hsr = n
+    nsize_rho_hsr0 = n
+    nsize_rho_hsr = nsize_rho_hsr0
 ! ================================ added by K. Tagami ============ 11.0
+    if(mixocc)then
+      n=0
+      do ia=1,natm
+         ig = iproj_group(ia)
+         if(ig<1) cycle
+         do i=1,num_proj_elems(ig)
+            ip=proj_group(i,ig)
+            it = proj_attribute(ip)%ityp
+            do m2=1,i2lp(ip)
+               do m1=m2,i2lp(ip)
+                  n=n+1
+                  if(.not.paramset) &
+                  & imap_om(n) = m1 + max2lp*(m2-1 + max2lp*( i-1 + max_projs*( ia-1 ) ) )
+               end do
+            end do
+         end do
+      enddo
+      nsize_rho_om = n
+      nsize_rho_hsr = nsize_rho_hsr0+nsize_rho_om
+    endif 
     if ( noncol ) then
        nsize_rho_hsr_realpart = n
 
@@ -4382,20 +4449,69 @@ contains
 ! ================================================================= 11.0
   end subroutine create_map_func
 
-  subroutine alloc_rho_hsr
+  subroutine alloc_rho_hsr(mixocc)
+    logical, intent(in) :: mixocc
+    if ( noncol ) then
+       nspin_m  = ndim_magmom
+    else
+       nspin_m = nspin/(af+1)
+    endif
+    if ( noncol ) then
+       nspin_m  = ndim_magmom
+       allocate( rho_hsr( nsize_rho_hsr,ndim_magmom)); rho_hsr = 0.0d0
+       allocate( rhoo_hsr(nsize_rho_hsr,ndim_magmom));rhoo_hsr = 0.0d0
+    else
+       nspin_m  = nspin/(af+1)
+       allocate( rho_hsr( nsize_rho_hsr,nspin_m)); rho_hsr = 0.0d0
+       allocate( rhoo_hsr(nsize_rho_hsr,nspin_m));rhoo_hsr = 0.0d0
+    endif
 ! ================================ modified by K. Tagami ============== 11.0
 !    allocate( rho_hsr( nsize_rho_hsr,nspin)); rho_hsr = 0.0d0
 !    allocate( rhoo_hsr(nsize_rho_hsr,nspin));rhoo_hsr = 0.0d0
-    allocate( rho_hsr( nsize_rho_hsr,ndim_magmom)); rho_hsr = 0.0d0
-    allocate( rhoo_hsr(nsize_rho_hsr,ndim_magmom));rhoo_hsr = 0.0d0
 ! ====================================================================== 11.0
-    allocate( imap_hsr(nsize_rho_hsr) ); imap_hsr = 0
+    allocate( imap_hsr(nsize_rho_hsr0) ); imap_hsr = 0
+    allocate(diag_elem(nsize_rho_hsr0));diag_elem=.false.
+    if(mixocc)then
+        allocate( imap_om(nsize_rho_om) ); imap_om = 0
+    endif
   end subroutine alloc_rho_hsr
 
+  subroutine set_i2lp_max2lp()
+    integer :: it,ip
+    integer, parameter :: ntau0=2
+
+! =========================== added by K. Tagami ====================== 11.0
+    integer :: nsize
+! ===================================================================== 11.0
+
+    allocate(i2lp(num_projectors))
+    do ip=1,num_projectors
+       i2lp(ip) = 2*proj_attribute(ip)%l+1
+    end do
+    max2lp = 0
+    do ip=1,num_projectors
+       if(i2lp(ip) > max2lp) then
+          max2lp = i2lp(ip)
+          l1max  = proj_attribute(ip)%l+1
+       end if
+    end do
+
+! =========================== modified by K. Tagami ====================== 11.0
+!!
+!!    nyymax = ntau0*l1max**2*(l1max**2+1)/2
+!
+    nsize = ntau0*( 2*( l1max -1 )+1 )
+    nyymax = nsize *( nsize +1 ) /2
+
+  end subroutine set_i2lp_max2lp
+
   subroutine dealloc_rho_hsr
     deallocate(rho_hsr)
     deallocate(rhoo_hsr)
     deallocate(imap_hsr)
+    deallocate(diag_elem)
+    if(allocated(imap_om)) deallocate(imap_om)
+    if(allocated(i2lp))    deallocate(i2lp)
   end subroutine dealloc_rho_hsr
 
   subroutine map_hsr_to_rho( hsr,rho )
@@ -4406,7 +4522,7 @@ contains
 
     do is=1,nspin,(af+1)
        rho(:,is)=0.d0
-       do i=1,nsize_rho_hsr
+       do i=1,nsize_rho_hsr0
           rho(i,is) = hsr( imap_hsr(i),is )
        end do
     end do
@@ -4434,6 +4550,55 @@ contains
   end subroutine map_hsr_to_rho_noncl
 ! ===================================================================== 11.0
 
+  subroutine map_om_to_rho(om,rho)
+    real(kind=DP), intent(in) :: om(max2lp*max2lp*max_projs*natm,nspin)
+    real(kind=DP), intent(out) :: rho(nsize_rho_hsr,nspin)
+
+    integer :: i,is
+
+    do is=1,nspin,(af+1)
+       do i=nsize_rho_hsr0+1,nsize_rho_hsr
+          rho(i,is) = om(imap_om(i-nsize_rho_hsr0),is)
+       end do
+    end do
+  end subroutine map_om_to_rho
+
+  subroutine map_rho_to_om(om,rho)
+    real(kind=DP), intent(out) :: om(max2lp*max2lp*max_projs*natm,nspin)
+    real(kind=DP), intent(in) :: rho(nsize_rho_hsr,nspin)
+    integer :: i,is,ia,ig,ip,it,m1,m2
+
+    do is=1,nspin,(af+1)
+       do i=nsize_rho_hsr0+1,nsize_rho_hsr
+          om(imap_om(i-nsize_rho_hsr0),is) = rho(i,is)
+       end do
+    end do
+    call symmetrize(om)
+
+  contains
+
+    subroutine symmetrize(om)
+      real(kind=DP), intent(inout) :: om(max2lp,max2lp,max_projs,natm,nspin)
+
+      do is=1,nspin,(af+1)
+         do ia=1,natm
+            ig = iproj_group(ia)
+            if(ig<1) cycle
+            do i=1,num_proj_elems(ig)
+               ip=proj_group(i,ig)
+               it = proj_attribute(ip)%ityp
+               do m2=1,i2lp(ip)
+                  do m1=m2,i2lp(ip)
+                     if(m1/=m2) om(m2,m1,i,ia,is) = om(m1,m2,i,ia,is)
+                  end do
+               end do
+            end do
+         end do
+      end do
+    end subroutine symmetrize
+
+  end subroutine map_rho_to_om
+
   subroutine map_rho_to_hsr( hsr,rho )
     real(kind=DP), intent(out) :: hsr( natm *nlmt *nlmt, nspin )
     real(kind=DP), intent(in) :: rho( nsize_rho_hsr,nspin )
@@ -4442,8 +4607,8 @@ contains
 
 
     do is=1,nspin,(af+1)
-       hsr(1:nsize_rho_hsr,is) = 0.0d0
-       do i=1,nsize_rho_hsr
+       hsr(1:nsize_rho_hsr0,is) = 0.0d0
+       do i=1,nsize_rho_hsr0
           hsr(imap_hsr(i),is) = rho(i,is)
        end do
     end do
@@ -5154,9 +5319,10 @@ contains
 
   end subroutine renew_d_br_intg
 
-  subroutine m_CD_mix_broyden2_intg(nfout,rmx)
+  subroutine m_CD_mix_broyden2_intg(nfout,rmx,mixocc)
     integer, intent(in) :: nfout    
     real(DP),intent(in) :: rmx
+    logical, intent(in) :: mixocc
     integer   :: iter,j,mxiter,icr,jcr
 
     integer   :: id_sname = -1
@@ -5170,9 +5336,9 @@ contains
 
        if ( sw_mix_charge_hardpart == ON ) then
           if ( first ) then
-             call create_map_func(.true.)
-             call alloc_rho_hsr
-             call create_map_func(.false.)
+             call create_map_func(.true.,mixocc)
+             call alloc_rho_hsr(mixocc)
+             call create_map_func(.false.,mixocc)
              first = .false.
           endif
        endif
@@ -5194,6 +5360,10 @@ contains
        else
           call map_hsr_to_rho( hsr, rho_hsr )
           call map_hsr_to_rho( hsro,rhoo_hsr )
+          if(mixocc) then
+            call map_om_to_rho(om, rho_hsr)
+            call map_om_to_rho(omold, rho_hsr)
+          endif
        endif
     endif
 
@@ -5259,6 +5429,7 @@ contains
           call map_rho_to_hsr_noncl( hsr, hsi, rho_hsr )
        else
           call map_rho_to_hsr( hsr, rho_hsr )
+          if(mixocc) call map_rho_to_om(om,rho_hsr)
        endif
     endif
 
@@ -5463,9 +5634,10 @@ contains
 
   end subroutine mix_pulay_dealloc2_intg
 
-  subroutine m_CD_mix_pulay_intg(nfout,rmx)
+  subroutine m_CD_mix_pulay_intg(nfout,rmx,mixocc)
     integer, intent(in) :: nfout
     real(DP),intent(in) :: rmx
+    logical, intent(in) :: mixocc
 
     integer, parameter  :: iRho = 1, iResid = 2
     integer   :: iter, mxiter
@@ -5483,9 +5655,10 @@ contains
 
        if ( sw_mix_charge_hardpart == ON ) then
           if ( first ) then
-             call create_map_func(.true.)
-             call alloc_rho_hsr
-             call create_map_func(.false.)
+             call set_i2lp_max2lp()
+             call create_map_func(.true.,mixocc)
+             call alloc_rho_hsr(mixocc)
+             call create_map_func(.false.,mixocc)
              first = .false.
           endif
        endif
@@ -5501,6 +5674,10 @@ contains
     else
        call map_hsr_to_rho( hsr, rho_hsr )
        call map_hsr_to_rho( hsro,rhoo_hsr )
+       if(mixocc) then
+         call map_om_to_rho(om, rho_hsr)
+         call map_om_to_rho(omold, rho_hsr)
+       endif
     endif
 
     allocate(rmxtrc(nspin_m))
@@ -5566,6 +5743,7 @@ contains
        call map_rho_to_hsr_noncl( hsr, hsi, rho_hsr )
     else
        call map_rho_to_hsr( hsr, rho_hsr )
+       if(mixocc) call map_rho_to_om(om,rho_hsr)
     endif
 
     deallocate(rmxtrc)
@@ -6282,4 +6460,27 @@ contains
   end subroutine m_CD_simple_mixing_intg
 ! ====== 2014/09/19
 
+  subroutine m_CD_hsr_diff(nfout)
+    integer, intent(in) :: nfout
+    integer :: i,j,ndiag,nnondiag
+    real(kind=DP) :: sumhsr_diag,sumhsr_nondiag
+    sumhsr_diag = 0.d0
+    sumhsr_nondiag = 0.d0
+    ndiag = 0
+    nnondiag = 0
+    do i=1,nspin_m
+       do j=1,nsize_rho_hsr
+          if(diag_elem(j))then
+             sumhsr_diag = sumhsr_diag+abs(rhoo_hsr(j,i)-rho_hsr(j,i))
+             ndiag = ndiag+1
+          else
+             sumhsr_nondiag = sumhsr_nondiag+abs(rhoo_hsr(j,i)-rho_hsr(j,i))
+             nnondiag = nnondiag+1
+          endif
+       enddo
+    enddo
+    if(printable) write(nfout,'(a,f15.10)') '!** dhsr_diag   ',sumhsr_diag/dble(ndiag)
+    if(printable) write(nfout,'(a,f15.10)') '!** dhsr_nondiag',sumhsr_nondiag/dble(nnondiag)
+  end subroutine m_CD_hsr_diff
+
 end module m_CD_mixing
diff -uprN phase0_2015.01/src_phase/m_CS_Magnetic.F90 phase0_2015.01.01/src_phase/m_CS_Magnetic.F90
--- phase0_2015.01/src_phase/m_CS_Magnetic.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_CS_Magnetic.F90	2016-07-12 12:51:19.000000000 +0900
@@ -21,7 +21,7 @@ module m_CS_Magnetic
 ! ============= 2014/08/14
 
 ! == KT_add === 2014/08/26
-  use m_Control_Parameters,   only : SpinOrbit_mode, noncol
+  use m_Control_Parameters,   only : SpinOrbit_mode, noncol, iprisym
   use m_Const_Parameters,    only : Neglected
   use m_Ionic_System,  only :   mag_moment0_atoms, ionic_charge_atoms, &
        &                        mag_moment0_atoms_is_defined
@@ -46,6 +46,8 @@ module m_CS_Magnetic
   integer, allocatable :: magmom_dir_inversion_opr_flag(:)
 ! ============= 2014/08/14
 
+  complex(kind=CMPLDP), allocatable :: op_spinor(:,:,:)
+
 contains
 
 ! ------------------------------------------------------------------------
@@ -1082,4 +1084,104 @@ contains
 
   end subroutine m_CS_set_inverse_operation
 
+  subroutine m_CS_set_op_spinor
+    integer :: i, j
+    real(kind=DP) :: k1, k2, k3, ux, uy, uz, c1, c2, c3
+    real(kind=DP) :: sinth, costh, theta, s1, determinant
+!
+    real(kind=DP), parameter :: delta = 1.0D-4
+    complex(kind=CMPLDP), parameter :: zi = ( 0.0d0, 1.0d0 )
+!
+    real(kind=DP), allocatable :: op_work(:,:,:)
+!
+    allocate( op_work(3,3,nopr) );     op_work = op
+    if ( .not. allocated( op_spinor ) ) allocate( op_spinor(2,2,nopr) )
+
+    Do i=1, nopr
+       call calc_determinant( op(:,:,i), determinant )
+       if ( determinant < 0 ) then
+          op_work(1,1,i) = -op_work(1,1,i)
+          op_work(2,2,i) = -op_work(2,2,i)
+          op_work(3,3,i) = -op_work(3,3,i)
+       endif
+
+       costh = ( op_work(1,1,i) +op_work(2,2,i) +op_work(3,3,i) -1.0d0 ) /2.0d0
+       sinth = sqrt( 1.0d0 -costh**2 )
+
+       theta = acos( costh )
+!
+       if ( sinth > delta ) then
+          k1 = ( op_work(3,2,i) -op_work(2,3,i) ) /2.0d0
+          k2 = ( op_work(1,3,i) -op_work(3,1,i) ) /2.0d0
+          k3 = ( op_work(2,1,i) -op_work(1,2,i) ) /2.0d0
+
+          ux = k1 /sinth;       uy = k2 /sinth;         uz = k3 /sinth
+       else
+          if ( costh > 1.0 -delta ) then
+             ux = 0.0d0;  uy = 0.0d0;   uz = 1.0d0
+             theta = 0.0d0
+          else if ( costh < -1.0 +delta ) then
+             k1 = ( op_work(1,1,i) + 1.d0 )/2.0d0
+             k2 = ( op_work(2,2,i) + 1.d0 )/2.0d0
+             k3 = ( op_work(3,3,i) + 1.d0 )/2.0d0
+
+             ux = sqrt(k1);   uy = sqrt(k2);     uz = sqrt(k3)
+
+             if ( uz > delta ) then
+                if ( op_work(1,3,i) < 0 )  ux = -ux
+                if ( op_work(2,3,i) < 0 )  uy = -uy
+             else
+                if ( op_work(1,2,i) < 0 )  uy = -uy
+             endif
+
+          endif
+       endif
+
+#if 0
+       write(*,*) "i = ", i, "costh = ", costh, "sinth = ", sinth
+       write(*,'(4(A,F10.2))') "Axis: ux = ", ux,  ", uy = ", uy,  ", uz = ", uz, &
+            &                  ", Angle = ", theta /PAI *180.0d0
+       write(*,*)
+#endif
+
+       c1 = cos( theta /2.0d0 );   s1 = sin( theta /2.0d0 );
+
+       op_spinor(1,1,i) = c1 -zi *uz *s1
+       op_spinor(1,2,i) = ( -zi *ux -uy ) *s1
+       op_spinor(2,1,i) = ( -zi *ux +uy ) *s1
+       op_spinor(2,2,i) = c1 +zi *uz *s1
+!
+       if ( determinant < 0 ) op_spinor(:,:,i) = zi *op_spinor(:,:,i)
+    End Do
+!
+    if ( iprisym > 1 ) then
+       write(nfout,*) '*** symmetry operation for spinor ***'
+       Do i=1, nopr
+          write(nfout,*) ' #symmetry op. = ', i
+          write(nfout,'(A,F8.4,A,F8.4,2A,F8.4,A,F8.4,A)') &
+               &                '( ', real(op_spinor(1,1,i)), ', ', &
+               &                      aimag(op_spinor(1,1,i)), ' I ) ',  &
+               &                '( ',real(op_spinor(1,2,i)), ', ', &
+               &                      aimag(op_spinor(1,2,i)), ' I )'
+          write(nfout,'(A,F8.4,A,F8.4,2A,F8.4,A,F8.4,A)') &
+               &                '( ', real(op_spinor(2,1,i)), ', ', &
+               &                      aimag(op_spinor(2,1,i)), ' I ) ',  &
+               &                '( ',real(op_spinor(2,2,i)), ', ', &
+               &                      aimag(op_spinor(2,2,i)), ' I )'
+       End Do
+    endif
+    deallocate( op_work )
+
+  contains
+
+    subroutine calc_determinant( a, determinant )
+      real(kind=DP), intent(in) :: a(3,3)
+      real(kind=DP), intent(out) :: determinant
+      determinant = a(1,1)*( a(2,2)*a(3,3) -a(2,3)*a(3,2) ) &
+          &       -a(1,2)*( a(2,1)*a(3,3) -a(2,3)*a(3,1) ) &
+           &       +a(1,3)*( a(2,1)*a(3,2) -a(2,2)*a(3,1) )
+    end subroutine calc_determinant
+
+  end subroutine m_CS_set_op_spinor
+
 end module m_CS_Magnetic
diff -uprN phase0_2015.01/src_phase/m_Charge_Density.F90 phase0_2015.01.01/src_phase/m_Charge_Density.F90
--- phase0_2015.01/src_phase/m_Charge_Density.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Charge_Density.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 478 $)
 !
 !  MODULE: m_Charge_Density
 !
@@ -86,7 +86,7 @@
 #endif
 
 module m_Charge_Density
-! $Id: m_Charge_Density.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Charge_Density.F90 478 2016-03-12 12:28:48Z ktagami $
   use m_Const_Parameters,    only : BUCS, DP, PAI2, DIRECT,OFF,zi,SKIP &
        &                          , EXECUT,SIMPLE_CUBIC,BOHR,NO,ANTIFERRO &
        &                          , ANEW,RENEW,ON, SIMPLE,BROYD1,BROYD2,DFP,PULAY &
@@ -194,6 +194,13 @@ module m_Charge_Density
   use m_Electronic_Structure,only : fsr_add_l, fsi_add_l
 ! ==================== 2014/08/25
 
+! ==== EXP_CELLOPT === 2015/09/24
+  use m_Parallelization, only : ista_fftph, iend_fftph, idisp_fftp, nel_fftp, npes_cdfft
+  use m_PlaneWaveBasisSet,  only : kgp_prev
+  use m_FFT,                only : m_FFT_CD_direct, m_FFT_CD_inverse_c
+! ==================== 2015/09/24
+
+
   implicit none
 
   real(kind=DP),public,target,allocatable,dimension(:,:,:) :: chgq_l, chgqo_l  ! d(ista_kngp:iend_kngp,kimg,nspin)
@@ -622,6 +629,7 @@ contains
             & write(nfout,96) "OLD",chg_t(1,OLD)*univol,chg_t(2,OLD)*univol,totch_old*univol
        if(ipritotalcharge>=1) &
             & write(nfout,96) "NEW",chg_t(1,NEXT)*univol,chg_t(2,NEXT)*univol,totch_new*univol
+       if ( ipritotalcharge_0 >=2 ) call m_CD_calc_abs_magetization( nfout )
     else
        if((ipritotalcharge >= 2 .and. iteration_electronic==1).or.ipritotalcharge>=3) &
             &                 write(nfout,98) "OLD",totch_old*univol
@@ -692,6 +700,7 @@ contains
             &                     ' My:', totch_new(3)*univol, &
             &                     ' Mz:', totch_new(4)*univol
     endif
+    if ( ipritotalcharge_0 >=2 ) call m_CD_calc_abs_magetization( nfout )
 
   end subroutine m_CD_check_noncl
 ! ===================================================================== 11.0
@@ -1612,7 +1621,7 @@ contains
                                  crotylm_paw(m,jj,iopr,ia)
                             hsi(ia,lmt1,lmt2,1) = &
                                  hsi(ia,lmt1,lmt2,1) + &
-                                 weight * &
+                                 weight *weight2 * &
                                  hsi_tmp(ja,lmt3,lmt4,1)* &
                                  crotylm_paw(n,ii,iopr,ia)* &
                                  crotylm_paw(m,jj,iopr,ia)
@@ -4315,6 +4324,349 @@ contains
     call phase_error_wo_filename(ierror, nfout, nfchgt, __LINE__, __FILE__)
   end subroutine m_CD_rd_chgq
 
+! ===== EXP_CELLOPT ==== 2015/09/24
+  subroutine m_CD_import_chgq_prev_cell(nfout,nfchgt, F_CHGT_partitioned)
+    integer, intent(in) :: nfout, nfchgt
+    logical, intent(in) :: F_CHGT_partitioned
+    integer  :: i,j,k,is, ip
+    real(kind=DP), allocatable, dimension(:,:,:) :: chgq_mpi
+
+    real(kind=DP) :: totch_here
+
+    integer :: id_sname = -1
+    integer :: ierror
+
+    call tstatc0_begin('m_CD_import_chgq_prev_cell ',id_sname,1)
+    chgq_l = 0.0d0
+
+    if(F_CHGT_partitioned) then
+       stop "Not supported"
+    else
+       allocate(chgq_mpi1(kgp_prev,kimg,ndim_magmom)); chgq_mpi1 = 0.d0
+       if (mype==0) then
+          rewind nfchgt
+          read(nfchgt, end = 9999, err = 9999) chgq_mpi1
+       endif
+
+       if (npes > 1) then
+          call mpi_bcast( chgq_mpi1, kgp_prev*kimg*ndim_magmom, &
+               &          mpi_double_precision, 0, mpi_comm_group, ierr )
+       endif
+
+       do k = 1, ndim_magmom
+          do j = 1, kimg
+             do i = ista_kngp, iend_kngp  !for mpi
+                if ( i > kgp_prev ) cycle
+                chgq_l(i,j,k) = chgq_mpi1(i,j,k)
+             enddo
+          enddo
+       enddo
+       deallocate(chgq_mpi1)
+    end if
+    call remove_imaginary_charge
+
+    total_charge = 0.d0
+    if(mype == 0) then
+       if ( noncol ) then
+          do is = 1, 1
+             total_charge = total_charge + chgq_l(1,1,is)*univol
+          end do
+       else
+          do is = 1, nspin, af+1
+             total_charge = total_charge + chgq_l(1,1,is)*univol
+          end do
+       endif
+    end if
+    if ( mype == 0 ) then
+       chgq_l(1,:,:) = chgq_l(1,:,:) *totch /total_charge
+    endif
+!
+    total_charge = totch
+
+    call tstatc0_end(id_sname)
+
+    return
+9999 continue
+    ierror = EOF_REACHED
+    call phase_error_wo_filename(ierror, nfout, nfchgt, __LINE__, __FILE__)
+
+  contains
+
+    subroutine remove_imaginary_charge
+      integer :: iloop, i, j, ip
+      real(kind=DP) :: rinplw
+      real(kind=DP), allocatable :: afft(:), afft_mpi1(:), afft_mpi2(:), afft_mpi3(:)
+      real(kind=DP),parameter   :: D_min  = 1.d-40
+
+      call m_FFT_alloc_CD_box()
+
+      allocate(afft(ista_fftp:iend_fftp)); afft =0.0d0
+      allocate(afft_mpi1(nfftp)); afft_mpi1 = 0.0d0
+      if(npes >= 2) then
+         allocate(afft_mpi2(mp_fftp)); afft_mpi2 = 0.0d0
+         allocate(afft_mpi3(mp_fftp)); afft_mpi3 = 0.0d0
+      end if
+
+      rinplw = 1.d0 /product(fft_box_size_CD(1:3,1))
+
+      do iloop = 1, ndim_magmom
+         afft = 0.0d0;   afft_mpi1 = 0.d0
+
+         do j = 1, kimg
+            do i = ista_kngp, iend_kngp  !for mpi
+               ip = (igfp_l(i)-1)*kimg + j
+               !               afft_mpi1(ip) = afft_mpi1(ip) + chgq_l(i,j,iloop) !mpi
+               afft_mpi1(ip) = chgq_l(i,j,iloop) !mpi
+            end do
+         end do
+
+         if (npes >= 2) then
+            call mpi_barrier(mpi_comm_group,ierr)
+            do j = 0, npes-1
+
+               do i = nis_fftp(j),nie_fftp(j)
+                  afft_mpi2(i-nis_fftp(j)+1) = afft_mpi1(i)
+               end do
+               call mpi_allreduce(afft_mpi2,afft_mpi3,mp_fftp &
+                    &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+
+               if(j == mype) then
+                  do i = ista_fftp, iend_fftp
+                     afft(i) = afft_mpi3(i - ista_fftp + 1)
+                  end do
+               end if
+            end do
+         else
+            afft = afft_mpi1
+         end if
+
+         call m_FFT_CD_inverse_c(nfout,afft)        ! G-->R space
+         Do i=ista_fftp, iend_fftp, 2
+            afft(i+1) = 0.0d0
+         End Do
+!
+         if ( .not. noncol ) then
+            Do i=ista_fftp, iend_fftp, 2
+               afft(i) = max( afft(i), D_min )
+            End Do
+         else if ( iloop == 1 ) then
+            Do i=ista_fftp, iend_fftp, 2
+               afft(i) = max( afft(i), D_min )
+            End Do
+         endif
+         call m_FFT_CD_direct( nfout, afft )      ! R-- >G space
+         !
+         if(npes >= 2) then
+            call mpi_allgatherv( afft, nel_fftp(mype), mpi_double_precision, &
+                 &               afft_mpi1, nel_fftp, idisp_fftp, &
+                 &               mpi_double_precision, mpi_comm_group, ierr )
+         else
+            afft_mpi1 = afft
+         end if
+
+         do j = 1, kimg
+            do i = ista_kngp, iend_kngp  !for mpi
+               ip = (igfp_l(i)-1)*kimg + j
+               chgq_l(i,j,iloop) = afft_mpi1(ip)
+            end do
+         end do
+      End do
+      chgq_l = chgq_l *rinplw
+!
+      deallocate( afft );    deallocate( afft_mpi1 )
+      if ( npes >=2 ) then
+         deallocate( afft_mpi2 ); deallocate( afft_mpi3 )
+      endif
+      call m_FFT_dealloc_CD_box()
+
+    end subroutine remove_imaginary_charge
+
+  end subroutine m_CD_import_chgq_prev_cell
+
+  subroutine m_CD_calc_abs_magetization( nfout )
+    use m_FFT, only : m_FFT_coef_CD_integration_kt
+
+    integer, intent(in) :: nfout
+    real(kind=DP), allocatable :: f2or1(:)
+
+    call m_FFT_alloc_CD_box()
+
+    allocate( f2or1(ista_fftph:iend_fftph) ); f2or1 = 0.0d0
+    call m_FFT_coef_CD_integration_kt( ista_fftph, iend_fftph, f2or1 )
+!
+    if ( noncol ) then
+       call case_noncollinear
+    else
+       call case_collinear
+    endif
+
+    call m_FFT_dealloc_CD_box()
+    deallocate( f2or1 )
+
+  contains
+
+    subroutine case_noncollinear
+      integer :: iloop, i, j, ip
+      real(kind=DP) :: rinplw, csum, csum_mpi
+      real(kind=DP), allocatable :: afft(:), afft_mpi1(:), afft_mpi2(:), afft_mpi3(:)
+
+      allocate(afft(ista_fftp:iend_fftp)); afft =0.0d0
+      allocate(afft_mpi1(nfftp)); afft_mpi1 = 0.0d0
+      if(npes >= 2) then
+         allocate(afft_mpi2(mp_fftp)); afft_mpi2 = 0.0d0
+         allocate(afft_mpi3(mp_fftp)); afft_mpi3 = 0.0d0
+      end if
+
+      rinplw = 1.d0 /product(fft_box_size_CD(1:3,1))
+
+      do iloop = 2, ndim_magmom
+         afft = 0.0d0;   afft_mpi1 = 0.d0
+
+         do j = 1, kimg
+            do i = ista_kngp, iend_kngp  !for mpi
+               ip = (igfp_l(i)-1)*kimg + j
+               afft_mpi1(ip) = abs( chgq_l(i,j,iloop)  )
+            end do
+         end do
+
+         if (npes >= 2) then
+            call mpi_barrier(mpi_comm_group,ierr)
+            do j = 0, npes-1
+               do i = nis_fftp(j),nie_fftp(j)
+                  afft_mpi2(i-nis_fftp(j)+1) = afft_mpi1(i)
+               end do
+               call mpi_allreduce(afft_mpi2,afft_mpi3,mp_fftp &
+                    &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+               if(j == mype) then
+                  do i = ista_fftp, iend_fftp
+                     afft(i) = afft_mpi3(i - ista_fftp + 1)
+                  end do
+               end if
+            end do
+         else
+            afft = afft_mpi1
+         end if
+
+         call m_FFT_CD_inverse_c(nfout,afft)        ! G-->R space
+!
+         csum = 0.0d0
+         Do i=ista_fftph, iend_fftph
+            csum = csum + f2or1(i) *abs( afft(2*i-1) )
+         End Do
+         if ( npes > 1 ) then
+            call mpi_allreduce( csum, csum_mpi, 1, mpi_double_precision, mpi_sum, &
+                 &              mpi_comm_group,ierr )
+            csum = csum_mpi /rinplw *univol
+         else
+            csum = csum /rinplw *univol
+         endif
+!
+         if ( iloop == 2 ) then
+            write(nfout,'(A,F14.8)') ' !      absolute magnetization (x) = ', csum
+         else if ( iloop == 3 ) then
+            write(nfout,'(A,F14.8)') ' !      absolute magnetization (y) = ', csum
+         else if ( iloop == 4 ) then
+            write(nfout,'(A,F14.8)') ' !      absolute magnetization (z) = ', csum
+         endif
+      End do
+
+      write(nfout,*)
+
+      deallocate( afft );    deallocate( afft_mpi1 )
+      if ( npes >=2 ) then
+         deallocate( afft_mpi2 ); deallocate( afft_mpi3 )
+      endif
+
+    end subroutine case_noncollinear
+
+    subroutine case_collinear
+      integer :: iloop, i, j, ip
+      real(kind=DP) :: rinplw, csum, csum_mpi
+      real(kind=DP), allocatable :: afft(:), afft_mpi1(:), afft_mpi2(:), afft_mpi3(:)
+      real(kind=DP), allocatable :: bfft(:)
+
+      allocate(afft(ista_fftp:iend_fftp)); afft =0.0d0
+      allocate(bfft(ista_fftp:iend_fftp)); bfft =0.0d0
+
+      allocate(afft_mpi1(nfftp)); afft_mpi1 = 0.0d0
+      if(npes >= 2) then
+         allocate(afft_mpi2(mp_fftp)); afft_mpi2 = 0.0d0
+         allocate(afft_mpi3(mp_fftp)); afft_mpi3 = 0.0d0
+      end if
+
+      rinplw = 1.d0 /product(fft_box_size_CD(1:3,1))
+
+      Do iloop=1, nspin
+         afft = 0.0d0;   afft_mpi1 = 0.d0
+         do j = 1, kimg
+            do i = ista_kngp, iend_kngp  !for mpi
+               ip = (igfp_l(i)-1)*kimg + j
+               afft_mpi1(ip) = chgq_l(i,j,iloop)
+            end do
+         end do
+
+         if (npes >= 2) then
+            call mpi_barrier(mpi_comm_group,ierr)
+            do j = 0, npes-1
+               do i = nis_fftp(j),nie_fftp(j)
+                  afft_mpi2(i-nis_fftp(j)+1) = afft_mpi1(i)
+               end do
+               call mpi_allreduce(afft_mpi2,afft_mpi3,mp_fftp &
+                    &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+               if(j == mype) then
+                  do i = ista_fftp, iend_fftp
+                     afft(i) = afft_mpi3(i - ista_fftp + 1)
+                  end do
+               end if
+            end do
+         else
+            afft = afft_mpi1
+         end if
+
+         call m_FFT_CD_inverse_c(nfout,afft)        ! G-->R space
+         if ( iloop == 1 ) then
+            bfft = bfft +afft
+         else
+            bfft = bfft -afft
+!            bfft = bfft +afft
+         endif
+      End Do
+
+      csum = 0.0d0
+      if ( kimg == 1 ) then
+         Do i=ista_fftph, iend_fftph
+            csum = csum + f2or1(i) *abs( bfft(2*i-1) )
+         End Do
+      else
+         Do i=ista_fftph, iend_fftph
+            csum = csum + f2or1(i) *abs( bfft(2*i-1) )
+         End Do
+!         Do i=ista_fftp, iend_fftp, kimg
+!            csum = csum +abs( bfft(i) )
+!         End Do
+      endif
+
+      if ( npes > 1 ) then
+         call mpi_allreduce( csum, csum_mpi, 1, mpi_double_precision, mpi_sum, &
+              &              mpi_comm_group,ierr )
+         csum = csum_mpi *rinplw *univol
+      else
+         csum = csum /rinplw *univol
+      endif
+!
+      write(nfout,'(A,F14.8)') ' !      absolute magnetization (z) = ', csum
+      write(nfout,*)
+
+      deallocate( afft );    deallocate( afft_mpi1 );   deallocate( bfft )
+      if ( npes >=2 ) then
+         deallocate( afft_mpi2 ); deallocate( afft_mpi3 )
+      endif
+
+    end subroutine case_collinear
+
+  end subroutine m_CD_calc_abs_magetization
+! ====================== 2015/09/24
+
 ! ============================ added by K. Tagami ======================= 11.0
   subroutine m_CD_rd_chgq_import_frm_collin(nfout,nfchgt, F_CHGT_partitioned)
     integer, intent(in) :: nfout, nfchgt
@@ -4737,9 +5089,13 @@ contains
 !                  &            + op_in_gsp(ii,3,no) *mag_work(ngp,1:kimg,3)
 !          End Do
           Do ii=1, 3
-             mag_tmp(1:kimg,ii) = op(ii,1,invop(no)) *mag_work(ngp,1:kimg,1) &
-                  &            + op(ii,2,invop(no)) *mag_work(ngp,1:kimg,2) &
-                  &            + op(ii,3,invop(no)) *mag_work(ngp,1:kimg,3)
+!               ! Following three lines are revised according to a report from ASMS Co.ltd, 10 March 2016.
+             mag_tmp(1:kimg,ii) = op(ii,1,int(invop(no))) *mag_work(ngp,1:kimg,1) &
+                  &            + op(ii,2,int(invop(no))) *mag_work(ngp,1:kimg,2) &
+                  &            + op(ii,3,int(invop(no))) *mag_work(ngp,1:kimg,3) 
+!!$             mag_tmp(1:kimg,ii) = op(ii,1,invop(no)) *mag_work(ngp,1:kimg,1) &
+!!$                  &            + op(ii,2,invop(no)) *mag_work(ngp,1:kimg,2) &
+!!$                  &            + op(ii,3,invop(no)) *mag_work(ngp,1:kimg,3)
           End Do
 
 ! == KT_add === 2014/12/29
@@ -7656,6 +8012,27 @@ contains
   end subroutine m_CD_hardpart_hsr_add_noncl
 ! ======================== 2014/08/25
 
+  subroutine m_CD_keep_retrieve_hsr(keep)
+    logical, intent(in) :: keep
+    real(kind=DP),allocatable,dimension(:,:,:,:),save   :: hsr_tmp
+    real(kind=DP),allocatable,dimension(:,:,:,:),save   :: hsi_tmp
+    if(keep)then
+      if ( noncol ) then
+        if (.not.allocated(hsr_tmp)) allocate(hsr_tmp(natm,nlmt,nlmt,ndim_magmom)); hsr_tmp = hsr
+        if (.not.allocated(hsi_tmp)) allocate(hsi_tmp(natm,nlmt,nlmt,ndim_magmom)); hsi_tmp = hsi_tmp
+      else
+        if(.not.allocated(hsr_tmp)) allocate(hsr_tmp(natm,nlmt,nlmt,nspin)); hsr_tmp = hsr
+      endif
+    else
+      hsr = hsr_tmp
+      deallocate(hsr_tmp)
+      if ( noncol ) then
+        hsi = hsi_tmp
+        deallocate(hsi_tmp)
+      endif
+    endif
+  end subroutine m_CD_keep_retrieve_hsr
+
   subroutine m_CD_keep_chgq_l()
 ! =============================== modified y K. Tagami =============== 11.0
 !!    allocate(chgq_tmp(ista_kngp:iend_kngp,kimg,nspin)); chgq_tmp = 0.d0
diff -uprN phase0_2015.01/src_phase/m_Const_Parameters.F90 phase0_2015.01.01/src_phase/m_Const_Parameters.F90
--- phase0_2015.01/src_phase/m_Const_Parameters.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Const_Parameters.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE: m_Const_Parameters
 !
@@ -32,7 +32,7 @@
 !
 !***************************************************************
 module m_Const_Parameters
-! $Id: m_Const_Parameters.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Const_Parameters.F90 472 2015-11-28 09:01:17Z ktagami $
   implicit none
 
   integer, parameter :: DRIVER_GENERAL=0, DRIVER_CONSTRAINT=1, DRIVER_NEB=2, DRIVER_PHONON=3 &
@@ -363,6 +363,11 @@ end type unitlist
 
 integer, parameter :: BULK   = 1
 integer, parameter :: DEFECT = 2
+integer, parameter :: Positron_CONV = 1  ! zero-density limit, non-scf
+                                         !    calc p- wfns once
+integer, parameter :: Positron_GGGC = 2  ! zero-density limit, scf
+                                         !    update e- and p- wfns successively
+integer, parameter :: Positron_PSN  = 3  ! fully two-component scf
 
 ! ========================== KT_mod =================== 13.0B 
 !integer, parameter :: unit_list_size = 50
diff -uprN phase0_2015.01/src_phase/m_Control_Parameters.F90 phase0_2015.01.01/src_phase/m_Control_Parameters.F90
--- phase0_2015.01/src_phase/m_Control_Parameters.F90	2015-09-14 15:17:49.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Control_Parameters.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 453 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 492 $)
 !
 !  MODULE: m_Control_Parameters
 !
@@ -49,7 +49,7 @@
 !
 module m_Control_Parameters
 !     (m_CtrlP)
-! $Id: m_Control_Parameters.F90 453 2015-09-01 05:22:55Z ktagami $
+! $Id: m_Control_Parameters.F90 492 2016-05-31 03:06:04Z jkoga $
 !
 ! This module "m_Control_Parameters" holds parameters that give
 ! methods and calculational conditions in jobs.
@@ -109,13 +109,10 @@ module m_Control_Parameters
 ! ============================================================== 11.0
   use m_ErrorMessages,        only : INVALID_CHARGE_MIXING
 
-! ====== KT_add ========================================= 13.0E
-  use m_Const_Parameters,     only : FERMI_DIRAC, CONST_kB
-! ========= ============================================= 13.0E
-
-! ====== KT_add ========================================= 13.0U3
-  use m_Const_Parameters,     only : STEPWISE
-! ========= ============================================= 13.0U3
+! ====== KT_add ========================================= 13.0E, 13.0U3, positron
+  use m_Const_Parameters,     only : FERMI_DIRAC, CONST_kB, STEPWISE, &
+       &                             Positron_CONV, Positron_GGGC, Positron_PSN
+! ======================================================= 13.0E, 13.0U3, positron
 
   implicit none
   include 'mpif.h'
@@ -230,6 +227,15 @@ module m_Control_Parameters
   real(kind=DP) ::      epsilon_ele
   integer ::            sw_positron_file = ON
   integer ::            positron_filetype = CUBE
+
+  integer ::  positron_method = Positron_CONV
+
+  character(len("positron_method")),private,parameter :: &
+       &             tag_positron_method    = "positron_method"
+  character(len("conv")),private,parameter ::         tag_conv    = "conv"
+  character(len("gggc")),private,parameter ::         tag_gggc    = "gggc"
+  character(len("psn")),private,parameter ::          tag_psn     = "psn"
+
   character(len=LEN_TITLE) ::    positron_title(5)
   data positron_title / &
        &  "positron density", "valence electron density", "e-p pair density" &
@@ -570,6 +576,7 @@ module m_Control_Parameters
 #endif
 
 ! === KT_add === 13.1R
+  integer            :: sw_raman = OFF
   integer            :: sw_phonon_with_epsilon = OFF
   integer            :: sw_calc_dielectric_tensor = OFF
 ! ============== 13.1R
@@ -657,6 +664,19 @@ module m_Control_Parameters
   integer :: sw_neglect_stress_offdiagonal = OFF
 ! ==== 2014/11/22
 
+! ==== EXP_CELLOPT === 2015/09/24
+! -----------------------
+! read nfchgt.data of previous cell
+! -----------------------
+  character(len("sw_read_nfchgt_prev_cell")),private,parameter :: &
+       &                 tag_sw_read_nfchgt_prev_cell = "sw_read_nfchgt_prev_cell"
+  character(len("sw_read_nfzaj_prev_cell")),private,parameter :: &
+       &                 tag_sw_read_nfzaj_prev_cell = "sw_read_nfzaj_prev_cell"
+
+  integer :: sw_read_nfchgt_prev_cell = OFF
+  integer :: sw_read_nfzaj_prev_cell = OFF
+! ==================== 2015/09/24
+
 ! -------------------
 ! symmetry during optimization
 ! ------------------
@@ -856,6 +876,7 @@ module m_Control_Parameters
   character(len("none")),private,parameter ::             tag_none = "none"
   character(len("0")),private,parameter ::              tag_0 = "0"
   character(len("unit_matrix")),private,parameter :: tag_unit_matrix = "unit_matrix"
+  character(len("spin_polarized")),private,parameter :: tag_spin_polarized = "spin_polarized"
 ! === For restart lm+MSD! by tkato 2012/02/16 ==================================
   character(len("dtim_previous")), private, parameter :: tag_dtim_previous = "dtim_previous"
 ! ==============================================================================
@@ -1098,6 +1119,7 @@ module m_Control_Parameters
 !!$  real(kind=DP),private ::      edelta_change_to_rmm   = 1.d-7
   real(kind=DP),private ::      edelta_change_to_rmm   = 1.d-3
   real(kind=DP),private ::      edelta_change_to_rmm_md  = 1.d-3
+  logical,private :: edelta_rmm_given = .false.
   integer,public ::            rmm_save_memory_mode = OFF
   character(len("rmm")),private,parameter ::                 tag_rmm               = "rmm"
   character(len("imGSrmm")),private,parameter ::             tag_imGSrmm           = "imgsrmm"
@@ -1147,11 +1169,17 @@ module m_Control_Parameters
   real(kind=DP) ::      damp = 1.d0
   integer ::            submat_period = 1
   real(kind=DP) ::      submat_critical_ratio = 1.d-15
+#ifdef _USE_SCALAPACK_
+  integer :: sw_scalapack = ON
+#else
   integer :: sw_scalapack = OFF
+#endif
 #ifdef _DEFAULT_HOUSEHOLDER_
   integer :: method_scalapack = HOUSEHOLDER
-#else
+#elif _DEFAULT_DIVIDEandCONQUER_
   integer :: method_scalapack = DIVIDEandCONQUER
+#else
+  integer :: method_scalapack = HOUSEHOLDER
 #endif
 !finteger :: block_size = 64
   integer :: block_size = 0
@@ -1442,12 +1470,16 @@ module m_Control_Parameters
                          & tag_sw_mix_charge_hardpart = "sw_mix_charge_hardpart"
   character(len("sw_mix_bothspins_sametime")), private, parameter :: &
                          & tag_sw_mix_bothspins_sametime = "sw_mix_bothspins_sametime"
+  character(len("sw_mix_occ_matrix")),private,parameter :: &
+                         & tag_sw_mix_occ_matrix = "sw_mix_occ_matrix"
 !!$  integer :: sw_mix_charge_hardpart = OFF
 !!$  integer :: sw_mix_bothspins_sametime = OFF
   integer :: sw_mix_charge_hardpart = OFF
   integer :: sw_mix_bothspins_sametime = ON
 !
 !
+  integer :: sw_mix_occ_matrix = OFF
+
   character(len("sw_force_simple_mixing_hsr")), private, parameter :: &
                  & tag_sw_force_simplemix_hsr = "sw_force_simple_mixing_hsr"
   character(len("sw_recomposing_hsr")), private, parameter :: &
@@ -1642,6 +1674,7 @@ module m_Control_Parameters
   integer,public ::        dos_method = Gauss_distrib_func
   integer,public ::        sw_dos_gaussdistrib = OFF
   integer,public ::        dos_subroutine = 5
+
   real(kind=DP),public ::  deltaE_dos = 1.d-4
   real(kind=DP),public ::  variance_dos_GaussD = 1.d-6
   integer,public ::        nwd_dos_window_width = 10
@@ -1760,7 +1793,17 @@ module m_Control_Parameters
   character(len("ry")),private,parameter :: tag_ry = "ry"
   character(len("rz")),private,parameter :: tag_rz = "rz"
 
-! ---- WaveFuction Squared
+! ---- WaveFunction Orb-projection
+  integer :: sw_print_wf_orb_projection = OFF
+  integer :: wf_orb_proj_print_format = 0
+  character(len("wf_orb_projection")),private,parameter ::  &
+       &            tag_wf_orb_projection       = "wf_orb_projection"
+  character(len("sw_print_wf_orb_projection")),private,parameter :: &
+       &       tag_sw_print_wf_orb_projection   = "sw_print_wf_orb_projection"
+  character(len("wf_orb_proj_print_format")),private,parameter :: &
+       &       tag_wf_orb_proj_print_format = "wf_orb_proj_print_format"
+
+! ---- WaveFunction Squared
   integer :: sw_wf_squared_rspace = OFF
   integer,public ::  wf_squared_filetype = CUBE
   integer,public ::  ik_wf_squared = 1
@@ -1805,7 +1848,7 @@ module m_Control_Parameters
   ! --- approximate DFT+U : Hubbard model ---
   integer :: sw_hubbard = OFF
   integer :: sw_constraint = OFF
-  integer :: initial_occmat = SPIN_POLARIZED
+  integer :: initial_occmat = OFF
   real(kind=DP) :: initial_occmat_factor=1.d0
   integer :: const_site = 0
   real(kind=DP) :: const_alpha = 0.d0
@@ -2071,6 +2114,7 @@ module m_Control_Parameters
   character(len("always")), private, parameter :: tag_always = "always"
   character(len("minimal")), private, parameter :: tag_minimal = "minimal"
 
+
 ! ======= KT_add === 13.0Y
 ! Partial Core correction ( paw, hybrid,... )
 !
@@ -2322,6 +2366,13 @@ module m_Control_Parameters
   integer :: lmax_rsb = 3
   real(kind=DP) :: eps_rsb = 1.d-2
 
+! --- msb effect
+  character(len("msb")), private, parameter :: &
+       &                     tag_msb = "msb"
+  character(len("sw_calc_contact_density")), private, parameter :: &
+       &                     tag_sw_calc_contact_density = "sw_calc_contact_density"
+  integer :: sw_calc_contact_density = off
+
 ! ================= KT_add === 13.0S
 !-- CoreLevels
 !
@@ -2437,6 +2488,7 @@ module m_Control_Parameters
   integer :: nr12=3000,nk=1500
   real(kind=DP) :: maxk=10.d0,r12max=30.0d0
   logical :: oneshot = .true.
+  logical :: sw_save_memory_vdw = .true.
 
   logical :: force_exx_energy1=.false.
 
@@ -2543,6 +2595,10 @@ contains
        if(printable) &
             & write(nfout,'(" !** rmm_precal_phase_matm(redefined) = ",i10," <<m_CtrlP_check_matm>>")') rmm_precal_phase_matm
     end if
+    if(.not.edelta_rmm_given)then
+       edelta_change_to_rmm = 1.d-3/dble(natm)
+       edelta_change_to_rmm_md = 1.d-3/dble(natm)
+    endif
   end subroutine m_CtrlP_check_matm
 
   subroutine alloc_w_solver(n)
@@ -3095,12 +3151,12 @@ contains
       logical :: tf
       call strncmp0(trim(rstr), tag_bulk, tf)
       if(tf) then
-         sw_positron = BULK
+         sw_positron = BULK;   positron_method = Positron_CONV
          goto 1001
       end if
       call strncmp0(trim(rstr), tag_defect, tf)
       if(tf) then
-         sw_positron = DEFECT
+         sw_positron = DEFECT;   positron_method = Positron_GGGC
          goto 1001
       end if
 1001  continue
@@ -3164,8 +3220,10 @@ contains
           if( f_getStringValue( tag_smearing_method, rstr,LOWER) == 0) then
              call set_smearing_method(rstr) ! way_of_smearing
           end if
-          if( f_getRealValue( tag_smearing_width, dret, "hartree") == 0) width = dret
-          if(way_of_smearing == TETRAHEDRON) width_tetra = width
+          if( f_getRealValue( tag_smearing_width, dret, "hartree") == 0) then
+             width = dret
+             if(way_of_smearing == TETRAHEDRON) width_tetra = width
+          endif
           if( f_selectBlock( tag_tetrahedron) == 0) then
              if( f_getIntValue( tag_dimension, iret ) == 0) idimtetra = iret
              if( f_getIntValue( tag_sw_correction, iret ) == 0) sw_correction = iret
@@ -3291,6 +3349,28 @@ contains
                 if( f_getIntValue( tag_f3, iret ) == 0) reduction_factor_exx(3) = iret
                 iret = f_selectParentBlock()
              end if
+             if(f_getStringValue(tag_functional_type,rstr,LOWER)==0)then
+                if(rstr.eq.tag_pbe0) then
+                   write(nfout,'(a)') ' !** functional_type : PBE0'
+                   alpha_exx = 0.25d0
+                   sw_screened_exchange = OFF
+                   sw_exchange_only = OFF
+                else if (rstr.eq.tag_hse06) then
+                   write(nfout,'(a)') ' !** functional_type : HSE06'
+                   alpha_exx = 0.25d0 
+                   omega_exx = 0.106d0
+                   sw_screened_exchange = ON
+                   sw_exchange_only = OFF
+                else if (rstr.eq.tag_hf) then
+                   write(nfout,'(a)') ' !** functional_type : HF (Hartree-Fock)'
+                   alpha_exx = 1.d0
+                   sw_screened_exchange = OFF
+                   sw_exchange_only = ON
+                else
+                   write(nfout,'(a)') ' !** WARNING : invalid functional_type : '//trim(rstr)
+                endif
+             endif
+
              if( f_getRealValue( tag_alpha, dret, "") == 0) alpha_exx = dret
              if( f_getRealValue( tag_omega, dret, "") == 0) omega_exx = dret
              if( f_getRealValue( tag_omega_hf, dret, "") == 0)  omega_exx = dret
@@ -3314,6 +3394,7 @@ contains
                 endif
              endif
 
+
 ! ============================= KT_Test ============================ 12.5Exp
              nmax_G_hyb = -1
              if( f_getIntValue( tag_nmax_G_hyb, iret ) == 0 ) then
@@ -3408,27 +3489,6 @@ contains
                 if(rstr.eq.tag_minimal)  potential_update = 2
              endif
 
-             if(f_getStringValue(tag_functional_type,rstr,LOWER)==0)then
-                if(rstr.eq.tag_pbe0) then
-                   write(nfout,'(a)') ' !** functional_type : PBE0'
-                   alpha_exx = 0.25d0
-                   sw_screened_exchange = OFF
-                   sw_exchange_only = OFF
-                else if (rstr.eq.tag_hse06) then
-                   write(nfout,'(a)') ' !** functional_type : HSE06'
-                   alpha_exx = 0.25d0 
-                   omega_exx = 0.106d0
-                   sw_screened_exchange = ON
-                   sw_exchange_only = OFF
-                else if (rstr.eq.tag_hf) then
-                   write(nfout,'(a)') ' !** functional_type : HF (Hartree-Fock)'
-                   alpha_exx = 1.d0
-                   sw_screened_exchange = OFF
-                   sw_exchange_only = ON
-                else
-                   write(nfout,'(a)') ' !** WARNING : invalid functional_type : '//trim(rstr)
-                endif
-             endif
              if(ipriinputfile>=1) then
                 write(nfout,'(" <<< Hybrid functional method >>>")')
                 if(sw_exchange_only==ON) &
@@ -3809,6 +3869,20 @@ contains
        end if
 
   ! ------- Positron start
+       iret = f_getStringValue(tag_positron_method,rstr,LOWER)
+       if( rstr == tag_CONV ) then
+          positron_method = Positron_CONV;          sw_positron = BULK
+       else if( rstr == tag_GGGC ) then
+          positron_method = Positron_GGGC;          sw_positron = DEFECT
+       else if( rstr == tag_PSN ) then
+          positron_method = Positron_PSN;           sw_positron = DEFECT
+          stop "Positron-PSN : Not implemented"
+       end if
+       if ( sw_positron /= OFF ) then
+          write(nfout,*) "!** Positron_method is set to ", positron_method
+          if ( gmax_positron < 0.01 ) call getgmax_positron()
+       endif
+
        if(sw_positron /= OFF) then
           npeg = 1
           num_extra_pev = 0
@@ -4431,6 +4505,7 @@ write(nfout,'(" !** sw_screening_correct
          if(f_getRealValue(tag_a1,dret,'')==0)     a1     = dret
          if(f_getRealValue(tag_a2,dret,'')==0)     a2     = dret
          if(f_getIntValue(tag_eval_kernel_by_interpolation,iret)==0) eval_kernel_by_interpolation = iret == ON
+         if(f_getIntValue(tag_save_memory_mode,iret)==0) sw_save_memory_vdw = iret == ON
          iret = f_selectParentBlock()
        endif
 
@@ -4466,7 +4541,6 @@ write(nfout,'(" !** sw_screening_correct
     subroutine set_initial_occmat(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
-      initial_occmat = SPIN_POLARIZED
       call strncmp2(rstr, FMAXVALLEN, tag_off, len(tag_off), tf)
       if(.not.tf) call strncmp0(trim(rstr),tag_off, tf)
       if(.not.tf) call strncmp0(trim(rstr),tag_none,tf)
@@ -4482,6 +4556,12 @@ write(nfout,'(" !** sw_screening_correct
          goto 1001
       endif
 
+      call strncmp0(trim(rstr), tag_spin_polarized, tf)
+      if(tf) then
+         initial_occmat = SPIN_POLARIZED
+         goto 1001
+      endif
+
       call strncmp0(trim(rstr), tag_initial_es, tf)
       if(tf) then
          sw_initial_es = ON
@@ -4685,8 +4765,8 @@ write(nfout,'(" !** sw_screening_correct
   end subroutine m_CtrlP_rd_accuracy
 
 #ifndef _EMPIRICAL_
-  subroutine m_CtrlP_rd_wfsolver(nfout)
-    integer, intent(in) :: nfout
+  subroutine m_CtrlP_rd_wfsolver(nfout,natm)
+    integer, intent(in) :: nfout,natm
     integer :: f_selectBlock, f_getIntValue, f_getRealValue, f_getStringValue
     integer :: f_selectParentBlock, f_selectTop
     integer :: iret,i,ba
@@ -4712,9 +4792,9 @@ write(nfout,'(" !** sw_screening_correct
     ! determine the default value for the davidson-related variables
     if(neg/nrank_e<4) then
        sw_divide_subspace=OFF
+       if(printable) write(nfout,'(" !** REMARK: sw_divide_subspace is set to OFF ")')
        sw_divide_subspace_changed = .true.
        sw_npartition_changed = .true.
-       if(printable) write(nfout,'(" !** REMARK: sw_divide_subspace is set to OFF ")')
     else
        npartition_david = neg/(nblock*nrank_e)
        if (npartition_david<1) npartition_david = 1
@@ -4724,7 +4804,7 @@ write(nfout,'(" !** sw_screening_correct
 
     if(.not.explict_solver)then
        tag_solver_of_WF_is_found = .true.
-       call configure_wf_solver(solver_set)
+       call configure_wf_solver(solver_set,natm)
        meg = neg
     endif
 
@@ -4881,6 +4961,11 @@ write(nfout,'(" !** sw_screening_correct
        end if
 
        ! ---- rmm ---
+       if( explict_solver )then
+          edelta_change_to_rmm = 1.e-3/dble(natm)
+          edelta_change_to_rmm_md = 1.e-3/dble(natm)
+       endif
+
        if( f_selectBlock( tag_rmm) == 0) then
           if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !** -- tag_rmm is found --")')
           if( f_getIntValue(tag_imGSrmm, iret) == 0) imGSrmm = iret
@@ -4891,9 +4976,11 @@ write(nfout,'(" !** sw_screening_correct
           if( f_getRealValue(tag_edelta_change_to_rmm,dret,'hartree')==0) then
              edelta_change_to_rmm = dret
              edelta_change_to_rmm_md = dret
+             edelta_rmm_given = .true.
           endif
           if(f_getRealValue(tag_edelta_change_to_rmm_md,dret,'hartree')==0)then
              edelta_change_to_rmm_md = dret
+             edelta_rmm_given = .true.
           endif
           if( f_getIntValue(tag_save_memory_mode, iret) == 0) rmm_save_memory_mode = iret
           iret = f_selectParentBlock()
@@ -4963,6 +5050,11 @@ write(nfout,'(" !** sw_screening_correct
                 write(nfout,'(" !** before_renewal= OFF")')
              end if
           end if
+
+!!$!BRANCH_P 3D_Parallel
+!!$          if(nrank_k>=2) sw_scalapack = OFF
+!!$             ! This is a tentative default setting until scalapack parallelization is completed for nrank_k>=2
+!!$!BRANCH_P_END 3D_Parallel
           if( f_selectBlock( tag_scalapack) == 0) then
              if( f_getIntValue(tag_sw_scalapack, iret) == 0) sw_scalapack = iret
 #ifndef _USE_SCALAPACK_
@@ -5086,8 +5178,11 @@ write(nfout,'(" !** sw_screening_correct
           iret = f_selectParentBlock()
        else
           max_subspace_size = 4*neg ! default value
-          if(ipriinputfile >= 2 .and. printable) &
-               & write(nfout,'(" !* tag_davidson is not found")')
+!!$          if(ipriinputfile >= 2 .and. printable) &
+          if(ipriinputfile >= 1 .and. printable) then
+             write(nfout,'(" !* tag_davidson is not found")')
+             write(nfout,'(" !** max_subspace_size = ",i6)') max_subspace_size
+          end if
        end if
        
        ! ---- Modified Davidson ---      
@@ -5205,19 +5300,19 @@ write(nfout,'(" !** sw_screening_correct
 1001  continue
     end subroutine set_energy_evaluation
 
-    subroutine configure_wf_solver(solver_set)
-       integer, intent(in) :: solver_set
+    subroutine configure_wf_solver(solver_set,natm)
+       integer, intent(in) :: solver_set,natm
        integer :: i
        if(solver_set == LMM_RMM)then
           call alloc_w_solver(2)
           w_solver(1)%solver = lmMSD
           w_solver(1)%subspace_rotation = ON
-          w_solver(1)%till_n_iter = 2
+          w_solver(1)%till_n_iter = 5
           w_solver(2)%solver = RMM3
           w_solver(2)%till_n_iter = -1
           w_solver(2)%subspace_rotation = ON
-          edelta_change_to_rmm = 1.d-4
-          edelta_change_to_rmm_md = 1.d-4
+          edelta_change_to_rmm = 1.d-4/dble(natm)
+          edelta_change_to_rmm_md = 1.d-4/dble(natm)
           n_WF_solvers_before = 2
           n_WF_solvers_after = 0
           n_WF_solvers_all = n_WF_solvers_before + n_WF_solvers_after
@@ -5226,18 +5321,25 @@ write(nfout,'(" !** sw_screening_correct
           call alloc_w_solver(4)
           if(icond==INITIAL .or. icond==CONTINUATION .or. icond==AUTOMATIC)then
              w_solver(1)%solver = MDDAVIDSON
+             w_solver(3)%solver = MDDAVIDSON
           else
              w_solver(1)%solver = MDKOSUGI
+             w_solver(3)%solver = MDKOSUGI
+          endif
+          if(sw_hubbard==ON) then
+             w_solver(1)%solver = MDKOSUGI
+             w_solver(3)%solver = MDKOSUGI
           endif
-          if(sw_hubbard==ON) w_solver(1)%solver = MDKOSUGI
-
 ! === KT_add === 2015/01/05
-          if ( noncol ) w_solver(1)%solver = MDDAVIDSON
+          if ( noncol ) then
+             w_solver(1)%solver = MDDAVIDSON
+             w_solver(3)%solver = MDDAVIDSON
+          endif
 ! ============== 2015/01/05
 
 !          w_solver(1)%solver = MDKOSUGI
           w_solver(1)%subspace_rotation = ON
-          w_solver(1)%till_n_iter = 2
+          w_solver(1)%till_n_iter = 5
           w_solver(1)%precon = ON
           w_solver(1)%before_or_after_convergence = BEFORE
           w_solver(2)%solver = RMM3
@@ -5245,9 +5347,9 @@ write(nfout,'(" !** sw_screening_correct
           w_solver(2)%till_n_iter = -1
           w_solver(2)%subspace_rotation = ON
           w_solver(2)%before_or_after_convergence = BEFORE
-          w_solver(3)%solver = MDDAVIDSON
+          !!w_solver(3)%solver = MDDAVIDSON
           w_solver(3)%subspace_rotation = ON
-          w_solver(3)%till_n_iter = 1
+          w_solver(3)%till_n_iter = 5
           w_solver(3)%precon = ON
           w_solver(3)%before_or_after_convergence = AFTER
           w_solver(4)%solver = RMM3
@@ -5255,8 +5357,8 @@ write(nfout,'(" !** sw_screening_correct
           w_solver(4)%till_n_iter = -1
           w_solver(4)%subspace_rotation = ON
           w_solver(4)%before_or_after_convergence = AFTER
-          edelta_change_to_rmm = 1.d-3
-          edelta_change_to_rmm_md = 1.d-3
+          edelta_change_to_rmm = 1.d-3/dble(natm)
+          edelta_change_to_rmm_md = 1.d-3/dble(natm)
           n_WF_solvers_before = 2
           n_WF_solvers_after = 2
           n_WF_solvers_all = n_WF_solvers_before + n_WF_solvers_after
@@ -5300,15 +5402,15 @@ write(nfout,'(" !** sw_screening_correct
           !!$if (sw_hubbard==ON.or.nspin>1) sw_divide_subspace=OFF
           if (printable) write(nfout,'(" !** applied wavefunction solver set : davidson")')
        endif
-       if (n_WF_solvers_before>1)then
-          if(intzaj == by_matrix_diagon)then
-             do i=1,n_WF_solvers_before-1
-                w_solver(i)%till_n_iter = i+1
-             enddo
-          else
-             w_solver(1)%till_n_iter = 1
-          endif
-       endif
+       !if (n_WF_solvers_before>1)then
+       !   if(intzaj == by_matrix_diagon)then
+       !      do i=1,n_WF_solvers_before-1
+       !         w_solver(i)%till_n_iter = i+4
+       !      enddo
+       !   else
+       !      w_solver(1)%till_n_iter = 5
+       !   endif
+       !endif
     end subroutine configure_wf_solver
 
     subroutine set_wfsolvers(prealloc,msol,nbase,ba,iret)
@@ -5831,9 +5933,19 @@ write(nfout,'(" !** sw_screening_correct
              write(nfout,*) '!** sw_neglect_stress_offdiagonal is  ', iret
           endif
 ! === 2014/11/22
-
 ! ======== 13.1AS
 
+! === EXP_CELLOPT === 2015/09/24
+          if ( f_getIntValue( tag_sw_read_nfchgt_prev_cell, iret ) ==0 ) then
+             sw_read_nfchgt_prev_cell = iret
+             write(nfout,*) '!** sw_read_nfchgt_prev_cell is  ', iret
+          endif
+          if ( f_getIntValue( tag_sw_read_nfzaj_prev_cell, iret ) ==0 ) then
+             sw_read_nfzaj_prev_cell = iret
+             write(nfout,*) '!** sw_read_nfzaj_prev_cell is  ', iret
+          endif
+! ================== 2015/09/24
+
           iret = f_selectParentBlock()
        endif
 
@@ -6063,6 +6175,8 @@ write(nfout,'(" !** sw_screening_correct
        endif
 ! ======================================================================= 5.0
 
+       if(f_getIntValue(tag_sw_mix_occ_matrix,iret)==0) sw_mix_occ_matrix = iret
+
 ! ================================ added by K. Tagami ================== 11.0
        if (f_getIntValue( tag_sw_mix_imaginary_hardpart,iret ) == 0 )  then
           sw_mix_imaginary_hardpart = iret
@@ -7195,6 +7309,23 @@ write(nfout,'(" !** sw_screening_correct
           iret = f_selectParentBlock()
        end if
 
+       if ( f_selectBlock( tag_wf_orb_projection ) == 0 ) then
+          if ( f_getIntValue( tag_sw_print_wf_orb_projection, iret ) == 0 ) then
+             sw_print_wf_orb_projection = iret
+          endif
+          if ( f_getIntValue( tag_wf_orb_proj_print_format, iret ) == 0 ) then
+             if ( iret < 0 .or. iret > 1 ) iret = 0
+             wf_orb_proj_print_format = iret
+          endif
+          if (ipriinputfile >= 1) then
+             write(nfout,'(A,I6)') " !** sw_print_wf_orb_projection = ", &
+                  &                   sw_print_wf_orb_projection
+             write(nfout,'(A,i6,A)') "!** wf_orb_proj_print_format = ", &
+                  &        wf_orb_proj_print_format, " ( 0: {l m t}, 1: {j l mj t} )"
+          endif
+          iret = f_selectParentBlock()
+       endif
+
        if( f_selectBlock( tag_wf_squared ) == 0 ) then
           if( f_getIntValue( tag_sw_wf_squared_rspace, iret) == 0) &
                &                             sw_wf_squared_rspace = iret
@@ -7224,6 +7355,14 @@ write(nfout,'(" !** sw_screening_correct
           iret = f_selectParentBlock()
        endif
 
+       if( f_selectBlock( tag_msb ) == 0) then
+          if( f_getIntValue( tag_sw_calc_contact_density, iret) == 0) then
+             sw_calc_contact_density = iret
+             write(nfout,*) "!** sw_calc_contact_density is ", iret
+          endif
+          iret = f_selectParentBlock()
+       endif
+
        if( f_selectBlock( tag_elf) == 0) then
           if(ipriinputfile >= 2) write(nfout,'(" !*  tag_elf")')
           if( f_getIntValue( tag_sw_elf, iret) == 0) sw_elf = iret
@@ -8844,9 +8983,9 @@ write(nfout,'(" !** sw_screening_correct
        if(printable) write(6,'(" iconvergence_previous_job is reset " &
             & ,i2,", because neg_previous < neg")') iconvergence_previous_job
     end if
-    if(sw_optimize_lattice==ON)then
-       iconvergence_previous_job = 0
-    endif
+    !if(sw_optimize_lattice==ON)then
+    !   iconvergence_previous_job = 0
+    !endif
   end subroutine m_CtrlP_rd_iconvergence
 
   subroutine m_CtrlP_reset_iconvergence
diff -uprN phase0_2015.01/src_phase/m_Crystal_Structure.F90 phase0_2015.01.01/src_phase/m_Crystal_Structure.F90
--- phase0_2015.01/src_phase/m_Crystal_Structure.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Crystal_Structure.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 464 $)
 !
 !  MODULE: m_Crystal_Structure
 !
@@ -33,7 +33,7 @@
 !
 module m_Crystal_Structure
 !      (m_CS)
-! $Id: m_Crystal_Structure.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Crystal_Structure.F90 464 2015-09-23 14:29:53Z ktagami $
 !!$  use m_Files,              only : nfout,nfopgr,nfmatbp &
   use m_Timing,             only : tstatc0_begin, tstatc0_end
   use m_Control_Parameters, only : ipri, af, m_CtrlP_set_af, m_CtrlP_set_nspin_and_af &
@@ -272,8 +272,10 @@ module m_Crystal_Structure
   character(len("level_of_projection_paw_charge")), private, parameter :: &
        &             tag_level_projection_paw_charge = "level_of_projection_paw_charge"
 
+! --- magnetic moment
   character(len("axis")),private,parameter :: tag_axis = "axis"
   character(len("direction")),private,parameter :: tag_direction = "direction"
+  character(len("moment")),private,parameter :: tag_moment = "moment"
   character(len("magnetic_moment")),private,parameter :: tag_magnetic_moment &
        &                                       = "magnetic_moment"
 
@@ -384,18 +386,31 @@ module m_Crystal_Structure
   character(len("edelta_change_lambda_last")), private, parameter :: &
        &                 tag_edelta_change_lambda_last = "edelta_change_lambda_last"
 !
-  character(len("max_iterations_constraint")), private, parameter :: &
-       &                 tag_max_iterations_constraint = "max_iterations_constraint"
+  character(len("max_iter_elec_mag_constraint")), private, parameter :: &
+       &                 tag_max_iter_elec_mag_constr = "max_iter_elec_mag_constraint"
+  character(len("max_iter_ion_mag_constraint")), private, parameter :: &
+       &                 tag_max_iter_ion_mag_constr = "max_iter_ion_mag_constraint"
+  character(len("max_iter_cell_mag_constraint")), private, parameter :: &
+       &                 tag_max_iter_cell_mag_constr = "max_iter_cell_mag_constraint"
 !
+  character(len("sw_fix_charge_after_constraint")), private, parameter :: &
+       &              tag_sw_fix_charge_after_constr = "sw_fix_charge_after_constraint"
+
   integer, parameter :: nmax_intermid_lambda = 100
 !
   integer :: sw_magnetic_constraint = OFF
   integer :: mag_constraint_type = 0
-  integer :: damping_method_mag_constraint = 0
-  integer :: num_intermid_lambda = 0
-  integer :: max_iterations_mag_constraint = 0
+  integer :: damping_method_mag_constraint = ABRUPT
+  integer :: num_intermid_lambda = 2
+!
+  integer :: max_iter_elec_mag_constraint = 50
+  integer :: max_iter_ion_mag_constraint = 1
+!  integer :: max_iter_cell_mag_constraint = 100
+  integer :: max_iter_cell_mag_constraint = 1
+
+  integer :: sw_fix_charge_after_constraint = OFF
 !
-  real(kind=DP) :: mag_constraint_lambda = 0.0d0
+  real(kind=DP) :: mag_constraint_lambda = 0.20d0
   real(kind=DP) :: edelta_change_lambda_first = 1.0D-4     ! hartree
   real(kind=DP) :: edelta_change_lambda_last = 1.0D-4     ! hartree
 ! ======================================================== 13.0U
@@ -1294,9 +1309,9 @@ contains
       if ( damping_method_mag_constraint == ABRUPT .or. &
            &  damping_method_mag_constraint == LINEAR ) then
 
-         if ( f_getIntValue( tag_max_iterations_constraint, iret ) == 0 ) then
-            if ( iret < 0 ) max_iterations_mag_constraint = 0
-            max_iterations_mag_constraint = iret
+         if ( f_getIntValue( tag_max_iter_elec_mag_constr, iret ) == 0 ) then
+            if ( iret < 0 ) max_iter_elec_mag_constraint = 0
+            max_iter_elec_mag_constraint = iret
          endif
       endif
 
@@ -1313,10 +1328,27 @@ contains
 
       if ( damping_method_mag_constraint == ABRUPT .or. &
            &  damping_method_mag_constraint == LINEAR ) then
-         write(nfout,*) '! max_iterations_mag_constraint is ', &
-              &            max_iterations_mag_constraint
+         write(nfout,*) '! max_iter_elec_mag_constraint is ', &
+              &            max_iter_elec_mag_constraint
       endif
 
+      if ( f_getIntValue( tag_max_iter_ion_mag_constr, iret ) == 0 ) then
+         if ( iret < 0 ) max_iter_ion_mag_constraint = 0
+         max_iter_ion_mag_constraint = iret
+      endif
+      if ( f_getIntValue( tag_max_iter_cell_mag_constr, iret ) == 0 ) then
+         if ( iret < 0 ) max_iter_cell_mag_constraint = 0
+         max_iter_cell_mag_constraint = iret
+      endif
+
+      if ( f_getIntValue( tag_sw_fix_charge_after_constr, iret ) == 0 ) then
+         sw_fix_charge_after_constraint = iret
+         write(nfout,*) '! sw_fix_charge_after_constraint is ', &
+              &             sw_fix_charge_after_constraint
+      endif
+
+      write(nfout,*) '! max_iter_ion_mag_constraint is ', max_iter_ion_mag_constraint
+      write(nfout,*) '! max_iter_cell_mag_constraint is ', max_iter_cell_mag_constraint
 
       write(nfout,*) '! *********************************************** '
 
@@ -1356,6 +1388,9 @@ contains
                if ( f_getRealValue( tag_norm, dret, "" ) == 0 ) then
                   norm = dret
                end if
+               if ( f_getRealValue( tag_moment, dret, "" ) == 0 ) then
+                  norm = dret
+               end if
                if ( f_getRealValue( tag_theta, dret, "" ) == 0 ) then
                   theta = dret
                end if
@@ -1373,8 +1408,10 @@ contains
 
          else
             if ( f_getRealValue( tag_norm, dret, "" ) == 0 ) then
-               norm = dret
-               mag_moment0_global(1) = norm
+               norm = dret;   mag_moment0_global(1) = norm
+            end if
+            if ( f_getRealValue( tag_moment, dret, "" ) == 0 ) then
+               norm = dret;   mag_moment0_global(1) = norm
             end if
 
          endif
@@ -1410,7 +1447,8 @@ contains
       mag_direc0_global(2) = 0.0d0
       mag_direc0_global(3) = 1.0d0
 
-      if ( f_selectBlock( tag_direction ) == 0 ) then
+      if ( f_selectBlock( tag_direction ) == 0 .or. &
+           &  f_selectBlock( tag_magnetic_moment ) == 0 ) then
 
          if( f_getRealValue( tag_mdx, dret, '') == 0 ) then
             mdx = dret;   Flag = 1
@@ -1421,6 +1459,16 @@ contains
          if( f_getRealValue( tag_mdz, dret, '') == 0 ) then
             mdz = dret;   Flag = 1
          endif
+
+         if( f_getRealValue( tag_mx, dret, '') == 0 ) then
+            mdx = dret;   Flag = 1
+         endif
+         if( f_getRealValue( tag_my, dret, '') == 0 ) then
+            mdy = dret;   Flag = 1
+         endif
+         if( f_getRealValue( tag_mz, dret, '') == 0 ) then
+            mdz = dret;   Flag = 1
+         endif
          cnorm = sqrt( mdx**2 + mdy**2 + mdz**2 )
 
          if ( abs(cnorm) > 1.0E-4 ) then
diff -uprN phase0_2015.01/src_phase/m_ES_ExactExchange.F90 phase0_2015.01.01/src_phase/m_ES_ExactExchange.F90
--- phase0_2015.01/src_phase/m_ES_ExactExchange.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_ExactExchange.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 485 $)
 !
 !  MODULE: m_ES_ExactExchange
 !
@@ -35,7 +35,7 @@ module m_ES_ExactExchange
   use m_Electronic_Structure,only: totch,zaj_l,occup_l,neordr,eko_l,vnlph_l,fsr_l,fsi_l
   use m_NonLocal_Potential, only : snl
   use m_PlaneWaveBasisSet,  only : ngabc,igf,kg1,kg,kgp,nbase,nbmx,iba,m_pwBS_kinetic_energies &
-       &                         , nbase_gamma,igfp_l,ngpt_l,m_pwBS_sphrp_exx,kgp_exx, igfp_exx
+       &                         , nbase_gamma,igfp_l,ngpt_l,m_pwBS_sphrp_exx,kgp_exx, igfp_exx, n_rGpv
   use m_Kpoints,            only : kv3,vkxyz,kv3_ek,vkxyz_ek,k_symmetry,qwgt,qwgt_ek,mp_index &
        &                         , m_Kp_sample_mesh, kshift
   use m_FFT,                only : nfft,fft_box_size_WF &
@@ -50,7 +50,7 @@ module m_ES_ExactExchange
        &                         , nlmta,lmta,lmtt,ltp,mtp,taup,nloc,m_PP_find_maximum_l &
        &                         , m_PP_tell_lmtt_l_m_tau &
        &                         , m_PP_set_index_arrays1,m_PP_set_index_arrays2 &
-       &                         , radr,xh,rmax,nmesh,mmesh, qrspspw
+       &                         , radr,xh,rmax,nmesh,mmesh, qrspspw, nlmtt
  
   use m_Files,              only : nfout
   use m_Timing,             only : tstatc0_begin,tstatc0_end
@@ -70,6 +70,8 @@ module m_ES_ExactExchange
        &                         , nrank_e,nrank_k,myrank_e,map_e,ista_e,iend_e,istep_e,idisp_e &
        &                         , map_z,np_e,mpi_k_world,mpi_e_world,myrank_k,map_k,ista_k,iend_k &
        &                         , ista_kg1_k, np_kg1_k, mp_kg1_k &
+!       &                         , m_Parallel_mpi_nval,np_nval, mp_nval,ista_kngp_exx,iend_kngp_exx &
+       &                         , ista_kngp_exx,iend_kngp_exx &
 #ifdef TRANSPOSE
        &                         , ierr,mp_e,nis_e,nie_e,nel_e
 #else
@@ -92,7 +94,7 @@ module m_ES_ExactExchange
   integer :: nval_old
   integer :: nfftwf
   integer :: ntrs ! ntrs = 1, if TRS is used. Otherwise, ntrs=0
-  integer, allocatable :: ngpt_exx(:,:,:) ! d(kgp,nopr,0:ntrs)
+  integer, allocatable :: ngpt_exx(:,:,:) ! d(kg,nopr,0:ntrs)
 #ifndef MEMORY_SAVE_EXX
   real(kind=DP), allocatable :: qitg_exx(:,:,:) ! d(kgp,nqitg,nqmk)
   real(kind=DP), allocatable :: ylm_exx(:,:,:) ! d(kgp,maxylm,nqmk)
@@ -187,6 +189,7 @@ module m_ES_ExactExchange
   real(kind=DP), allocatable, dimension(:,:) :: qrsps_mm
   real(kind=DP), allocatable, dimension(:) :: h, radr_kept, wos_kept
 #endif
+  integer, save :: id_sname_cdfft = -1
   include 'mpif.h'                                      ! MPI
 
 contains
@@ -463,6 +466,7 @@ contains
   end subroutine m_ES_EXX_move_k_into_fbz
 
   subroutine m_ES_EXX_init0
+    integer :: ii,ie
     if(potential_update>0) then
        if(.not.allocated(exx_potential)) allocate(exx_potential(kg1,np_e,ista_k:iend_k,kimg));exx_potential = 0.d0
     endif
@@ -470,7 +474,7 @@ contains
 
   subroutine m_ES_EXX_init
     implicit none
-    integer :: ik, ikbz, ii,ierr
+    integer :: ik, ikbz, ii,ierr,ie
     real(kind=DP) :: dk(3)
     integer,save  :: id_sname = -1
     call tstatc0_begin('m_ES_EXX_init ',id_sname,level=1)
@@ -655,11 +659,15 @@ contains
     real(kind=DP), allocatable :: efsr_l(:,:) ! d(np_e,nlmta)
     real(kind=DP), allocatable :: efsi_l(:,:) ! d(np_e,nlmta)
 
+
     logical :: trans
+    integer, allocatable, dimension(:) :: ista
     integer,save  :: id_sname = -1
 
     if(sw_update_wfv==OFF) return
 
+    allocate(ista(MPI_STATUS_SIZE))
+
     trans = .true.
     if(present(transform)) trans = transform
     call tstatc0_begin('m_ES_EXX_gather_valence_states ',id_sname,level=1)
@@ -691,6 +699,7 @@ contains
        nval = ibm
     end if
 
+
     if(allocated(wfv)) deallocate(wfv)
     if(allocated(occup_val)) deallocate(occup_val)
     allocate(wfv(kg1,nval,kv3,kimg))
@@ -727,25 +736,23 @@ contains
        deallocate(efsi_l)
     end if
 
-    if(npes>1) then
-       allocate(wfv_mpi(kg1,nval,kv3,kimg))
-       allocate(occup_val_mpi(nval,kv3))
-       call mpi_allreduce(wfv,wfv_mpi,kg1*nval*kv3*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
-       call mpi_allreduce(occup_val,occup_val_mpi,nval*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
-       wfv = wfv_mpi
-       occup_val = occup_val_mpi
-       deallocate(wfv_mpi)
-       deallocate(occup_val_mpi)
-       if(modnrm == EXECUT) then
-          allocate(fsr_mpi(nval,nlmta,kv3))
-          allocate(fsi_mpi(nval,nlmta,kv3))
-          call mpi_allreduce(fsr_exx,fsr_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
-          call mpi_allreduce(fsi_exx,fsi_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
-          fsr_exx = fsr_mpi
-          fsi_exx = fsi_mpi
-          deallocate(fsr_mpi)
-          deallocate(fsi_mpi)
-       end if
+    allocate(wfv_mpi(kg1,nval,kv3,kimg))
+    allocate(occup_val_mpi(nval,kv3))
+    call mpi_allreduce(wfv,wfv_mpi,kg1*nval*kv3*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
+    call mpi_allreduce(occup_val,occup_val_mpi,nval*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
+    wfv = wfv_mpi
+    occup_val = occup_val_mpi
+    deallocate(wfv_mpi)
+    deallocate(occup_val_mpi)
+    if(modnrm == EXECUT) then
+       allocate(fsr_mpi(nval,nlmta,kv3))
+       allocate(fsi_mpi(nval,nlmta,kv3))
+       call mpi_allreduce(fsr_exx,fsr_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
+       call mpi_allreduce(fsi_exx,fsi_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_comm_group,ierr)
+       fsr_exx = fsr_mpi
+       fsi_exx = fsi_mpi
+       deallocate(fsr_mpi)
+       deallocate(fsi_mpi)
     end if
 
     if(sw_rspace_hyb==ON .and. modnrm==EXECUT.and.sw_precalculate==ON)then
@@ -763,6 +770,7 @@ contains
        enddo
     endif
 
+    deallocate(ista)
     call tstatc0_end(id_sname)
   end subroutine m_ES_EXX_gather_valence_states
 
@@ -824,13 +832,14 @@ contains
     integer :: ik,ikbz,ig,kgs
     real(kind=DP) :: fac, wi, kg(3), vzero, g2
     real(kind=DP), dimension(6) :: ttr
-
+    integer :: igs,ige
     integer,save  :: id_sname = -1
     call tstatc0_begin('m_ES_EXX_kernel ',id_sname,level=1)
 
-!!$    allocate(vc(kgp,nqmk))
     if(.not.allocated(vc)) allocate(vc(nmax_G_hyb,nqmk))
 
+    vc = 0.d0
+
     call getttr(rltv,ttr)
 
     fac = PAI4/univol
@@ -849,7 +858,8 @@ contains
              kgs=2
              vc(1,ik) = vzero
           end if
-          do ig=kgs,nmax_G_hyb
+          igs = kgs;ige=nmax_G_hyb
+          do ig=igs,ige
              kg(1:3) = qmk(ik,1:3) + ngabc(ig,1:3)
              g2          = ttr(1)*kg(1)*kg(1) &
              &           + ttr(2)*kg(2)*kg(2) &
@@ -868,7 +878,8 @@ contains
              kgs=2
              vc(1,ik) = vzero
           end if
-          do ig=kgs,nmax_G_hyb
+          igs = kgs;ige=nmax_G_hyb
+          do ig=igs,ige
              kg(1:3) = qmk(ik,1:3) + ngabc(ig,1:3)
              g2          = ttr(1)*kg(1)*kg(1) &
              &           + ttr(2)*kg(2)*kg(2) &
@@ -926,7 +937,7 @@ contains
       real(kind=DP) :: chig
       real(kind=DP), intent(in) :: gam
 
-      integer :: ig, ikbz, kgs
+      integer :: ig, ikbz, kgs, igs, ige
       real(kind=DP) :: sumg, sumgk
 
       sumg = 0.d0
@@ -1017,6 +1028,8 @@ contains
     real(kind=DP) :: ene
     integer :: ig,iadd
     logical :: store_p
+    real(kind=DP), allocatable, dimension(:) :: zajbuf_r,zajbuf_i
+    integer :: kgw,kgv
     integer :: id_sname=-1
     call tstatc0_begin('m_ES_EXX_potential ',id_sname,level=1)
     store_p = .true.
@@ -1029,7 +1042,7 @@ contains
     iup = 2
     if(present(iupdate)) iup = iupdate
     if(iup.lt.potential_update)then
-       vxw(:,1:kimg) = exx_potential(:,ib,ik,:)
+       vxw(1:iba(ik),1:kimg) = exx_potential(1:iba(ik),ib,ik,1:kimg)
        if(present(exx))then
          exx=0.d0
          if(kimg==1)then
@@ -1056,21 +1069,20 @@ contains
     allocate(efsr_l(nlmta));efsr_l=0.d0
     allocate(efsi_l(nlmta));efsi_l=0.d0
     if(modnrm == EXECUT) call get_expkt_fs_b(ik,ib,fsr,fsi,efsr_l,efsi_l)
+    allocate(zajbuf_r(kg1));zajbuf_r(:) = zaj_l(:,ib,ik,1)
+    allocate(zajbuf_i(kg1));zajbuf_i(:) = zaj_l(:,ib,ik,kimg)
+    kgw = kg1;kgv=kg1
+
     if(present(exx))then
-       if(kimg==1)then
-          call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,1), efsr_l, efsr_l, vxw, ene, eo )
-       else
-          call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,2), efsr_l, efsi_l, vxw, ene, eo )
-       endif
+       call apply_Vx_to_WF( ispin, ib, ik, kgw, kgv, zajbuf_r, zajbuf_i, efsr_l, efsi_l, vxw, ene, eo )
     else
-       if(kimg==1)then
-           call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,1), efsr_l, efsr_l, vxw)
-       else
-           call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,2), efsr_l, efsi_l, vxw)
-       endif
+       call apply_Vx_to_WF( ispin, ib, ik, kgw, kgv, zajbuf_r, zajbuf_i, efsr_l, efsi_l, vxw )
     endif
+
     deallocate(efsr_l)
     deallocate(efsi_l)
+    deallocate(zajbuf_r)
+    deallocate(zajbuf_i)
     if(present(exx)) exx = ene
     if(.not.eo.and.potential_update>0.and.store_p) &
     &  exx_potential(1:iba(ik),ib,ik,1:kimg) = vxw(1:iba(ik),1:kimg)
@@ -1153,6 +1165,8 @@ contains
     real(kind=DP), allocatable, dimension(:,:,:,:) :: vxdi_t
 
     integer,save  :: id_sname = -1
+
+
     call tstatc0_begin('m_ES_EXX_Diagonal_part ',id_sname,level=1)
 
 
@@ -1222,6 +1236,7 @@ contains
     call tstatc0_end(id_sname)
   end subroutine m_ES_EXX_Diagonal_part
 
+
   subroutine m_ES_EXX_add_Diagonal_part(ik,ibo,vxdi,vnldi)
     implicit none
     integer, intent(in) :: ik, ibo
@@ -1298,12 +1313,12 @@ contains
     end if 
   end subroutine m_ES_Vexx_add_vexx
 
-  subroutine apply_Vx_to_WF(ispin,ib,ik,wfr,wfi,bdwr,bdwi,vxw,eexx,eonly,force_l,dbdwr,dbdwi)
+  subroutine apply_Vx_to_WF(ispin,ib,ik,kgw,kgv,wfr,wfi,bdwr,bdwi,vxw,eexx,eonly,force_l,dbdwr,dbdwi)
     implicit none
-    integer, intent(in)                       :: ispin, ib,ik
-    real(kind=DP), intent(in), dimension(kg1) :: wfr, wfi
+    integer, intent(in)                       :: ispin, ib,ik,kgw,kgv
+    real(kind=DP), intent(in), dimension(kgw) :: wfr, wfi
     real(kind=DP), intent(in), dimension(nlmta) :: bdwr, bdwi
-    real(kind=DP), intent(out), optional, dimension(kg1,kimg) :: vxw
+    real(kind=DP), intent(out), optional, dimension(kgv,kimg) :: vxw
     real(kind=DP), intent(out), optional :: eexx
     logical, intent(in), optional :: eonly
 
@@ -1480,9 +1495,11 @@ contains
                       call add_RHOG_hard_part_rs2(iqmk(jkbz,ik),rhor,rhoi,&
                      & fsrqm(:,:,m,jkbz,ispin),fsiqm(:,:,m,jkbz,ispin),bdwr,bdwi)
                    endif
+                   call tstatc0_begin('FFT_CD_exx ',id_sname_cdfft,1)
                    call map_RHOG_on_FFT_box_hard(rhor,rhoi,afft)
                    call m_FFT_CD0_exx(nfout,afft,DIRECT)
                    call map_FFT_box_on_RHOG_hard(rhor,rhoi,afft)
+                   call tstatc0_end(id_sname_cdfft)
                    do ii=1,nmax_G_hyb
                       rhogr(ii) = rhogr(ii) + rhor(ii)
                       rhogi(ii) = rhogi(ii) + rhoi(ii)
@@ -1506,9 +1523,11 @@ contains
 !
              if (modnrm == EXECUT) then
                 if(sw_rspace_hyb==ON)then
+                   call tstatc0_begin('FFT_CD_exx ',id_sname_cdfft,1)
                    call map_RHOG_on_FFT_box_hard_inv(rhogr,rhogi,afft)
                    call m_FFT_CD0_exx(nfout,afft,INVERSE)
                    call map_FFT_box_on_RHOG_hard_inv(rhor,rhoi,afft)
+                   call tstatc0_end(id_sname_cdfft)
                    if(force_mode)then
                       call integrate_QijVnm_rs(iqmk(jkbz,ik),rhor,rhoi,fsr,fsi,qvr,qvi,dfsr,dfsi,dqvr,dqvi,gqvr,gqvi)
                    else
@@ -1611,6 +1630,7 @@ contains
     call tstatc0_end(id_sname)
   end subroutine apply_Vx_to_WF
 
+
   subroutine m_ES_EXX_energy2(eexx)
     implicit none
     real(kind=DP), intent(out) :: eexx
@@ -1678,6 +1698,7 @@ contains
     end if
   end subroutine product_on_FFT_box
 
+
   subroutine sum_rho_vc_rho(rhor,rhoi,vc,exx)
     implicit none
     real(kind=DP), intent(in), dimension(nmax_G_hyb) :: rhor,rhoi
@@ -1960,7 +1981,7 @@ contains
 
     integer :: i,i1,i2
 
-    afft(:) = 0.d0
+!    afft(:) = 0.d0
     do i = 1, nfftp_exx_nonpara/2
 !       i1 = (igfp_l(i)-1)*kimg+1
        i1 = (i-1)*kimg+1
@@ -2009,8 +2030,8 @@ contains
     integer :: i,i1,i2
 
     rinplw = 1.d0/product(fft_box_size_CD_exx(1:3,1))
-    rhor(:)=0.d0
-    rhoi(:)=0.d0
+!    rhor(:)=0.d0
+!    rhoi(:)=0.d0
     do i = 1, nfftp_exx_nonpara/2
        i1 = (i-1)*kimg+1
        rhor(i) = afft(i1)*rinplw 
@@ -2024,66 +2045,119 @@ contains
     integer :: i,j,iopr,ii
     integer :: ia,ib,ic
     integer :: namin,namax,nbmin,nbmax,ncmin,ncmax
-    integer, allocatable, dimension(:,:)   :: ngpt_t
     integer, allocatable, dimension(:,:,:) :: g_list
+    integer, allocatable, dimension(:) :: ngpt_exx_tmp, ngpt_exx0_tmp
     integer,save  :: id_sname = -1
     call tstatc0_begin('m_ES_EXX_ngpt ',id_sname,level=1)
 
-    allocate(ngpt_exx(kgp,nopr,0:ntrs)); ngpt_exx = 0
+!!$    allocate(ngpt_exx(kgp,nopr,0:ntrs)); ngpt_exx = 0
+    allocate(ngpt_exx(kg,nopr,0:ntrs)); ngpt_exx = 0
+
+    !! Time reversal symmetry
+
+    if(ntrs>0) then
+
+       namax = n_rGpv(1); nbmax = n_rGpv(2); ncmax = n_rGpv(3)
+       namin = -namax   ; nbmin = -nbmax   ; ncmin = -ncmax
+       allocate(g_list(namin:namax,nbmin:nbmax,ncmin:ncmax)); g_list = 0
+
+       do i = 1, kgp
+          ia = ngabc(i,1)
+          ib = ngabc(i,2)
+          ic = ngabc(i,3)
+          g_list(ia,ib,ic) = i
+       end do
+    end if
 
     if(npes > 1) then
-       allocate(ngpt_t(kgp,nopr)); ngpt_t = 0
+       allocate(ngpt_exx0_tmp(kgp))
        do iopr=1,nopr
+          ngpt_exx0_tmp = 0
           do i = ista_kngp, iend_kngp
-             ngpt_t(i,iopr) = ngpt_l(i,iopr)
+             ngpt_exx0_tmp(i) = ngpt_l(i,iopr)
           end do
+          call mpi_allreduce(MPI_IN_PLACE,ngpt_exx0_tmp,kgp,mpi_integer,mpi_sum,mpi_comm_group,ierr)
+          ngpt_exx(1:kg,iopr,0) = ngpt_exx0_tmp(1:kg)
+
+          if(ntrs>0) then
+             allocate(ngpt_exx_tmp(kg))
+             ngpt_exx_tmp = 0
+             do i=1,kg
+                ii = ngpt_exx0_tmp(i)
+                ia = -ngabc(ii,1)
+                ib = -ngabc(ii,2)
+                ic = -ngabc(ii,3)
+                ngpt_exx_tmp(i) = g_list(ia,ib,ic)
+             end do
+             ngpt_exx(1:kg,iopr,1) = ngpt_exx_tmp(1:kg)
+          end if
        end do
-       call mpi_allreduce(ngpt_t,ngpt_exx,kgp*nopr,mpi_integer,mpi_sum,mpi_comm_group,ierr)
-       deallocate(ngpt_t)
+       deallocate(ngpt_exx0_tmp)
     else
        do iopr=1,nopr
-          do i = ista_kngp, iend_kngp
-             ngpt_exx(i,iopr,0) = ngpt_l(i,iopr)
-          end do
+          if(ntrs==0) then
+             do i = 1, kg
+                ngpt_exx(i,iopr,0) = ngpt_l(i,iopr)
+             end do
+          else
+             do i = 1, kg
+                ngpt_exx(i,iopr,0) = ngpt_l(i,iopr)
+                ii = ngpt_l(i,iopr)
+                ia = -ngabc(ii,1)
+                ib = -ngabc(ii,2)
+                ic = -ngabc(ii,3)
+                ngpt_exx(i,iopr,1) = g_list(ia,ib,ic)
+             end do
+          end if
        end do
     end if
 
-    !! Time reversal symmetry
-
-   if(ntrs>0) then
-
-    namin = 0; namax = 0
-    nbmin = 0; nbmax = 0
-    ncmin = 0; ncmax = 0
-    do i=1,kgp
-       ia = ngabc(i,1)
-       ib = ngabc(i,2)
-       ic = ngabc(i,3)
-       namin = min(ia,namin)
-       namax = max(ia,namax)
-       nbmin = min(ib,nbmin)
-       nbmax = max(ib,nbmax)
-       ncmin = min(ic,ncmin)
-       ncmax = max(ic,ncmax)
-    end do
-    allocate(g_list(namin:namax,nbmin:nbmax,ncmin:ncmax))
-    do i=1,kgp
-       ia = ngabc(i,1)
-       ib = ngabc(i,2)
-       ic = ngabc(i,3)
-       g_list(ia,ib,ic) = i
-    end do
-    do iopr=1,nopr
-       do i=1,kgp
-          ii = ngpt_exx(i,iopr,0)
-          ia = -ngabc(ii,1)
-          ib = -ngabc(ii,2)
-          ic = -ngabc(ii,3)
-          ngpt_exx(i,iopr,1) = g_list(ia,ib,ic)
+!!$    allocate(ngpt_exx_tmp(kgp))
+!!$    do iopr=1,nopr
+!!$       ngpt_exx_tmp = 0
+!!$       do i=1,kgp
+!!$          ii = ngpt_exx(i,iopr,0)
+!!$          if(ista_kngp<=ii .and. ii<=iend_kngp) then
+!!$             ia = -ngabc_kngp_l(ii,1)
+!!$             ib = -ngabc_kngp_l(ii,2)
+!!$             ic = -ngabc_kngp_l(ii,3)
+!!$             ngpt_exx_tmp(i) = g_list(ia,ib,ic)
+!!$          end if
+!!$       end do
+!!$       call mpi_allreduce(MPI_IN_PLACE, ngpt_exx_tmp, kgp, mpi_integer, mpi_sum,mpi_ke_world,ierr)
+!!$       ngpt_exx(:,iopr,1) = ngpt_exx_tmp(:)
+!!$    end do
+!!$    deallocate(ngpt_exx_tmp)
+!!$    
+!!$          ia = -ngabc(ii,1)
+!!$          ib = -ngabc(ii,2)
+!!$          ic = -ngabc(ii,3)
+!!$          ngpt_exx(i,iopr,1) = g_list(ia,ib,ic)
+!!$       end do
+!!$    end do
+    if(ntrs>0) deallocate(g_list)
+
+    j = 0
+    do ii = 0, ntrs
+       do iopr = 1, nopr
+          do i = 1, kg
+             if(ngpt_exx(i,iopr,ii) <= 0) j = j + 1
+          end do
        end do
     end do
-    deallocate(g_list)
-   end if
+    if(j >= 1) then
+       write(nfout,'(" !! check of ngpt_exx")')
+       do ii = 0, ntrs
+          do iopr = 1, nopr
+             do i = 1, kg
+                if(ngpt_exx(i,iopr,ii) <= 0) write(nfout,'(" ngpt_exx(",i8,",",i8,",",i8,") = ",i20)') &
+                & i,iopr,ii, ngpt_exx(i,iopr,ii)
+             end do
+          end do
+       end do
+       write(nfout,'(" !! total number of negative values for ngpt_exx = ",i8)') j
+       write(nfout,'(" !! out of check of ngpt_exx")')
+    end if
 
     call tstatc0_end(id_sname)
     !!!stop 'Check: G_list'
@@ -2279,9 +2353,10 @@ contains
           deallocate(ylm_t)
        else
           do i=1,n
-             call m_pwBS_sphrp_exx(i,rltv,ista_kngp,iend_kngp,gqmk,gqmkr,ylm_exx(1,i,ik))
+             call m_pwBS_sphrp_exx(i,rltv,1,nmax_G_hyb,gqmk,gqmkr,ylm_exx(1,i,ik))
           end do
        end if
+
     end do
     !!stop 'm_ES_EXX_ylm'
     deallocate(gqmk)
@@ -2322,7 +2397,7 @@ contains
           gqmkr(ig) = sqrt(g2)
      end do
      do i = 1, n
-        call m_pwBS_sphrp_exx(i,rltv,1,kgp,gqmk,gqmkr,ylm_exx(1,i))
+        call m_pwBS_sphrp_exx(i,rltv,1,nmax_G_hyb,gqmk,gqmkr,ylm_exx(1,i))
      end do
      deallocate(gqmk)
      deallocate(gqmkr)
@@ -2334,16 +2409,17 @@ contains
 #ifndef MEMORY_SAVE_MORE_EXX
   subroutine check_qitg()
     implicit none
-    integer :: iq 
+    integer :: iq, ips , ipe
+
+    ips = 1
+    ipe = nmax_G_hyb
 
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"qitg_l=",f20.5,1x,"qitg_exx=",f20.5,1x)') &
-         & iq, qitg_l(1,iq), qitg_exx(1,iq,1)/univol
+       write(nfout,'("iq=",i5,1x,"qitg_l=",f20.5,1x,"qitg_exx=",f20.5,1x)') iq, qitg_l(ips,iq), qitg_exx(ips,iq,1)/univol
     end do
 
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"diff=",f20.5)') &
-         & iq, sum(qitg_l(1:nmax_G_hyb,iq)-qitg_exx(1:nmax_G_hyb,iq,1)/univol)
+       write(nfout,'("iq=",i5,1x,"diff=",f20.5)') iq, sum(qitg_l(ips:ipe,iq)-qitg_exx(ips:ipe,iq,1)/univol)
     end do
 
     stop 'check_qitg'
@@ -2351,19 +2427,20 @@ contains
 
   subroutine check_qitg_qmk()
     implicit none
-    integer :: iq, ik
+    integer :: iq, ik, ips, ipe
+
+    ips = 1
+    ipe = nmax_G_hyb
 
     do ik=1,nqmk
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"qitg_exx=",f20.5,1x)') &
-         & iq, qitg_exx(1,iq,ik)/univol
+       write(nfout,'("iq=",i5,1x,"qitg_exx=",f20.5,1x)') iq, qitg_exx(ips,iq,ik)/univol
     end do
     end do
 
     do ik=1,nqmk
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"sum=",f20.5)') &
-         & iq, sum(qitg_exx(1:nmax_G_hyb,iq,ik)/univol)
+       write(nfout,'("iq=",i5,1x,"sum=",f20.5)') iq, sum(qitg_exx(ips:ipe,iq,ik)/univol)
     end do
     end do
 
@@ -2374,23 +2451,24 @@ contains
 #ifndef MEMORY_SAVE_EXX
   subroutine check_ylm_exx()
     implicit none
-    integer :: i, n, ik
+    integer :: i, n, ik, ips, ipe
 
     call m_PP_find_maximum_l(n)    ! n-1: maximum l
     n = (n-1) + (n-1) + 1
     n = n*n
 
+    ips = 1
+    ipe = nmax_G_hyb
+
     do ik=1,nqmk
     do i=1,n
-       write(nfout,'("i=",i5,1x,"ylm_exx=",f20.5,1x)') &
-         & i, ylm_exx(1,i,ik)
+       write(nfout,'("i=",i5,1x,"ylm_exx=",f20.5,1x)') i, ylm_exx(ips,i,ik)
     end do
     end do
 
     do ik=1,nqmk
     do i=1,n
-       write(nfout,'("i=",i5,1x,"sum=",f20.5)') &
-         & i, sum(qitg_exx(1:nmax_G_hyb,i,ik))
+       write(nfout,'("i=",i5,1x,"sum=",f20.5)')  i, sum(qitg_exx(ips:ipe,i,ik))
     end do
     end do
 
@@ -2438,6 +2516,7 @@ contains
     preproc_done = .true.
   end subroutine hard_part_preproc
 
+
   subroutine add_RHOG_hard_part(iqmk,rhogr,rhogi,fnr,fni,fmr,fmi)
     implicit none
     integer, intent(in) :: iqmk
@@ -2458,7 +2537,7 @@ contains
     real(kind=DP), allocatable :: qitg_red(:,:),ylm_red(:,:)
     real(kind=DP), allocatable :: rhogr_red(:),rhogi_red(:)
     real(kind=DP) :: yr,yi
-    integer :: ibl1,ibl2,iq,inn,ip,m
+    integer :: ibl1,ibl2,iq,inn,ip,m, ips, ipe
 
     integer,save  :: id_sname = -1
 
@@ -2476,13 +2555,13 @@ contains
     allocate(rhogr_red(ibsize))
     allocate(rhogi_red(ibsize))
 ! --
-    do ibl1=1, nmax_G_hyb, ibsize
-       rhogr_red=0.d0;  rhogi_red=0.d0
-
-       ibl2=ibl1+ibsize-1
+    ips = 1
+    ipe = nmax_G_hyb
 
-       if (ibl2.gt.nmax_G_hyb) ibl2 = nmax_G_hyb
+    do ibl1=ips, ipe, ibsize
+       rhogr_red=0.d0;  rhogi_red=0.d0
 
+       ibl2=min(ipe,ibl1+ibsize-1)
        do iq=1,nqitg
           do i=1,ibl2-ibl1+1
 #if defined(MEMORY_SAVE_EXX) && defined(MEMORY_SAVE_MORE_EXX)
@@ -2714,6 +2793,7 @@ contains
     enddo
   end subroutine qmk_dot_r
 
+
 ! ================================== KT_Test ========================= 12.5Exp
   subroutine integrate_QijVnm(iqmk,potr,poti,fmr,fmi,qvr,qvi,dfmr,dfmi,dqvr,dqvi,gqvr,gqvi)
     implicit none
@@ -2739,7 +2819,7 @@ contains
     real(kind=DP), allocatable :: zsr(:), zsi(:)
 
     real(kind=DP), allocatable :: qitg_red(:,:),ylm_red(:,:)
-    integer :: ibl1,ibl2,iq,inn,ip,m
+    integer :: ibl1,ibl2,iq,inn,ip,m, ips, ipe
     logical :: force_mode = .false. 
     real(kind=DP), allocatable :: gvec(:,:)
     real(kind=DP) :: er(3), ei(3)
@@ -2760,9 +2840,12 @@ contains
     allocate(qitg_red(ibsize,nqitg))
     allocate(ylm_red(ibsize,n*n))
     if(force_mode) allocate(gvec(ibsize,3))
-    do ibl1=1, nmax_G_hyb, ibsize
-       ibl2=ibl1+ibsize-1
-       if(ibl2.gt.nmax_G_hyb) ibl2=nmax_G_hyb
+
+    ips = 1
+    ipe = nmax_G_hyb
+
+    do ibl1=ips, ipe, ibsize
+       ibl2=min(ipe,ibl1+ibsize-1)
        do iq=1,nqitg
           do i=1,ibl2-ibl1+1
 #if defined(MEMORY_SAVE_EXX) && defined(MEMORY_SAVE_MORE_EXX)
@@ -3263,13 +3346,15 @@ contains
     implicit none
     real(kind=DP), intent(out) :: force(natm,3)
 
-    integer :: ik,ib,ig,ispin,ia
+    integer :: ik,ib,ig,ispin,ia,iadd
     real(kind=DP), allocatable :: force_l(:,:) ! d(natm,3)
     real(kind=DP), allocatable :: force_mpi(:,:) ! d(natm,3)
     real(kind=DP), allocatable :: efsr_l(:,:) ! d(np_e,nlmta)
     real(kind=DP), allocatable :: efsi_l(:,:) ! d(np_e,nlmta)
     real(kind=DP), allocatable :: defsr_l(:,:,:) ! d(np_e,nlmta,3)
     real(kind=DP), allocatable :: defsi_l(:,:,:) ! d(np_e,nlmta,3)
+    real(kind=DP), allocatable, dimension(:) :: zajbuf_r,zajbuf_i
+    integer :: kgw,kgv
     integer,save  :: id_sname = -1
 
     if(modnrm /= EXECUT) then
@@ -3303,16 +3388,22 @@ contains
           do ib=1,np_e   ! MPI
              if(occup_l(ib,ik) < DELTA) cycle
              force_l = 0.d0
+             allocate(zajbuf_r(kg1));zajbuf_r(:) = zaj_l(:,ib,ik,1)
+             allocate(zajbuf_i(kg1));zajbuf_i(:) = zaj_l(:,ib,ik,kimg)
+             kgw = kg1;kgv=kg1
+
              if(kimg==1) then
-                call apply_Vx_to_WF(ispin,ib,ik,zaj_l(1,ib,ik,1),zaj_l(1,ib,ik,1) &
+                call apply_Vx_to_WF(ispin,ib,ik,kgw,kgv,zajbuf_r,zajbuf_r &
                     & ,efsr_l(ib,1:nlmta),efsr_l(ib,1:nlmta) &
                     & ,dbdwr=dfsr_l(ib,1:nlmta,ik,1:3),dbdwi=dfsr_l(ib,1:nlmta,ik,1:3),force_l=force_l)
              else
-                call apply_Vx_to_WF(ispin,ib,ik,zaj_l(1,ib,ik,1),zaj_l(1,ib,ik,2) &
+                call apply_Vx_to_WF(ispin,ib,ik,kgw,kgv,zajbuf_r,zajbuf_i &
                     & ,efsr_l(ib,1:nlmta),efsi_l(ib,1:nlmta) &
                     & ,dbdwr=dfsr_l(ib,1:nlmta,ik,1:3),dbdwi=dfsi_l(ib,1:nlmta,ik,1:3),force_l=force_l)
              end if
              force = force + occup_l(ib,ik) * force_l
+             deallocate(zajbuf_r)
+             deallocate(zajbuf_i)
           end do
        end do
     end do
diff -uprN phase0_2015.01/src_phase/m_ES_IO.F90 phase0_2015.01.01/src_phase/m_ES_IO.F90
--- phase0_2015.01/src_phase/m_ES_IO.F90	2015-09-14 15:18:05.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_IO.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 459 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 487 $)
 !
 !  MODULE: m_ES_IO
 !
@@ -58,7 +58,7 @@
 
 !
 module m_ES_IO
-! $Id: m_ES_IO.F90 459 2015-09-10 08:50:04Z yamasaki $
+! $Id: m_ES_IO.F90 487 2016-05-17 05:20:42Z ktagami $
   use m_Electronic_Structure, only : zaj_l,neordr,nrvf_ordr,eko_l,occup_l,efermi,efermi_spin,totch&
        &                            ,vnlph_l,vlhxc_l,eko_ek
   use m_Electronic_Structure, only : m_ES_WF_in_Rspace
@@ -95,21 +95,37 @@ module m_ES_IO
 
   use m_Control_Parameters, only : ndim_magmom, ik_wf_squared, &
        &                           ib1_wf_squared, ib2_wf_squared, &
-       &                           wf_squared_filetype
+       &                           wf_squared_filetype,  max_projs, proj_attribute, &
+       &                           ndim_chgpot, SpinOrbit_Mode, &
+       &                           wf_orb_proj_print_format, proj_group, num_proj_elems
+  use m_Const_parameters,     only : Neglected
   use m_Files,              only :  nfwfk_sq, m_Files_open_nfwfksq_noncl, &
        &                            nfwfk_integ_mom, &
        &                            m_Files_open_nfwfk_integ_mom, &
-       &                            m_Files_close_nfwfk_integ_mom
-  use m_PseudoPotential,   only : nlmt, ilmt, lmta, q
+       &                            m_Files_close_nfwfk_integ_mom, &
+       &                            m_Files_open_nfwfk_orb_proj, &
+       &                            m_Files_close_nfwfk_orb_proj, &
+       &                            nfwfk_orb_proj
+  use m_PseudoPotential,   only : nlmt, ilmt, lmta, q, &
+       &                          nlmta_phi, nlmtt_phi, qorb, m_PP_tell_iorb_lmt, &
+       &                          m_PP_tell_iorb_ia_l_m_tau, ilmt_phi, &
+       &                          mtp_phi, lmta_phi, ltp_phi, taup_phi
+  use m_Nonlocal_Potential,   only : norm_phig
   use m_Charge_Density,    only : chgq_l, hsr, hsi, &
        &                          m_CD_softpart_ktsub_noncl, &
        &                          m_CD_hardpart_ktsub_noncl, &
        &                          m_CD_alloc_rspace_charge, &
        &                          m_CD_dealloc_rspace_charge, &
        &                          m_CD_rspace_charge_noncl
-  use m_Ionic_System,      only : ityp
-  use m_Electronic_Structure,  only : fsr_l, fsi_l
+  use m_Ionic_System,      only : ityp, iproj_group
+  use m_Electronic_Structure,  only : fsr_l, fsi_l, compr_l, compi_l
   use m_ES_Noncollinear,   only : m_ES_set_Pauli_Matrix
+  use m_SpinOrbit_Potential,  only :  MatU_ylm_RC_L0,  MatU_ylm_RC_L1,  MatU_ylm_RC_L2, &
+       &                              MatU_ylm_RC_L3
+
+! ==== EXP_CELLOPT ==== 2015/09/24
+  use m_PlaneWaveBasisSet,    only : kg1_prev
+! ===================== 2015/09/24
 
   implicit none
   include 'mpif.h'
@@ -732,13 +748,15 @@ contains
                 if(mode == EIGEN_VALUES) then
                    write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, (e_mpi(nb,ik),nb=ie1,ie2)
                 else if(mode == OCCUPATIONS) then
-                   write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, (o_mpi(nb,ik)/(qwgt(ik)*kv3),nb=ie1,ie2)
+                   write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, &
+                        &           (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb=ie1,ie2)
                 end if
              else
                 if(mode == EIGEN_VALUES) then
                    write(nf,'(12x,8f12.6)') (e_mpi(nb,ik),nb=ie1,ie2)
                 else if(mode == OCCUPATIONS) then
-                   write(nf,'(12x,8f12.6)') (o_mpi(nb,ik)/(qwgt(ik)*kv3),nb=ie1,ie2)
+                   write(nf,'(12x,8f12.6)') &
+                        &      (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb=ie1,ie2)
                 end if
              end if
           end do
@@ -1066,6 +1084,117 @@ contains
                                                   __TIMER_SUB_STOP(1372)
   end subroutine m_ESIO_rd_WFs
 
+! ==== EXP_CELLOPT ==== 2015/09/24
+  subroutine m_ESIO_import_WFs_prev_cell(nfout,nfzaj, F_ZAJ_partitioned)
+    integer, intent(in) :: nfout, nfzaj
+    logical, intent(in) :: F_ZAJ_partitioned
+    integer    :: ik,ib,ri, i
+    integer    :: id_sname = -1
+    integer    :: ierror
+
+    call tstatc0_begin('m_ESIO_import_WFs_prev_cell ',id_sname)
+
+    if(precision_WFfile==SP) then
+       if(ipri >= 1) write(nfout,*) ' !D Reading zaj (single_precision)'
+    else
+       if(ipri >= 1) write(nfout,*) ' !D Reading zaj (double_precision)'
+    end if
+    if(precision_WFfile==SP) then
+       allocate(wf_l(kg1_prev,kimg)); wf_l = 0.d0
+    else
+       allocate(wfdp_l(kg1_prev,kimg)); wfdp_l = 0.d0
+    end if
+    rewind nfzaj
+
+    zaj_l = 0.0d0
+
+    if(F_ZAJ_partitioned) then
+       do ik = ista_k, iend_k, af+1        ! MPI
+
+          do ib = ista_e, iend_e, istep_e  ! MPI
+             if(ib > neg_previous) cycle
+
+             if(precision_WFfile==SP) then
+                read(nfzaj) wf_l
+
+                if(kimg == 1) then
+                   do i = 1, min( kg1, kg1_prev )
+                      zaj_l(i,map_z(ib),ik,1) = wf_l(i,1)
+                   end do
+                else if(kimg==2) then
+                   do i = 1, min( kg1, kg1_prev )
+                      zaj_l(i,map_z(ib),ik,1) = wf_l(i,1)
+                      zaj_l(i,map_z(ib),ik,2) = wf_l(i,2)
+                   end do
+                end if
+             else if(precision_WFfile==DP) then
+                read(nfzaj) wfdp_l
+
+                if(kimg == 1) then
+                   do i = 1, min( kg1, kg1_prev )
+                      zaj_l(i,map_z(ib),ik,1) = wfdp_l(i,1)
+                   end do
+                else if(kimg==2) then
+                   do i = 1, min( kg1, kg1_prev )
+                      zaj_l(i,map_z(ib),ik,1) = wfdp_l(i,1)
+                      zaj_l(i,map_z(ib),ik,2) = wfdp_l(i,2)
+                   end do
+                end if
+
+             end if
+          end do
+       end do
+    else
+
+       do ik = 1, kv3, af+1
+          do ib = 1, neg_previous
+             ! -----------------
+             if(precision_WFfile==SP) then
+                if(mype == 0) read(nfzaj, end = 9999, err = 9999) wf_l  
+                if(mype == 0 .and. map_ek(ib,ik) /= 0) then 
+                   call mpi_send(wf_l,kg1_prev*kimg,mpi_real,map_ek(ib,ik),1,mpi_comm_group,ierr) ! MPI
+                else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0) then
+                   call mpi_recv(wf_l,kg1_prev*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr)     ! MPI
+                end if
+                if(map_ek(ib,ik) == mype) then              ! MPI
+                   do ri = 1, kimg
+                      do i = 1, min( kg1, kg1_prev )
+                         zaj_l(i,map_z(ib),ik,ri) = wf_l(i,ri)  ! MPI
+                      end do
+                   end do
+                end if
+
+                ! -----------------
+             else if(precision_WFfile==DP) then
+                if(mype == 0) read(nfzaj, end = 9999, err = 9999) wfdp_l
+                if(mype == 0 .and. map_ek(ib,ik) /= 0) then
+                   call mpi_send(wfdp_l,kg1_prev*kimg,mpi_double_precision,map_ek(ib,ik),1,mpi_comm_group,ierr) ! MPI
+                else if(map_ek(ib,ik) == mype .and. map_ek(ib,ik) /= 0) then
+                   call mpi_recv(wfdp_l,kg1_prev*kimg,mpi_double_precision,0,1,mpi_comm_group,istatus,ierr)     ! MPI
+                end if
+                if(map_ek(ib,ik) == mype) then              ! MPI
+                   do i = 1, min( kg1, kg1_prev )
+                      zaj_l(i,map_z(ib),ik,ri) = wfdp_l(i,ri)  ! MPI
+                   end do
+                end if
+             endif
+             ! -----------------
+          end do
+       end do
+    end if
+!
+    if(precision_WFfile==SP) then
+       deallocate(wf_l)
+    else if(precision_WFfile==DP) then
+       deallocate(wfdp_l)
+    end if
+    call tstatc0_end(id_sname)
+    return
+9999 continue
+    ierror = EOF_REACHED
+    call phase_error_wo_filename(ierror, nfout, nfzaj, __LINE__, __FILE__)
+  end subroutine m_ESIO_import_WFs_prev_cell
+! ===================== 2015/09/24
 
 ! ==================================== added by K. Tagami =============== 11.0
   subroutine m_ESIO_rd_WFs_import_frm_collin(nfout,nfzaj, F_ZAJ_partitioned)
@@ -2686,5 +2815,269 @@ contains
 
   end subroutine m_ESIO_wd_Wfn_integ_magmom
 
+  subroutine m_ESIO_wd_Wfn_orb_proj
+    integer :: neg_t
+    real(kind=DP), allocatable :: compr(:,:,:,:), compi(:,:,:,:), norm_phig_mpi(:,:)
+    
+    if ( ekmode == OFF ) return
+    
+    allocate(compr(neg,nlmta_phi,1,kv3));  compr = 0.d0
+    allocate(compi(neg,nlmta_phi,1,kv3));  compi = 0.d0
+    
+    if(.not.allocated(norm_phig_mpi)) allocate(norm_phig_mpi(nlmtt_phi,kv3/nspin))
+    norm_phig_mpi=0.d0
+
+    call set_array_compri_etc
+
+    if ( mype == 0 ) then
+       if ( nk_in_the_process == 1 ) then
+          call m_Files_open_nfwfk_orb_proj(2)
+       else
+          call m_Files_open_nfwfk_orb_proj(3)
+       endif
+    endif
+
+    neg_t = neg -num_extra_bands
+
+    if ( SpinOrbit_Mode /= Neglected .and. wf_orb_proj_print_format == 1 ) then
+       call case_with_j
+    else
+       call case_ordinal    ! EXPERIMENTAL 
+    endif
+    if ( mype == 0 ) call m_Files_close_nfwfk_orb_proj
+
+    deallocate( compr ); deallocate( compi ); deallocate( norm_phig_mpi )
+    
+  contains
+    
+    subroutine set_array_compri_etc
+      integer :: ik, ie, ib, iksnl
+      integer :: iorb, lmt
+      integer :: ia, il, im, tau, is
+      real(kind=DP), allocatable :: compr_mpi(:,:,:,:), compi_mpi(:,:,:,:), &
+           &                        norm_phig_mpi2(:,:)
+      real(kind=DP), allocatable :: porb(:)
+
+      do ik = 1, kv3
+         if(map_k(ik) /= myrank_k) cycle
+         iksnl = (ik-1)/nspin + 1
+         
+         do ie = ista_e, iend_e, istep_e
+            ib = map_z(ie)
+            compr(ie,1:nlmta_phi,1,ik) = compr_l(ib,1:nlmta_phi,1,ik)
+            compi(ie,1:nlmta_phi,1,ik) = compi_l(ib,1:nlmta_phi,1,ik)
+         end do
+         norm_phig_mpi(1:nlmtt_phi,iksnl)  = norm_phig(1:nlmtt_phi,iksnl)
+      end do
+      
+      if ( npes >1 ) then
+         allocate( compr_mpi( neg, nlmta_phi, 1, kv3 ) ); compr_mpi = 0.0d0
+         allocate( compi_mpi( neg, nlmta_phi, 1, kv3 ) ); compi_mpi = 0.0d0
+         allocate( norm_phig_mpi2( nlmtt_phi, kv3/nspin ) )
+         call mpi_allreduce( compr, compr_mpi, neg*nlmta_phi*1*kv3, &
+              &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
+         call mpi_allreduce( compi, compi_mpi, neg*nlmta_phi*1*kv3, &
+              &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
+         call mpi_allreduce( norm_phig_mpi, norm_phig_mpi2, nlmtt_phi*kv3/nspin, &
+              &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
+         compr = compr_mpi;   compi = compi_mpi
+
+         norm_phig_mpi = norm_phig_mpi2 /dble(nrank_e)
+
+         deallocate( compr_mpi ); deallocate( compi_mpi ); 
+         deallocate( norm_phig_mpi2 )
+      end if
+    end subroutine set_array_compri_etc
+
+    subroutine case_ordinal
+      integer :: ik, iksnl, iorb, ia, is, ib
+      integer :: il, im, tau, lmt
+      real(kind=DP), allocatable :: porb(:)
+
+      if ( mype /= 0 ) return
+
+      allocate( porb(neg) )
+
+      do ik = 1, kv3, ndim_spinor
+         iksnl = (ik-1)/nspin + 1
+         
+         write(nfwfk_orb_proj,'(A,I5,3F20.15)') 'ik = ', &
+              &            ik +nk_in_the_process -1,  vkxyz(ik,1:3,BUCS)
+         
+         do iorb = 1,nlmta_phi
+            call m_PP_tell_iorb_ia_l_m_tau(iorb,ia,il,im,tau)
+            if ( iproj_group(ia) == 0) cycle
+
+            write(nfwfk_orb_proj,'(I5,3I3,A)') ia, il-1, im, tau, ' : ia, l, m, tau'
+            
+            call m_PP_tell_iorb_lmt(iorb,lmt)
+            
+            porb = 0.0d0
+            Do is=1, ndim_spinor
+               do ib = 1, neg
+                  porb(ib) = porb(ib) &
+                       & + ( compr(ib,iorb,1,ik+is-1)**2 &
+                       &    +compi(ib,iorb,1,ik+is-1)**2 ) &
+                       &     *( 1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl) )
+               end do
+            End Do
+            write(nfwfk_orb_proj,'(4F18.10)') ( porb( neordr(ib,ik) ), ib=1, neg_t )
+         end do
+      end do
+      deallocate( porb )
+
+    end subroutine case_ordinal
+    
+    subroutine case_with_j
+      integer :: ik, iksnl, ia, ig, ii, it, ilp, ll, tau, ip
+      integer :: iorb, lmt1, ib, m1
+      real(kind=DP) :: c1, c2
+      complex(kind=CMPLDP) :: z1, z2
+
+      real(kind=DP), allocatable :: porb(:)
+      complex(kind=CMPLDP), allocatable :: zcomp(:,:,:)
+
+! ----------------
+      if ( mype /= 0 ) return
+
+      allocate( porb(neg) )
+
+      Do ik=1, kv3, ndim_spinor
+         write(nfwfk_orb_proj,'(A,I5,3F20.15)') 'ik = ', &
+              &            ik +nk_in_the_process -1,  vkxyz(ik,1:3,BUCS)
+
+         iksnl = ( ik -1 )/nspin +1
+
+         Do ia=1, natm
+            ig = iproj_group(ia)
+            if ( ig == 0 ) cycle
+
+            do ii=1,num_proj_elems(ig)
+               ip = proj_group( ii, ig )
+               it = proj_attribute(ip)%ityp
+               ilp = proj_attribute(ip)%l +1
+               ll = proj_attribute(ip)%l
+               tau = proj_attribute(ip)%t
+!
+               allocate( zcomp( -ll:ll, neg, ndim_spinor ) ); zcomp = 0.0d0
+               call tranform_compri_r2c_sph( ia, it, ll, tau, ik, &
+                    &                        compr, compi, zcomp )
+
+               if ( ll == 0 ) then
+                  call find_iorb_from_lmt( ia, it, ll, 1, tau, iorb )
+                  call m_PP_tell_iorb_lmt( iorb, lmt1 )
+
+                  write(nfwfk_orb_proj,'(I5,F7.2,I3,F7.2,I3,A)') &
+                       &         ia, ll+0.5d0, ll, 0.5d0, tau, ' : ia, j, l, mj, tau'
+                  Do ib=1, neg
+                     z1 = cmplx( compr(ib,iorb,1,ik),   compi(ib,iorb,1,ik) )
+                     z2 = cmplx( compr(ib,iorb,1,ik+1), compi(ib,iorb,1,ik+1) )
+                     porb(ib) = ( z1*conjg(z1) +z2*conjg(z2) ) &
+                          &     *( 1.d0+qorb(iorb)/norm_phig_mpi( lmt1, iksnl ) )
+                  End Do
+                  write(nfwfk_orb_proj,'(4F18.10)') &
+                       &               ( porb( neordr(ib,ik) ), ib=1, neg_t )
+               else
+! j_up
+                  Do m1=-ll -1, ll
+                     c1 = dble( ll +m1 + 1 ) / dble( 2 *ll +1 )
+                     c2 = dble( ll -m1 )     / dble( 2 *ll +1 )
+                     c1 = sqrt(c1);  c2 = sqrt(c2)
+
+                     call find_iorb_from_lmt( ia, it, ll, 1, tau, iorb )
+                     call m_PP_tell_iorb_lmt( iorb, lmt1 )
+
+                     write(nfwfk_orb_proj,'(I5,F7.2,I3,F7.2,I3,A)') &
+                          &         ia, ll+0.5d0, ll, m1+0.5d0, tau, &
+                          &             ' : ia, j, l, mj, tau'
+                     Do ib=1, neg
+                        z1 = 0.0d0
+                        if ( m1 > -ll -1 ) z1 = z1 +c1 *zcomp( m1,    ib, 1 )
+                        if ( m1 < ll     ) z1 = z1 +c2 *zcomp( m1 +1, ib, 2 )
+                        porb(ib) = z1 *conjg(z1) &
+                             &     *( 1.d0+qorb(iorb)/norm_phig_mpi(lmt1,iksnl) )
+                     End Do
+                     write(nfwfk_orb_proj,'(4F18.10)') &
+                          &         ( porb( neordr(ib,ik) ), ib=1, neg_t )
+                  End Do
+! j_down
+                  Do m1=-ll+1, ll
+                     c1 = dble( ll -m1 + 1 ) / dble( 2 *ll +1 )
+                     c2 = dble( ll +m1 )     / dble( 2 *ll +1 )
+                     c1 = sqrt(c1);  c2 = -sqrt(c2)
+
+                     call find_iorb_from_lmt( ia, it, ll, 1, tau, iorb )
+                     call m_PP_tell_iorb_lmt( iorb, lmt1 )
+
+                     write(nfwfk_orb_proj,'(I5,F7.2,I3,F7.2,I3,A)') &
+                          &         ia, ll-0.5d0, ll, m1-0.5d0, tau, &
+                          &             ' : ia, j, l, mj, tau'
+                     Do ib=1, neg
+                        z1 = c1 *zcomp( m1-1, ib, 1 ) +c2 *zcomp( m1, ib, 2 )
+                        porb(ib) = z1 *conjg(z1) &
+                             &     *( 1.d0+qorb(iorb)/norm_phig_mpi(lmt1,iksnl) )
+                     End Do
+                     write(nfwfk_orb_proj,'(4F18.10)') &
+                          &         ( porb( neordr(ib,ik) ), ib=1, neg_t )
+                  End Do
+               end if
+               deallocate( zcomp )
+
+            End Do
+         End Do
+
+      End Do
+      deallocate( porb )
+
+    end subroutine case_with_j
+
+    subroutine tranform_compri_r2c_sph( ia, it, ll, tau, ik, &
+         &                              compr, compi, zcomp )
+      integer, intent(in) :: ia, it, ll, tau, ik
+      real(kind=DP), intent(in) :: compr( neg, nlmta_phi, 1, kv3 )
+      real(kind=DP), intent(in) :: compi( neg, nlmta_phi, 1, kv3 )
+      complex(kind=CMPLDP), intent(out) :: zcomp( -ll:ll, neg, ndim_spinor )
+
+      integer :: m1, m2, ib, iorb
+      complex(kind=CMPLDP) :: z1
+      complex(kind=CMPLDP) :: ztmp( ndim_spinor )
+
+      Do m1=-ll, ll
+         Do m2=1, 2*ll +1
+            call find_iorb_from_lmt( ia, it, ll, m2, tau, iorb )
+            Do ib=1, neg
+               ztmp(1) = cmplx( compr(ib,iorb,1,ik),   compi(ib,iorb,1,ik)   )
+               ztmp(2) = cmplx( compr(ib,iorb,1,ik+1), compi(ib,iorb,1,ik+1) )
+
+               if ( ll == 0 ) z1 = MatU_ylm_RC_L0( m2, m1 )
+               if ( ll == 1 ) z1 = MatU_ylm_RC_L1( m2, m1 )
+               if ( ll == 2 ) z1 = MatU_ylm_RC_L2( m2, m1 )
+               if ( ll == 3 ) z1 = MatU_ylm_RC_L3( m2, m1 )
+
+               zcomp(m1,ib,:) = zcomp(m1,ib,:) +z1 *ztmp(:)
+            End Do
+         End Do
+      End Do
+    end subroutine tranform_compri_r2c_sph
+
+    subroutine find_iorb_from_lmt( ia, it, ll, mm, tau, iorb )
+      integer, intent(in) :: ia, it, ll, mm, tau
+      integer, intent(out) :: iorb
+
+      integer :: lmt1, l1, m1, t1
+
+      iorb = 0
+      Do lmt1=1, ilmt_phi(it)
+         l1 = ltp_phi(lmt1,it); m1 = mtp_phi(lmt1,it);  t1 = taup_phi(lmt1,it)
+         if ( l1 == ll +1 .and. m1 == mm .and. t1 == tau ) then
+            exit
+         endif
+      ENd Do
+      iorb = lmta_phi( lmt1,ia )
+
+    end subroutine find_iorb_from_lmt
+
+  end subroutine m_ESIO_wd_Wfn_orb_proj
+
 
 end module m_ES_IO
diff -uprN phase0_2015.01/src_phase/m_ES_LHXC.F90 phase0_2015.01.01/src_phase/m_ES_LHXC.F90
--- phase0_2015.01/src_phase/m_ES_LHXC.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_LHXC.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 494 $)
 !
 !  MODULE: m_ES_LHXC
 !
@@ -47,7 +47,7 @@
 #endif
 
 module m_ES_LHXC
-! $Id: m_ES_LHXC.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_ES_LHXC.F90 494 2016-06-02 00:54:16Z jkoga $
   use m_Electronic_Structure, only : vlhxc_l, vloc_esm
   use m_PlaneWaveBasisSet,    only : kg,kgp,gr_l
   use m_PseudoPotential,      only : psc_l, ival
@@ -78,7 +78,13 @@ module m_ES_LHXC
   use m_FFT,                  only : fft_box_size_CD
   use m_PlaneWaveBasisSet,    only : igfp_l
 
-implicit none
+! === POSITRON SCF === 2015/11/28
+  use m_Control_Parameters,   only : sw_positron, positron_method
+  use m_Const_Parameters,     only : positron_GGGC
+  use m_Positron_Wave_Functions,        only :  pchg_l
+! ==================== 2015/11/28
+
+  implicit none
   include 'mpif.h'
 !  61. m_ESlhxc_potential
 contains
@@ -93,7 +99,7 @@ contains
     integer :: ist,ip
     integer :: ig
     integer :: nfftcd,ierr
-    integer :: id_sname = -1
+    integer :: id_sname = -1,id_sname2=-1
 
     call tstatc0_begin('m_ESlhxc_potential ',id_sname,1)
     vlhxc_l = 0.d0
@@ -103,17 +109,23 @@ contains
     if(sw_esm==ON) then
        nfftcd = fft_box_size_CD(1,0)*fft_box_size_CD(2,0)*fft_box_size_CD(3,0)
        allocate(vhar(nfftcd));vhar=(0.d0,0.d0)
-       allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+!       allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+       allocate(chgc(kgp,nspin));chgc=(0.d0,0.d0)
        if(kimg==1)then
           do ig=ista_kngp,iend_kngp
-             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),0.d0)
+!             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),0.d0)
+             chgc(ig,1:nspin) = dcmplx(chg(ig,1,1:nspin),0.d0)
           enddo
        else
           do ig=ista_kngp,iend_kngp
-             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),chg(ig,2,1:nspin))
+!             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),chg(ig,2,1:nspin))
+             chgc(ig,1:nspin) = dcmplx(chg(ig,1,1:nspin),chg(ig,2,1:nspin))
           enddo
        endif
+       call mpi_allreduce(mpi_in_place,chgc,kgp*nspin,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
+       call tstatc0_begin('esm_hartree ',id_sname2,1)
        call esm_hartree(chgc,ehar,vhar)
+       call tstatc0_end(id_sname2)
        vhar(:) = 0.5d0*vhar(:) !Ry -> Ha
        deallocate(chgc)
     endif
@@ -213,6 +225,25 @@ contains
           end do
        end do
     end if
+
+! === POSITRON SCF === 2015/11/28
+    if ( sw_positron /= OFF ) then
+       if ( positron_method == Positron_GGGC ) then
+          if ( nspin == 2 ) stop "UUU"
+          do is = 1, nspin
+             do ik = 1, kimg
+                if(nspin == 1) then
+                   do i = ist, iend_kngp  !for mpi
+                      vlhxc_l(i,ik,is) = vlhxc_l(i,ik,is) &
+                           &             -PAI4*pchg_l(i,ik)/gr_l(i)**2
+                   end do
+                end if
+             end do
+          end do
+       end if
+    endif
+! ============== 2015/11/28
+
     if(iprivlhxcq >= 2) call m_ESlhxc_wd_vlhxc(nfout)
     call tstatc0_end(id_sname)
   end subroutine m_ESlhxc_potential
diff -uprN phase0_2015.01/src_phase/m_ES_Mag_Constraint.f90 phase0_2015.01.01/src_phase/m_ES_Mag_Constraint.f90
--- phase0_2015.01/src_phase/m_ES_Mag_Constraint.f90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_Mag_Constraint.f90	2016-07-12 12:51:19.000000000 +0900
@@ -1,5 +1,5 @@
 module m_ES_Mag_Constraint
-! $Id: m_ES_Mag_Constraint.f90 409 2014-10-27 09:24:52Z jkoga $
+! $Id: m_ES_Mag_Constraint.f90 469 2015-09-30 03:06:13Z ktagami $
 
   use m_Parallelization,      only : ista_kngp, iend_kngp, ierr, npes, mype, &
        &                             mpi_comm_group
@@ -242,8 +242,152 @@ contains
 
     end subroutine case_constraint_moment_local
 
-    subroutine case_constraint_direc_local            ! only for noncol
-      stop "Not supported"
+    subroutine case_constraint_direc_local        
+      real(kind=DP) :: MagMom(3), cfactor(3), MagDirec(3)
+      real(kind=DP) :: rad1, fac1r, fac1i, fac2
+      real(kind=DP) :: VecG(3), normG, normG3, gr, d1
+      real(kind=DP) :: c1, c2, c3, cnorm, cnorm2,  cnorm4
+
+      real(kind=DP), parameter :: cnorm_lower_limit = 1.0D-10
+!      real(kind=DP), parameter :: cnorm_lower_limit = 1.0D-4
+
+      real(kind=DP), allocatable :: zfcos(:), zfsin(:)
+      real(kind=DP), allocatable :: RhoMag_on_atom_mpi(:,:)
+
+      integer :: i, j, ia, it, is, ixyz, ixyz_max, ist
+
+      allocate(zfcos(ista_kngp:iend_kngp)); zfcos = 0.d0
+      allocate(zfsin(ista_kngp:iend_kngp)); zfsin = 0.d0
+
+      if ( noncol ) then
+         ixyz_max = 3
+      else
+         ixyz_max = 1
+      endif
+
+      if ( allocated( MagField_constrain_local ) ) then
+         deallocate( MagField_constrain_local )
+      endif
+
+      allocate( MagField_constrain_local(ista_kngp:iend_kngp, kimg, ixyz_max ))
+      MagField_constrain_local = 0.0d0
+!
+      Do ia=1, natm
+         it = ityp(ia)
+         rad1 = rad_cov(ia)
+
+         MagMom = 0.0d0
+         Do ixyz=1, ixyz_max
+            if ( noncol ) then
+               MagMom( ixyz ) = RhoMag_on_atom( ia, ixyz +1 )
+            else
+               MagMom( ixyz ) = RhoMag_on_atom( ia, 1 ) -RhoMag_on_atom( ia, 2 )
+            endif
+         End do
+
+         cnorm2 = 0.0d0
+         Do ixyz=1, ixyz_max
+            cnorm2 = cnorm2 + MagMom(ixyz)**2
+         End do
+         cnorm = sqrt( cnorm2 )
+
+         if ( cnorm < cnorm_lower_limit ) cycle
+
+         MagDirec = MagMom / cnorm
+!
+
+         cnorm4 = 0.0d0
+         Do ixyz=1, ixyz_max
+            cnorm4 = cnorm4 +mag_moment0_atomtyp(it,ixyz)**2
+         End do
+         cnorm4 = sqrt( cnorm4 )
+!
+         Do ixyz=1, ixyz_max
+            if ( noncol ) then
+               c1 = cnorm2 - MagMom(ixyz)**2;    c2 = cnorm**3
+               if ( cnorm4 > 0.0 ) then
+                  c3 = mag_constraint_lambda /univol *c1 /c2 &
+                       &         *( MagDirec(ixyz) -mag_direction0_atomtyp(it,ixyz) )
+               else
+                  c3 = 0.0d0
+               endif
+               cfactor( ixyz ) = c3
+            else
+               if ( cnorm4 > 0.0 ) then
+                  if ( mag_moment0_atomtyp(it,ixyz) > 0.0 ) then
+                     c3 = 1.0d0
+                  else
+                     c3 = -1.0d0
+                  endif
+                  cfactor( ixyz ) = mag_constraint_lambda /univol &
+                       &               *( MagDirec(ixyz) -c3 )
+               else
+                  cfactor( ixyz ) = 0.0d0
+               endif
+            endif
+
+!            write(2000+mype,*) "ia cfac = ", ia, cfactor(ixyz), MagDirec(1), c3
+!            write(2100+mype,*) "ia  = ", ia, MagMom(ixyz), cnorm, cnorm2
+
+            call calc_phase2( natm, pos, ia, kgp, ngabc, ista_kngp, iend_kngp, &
+                 &            zfcos, zfsin )
+
+            if ( ista_kngp == 1 ) then
+               ist = 2
+            else
+               ist = ista_kngp
+            endif
+
+            Do i=ist, iend_kngp
+               VecG(1) = rltv(1,1)*ngabc(i,1) +rltv(1,2)*ngabc(i,2) +rltv(1,3)*ngabc(i,3)
+               VecG(2) = rltv(2,1)*ngabc(i,1) +rltv(2,2)*ngabc(i,2) +rltv(2,3)*ngabc(i,3)
+               VecG(3) = rltv(3,1)*ngabc(i,1) +rltv(3,2)*ngabc(i,2) +rltv(3,3)*ngabc(i,3)
+            
+               normG = sqrt( VecG(1)**2 +VecG(2)**2 +VecG(3)**2 )
+               normG3 = normG**3
+               
+               fac1r = zfcos(i); fac1i = zfsin(i)
+!
+               gr = normG *rad1
+               fac2 = -gr *cos(gr) + sin(gr)
+               fac2 = fac2 *PAI4 /normG3
+!
+               MagField_constrain_local( i, 1, ixyz ) &
+                    & = MagField_constrain_local( i, 1, ixyz ) &
+                    &   +fac1r *fac2 *cfactor( ixyz )
+               if ( kimg == 2 ) then
+                  MagField_constrain_local( i, 2, ixyz ) &
+                       & = MagField_constrain_local( i, 2, ixyz ) &
+                       &  -fac1i *fac2 *cfactor( ixyz )
+               endif
+            End Do
+
+            if ( mype == 0 ) then
+               fac2 = PAI4 /3.0d0 *rad1**3
+
+               MagField_constrain_local( 1, 1, ixyz ) &
+                    & = MagField_constrain_local( 1, 1, ixyz ) &
+                    &   +fac2 *cfactor( ixyz )
+            end if
+         End Do
+      End Do
+!
+      if ( noncol ) then
+         Do i=ista_kngp, iend_kngp
+            Do is=2, ndim_magmom
+               vlhxc_l(i,1,is) = vlhxc_l(i,1,is) +MagField_constrain_local(i,1,is-1)
+               vlhxc_l(i,2,is) = vlhxc_l(i,2,is) +MagField_constrain_local(i,2,is-1)
+            End do
+         End Do
+      else
+         Do i=ista_kngp, iend_kngp
+            Do j=1, kimg
+               vlhxc_l(i,j,1) = vlhxc_l(i,j,1) +MagField_constrain_local(i,j,1)
+               vlhxc_l(i,j,2) = vlhxc_l(i,j,2) -MagField_constrain_local(i,j,1)
+            End Do
+         End Do
+      endif
+
     end subroutine case_constraint_direc_local
 
   end subroutine m_ES_add_MagConstraintPot_chgql
@@ -518,11 +662,100 @@ contains
     end subroutine case_constraint_direc_global
 
     subroutine case_constraint_direc_local
-      stop "kt: Not supported"
+      real(kind=DP) :: MagMom(3), MagDirec(3), c1, c2, c3
+      integer :: i, j, ia, it, is, ixyz
+
+      real(kind=DP) :: cnorm, cnorm2, cnorm4
+      real(kind=DP) :: csum1, csum2
+
+      real(kind=DP), parameter :: cnorm_lower_limit = 1.0D-10
+
+      csum1 = 0.0d0; csum2 = 0.0d0
+
+      if ( noncol ) then
+         Do i=ista_kngp, iend_kngp
+            Do j=1, kimg
+               Do ixyz=1, 3
+                  csum1 = csum1 + MagField_constrain_local(i,j,ixyz)*chgq_l(i,j,ixyz+1)
+               End Do
+            End Do
+         End Do
+
+         Do ia=1, natm
+            it = ityp(ia)
+            Do ixyz=1, 3
+               MagMom(ixyz) = RhoMag_on_Atom( ia,ixyz+1 )
+            End do
+
+            cnorm2 = 0.0d0
+            Do ixyz=1, 3
+               cnorm2 = cnorm2 + MagMom(ixyz)**2
+            End do
+            cnorm = sqrt( cnorm2 )
+
+            cnorm4 = 0.0d0
+            Do ixyz=1, 3
+               cnorm4 = cnorm4 +mag_moment0_atomtyp(it,ixyz)**2
+            End do
+            cnorm4 = sqrt( cnorm4 )
+
+            if ( cnorm4 > 0.0 .and. cnorm > cnorm_lower_limit ) then
+               MagDirec = MagMom / cnorm
+               Do ixyz=1, 3
+                  csum2 = csum2 + ( MagDirec(ixyz) -mag_direction0_atomtyp(it,ixyz) )**2
+               End Do
+            endif
+         End do
+
+      else
+         Do i=ista_kngp, iend_kngp
+            Do j=1, kimg
+               csum1 = csum1 + MagField_constrain_local(i,j,1) &
+                    &         *( chgq_l(i,j,1) -chgq_l(i,j,2) )
+            End Do
+         End Do
+
+         Do ia=1, natm
+            it = ityp(ia)
+            MagMom(1) = RhoMag_on_Atom( ia,1 ) -RhoMag_on_Atom( ia,2 )
+
+            cnorm2 = 0.0d0
+            Do ixyz=1, 1
+               cnorm2 = cnorm2 + MagMom(ixyz)**2
+            End do
+            cnorm = sqrt( cnorm2 )
+
+            cnorm4 = 0.0d0
+            Do ixyz=1, 1
+               cnorm4 = cnorm4 +mag_moment0_atomtyp(it,ixyz)**2
+            End do
+            cnorm4 = sqrt( cnorm4 )
+
+            if ( cnorm4 > 0.0 .and. cnorm > cnorm_lower_limit ) then
+               MagDirec = MagMom / cnorm
+               if ( mag_moment0_atomtyp(it,1) > 0.0 ) then
+                  c3 = 1.0d0
+               else
+                  c3 = -1.0d0
+               endif
+               csum2 = csum2 +( MagDirec(1) -c3 )**2
+            endif
+         End do
+      endif
+
+      if ( npes > 1 ) then
+         call mpi_allreduce( csum1, c1, 1, mpi_double_precision, &
+              &              mpi_sum, mpi_comm_group, ierr )
+         csum1 = c1
+      endif
+
+      ene_double_counting = csum1 *univol
+      ene_mag_constraint = csum2 *mag_constraint_lambda/ 2.0d0
+
     end subroutine case_constraint_direc_local
 
     subroutine case_constraint_moment_local
-      real(kind=DP) :: MagMom(3), MagDirec(3), c1, c2
+      real(kind=DP) :: MagMom(3), c1, c2
       integer :: i, j, ia, it, is, ixyz
 
       real(kind=DP) :: csum1, csum2
@@ -542,8 +775,8 @@ contains
             it = ityp(ia)
             Do ixyz=1, 3
                MagMom(ixyz) = RhoMag_on_Atom( ia,ixyz+1 )
+               csum2 = csum2 + ( MagMom(ixyz) -mag_moment0_atomtyp(it,ixyz) )**2
             End do
-            csum2 = csum2 + ( MagMom(ixyz) -mag_moment0_atomtyp(it,ixyz) )**2
          End do
 
       else
@@ -557,7 +790,7 @@ contains
          Do ia=1, natm
             it = ityp(ia)
             MagMom(1) = RhoMag_on_Atom( ia,1 ) -RhoMag_on_Atom( ia,2 )
-            csum2 = ( MagMom(1) -mag_moment0_atomtyp(it,1) )**2
+            csum2 = csum2 +( MagMom(1) -mag_moment0_atomtyp(it,1) )**2
          End do
       endif
 
diff -uprN phase0_2015.01/src_phase/m_ES_NonCollinear.f90 phase0_2015.01.01/src_phase/m_ES_NonCollinear.f90
--- phase0_2015.01/src_phase/m_ES_NonCollinear.f90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_NonCollinear.f90	2016-07-12 12:51:19.000000000 +0900
@@ -1,5 +1,5 @@
 module m_ES_NonCollinear
-! $Id: m_ES_NonCollinear.f90 409 2014-10-27 09:24:52Z jkoga $
+! $Id: m_ES_NonCollinear.f90 487 2016-05-17 05:20:42Z ktagami $
   use m_Const_Parameters,   only : DP, CMPLDP, zi, yes, PAI, &
        &                            BuiltIn, ByProjector, ByPawPot, ZeffApprox, &
        &                           BUCS, CARTS, CRDTYP, DELTA10, ReadFromPP
diff -uprN phase0_2015.01/src_phase/m_ES_WF_by_RMM.F90 phase0_2015.01.01/src_phase/m_ES_WF_by_RMM.F90
--- phase0_2015.01/src_phase/m_ES_WF_by_RMM.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_WF_by_RMM.F90	2016-07-12 12:51:19.000000000 +0900
@@ -3300,10 +3300,12 @@ contains
             end if
          end do
 
-         do iap = 1, n_ialist0
-            ia = n_ialist0*(lmt1-1) + iap
-            bWr_lmt(ia,:) = bWr_tmp(iap,:)
-            bWi_lmt(ia,:) = bWi_tmp(iap,:)
+         do ib = 1, np_e
+            do iap = 1, n_ialist0
+               ia = n_ialist0*(lmt1-1) + iap
+               bWr_lmt(ia,ib) = bWr_tmp(iap,ib)
+               bWi_lmt(ia,ib) = bWi_tmp(iap,ib)
+            end do
          end do
 
          do iap = 1, n_ialist0
@@ -4428,10 +4430,12 @@ contains
             end if
          end do
 
-         do iap = 1, n_ialist0
-            ia = n_ialist0*(lmt1-1) + iap
-            bWr_lmt(ia,:) = bWr_tmp(iap,:)
-            bWi_lmt(ia,:) = bWi_tmp(iap,:)
+         do ib = 1, np_e
+            do iap = 1, n_ialist0
+               ia = n_ialist0*(lmt1-1) + iap
+               bWr_lmt(ia,ib) = bWr_tmp(iap,ib)
+               bWi_lmt(ia,ib) = bWi_tmp(iap,ib)
+            end do
          end do
 
          do iap = 1, n_ialist0
diff -uprN phase0_2015.01/src_phase/m_ES_WF_by_submat.F90 phase0_2015.01.01/src_phase/m_ES_WF_by_submat.F90
--- phase0_2015.01/src_phase/m_ES_WF_by_submat.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_WF_by_submat.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 511 $)
 !
 !  MODULE:  m_ES_WF_by_submat
 !
@@ -481,6 +481,7 @@ contains
     real(kind=DP) :: sum_sq_diagonal, sum_sq_non_diagonal &
          & , sum_abs_diagonal, sum_abs_non_diagonal
     integer :: id_sname = -1, id_sname2 = -1, ipri0
+    integer :: max_block_size
 #ifdef SUBMAT_DGEMM
     real(kind=DP),allocatable,dimension(:,:) :: w1hw2r,w1hw2i
     real(kind=DP) :: alpha, beta
@@ -572,11 +573,14 @@ contains
 
 ! (make matrix elements )
 #ifdef _USE_SCALAPACK_
+    if(iprisubmat>=2) write(nfout,'(" sw_scalapack = ",i3," <<evolve_WFs_in_subspace>>")') sw_scalapack
     if(sw_scalapack == ON) then
        if(submat_uncalled) then
 ! === DEBUG by tkato 2012/01/23 ================================================
           if(block_size == 0) then
+             max_block_size = int(real(meg)/real(nrank_e))
              block_size =  nb_mgs_default
+             if(block_size.ge.max_block_size) block_size = max_block_size
           end if
 ! ==============================================================================
           call set_nprow_npcol(nprow,npcol)
diff -uprN phase0_2015.01/src_phase/m_ES_dos.F90 phase0_2015.01.01/src_phase/m_ES_dos.F90
--- phase0_2015.01/src_phase/m_ES_dos.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ES_dos.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 492 $)
 !
 !  MODULE: m_ES_dos
 !
@@ -34,14 +34,14 @@
 !
 module m_ES_dos
 !     (m_ESdos)
-! $Id: m_ES_dos.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_ES_dos.F90 492 2016-05-31 03:06:04Z jkoga $
 !
 ! This module was originally coded by T. Yamasaki (FUJITSU Laboratories) in 2001.
 ! And this is transferred as match to PHASE by T. Yamasaki, 18th May. 2003.
 !
   use m_Kpoints, only :              kv3, kv3_ek, qwgt,vkxyz_ek &
        &                           , np0,np2,ip20,iwt,ip2cub,nxyz_tetra,trmat &
-       &                           , m_Kp_sample_mesh
+       &                           , m_Kp_sample_mesh, qwgt_ek
   use m_Files, only :                nfout
 !!$  use m_Files, only :                nfdos, nfout
   use m_Timing, only :               tstatc0_begin, tstatc0_end
@@ -54,7 +54,7 @@ module m_ES_dos
        &                            ,ipriinputfile, printable
   use m_Const_Parameters, only :     DP,Hartree,BUCS,EK,SCF, ALDOS, LAYERDOS, ON, OFF, TOTAL, PAI2
   use m_Parallelization, only :      mpi_comm_group,map_ek,mype,map_e,map_k,myrank_e,myrank_k &
-       &                            ,ierr,np_e,map_z,ista_e,npes
+       &                            ,ierr,np_e,map_z,ista_e,npes, nrank_e
   use m_PseudoPotential, only :      nlmta_phi,nlmtt_phi &
        &                            ,m_PP_tell_iorb_ia_l_m_tau,qorb &
        &                            ,m_PP_tell_iorb_lmt
@@ -397,6 +397,7 @@ contains
           call mpi_allreduce(compr,compr_mpi,neg*nlmta_phi*nopr*kv,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
           call mpi_allreduce(compi,compi_mpi,neg*nlmta_phi*nopr*kv,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
           call mpi_allreduce(norm_phig_mpi,norm_phig_mpi2,nlmtt_phi*kv/nspin,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          norm_phig_mpi2 = norm_phig_mpi2 / dble(nrank_e)
        end if
 
        eko = eko_mpi
@@ -561,6 +562,7 @@ contains
                &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
           call mpi_allreduce( norm_phig_mpi, norm_phig_mpi2, nlmtt_phi*kv/ndim_spinor, &
                &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
+          norm_phig_mpi2 = norm_phig_mpi2 / dble(nrank_e)
        end if
 
        eko = eko_mpi
@@ -742,14 +744,15 @@ contains
   subroutine dealloc_dos()
     deallocate(dos)
     deallocate(sumdos)
-!!$    if(sw_pdos == ON) then
-!!$       if(allocated(pdos)) deallocate(pdos)
-!!$       if(allocated(sumpdos)) deallocate(sumpdos)
-!!$    end if
+    if(sw_pdos == ON) then
+       if(allocated(pdos)) deallocate(pdos)
+       if(allocated(sumpdos)) deallocate(sumpdos)
+    end if
   end subroutine dealloc_dos
 
-  subroutine make_dos_with_GaussianDistrib(kv,iwsc)
-    integer, intent(in) :: kv,iwsc
+  subroutine make_dos_with_GaussianDistrib( kv, iwsc, kpt_weight )
+    integer, intent(in) :: kv, iwsc
+    real(kind=DP), intent(in) :: kpt_weight( kv )
 
     integer ::             i, ik, is, ie, id, ispin, iorb, iopr, lmt, iksnl
     real(kind=DP) ::       Es, e, El, Eu, tl, tu, w, DeltaE
@@ -793,41 +796,42 @@ contains
                 tu = (Eu - eko(i,ik))*sqrtdVI
                 !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
                 dos(id+1,ispin) = dos(id+1,ispin) &
-                     & + w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
+                     & + w *2 *(derf(tu) - derf(tl)) *0.5d0 /DeltaE *kpt_weight(ik)
              end do
              if(iwsc == TOTAL .and. sw_pdos == ON) then
                 do iorb = 1,nlmta_phi
-                call m_PP_tell_iorb_lmt(iorb,lmt)
-                porb = 0.d0
+                   call m_PP_tell_iorb_lmt(iorb,lmt)
+                   porb = 0.d0
 !!$ASASASASAS
 !!$                do iopr=1,nopr
 !!$                   porb = porb + (compr(i,iorb,iopr,ik)**2 &
 !!$                        &       + compi(i,iorb,iopr,ik)**2) &
 !!$                        &     *(1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl))
 !!$                end do
-                if ( k_symmetry(ik) == GAMMA ) then
-                   do iopr=1,nopr
-                      porb = porb + compr(i,iorb,iopr,ik)**2  /2.0 &
-                   &     *(1.d0+qorb(iorb)/(norm_phig_mpi(lmt,iksnl)*2.) )
-                   end do
-                else
-                   do iopr=1,nopr
-                      porb = porb + (compr(i,iorb,iopr,ik)**2 &
-                           &       + compi(i,iorb,iopr,ik)**2) &
-                           &     *(1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl))
-                   end do
-                endif
+                   if ( k_symmetry(ik) == GAMMA ) then
+                      do iopr=1,nopr
+                         porb = porb + compr(i,iorb,iopr,ik)**2  /2.0 &
+                              &     *(1.d0+qorb(iorb)/(norm_phig_mpi(lmt,iksnl)*2.) )
+                      end do
+                   else
+                      do iopr=1,nopr
+                         porb = porb + (compr(i,iorb,iopr,ik)**2 &
+                              &       + compi(i,iorb,iopr,ik)**2) &
+                              &     *(1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl))
+                      end do
+                   endif
 !!$ASASASASAS
-                porb = porb/dble(nopr)
-                do id = is, ie
-                   El = Es + id*DeltaE
-                   Eu = El + DeltaE
-                   tl = (El - eko(i,ik))*sqrtdVI
-                   tu = (Eu - eko(i,ik))*sqrtdVI
-                   !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
-                   pdos(id+1,iorb,ispin) = pdos(id+1,iorb,ispin) &
-                        & + porb * w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
-                end do
+                   porb = porb/dble(nopr)
+                   do id = is, ie
+                      El = Es + id*DeltaE
+                      Eu = El + DeltaE
+                      tl = (El - eko(i,ik))*sqrtdVI
+                      tu = (Eu - eko(i,ik))*sqrtdVI
+                      !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
+                      pdos(id+1,iorb,ispin) = pdos(id+1,iorb,ispin) &
+                           & + porb *w *2 *(derf(tu) - derf(tl)) *0.5d0 /DeltaE &
+                           &        *kpt_weight(ik)
+                   end do
                 end do
              end if
           end do
@@ -848,8 +852,9 @@ contains
   end subroutine make_dos_with_GaussianDistrib
 
 ! ==================================== added by K. Tagami ============== 11.0
-  subroutine mkdos_with_GaussDistrib_noncl(kv,iwsc)
-    integer, intent(in) :: kv,iwsc
+  subroutine mkdos_with_GaussDistrib_noncl( kv, iwsc, kpt_weight )
+    integer, intent(in) :: kv, iwsc
+    real(kind=DP), intent(in) :: kpt_weight( kv )
 
     integer :: i, ik, is, ie, id, ispin, iorb, iopr, lmt, iksnl
     integer :: is1, is2, istmp, ismax
@@ -907,7 +912,7 @@ contains
                 tu = (Eu - eko(i,iksnl))*sqrtdVI
                 !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
                 dos(id+1,istmp) = dos(id+1,istmp) &
-                     & + w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
+                     & + w *(derf(tu) - derf(tl)) *0.5d0 /DeltaE *kpt_weight(ik)
              end do
           End do
 
@@ -953,7 +958,8 @@ contains
                    tu = (Eu - eko(i,iksnl))*sqrtdVI
                    !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
                    pdos(id+1,iorb,:) = pdos(id+1,iorb,:) &
-                        & + porb(iorb,:) * w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
+                        & + porb(iorb,:) *w *(derf(tu) - derf(tl)) &
+                        &                *0.5d0 /DeltaE *kpt_weight(ik)
                 end do
              End do
 
@@ -980,8 +986,9 @@ contains
 ! =================================================================== 11.0
 
 ! ====================== KT_add ======================= 13.0E
-  subroutine make_dos_with_FDiracDistrib(kv,iwsc)
-    integer, intent(in) :: kv,iwsc
+  subroutine make_dos_with_FDiracDistrib( kv, iwsc, kpt_weight )
+    integer, intent(in) :: kv, iwsc
+    real(kind=DP), intent(in) :: kpt_weight( kv )
 
     integer ::             i, ik, is, ie, id, ispin, iorb, iopr, lmt, iksnl
     real(kind=DP) ::       Es, e, Ene1, c1, c2, w, DeltaE
@@ -1022,7 +1029,8 @@ contains
              do id = is, ie
                 ene1 = Es +id*DeltaE
                 call width_fermi_dirac( ene1, eko(i,ik), smearing_width_fdirac, c1, c2 )
-                dos(id+1,ispin) = dos(id+1,ispin) + w *c1 *2.0d0 /dble(kv)
+!!!                dos(id+1,ispin) = dos(id+1,ispin) + w *c1 *2.0d0 /dble(kv)
+                dos(id+1,ispin) = dos(id+1,ispin) + w *c1 *2.0d0 *kpt_weight(ik)
              end do
              if(iwsc == TOTAL .and. sw_pdos == ON) then
                 do iorb = 1,nlmta_phi
@@ -1053,7 +1061,8 @@ contains
                       call width_fermi_dirac( ene1, eko(i,ik), &
                            &                  smearing_width_fdirac, c1, c2  )
                       pdos(id+1,iorb,ispin) = pdos(id+1,iorb,ispin) &
-                           & + porb *w *c1 *2.0d0 /dble(kv)
+!!!                           & + porb *w *c1 *2.0d0 /dble(kv)
+                           & + porb *w *c1 *2.0d0 *kpt_weight(ik)
                    end do
                 end do
              end if
@@ -1395,7 +1404,7 @@ contains
     if ( iproj_group(ia)== 0 ) return
 
 ! ---
-    write(nf,'("PDOS: ia=",i0," l=",i3," m=",i3," t=",i3)') ia,il-1,im,tau
+    write(nf,'("PDOS: ia= ",i0," l=",i3," m=",i3," t=",i3)') ia,il-1,im,tau
 
     write(nf,'(2x,A,5x,A,4x,A)') "No.      E(eV)",&
          &     "  dos_chg(eV)    dos_mx(eV)    dos_my(eV)    dos_mz(eV)", &
@@ -1428,7 +1437,7 @@ contains
     call alloc_eko_and_substitution(kv3) ! eko_l -> eko
     call find_Erange(eko,neg,kv3)
     call alloc_dos(1,icomponent)
-    call make_dos_with_GaussianDistrib(kv3,icomponent)
+    call make_dos_with_GaussianDistrib( kv3, icomponent, qwgt )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
     if(mype == 0) call write_dos(nfdos)
 !!$    call write_dos(nfout)
@@ -1447,7 +1456,7 @@ contains
     call find_Erange( eko, neg, kv3/ndim_spinor )
     call alloc_dos(1,icomponent)
     
-    call mkdos_with_GaussDistrib_noncl(kv3,icomponent)
+    call mkdos_with_GaussDistrib_noncl( kv3, icomponent, qwgt )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
 
     if(mype == 0) then
@@ -1473,7 +1482,7 @@ contains
     call alloc_eko_and_substitution_ek(kv3_ek) ! eko_ek -> eko
     call find_Erange(eko,neg,kv3_ek)
     call alloc_dos(1,icomponent)
-    call make_dos_with_GaussianDistrib(kv3_ek,icomponent)
+    call make_dos_with_GaussianDistrib( kv3_ek, icomponent, qwgt_ek )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
     if(mype == 0) call write_dos(nfdos)
     call dealloc_eko()
@@ -1488,7 +1497,7 @@ contains
     call find_Erange(eko,neg,kv3_ek/ndim_spinor)
     call alloc_dos(1,icomponent)
 
-    call mkdos_with_GaussDistrib_noncl(kv3_ek,icomponent)
+    call mkdos_with_GaussDistrib_noncl( kv3_ek, icomponent, qwgt_ek )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
 
     if(mype == 0) then
@@ -1512,7 +1521,7 @@ contains
     call find_Erange_fermidirac(eko,neg,kv3)
 
     call alloc_dos(1,icomponent)
-    call make_dos_with_FDiracDistrib(kv3,icomponent)
+    call make_dos_with_FDiracDistrib( kv3, icomponent, qwgt )
 
 !!!    if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
     if(icomponent == TOTAL) then
@@ -1535,7 +1544,7 @@ contains
     call find_Erange_fermidirac(eko,neg,kv3_ek)
 
     call alloc_dos(1,icomponent)
-    call make_dos_with_FDiracDistrib(kv3_ek,icomponent)
+    call make_dos_with_FDiracDistrib( kv3_ek, icomponent, qwgt_ek )
 
     if(icomponent == TOTAL) then
        ValenceBandMaximum = efermi
@@ -1698,6 +1707,9 @@ contains
                & , mpi_comm_group,ierr)
             call mpi_allreduce(MPI_IN_PLACE,norm_phig_mpi,nlmtt_phi*np2,mpi_double_precision,mpi_sum &
                & , mpi_comm_group,ierr)
+
+            norm_phig_mpi = norm_phig_mpi / dble(nrank_e)
+
             !compr = compr_mpi
             !compi = compi_mpi
             !norm_phig_mpi = norm_phig_mpi2
@@ -2169,7 +2181,7 @@ contains
                  &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
 
             compr = compr_mpi;    compi = compi_mpi
-            norm_phig_mpi = norm_phig_mpi2
+            norm_phig_mpi = norm_phig_mpi2 / dble(nrank_e)
             deallocate(compr_mpi,compi_mpi,norm_phig_mpi2)
           end if
 
diff -uprN phase0_2015.01/src_phase/m_Epsilon_ek.F90 phase0_2015.01.01/src_phase/m_Epsilon_ek.F90
--- phase0_2015.01/src_phase/m_Epsilon_ek.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Epsilon_ek.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,7 +1,7 @@
 #define NEC_TUNE
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 447 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 470 $)
 !
 !  MODULE: m_Epsilon
 !
@@ -32,7 +32,7 @@
 ! ================================================================
 
 module m_Epsilon_ek
-! $Id: m_Epsilon_ek.F90 447 2015-08-05 02:38:25Z jkoga $
+! $Id: m_Epsilon_ek.F90 470 2015-10-05 03:15:15Z ktagami $
 !
 ! Universal Virtual Spectroscope for Optoelectronics Research (UVSOR) ver 3.00
 ! UVSOR module for electronic dielectric function calculation
@@ -7060,14 +7060,23 @@ ppc_data(ntyp)
 ! set omega
              if(nbi==nbj) then
                 omega = deg_omega            ! -> intra_band case
+
+             else if(abs(omega) < 1.d-14) then ! -> degenerate case
+                if(omega >= 0.d0) then
+                   omega = 1.d-14
+                else
+                   omega = -1.d-14
+                end if
+! ====
              else
-                if(abs(omega) < 1.d-14) then ! -> degenerate case
-                   if(omega >= 0.d0) then
-                      omega = 1.d-14
+                if ( sw_scissor_renormalization == ON ) then
+                   if ( omega > 0.0 ) then
+                      omega = omega + scissor
                    else
-                      omega = -1.d-14
-                   end if
+                      omega = omega - scissor
+                   endif
                 end if
+! ====
              end if
 
 ! add correction term
@@ -13044,6 +13053,16 @@ ppc_data(ntyp)
                    else
                        e21 = -delta_omega
                    end if
+! ==
+                else
+                   if ( sw_scissor_renormalization == ON ) then
+                      if ( e21 > 0.0 ) then
+                         e21 = e21 + scissor
+                      else
+                         e21 = e21 - scissor
+                      endif
+                   endif
+! ==
                 end if
 ! ========================== 13.0R
              end if
diff -uprN phase0_2015.01/src_phase/m_Excitation.F90 phase0_2015.01.01/src_phase/m_Excitation.F90
--- phase0_2015.01/src_phase/m_Excitation.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Excitation.F90	2016-07-12 12:51:19.000000000 +0900
@@ -2439,7 +2439,7 @@ contains
     deallocate( zrho_work ); deallocate( ztrm2 )
     deallocate( eko_wk ); deallocate( occ_wk )
 
-    SpectrFn_vv = SpectrFn_vv /dble(kv3_fbz) /univol
+    SpectrFn_vv = SpectrFn_vv /dble(kv3_fbz/nspin) /univol
     if ( nspin == 1 ) SpectrFn_vv = SpectrFn_vv *2.0d0
 
 #if 0
@@ -2653,7 +2653,7 @@ contains
        endif
     End Do
 
-    SpectrTensor_vv = SpectrTensor_vv /dble(kv3_fbz) /univol
+    SpectrTensor_vv = SpectrTensor_vv /dble(kv3_fbz/nspin) /univol
     if ( nspin == 1 ) SpectrTensor_vv = SpectrTensor_vv *2.0d0
 
 #if 0
@@ -2821,7 +2821,7 @@ contains
        deallocate( work )
     endif
 
-    SpectrFn_vc = SpectrFn_vc /dble(kv3_fbz) /univol
+    SpectrFn_vc = SpectrFn_vc /dble(kv3_fbz/nspin) /univol
     if ( nspin == 1 ) SpectrFn_vc = SpectrFn_vc *2.0d0
 
 #if 0
diff -uprN phase0_2015.01/src_phase/m_FFT.F90 phase0_2015.01.01/src_phase/m_FFT.F90
--- phase0_2015.01/src_phase/m_FFT.F90	2015-09-14 15:18:23.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_FFT.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 449 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 481 $)
 !
 !  MODULE: m_FFT
 !
@@ -36,7 +36,7 @@
 !
 
 module m_FFT
-! $Id: m_FFT.F90 449 2015-08-06 04:37:59Z jkoga $
+! $Id: m_FFT.F90 481 2016-03-25 02:51:57Z jkoga $
 !
   use m_Timing,            only : tstatc0_begin, tstatc0_end
   use m_Files,             only : nfout
@@ -1085,6 +1085,226 @@ include "m_FFT_type9_ffte.F90"
     if(mod(nfft_pstrn,2) == 1) bfft(nfft_pstrn) = afft(nfft_pstrn)*bfft(nfft_pstrn)
   end subroutine m_FFT_Vlocal_pW
 
+! === EXP_CELLOPT ==== 2015/09/24
+  subroutine m_FFT_coef_CD_integration_kt(ista,iend,f2or1)
+                                            ! from m_XC_Potential.F90,  for electron ?
+    integer, intent(in) :: ista, iend
+    real(kind=DP),intent(out) :: f2or1( ista:iend )
+
+    integer                  :: idp,nlp,nmp,nnp,nd2p,nd3p,ip, idph, nlph
+
+    nlp  = fft_box_size_CD(1,1)
+    nmp  = fft_box_size_CD(2,1)
+    nnp  = fft_box_size_CD(3,1)
+#ifdef _MPIFFT_
+    idp  = fft_box_size_CD_c(1,0)
+    nd2p = fft_box_size_CD_c(2,0)
+    nd3p = fft_box_size_CD_c(3,0)
+#else
+    idp  = fft_box_size_CD(1,0)
+    nd2p = fft_box_size_CD(2,0)
+    nd3p = fft_box_size_CD(3,0)
+#endif
+
+    call set_f2or1( npes, ista, iend, f2or1 )
+
+  contains
+
+    subroutine set_f2or1(npes,ista,iend,f2or1)
+      integer, intent(in) :: npes,ista, iend
+      real(kind=DP), intent(out), dimension(ista:iend) :: f2or1
+      integer :: idph,nlph,ip,i,j,k
+
+      if(kimg == 1) then
+         idph = idp/2
+         nlph = nlp/2
+#ifdef _MPIFFT_
+
+         f2or1 = 0.d0
+!!$         do j = 1, min(nz_d,nnp-nz_d*myrank_cdfft)*nmp
+!!$         do j = 1, min(nz_d,nnp-nz_d*myrank_cdfft)*nd2p
+         do k = 1, min(nz_d, nnp-nz_d*myrank_cdfft)
+            do j = 1, nmp
+               do i = 1, nlph
+!!$                  ip = i + idph*(j-1) + idph*ly*lz_d*myrank_cdfft
+                  ip = i + idph*(j-1) + idph*ly*(k-1) + idph*ly*lz_d*myrank_cdfft
+                  f2or1(ip) = 2.d0
+               end do
+               ip = 1 + idph*(j-1) + idph*ly*(k-1) + idph*ly*lz_d*myrank_cdfft
+               f2or1(ip) = 1.d0
+               ip = nlph+1 + idph*(j-1) + idph*ly*(k-1) + idph*ly*lz_d*myrank_cdfft
+               f2or1(ip) = 1.d0
+            end do
+         end do
+!!$            ip = idph*(j-1) + 1 + idph*ly*lz_d*myrank_cdfft
+!!$            f2or1(ip) = 1.d0
+!!$            ip = idph*(j-1)+ nlph + 1 + idph*ly*lz_d*myrank_cdfft
+!!$            f2or1(ip) = 1.d0
+!!$         end do
+#else
+         f2or1 = 2.d0
+         if(npes >= 2) then
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + 1
+               if(ip>= ista .and. ip <= iend) f2or1(ip) = 1.d0
+            end do
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + nlph + 1
+               if(ip>= ista .and. ip <= iend) f2or1(ip) = 1.d0
+            end do
+            do j = nlph+2, idph
+               do i = 1,nd2p*nnp
+                  ip = idph*(i-1)+j
+                  if(ip>= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+            do j = nmp+1, nd2p
+               do k = 1, nnp
+                  do i = 1, nlph
+                     ip = i + idph*(j-1) + idph*nd2p*(k-1)
+                     if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p
+               do i = 1, idph*nd2p
+                  ip = i + idph*nd2p*(k-1)
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+         else
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + 1
+               f2or1(ip) = 1.d0
+            end do
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + nlph + 1
+               f2or1(ip) = 1.d0
+            end do
+            do j = nlph+2, idph
+               do i = 1,nd2p*nnp
+                  ip = idph*(i-1)+j
+                  f2or1(ip) = 0.d0
+               end do
+            end do
+            do j = nmp+1, nd2p
+               do k = 1, nnp
+                  do i = 1, nlph
+                     ip = i + idph*(j-1) + idph*nd2p*(k-1)
+                     f2or1(ip) = 0.d0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p
+               do i = 1, idph*nd2p
+                  ip = i + idph*nd2p*(k-1)
+                  f2or1(ip) = 0.d0
+               end do
+            end do
+         end if
+!!$       do i = ista, iend
+!!$          if(mod(i*2,idp) == 2 .or. mod(i*2,idp) == 0) f2or1(i) = 1.d0
+!!$       end do
+#endif
+      else
+#ifdef _MPIFFT_
+!!$         f2or1 = 0.d0                 ! f2or1 works to the fft data in Rspace.
+!!$         do k = 1, min(nz_d,nnp-nz_d*myrank_cdfft)
+!!$            do j = 1, nmp     ! nmp = fft_box_size_CD(2,1)
+!!$               do i = 1, nlp  ! nlp = fft_box_size_CD(1,1)
+!!$                  ip = i+(j-1)*idp+(k-1)*idp*nd2p+idp*nd2p*lz_d*myrank_cdfft
+!!$                  f2or1(ip) = 1.d0
+!!$               end do
+!!$            end do
+!!$         end do
+!         if(iprixc >= 2 ) write(nfout,'(" ix kimg = 2 <<set_f2or1>>")')
+         f2or1 = 1.d0
+         do j = nlp+1, idp      ! x
+            do i = 1, ly*nz_d
+               ip = idp*(i-1)+j+ista-1
+               f2or1(ip) = 0.d0
+            end do
+         end do
+!         if(iprixc >= 2 ) write(nfout,'(" iy kimg = 2 <<set_f2or1>>")')
+         do j = nmp+1,ly         ! y
+            do k = 1, nz_d
+               do i = 1, nlp
+                  ip = i + idp*(j-1) + idp*ly*(k-1) + ista-1
+                  f2or1(ip) = 0.d0
+               end do
+            end do
+         end do
+!         if(iprixc >= 2 ) write(nfout,'(" iz kimg = 2 <<set_f2or1>>")')
+         do  k = nz_d+1, lz_d   ! z
+            do i = 1, idp*ly
+               ip = i + idp*ly*(k-1) + ista-1
+               f2or1(ip) = 0.d0
+            end do
+         end do
+#else
+         f2or1 = 1.d0
+         if(npes >= 2) then
+            do j = nlp+1, idp    ! x
+               do i = 1, nd2p*nnp
+                  ip = idp*(i-1)+j
+                  if(ip>= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+            do j = nmp+1, nd2p   ! y
+               do k = 1, nnp
+                  do i = 1, nlp
+                     ip = i + idp*(j-1) + idp*nd2p*(k-1)
+                     if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p   ! z
+               do i = 1, idp*nd2p
+                  ip = i + idp*nd2p*(k-1)
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+         else
+            do j = nlp+1, idp    ! x
+               do i = 1, nd2p*nnp
+                  ip = idp*(i-1)+j
+! ================================ modifed by K. Tagami ====( uncertain )== 11.0
+!                  f2or1(ip) = 0.d0
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+! ===================================================================== 11.0
+               end do
+            end do
+            do j = nmp+1, nd2p   ! y
+               do k = 1, nnp
+                  do i = 1, nlp
+                     ip = i + idp*(j-1) + idp*nd2p*(k-1)
+! ================================ modifed by K. Tagami ====( uncertain )== 11.0
+!                     f2or1(ip) = 0.d0
+                     if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+! ===================================================================== 11.0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p   ! z
+               do i = 1, idp*nd2p
+                  ip = i + idp*nd2p*(k-1)
+! ================================ modifed by K. Tagami ====( uncertain )== 11.0
+!                  f2or1(ip) = 0.d0
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+! ===================================================================== 11.0
+               end do
+            end do
+         end if
+#endif
+      end if
+    end subroutine set_f2or1
+  end subroutine m_FFT_coef_CD_integration_kt
+! ==================== 2015/09/24
+
   subroutine m_FFT_coef_CD_integration(f2or1)
     real(kind=DP),intent(out), dimension(ista_sfftph:iend_sfftph) :: f2or1
     integer :: idp, nlp, nmp, nnp, nd2p, idph, nlph, ip, i, j, k
diff -uprN phase0_2015.01/src_phase/m_Files.F90 phase0_2015.01.01/src_phase/m_Files.F90
--- phase0_2015.01/src_phase/m_Files.F90	2015-09-14 15:18:31.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Files.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  SOFTWARE NAME : PHASE ($Revision: 454 $)
+!  SOFTWARE NAME : PHASE ($Revision: 483 $)
 !
 !  MODULE: m_Files
 !
@@ -36,7 +36,7 @@
 !
 !
 module m_Files
-!  $Id: m_Files.F90 454 2015-09-07 07:58:39Z yamasaki $
+!  $Id: m_Files.F90 483 2016-04-27 04:02:48Z ktagami $
 !
 !  Operations concerning to files as "open", and "close",
 !  should be done in this module.
@@ -103,7 +103,9 @@ module m_Files
 ! ===================================== 13.0S
        &, nfdynm_cif & ! CIF output
        &, nfwfk_sq   &   ! squared wf
-       &, nfwfk_integ_mom    ! moment of wf ( integerated over space )
+       &, nfwfk_integ_mom  &  ! moment of wf ( integerated over space )
+       &, nfwfk_orb_proj      &      ! orbital-projection of wf
+       &, nfband_spectr_wght         ! spectral weight for band unfolding 
 
   data  &
        &   nfinp,nfpot &                                               ! 31,(37,38,39,40,45,46,11-19,36)
@@ -140,6 +142,8 @@ module m_Files
        &  ,nfdynm_cif &
        &  ,nfwfk_sq &
        &  ,nfwfk_integ_mom &
+       &  ,nfwfk_orb_proj &
+       &  ,nfband_spectr_wght &
 
        &    /31,37,38,39,40,45,46 &
        & ,11,12,13,14,15,16,17,18,19,36 &
@@ -169,9 +173,11 @@ module m_Files
 ! ================================== 13.0S
        &, 500 &
        &, 350 & 
-       &, 351 /
+       &, 351 &
+       &, 352 &
+       &, 345 /
 
-  integer,private,parameter :: number_of_all_files = 76 + MAXNSP
+  integer,private,parameter :: number_of_all_files = 78 + MAXNSP
 
   integer,private, dimension(number_of_all_files) :: n_file
   data n_file &
@@ -202,7 +208,7 @@ module m_Files
 ! ================= KT_add ======== 13.0S
        & ,370, 371, 372 &
 ! ================================= 13.0S
-       & ,500, 350, 351 /
+       & ,500, 350, 351, 352, 345 /
 
   integer,private,parameter :: stringlength_of_filenames = 260
   character(len=stringlength_of_filenames) ::  &
@@ -238,7 +244,8 @@ module m_Files
 ! ================= KT_add ====== 13.0S
        &, F_CORE_ENERGY_OUT, F_CORE_ENERGY_INITIAL,  F_CORE_ENERGY_FINAL &
 ! =============================== 13.0S
-       &, F_DYNM_CIF, F_WFk_Squared, F_WFk_IntegMoment
+       &, F_DYNM_CIF, F_WFk_Squared, F_WFk_IntegMoment &
+       &, F_WFK_ORB_PROJ, F_BAND_SPECTR_WGHT
 
   namelist/fnames/ &
        &  F_INP, F_POT, F_PKB, F_PD, F_PPC, F_STOP, F_OPGR &
@@ -272,7 +279,8 @@ module m_Files
 ! ================= KT_add ====== 13.0S
        &, F_CORE_ENERGY_OUT, F_CORE_ENERGY_INITIAL,  F_CORE_ENERGY_FINAL &
 ! =============================== 13.0S
-       &, F_DYNM_CIF, F_WFk_Squared, F_Wfk_IntegMoment
+       &, F_DYNM_CIF, F_WFk_Squared, F_Wfk_IntegMoment &
+       &, F_WFK_ORB_PROJ, F_BAND_SPECTR_WGHT
 
 
   logical ::             F_ZAJ_partitioned      = .false.
@@ -594,6 +602,12 @@ contains
     if(mype==0) close(nfcntn_berry)
   end subroutine m_Files_close_nfcntn_berry
 
+  logical function m_Files_nfcntn_bin_paw_exists()
+    logical :: ex
+    inquire(file=F_CNTN_BIN_PAW,exist=ex) 
+    m_Files_nfcntn_bin_paw_exists = ex
+  end function m_Files_nfcntn_bin_paw_exists
+
   subroutine m_Files_open_nfcntn_bin_paw
     logical open
     inquire(unit = nfcntn_bin_paw, opened = open)
@@ -890,6 +904,9 @@ contains
 
     F_WFk_Squared  = "./wfnsq.cube"
     F_WFk_IntegMoment   = "./wfn_integ_moment.data"
+    F_WFK_ORB_PROJ    = "./wfn_orb_proj.data"
+
+    F_BAND_SPECTR_WGHT = "./nfband_spectr_wght.data"
 
   end subroutine m_Files_set_default_filenames
       
@@ -1582,6 +1599,76 @@ contains
     end if
   end subroutine m_Files_close_nfeng
 
+! ===
+  subroutine m_Files_open_nfwfk_orb_proj(icond)
+    integer, intent(in) :: icond
+    logical :: open
+
+    if (mype==0) then
+       inquire(unit = nfwfk_orb_proj, opened = open)
+
+       if (open) close(nfwfk_orb_proj)
+
+       if (icond == FIXED_CHARGE .or. &
+            & (icond==FIXED_CHARGE_CONTINUATION &
+            &   .and. fixed_charge_k_parallel==ALL_AT_ONCE)) then
+          call open0( nfwfk_orb_proj, F_WFK_ORB_PROJ, 'F_WFK_ORB_PROJ',&
+               &      unknown, formatted, check_file_name_on )
+       else if (icond==FIXED_CHARGE_CONTINUATION &
+            &        .and. fixed_charge_k_parallel==ONE_BY_ONE) then
+          call open1( nfwfk_orb_proj, F_WFK_ORB_PROJ, 'F_WFK_ORB_PROJ', &
+               &      unknown,formatted, check_file_name_on )
+       else
+          call open1( nfwfk_orb_proj, F_WFK_ORB_PROJ, 'F_WFK_ORB_PROJ', &
+               &      unknown,formatted, check_file_name_on )
+       end if
+    end if
+  end subroutine m_Files_open_nfwfk_orb_proj
+
+  subroutine m_Files_close_nfwfk_orb_proj()
+    logical :: open
+
+    if (mype==0) then
+       inquire( unit=nfwfk_orb_proj, opened = open )
+       if(open) close( nfwfk_orb_proj )
+    end if
+  end subroutine m_Files_close_nfwfk_orb_proj
+
+  subroutine m_Files_open_nfband_spwt(icond)
+    integer, intent(in) :: icond
+    logical :: open
+
+    if (mype==0) then
+       inquire(unit = nfband_spectr_wght, opened = open)
+
+       if (open) close(nfband_spectr_wght)
+
+       if (icond == FIXED_CHARGE .or. &
+            & (icond==FIXED_CHARGE_CONTINUATION &
+            &   .and. fixed_charge_k_parallel==ALL_AT_ONCE)) then
+          call open0( nfband_spectr_wght, F_BAND_SPECTR_WGHT, 'F_BAND_SPECTR_WGHT',&
+               &      unknown, formatted, check_file_name_on )
+       else if (icond==FIXED_CHARGE_CONTINUATION &
+            &        .and. fixed_charge_k_parallel==ONE_BY_ONE) then
+          call open1( nfband_spectr_wght, F_BAND_SPECTR_WGHT, 'F_BAND_SPECTR_WGHT ', &
+               &      unknown,formatted, check_file_name_on )
+       else
+          call open1( nfband_spectr_wght, F_BAND_SPECTR_WGHT, 'F_BAND_SPECTR_WGHT', &
+               &      unknown,formatted, check_file_name_on )
+       end if
+    end if
+  end subroutine m_Files_open_nfband_spwt
+
+  subroutine m_Files_close_nfband_spwt()
+    logical :: open
+
+    if (mype==0) then
+       inquire( unit=nfband_spectr_wght, opened = open )
+       if(open) close( nfband_spectr_wght )
+    end if
+  end subroutine m_Files_close_nfband_spwt
+! ====
+
   subroutine m_Files_close_nfinp()
     logical :: op
     if(mype == 0) then
diff -uprN phase0_2015.01/src_phase/m_Force.F90 phase0_2015.01.01/src_phase/m_Force.F90
--- phase0_2015.01/src_phase/m_Force.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Force.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE: m_Force
 !
@@ -49,7 +49,7 @@
 #endif
 
 module m_Force
-! $Id: m_Force.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Force.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_IterationNumbers,     only : iteration_ionic
   use m_Charge_Density,       only : chgq_l, hsr
   use m_Electronic_Structure, only : zaj_l,vlhxcQ,occup_l,eko_l &
@@ -104,6 +104,12 @@ module m_Force
   use m_FFT,                  only : fft_box_size_CD,nfftps
   use m_ES_nonlocal,          only : betar_dot_Psi_4_each_k_in_rs0
 
+! === Positron ==== 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, positron_method
+  use m_Const_Parameters,    only : Positron_GGGC, OFF
+  use  m_Positron_Wave_Functions,  only : pchg_l
+! ================= 2015/11/28
+
   implicit none
   include 'mpif.h'
 
@@ -492,6 +498,29 @@ contains
          end if
       end do
 
+! == POSITRON SCF === 2015/11/28
+      if ( sw_positron /= OFF ) then
+         if ( positron_method == Positron_GGGC ) then
+            if(kimg == 1) then
+               do i = ista_kngp, iend_kngp  !for mpi
+                  tmp = -zfsin(i) *pchg_l(i,1) *psc_l(i,it)
+                  fx = fx + tmp*ngabc(i,1)
+                  fy = fy + tmp*ngabc(i,2)
+                  fz = fz + tmp*ngabc(i,3)
+               end do
+            else
+               do i = ista_kngp, iend_kngp  !for mpi
+                  tmp = -( zfsin(i) *pchg_l(i,1) +zfcos(i)*pchg_l(i,2) )&
+                       &  *psc_l(i,it)
+                  fx = fx + tmp*ngabc(i,1)
+                  fy = fy + tmp*ngabc(i,2)
+                  fz = fz + tmp*ngabc(i,3)
+               end do
+            endif
+         endif
+      endif
+! ============= 2015/11/28
+
       f1_mpi(1) = fx; f1_mpi(2) = fy; f1_mpi(3) = fz
       call mpi_allreduce(f1_mpi,f2_mpi,3 &
            &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
diff -uprN phase0_2015.01/src_phase/m_Ionic_System.F90 phase0_2015.01.01/src_phase/m_Ionic_System.F90
--- phase0_2015.01/src_phase/m_Ionic_System.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Ionic_System.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 442 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 490 $)
 !
 !  MODULE: m_Ionic_System
 !
@@ -53,7 +53,7 @@
 
 module m_Ionic_System
 !     (m_IS)
-!  $Id: m_Ionic_System.F90 442 2015-08-03 08:52:00Z jkoga $
+!  $Id: m_Ionic_System.F90 490 2016-05-27 04:49:30Z jkoga $
 !
 !  This module is for structure factor, ewald energy,
 !  and motions of atoms.
@@ -295,6 +295,7 @@ module m_Ionic_System
   integer         ::                                  nrsv = 1  ! number of heat bath
   integer         ::                                  set_initial_velocity = ON  ! (by J. Koga)
   integer         ::                                  sw_read_velocities = OFF  ! (by T. Yamamoto)
+  integer         ::                                  sw_shift_velocities = OFF
   real(kind=DP),private ::                            tk_initial = 0.d0
 !!$  real(kind=DP),private,allocatable,dimension(:)   :: qmass,tkb,cprv,frsv ! d(nrsv)
   real(kind=DP),allocatable,dimension(:)   :: qmass,tkb,cprv,frsv ! d(nrsv)
@@ -358,6 +359,7 @@ module m_Ionic_System
   character(len("set_initial_velocity")),private,parameter :: tag_set_initial_velocity = "set_initial_velocity" ! (by J. Koga)
   character(len("sw_read_velocities")),private,parameter :: tag_sw_read_velocities = "sw_read_velocities"
   character(len("initial_temperature")),private,parameter :: tag_initial_temperature = "initial_temperature"
+  character(len("sw_shift_velocities")),private,parameter :: tag_sw_shift_velocities = "sw_shift_velocities"
 
   !     constraint             sigma                       sgmc
   ! 1. BONDLENGTH_FIX_1 
@@ -836,7 +838,10 @@ contains
   end subroutine m_IS_put_lattice_system
 
   subroutine alloc_normal_hypervector()
-    allocate(normal_hypervector(natm,3,PUCV:CARTS))
+    if ( .not. allocated(normal_hypervector) ) then
+       allocate(normal_hypervector(natm,3,PUCV:CARTS))
+    endif
+    normal_hypervector = 0.0d0
   end subroutine alloc_normal_hypervector
 
   subroutine set_normal_hypervector()
@@ -1073,7 +1078,11 @@ contains
 
        deallocate(work)
 
+       constraints_exist = .false.
+
        ! --- constraint ---
+       call initialize_constraint_param     ! use this for cell optimization
+
        if( f_selectBlock( tag_constraint) == 0) then
           if(ipriinputfile >= 1) write(nfout,'(" !** -- tag_constraint is found --")')
           if(imdalg == CG_STROPT) then
@@ -1114,7 +1123,7 @@ contains
              iret = f_selectParentBlock()
           end if
           ! --- setting bondlength fix sets ---
-          call alloc_bondlength_fix_set()  ! allocate(bondlength_fix_set(2,num_fixed_bonds))
+          if (tf) call alloc_bondlength_fix_set()  ! allocate(bondlength_fix_set(2,num_fixed_bonds))
           tf = f_selectBlock( tag_fixed_bond) == 0
           if(.not.tf) tf = f_selectBlock( tag_fix_bondlength) == 0
           if(.not.tf) tf = f_selectBlock( tag_bondlength_fix) == 0
@@ -1757,7 +1766,9 @@ contains
              if ( f_getIntValue(tag_sw_read_velocities,iret) == 0 ) then
                  sw_read_velocities = iret
              endif
-             
+             if ( f_getIntValue(tag_sw_shift_velocities,iret) == 0 ) then
+                 sw_shift_velocities = iret
+             endif 
              if( f_getRealValue(tag_initial_temperature,dret,"") == 0 ) then
                 tk_initial = dret * CONST_kB
                 if(ipriinputfile >= 1 .and. printable) then
@@ -4377,7 +4388,8 @@ contains
             pcom(i) = 0.d0
             do ia=1,natm
                ir = icnstrnt_typ(imdt(ia),imdalg_t)
-               if ((imode == 1 .and. ir >= 1 .and. imdtyp(ia).ne.0).or.(imode==2 .and. ir == 1.and.imdtyp(ia).ne.0)) then
+               if ((imode == 1 .and. ir >= 1 .and. imdtyp(ia).ne.0).or. &
+             &     (imode == 2 .and. ir == 1 .and. imdtyp(ia).ne.0)) then
                   cpd_l(ia,i) = random(ir,ia,i)
                   if(iprimd >= 2) write(nfout,'(" !!! ia, ir = ",2i8," cpd_l(ia,",i3,") = " &
                        & ,f12.6, " <<set_initial_velocities>>")') ia, ir, i, cpd_l(ia,i)
@@ -4507,6 +4519,79 @@ contains
 
   end subroutine m_IS_rd_n
 
+  subroutine shift_velocities(imode)
+      integer, intent(in) :: imode
+      integer :: i,j,ia,ir
+      real(kind=DP)   :: mcom
+      integer,dimension(natm) :: imdt
+      integer :: icnstrnt_typ
+      integer :: imdalg_t
+      real(kind=DP),dimension(3)   :: pcom
+      real(kind=DP), parameter :: eps = 1.d-12
+      if(imode == 1) then
+         do i=1,natm
+            if ( imdtyp(i) .le. NOSE_HOOVER ) then
+               imdt(i) = NOSE_HOOVER + 1
+            else 
+               imdt(i) = imdtyp(i)
+            endif
+         enddo
+         imdalg_t = T_CONTROL
+      else
+         do i=1, natm
+            imdt(i) = imdtyp(i)
+         end do
+         imdalg_t = VERLET
+      end if
+
+      do i=1,3
+         pcom(i) = 0.d0
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if ((imode == 1 .and. ir >= 1 .and. imdtyp(ia).ne.0).or. &
+          &     (imode == 2 .and. ir == 1 .and. imdtyp(ia).ne.0)) then
+              pcom(i) = pcom(i) + amion(ityp(ia))*cpd_l(ia,i)
+            endif
+         enddo
+      enddo
+
+      mcom = 0.d0
+      if(imode == 1) then
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (  ir >= 1 .and. imdtyp(ia).ne.0 ) then
+               mcom = mcom + amion(ityp(ia))
+            end if
+         end do
+      else
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (  ir == 1 .and. imdtyp(ia).ne.0 ) then
+               mcom = mcom + amion(ityp(ia))
+            end if
+         end do
+      end if
+
+      ! shift velocity
+      if(mcom.gt.eps) pcom(:) = pcom(:)/mcom
+
+      if(imode == 1) then
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (   ir >= 1 .and. imdtyp(ia).ne.0 ) then
+               cpd_l(ia,:) = cpd_l(ia,:) - pcom(:)
+            endif
+         enddo
+      else
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (   ir == 1 .and. imdtyp(ia).ne.0 ) then
+               cpd_l(ia,:) = cpd_l(ia,:) - pcom(:)
+            endif
+         enddo
+      end if
+  end subroutine shift_velocities
+
 !!$ 2011.06.06
   subroutine scale_velocity()
      integer :: ia,ir,irp
@@ -5647,6 +5732,7 @@ contains
        stop ' Invalid value of mdmode <<m_IS_md>>'
     end if
 
+    if (sw_shift_velocities==ON.and.mdalg==VERLET) call shift_velocities(1)
     deallocate(forc_l)
     call md1_dealloc()
     call tstatc0_end(id_sname)
@@ -6353,6 +6439,12 @@ contains
   end function m_IS_force_check_md_nhp
 ! <--
 
+  subroutine initialize_constraint_param
+    forcmx_constraint_quench = 1.d+2
+    forc_norm_hyperplane_vert = 1.d+2
+    forcmx_hyperplane_vert = 1.d+2
+  end subroutine initialize_constraint_param
+
   subroutine evolve_velocities(mdalg,forc_l)
     integer, intent(in) ::                          mdalg
     real(kind=DP), intent(inout), dimension(natm,3) :: forc_l
@@ -8005,7 +8097,7 @@ end if
     if(t_ctrl_method == VELOCITY_SCALING)then
        call scale_velocity()
     endif
-
+    if (sw_shift_velocities==ON) call shift_velocities(1)
     call md2_dealloc                                    !-(m_Ionic_System)
     call tstatc0_end(id_sname)
   contains
@@ -8593,6 +8685,10 @@ end if
     integer ::              id_sname = -1
     call tstatc0_begin('m_IS_gdiis ',id_sname)
 
+    if(absolute_convergence_of_forc(forc_l_in))then
+       write(nfout,'(a)') ' m_IS_gdiis: forces are absolutely converged!! nothing to do...'
+       return
+    endif
 
     if(mdmode == ORDINA) then
        allocate(forc_l(natm,3))
@@ -9783,6 +9879,10 @@ end if
     integer ::              id_sname = -1
     call tstatc0_begin('m_IS_cg2',id_sname)
 
+     if(absolute_convergence_of_forc(forc_l_in))then
+       write(nfout,'(a)') ' CG2: forces are absolutely converged!! nothing to do...'
+       return
+     endif
 
     if(.not.allocated(vec_g)) allocate(vec_g(natm,3))
     if(.not.allocated(vec_h)) allocate(vec_h(natm,3))
@@ -9792,6 +9892,7 @@ end if
     if(.not.allocated(cps2)) allocate(cps2(natm,3))
     if(.not.allocated(f_total2)) allocate(f_total2(natm,3))
 
+
     f_total0(:,:) = forc_l_in(:,:)
 
 
@@ -10643,6 +10744,80 @@ end if
 #endif
   end subroutine m_IS_symmetrize_atom_pos
 
+  subroutine m_IS_force_af_symmetry(nfout)
+#ifdef SX
+!CDIR BEGIN NOVECTOR
+#endif
+    integer, intent(in) :: nfout
+
+    real(kind=DP), dimension(natm2,3) :: cps_wk,cps_wk2
+    real(kind=DP), dimension(natm,3) :: cpso,poso
+    real(kind=DP), dimension(3,3) :: rltv_t
+    real(kind=DP), dimension(3) :: p,di,dimin
+    real(kind=DP) :: df,dfmin
+    integer, dimension(natm2) :: ityp_wk,ityp_af
+    integer :: i,n,ia,ja,iia
+    cps_wk(1:natm,1:3)  = cps(1:natm,1:3)
+    do i=1,natm
+       ityp_af(i) = nint(iatomn(ityp(i)))
+    end do
+    n = natm
+    do i=1,natm
+       if(iwei(i)==1) cycle
+       n = n + 1
+       cps_wk(n,1:3) = -cps(i,1:3)
+       ityp_af(n) = nint(iatomn(ityp(i)))
+    end do
+    cps_wk2 = 0.d0
+    do ia=1,natm2
+       p = matmul(op(1:3,1:3,nopr+af),cps_wk(ia,1:3)) + tau(1:3,nopr+af,CARTS)
+       iia = 0
+       dfmin = 1.d10
+       do ja=1,natm2
+          if(ityp_af(ia) /= ityp_af(ja)) cycle
+          di = matmul(transpose(rltv),(p - cps_wk(ja,1:3)))
+          df = sum(abs(cos(di(1:3))-1.d0))
+          if(df < dfmin) then
+             iia = ja
+             dfmin = df
+             dimin = di/PAI2
+          end if
+       end do
+       if(iia == 0) stop 'm_IS_symmetrize_atom_pos: error iia=0'
+       p = p - matmul(altv,nint(dimin))
+       cps_wk2(iia,1:3) = cps_wk2(iia,1:3) + p(1:3)
+       cps_wk(iia,1:3) = cps_wk2(iia,1:3)
+    end do
+
+    cpso = cps
+    poso = pos
+    cps = cps_wk2(1:natm,1:3)
+    rltv_t = transpose(rltv)/PAI2
+    call change_of_coordinate_system(rltv_t,cps,natm,natm,pos) !-(b_I.S.) cps -> pos
+    if(printable.and.ipri>1) then
+       write(nfout,*) 'Atomic coordinates were symmetrized.'
+       !!$write(nfout,'(20x,"Inputted Cartesian coordinate -> symmetrized Cartesian coordinate")')
+       !!$do ia=1,natm
+       !!$   write(nfout,'(i4,3f15.8," -> ",3f15.8)') ia,cpso(ia,1:3),cps(ia,1:3)
+       !!$end do
+       !!$write(nfout,'(20x,"Inputted internal coordinate  -> symmetrized internal coordinate")')
+       !!$do ia=1,natm
+       !!$   write(nfout,'(i4,3f15.8," -> ",3f15.8)') ia,poso(ia,1:3),pos(ia,1:3)
+       !!$end do
+       write(nfout,'(" === Symmetrized Cartesian coordinates and errors===")')
+       do ia=1,natm
+          write(nfout,'(i4,7f18.9)') ia,cpso(ia,1:3),cps(ia,1:3),sqrt(sum((cps(ia,1:3)-cpso(ia,1:3))**2))
+       end do
+       write(nfout,'(" === Symmetrized internal coordinates ===")')
+       do ia=1,natm
+          write(nfout,'(i4,6f18.9)') ia,poso(ia,1:3),pos(ia,1:3)
+       end do
+    end if
+#ifdef SX
+!CDIR END
+#endif
+  end subroutine m_IS_force_af_symmetry
+
   subroutine m_IS_dealloc(neb_mode)
     logical, intent(in), optional :: neb_mode
     logical :: neb
@@ -11478,4 +11653,19 @@ end if
      deallocate(pos_full)
   end subroutine m_IS_dump_cif
 
+  logical function absolute_convergence_of_forc(forc_l_in)
+    real(kind=DP), intent(in), dimension(natm,3) :: forc_l_in
+    real(kind=DP),allocatable,dimension(:,:) :: forct
+    integer :: ia
+    allocate(forct(natm,3));forct=forc_l_in
+    absolute_convergence_of_forc = .false.
+    do ia=1,natm
+       if(imdtyp(ia)==0) forct(ia,:)=0.d0
+    enddo
+    if(sum(forct(1:natm,1:3)**2).lt.1e-15)then
+        absolute_convergence_of_forc = .true.
+    endif
+    deallocate(forct)
+  end function absolute_convergence_of_forc
+
 end module m_Ionic_System
diff -uprN phase0_2015.01/src_phase/m_Kpoints.F90 phase0_2015.01.01/src_phase/m_Kpoints.F90
--- phase0_2015.01/src_phase/m_Kpoints.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Kpoints.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 486 $)
 !
 !  MODULE: m_Kpoint
 !
@@ -37,7 +37,7 @@
 !
 module m_Kpoints
 !     (m_Kp)
-!  $Id: m_Kpoints.F90 440 2015-08-03 07:18:01Z ktagami $
+!  $Id: m_Kpoints.F90 486 2016-05-15 13:59:14Z ktagami $
 ! 
   use m_Crystal_Structure,   only : il,imag,inv,ngen,igen,jgen, a,b,c,ca,cb,cc &
        &                          , altv, rltv, nbztyp , nbztyp_spg, n1_sc, n2_sc, n3_sc
@@ -237,7 +237,7 @@ contains
        end if
     end if
     if(npes > 1) call mpi_bcast(kv3_previous,1,mpi_integer,0,mpi_comm_group,ierr)
-    if(printable) write(nfout,'(i5, " : kv3_previous_job")') kv3_previous
+    if(printable) write(nfout,'(i8, " : kv3_previous_job")') kv3_previous
   end subroutine m_Kp_rd_kv3
 
   subroutine m_Kp_rd_n(nfout)
@@ -560,7 +560,7 @@ contains
        allocate(vkxyz(kv3,3,CRDTYP)); vkxyz = 0.d0
        if(ipri_kp >= 2) then
           write(nfout,'(" !kp vkxyz is allocated now <<m_Kp_alloc_kpoints>>")')
-          write(nfout,'(" !kp kv3 = ", i5," CRDTYP = ",i3)') kv3,CRDTYP
+          write(nfout,'(" !kp kv3 = ", i8," CRDTYP = ",i3)') kv3,CRDTYP
        end if
     else
        if(ipri_kp >= 2) write(nfout,'(" !kp vkxyz is already allocated <<m_Kp_alloc_kpoints>>")')
@@ -692,9 +692,9 @@ contains
 ! ================================================================ 11.0
 
     if(ipri_kp >=1 ) then
-       write(nfout,'(" !kp ek_group = ",i5)') ek_group
-       write(nfout,'(" !kp kv3_ek   = ",i5)') kv3_ek
-       write(nfout,'(" !kp kv3      = ",i5)') kv3
+       write(nfout,'(" !kp ek_group = ",i8)') ek_group
+       write(nfout,'(" !kp kv3_ek   = ",i8)') kv3_ek
+       write(nfout,'(" !kp kv3      = ",i8)') kv3
     end if
   end subroutine m_Kp_set_ek_group
 
@@ -783,7 +783,7 @@ contains
          read(nfinp,'(a132)') str
          if(ipri_kp>=2) write(nfout,'(" ! str : ",a50)') str(1:50)
          call read_nkpnt(str,len_str,ikpnt) !-(b_Kpoints)
-         if(ipri_kp>=2) write(nfout,'(" ! ikpnt(#skipped lines) = ",i5)') ikpnt
+         if(ipri_kp>=2) write(nfout,'(" ! ikpnt(#skipped lines) = ",i8)') ikpnt
          read(nfinp,'(a132)') str
          call read_coordsystem(str,len_str,sw_k_coord_system) !-(b_Kpoints)
          if(sw_k_coord_system == NODATA) ikpnt = ikpnt - 1
@@ -976,7 +976,7 @@ contains
 ! =============== 2014/09/30
 
     else
-       if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i5, " nspin = ", i5)') kv3,nspin
+       if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i8, " nspin = ", i5)') kv3,nspin
 
 ! ======================= added by K. Tagami ================== 11.0
        if(ipri_kp>=1) write(nfout,*) 'ndim_spinor = ', ndim_spinor
@@ -1794,7 +1794,7 @@ contains
       call first_bz(b1,b2,b3,nkmesh,kmesh,nmp_kmesh,mp_kmesh,nface,face)
 
       if(printable .and. printlevel .ge. 0) &
-      &  write(output,'(1x,"number of k-points in MP mesh        = ",i5)') nmp_kmesh
+      &  write(output,'(1x,"number of k-points in MP mesh        = ",i8)') nmp_kmesh
 
       ! debug
       !do i=1,nmp_kmesh
@@ -1826,7 +1826,7 @@ contains
       call first_bz(b1,b2,b3,nkmesh,kmesh,nfbz_kmesh,fbz_kmesh,nface,face)
 
       if(printable .and. printlevel .ge. 0) &
-      &  write(output,'(1x,"number of k-points in full BZ        = ",i5)') nfbz_kmesh
+      &  write(output,'(1x,"number of k-points in full BZ        = ",i8)') nfbz_kmesh
 
       ! debug
       !do i=1,nfbz_kmesh
@@ -2272,7 +2272,8 @@ contains
       integer :: i,j,n
       integer :: nequiv(nkpmax)
       real(kind=DP) :: g(3),q(3)
-      real(kind=DP), parameter :: eps = 1.d-3
+!      real(kind=DP), parameter :: eps = 1.d-3
+      real(kind=DP), parameter :: eps = 1.d-5
       logical :: not_included(nfbz_kmesh)
 
       not_included(1:nfbz_kmesh) = .true.
@@ -2334,7 +2335,8 @@ contains
       integer :: i,j,n
       integer :: nequiv(nkpmax)
       real(kind=DP) :: g(3), q(3), vec_tmp(3)
-      real(kind=DP), parameter :: eps = 1.d-3
+!      real(kind=DP), parameter :: eps = 1.d-3
+      real(kind=DP), parameter :: eps = 1.d-5
       logical :: not_included(nfbz_kmesh)
 
       not_included(1:nfbz_kmesh) = .true.
@@ -2436,7 +2438,7 @@ contains
 
 
       if(printable .and. printlevel .ge. 0) &
-      &  write(output,'(1x,"number of k-points in irreducible BZ = ",i5)') nkpoint
+      &  write(output,'(1x,"number of k-points in irreducible BZ = ",i8)') nkpoint
 
 
       call gen_rgrid(a1,a2,a3,rgrid)
@@ -2515,7 +2517,7 @@ contains
       if( n == nshell ) stop 'accuracy: n == nshell, accuracy check failure'
 
       if(printable .and. printlevel .ge. 0 ) then
-        write(output,'(1x,"Index of the first none zero shell = ",i5)') none_zero
+        write(output,'(1x,"Index of the first none zero shell = ",i8)') none_zero
         write(output,'(1x,"|R| of the first none zero shell   = ",f10.5)') length_nz
         write(output,'(1x,"phase sum = ",f10.5)') sm_nz
         write(output,'(1x,"efficiency = ",f8.2)') dble(none_zero)/dble(nkpoint)
@@ -2524,7 +2526,7 @@ contains
       if(printable .and. printlevel .ge. 1) then
         write(output,'(5x,4(1x,a10))') 'k1','k2','k3','weight'
         do i=1,nkpoint
-          write(output,'(i5,4(1x,f10.5))') i,kpoint(1:3,i),weight(i)
+          write(output,'(i8,4(1x,f10.5))') i,kpoint(1:3,i),weight(i)
         end do
       end if
 
@@ -2869,7 +2871,7 @@ contains
   subroutine m_Kp_set_kv3(nk)
     integer, intent(in) ::nk
     kv3 = nk
-    if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i5," <<m_kp_set_kv3>>")') kv3
+    if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i8," <<m_kp_set_kv3>>")') kv3
   end subroutine m_Kp_set_kv3
 
   subroutine m_Kp_set_mesh_super
diff -uprN phase0_2015.01/src_phase/m_Ldos.F90 phase0_2015.01.01/src_phase/m_Ldos.F90
--- phase0_2015.01/src_phase/m_Ldos.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Ldos.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 471 $)
 !
 !  MODULE: m_Ldos
 !
@@ -31,7 +31,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 module m_Ldos
-! $Id: m_Ldos.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: m_Ldos.F90 471 2015-11-13 01:14:05Z jkoga $
   use m_Const_Parameters, only   : DP, CMPLDP, REGULAR_INTERVALS, BY_ATOMIC_POSITIONS &
        &                         , DELTA10, EXECUT, ON, SOFTPART, HARDPART, DIRECT, PAI2 &
        &                         , ALDOS, LAYERDOS, NO, OFF, ELECTRON
@@ -72,7 +72,8 @@ module m_Ldos
        &                         , m_CD_restore_chgq &
        &                         , m_CD_map_chgq_to_fft_box &
        &                         , m_CD_set_ylm_enl_etc &
-       &                         , m_CD_dealloc_ylm_enl_etc 
+       &                         , m_CD_dealloc_ylm_enl_etc &
+       &                         , m_CD_keep_retrieve_hsr
 
 ! ============================== added by K. Tagami ============== 11.0
   use m_Control_Parameters,    only : noncol, ndim_magmom, ndim_spinor
@@ -648,6 +649,7 @@ contains
 
     denom = 1.d0/product(fft_box_size_WF(1:3,1))
 
+    call m_CD_keep_retrieve_hsr(.true.)
     if(ekmode == ON) then
        is = 1
        do ik = 1, kv3
@@ -856,6 +858,8 @@ contains
 
     end if
 
+    call m_CD_keep_retrieve_hsr(.false.)
+
     if(modnrm == EXECUT) call m_CD_dealloc_ylm_enl_etc()
     if(hardpart_subroutine/=2.and.sw_rspace_ldos==OFF) call m_CD_restore_chgq()
     if(allocated(meshwk)) deallocate(meshwk)
@@ -1557,6 +1561,7 @@ contains
     allocate( chgq_enl_kt(kgp,kimg,ndim_magmom) )
     chgq_enl_kt = 0.0d0
 
+    call m_CD_keep_retrieve_hsr(.true.)
     if (ekmode == ON) then
        do ik = 1, kv3, ndim_spinor
           call m_FFT_alloc_WF_work()
@@ -1811,6 +1816,8 @@ contains
 
     end if
 
+    call m_CD_keep_retrieve_hsr(.false.)
+
     if (modnrm == EXECUT) call m_CD_dealloc_ylm_enl_etc()
     if (hardpart_subroutine/=2.and.sw_rspace_ldos==OFF) call m_CD_restore_chgq()
 
diff -uprN phase0_2015.01/src_phase/m_OP_Moment.F90 phase0_2015.01.01/src_phase/m_OP_Moment.F90
--- phase0_2015.01/src_phase/m_OP_Moment.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_OP_Moment.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,5 +1,5 @@
 module m_OP_Moment
-! $Id: m_OP_Moment.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_OP_Moment.F90 479 2016-03-12 12:30:51Z ktagami $
 
 ! =========== Contributions ===================================
 !
@@ -159,6 +159,8 @@ contains
                 istmp = ( is1 -1 )*ndim_spinor + is2
                 Do m1=1, size1
                    Do m2=1, size1
+                      if ( m1 /= m2 ) cycle
+
                       spinmom(1) = spinmom(1) +dmmat_ssrep(m1,m2,istmp) &
                            &                  *PauliMatrix(2,is2,is1)
                       spinmom(2) = spinmom(2) +dmmat_ssrep(m1,m2,istmp) &
@@ -191,8 +193,10 @@ contains
 
       write(nfout,*) '!   ia   l   type          mx             my             mz  '
 !
-      write(nfout,'(I7,I4,3X,A5,3F15.8)') ia, l, "orb  ", orbmom(1), orbmom(2), orbmom(3)
-      write(nfout,'(14X,     A5,3F15.8)')     "spin ", spinmom(1), spinmom(2), spinmom(3)
+      write(nfout,'(I7,I4,3X,A5,3F15.8)') ia, l, &
+           &                              "spin ", spinmom(1), spinmom(2), spinmom(3)
+      write(nfout,'(14X,     A5,3F15.8)') &
+           &                              "orb  ", orbmom(1), orbmom(2), orbmom(3)
 !
       write(nfout,*) '! ------ '
 
@@ -742,7 +746,7 @@ contains
 
       Do it=1, ntyp
          call new_radr_and_wos(ista_k,it)
-         rad1 = rad_cov_default( iatomn(it) )
+         rad1 = rad_cov_default( nint(iatomn(it)) ) ! Revised according to a report from ASMS Co.ltd, 10 March 2016.
 
          do lmt1=1,ilmt(it)
             il1=ltp(lmt1,it); im1=mtp(lmt1,it); it1=taup(lmt1,it)
@@ -1210,29 +1214,13 @@ contains
     integer :: ja
     
     integer :: ixyz1, ixyz2, is_tmp
-    real(kind=DP) :: ctmp1, weight, ctmp2
+    real(kind=DP) :: ctmp1, weight, ctmp2, weight2, determinant
       
     allocate(hsr_tmp(natm,nlmt,nlmt,ndim_magmom)); hsr_tmp = 0.0d0
     allocate(hsi_tmp(natm,nlmt,nlmt,ndim_magmom)); hsi_tmp = 0.0d0
 !
     hsr_tmp = rho_ylm1_ylm2_r;  hsi_tmp = rho_ylm1_ylm2_i
 
-#if 0
-    Do ia=1, natm
-       it = ityp(ia)
-       do lmt1 = 1, ilmt(it)
-          do lmt2 = 1, ilmt(it)
-             do is_tmp=1, ndim_magmom
-                write(850+mype,*) ' ia lmt1 lmt2 istmp = ', ia, lmt1,lmt2,is_tmp
-                write(850+mype,*) 'A ', hsr_tmp(ia,lmt1,lmt2,is_tmp), hsr_tmp(ia,lmt2,lmt1,is_tmp)
-                write(850+mype,*) 'B ', hsi_tmp(ia,lmt1,lmt2,is_tmp), hsi_tmp(ia,lmt2,lmt1,is_tmp)
-             end do
-          end do
-       end do
-    end Do
-    close( 850+mype )
-#endif
-
     do ia=1,natm
        it=ityp(ia)
        do is =1, ndim_magmom
@@ -1254,6 +1242,13 @@ contains
           weight = 1.0d0
        endif
        
+       call calc_determinant( op(:,:,iopr), determinant )
+       if ( determinant > 0 ) then
+          weight2 = 1.0d0
+       else
+          weight2 = -1.0d0
+       endif
+
        do ia = 1, natm
           it = ityp(ia)
           ja=abs(ia2ia_symmtry_op_inv(ia,iopr))
@@ -1298,7 +1293,7 @@ contains
                                  crotylm_paw(m,jj,iopr,ia)
                             rho_ylm1_ylm2_i(ia,lmt1,lmt2,1) = &
                                  rho_ylm1_ylm2_i(ia,lmt1,lmt2,1) + &
-                                 weight * &
+                                 weight * weight2 * &
                                  hsi_tmp(ja,lmt3,lmt4,1)* &
                                  crotylm_paw(n,ii,iopr,ia)* &
                                  crotylm_paw(m,jj,iopr,ia)
@@ -1306,7 +1301,8 @@ contains
                             Do ixyz1=1, 3
                                Do ixyz2=1, 3
                                   ctmp1 = op(ixyz2, ixyz1, iopr) *weight
-                                  
+                                  ctmp2 = op(ixyz2, ixyz1, iopr) *weight2
+
                                   rho_ylm1_ylm2_r(ia,lmt1,lmt2,ixyz2+1) &
                                        & = rho_ylm1_ylm2_r(ia,lmt1,lmt2,ixyz2+1)  &
                                        &  + ctmp1 &
@@ -1315,7 +1311,7 @@ contains
                                        &    *crotylm_paw(m,jj,iopr,ia)
                                   rho_ylm1_ylm2_i(ia,lmt1,lmt2,ixyz2+1) &
                                        & = rho_ylm1_ylm2_i(ia,lmt1,lmt2,ixyz2+1)  &
-                                       &  + op(ixyz2, ixyz1, iopr) &
+                                       &  + ctmp2 &
                                        &    *hsi_tmp(ja,lmt3,lmt4,ixyz1+1) &
                                        &    *crotylm_paw(n,ii,iopr,ia)  &
                                        &    *crotylm_paw(m,jj,iopr,ia)
@@ -1335,22 +1331,6 @@ contains
       
     rho_ylm1_ylm2_r = rho_ylm1_ylm2_r/nopr; rho_ylm1_ylm2_i = rho_ylm1_ylm2_i/nopr; 
       
-#if 0
-    Do ia=1, natm
-       it = ityp(ia)
-       do lmt1 = 1, ilmt(it)
-          do lmt2 = 1, ilmt(it)
-             do is_tmp=1, ndim_magmom
-                write(860+mype,*) ' ia lmt1 lmt2 istmp = ', ia, lmt1,lmt2,is_tmp
-                write(860+mype,*) 'A ', rho_ylm1_ylm2_r(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_r(ia,lmt2,lmt1,is_tmp)
-                write(860+mype,*) 'B ', rho_ylm1_ylm2_i(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_i(ia,lmt2,lmt1,is_tmp)
-             end do
-          end do
-       end do
-    end Do
-    close( 860+mype )
-#endif
-
     do ia=1,natm
        it=ityp(ia)
        do is =1, ndim_magmom
@@ -1363,24 +1343,19 @@ contains
        end do
     end do
     
-#if 0
-    Do ia=1, natm
-       it = ityp(ia)
-       do lmt1 = 1, ilmt(it)
-          do lmt2 = 1, ilmt(it)
-             do is_tmp=1, ndim_magmom
-                write(870+mype,*) ' ia lmt1 lmt2 istmp = ', ia, lmt1,lmt2,is_tmp
-                write(870+mype,*) 'A ', rho_ylm1_ylm2_r(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_r(ia,lmt2,lmt1,is_tmp)
-                write(870+mype,*) 'B ', rho_ylm1_ylm2_i(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_i(ia,lmt2,lmt1,is_tmp)
-             end do
-          end do
-       end do
-    end Do
-    close( 870+mype )
-#endif
-
     deallocate(hsr_tmp);  deallocate(hsi_tmp)
       
+  contains
+    subroutine calc_determinant( a, determinant )
+      real(kind=DP), intent(in) :: a(3,3)
+      real(kind=DP), intent(out) :: determinant
+
+      determinant = a(1,1)*( a(2,2)*a(3,3) -a(2,3)*a(3,2) ) &
+           &       -a(1,2)*( a(2,1)*a(3,3) -a(2,3)*a(3,1) ) &
+           &       +a(1,3)*( a(2,1)*a(3,2) -a(2,2)*a(3,1) )
+
+    end subroutine calc_determinant
+
   end subroutine symmetrz_rho_ylm1_ylm2
 ! ============= 2014/08/26
 
diff -uprN phase0_2015.01/src_phase/m_Orbital_Population.F90 phase0_2015.01.01/src_phase/m_Orbital_Population.F90
--- phase0_2015.01/src_phase/m_Orbital_Population.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Orbital_Population.F90	2016-07-12 12:51:19.000000000 +0900
@@ -29,7 +29,7 @@
 !
 !
 module m_Orbital_Population
-! $Id: m_Orbital_Population.F90 416 2014-12-17 04:11:16Z jkoga $
+! $Id: m_Orbital_Population.F90 492 2016-05-31 03:06:04Z jkoga $
   use m_Const_Parameters,     only : DP,ON,ANEW,RENEW,SIMPLE,BROYD1,BROYD2,DFP,PULAY,OFF,UNIT_MATRIX
   use m_Files,                only : nfout, nfoccmat, m_Files_open_nfoccmat
   use m_IterationNumbers,     only : iteration_for_cmix
@@ -37,7 +37,9 @@ module m_Orbital_Population
      &                             , proj_attribute &
      &                             , proj_group, num_proj_elems, max_projs &
      &                             , iprihubbard &
-     &                             , hownew,nbxmix,istrbr,ipripulay, sw_force_simple_mixing_hub
+     &                             , hownew,nbxmix,istrbr,ipripulay, sw_force_simple_mixing_hub &
+     &                             , sw_metric_diff, alpha_pulay, sw_recomposing &
+     &                             , sw_mix_bothspins_sametime, sw_force_simple_mixing
   use m_Ionic_System,         only : natm,ntyp,ityp,iproj_group,zeta1
   use m_PseudoPotential,      only : prodphi,ilmt,ltp,mtp,taup,nlmt,ntau,nlmtt
   use m_Crystal_Structure,    only : op,nopr
@@ -141,6 +143,11 @@ module m_Orbital_Population
   integer, private :: sw_mix_imaginary_component = ON
 ! ===================================================================== 11.0
 
+  real(DP),private,allocatable,target,dimension(:,:,:,:) :: urec_l
+  real(DP),private,allocatable,dimension(:,:) :: d0_l,u_l,v_l
+  real(kind=DP), allocatable, dimension(:,:) :: ynorm
+  real(DP),private,allocatable,dimension(:) :: f_p !d(ista_kgpm:iend_kgpm)
+
   include 'mpif.h'
 
 contains
@@ -187,6 +194,56 @@ contains
     !!$write(nfout,*) 'nyymax=',nyymax
   end subroutine m_OP_set_i2lp_max2lp
 
+  subroutine m_OP_store_om()
+    omold = om
+  end subroutine m_OP_store_om
+
+  subroutine m_OP_om_diff()
+    integer :: is,ia,ja,it,ilmt1,ilmt2
+    integer :: l1,l2,m1,m2,t1,t2,m1r,m2r
+    integer :: iyy,iopr,mm1,mm2
+    integer :: ig,ip,i
+    
+    integer :: ilp
+    real(kind=DP) :: diff,sumdiff
+    integer :: icount
+    sumdiff = 0.d0
+    icount=0
+    do is=1,nspin,af+1
+
+       do ia=1,natm
+          ig = iproj_group(ia)
+          if(ig<1) cycle
+
+          do i=1,num_proj_elems(ig)
+             ip=proj_group(i,ig)
+             it = proj_attribute(ip)%ityp
+             ilp = proj_attribute(ip)%l+1
+
+             do ilmt1 = 1, ilmt(it)
+                l1 = ltp(ilmt1,it); m1 = mtp(ilmt1,it)
+                t1 = taup(ilmt1,it)
+                if ( l1 /= ilp ) cycle
+
+                do ilmt2 = 1, ilmt(it)
+                   l2 = ltp(ilmt2,it); m2 = mtp(ilmt2,it)
+                   t2 = taup(ilmt2,it)
+                   if( l2 /= ilp ) cycle
+
+                   diff = abs(omold(m1,m2,i,ia,is)-om(m1,m2,i,ia,is))
+                   sumdiff = sumdiff+diff
+                   icount = icount+1
+                end do
+             end do
+! =============================================================
+          end do
+          !!if(printable) write(nfout,'(a,i5,a,f20.12)') 'omdiff for atom ',ia,' : ',sumdiff/dble(icount)
+          sumdiff = 0.d0
+          icount = 0
+       end do
+    end do
+  end subroutine m_OP_om_diff
+
   subroutine m_OP_alloc
     allocate(i2lp(num_projectors))
     call m_OP_set_i2lp_max2lp
@@ -2348,36 +2405,430 @@ contains
     else
        nspin_m  = nspin/(af+1)
     endif
-! ====================================================================== 11.0
-    allocate(din(nsize_rho,nspin_m))
-    allocate(dout(nsize_rho,nspin_m))
-    allocate(urec(nsize_rho,nspin_m,nbxmix,2))
+! ================================================================= 11.0
+
+! =========================================== Modified by K. Tagami =========
+!    allocate(f_p(ista_kgpm:iend_kgpm)); call precon_4_mult(f_p) !-(m_CD)
+    allocate(f_p(1:nsize_rho)); f_p = 0
+! ============================================================================
+
+    allocate(din(1:nsize_rho,nspin_m))
+    allocate(dout(1:nsize_rho,nspin_m))
+    allocate(urec_l(1:nsize_rho,nspin_m,nbxmix,2))
     allocate(uuf_p(nbxmix,nspin_m))
     allocate(f(nbxmix,nbxmix,nspin_m))
     allocate(g_p(nbxmix,nspin_m))
     allocate(ncrspd(nbxmix))
+
+    allocate(ynorm(nbxmix,nspin_m));ynorm=1.d0
+! ======================================= Added by K. Tagami ===========
+    din = 0.0d0; dout = 0.0d0; urec_l = 0.0d0; uuf_p = 0.0d0; f = 0.0d0
+    g_p = 0.0d0;  ncrspd = 0
+! ======================================================================
   end subroutine mix_pulay_allocate
 
   subroutine mix_pulay_deallocate
+    if(allocated(f_p)) deallocate(f_p)
     if(allocated(din)) deallocate(din)
     if(allocated(dout)) deallocate(dout)
-    if(allocated(urec)) deallocate(urec)
+    if(allocated(urec_l)) deallocate(urec_l)
     if(allocated(uuf_p)) deallocate(uuf_p)
     if(allocated(f)) deallocate(f)
     if(allocated(g_p)) deallocate(g_p)
     if(allocated(ncrspd)) deallocate(ncrspd)
+    if (allocated(ynorm)) deallocate(ynorm)
   end subroutine mix_pulay_deallocate
 
   subroutine mix_pulay_alloc2
-    allocate(d0(nsize_rho,nspin_m))
+    allocate(d0_l(nsize_rho,nspin_m))
+    d0_l = 0.0d0
   end subroutine mix_pulay_alloc2
 
   subroutine mix_pulay_dealloc2
-    deallocate(d0)
+    deallocate(d0_l)
   end subroutine mix_pulay_dealloc2
 
   subroutine m_OP_mix_pulay(rmx)
-    real(kind=DP), intent(in) :: rmx
+    integer, parameter  :: iRho = 1, iResid = 2
+    real(DP),intent(in) :: rmx
+    integer   :: iter, mxiter
+    real(DP),pointer,dimension(:)  :: e_wk, f_wk, ww1, finv
+    integer, pointer,dimension(:)  :: ip
+! --> T. Yamasaki  03 Aug. 2009
+    real(kind=DP), allocatable, dimension(:):: rmxtrc ! d(nspin_m)
+!   real(kind=DP), allocatable, dimension(:,:,:) :: chgqstore_l, chgqostore_l
+! <--
+    real(kind=DP) :: rmxtt
+    integer   :: id_sname = -1
+    call tstatc0_begin('m_OP_mix_pulay ',id_sname,1)
+
+    if(previous_waymix /= PULAY) then
+       if(first) then
+          call create_map_func(.true.)
+          call alloc_rho
+          call create_map_func(.false.)
+          first = .false.
+       end if
+       call mix_dealloc_previous()
+       call mix_pulay_allocate()
+    end if
+
+    if ( noncol ) then
+       call map_om_to_rho_noncl(om,   om_aimag,   rho)
+       call map_om_to_rho_noncl(omold,omold_aimag,rhoo)
+    else
+       call map_om_to_rho(om,rho)
+       call map_om_to_rho(omold,rhoo)
+    endif
+
+    iter = iter_from_reset()                 !-(m_OP)
+
+    nspin_m  = ndim_magmom/(af+1)
+    allocate( rmxtrck(nspin_m) )
+    if ( noncol ) then
+       rmxtrck = rmx
+    else
+       if ( sw_recomposing_occmat == YES .and. af == 0 .and. nspin == 2 ) then
+          call alloc_rhostore_recomp( rmx, rmxtrck )
+       else
+          rmxtrck = rmx
+       endif
+    end if
+! ========================================================================= 11.0
+
+    if((iter-istrbr+1) <= 1) then
+! ===================================== modified by K. Tagami ======== 5.0
+!!       call simple_mix(rmx)                  !-(m_OP)
+       call simple_mix_kt( rmxtrck )                  !-(m_OP)
+! ==================================================================== 5.0
+    else
+       call mix_pulay_alloc2   !-(m_CD) d0_l,u_l, and w_l are allocated
+       call set_ncrspd_mxiter(nbxmix,iter-istrbr,mxiter) ! -> ncrspd, mxiter
+!!$       call mix_pulay_alloc3(nbxmix,iter-istrbr)   !-(c.h.) e_wk,f_wk,ww1,finv,ip
+       call mix_pulay_alloc3(mxiter)   !-(c.h.) e_wk,f_wk,ww1,finv,ip
+
+       call Resid_and_dd_into_urec(mxiter) !-(c.h.)
+       !                               dF ->urec_l; dd ->urec_l; d0_l,din,dout
+       call Ri_dot_Rj(mxiter)          !-(c.h.) <R(i)|R(j)>->f
+       call get_finv_lapack(nbxmix,mxiter,f)  !-(c.h.) f -> f^{-1}= <R(i)|R(j)>^{-1}
+
+       call Rj_dot_d(mxiter)           !-(c.h.) <R(j)|d>,(j=1,iter-istrb) -> uuf_p
+
+       call get_gmatrix(mxiter)        !-(c.h.) (f,uuf_p)->g
+       call renew_d_using_g(mxiter,rmxtrck)     !-(c.h.)
+
+       call mix_pulay_dealloc3                    !-(c.h.)
+       call mix_pulay_dealloc2                    !-(m_CD)
+    endif
+
+    deallocate(rmxtrck)
+
+    if ( noncol ) then
+       call map_rho_to_om_noncl( ommix,ommix_aimag,rho )
+    else
+       call map_rho_to_om( ommix,rho )
+    endif
+
+    previous_waymix = PULAY
+    call tstatc0_end(id_sname)
+  contains
+    subroutine mix_pulay_alloc3(m)
+      integer, intent(in) :: m
+      allocate(e_wk(m*m)); allocate(f_wk(m*m)); allocate(ww1(m)); allocate(finv(m*m))
+      allocate(ip(m))
+! ===================================== Added by K. Tagami ============
+      e_wk = 0; f_wk = 0; ww1 = 0; finv = 0; ip = 0
+! =====================================================================
+    end subroutine mix_pulay_alloc3
+
+    subroutine set_ncrspd_mxiter(n,iter,m)
+      integer, intent(in)  :: n, iter
+      integer, intent(out) :: m
+      integer :: i, nx
+      if(hownew == ANEW) then
+         m = iter
+!!$         ncrspd(:) = (/(i,i=1,m)/)
+         do i=1,iter
+            ncrspd(i) = i
+         end do
+      else ! hownew == RENEW
+         if(iter <= n) then
+            m = iter
+!!$            ncrspd(:) = (/(i,i=1,m)/)
+            do i=1,iter
+               ncrspd(i) = i
+            end do
+         else
+            m = n
+            nx = ncrspd(1)
+            do i = 1, m-1
+               ncrspd(i) = ncrspd(i+1)
+            end do
+            ncrspd(m) = nx
+         end if
+      end if
+    end subroutine set_ncrspd_mxiter
+
+    subroutine mix_pulay_dealloc3
+      deallocate(e_wk); deallocate(f_wk); deallocate(ww1); deallocate(finv)
+      deallocate(ip)
+    end subroutine mix_pulay_dealloc3
+
+    subroutine Resid_and_dd_into_urec(iter)
+      integer, intent(in) :: iter
+      integer             :: itc,itc0,itc1
+      integer :: i,j,k,imix
+      real(kind=DP) :: sum1,sum2
+      itc = ncrspd(iter)
+      urec_l(:,:,itc,iResid) = rho(:,:) - rhoo(:,:) - (dout(:,:) - din(:,:)) ! =dF(=delta F^i)
+      urec_l(:,:,itc,iRho  ) = rhoo(:,:) - din(:,:)                ! =dd
+      d0_l(:,:) = rho(:,:) - rhoo(:,:)
+      din(:,:)  = rhoo(:,:)
+      dout(:,:) = rho(:,:)
+      ynorm(itc,:)=0.d0 
+      do i=1,nspin_m
+         do k=1,nsize_rho
+            ynorm(itc,i) = ynorm(itc,i)+urec_l(k,i,itc,iResid)*urec_l(k,i,itc,iResid)
+         enddo
+      enddo
+      ynorm(itc,:) = 1.d0/sqrt(ynorm(itc,:))
+    end subroutine Resid_and_dd_into_urec
+
+    subroutine Ri_dot_Rj(n)
+      integer, intent(in) :: n
+      integer  :: it,jt,itc,jtc
+      real(DP) :: ff1(nspin_m),ff1tmp
+
+      do it = 1, n
+         itc = ncrspd(it)
+         do jt = it, n
+            jtc = ncrspd(jt)
+            if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+               call mult1s10_reduce_spin(urec_l,nbxmix,2,itc,iResid,urec_l,jtc,iResid,f_p,ff1tmp)   ! <delta F^i|delta F^j>
+               ff1(1)=ff1tmp;ff1(2)=ff1tmp
+            else
+               call mult1s10(urec_l,nbxmix,2,itc,iResid,urec_l,jtc,iResid,f_p,ff1)   ! <delta F^i|delta F^j>
+            endif
+
+            if ( noncol ) then
+               call mult1s10_reduce_spin( urec_l, nbxmix, 2, itc, iResid, &
+                    &                     urec_l, jtc, iResid, f_p, ff1tmp )
+                                                        ! <delta F^i|delta F^j>
+               ff1(:) = ff1tmp
+            endif
+            f(it,jt,1:nspin_m) = ff1(1:nspin_m)
+            if(jt /= it) f(jt,it,1:nspin_m) = f(it,jt,1:nspin_m)
+         end do
+      end do
+    end subroutine Ri_dot_Rj
+
+    subroutine Rj_dot_d(n)
+      integer, intent(in) :: n
+      integer  :: jt, jtc
+      real(DP) :: ff1(nspin_m),ff1tmp
+
+      do jt = 1, n
+         jtc = ncrspd(jt)
+         if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+            call mult1s5_reduce_spin(urec_l,nbxmix,2,jtc,iResid,d0_l,f_p,ff1tmp)
+            ff1(1) = ff1tmp;ff1(2)=ff1tmp
+         else
+            call mult1s5(urec_l,nbxmix,2,jtc,iResid,d0_l,f_p,ff1)
+         endif
+
+         if ( noncol ) then
+            call mult1s5_reduce_spin(urec_l,nbxmix,2,jtc,iResid,d0_l,f_p,ff1tmp)
+            ff1(:) = ff1tmp
+         endif
+
+         uuf_p(jt,1:nspin_m) = ff1(1:nspin_m)
+      end do
+    end subroutine Rj_dot_d
+
+    subroutine get_finv_lapack(m,n,f)
+      integer,intent(in)                             :: m,n
+      real(DP),intent(inout),dimension(m,m,nspin_m) :: f
+      real(DP), allocatable,dimension(:,:) :: fwork
+      integer :: is,inf,it,jt,kt,nnspin
+      real(DP) :: div,tmp
+      allocate(fwork(n,n))
+      nnspin = nspin
+      if(sw_mix_bothspins_sametime==ON .or. af==1) nnspin=1
+
+      if ( noncol ) then
+         nnspin = 1
+      end if
+
+      do is=1,nnspin
+         if(ipripulay >= 2) then
+            write(nfout,600) n,(('(',it,jt,')',f(it,jt,is),jt=1,n),it=1,n)
+600         format(//11x,"**input matrix**"/12x &
+                 & ,"horder=",I5/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+         fwork=0
+         do it=1,n
+            do jt=1,n
+               fwork(jt,it) = f(jt,it,is)*ynorm(jt,is)*ynorm(it,is)
+               if(it==jt) fwork(jt,it)=fwork(jt,it)+alpha_pulay
+            enddo
+         enddo
+         call dpotrf('U',n,fwork,n,inf)
+         call dpotri('U',n,fwork,n,inf)
+         do it=1,n-1
+            do jt=it+1,n
+               fwork(jt,it) = fwork(it,jt)
+            enddo
+         enddo
+         do it=1,n
+            do jt=1,n
+               f(jt,it,is) = fwork(jt,it)*ynorm(jt,is)*ynorm(it,is)
+            enddo
+         enddo
+         if(ipripulay >= 2) then
+            write(nfout,630) (('(',it,jt,')',f(it,jt,is),it=1,n),jt=1,n)
+630         format(/11x, "**inverse matrix**" &
+                 & ,/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+      enddo
+      if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+         do it=1,n
+            do jt=1,n
+               f(jt,it,2) = f(jt,it,1)
+            enddo
+         enddo
+      endif
+! ============================== added by K. Tagami ========== 11.0
+      if ( noncol ) then
+         do it=1,n
+            do jt=1,n
+               f(jt,it,:) = f(jt,it,1)
+            enddo
+         end do
+      endif
+! ============================================================ 11.0
+      deallocate(fwork)
+
+    end subroutine get_finv_lapack
+
+    subroutine get_finv(m,n,f)
+      integer,intent(in)                             :: m,n
+      real(DP),intent(inout),dimension(m,m,nspin_m) :: f
+
+      integer                        :: icount,is,jt,it,icon
+      real(DP)                       :: div
+
+      e_wk = 0.d0
+      do it = 1, n
+         e_wk(it*it) = 1.d0
+      end do
+
+! ======================================= modified by K. Tagami =========== 11.0
+!      do is = 1, nspin, af+1
+      do is = 1, ndim_magmom, af+1
+! ========================================================================== 11.0
+         div = 1.d0/f(1,1,is)
+         icount = 1
+         do jt = 1, n
+            do it = 1, n
+               f_wk(icount) = f(it,jt,is)*div
+               icount = icount + 1
+            end do
+         end do
+         if(ipripulay >= 1) then
+            write(nfout,600) n,(('(',it,jt,')',f(it,jt,is)*div,jt=1,n),it=1,n)
+600         format(//11x,"**input matrix**"/12x &
+                 & ,"horder=",I5/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+         call rdecomp(n,f_wk,ww1,ip,icon)
+         if(icon /= 0) then
+            stop 'LU decomposition is impossible.'
+         else
+            call rsolve(n,n,f_wk,e_wk,finv,ip)
+         endif
+
+         icount = 1
+         do jt = 1, n
+            do it = 1, n
+               f(it,jt,is) = finv(icount)
+               icount = icount + 1
+            end do
+         end do
+         if(ipripulay >= 1) then
+            write(nfout,630) (('(',it,jt,')',f(it,jt,is),it=1,n),jt=1,n)
+630         format(/11x, "**inverse matrix**" &
+                 & ,/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+      end do
+    end subroutine get_finv
+
+    subroutine get_gmatrix(n)
+      integer,intent(in) :: n
+      integer :: is, it, jt, nnspin
+      nnspin = nspin
+      if(sw_mix_bothspins_sametime==ON .or. af==1) nnspin=1
+
+! ============================ added by K. Tagami ============= 11.0
+      if ( noncol ) nnspin = 1
+! ============================================================== 11.0      
+
+      g_p = 0.d0
+      do is = 1, nnspin
+         do it = 1, n
+            do jt = 1, n
+               g_p(it,is) = g_p(it,is) - f(jt,it,is)*uuf_p(jt,is)
+            end do
+         end do
+         if(ipripulay >= 2) then
+            write(nfout,'(" -- g_p(1:",i3,") --")') n
+            write(nfout,'(8f20.12)') (g_p(it,is),it=1,n)
+         end if
+      end do
+      if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+         do it = 1,n
+            g_p(it,2) = g_p(it,1)
+         enddo
+      endif
+! ============================== added by K. Tagami ============ 11.0
+      if ( noncol ) then
+         do it = 1,n
+            g_p(it,:) = g_p(it,1)
+         enddo
+      endif
+! ============================================================== 11.0
+
+    end subroutine get_gmatrix
+
+    subroutine renew_d_using_g(n,p)
+      integer, intent(in)                                :: n
+      real(DP),intent(in),dimension(nspin_m) :: p
+      integer    :: is, k, i, it, itc, ns
+
+!!$      do is = 1, nspin, af+1
+      ns = nspin_for_qnewton()
+      do is = 1, ns,af+1
+         do i = 1,nsize_rho
+            rho(i,is)  = rhoo(i,is) + p(is)*d0_l(i,is)
+         end do
+         do it = 1, n
+            itc = ncrspd(it)
+            do i = 1,nsize_rho
+               rho(i,is) = rho(i,is) + g_p(it,is)* &
+                    &        (urec_l(i,is,itc,iRho) + p(is)*urec_l(i,is,itc,iResid))
+            end do
+         end do
+      end do
+
+    end subroutine renew_d_using_g
+
+    integer function nspin_for_qnewton()
+      if ( noncol ) then
+         nspin_for_qnewton=ndim_magmom
+      else
+         nspin_for_qnewton=nspin
+         if (sw_force_simple_mixing==ON .and. sw_recomposing==ON) nspin_for_qnewton=1
+      endif
+    end function nspin_for_qnewton
+
   end subroutine m_OP_mix_pulay
 
   subroutine create_map_func(paramset)
@@ -2627,6 +3078,13 @@ contains
 ! =========================================================== 11.0
   end subroutine m_OP_cp_ommix_to_omold
 
+  subroutine m_OP_cp_ommix_to_om
+    om = ommix
+! ========================= added by K. Tagami ============== 11.0
+    if ( noncol ) om_aimag = ommix_aimag
+! =========================================================== 11.0
+  end subroutine m_OP_cp_ommix_to_om
+
   subroutine m_OP_simple_mixing(nfout,rmxt)
     integer, intent(in) :: nfout
     real(kind=DP), intent(in) :: rmxt
@@ -2721,6 +3179,119 @@ contains
 
   end subroutine alloc_rhostore_recomp
 
+  subroutine mult1s5(u,mb,muv,j,iuv,v,f_q,fmult)
+    integer,intent(in) :: mb,muv,j,iuv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m) :: v
+    real(DP),intent(in), dimension(1:nsize_rho) :: f_q
+    real(DP),intent(out),dimension(nspin_m)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,i
+
+    fmult = 0.d0
+    do is = 1, ndim_magmom, af+1
+       p = 0.d0
+       fac=1.0d0
+       do i = 1,nsize_rho
+          if ( noncol ) then
+             fac=f_q(i)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(i)
+             endif
+          end if
+          p = p + fac*u(i,is,j,iuv)*v(i,is)
+       end do
+       fmult(is) = p
+    enddo
+  end subroutine mult1s5
+
+  subroutine mult1s5_reduce_spin(u,mb,muv,j,iuv,v,f_q,fmult)
+    integer,intent(in) :: mb,muv,j,iuv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m) :: v
+    real(DP),intent(in), dimension(1:nsize_rho):: f_q
+    real(DP),intent(out)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,i
+
+    fmult = 0.d0
+    p = 0.d0
+
+    do is = 1, ndim_magmom, af+1
+       fac = 1.0d0
+       do i = 1,nsize_rho
+          if ( noncol ) then
+             fac=f_q(i)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(i)
+             endif
+          end if
+          p = p + fac*u(i,is,j,iuv)*v(i,is)
+       end do
+    enddo
+    fmult = p
+  end subroutine mult1s5_reduce_spin
+
+  subroutine mult1s10(u,mb,muv,i,iu,v,j,iv,f_q,fmult)
+    integer,intent(in) :: mb,muv,i,iu,j,iv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u,v
+    real(DP),intent(in), dimension(1:nsize_rho):: f_q
+    real(DP),intent(out),dimension(nspin_m)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,ig
+    fmult = 0.d0
+
+    do is = 1, ndim_magmom, af+1
+       p = 0.d0
+       fac = 1.0d0
+       do ig = 1,nsize_rho
+          if ( noncol ) then
+             fac=f_q(ig)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(ig)
+             endif
+          end if
+          p = p + fac*u(ig,is,i,iu)*v(ig,is,j,iv)
+       end do
+       fmult(is) = p
+    enddo
+  end subroutine mult1s10
+
+  subroutine mult1s10_reduce_spin(u,mb,muv,i,iu,v,j,iv,f_q,fmult)
+    integer,intent(in) :: mb,muv,i,iu,j,iv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u,v
+    real(DP),intent(in), dimension(1:nsize_rho):: f_q
+    real(DP),intent(out)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,ig
+
+    fmult = 0.d0
+    p = 0.d0
+
+    do is = 1, ndim_magmom, af+1
+       fac = 1.0d0
+       do ig = 1,nsize_rho
+
+          if ( noncol ) then
+             fac=f_q(ig)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(ig)
+             endif
+          end if
+          p = p + fac*u(ig,is,i,iu)*v(ig,is,j,iv)
+       end do
+    enddo
+    fmult = p
+  end subroutine mult1s10_reduce_spin
+
 ! ================================================================= 5.0
 
 end module m_Orbital_Population
diff -uprN phase0_2015.01/src_phase/m_PAW_ChargeDensity.F90 phase0_2015.01.01/src_phase/m_PAW_ChargeDensity.F90
--- phase0_2015.01/src_phase/m_PAW_ChargeDensity.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_PAW_ChargeDensity.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 488 $)
 !
 !  MODULE: m_PAW_ChargeDensity
 !
@@ -33,10 +33,10 @@
 !
 module m_PAW_ChargeDensity
   use m_db,                   only : getIntDB,getStringDB_TB,getIntDB_TB
-  use m_Const_Parameters,     only : DP,PAI2,PAI4,BUCS,SphericalHarmonicsExpansion, GaussLegendre, LOWER
+  use m_Const_Parameters,     only : DP,PAI2,PAI4,BUCS,SphericalHarmonicsExpansion, GaussLegendre, LOWER, Bohr
   use m_Control_Parameters,   only : kimg,nspin,af,ipripaw,ipriinputfile,printable
   use m_Files,                only : nfout
-  use m_Ionic_System,         only : ityp,natm,ntyp,pos,iwei, speciesname
+  use m_Ionic_System,         only : ityp,natm,ntyp,pos,iwei, speciesname, amion
   use m_Charge_Density,       only : hsr,chgq_l,chgsoft
   use m_PlaneWaveBasisSet,    only : igfp_l
   use m_PseudoPotential,      only : psirpw,phirpw,qrspspw &
@@ -45,7 +45,7 @@ module m_PAW_ChargeDensity
        &                            ,il2p,isph,dl2p,iqitg &
        &                            ,m_PP_find_maximum_l &
        &                            ,ipaw,wf_mnrc,flg_symmtry &
-       &                            ,mmesh
+       &                            ,mmesh, nmesh
   use m_FFT,                  only : fft_box_size_CD,fft_box_size_CD_c, nfftp &
        , m_FFT_CD_inverse0 &
        , m_FFT_check_of_negative_CD &
@@ -152,6 +152,8 @@ module m_PAW_ChargeDensity
   public:: calcGaussLegendreIntegration
   public:: calcSphericalHarmonicsExpansion
   
+  public :: m_PAWCD_calc_contact_density
+
 contains
 
   subroutine m_PAW_dealloc()
@@ -2298,7 +2300,7 @@ contains
   subroutine set_ia2ia_symmtry_op
     integer:: ia,it,no,ja,jt
     integer:: i,j,k
-    real(DP):: pos0(3),pos1(3),pos2(3)
+    real(DP):: pos0(3),pos1(3),pos2(3),pos3(3)
     real(DP):: distance
     
     allocate(op_pr(3,3,nopr+af))
@@ -2314,14 +2316,15 @@ contains
           pos1(:)=matmul(op_pr(:,:,no),pos0(:))+tau(:,no,BUCS)
 !print *,'pos1=',pos1
           pos1(:) = pos1(:) - floor(pos1(:))
-          KLoop: do k=-2,1
-             do j=-2,1
-                do i=-2,1
+          KLoop: do k=-1,1
+             do j=-1,1
+                do i=-1,1
                    do ja=1,natm
                       jt=ityp(ja)
                       if(it/=jt) cycle
-                      
-                      pos2(1:3)=pos(ja,1:3)+(/dble(i),dble(j),dble(k)/)
+                      pos3(1:3) = pos(ja,1:3) 
+                      pos3(:) = pos3(:) - floor(pos3(:))
+                      pos2(1:3)=pos3(1:3)+(/dble(i),dble(j),dble(k)/)
 !print *,'pos2=',pos2                             
                       distance=abs(pos1(1)-pos2(1))+abs(pos1(2)-pos2(2)) &
                            +abs(pos1(3)-pos2(3))
@@ -2340,7 +2343,7 @@ contains
                       end if
                         
                       if(kimg==1 .and. iwei(ja)==2) then
-                         pos2(1:3)=-pos(ja,1:3)+(/dble(i),dble(j),dble(k)/)
+                         pos2(1:3)=-pos3(1:3)+(/dble(i),dble(j),dble(k)/)
                          distance=abs(pos1(1)-pos2(1))+abs(pos1(2)-pos2(2)) &
                               +abs(pos1(3)-pos2(3))
                          if(distance < 1.d-5) then
@@ -3374,4 +3377,133 @@ contains
     return
   end subroutine m_PAWCD_set_sq_der_cd_sdphex2
   
+  subroutine m_PAWCD_calc_contact_density
+                                   ! AE hard part density at nuclear radius
+                                   ! assuming only s-orbital contributes
+
+    integer :: ia, it, is, ir_nucl
+    integer :: il1, il2, im1, im2, it1, it2, lmt1, lmt2
+    integer :: n, isp
+    real(kind=DP)  :: csum, ctmp, c1, c2, fac
+    real(kind=DP), allocatable :: contact_charge_density(:)
+
+    allocate( contact_charge_density(natm) ); contact_charge_density = 0.0d0
+
+#if 1
+    Do it=1, ntyp
+       call calc_nuclear_radius( it, ir_nucl )
+
+       Do ia=1, natm
+          if ( ityp(ia) /= it ) cycle
+
+          csum = 0.0d0
+
+          do lmt1=1,ilmt(it)
+             il1 = ltp(lmt1,it);  im1 = mtp(lmt1,it);   it1 = taup(lmt1,it)
+
+             do lmt2=lmt1,ilmt(it)
+                il2 = ltp(lmt2,it);   im2 = mtp(lmt2,it);    it2 = taup(lmt2,it)
+
+                c1 = psirpw( ir_nucl, il1, it1, it )
+                c2 = psirpw( ir_nucl, il2, it2, it )
+
+                fac=2.d0; if( lmt1.eq.lmt2 ) fac=1.d0
+
+                do n=1,il2p(lmt1,lmt2,it)
+                   isp = isph(lmt1,lmt2,n,it)
+                   if ( isp > 1 ) cycle
+
+                   if ( noncol ) then
+                      ctmp = hsr(ia,lmt1,lmt2,1)
+                   else
+                      ctmp = 0.0d0
+                      Do is=1, nspin
+                         ctmp = ctmp +hsr(ia,lmt1,lmt2,is)
+                      End Do
+                   endif
+
+                   csum = csum + fac *ctmp *dl2p(lmt1,lmt2,n,it) *c1 *c2 /sqrt(PAI4)
+                end do
+             end do
+          end do
+          csum = csum + rhcorpw(ir_nucl,it)/PAI4
+
+          contact_charge_density(ia) = csum /radr_paw( ir_nucl,it )**2
+       end do
+    end do
+
+#else
+    Do it=1, ntyp
+       call calc_nuclear_radius( it, ir_nucl )
+
+       Do ia=1, natm
+          if ( ityp(ia) /= it ) cycle
+
+          csum = 0.0d0
+
+          Do lmt1=1, ilmt(it)
+             il1 = ltp(lmt1,it);  im1 = mtp(lmt1,it);  it1 = taup(lmt1,it)
+             if ( il1 > 1 ) cycle
+
+             Do lmt2=1, ilmt(it)
+                il2 = ltp(lmt2,it);  im2 = mtp(lmt2,it);  it2 = taup(lmt2,it)
+                if ( il2 > 1 ) cycle
+
+                if ( noncol ) then
+                   ctmp = hsr(ia,lmt1,lmt2,1)
+                else
+                   ctmp = 0.0d0
+                   Do is=1, nspin
+                      ctmp = ctmp +hsr(ia,lmt1,lmt2,is)
+                   End Do
+                endif
+                c1 = psirpw( ir_nucl, il1, it1, it )
+                c2 = psirpw( ir_nucl, il2, it2, it )
+                !
+                ctmp = ctmp *c1 *c2 /sqrt(PAI4) + rhcorpw(ir_nucl,it)/PAI4
+                csum = csum +ctmp /radr_paw( ir_nucl,it )**2
+             End Do
+          End Do
+          contact_charge_density(ia) = csum
+       End Do
+    End Do
+#endif
+!
+    write(nfout,*)
+    write(nfout,*) '------------  MOSSB  ---------------'
+    write(nfout,*) '!** contact charge density at nuclear radius '
+    write(nfout,*) '!** ia,    density **'
+    Do ia=1, natm
+       write(nfout,'(I5,F25.12)') ia, contact_charge_density(ia)
+    End Do
+    write(nfout,*) '----------------------------------------'
+    write(nfout,*)
+
+    deallocate( contact_charge_density )
+
+  contains
+
+    subroutine calc_nuclear_radius( it, ir_nucl )
+      integer, intent(in) :: it
+      integer, intent(out):: ir_nucl
+
+      integer :: ir
+      real(kind=DP) :: r0, coeff, mass_no, r_nucl
+
+      r0 = 1.20d0 *1.0D-5/ Bohr                    ! in bohr
+
+      coeff = 1.66053d-27 /9.1093897d-31
+      mass_no = amion(it) /coeff
+
+      r_nucl = r0 *( mass_no )**(1.0d0/3.d0)
+
+      Do ir=1, nmesh(it)
+         if ( radr_paw(ir,it) > r_nucl ) exit
+      End Do
+      ir_nucl = ir
+    end subroutine calc_nuclear_radius
+
+  end subroutine m_PAWCD_calc_contact_density
+
+
 end module m_PAW_ChargeDensity
diff -uprN phase0_2015.01/src_phase/m_PAW_XC_Potential.F90 phase0_2015.01.01/src_phase/m_PAW_XC_Potential.F90
--- phase0_2015.01/src_phase/m_PAW_XC_Potential.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_PAW_XC_Potential.F90	2016-07-12 12:51:19.000000000 +0900
@@ -7,7 +7,7 @@
 #endif
 !=======================================================================
 !
-!  SOFTWARE NAME : PHASE ($Revision: 416 $)
+!  SOFTWARE NAME : PHASE ($Revision: 494 $)
 !
 !  MODULE: m_PAW_XC_Potential
 !
@@ -661,7 +661,7 @@ contains
         dnps_dph = 0.d0
         ddnps_ddr = 0.d0
                
-        if(xctype == 'ldapw91' .or. xctype == 'ldapbe ') then
+        if(xctype == 'ldapw91' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf' ) then
             grad_nae=0.d0;grad_tnae=0.d0
             grad_nps=0.d0;grad_tnps=0.d0
         else
@@ -692,7 +692,7 @@ contains
                                 ,grad_tnps(1:nrc),wos(1:nrc),texc &
                                 ,dF_dnps(1:nrc,1:nspin))
          exc_ps=exc_ps+texc*omg_wght(ith)
-      else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe ') then
+      else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf') then
          call ex_ggapbe (nspin,nspin,1,nrc,nae(1:nrc,1:nspin) &
                                     ,grad_nae(1:nrc,1:nspin) &
                                     ,wos(1:nrc),texc &
@@ -2008,7 +2008,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
                         dFc_dab_ps(ir)*nanb_ps_sph(ir,1)   
                 end if
 
-                if(check_of_xctype()==GGA .and. xctype /= 'ldapw91' .and. xctype /= 'ldapbe ') then 
+                if(check_of_xctype()==GGA .and. xctype /= 'ldapw91' .and. xctype /= 'ldapbe ' &
+                & .and. xctype /= 'vdwdf') then 
                     if(dabs(grad_nae(ir,1)) < DELTA10) cycle
                     if(dabs(grad_tnae(ir)) < DELTA10) cycle
                     if(dabs(grad_nps(ir,1)) < DELTA10) cycle
@@ -2056,7 +2057,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
             if(vflag == VXC_AND_EXC) then
             
                 if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                            .and. xctype /= 'ldapbe ') then
+                                            .and. xctype /= 'ldapbe ' &
+                                            .and. xctype /= 'vdwdf' ) then
                     do ir=1,nrc,dnr
                         if(dabs(grad_nae(ir,1)) > 1.d-9) then
                             iga_ae = 1.d0/grad_nae(ir,1)
@@ -2181,7 +2183,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
 !                dFxcdnb_ae_sph=0.d0
 !                dFxcdnb_ps_sph=0.d0
                 if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                            .and. xctype /= 'ldapbe ') then
+                                            .and. xctype /= 'ldapbe ' &
+                                            .and. xctype /= 'vdwdf' ) then
                     num_isph_2tm=num_isph_g_g
                     isph_2tm=isph_g_g
                 else
@@ -2214,7 +2217,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
                         end if
                         
                         if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                                    .and. xctype /= 'ldapbe ') then 
+                                                    .and. xctype /= 'ldapbe ' &
+                                                    .and. xctype /= 'vdwdf' ) then 
                             sum1 = sum1 + &
                                 0.5d0*dFadgaga_ae(ir)*gaga_ae_sph(ir,nsph) + &
                                 0.5d0*dFadgg_ae(ir)*gg_ae_sph(ir,nsph) + &
@@ -2317,7 +2321,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
                         end if
                         
                         if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                                    .and. xctype /= 'ldapbe ') then
+                                                    .and. xctype /= 'ldapbe ' &
+                                                    .and. xctype /= 'vdwdf' ) then
                             sum1 = sum1 + &
                                 0.5d0*dGadgaga_ae(ir)*gaga_ae_sph(ir,nsph) + &
                                 dGadnaga_ae(ir)*naga_ae_sph(ir,nsph) 
@@ -3096,7 +3101,7 @@ contains
     !        dnps_dph = 0.d0
     !        ddnps_ddr = 0.d0
                    
-            if(xctype == 'ldapw91' .or. xctype == 'ldapbe ') then
+            if(xctype == 'ldapw91' .or. xctype == 'ldapbe ' .or. xctype == 'vdwdf') then
                 grad_nae2_sph=0.d0;grad_tnae2_sph=0.d0
                 grad_nps2_sph=0.d0;grad_tnps2_sph=0.d0
                 grad_nae=0.d0;grad_tnae=0.d0
@@ -3180,7 +3185,7 @@ contains
                                     ,dFc_dagg_ps(1:nrc) &
                                     ,dFc_dbgg_ps(1:nrc) &
                                     ,dFc_dabg_ps(1:nrc))
-          else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe ') then
+          else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf') then
               call ex_ggapbe_paw_drv2(nrc,dnr,nspin &
                                     ,nae_sph(1:nrc,1:nspin,1) &
                                     ,grad_nae(1:nrc,1:nspin) &
diff -uprN phase0_2015.01/src_phase/m_Parallelization.F90 phase0_2015.01.01/src_phase/m_Parallelization.F90
--- phase0_2015.01/src_phase/m_Parallelization.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Parallelization.F90	2016-07-12 12:51:19.000000000 +0900
@@ -3,7 +3,7 @@
 #endif
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 482 $)
 !
 !  MODULE: m_Parallelization
 !
@@ -65,7 +65,7 @@
 
 module m_Parallelization
 !   (m_Parallel)
-! $Id: m_Parallelization.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Parallelization.F90 482 2016-04-08 08:40:45Z jkoga $
   use m_Const_Parameters, only       : ON, OFF, tag_npes_etc
   use m_ErrorMessages
 
@@ -106,6 +106,8 @@ module m_Parallelization
   integer, allocatable, dimension(:) :: mpi_e_world  ! kd(0:nrank_k-1)
   integer                            :: ista_kngp, iend_kngp, np_kngp, mp_kngp
   integer, allocatable, dimension(:) :: is_kngp, ie_kngp, nel_kngp
+  integer                            :: ista_kngp_exx, iend_kngp_exx, np_kngp_exx, mp_kngp_exx
+  integer, allocatable, dimension(:) :: is_kngp_exx, ie_kngp_exx, nel_kngp_exx
 ! natm for fxyzew_l
   integer                            :: ista_atm, iend_atm, np_atm, mp_atm
   integer, allocatable, dimension(:) :: is_atm, ie_atm, nel_atm
@@ -117,6 +119,12 @@ module m_Parallelization
   integer                            :: ista_nn,iend_nn,np_nn,mp_nn
   integer, allocatable, dimension(:) :: is_nn, ie_nn, nel_nn
 
+  integer                            :: ista_nq, iend_nq, np_nq, mp_nq
+  integer, allocatable, dimension(:) :: is_nq, ie_nq, nel_nq, map_nq, map_z_nq
+
+!  integer                            :: ista_nval,iend_nval,np_nval,mp_nval
+!  integer, allocatable, dimension(:) :: is_nval, ie_nval, nel_nval
+
 ! natm2 for s_ew in m_Ionic_System
   integer                            :: ista_atm2, iend_atm2, np_atm2, mp_atm2
   integer, allocatable, dimension(:) :: is_atm2, ie_atm2, nel_atm2
@@ -172,6 +180,7 @@ module m_Parallelization
   integer,save                       :: mype_conf = 0
   integer                            :: nrank_conf
 
+
 ! for nfft
   integer                            :: ista_ffth, iend_ffth, np_ffth, mp_ffth
   integer, allocatable, dimension(:) :: is_ffth, ie_ffth, nel_ffth
@@ -1803,6 +1812,31 @@ contains
     enddo
   end subroutine m_Parallel_init_mpi_rspace_aug
 
+  subroutine m_Parallel_init_mpi_nq(nfout,ipri,printable,nq)
+    integer, intent(in) :: nfout, ipri,nq
+    logical, intent(in) :: printable
+    integer :: i,j,ip
+
+    allocate(is_nq(0:npes-1));ista_nq=nq+1
+    allocate(ie_nq(0:npes-1));iend_nq=0
+    allocate(nel_nq(0:npes-1));nel_nq=0
+    allocate(map_nq(nq));map_nq=0
+    allocate(map_z_nq(nq));map_z_nq=0
+
+    call set_block_range(nq,npes,nel_nq,is_nq,ie_nq,.true.,map_nq)
+    ista_nq = is_nq(mype)
+    iend_nq = ie_nq(mype)
+    np_nq = nel_nq(mype)
+    mp_nq = maxval(nel_nq)
+    j = 0
+    do ip = 1, npes
+       do i = 1, nel_nq(ip-1)
+          j = j + 1
+          map_z_nq(j) = i
+       end do
+    end do
+  end subroutine m_Parallel_init_mpi_nq
+
   subroutine m_Parallel_init_mpi_atm(nfout,ipri,printable,natm)
     integer, intent(in) :: nfout,ipri,natm
     logical, intent(in) :: printable
diff -uprN phase0_2015.01/src_phase/m_Phonon.F90 phase0_2015.01.01/src_phase/m_Phonon.F90
--- phase0_2015.01/src_phase/m_Phonon.F90	2015-09-15 12:16:16.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Phonon.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  SOFTWARE NAME : PHASE ($Revision: 409 $)
+!  SOFTWARE NAME : PHASE ($Revision: 460 $)
 !
 !  MODULE: m_Phonon
 !
@@ -32,7 +32,7 @@
 !
 !
 module m_Phonon
-! $Id: m_Phonon.F90 409 2014-10-27 09:24:52Z jkoga $
+! $Id: m_Phonon.F90 460 2015-09-15 02:53:17Z jkoga $
 use m_Const_parameters,   only : DP, ON, OFF, NOCONV, FMAXVALLEN, PAI, PAI2, PAI4 &
                          &     , PHONON_GAMMA, PHONON_BAND, PHONON_DOS &
                          &     , LOWER, CARTS, UNIT_PIEZO_CONST &
diff -uprN phase0_2015.01/src_phase/m_PlaneWaveBasisSet.F90 phase0_2015.01.01/src_phase/m_PlaneWaveBasisSet.F90
--- phase0_2015.01/src_phase/m_PlaneWaveBasisSet.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_PlaneWaveBasisSet.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 481 $)
 !
 !  MODULE:  m_PlaneWaveBasisSet
 !
@@ -34,9 +34,41 @@
 !
 !   Revised for the GAMMA point (k=(0,0,0)) by T. Yamasaki, April 2006.
 !
+
+#ifdef __TIMER_SUB__
+#   define __TIMER_SUB_START(a)  call timer_sta(a)
+#   define __TIMER_SUB_STOP(a)   call timer_end(a)
+#else
+#   define __TIMER_SUB_START(a)
+#   define __TIMER_SUB_STOP(a)
+#endif
+#ifdef __TIMER_DO__
+#   define __TIMER_DO_START(a)   call timer_sta(a)
+#   define __TIMER_DO_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_DO_START(a)
+#   define __TIMER_DO_STOP(a)
+#endif
+#ifdef FJ_TIMER
+#   define __TIMER_FJ_START_w_BARRIER(str,a)   call mpi_barrier(str,ierr) ;   call timer_sta(a)
+#   define __TIMER_FJ_START(a)   call timer_sta(a)
+#   define __TIMER_FJ_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_FJ_START_w_BARRIER(str,a)
+#   define __TIMER_FJ_START(a)
+#   define __TIMER_FJ_STOP(a)
+#endif
+#ifdef __TIMER_INIDO__
+#   define __TIMER_INIDO_START(a)   call timer_sta(a)
+#   define __TIMER_INIDO_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_INIDO_START(a)
+#   define __TIMER_INIDO_STOP(a)
+#endif
+
 module m_PlaneWaveBasisSet
 !    ( m_pwBS )
-! $Id: m_PlaneWaveBasisSet.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_PlaneWaveBasisSet.F90 481 2016-03-25 02:51:57Z jkoga $
   use m_Crystal_Structure,  only : nopr, altv,rltv, m_CS_op_in_PUCV
   use m_Kpoints,            only : kv3,kv3_ek,vkxyz,qwgt, k_symmetry
   use m_FFT,                only : fft_box_size_WF, fft_box_size_CD, fft_box_size_pWF &
@@ -101,6 +133,11 @@ module m_PlaneWaveBasisSet
   integer :: kg_tfw = 0
 ! ========================== 13.0U2
 
+! ===== EXP_CELLOPT ==== 2015/09/24
+  integer :: kg1_prev = 0
+  integer :: kgp_prev = 0
+! ====================== 2015/09/24
+
   integer, dimension(3)                 :: n_rGv
   integer, dimension(3)                 :: n_rGpv
   integer, dimension(3)                 :: n_rGpv_reduced
@@ -465,10 +502,7 @@ contains
     integer, parameter :: CRITICAL_VECTOR_LENGTH = 10000
 
     integer          :: id_sname = -1
-#ifdef __TIMER_SUB__
-  call timer_sta(1221)
-#endif
-
+                                                     __TIMER_SUB_START(1221)
     call tstatc0_begin('m_pwBS_for_each_WF ',id_sname)
 
     if(ipri >= 2) then
@@ -778,9 +812,7 @@ contains
     end if
 
     call tstatc0_end(id_sname)
-#ifdef __TIMER_SUB__
-  call timer_end(1221)
-#endif
+                                                     __TIMER_SUB_STOP(1221)
   contains
     subroutine wd_ngshell_range
       integer :: jg, j,n1,n2,n3
@@ -858,9 +890,7 @@ contains
     kg1_exx = 0
 
     do j = 1, kg_exx
-       ga = ngabc(j,1)
-       gb = ngabc(j,2)
-       gc = ngabc(j,3)
+       ga = ngabc(j,1);    gb = ngabc(j,2);    gc = ngabc(j,3)
        grvv = dsqrt(ttr(1)*ga*ga + ttr(2)*gb*gb + ttr(3)*gc*gc &
             &     + ttr(4)*ga*gb + ttr(5)*gb*gc + ttr(6)*gc*ga )
        if(grvv <= gmax_exx) then
@@ -884,9 +914,7 @@ contains
     kg1p_exx = 0
 
     do j = 1, kgp_exx
-       ga = ngabc(j,1)
-       gb = ngabc(j,2)
-       gc = ngabc(j,3)
+       ga = ngabc(j,1);    gb = ngabc(j,2);    gc = ngabc(j,3)
        grvv = dsqrt(ttr(1)*ga*ga + ttr(2)*gb*gb + ttr(3)*gc*gc &
             &     + ttr(4)*ga*gb + ttr(5)*gb*gc + ttr(6)*gc*ga )
        if(grvv <= gmaxp_exx) then
@@ -2751,6 +2779,8 @@ contains
      if(allocated(nbase)) deallocate(nbase)
  
      if(allocated(ylm_l)) deallocate(ylm_l)
+
+     if(allocated(nbase_gamma)) deallocate(nbase_gamma)
  
 #ifdef _MPIFFT_
      if(allocated(igfp_l_c)) deallocate(igfp_l_c)
@@ -3190,4 +3220,14 @@ contains
   end subroutine m_pwBS_sphrp_exx
 
 
+! ==== EXP_CELLOPT === 2015/09/24
+  subroutine m_pwBS_store_prev_kg1_kgp
+    kg1_prev = kg1;   kgp_prev = kgp
+    if ( mype == 0 ) then
+       write(nfout,*) '** kg1_prev is ', kg1_prev
+       write(nfout,*) '** kgp_prev is ', kgp_prev
+    endif
+  end subroutine m_pwBS_store_prev_kg1_kgp
+! =================== 2015/09/24
+
 end module m_PlaneWaveBasisSet
diff -uprN phase0_2015.01/src_phase/m_Positron_Wave_Functions.F90 phase0_2015.01.01/src_phase/m_Positron_Wave_Functions.F90
--- phase0_2015.01/src_phase/m_Positron_Wave_Functions.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Positron_Wave_Functions.F90	2016-07-12 12:51:19.000000000 +0900
@@ -15,9 +15,9 @@
 !
 !=======================================================================
 module m_Positron_Wave_Functions
-! $Id: m_Positron_Wave_Functions.F90 417 2014-12-22 11:44:44Z yamasaki $
+! $Id: m_Positron_Wave_Functions.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Const_Parameters, only   : DP,POSITRON,OFF,ON,DIRECT,INVERSE, DELTAevdff, SD, MSD &
-       &                         , DENSITY_ONLY, VTK, CUBE
+       &                         , DENSITY_ONLY, VTK, CUBE, SmallestPositiveNumber
   use m_Control_Parameters, only : af, nspin, npeg,kimg,ipripositron &
        &                         , delta_pev, evaluation_pev_diff &
        &                         , num_extra_pev, sw_gga_p, sw_epsilon_ele, epsilon_ele &
@@ -37,7 +37,7 @@ module m_Positron_Wave_Functions
   use m_Crystal_Structure, only  : univol
   use m_PlaneWaveBasisSet, only  : kg_pwf, kg1_pwf, igf_pstrn, igfp_l, igfp_nonpara &
        &                         , m_pwBS_pstrn_kinetic_energies
-  use m_Electronic_Structure,only: vlhxc_l
+!!  use m_Electronic_Structure,only: vlhxc_l
   use m_Parallelization, only :    mype, npes, ista_kngp, iend_kngp &
        &                         , ista_sfftph, iend_sfftph, ierr, mpi_comm_group
   use m_FFT, only   :              nfft, nfftp, nfftp_nonpara, nfft_pstrn, fft_box_size_pWF &
@@ -51,7 +51,7 @@ module m_Positron_Wave_Functions
   use m_Crystal_Structure,  only : altv
   use m_Ionic_System,         only : ntyp,ityp,zfm3_l,natm2, iatomn &
        &                         , m_IS_pack_all_ions_in_uc
-  use m_epc_potential, only      : tchgr_l, grad_tchgr_l
+  use m_epc_potential, only      : tchgr_l, grad_tchgr_l, vlhxc_p
   use m_PseudoPotential, only    : rhchg_l, ival
 
   implicit none
@@ -60,6 +60,7 @@ module m_Positron_Wave_Functions
   real(kind=DP),allocatable,dimension(:,:,:) :: pzaj   ! positron wave functions d(kg1_pwf,npeg,kimg)
   real(kind=DP),allocatable,dimension(:,:,:) :: pzaj_old ! d(kg1_pwf,npeg,kimg)
   real(kind=DP),allocatable,dimension(:,:) ::   pchg_l ! positron charge in g-space, d(ista_kngp:iend_kngp,kimg)
+  real(kind=DP),allocatable,dimension(:,:) ::   pchgo_l ! positron charge in g-space, d(ista_kngp:iend_kngp,kimg)
   real(kind=DP),allocatable,dimension(:) ::     pchr_l ! positron charge in r-space, d(ista_sfftph:iend_sfftph)
   integer, allocatable, dimension(:) ::         npeordr, nprvf_ordr !d(npeg)
   real(kind=DP),allocatable,dimension(:) ::     pev, pev1   ! d(npeg)
@@ -107,6 +108,9 @@ contains
             end do
          end if
       end do
+#ifdef SINGLE_POSITRON
+      if ( nspin == 1 ) pchg_l = pchg_l /2.0d0
+#endif
     end subroutine substitute_pCD_for_pchg
 
   end subroutine m_pWF_construct_pcharge
@@ -165,12 +169,14 @@ contains
     allocate(pev1(1:npeg)); pev1 = 0.d0
     allocate(pevdff(3))
     allocate(pchg_l(ista_kngp:iend_kngp,kimg))
+    allocate(pchgo_l(ista_kngp:iend_kngp,kimg))
     allocate(pchr_l(ista_sfftph:iend_sfftph))
   end subroutine m_pWF_allocate_pzaj_etc
 
   subroutine m_pWF_deallocate_pzaj_etc()
     if(allocated(pchr_l)) deallocate(pchr_l)
     if(allocated(pchg_l)) deallocate(pchg_l)
+    if(allocated(pchgo_l)) deallocate(pchgo_l)
     if(allocated(pevdff)) deallocate(pevdff)
     if(allocated(pev)) deallocate(pev)
     if(allocated(pev1)) deallocate(pev1)
@@ -233,7 +239,7 @@ contains
     call m_pwBS_pstrn_kinetic_energies(ekin)
 !!$    do is = 1, nspin, af+1
     is = 1
-       if(isolver == MSD) call vlhxc_l_zero_term(vlhxc0,is)
+       if(isolver == MSD) call vlhxc_p_zero_term(vlhxc0,is)
        call Vlocal_in_Rspace(is,afft)
        if(isolver == SD) then
           do ib = 1, npeg
@@ -262,13 +268,13 @@ contains
     deallocate(p)
     call m_pWF_dealloc_afft_etc()
   contains
-    subroutine vlhxc_l_zero_term(vlhxc0,ispin)
+    subroutine vlhxc_p_zero_term(vlhxc0,ispin)
       real(kind=DP), intent(out) :: vlhxc0
       integer, intent(in)        :: ispin
       
-      if(mype == 0) vlhxc0 = vlhxc_l(1,1,ispin)
+      if(mype == 0) vlhxc0 = vlhxc_p(1,1,ispin)
       call mpi_bcast(vlhxc0,1,mpi_double_precision,0,mpi_comm_group,ierr)
-    end subroutine vlhxc_l_zero_term
+    end subroutine vlhxc_p_zero_term
   end subroutine m_pWF_renew_WF_by_SDorCG
 
   subroutine m_pWF_evolve_WFs_again(nfout,mode,dtim_old,dtim_new)
@@ -353,8 +359,12 @@ contains
           evr = pzaj(i,ib,1)
           devr = (ekin(i) - pev(ib))*evr + VlocalpW(i1)*denom
           wdi = ekin(i) + vlhxc0 - pev(ib)
-          fdexp = dexp( -p(i) * wdi * dtim)
-          pzaj(i,ib,1) = (fdexp - 1) * devr/wdi + evr
+          if (dabs(wdi) < SmallestPositiveNumber) then
+             pzaj(i,ib,1) = -p(i)*devr*dtim + evr
+          else
+             fdexp = dexp( -p(i) * wdi * dtim)
+             pzaj(i,ib,1) = (fdexp - 1) * devr/wdi + evr
+          endif
        end do
     else if(kimg==2) then
        do i = 1, kg1_pwf
@@ -364,9 +374,14 @@ contains
           devr = e1*evr+VlocalpW(2*i1-1)*denom
           devi = e1*evi+VlocalpW(2*i1  )*denom
           wdi  = ekin(i) + vlhxc0 - pev(ib)
-          fdexp = dexp( -p(i) * wdi * dtim)
-          pzaj(i,ib,1)    = (fdexp -1)*devr/wdi + evr
-          pzaj(i,ib,kimg) = (fdexp -1)*devi/wdi + evi
+          if (dabs(wdi) < SmallestPositiveNumber) then
+             pzaj(i,ib,1) = -p(i)*devr*dtim + evr
+             pzaj(i,ib,2) = -p(i)*devi*dtim + evi
+          else
+             fdexp = dexp( -p(i) * wdi * dtim)
+             pzaj(i,ib,1)    = (fdexp -1)*devr/wdi + evr
+             pzaj(i,ib,kimg) = (fdexp -1)*devi/wdi + evi
+          endif
        end do
     end if
   end subroutine modified_sd_p
@@ -382,7 +397,7 @@ contains
 
 !!$    do is = 1, nspin, af+1
     is = 1
-    call Vlocal_in_Rspace(is,afft)  ! vlhxc_l -> afft
+    call Vlocal_in_Rspace(is,afft)  ! vlhxc_p -> afft
     call evolve_pWFs_in_subspace ! (is,npeg,damp,ekin,afft,bfft)
 !!$    end do
     call m_pWF_dealloc_afft_etc()
@@ -846,7 +861,7 @@ contains
     call m_pwBS_pstrn_kinetic_energies(ekin)
 !!$    do is = 1, nspin, af+1
     is = 1
-       call Vlocal_in_Rspace(is,afft) ! vlhxc_l -> afft
+       call Vlocal_in_Rspace(is,afft) ! vlhxc_p -> afft
        if(ipripositron >= 3) then
           write(nfout,'(" afft <<m_pWF_energy_eigen_values>>")')
           write(nfout,'( 8f8.4)') (afft(ib),ib=1,100)
@@ -899,7 +914,7 @@ contains
     integer, intent(in) :: is
     real(kind=DP), intent(out), dimension(nfft_pstrn) :: afft
     integer :: i
-    call map_vlhxc_l_to_afft()
+    call map_vlhxc_p_to_afft()
     if(ipripositron >= 3) then
        write(nfout,'(" afft <<Vlocal_in_Rspace>>")')
        write(nfout,'( 8f8.4)') (afft(i),i=1,100)
@@ -907,7 +922,7 @@ contains
 !!$    call m_FFT_pWF(nfout,afft,INVERSE,OFF)  ! afft -> afft
     call m_FFT_WF(POSITRON,nfout,afft,INVERSE,OFF)  ! afft -> afft
   contains
-    subroutine map_vlhxc_l_to_afft()
+    subroutine map_vlhxc_p_to_afft()
       integer :: i,i1,ri, iend
       if(npes >= 2)  allocate(afft_mpi(nfft_pstrn))
       afft = 0.d0
@@ -916,14 +931,14 @@ contains
       do ri = 1, kimg
          do i = ista_kngp, iend
             i1 = kimg*igf_pstrn(i) + (ri-kimg)
-            afft(i1) = vlhxc_l(i,ri,is)
+            afft(i1) = vlhxc_p(i,ri,is)
          end do
       end do
       if(ipripositron >= 3) then
-         write(nfout,'(" -- vlhxc_l <<map_vlhxc_l_to_afft>>")')
+         write(nfout,'(" -- vlhxc_p <<map_vlhxc_p_to_afft>>")')
          do ri = 1, kimg
             write(nfout,'(" kimg = ", i8)') kimg
-            write(nfout,'(10f8.4)') (vlhxc_l(i,ri,is),i=ista_kngp,iend)
+            write(nfout,'(10f8.4)') (vlhxc_p(i,ri,is),i=ista_kngp,iend)
          end do
       end if
       if(npes >= 2) then
@@ -932,7 +947,7 @@ contains
          afft = afft_mpi
       end if
       if(npes >= 2) deallocate(afft_mpi)
-    end subroutine map_vlhxc_l_to_afft
+    end subroutine map_vlhxc_p_to_afft
 
   end subroutine Vlocal_in_Rspace
 
@@ -1041,6 +1056,10 @@ contains
 !    core_annihilation_rate=sss/dsqrt(4.d0*3.1415926d0)
      core_annihilation_rate=sss/2.d0*univol
 
+#ifdef SINGLE_POSITRON
+     core_annihilation_rate = sss *univol
+#endif
+
   end subroutine m_pWF_core_annihilation
 
   subroutine m_pWF_valence_annihilation()
@@ -1386,6 +1405,11 @@ contains
 
      if(nspin==1) then
        valence_annihilation_rate=sss/2.d0
+
+#ifdef SINGLE_POSITRON
+       valence_annihilation_rate = sss
+#endif
+
        ssk=((0.5292d0)**3)*1.d4/(2.8d0**2)/3.d0/3.1415926d0
        ssk=ssk
        sss=core_annihilation_rate+valence_annihilation_rate
@@ -1552,6 +1576,11 @@ subroutine m_pWF_wlifetime()
   end if
 end subroutine m_pWF_wlifetime
 
+  subroutine m_pWF_update_lifetime
+    if(ipripositron >= 1) write(nfout,*)'lifetime: ',p_old_lifetime,p_new_lifetime
+    p_old_lifetime=p_new_lifetime
+  end subroutine m_pWF_update_lifetime
+
   subroutine m_pWF_wd_pzaj(nfout,comment,nc)
     integer,        intent(in) :: nfout, nc
     character(len=nc), intent(in) :: comment
diff -uprN phase0_2015.01/src_phase/m_PseudoPotential.F90 phase0_2015.01.01/src_phase/m_PseudoPotential.F90
--- phase0_2015.01/src_phase/m_PseudoPotential.F90	2015-09-14 15:18:42.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_PseudoPotential.F90	2016-07-12 12:51:19.000000000 +0900
@@ -3,7 +3,7 @@
 #define _PAW_CONTINUE_DATA_PREVIOUS_BEFORE_201403_STYLE_
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 454 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 461 $)
 !
 !  MODULE: m_PseudoPotential
 !
@@ -72,7 +72,7 @@
 
 module m_PseudoPotential
 !     (m_PP)
-!  $Id: m_PseudoPotential.F90 454 2015-09-07 07:58:39Z yamasaki $
+!  $Id: m_PseudoPotential.F90 461 2015-09-15 04:27:48Z ktagami $
 !
 !  The original subroutine name was "pspot", which had been coded by
 !  Y. Morikawa (JRCAT-NAIR) in 1993 or ealier.
@@ -2575,8 +2575,10 @@ contains
                 comment_statement = .false.
              endif
           enddo
+
           select case (fn_number_of_words(str))
-          case (9)
+!          case (9)
+          case (4,9)
              is_gncpp = pp_GNCPP1 ! = 1
              pptype_char = "GNCPP1"
 !!$             if(ipripp>=1 .and. .not.ppprinted) write(nfout,'(" !PP PP type --> GNCPP1")')
diff -uprN phase0_2015.01/src_phase/m_Raman.F90 phase0_2015.01.01/src_phase/m_Raman.F90
--- phase0_2015.01/src_phase/m_Raman.F90	2015-09-14 15:18:51.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Raman.F90	2016-07-12 12:51:19.000000000 +0900
@@ -16,7 +16,8 @@ module m_Raman
   use m_Control_Parameters, only : sw_lo_to_splitting, sw_phonon_oneshot, &
        &                           printable, ipriphonon, &
        &                           sw_phonon_with_epsilon, sw_calc_dielectric_tensor, &
-       &                           num_phonon_calc_mode, sw_excitation
+       &                           num_phonon_calc_mode, sw_excitation, &
+       &                           sw_phonon, sw_calc_force, sw_use_add_proj, sw_raman
 
   use m_Const_Parameters, only : DP, ON, OFF, FMAXVALLEN, LOWER, PAI4, CMPLDP
 
@@ -53,6 +54,8 @@ module m_Raman
   character(len("classical") ), private, parameter :: &
        &                     tag_classical = "classical"
 !
+  character(len("sw_raman") ), private, parameter :: &
+       &                    tag_sw_raman = "sw_raman"
   character(len("sw_phonon_with_epsilon") ), private, parameter :: &
        &                    tag_sw_phonon_with_epsilon = "sw_phonon_with_epsilon"
   character(len("sw_calc_dielectric_tensor") ), private, parameter :: &
@@ -208,12 +211,23 @@ contains
     logical :: tf
 
     if( f_selectBlock( tag_raman ) == 0) then
+! === 2015/10/19
+       if ( f_getIntValue( tag_sw_raman, iret) == 0 ) sw_raman = iret
+       if ( sw_raman == OFF ) return
+
+       if ( sw_phonon == ON ) sw_phonon_with_epsilon = on
+! === 2015/10/19
+
        if ( f_getIntValue( tag_sw_phonon_with_epsilon, iret) == 0) & 
             &  sw_phonon_with_epsilon = iret
        
        if ( sw_phonon_with_epsilon == ON ) then
           sw_excitation = ON
           write(nfout,*) "!** sw_excitation is turned on"
+! === 2015/10/19
+          sw_use_add_proj = ON
+          write(nfout,*) "!** sw_use_add_proj is turned on"
+! === 2015/10/19
        endif
 
        if ( f_getStringValue( tag_raman_calc_scheme, rstr, LOWER) == 0 ) then
@@ -221,6 +235,12 @@ contains
           if(tf) sw_phonon_with_epsilon = on
        endif
        
+! === 2015/10/19
+       if ( sw_phonon_with_epsilon == ON ) then
+          if ( sw_calc_force == ON ) sw_calc_dielectric_tensor = ON
+       endif
+! === 2015/10/19
+
        if ( f_getIntValue( tag_sw_calc_dielectric_tensor, iret) == 0) & 
             &  sw_calc_dielectric_tensor = iret
        
@@ -323,8 +343,9 @@ contains
        endif
 
        if( f_selectBlock( tag_spectrum ) == 0) then
-          if( f_getRealValue( tag_hwhm, dret,'') == 0 ) then
-             raman_spectra_hwhm = dret            ! in cm-1
+          if( f_getRealValue( tag_hwhm, dret,'') == 0 &
+               &  .or. f_getRealValue( tag_linewidth, dret,'') == 0 ) then
+             raman_spectra_hwhm = dret                                   ! in cm-1
              if ( raman_spectra_hwhm < 0.0 ) raman_spectra_hwhm = 5.0d0
           endif
 
@@ -375,6 +396,7 @@ contains
   end subroutine m_Raman_initialize
 
   subroutine m_Raman_print_param
+    if ( sw_raman == OFF ) return
     if ( sw_phonon_with_epsilon == OFF ) return
 
     write(nfout,'(A)') '!** ----- RAMAN setup ----- '
diff -uprN phase0_2015.01/src_phase/m_ThomasFermiW_Potential.F90 phase0_2015.01.01/src_phase/m_ThomasFermiW_Potential.F90
--- phase0_2015.01/src_phase/m_ThomasFermiW_Potential.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_ThomasFermiW_Potential.F90	2016-07-12 12:51:19.000000000 +0900
@@ -84,7 +84,7 @@ contains
        ik = ista_k
        call new_radr_and_wos(ik,it)                 ! --> radr, wos
 
-       rcut = rad_cov_default( iatomn(it) )
+       rcut = rad_cov_default( nint(iatomn(it)) ) ! Revised according to a report from ASMS Co.ltd, 10 March 2016.
 
        Do il1=1, lpsmax(it)
           if ( il1 == iloc(it) ) cycle
diff -uprN phase0_2015.01/src_phase/m_Total_Energy.F90 phase0_2015.01.01/src_phase/m_Total_Energy.F90
--- phase0_2015.01/src_phase/m_Total_Energy.F90	2015-09-14 15:19:25.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Total_Energy.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 454 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE:  m_Total_Energy
 !
@@ -33,7 +33,7 @@
 !
 module m_Total_Energy
 !    ( m_TE )
-! $Id: m_Total_Energy.F90 454 2015-09-07 07:58:39Z yamasaki $
+! $Id: m_Total_Energy.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Charge_Density,       only : chgq_l, chgqo_l, hsr
 !fj$$  use m_XC_Potential,         only : vxc_l, exc, m_XC_cal_potential
   use m_XC_Potential,         only : vxc_l, exc,eex,ecor
@@ -145,6 +145,15 @@ module m_Total_Energy
 
   use m_Control_Parameters,  only : m_CtrlP_get_isolver_now
 
+! === Positron SCF ==== 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, npeg, positron_method
+  use m_Const_Parameters,   only :  positron_GGGC
+  use m_epc_potential,  only : ecorr_pztr => epc, m_epc_cal_potential, vepc_l
+  use m_PlaneWaveBasisSet, only : kg1_pwf
+  use m_Positron_Wave_Functions,  only : pzaj, nprvf_ordr, pchg_l, pchgo_l, pev
+  use m_PlaneWaveBasisSet,    only : m_pwBS_pstrn_kinetic_energies
+! ==================== 2015/11/28
+
   implicit none
   include 'mpif.h'
 
@@ -215,6 +224,10 @@ module m_Total_Energy
   real(kind=DP),private          :: espinorb_old, espinorb_now
 ! ========================================================================== 11.0
 
+! === positron
+  real(kind=DP) :: ekin_pztr, elocal_pztr, ehartr_ep, eohxc_pztr
+! ===
+
   integer,private, parameter     :: len_str = 132
   character(len=len_str),private    ::  str
 
@@ -316,6 +329,21 @@ contains
 #endif        
     end if
 
+    if ( sw_positron /= OFF ) then
+       if ( positron_method == Positron_GGGC ) then
+          call get_local_pot_energy_pztr
+          call get_xc_and_HE_of_old_CD_pztr( vepc_l )
+          call get_hartree_energy_inter_ep
+          call m_epc_cal_potential( nfout, chgq_l )
+
+         if (iteration_electronic==1) then
+            call get_kinetic_energy_pztr_direct(nfout)
+         else
+            call get_kinetic_energy_pztr(nfout)
+         endif
+       endif
+    endif
+
 #ifdef ENABLE_ESM_PACK
 ! ============== KT_mod =============================== 13.0U2
 !    if(sw_esm==ON.or.sw_hybrid_functional==ON)then
@@ -549,6 +577,13 @@ contains
                  eohxc = eohxc &
                   & + chgqo_l(i,ik,1)*chgq_l(i,ik,1)*screening%phik(i)
              end if
+             if ( sw_positron /= OFF ) then
+                if ( positron_method == positron_GGGC ) then
+                   eohxc = eohxc &
+                        & - (PAI4 *pchgo_l(i,ik)/gr_l(i)**2) &
+                        &   * chgq_l(i,ik,1)
+                endif
+             endif
           end do
        else if(nspin == 2) then
           do i = ist, iend_kngp  !for mpi
@@ -567,6 +602,13 @@ contains
                   & + (chgqo_l(i,ik,UP)+chgqo_l(i,ik,DOWN))* &
                   &   (chgq_l(i,ik,UP)+chgq_l(i,ik,DOWN))*screening%phik(i)
              end if
+             if ( sw_positron /= OFF ) then
+                if ( positron_method == positron_GGGC ) then
+                   eohxc = eohxc &
+                        & - (PAI4 *pchgo_l(i,ik)/gr_l(i)**2) &
+                        &    *(chgq_l(i,ik,UP)+chgq_l(i,ik,DOWN))
+                end if
+             endif
           end do
        end if
     end do
@@ -1250,6 +1292,142 @@ contains
      deallocate(ekin)
   end subroutine get_kinetic_energy_directly
 
+! ==== positron ===
+ subroutine get_xc_and_HE_of_old_CD_pztr(vepc_l)
+    real(kind=DP), intent(in) :: vepc_l(ista_kngp:iend_kngp,kimg,nspin)
+    integer ik, i, ispin
+    integer ist !mpi
+    real(kind=DP) :: eohxc_pztr_mpi
+
+    eohxc_pztr = 0.d0
+    do ik = 1, kimg
+       do ispin = 1, nspin
+          if(mype==0) then
+             eohxc_pztr = eohxc_pztr + vepc_l(1,ik,ispin)*pchg_l(1,ik)
+          endif
+       end do
+       ist = ista_kngp
+       if(ist == 1) ist = 2
+
+       if(nspin == 1) then
+          do i = ist, iend_kngp  !for mpi
+             eohxc_pztr = eohxc_pztr &
+                  & + (vepc_l(i,ik,1) -PAI4*chgqo_l(i,ik,1)/gr_l(i)**2) &
+                  &   * pchg_l(i,ik)
+          end do
+       else if(nspin == 2) then
+          stop "YYY"
+       end if
+    end do
+    if(npes > 1) then
+       call mpi_allreduce(eohxc_pztr,eohxc_pztr_mpi,1 &
+            &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+       eohxc_pztr = eohxc_pztr_mpi
+    end if
+!
+    eohxc_pztr = univol *eohxc_pztr
+  end subroutine get_xc_and_HE_of_old_CD_pztr
+
+  subroutine get_kinetic_energy_pztr(nfout)
+    integer,intent(in) :: nfout
+    integer :: ik,ib,ig
+    real(kind=DP) :: ekinet_tmp, ekinet_mpi
+
+    ekin_pztr = 0.0
+    Do ib=1, npeg
+       if ( nprvf_ordr(ib) /= 1 ) cycle
+       ekin_pztr = ekin_pztr + pev(ib)
+    End do
+    ekin_pztr = ekin_pztr  -eohxc_pztr -elocal_pztr
+  end subroutine get_kinetic_energy_pztr
+
+  subroutine get_kinetic_energy_pztr_direct(nfout)
+    integer,intent(in) :: nfout
+    integer :: ik,ib,ig
+    real(kind=DP), allocatable, dimension(:)       :: ekin
+    real(kind=DP) :: ekinet_tmp, ekinet_mpi, e1
+
+    allocate(ekin(kg1_pwf));ekin=0.d0
+
+    ekin_pztr = 0.d0
+
+    call m_pwBS_pstrn_kinetic_energies(ekin)
+
+    do ib=1, npeg
+       if ( nprvf_ordr(ib) /= 1 ) cycle
+       ekinet_tmp=0.d0
+       if (kimg==1) then
+          do ig=1, kg1_pwf
+             ekinet_tmp = ekinet_tmp + ekin(ig)*pzaj(ig,ib,1)**2
+          enddo
+       else if (kimg==2) then
+          do ig=1, kg1_pwf
+             ekinet_tmp = ekinet_tmp + ekin(ig)*( pzaj(ig,ib,1)**2 &
+                  &                              +pzaj(ig,ib,2)**2)
+          enddo
+       endif
+       ekin_pztr = ekin_pztr +ekinet_tmp
+    enddo
+    deallocate(ekin)
+
+  end subroutine get_kinetic_energy_pztr_direct
+
+  subroutine get_hartree_energy_inter_ep
+    integer ik, i
+    integer :: ist !mpi
+    real(kind=DP) :: ehartr_mpi
+
+    ehartr_mpi = 0.d0
+    ehartr_ep = 0.d0
+
+    do ik = 1, kimg
+
+       ist = ista_kngp
+       if(ist == 1) ist = 2
+
+       if(nspin == 1) then
+          do i = ist, iend_kngp !for mpi
+             ehartr_mpi  = ehartr_mpi + PAI4 *chgq_l(i,ik,1) *pchg_l(i,ik) &
+                  &                          /gr_l(i)**2
+          end do
+       else if(nspin == 2) then
+          stop "PPP"
+       endif
+    end do
+    call mpi_allreduce(ehartr_mpi,ehartr_ep,1 &
+         &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+
+    ehartr_ep    = -univol *ehartr_ep
+
+  end subroutine get_hartree_energy_inter_ep
+
+  subroutine get_local_pot_energy_pztr
+    integer       :: ik, it, i, ig
+    real(kind=DP) :: elocal_mpi
+
+    elocal_pztr = 0.d0
+    do ik = 1, kimg
+       if(nspin == 1) then
+          do it = 1,ntyp
+             do i = ista_kngp, iend_kngp !for mpi
+                elocal_pztr = elocal_pztr &
+                     & + psc_l(i,it)*zfm3_l(i,it,ik)*pchg_l(i,ik)
+             end do
+          end do
+       else
+          stop "PPP"
+       endif
+    end do
+    if(npes > 1) then
+       call mpi_allreduce(elocal_pztr,elocal_mpi,1 &
+            &  ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+       elocal_pztr = elocal_mpi
+    end if
+
+    elocal_pztr    = -univol*elocal_pztr
+  end subroutine get_local_pot_energy_pztr
+! =====
+
   subroutine get_xc_and_HE_of_old_CD_paw(nfout)
     integer, intent(in) :: nfout
     integer :: ispin, it,lmt1, lmt2, il1, im1, il2, im2, ia
@@ -2067,6 +2245,14 @@ contains
     endif
 ! ================= 13.0S
 
+! ==== POSITRON SCF == 2015/11/28
+    if ( sw_positron /= OFF ) then
+       if ( positron_method == Positron_GGGC ) then
+          etotal0 = etotal0 +ekin_pztr +elocal_pztr +ecorr_pztr +ehartr_ep
+       endif
+    endif
+! =================== 2015/11/28
+
     if(sw_dipole_correction == ON) etotal0 = etotal0 + edip
     if(sw_hubbard == ON) etotal0 = etotal0 + ehub0
     if(sw_hybrid_functional == ON) etotal0 = etotal0 + eexx
diff -uprN phase0_2015.01/src_phase/m_Wannier90.F90 phase0_2015.01.01/src_phase/m_Wannier90.F90
--- phase0_2015.01/src_phase/m_Wannier90.F90	2015-09-14 15:19:53.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_Wannier90.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,4 +1,6 @@
 #define WAN90_SAVE_MEMORY
+#define WAN90_SKIP_FFT
+#define WAN90_SPN_FORMATTED
 !=======================================================================
 !
 !  PROGRAM  PHASE/0 2014.01 ($Rev: 110 $)
@@ -48,16 +50,23 @@ module m_Wannier90
        &                         , nrank_e,myrank_e,map_e,ista_e,iend_e,istep_e,idisp_e &
        &                         , map_z,np_e,mpi_k_world,myrank_k,map_k,ista_k,iend_k &
        &                         , ista_snl, iend_snl, ierr, map_ek, nrank_k
-! === KT_add === 2015/02/23, 08/31
+! === KT_add === 2015/02/23, 09/02
   use m_Const_Parameters,   only : Delta07, zi, ON, UP, DOWN
   use m_Control_Parameters,  only : ndim_spinor, sw_use_hardpart_wan90, noncol, &
        &                            spin_component_wan90
   use m_Ionic_System,  only : natm, ityp, cps, pos
   use m_PseudoPotential,   only : dk_wan, nloc, nlmta, m_PP_include_vanderbilt_pot, &
        &                          ltp, mtp, taup, ilmt, lmta, il2p, isph, iqitg, dl2p, &
-       &                          qitg_wan, nqitg, phirpw, psirpw
+       &                          qitg_wan, nqitg, phirpw, psirpw, q
   use m_Electronic_Structure,only: fsr_l, fsi_l
-! ============== 2015/02/23, 08/31
+! ============== 2015/02/23, 09/02
+
+! === KT_add === 2015/09/14
+  use m_Const_Parameters,  only : ELECTRON, DIRECT, INVERSE
+  use m_Electronic_Structure,  only : m_ES_map_wf_on_fftmesh
+  use m_FFT,  only : m_FFT_WF
+  use m_PlaneWaveBasisSet,  only : kg1, kg
+! ============== 2015/09/14
 
   implicit none
 
@@ -77,10 +86,18 @@ module m_Wannier90
   integer, allocatable :: ib_inc(:) ! d(num_bands)
 
   real(kind=DP), allocatable :: projfunc(:,:,:,:) ! d(kg1,n_proj,ista_snl:iend_snl,kimg)
-! ==== KT_add === 2015/04/13
+
+! ==== KT_add === 2015/04/13 & 09/02
   real(kind=DP), allocatable :: dk_unit(:,:)
   logical, allocatable :: centre_on_atom(:)
-! =============== 2015/04/13
+
+  integer, allocatable :: spn_index(:)
+  real(kind=DP), allocatable :: spn_quant_dir(:,:)
+! =============== 2015/04/13 & 09/02
+
+! ==== KT_add === 2015/09/14
+  integer, allocatable :: igf_wan90(:,:,:,:)
+! =============== 2015/09/14
 
   include 'mpif.h'                                      ! MPI
   integer istatus(mpi_status_size)                      ! MPI
@@ -146,10 +163,26 @@ contains
     allocate(zaxis(3,n_proj))
     allocate(xaxis(3,n_proj))
     allocate(zona(n_proj))
-    do i=1,n_proj
-       read(nfwannier,*) centre(1:3,i), lang(i), mr(i), irf(i)
-       read(nfwannier,*) zaxis(1:3,i), xaxis(1:3,i), zona(i)
-    end do
+
+    if ( noncol ) then
+       allocate( spn_index(n_proj) )
+       allocate( spn_quant_dir(3,n_proj) )
+    endif
+
+    if ( noncol ) then
+       do i=1,n_proj
+          read(nfwannier,*) centre(1:3,i), lang(i), mr(i), irf(i)
+          read(nfwannier,*) zaxis(1:3,i), xaxis(1:3,i), zona(i)
+!
+          read(nfwannier,*) spn_index(i), spn_quant_dir(1:3,i)
+       end do
+    else
+       do i=1,n_proj
+          read(nfwannier,*) centre(1:3,i), lang(i), mr(i), irf(i)
+          read(nfwannier,*) zaxis(1:3,i), xaxis(1:3,i), zona(i)
+       end do
+    endif
+
     read(nfwannier,*)
 
     read(nfwannier,*)
@@ -183,7 +216,7 @@ contains
 
     close(nfwannier)
 
-! == KT add == 2015/02/23
+! == KT add == 2015/02/23 & 09/02
 100 continue
     if ( npes > 1 ) then
        call mpi_bcast( calc_only_A, 1, mpi_logical, 0, mpi_comm_group, ierr )
@@ -214,6 +247,17 @@ contains
        call mpi_bcast( zona,     n_proj, mpi_double_precision, 0, &
             &          mpi_comm_group, ierr )
 
+       if ( noncol ) then
+          if ( mype /= 0 ) then
+             allocate( spn_index(n_proj) )
+             allocate( spn_quant_dir(3,n_proj) )
+          endif
+          call mpi_bcast( spn_index, n_proj, mpi_integer, 0, &
+               &          mpi_comm_group, ierr )
+          call mpi_bcast( spn_quant_dir, 3*n_proj, mpi_double_precision, 0, &
+               &          mpi_comm_group, ierr )
+       endif
+
        call mpi_bcast( lang, n_proj, mpi_integer, 0, mpi_comm_group, ierr )
        call mpi_bcast( mr,   n_proj, mpi_integer, 0, mpi_comm_group, ierr )
        call mpi_bcast( irf,  n_proj, mpi_integer, 0, mpi_comm_group, ierr )
@@ -231,7 +275,7 @@ contains
        endif
        call mpi_bcast( exclude_bands, n_exclude, mpi_integer, 0, mpi_comm_group, ierr )
     endif
-! ============= 2015/02/23
+! ============= 2015/02/23 & 09/02
 
     num_bands = nb_wan90 - n_exclude
     allocate(ib_inc(num_bands))
@@ -469,7 +513,10 @@ contains
     end subroutine gather_matrix
 
     subroutine print_mat
-      integer :: ik ,m, n, ik_start, iktmp
+      integer :: ik ,m, n, ik_start, iktmp, is
+      real(kind=DP) :: theta, phi
+      complex(kind=CMPLDP) :: ztmp, z1
+      complex(kind=CMPLDP), allocatable :: spn_weight(:,:)
 
       if ( mype /= 0 ) return
 
@@ -487,15 +534,37 @@ contains
       write(nfwannier,*) num_bands, kv3/nspin, n_proj
 
       if ( noncol ) then
-         do ik = 1, kv3, nspin
-            iktmp = ( ik -1 )/nspin +1
+         allocate( spn_weight(2,n_proj) ); spn_weight = 0.0d0
+
+         Do n=1, n_proj
+            theta = acos( spn_quant_dir(3,n) )
+            phi = atan2( spn_quant_dir(2,n), spn_quant_dir(1,n) )
+!            
+            if ( spn_index(n) == 1 ) then
+               spn_weight(1,n) = exp( zi *phi /2.0d0 ) *cos( theta /2.0d0 )
+               spn_weight(2,n) = -exp( zi *phi /2.0d0 ) *sin( theta /2.0d0 )
+            else
+               spn_weight(1,n) = exp( -zi *phi /2.0d0 ) *sin( theta /2.0d0 )
+               spn_weight(2,n) = exp( -zi *phi /2.0d0 ) *cos( theta /2.0d0 )
+            endif
+         End do
+
+         do ik = 1, kv3, ndim_spinor
+            iktmp = ( ik -1 )/ndim_spinor +1
             do n=1,n_proj
                do m=1,num_bands
+                  ztmp = 0.0d0
+                  Do is=1, ndim_spinor
+                     z1 = dcmplx( a_mat(m,n,ik+is-1,1), a_mat(m,n,ik+is-1,2 ) )
+                     ztmp = ztmp +spn_weight(is,n) *z1
+                  End do
+
                   write(nfwannier,'(3(1x,i5),2(1x,f18.12))') m, n, iktmp, &
-                       &         a_mat(m,n,ik,1:2) +a_mat(m,n,ik+1,1:2)
+                       &         real(ztmp), aimag(ztmp)
                end do
             end do
          end do
+         deallocate( spn_weight )
 
       else
          ik_start = 1
@@ -1181,12 +1250,21 @@ contains
     allocate(m_mat(num_bands,num_bands,ista_k:iend_k,nntot,2)); m_mat = 0.d0
 #endif
 
-    call contrib_softpart
+#ifndef WAN90_SKIP_FFT
+!    call contrib_softpart
+    call contrib_softpart2
+#else
+    call m_Wan90_set_igf_wan90
+    call contrib_softpart3
+#endif
+
     if ( sw_use_hardpart_wan90 == ON ) call contrib_hardpart
 
     call gather_matrix
     call print_mmat
 
+    if ( allocated( dk_unit ) ) deallocate( dk_unit )
+    if ( allocated( igf_wan90 ) ) deallocate( igf_wan90 )
     deallocate(m_mat)
 
   contains
@@ -1300,6 +1378,211 @@ contains
 
     end subroutine contrib_softpart
 
+    subroutine contrib_softpart2          ! FFT and inverse-FFT on u_n(k+b)
+      logical :: shift_k
+      integer :: i,ik1,ik2,nn,n,m,ni,mi,ib1,ib2,nnc(3),ir,im
+      integer :: nffth,ip,r1,r2,r3,ngrid
+      integer :: id1,id2,id12
+      integer :: ik_start, ik_skip, iktmp, is1
+      real(kind=DP) :: rgrid(3),da(3),wr,wi,dvol,ph,sumr,sumi
+      real(kind=DP), allocatable :: wf1(:), wf2(:), zcos(:), zsin(:)
+      real(kind=DP), allocatable :: psi1(:,:,:,:)
+      real(kind=DP), allocatable :: psi2(:,:,:,:)
+
+      nffth = nfft/2
+      ngrid = product(fft_box_size_WF(1:3,1))
+      dvol = 1.d0/dble(ngrid)
+
+      allocate(wf1(nfft),wf2(nfft))
+      allocate(zcos(nffth),zsin(nffth))
+
+      call m_FFT_alloc_WF_work()
+
+      ik_start = 1;  ik_skip = 1
+      if ( .not. noncol ) then
+         if ( nspin == 2 .and. spin_component_wan90 == DOWN ) then
+            ik_start = 2
+         endif
+         if ( nspin == 2 ) ik_skip = 2
+      endif
+
+      do nn=1,nntot
+        do ik1 = ik_start, kv3, ik_skip
+            iktmp = ( ik1 -1 )/nspin + 1
+            is1 = mod( ik1 -1, nspin ) +1
+
+            ik2 = nspin*( nnlist(iktmp,nn) -1 ) +is1
+
+            nnc(1:3) = nncell(1:3,iktmp,nn)
+
+            if(all(nnc(1:3) == 0)) then
+               shift_k = .false.
+            else
+               shift_k = .true.
+             ! Phase factors, exp(iG*r)
+               id1 = fft_box_size_WF(1,0)
+               id2 = fft_box_size_WF(2,0)
+               id12 = id1*id2
+               da(1:3) = 1.d0/fft_box_size_WF(1:3,1)
+               do i=1,nffth
+                  ip = i-1
+                  r3 = ip/id12
+                  r2 = (ip-r3*id12)/id1
+                  r1 = ip-r2*id1-r3*id12
+                  rgrid(1) = dble(r1)*da(1)
+                  rgrid(2) = dble(r2)*da(2)
+                  rgrid(3) = dble(r3)*da(3)
+                  ph = PAI2*dot_product(nnc,rgrid)
+                  zcos(i) = cos(ph);    zsin(i) = sin(ph)
+               end do
+            end if
+            do n=1,num_bands
+               ni = ib_inc(n)
+
+               if ( map_ek(ni,ik2) == mype ) then
+                  ib2 = neordr(ni,ik2)
+
+                  allocate( psi2(kg1,1,ik2:ik2,kimg) ); psi2 = 0.0d0
+                  psi2(1:iba(ik2),1,ik2,:) = zaj_l(1:iba(ik2),map_z(ib2),ik2,:)
+
+                  call m_ES_map_WF_on_fftmesh( ik2, ik2, ik2, psi2, wf2 )
+                  deallocate(psi2)
+
+                  if ( shift_k ) then
+                     call m_FFT_WF( ELECTRON, nfout, wf2, INVERSE, ON )
+                     ! u_n(k+b) = exp(-iG*r) * u_n(k')
+                     ! k+b = k' + G
+                     do i=1,nffth
+                        ir = 2*i-1;    im = ir+1
+                        wr = wf2(ir);  wi = wf2(im)
+                        wf2(ir) = zcos(i)*wr + zsin(i)*wi
+                        wf2(im) = zcos(i)*wi - zsin(i)*wr
+                     end do
+                     call m_FFT_WF( ELECTRON, nfout, wf2, DIRECT, ON )
+                     wf2 = wf2 *dvol
+                  end if
+
+               endif
+
+               call mpi_bcast( wf2, nfft, mpi_double_precision, map_ek(ni,ik2), &
+                    &          mpi_comm_group, ierr )
+
+               do m=1,num_bands
+                  mi = ib_inc(m)
+
+                  if ( map_ek(mi,ik1) /= mype ) cycle
+
+                  ib1 = neordr(mi,ik1)
+
+                  allocate( psi1(kg1,1,ik1:ik1,kimg) ); psi1 = 0.0d0
+                  psi1(1:iba(ik1),1,ik1,:) = zaj_l(1:iba(ik1),map_z(ib1),ik1,:)
+
+                  call m_ES_map_WF_on_fftmesh( ik1, ik1, ik1, psi1, wf1 )
+                  deallocate( psi1 )
+
+                  ! <u_m(k)|u_n(k+b)>
+                  sumr = 0.d0;  sumi = 0.d0
+                  do i=1,nffth
+                     ir = 2*i-1;   im = ir+1
+                     sumr = sumr + wf1(ir)*wf2(ir) + wf1(im)*wf2(im)
+                     sumi = sumi + wf1(ir)*wf2(im) - wf1(im)*wf2(ir)
+                  end do
+                  m_mat(m,n,ik1,nn,1) = sumr
+                  m_mat(m,n,ik1,nn,2) = sumi
+
+               end do
+            end do
+         end do
+      end do
+
+      call m_FFT_dealloc_WF_work()
+
+      deallocate(wf1,wf2);   deallocate(zcos,zsin)
+
+    end subroutine contrib_softpart2
+
+    subroutine contrib_softpart3          ! does not use FFT, but displace G vector
+      logical :: shift_k
+      integer :: i,ik1,ik2,nn,n,m,ni,mi,ib1,ib2,nnc(3),ir,im
+      integer :: nffth,ip,r1,r2,r3,ngrid
+      integer :: id1,id2,id12
+      integer :: ik_start, ik_skip, iktmp, is1
+      real(kind=DP) :: rgrid(3),da(3),wr,wi,dvol,ph,sumr,sumi
+      real(kind=DP), allocatable :: wf1(:), wf2(:), zcos(:), zsin(:)
+      real(kind=DP), allocatable :: psi1(:,:,:,:)
+      real(kind=DP), allocatable :: psi2(:,:,:,:)
+
+      nffth = nfft/2
+      ngrid = product(fft_box_size_WF(1:3,1))
+      dvol = 1.d0/dble(ngrid)
+
+      allocate(wf1(nfft),wf2(nfft))
+
+      ik_start = 1;  ik_skip = 1
+      if ( .not. noncol ) then
+         if ( nspin == 2 .and. spin_component_wan90 == DOWN ) then
+            ik_start = 2
+         endif
+         if ( nspin == 2 ) ik_skip = 2
+      endif
+
+      do nn=1,nntot
+         do ik1 = ik_start, kv3, ik_skip
+            iktmp = ( ik1 -1 )/nspin + 1
+            is1 = mod( ik1 -1, nspin ) +1
+
+            ik2 = nspin*( nnlist(iktmp,nn) -1 ) +is1
+
+            nnc(1:3) = nncell(1:3,iktmp,nn)
+
+            do n=1,num_bands
+               ni = ib_inc(n)
+
+               if ( map_ek(ni,ik2) == mype ) then
+                  ib2 = neordr(ni,ik2)
+
+                  allocate( psi2(kg1,1,ik2:ik2,kimg) ); psi2 = 0.0d0
+                  psi2(1:iba(ik2),1,ik2,:) = zaj_l(1:iba(ik2),map_z(ib2),ik2,:)
+
+                  call m_wan90_map_WF_on_fftmesh( ik2, ik2, ik2, psi2, wf2, &
+                       &                          igf_wan90(:,nnc(1),nnc(2),nnc(3))  )
+                  deallocate(psi2)
+               endif
+
+               call mpi_bcast( wf2, nfft, mpi_double_precision, map_ek(ni,ik2), &
+                    &          mpi_comm_group, ierr )
+
+               do m=1,num_bands
+                  mi = ib_inc(m)
+
+                  if ( map_ek(mi,ik1) /= mype ) cycle
+
+                  ib1 = neordr(mi,ik1)
+
+                  allocate( psi1(kg1,1,ik1:ik1,kimg) ); psi1 = 0.0d0
+                  psi1(1:iba(ik1),1,ik1,:) = zaj_l(1:iba(ik1),map_z(ib1),ik1,:)
+
+                  call m_ES_map_WF_on_fftmesh( ik1, ik1, ik1, psi1, wf1 )
+                  deallocate( psi1 )
+
+                  ! <u_m(k)|u_n(k+b)>
+                  sumr = 0.d0;  sumi = 0.d0
+                  do i=1,nffth
+                     ir = 2*i-1;   im = ir+1
+                     sumr = sumr + wf1(ir)*wf2(ir) + wf1(im)*wf2(im)
+                     sumi = sumi + wf1(ir)*wf2(im) - wf1(im)*wf2(ir)
+                  end do
+                  m_mat(m,n,ik1,nn,1) = sumr
+                  m_mat(m,n,ik1,nn,2) = sumi
+
+               end do
+            end do
+         end do
+      end do
+      deallocate(wf1,wf2)
+
+    end subroutine contrib_softpart3
+
     subroutine contrib_hardpart
       integer :: n, nn, ik1, ik2, nnc(3)
       integer :: myindex, m, mm, ni, mi, kk
@@ -1862,4 +2145,345 @@ contains
   end subroutine deompose_sphr_into_lm
 ! =========== 2015/02/23
 
+! ==== KT_add === 2015/09/04
+  subroutine m_Wan90_gen_mat_spn
+    integer :: npauli = 3
+
+    complex(kind=CMPLDP), allocatable :: spn_mat(:,:,:,:) ! d(neg,neg,kv3/nspin,3)
+
+    allocate( spn_mat( npauli, num_bands, num_bands, kv3/ndim_spinor) ); spn_mat = 0.d0
+
+    call contrib_softpart
+    if ( sw_use_hardpart_wan90 == ON ) call contrib_hardpart
+
+    call gather_matrix
+    call print_mat
+
+    deallocate( spn_mat )
+
+  contains
+
+    subroutine contrib_softpart
+      integer :: ik0, ik1, ik2, iktmp, is1, is2
+      integer :: ig
+      integer :: m, mi, n, ni, ib1, ib2
+      complex(kind=CMPLDP) :: zsum
+
+      real(kind=DP), allocatable :: wk_zaj(:,:)
+      real(kind=DP) :: c1, c2
+      complex(kind=CMPLDP) :: z1, z2
+
+      allocate( wk_zaj( kg1, kimg ) ); wk_zaj = 0.0d0
+
+      do ik0=1, kv3, ndim_spinor
+         if ( map_k(ik0) /= myrank_k ) cycle
+
+         iktmp = ( ik0 -1 )/ndim_spinor +1
+
+         Do is2=1, ndim_spinor
+            ik2 = ik0 +is2 -1
+
+            Do n=1, num_bands
+               ni = ib_inc(n)
+               ib2 = neordr(ni,ik2)
+
+               if ( map_e(ib2) == myrank_e ) then
+                  wk_zaj(1:iba(ik2),1:kimg) = zaj_l(1:iba(ik2),map_z(ib2),ik2,1:kimg)
+               endif
+               call mpi_bcast( wk_zaj, kg1*kimg, mpi_double_precision, map_e(ib2), &
+                    &          mpi_k_world(myrank_k), ierr )
+
+               Do is1=1, ndim_spinor
+                  ik1 = ik0 +is1 -1
+               
+                  do m=1,num_bands
+                     mi = ib_inc(m)
+                     ib1 = neordr(mi,ik1)
+
+                     if ( map_e(ib1) /= myrank_e ) cycle
+
+                     zsum = 0.0d0
+
+                     if ( kimg == 1 ) then
+                        Do ig=1, iba(ik0)
+                           c1 = zaj_l(ig,map_z(ib1),ik1,1)
+                           c2 = c1 *wk_zaj(ig,1)
+                           zsum = zsum +c2
+                        End do
+                     else
+                        Do ig=1, iba(ik0)
+                           z1 = dcmplx( zaj_l(ig,map_z(ib1),ik1,1), &
+                                &       zaj_l(ig,map_z(ib1),ik1,2) )
+                           z2 = conjg(z1) *dcmplx( wk_zaj(ig,1), wk_zaj(ig,2) )
+                           zsum = zsum +z2
+                        End Do
+                     endif
+!
+                     if ( is1 == 1 .and. is2 == 1 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) +zsum
+                     else if ( is1 == 1 .and. is2 == 2 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) -zsum *zi
+                     else if ( is1 == 2 .and. is2 == 1 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) +zsum *zi
+                     else if ( is1 == 2 .and. is2 == 2 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) -zsum
+                     endif
+
+                  end do
+               end Do
+            end do
+         end do
+      end do
+
+    end subroutine contrib_softpart
+
+    subroutine contrib_hardpart
+      integer :: ik0, ik1, ik2, iktmp, is1, is2
+      integer :: mi, m, ni, n, ib1, ib2
+      integer :: ia, it, mdvdb, lmt1, lmt2, il1, il2, it1, it2
+      integer :: lmta1, lmta2
+      complex(kind=CMPLDP) :: zsum, wf1, wf2
+      complex(kind=CMPLDP), allocatable :: wk_fsri(:)
+
+      allocate( wk_fsri(nlmta) ); wk_fsri = 0.0d0
+
+! -- start
+      Do ik0=1, kv3, ndim_spinor
+         if ( map_k(ik0) /= myrank_k ) cycle
+
+         iktmp = ( ik0 -1 )/ndim_spinor +1
+
+         Do is2=1, ndim_spinor
+            ik2 = ik0 +is2 -1
+
+            do n=1,num_bands
+               ni = ib_inc(n)
+               ib2 = neordr(ni,ik2)
+
+               if ( map_e(ib2) == mype ) then
+                  wk_fsri(:) = dcmplx( fsr_l( map_z(ib2),:,ik2 ), &
+                       &               fsi_l( map_z(ib2),:,ik2 ) )
+               endif
+               call mpi_bcast( wk_fsri, 2*nlmta, mpi_double_precision, &
+                    &          map_e(ib2), mpi_k_world(myrank_k), ierr )
+
+               Do is1=1, ndim_spinor
+                  ik1 = ik0 +is1 -1
+
+                  do m=1,num_bands
+                     mi = ib_inc(m)
+                     ib1 = neordr(mi,ik1)
+
+                     if ( map_e(ib1) /= myrank_e ) cycle
+                     
+                     zsum = 0.d0
+
+                     Do ia=1, natm
+                        it = ityp(ia)
+                        mdvdb = m_PP_include_vanderbilt_pot(it)
+                        if ( mdvdb == SKIP ) cycle
+
+                        Do lmt1=1, ilmt(it)
+                           il1 = ltp(lmt1,it); it1 = taup(lmt1,it)
+                           lmta1 = lmta( lmt1,ia )
+                           wf1 = dcmplx( fsr_l( map_z(ib1), lmta1, ik1 ), &
+                                &        fsi_l( map_z(ib1), lmta1, ik1 ) )
+
+                           Do lmt2=1, ilmt(it)
+                              il2 = ltp(lmt2,it); it2 = taup(lmt2,it)
+                              lmta2 = lmta( lmt2,ia )
+                              wf2 = wk_fsri(lmta2)
+
+                              zsum = zsum +conjg(wf1) * wf2 *q(lmt1,lmt2,it)
+                           End do
+                        End Do
+                     End Do
+
+                     if ( is1 == 1 .and. is2 == 1 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) +zsum
+                     else if ( is1 == 1 .and. is2 == 2 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) -zsum *zi
+                     else if ( is1 == 2 .and. is2 == 1 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) +zsum *zi
+                     else if ( is1 == 2 .and. is2 == 2 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) -zsum
+                     endif
+
+                  End do
+               End Do
+            End DO
+         End Do
+      End Do
+
+      deallocate( wk_fsri )
+
+    end subroutine contrib_hardpart
+
+    subroutine gather_matrix
+      complex(kind=CMPLDP), allocatable :: spn_mat_mpi(:,:,:,:) ! d(neg,neg,kv3/nspin,3)
+
+      if (npes>1) then
+         allocate(spn_mat_mpi(npauli,num_bands,num_bands,kv3/nspin) )
+         spn_mat_mpi = spn_mat
+         spn_mat = 0.d0
+         call mpi_allreduce( spn_mat_mpi, spn_mat, &
+              &              num_bands*num_bands*kv3/nspin*npauli*2, &
+              &              mpi_double_precision, mpi_sum, mpi_comm_group,ierr )
+         deallocate(spn_mat_mpi)
+      end if
+
+      call mpi_barrier( mpi_comm_world, ierr )
+
+    end subroutine gather_matrix
+
+    subroutine print_mat
+      integer :: ik, mi, ni, ispn, num, nsize
+      complex(kind=CMPLDP), allocatable :: work(:,:)
+
+      if ( mype /= 0 ) return
+
+      open(nfwannier,file=trim(wan90_seedname)//".spn",form="formatted")
+
+#ifdef WAN90_SPN_FORMATTED
+      write(nfwannier,*) 'Generated by PHASE noncol'
+      write(nfwannier,*) num_bands, kv3/nspin
+
+      do ik=1, kv3/nspin
+         Do mi=1, num_bands
+            Do ni=1, mi
+               Do ispn=1, 3
+                  write(nfwannier,'(2f16.12)') spn_mat(ispn,mi,ni,ik)
+               End Do
+            End Do
+         End do
+      end do
+#else
+      write(nfwannier) 'Generated by PHASE noncol'
+      write(nfwannier) num_bands, kv3/nspin
+      
+      nsize = num_bands *( num_bands +1 ) /2
+      allocate( work(3, nsize) ); work = 0.0d0
+
+      Do ik=1, kv3/nspin
+         num = 0
+         Do mi=1, num_bands
+            Do ni=1, mi
+               num = num +1
+               work(:,num) = spn_mat(:,mi,ni,ik)
+            End Do
+         End Do
+         write(nfwannier) ( (work(ispn,mi),ispn=1,3), mi=1, nsize )
+      End do
+      deallocate( work  )
+#endif
+
+      close(nfwannier)
+
+    end subroutine print_mat
+
+  end subroutine m_Wan90_gen_mat_spn
+! ================== 2015/09/04
+
+! ==== KT_add ==== 2015/09/14
+  subroutine m_Wan90_set_igf_wan90
+    integer :: id, i
+    integer :: igf1, igf2, igf3
+    integer :: nx, ny, nz
+    integer :: nxmin, nymin, nzmin, nxmax, nymax, nzmax
+!
+    nxmin = minval( nncell(1,:,:) );   nxmax = maxval( nncell(1,:,:) );  
+    nymin = minval( nncell(2,:,:) );   nymax = maxval( nncell(2,:,:) );  
+    nzmin = minval( nncell(3,:,:) );   nzmax = maxval( nncell(3,:,:) );  
+
+    allocate( igf_wan90( kg, nxmin:nxmax, nymin:nymax, nzmin:nzmax ) )
+    igf_wan90 = 0
+!
+    id = fft_box_size_WF(1,0)
+
+    Do nx=nxmin, nxmax
+       Do ny=nymin, nymax
+          Do nz=nzmin, nzmax
+
+             do i = 1, kg
+                igf1 = ngabc(i,1) + 1 -nx
+                igf2 = ngabc(i,2) + 1 -ny
+                igf3 = ngabc(i,3) + 1 -nz
+!
+                if ( igf1 <= 0 ) igf1 = igf1 + fft_box_size_WF(1,1)
+                if ( igf2 <= 0 ) igf2 = igf2 + fft_box_size_WF(2,1)
+                if ( igf3 <= 0 ) igf3 = igf3 + fft_box_size_WF(3,1)
+
+                igf_wan90(i,nx,ny,nz) = igf1 + (igf2-1)*id &
+                     &                 + (igf3-1)*id*fft_box_size_WF(2,0)
+             enddo
+          End Do
+       End Do
+    End Do
+  end subroutine m_Wan90_set_igf_wan90
+
+  subroutine m_Wan90_map_WF_on_fftmesh(k1,k2,ik,psi_l,bfft,igf_in)
+    integer, intent(in) :: k1,k2,ik
+    integer, intent(in) :: igf_in(kg1)
+    real(kind=DP), intent(in),dimension(kg1,1,k1:k2,kimg) :: psi_l
+    real(kind=DP), intent(inout), dimension(nfft) :: bfft
+
+    integer :: i,i1,ri, j, i2, ii
+
+    bfft = 0.d0
+    if(k_symmetry(ik) == GAMMA) then
+       if(kimg == 1) then
+          i1 = igf_in(1)
+          bfft(i1) = psi_l(1,1,ik,1)
+#ifdef NEC_TUNE_SMP
+!CDIR NODEP
+#endif
+          do ii = 2, iba(ik)
+!!$             i = nbase(ii,1)
+             i = nbase(ii,ik)
+             i1 = igf_in(i)
+             bfft(i1) = psi_l(ii,1,ik,1)
+             j = nbase_gamma(ii,2)
+             i2 = igf_in(j)
+             bfft(i2) =   psi_l(ii,1,ik,1)
+          end do
+       else if(kimg == 2) then
+          i1 = 2*igf_in(1) - 1
+          bfft(i1)   = psi_l(1,1,ik,1)
+          bfft(i1+1) = psi_l(1,1,ik,2)
+#ifdef NEC_TUNE_SMP
+!CDIR NODEP
+#endif
+          do ii = 2, iba(ik)
+!!$             i = nbase(ii,1)
+             i = nbase(ii,ik)
+             i1 = 2*igf_in(i)-1
+             bfft(i1  ) = psi_l(ii,1,ik,1)
+             bfft(i1+1) = psi_l(ii,1,ik,2)
+             j = nbase_gamma(ii,2)
+             i2 = 2*igf_in(j)-1
+             bfft(i2  ) = psi_l(ii,1,ik,1)
+             bfft(i2+1) = -psi_l(ii,1,ik,2)
+          end do
+       end if
+    else
+#ifdef NEC_TUNE_SMP
+!CDIR NOLOOPCHG
+#endif
+       do ri = 1, kimg
+#ifdef NEC_TUNE_SMP
+!CDIR NODEP
+#endif
+          do i = 1, iba(ik)
+             i1 = kimg*igf_in(nbase(i,ik)) + (ri - kimg)
+             bfft(i1) = psi_l(i,1,ik,ri)   ! MPI
+          end do
+       end do
+    end if
+  end subroutine m_Wan90_map_WF_on_fftmesh
+! ===== 2015/09/14
+
 end module m_Wannier90
diff -uprN phase0_2015.01/src_phase/m_XC_Potential.F90 phase0_2015.01.01/src_phase/m_XC_Potential.F90
--- phase0_2015.01/src_phase/m_XC_Potential.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_XC_Potential.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 493 $)
 !
 !  MODULE: m_XC_Potential
 !
@@ -69,7 +69,7 @@
 
 #define XC_PACK_FFT
 module m_XC_Potential
-! $Id: m_XC_Potential.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: m_XC_Potential.F90 493 2016-06-01 04:57:01Z ktagami $
 !
 !  Upgraded on 23rd Aug. 2006 by T. Yamasaki
 !    Differentials of the charge density function in GGA calculation are
@@ -92,7 +92,7 @@ module m_XC_Potential
   use m_FFT,                  only : fft_box_size_CD, fft_box_size_CD_c, nfftp &
        &                           , m_FFT_CD_inverse_c &
        &                           , m_FFT_CD_direct_c  &
-       &                           , m_FFT_check_of_negative_CD
+       &                           , m_FFT_check_of_negative_CD, fft_box_size_CD_nonpara
 #ifdef _MPIFFT_
   use m_FFT,                  only : m_FFT_set_cdata  &
        &                           , lx,ly,lz,ly_d,lz_d, ny_d,nz_d
@@ -981,10 +981,12 @@ contains
       Do iloop = 1, ndim_magmom
          afft = 0.0d0
 #ifdef _OLD_MAP_CHARGE_
-         call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, Valence_Charge_Only )
+         call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, &
+        &                                Valence_Charge_Only )
                                             !-(m_XC_Pot.) -> afft(*) (xcchg2)
 #else
-         call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, Valence_Charge_Only )
+         call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, &
+        &                                 Valence_Charge_Only )
                                               !-(m_XC_Pot.) -> afft(*) (xcchg2)
 #endif
          if(iprixc >= 2) write(nfout,*) " just after map_charge_onto_a_fft_box p1"
@@ -1013,7 +1015,8 @@ contains
       allocate( quantz_axis_inversion_flg_mesh(ista_fftph:iend_fftph ) )
       quantz_axis_inversion_flg_mesh = 0
 
-      call m_ES_SpinDens_Along_QuantzAxis2( RhoMag_R, chgrhr_l,quantz_axis_inversion_flg_mesh, .false. )
+      call m_ES_SpinDens_Along_QuantzAxis2( RhoMag_R, chgrhr_l, &
+           &                                quantz_axis_inversion_flg_mesh, .false. )
 
     end subroutine set_chgrhr_case_noncollinear3
 
@@ -1077,9 +1080,11 @@ contains
 ! ----                  
 !
 #ifdef _OLD_MAP_CHARGE_
-           call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, Partial_Core_Charge )
+           call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, &
+	&                                  Partial_Core_Charge )
 #else
-           call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, Partial_Core_Charge )
+           call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, &
+	&                                   Partial_Core_Charge )
 #endif
 
            afft = bfft + afft / dble(nspin)         ! nspin == 2 
@@ -1180,10 +1185,13 @@ contains
 ! ----                  
 !
 #ifdef _OLD_MAP_CHARGE_
-           call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, Partial_Core_Charge )
+           call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, &
+	&                                  Partial_Core_Charge )
 #else
-           call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, Partial_Core_Charge )
+           call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, &
+	&                                   Partial_Core_Charge )
 #endif
+
            afft = bfft + afft / dble(nspin)         ! nspin == 2 
         end if
 ! --
@@ -1234,10 +1242,12 @@ contains
       do iloop = 1, ndim_magmom
          afft = 0.0d0
 #ifdef _OLD_MAP_CHARGE_
-         call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, Valence_Charge_Only )
+         call map_charge_onto_a_fft_box( chgq_l, ndim_magmom, iloop, &
+	&                                Valence_Charge_Only )
 	                                     !-(m_XC_Pot.) -> afft(*) (xcchg2)
 #else
-         call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, Valence_Charge_Only )
+         call map_charge_onto_a_fft_box2( nfout, chgq_l, ndim_magmom, iloop, &
+	&                                 Valence_Charge_Only )
                                               !-(m_XC_Pot.) -> afft(*) (xcchg2)
 #endif
          if(iprixc >= 2) write(nfout,*) " just after map_charge_onto_a_fft_box p1"
@@ -1516,7 +1526,8 @@ contains
          end if
       end if
 
-      if(xctype.eq.'vdwdf' .and. .not.oneshot) exc = exc + ecnl*univol*rinplw
+      if(xctype.eq.'vdwdf' .and. .not.oneshot) exc = exc + ecnl
+
       if(iprixc >= 2) write(nfout,'(" !XC exc = ",f20.8)') exc
 
       call tstatc0_end(id_sname)
@@ -1560,6 +1571,9 @@ contains
 !!$         exc = exc_mpi
 !!$      end if
 
+
+      if (xctype.eq.'vdwdf') exc = exc + ecnl
+
       chgrhr_l = dF_drho
 
 ! stress tensor comes from gradient correction
@@ -1782,14 +1796,8 @@ contains
 ! ======================================================================= 11.0
 
     subroutine ggaxcp0
-      integer :: i, nfftcd,nfftx,nffty,nfftz,ix,iy,iz,ixyz
       integer :: pot_type
 
-      real(kind=DP),allocatable,dimension(:) :: chgrhr_red
-      real(kind=DP),allocatable,dimension(:) :: grad_rho_red
-      real(kind=DP),allocatable,dimension(:,:) :: dF_drho_red
-      real(kind=DP),allocatable,dimension(:,:,:) :: dfdrho_vdw,dfddrho_vdw
-
 ! ==== KT_add ======== 13.0XX
       real(kind=DP), allocatable :: dummy1(:,:), dummy2(:)
 ! ==================== 13.0XX
@@ -1924,44 +1932,8 @@ contains
             grad_trho=0.d0
             call cr_ggapbe (nspin,ispin,ista_fftph,iend_fftph,chgrhr_l,grad_trho,f2or1,exc,dF_drho,ecor)
          endif
-         if(.not.oneshot)then
-           nfftx = fft_box_size_CD(1,1);nffty = fft_box_size_CD(2,1);nfftz = fft_box_size_CD(3,1)
-           nfftcd = nfftx*nffty*nfftz
-           allocate(dfdrho_vdw (nfftx,nffty,nfftz));dfdrho_vdw=0.d0
-           allocate(dfddrho_vdw(nfftx,nffty,nfftz));dfddrho_vdw=0.d0
-           allocate(chgrhr_red(nfftcd));chgrhr_red=0.0d0
-           allocate(grad_rho_red(nfftcd));grad_rho_red=0.d0
-           if(ispin==1)then
-               do i=ista_fftph,iend_fftph
-                   chgrhr_red(i) = chgrhr_l(i,1)
-                   grad_rho_red(i) = grad_rho(i,1)
-               enddo
-           else
-               do i=ista_fftph,iend_fftph
-                   chgrhr_red(i) = chgrhr_l(i,1)+chgrhr_l(i,2)
-                   grad_rho_red(i) = grad_rho(i,1)+grad_rho(i,2)
-               enddo
-           endif
-           call mpi_allreduce(MPI_IN_PLACE,chgrhr_red,nfftcd,mpi_double_precision,mpi_sum,&
-                  & mpi_cdfft_world(myrank_ggacmp),ierr)
-           call mpi_allreduce(MPI_IN_PLACE,grad_rho_red,nfftcd,mpi_double_precision,mpi_sum,&
-                  & mpi_cdfft_world(myrank_ggacmp),ierr)
-           call vdW_scf(nspin,ispin,nfftx,nffty,nfftz,chgrhr_red,grad_rho_red,ecnl,dfdrho_vdw,dfddrho_vdw)
-           do ix=1,nfftx
-             do iy=1,nffty
-               do iz=1,nfftz
-                 ixyz = (iz-1)*nffty*nfftx+(iy-1)*nfftx+ix    
-                 if(ixyz<ista_fftph.or.ixyz>iend_fftph) cycle
-                 dF_drho(ixyz,1:ispin) = dF_drho(ixyz,1:ispin)+dfdrho_vdw(ix,iy,iz)
-                 dF_dgradrho(ixyz,1:ispin) = dF_dgradrho(ixyz,1:ispin)+dfddrho_vdw(ix,iy,iz)
-               enddo
-             enddo
-           enddo
-           deallocate(dfdrho_vdw)
-           deallocate(dfddrho_vdw)
-           deallocate(chgrhr_red)
-           deallocate(grad_rho_red)
-         endif
+         if (.not. oneshot ) call add_vdwdf_nonlocal_energy
+
 #endif
       else if(xctype == 'katopbe' .or. xctype == 'ggapbek') then
 !!$         call ex_ggapbe (nspin,ispin,chgrhr_l,grad_rho,f2or1,exc,dF_drho,dF_dgradrho)
@@ -2033,6 +2005,97 @@ contains
       end if
     end subroutine ggaxcp0
 
+    subroutine add_vdwdf_nonlocal_energy
+      integer :: i, nfftcd,nfftx,nffty,nfftz,ix,iy,iz,ixyz
+      integer :: ix2, iy2, iz2, nlphf,idp, mmp
+
+      real(kind=DP),allocatable,dimension(:) :: chgrhr_red
+      real(kind=DP),allocatable,dimension(:) :: grad_rho_red
+      real(kind=DP),allocatable,dimension(:,:) :: dF_drho_red
+      real(kind=DP),allocatable,dimension(:,:,:) :: dfdrho_vdw,dfddrho_vdw
+
+      idp = fft_box_size_CD_nonpara(1,0)
+      mmp = fft_box_size_CD_nonpara(2,0)
+
+      nfftx = fft_box_size_CD(1,1)
+      nffty = fft_box_size_CD(2,1)
+      nfftz = fft_box_size_CD(3,1)
+      nfftcd = nfftx*nffty*nfftz
+
+      allocate(dfdrho_vdw (nfftx,nffty,nfftz));dfdrho_vdw=0.d0
+      allocate(dfddrho_vdw(nfftx,nffty,nfftz));dfddrho_vdw=0.d0
+      allocate(chgrhr_red(nfftcd));chgrhr_red=0.0d0
+      allocate(grad_rho_red(nfftcd));grad_rho_red=0.d0
+
+      if(ispin==1)then
+         do i=ista_fftph,iend_fftph
+            chgrhr_red(i)   = chgrhr_l(i,1)
+            grad_rho_red(i) = grad_rho(i,1)
+         enddo
+      else
+         do i=ista_fftph,iend_fftph
+            chgrhr_red(i)   = chgrhr_l(i,1) +chgrhr_l(i,2)
+            grad_rho_red(i) = grad_rho(i,1) +grad_rho(i,2)
+         enddo
+      endif
+      call mpi_allreduce( MPI_IN_PLACE, chgrhr_red, nfftcd, mpi_double_precision, &
+           &              mpi_sum, mpi_cdfft_world(myrank_ggacmp), ierr )
+      call mpi_allreduce( MPI_IN_PLACE, grad_rho_red, nfftcd, mpi_double_precision,&
+           &              mpi_sum, mpi_cdfft_world(myrank_ggacmp), ierr )
+      call vdW_scf( nspin, ispin, nfftx, nffty, nfftz, chgrhr_red, grad_rho_red, &
+           &        ecnl, dfdrho_vdw, dfddrho_vdw )
+
+      if(kimg == 1) then
+         nlphf = idp/2
+      else
+         nlphf = idp
+      end if
+
+      if ( kimg == 1 ) then
+        do ix=1,nfftx
+            do iy=1,nffty
+               do iz=1,nfftz
+                  if ( ix > nlphf ) then
+                     ix2 = idp -ix
+                     iy2 = nffty +2 -iy
+                     iz2 = nfftz +2 -iz
+                     if ( iy2 > nffty ) iy2 = iy2 -nffty
+                     if ( iz2 > nfftz ) iz2 = iz2 -nfftz
+                     cycle
+                  else
+                     ix2 = ix;  iy2 = iy;   iz2 = iz
+                  endif
+                  ixyz = (iz2-1)*mmp*nlphf +(iy2-1)*nlphf +ix2
+
+                  if(ixyz<ista_fftph.or.ixyz>iend_fftph) cycle
+
+                  dF_drho(ixyz,1:ispin) = dF_drho(ixyz,1:ispin) &
+                       &                  +dfdrho_vdw(ix,iy,iz)
+                  dF_dgradrho(ixyz,1:ispin) = dF_dgradrho(ixyz,1:ispin) &
+                       &                     +dfddrho_vdw(ix,iy,iz)
+               enddo
+            enddo
+         enddo
+      else
+         do ix=1, nfftx
+            do iy=1,nffty
+               do iz=1,nfftz
+                  ixyz = (iz-1)*nffty*nfftx +(iy-1)*nfftx +ix
+                  if ( ixyz<ista_fftph .or. ixyz>iend_fftph ) cycle
+
+                  dF_drho(ixyz,1:ispin) = dF_drho(ixyz,1:ispin) &
+                       &                  +dfdrho_vdw(ix,iy,iz)
+                  dF_dgradrho(ixyz,1:ispin) = dF_dgradrho(ixyz,1:ispin) &
+                       &                     +dfddrho_vdw(ix,iy,iz)
+               enddo
+            enddo
+         enddo
+      endif
+
+      deallocate(dfdrho_vdw); deallocate(dfddrho_vdw)
+      deallocate(chgrhr_red); deallocate(grad_rho_red)
+    end subroutine add_vdwdf_nonlocal_energy
+
     subroutine scale_exchange(alpha)
       implicit none
       real(kind=DP), intent(in) :: alpha
@@ -2179,10 +2242,12 @@ contains
       do iloop = 1, ndim_magmom
          afft = 0.0d0
 #ifdef _OLD_MAP_CHARGE_
-         call map_charge_onto_a_fft_box( chgsoft, ndim_magmom, iloop, Valence_Charge_Only )
+         call map_charge_onto_a_fft_box( chgsoft, ndim_magmom, iloop, &
+        &                                Valence_Charge_Only )
                                              !-(m_XC_Pot.) -> afft(*) (xcchg2)
 #else
-         call map_charge_onto_a_fft_box2( nfout, chgsoft, ndim_magmom, iloop, Valence_Charge_Only )
+         call map_charge_onto_a_fft_box2( nfout, chgsoft, ndim_magmom, iloop, &
+        &                                 Valence_Charge_Only )
                                               !-(m_XC_Pot.) -> afft(*) (xcchg2)
 #endif
 !         if(iprixc >= 2) write(nfout,*) " just after map_charge_onto_a_fft_box p1"
@@ -3274,7 +3339,8 @@ contains
          end if
 #else
          do in = 1, 3
-            call g_xyz_total_chden_l(in)     ! G_xyz*(rho(G)up+rho(G)down) -> afft
+!            call g_xyz_total_chden_l(in)     ! G_xyz*(rho(G)up+rho(G)down) -> afft
+            call g_xyz_chden_l(in,1)     ! G_xyz*(rho(G)up+rho(G)down) -> afft
             call m_FFT_CD_inverse_c(nfout,afft)!(-i)*d(rho_total(r))/d(x|y|z)
             call add_sq_afft_to_grad_trho
          end do
@@ -3457,16 +3523,18 @@ contains
             end do
          end do
          if(nrank_ggacmp > 1) then
-            grad_rho_c2(:) = grad_rho(:,is)
+            Do is=1, 2
+               grad_rho_c2(:) = grad_rho(:,is)
 #ifdef _DETAIL_GGA_TIMING_
-            call tstatc0_begin('mpi_allreduce(ggaxcp) ',id_sname)
+               call tstatc0_begin('mpi_allreduce(ggaxcp) ',id_sname)
 #endif
-            call mpi_allreduce(grad_rho_c2,grad_rho_c4,iend_fftph-ista_fftph+1 &
-                 & ,mpi_double_precision, mpi_sum, mpi_ggacmp_cross_world(myrank_cdfft),ierr)
+               call mpi_allreduce(grad_rho_c2,grad_rho_c4,iend_fftph-ista_fftph+1 &
+                    & ,mpi_double_precision, mpi_sum, mpi_ggacmp_cross_world(myrank_cdfft),ierr)
 #ifdef _DETAIL_GGA_TIMING_
-            call tstatc0_end(id_sname)
+               call tstatc0_end(id_sname)
 #endif
-            grad_rho(:,is) = grad_rho_c4(:)
+               grad_rho(:,is) = grad_rho_c4(:)
+            End Do
          end if
       end if
 #endif
diff -uprN phase0_2015.01/src_phase/m_epc_potential.F90 phase0_2015.01.01/src_phase/m_epc_potential.F90
--- phase0_2015.01/src_phase/m_epc_potential.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_epc_potential.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE: m_epc_potential
 !
@@ -24,7 +24,7 @@
 !
 !
 module m_epc_potential
-! $Id: m_epc_potential.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_epc_potential.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Electronic_Structure, only : vlhxc_l
 !$$#ifndef PARA3D
   use m_PlaneWaveBasisSet,    only : ngabc,gr_l,igfp_l,igfp_nonpara,kg,kgp,kgp_reduced,ylm_l&
@@ -57,10 +57,19 @@ module m_epc_potential
        &                           , nis_sfftp,nie_sfftp,nel_sfftp,idisp_sfftp,np_sfftp,mp_sfftp &
        &                           , npes_cdfft
 
+! === Postitron SCF === 2015/11/28
+  use m_Control_Parameters,  only : positron_method
+  use m_Const_Parameters,   only : positron_CONV
+  use m_Charge_Density,     only : chgq_l
+  use m_PseudoPotential,      only : psc_l
+! ===================== 2015/11/28
+
   implicit none
 
   include 'mpif.h'
 
+  real(kind=DP), pointer :: vlhxc_p( :,:,: )
+
   real(kind=DP), allocatable, dimension(:,:,:) :: tchgq_l  ! d(ista_kngp:iend_kngp,kimg)
   real(kind=DP), allocatable, dimension(:,:) ::   tchgr_l  ! d(ista_sfftph:iend_sfftph,nspin)
 !!$  real(kind=DP), allocatable, dimension(:) ::     p_potential_l ! d(ista_kngp:iend_kngp)
@@ -99,6 +108,16 @@ contains
     integer                  :: np0, j0, i2, j2, k2, n0
 #endif
 
+    if ( allocated(tchgq_l) ) deallocate( tchgq_l )
+    if ( allocated(tchgr_l) ) deallocate( tchgr_l )
+    if ( allocated( grad_tchgr_l ) ) deallocate( grad_tchgr_l )
+    if ( allocated( chden_l ) ) deallocate( chden_l )
+    if ( allocated( inx ) ) deallocate( inx )
+    if ( allocated( jnx ) ) deallocate( jnx )
+    if ( allocated( knx ) ) deallocate( knx )
+    if ( allocated( vepc_l ) ) deallocate( vepc_l )
+    if ( allocated( f2or1) ) deallocate( f2or1 )
+
     allocate(tchgq_l(ista_kngp:iend_kngp,kimg,nspin));  tchgq_l = 0.d0
     allocate(tchgr_l(ista_sfftph:iend_sfftph,nspin));     tchgr_l = 0.d0
     if(sw_gga_p == ON) then
@@ -228,6 +247,7 @@ contains
 !!$       tchgq_l(:,:,1) = tchgq_l(:,:,2)
 !!$    end if
 
+    tchgq_l = 0.0d0
     do ispin = 1, nspin
        do j = 1, kimg
           do i = ista_kngp, iend_kngp
@@ -674,7 +694,26 @@ contains
     if(npes >= 2) call tstatc0_end(id_sname)
   end subroutine afft_allgatherv
 
-subroutine m_epc_ESlhxc_potential(nfout)
+! ==== POSITRON SCF ===== 2015/11/28
+  subroutine m_epc_alloc_vlhxc_p
+    if ( positron_method == positron_CONV ) then
+       vlhxc_p => vlhxc_l
+    else
+       if ( .not. associated( vlhxc_p ) ) then
+          allocate( vlhxc_p(ista_kngp:iend_kngp,kimg,nspin) )
+          vlhxc_p = 0.0d0
+       endif
+    endif
+  end subroutine m_epc_alloc_vlhxc_p
+  
+  subroutine m_epc_dealloc_vlhxc_p
+    if ( positron_method /= positron_CONV ) then
+       if ( associated( vlhxc_p ) ) deallocate( vlhxc_p )
+    endif
+  end subroutine m_epc_dealloc_vlhxc_p
+! ======================= 2015/11/28
+
+  subroutine m_epc_ESlhxc_potential(nfout)
     integer, intent(in)       :: nfout
 !    real(kind=DP), intent(in) :: chg(ista_kngp:iend_kngp,kimg,nspin)
 !    real(kind=DP), intent(in) :: vxc(ista_kngp:iend_kngp,kimg,nspin)
@@ -685,26 +724,26 @@ subroutine m_epc_ESlhxc_potential(nfout)
 
 !    call tstatc0_begin('m_ESlhxc_potential ',id_sname)
 
-    vlhxc_l = 0.d0
+    vlhxc_p = 0.d0
     ist = ista_kngp
     if(ist == 1) ist = 2
     do is = 1, nspin
        do ik = 1, kimg
-          if(mype==0) vlhxc_l(1,ik,is)   = vepc_l(1,ik,is)
+          if(mype==0) vlhxc_p(1,ik,is)   = vepc_l(1,ik,is)
           if(nspin == 1) then
              do i = ist, iend_kngp  !for mpi
-                vlhxc_l(i,ik,is) = vepc_l(i,ik,is) -PAI4*tchgq_l(i,ik,is)&
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) -PAI4*tchgq_l(i,ik,is)&
                      &/gr_l(i)**2
              end do
           else if(nspin == 2) then
              do i = ist, iend_kngp  !for mpi
-                vlhxc_l(i,ik,is) = vepc_l(i,ik,is) -PAI4*(tchgq_l(i,ik,UP)&
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) -PAI4*(tchgq_l(i,ik,UP)&
                      &+tchgq_l(i,ik,DOWN))/gr_l(i)**2
              end do
           endif
 !          do it    = 1,ntyp
 !             do i = ista_kngp, iend_kngp  !for mpi
-!                vlhxc_l(i,ik,is) !                     & = vlhxc_l(i,ik,is)
+!                vlhxc_p(i,ik,is) !                     & = vlhxc_p(i,ik,is)
           !+psc_l(i,it)*zfm3_l(i,it,ik)
 !             end do
 !          end do
@@ -712,17 +751,57 @@ subroutine m_epc_ESlhxc_potential(nfout)
     end do
 !    call tstatc0_end(id_sname)
     if(ipripositron >= 3) then
-       write(nfout,'(" -- vepc_l tchgq_l vlhxc_l << m_epc_ESlhxc_potential>&
+       write(nfout,'(" -- vepc_l tchgq_l vlhxc_p << m_epc_ESlhxc_potential>&
             &>")')
        do is = 1, nspin
           if(nspin == 2) write(nfout,'("  ispin = ",i5)') is
           do ik = 1, kimg
              do i = ist, min(ist+20,iend_kngp)
                 write(nfout,'(3f10.4)') vepc_l(i,ik,is),tchgq_l(i,ik,is)&
-                     &,vlhxc_l(i,ik,is)
+                     &,vlhxc_p(i,ik,is)
              end do
           end do
        end do
     end if
   end subroutine m_epc_ESlhxc_potential
+
+! ==== POSITRON SCF ===== 2015/11/28
+  subroutine m_epc_ESlhxc_potential_mod(nfout)    ! for scf
+    integer, intent(in)       :: nfout
+
+    integer :: is,ik,i,it
+    integer :: ist
+    integer :: id_sname = -1
+
+    vlhxc_p = 0.d0
+    ist = ista_kngp
+
+    if(ist == 1) ist = 2
+    do is = 1, nspin
+       do ik = 1, kimg
+          if(mype==0) vlhxc_p(1,ik,is)   = vepc_l(1,ik,is)
+          if(nspin == 1) then
+             do i = ist, iend_kngp  !for mpi
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) &
+                     &            -PAI4 *chgq_l(i,ik,is) /gr_l(i)**2
+             end do
+          else if(nspin == 2) then
+             do i = ist, iend_kngp  !for mpi
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) &
+                     &           -PAI4*( chgq_l(i,ik,UP)&
+                     &                  +chgq_l(i,ik,DOWN) )/gr_l(i)**2
+             end do
+          endif
+          do it = 1,ntyp
+             do i = ista_kngp, iend_kngp  !for mpi
+                vlhxc_p(i,ik,is) = vlhxc_p(i,ik,is) &
+                     &            -psc_l(i,it) *zfm3_l(i,it,ik)
+             end do
+          end do
+       end do
+    end do
+
+  end subroutine m_epc_ESlhxc_potential_mod
+! ============== 2015/11/28
+
 end module m_epc_potential
diff -uprN phase0_2015.01/src_phase/m_vdWDF.F90 phase0_2015.01.01/src_phase/m_vdWDF.F90
--- phase0_2015.01/src_phase/m_vdWDF.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/m_vdWDF.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 (rev.375)
+!  PROGRAM  PHASE/0 2014.01 (rev.375)
 !
 !  "First-principles Electronic Structure Calculation Program"
 !
@@ -29,7 +29,7 @@
 !  Consortium. 
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
-
+!
 !********************************** Note ******************************************
 ! This program calculates the non-local correlation energy (Ecnl) and 
 ! the local correlation energy (EcLDA) as the post calculation by utilizing
@@ -47,7 +47,23 @@
 ! Periodic systems are assumed.
 ! The atomic units (Hartree) are used.
 !
+! ======= modification =====
+!
+!  2016/06/06 : by asms
+!         The FFT normalization parameters are changed in order to be consistent
+!         with the other subroutines.
+!
+!         rho(G) = 1/V *Int rho(r) exp(-iGr);   rho(r) = sum_{G} rho(G) exp(iGr)
+!        
+!         In the dicretized grids, rho(G) = FFT[rho(r)] /(na*nb*nc);
+!                                  rho(r) = FFT[rho(G)]
+!
+!         By this, the nonlocal vdW energy is written as
+!             Encl = V**2 /2 sum_{ij} sum_{G} conjg(theta(i,G))*phi(i,j,G)*theta(i,G)
+!         and 
+!             phi(i,j,G) = 1/V Int phi(i,j,r) exp(-iGr)
 !
+! =========================
 !
 ! ++++++++ List of subroutines ++++++
 ! All subroutine files listed below are included in this file.
@@ -110,107 +126,106 @@
 !**********************************************************************************
 #ifndef DISABLE_VDWDF
 module progress_bar
-   implicit none
-   logical,private :: printable = .false.
-   integer,private :: iend=10
-   integer,private :: uni=6
-   integer,private :: j=0
-   logical, dimension(0:9),private :: done
-
-   contains
-
-   subroutine set_printable(pri)
-      logical, intent(in) :: pri
-      printable = pri
-   end subroutine set_printable
-
-   subroutine reset_progress()
-      j = 0
-      done = .false.
-      if(printable) write(unit=uni,fmt="(a10)") "0%    100%"
-   end subroutine reset_progress
-
-   subroutine set_unit(un)
-      integer, intent(in) :: un
-      uni = un
-   end subroutine set_unit
-
-   subroutine set_end(en)
-      integer, intent(in) :: en
-      iend = en
-   end subroutine set_end
-
-   subroutine progress()
-     use m_Const_Parameters, only : DP
-     implicit none
-     integer(kind=4)::k
-     character(len=17)::bar="???% |          |"
-     integer :: jj
-     real(kind=DP) :: jjj
-     j = j+1
-     if(j.gt.iend) j=iend
-     jj = int(10*(dble(j)/dble(iend))*0.999999d0)
-     if(.not.done(jj).and.j<=iend) then
-        if(printable) write(unit=uni,fmt="(a1,$)") '*'
-        call flush(uni)
-        done(jj) = .true.
-     endif
-     if(j==iend.and.printable) write(unit=uni,fmt=*)
-!     jjj = (dble(j)/dble(iend))
-!     write(unit=bar(1:3),fmt="(i3)") int(100*jjj)
-!     do k=1, jj
-!       bar(6+k:6+k)="*"
-!     enddo
-!     ! print the progress bar.
-!     write(unit=uni,fmt="(a1,a17,$)") char(13), bar
-!!     write(unit=uni,fmt="(a1,a17)") char(13), bar
-!     if (j/=iend) then
-!       flush(unit=6)
-!     else
-!       write(unit=6,fmt=*)
-!       do k=1,jj
-!          bar(6+k:6+k) = ""
-!       enddo
-!     endif
-     return
-   end subroutine progress
+  implicit none
+  logical,private :: printable = .false.
+  integer,private :: iend=10
+  integer,private :: uni=6
+  integer,private :: j=0
+  logical, dimension(0:9),private :: done
+
+contains
+
+  subroutine set_printable(pri)
+    logical, intent(in) :: pri
+    printable = pri
+  end subroutine set_printable
+
+  subroutine reset_progress()
+    j = 0
+    done = .false.
+    if(printable) write(unit=uni,fmt="(a10)") "0%    100%"
+  end subroutine reset_progress
+
+  subroutine set_unit(un)
+    integer, intent(in) :: un
+    uni = un
+  end subroutine set_unit
+
+  subroutine set_end(en)
+    integer, intent(in) :: en
+    iend = en
+  end subroutine set_end
+
+  subroutine progress()
+    use m_Const_Parameters, only : DP
+    implicit none
+    integer(kind=4)::k
+    character(len=17)::bar="???% |          |"
+    integer :: jj
+    real(kind=DP) :: jjj
+    j = j+1
+    if(j.gt.iend) j=iend
+    jj = int(10*(dble(j)/dble(iend))*0.999999d0)
+    if(.not.done(jj).and.j<=iend) then
+       if(printable) write(unit=uni,fmt="(a1,$)") '*'
+       call flush(uni)
+       done(jj) = .true.
+    endif
+    if(j==iend.and.printable) write(unit=uni,fmt=*)
+    !     jjj = (dble(j)/dble(iend))
+    !     write(unit=bar(1:3),fmt="(i3)") int(100*jjj)
+    !     do k=1, jj
+    !       bar(6+k:6+k)="*"
+    !     enddo
+    !     ! print the progress bar.
+    !     write(unit=uni,fmt="(a1,a17,$)") char(13), bar
+    !!     write(unit=uni,fmt="(a1,a17)") char(13), bar
+    !     if (j/=iend) then
+    !       flush(unit=6)
+    !     else
+    !       write(unit=6,fmt=*)
+    !       do k=1,jj
+    !          bar(6+k:6+k) = ""
+    !       enddo
+    !     endif
+    return
+  end subroutine progress
 
 end module progress_bar
 
 module m_vdWDF
-
-  use m_Const_Parameters, only : DP, ON, PAI, FMAXVALLEN, LOWER
-  use m_Control_Parameters, only : printable,nspin,eval_kernel_by_interpolation,na_gl   &
-  &                              , a1,a2,dq_vdw,lambda,q0cut,ds,ndel,nphiD,nr12,nk,maxk &
-  &                              , r12max,oneshot
+  use m_Const_Parameters, only : DP, ON, PAI, FMAXVALLEN, LOWER, CMPLDP
+  use m_Control_Parameters, only : printable,nspin,eval_kernel_by_interpolation, &
+       &                           na_gl,a1,a2,dq_vdw,lambda,q0cut,ds,ndel,nphiD, &
+       &                           nr12,nk,maxk, r12max,oneshot, ipri, &
+       &                           sw_save_memory_vdw, kimg, iprixc
   use m_Files, only : nfout
   use m_Charge_Density, only : m_CD_get_rspace_charge
 
   use m_FFT, only : fft_box_size_CD,fft_box_size_CD_nonpara
-  use m_Crystal_Structure, only : altv,univol
-
-  use m_Parallelization, only : npes,mype,mpi_comm_group
+  use m_Crystal_Structure, only : altv,univol, rltv
 
+  use m_Parallelization, only : npes,mype,mpi_comm_group,m_Parallel_init_mpi_nq,  &
+       &                        ista_nq,iend_nq,np_nq,mp_nq,is_nq,ie_nq,nel_nq, &
+       &                        map_z_nq
   use m_Timing, only : tstatc0_begin,tstatc0_end
 
   implicit none
-
   include 'mpif.h'
 
   real(kind=DP),parameter :: pi=PAI
 
-! Physical values
+  ! Physical values
   Real(kind=DP) ExGGA,Ecnl,Ecnl_12,Ecnl_12_ab,Ecnl_3,Ecnl_3s,EcLDA
 
-  Integer na,nb,nc,nabc
-  Real(kind=DP) aa(3,3),dv
-  Real(kind=DP), Allocatable :: rho(:,:,:),grad(:,:,:)
-
   real(kind=DP) :: phi0=2.77d0 
- 
+
+  Integer na,nb,nc,nabc
+  Real(kind=DP) :: aa(3,3),dv
+  Real(kind=DP), Allocatable :: rho(:,:,:),grad(:,:,:), cgrad(:,:,:,:)
   real(kind=DP),allocatable,dimension(:) :: phi_ab
 
-! Grid points
+  ! Grid points
 !!!! Spline curves
   Integer nq0
   real(kind=DP) :: qa,qb,q0max,q0min
@@ -218,12 +233,12 @@ module m_vdWDF
 !!!! The table of phidD
   real(kind=DP), allocatable, dimension(:,:) :: phidD
 
-! Internal parameters
+  ! Internal parameters
   Real(kind=DP),parameter :: rhomin=1.d-9
 
-! Real-space and reciprocal-space Functions
+  ! Real-space and reciprocal-space Functions
   Complex*16, Allocatable :: theta_G(:,:,:),     &
-&                            theta_G_ab(:,:,:,:),theta_G_a(:,:,:),theta_G_b(:,:,:)
+       &                            theta_G_ab(:,:,:,:),theta_G_a(:,:,:),theta_G_b(:,:,:)
   Real(kind=DP), Allocatable :: theta_R(:,:,:),dtheta_R(:,:,:,:),ddtheta_R(:,:,:,:)
 
   complex(kind=DP), allocatable, dimension(:,:,:,:) :: ualpha_g,ualpha_r
@@ -239,51 +254,97 @@ module m_vdWDF
   integer, parameter :: FFTW_ESTIMATE = 64
   integer, parameter :: FFTW_FORWARD  = -1
   integer, parameter :: FFTW_BACKWARD = +1
-  contains
+
+  real(kind=DP), allocatable, dimension(:,:,:) :: dFdrho, dFddrho
+  real(kind=DP), allocatable, dimension(:,:,:) :: rkar
+
+  real(kind=DP) :: s_cnl1(3,3), s_cnl2(3,3), ecnl_vdwdf
+  real(kind=DP) :: s_cnl1_pc(3,3), s_cnl2_pc(3,3)
+
+  !  logical :: grad_rho_eq_0 = .true.
+  logical :: grad_rho_eq_0 = .false.
+
+contains
 
   subroutine print_vdw_parameters()
     if(printable)then
-    write(nfout,'(a)')            '-- parameters for the vdW-DF calculations --'
-    write(nfout,'(a,2f15.10,i8)') '   rmax, kmax, nmesh     : ',r12max,maxk,nr12
-    write(nfout,'(a,3f15.10)')    '   dq, lambda, and q0cut : ',dq_vdw,lambda,q0cut
-    write(nfout,'(a,2f15.10,i5)') '   q0min, q0max, nq0     : ',q0min,q0max,nq0
-    write(nfout,'(a,f15.10)' )    '   ds                    : ',ds
-    write(nfout,'(a,i5,2f15.10)') '   na_gl, a1, a2         : ',na_gl,a1,a2
-    if(eval_kernel_by_interpolation)then
-    write(nfout,'(a)')            '   kernel evaluation     : by interpolation'
-    write(nfout,'(a,2i8)')        '   ndel, nphiD           : ',ndel,nphiD
-    else
-    write(nfout,'(a)')            '   kernel evaluation     : direct'
-    endif
+       write(nfout,'(a)')            '-- parameters for the vdW-DF calculations --'
+       write(nfout,'(a,2f15.10,i8)') '   rmax, kmax, nmesh     : ',r12max,maxk,nr12
+       write(nfout,'(a,3f15.10)')    '   dq, lambda, and q0cut : ',dq_vdw,lambda,q0cut
+       write(nfout,'(a,2f15.10,i5)') '   q0min, q0max, nq0     : ',q0min,q0max,nq0
+       write(nfout,'(a,f15.10)' )    '   ds                    : ',ds
+       write(nfout,'(a,i5,2f15.10)') '   na_gl, a1, a2         : ',na_gl,a1,a2
+       if(eval_kernel_by_interpolation)then
+          write(nfout,'(a)')            '   kernel evaluation     : by interpolation'
+          write(nfout,'(a,2i8)')        '   ndel, nphiD           : ',ndel,nphiD
+       else
+          write(nfout,'(a)')            '   kernel evaluation     : direct'
+       endif
     endif
   end subroutine print_vdw_parameters
 
   subroutine initialize_vdwdf_scf(nspin,ispin,na,nb,nc,chgr,grad_rho)
     use progress_bar, only : set_printable
+
     integer, intent(in) :: nspin,ispin,na,nb,nc
     real(kind=DP), dimension(na*nb*nc), intent(in) :: chgr,grad_rho
+
     integer :: i,cix,ciy,ciz,nrxyz
+    integer :: idp, mmp, nlphf, cix2, ciy2, ciz2
     real(kind=DP) :: q
-    call set_printable(printable)
 
+    call set_printable(printable)
     call do_cell_params()
+
+    idp = fft_box_size_CD_nonpara(1,0)
+    mmp = fft_box_size_CD_nonpara(2,0)
+
+    if(kimg == 1) then
+       nlphf = idp/2
+    else
+       nlphf = idp
+    end if
+
     rinplw = 1.d0/(dble(na*nb*nc))
     q0max = q0cut*1.01d0
     q0min = 0.09d0
     nq0 = DINT(dLOG((q0max-q0min)*(lambda-1.d0)/dq_vdw+1.d0)/dLOG(lambda))+1
     maxk = dble(nr12)/r12max
 
+    if(firstcall) call m_Parallel_init_mpi_nq(nfout,ipri,printable,nq0)
     if(firstcall) call print_vdw_parameters()
     call alloc_vdw()
-    do cix = 1,na
-       do ciy = 1,nb
-          do ciz = 1,nc
-             nrxyz=(ciz-1)*nb*na+(ciy-1)*na+cix
-             rho(cix,ciy,ciz) = chgr(nrxyz)
-             grad(cix,ciy,ciz) = grad_rho(nrxyz)
+
+    if ( kimg == 1 ) then
+       do cix = 1,na
+          do ciy = 1,nb
+             do ciz = 1,nc
+                if ( cix > nlphf ) then
+                   cix2 = idp -cix
+                   ciy2 = nb +2 -ciy
+                   ciz2 = nc +2 -ciz
+                   if ( ciy2 > nb ) ciy2 = ciy2 -nb
+                   if ( ciz2 > nc ) ciz2 = ciz2 -nc
+                else
+                   cix2 = cix;  ciy2 = ciy; ciz2 = ciz
+                endif
+                nrxyz=(ciz2-1)*mmp*nlphf +(ciy2-1)*nlphf +cix2
+                rho(cix,ciy,ciz) = chgr(nrxyz)
+                grad(cix,ciy,ciz) = grad_rho(nrxyz)
+             end do
+          end do
+       end do
+    else
+       do cix = 1,na
+          do ciy = 1,nb
+             do ciz = 1,nc
+                nrxyz = (ciz-1)*nb*na +(ciy-1)*na +cix
+                rho(cix,ciy,ciz) = chgr(nrxyz)
+                grad(cix,ciy,ciz) = grad_rho(nrxyz)
+             end do
           end do
        end do
-    end do
+    endif
 
     do i=1,nq0
        q = q0min + dq_vdw*(lambda**DBLE(i-1)-1.d0)/(lambda-1.d0)
@@ -291,7 +352,9 @@ module m_vdWDF
     enddo
     call spline0(nq0,qar,q2ar)
 
-    if(eval_kernel_by_interpolation.and.firstcall) call build_lookup_table(ndel,nphiD,phidD)
+    if (eval_kernel_by_interpolation.and.firstcall) then
+       call build_lookup_table(ndel,nphiD,phidD)
+    endif
     firstcall = .false.
   end subroutine initialize_vdwdf_scf
 
@@ -310,6 +373,7 @@ module m_vdWDF
     q0min = 0.09d0
     nq0 = DINT(dLOG((q0max-q0min)*(lambda-1.d0)/dq_vdw+1.d0)/dLOG(lambda))+1
     maxk = dble(nr12)/r12max
+    if(firstcall) call m_Parallel_init_mpi_nq(nfout,ipri,printable,nq0)
 
     call alloc_vdw()
     call m_CD_get_rspace_charge(nfout,na,nb,nc,rho,is)
@@ -324,9 +388,9 @@ module m_vdWDF
     call spline0(nq0,qar,q2ar)
 
     if(eval_kernel_by_interpolation)then
-      if(printable) write(nfout,'(a)') 'building the lookup table for the kernel function ...'
-      call build_lookup_table(ndel,nphiD,phidD) 
-      if(printable) write(nfout,'(a)') '... done'
+       if(printable) write(nfout,'(a)') 'building the lookup table for the kernel function ...'
+       call build_lookup_table(ndel,nphiD,phidD) 
+       if(printable) write(nfout,'(a)') '... done'
     endif
     if(printable) write(nfout,'(a)') '... done initialization'
 
@@ -341,18 +405,42 @@ module m_vdWDF
        aa(i,1:3) = altv(1:3,i)/dble(fft_box_size_CD(i,1))
     enddo
     dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
-&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
-&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
+         &      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
+         &      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
   end subroutine do_cell_params
 
+  function real_index(iq,pe) result(res)
+    integer, intent(in) :: iq
+    integer, intent(in),optional :: pe
+    integer :: res
+    integer :: ii,mpe
+    mpe = mype
+    if(present(pe))then
+       mpe = pe
+    endif
+    if(mpe==0)then
+       res = iq
+       return
+    endif
+    res = 0
+    do ii=0,mpe-1
+       res = res+nel_nq(ii) 
+    enddo
+    res = res+iq
+    return
+  end function real_index
+
   subroutine alloc_vdw()
+    integer :: i,cix,ciy,ciz
+    real(kind=DP) :: Ta,Tb,Tc, vec(3), bb(3,3)
+
     Allocate(rho(na,nb,nc));rho=0.d0
     allocate(grad(na,nb,nc));grad=0.d0
     Allocate(theta_G(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
     Allocate(theta_R(na,nb,nc))
     if(.not.oneshot)then
-       Allocate(dtheta_R(nq0,na,nb,nc))
-       Allocate(ddtheta_R(nq0,na,nb,nc))
+       Allocate(dtheta_R(np_nq,na,nb,nc))
+       Allocate(ddtheta_R(np_nq,na,nb,nc))
     endif
     Allocate(theta_G_a(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
     Allocate(theta_G_b(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
@@ -360,17 +448,57 @@ module m_vdWDF
     if(eval_kernel_by_interpolation.and.firstcall) allocate(phidD(0:ndel,-1:nphiD+1))
     allocate(qar(nq0));qar = 0.d0
     allocate(q2ar(nq0,nq0));q2ar = 0.d0
-    allocate(theta_G_ab(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    !    allocate(theta_G_ab(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    if(sw_save_memory_vdw) then
+       allocate(theta_G_ab(np_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    else
+       allocate(theta_G_ab(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    endif
     if(.not.oneshot)then
-       allocate(ualpha_g(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
-       allocate(ualpha_r(nq0,na,nb,nc))
+       !       allocate(ualpha_g(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+       !       allocate(ualpha_r(nq0,na,nb,nc))
+       allocate(dFdrho(na,nb,nc))
+       allocate(dFddrho(na,nb,nc))
     endif
+
+    allocate(rkar(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+
+#if 0
+    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
+    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
+    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
+    Do ciz = -(nc/2-1),nc/2
+       Do ciy = -(nb/2-1),nb/2
+          Do cix = -(na/2-1),na/2
+             rkar(cix,ciy,ciz) = &
+                  &    DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
+          Enddo
+       Enddo
+    Enddo
+#else
+    Do i=1, 3
+       bb(1,i) = rltv(i,1) /PAI /2.0d0
+       bb(2,i) = rltv(i,2) /PAI /2.0d0
+       bb(3,i) = rltv(i,3) /PAI /2.0d0
+    End Do
+
+    Do ciz = -(nc/2-1),nc/2
+       Do ciy = -(nb/2-1),nb/2
+          Do cix = -(na/2-1),na/2
+             vec(1:3) = dble(cix) *bb(1,1:3) &
+                  &    +dble(ciy) *bb(2,1:3) &
+                  &    +dble(ciz) *bb(3,1:3)
+             rkar(cix,ciy,ciz) = DSQRT( vec(1)**2 +vec(2)**2 +vec(3)**2 )
+          End Do
+       End Do
+    End Do
+#endif
   end subroutine alloc_vdw
 
   subroutine finalize_vdwdf()
     call dealloc()
 
-    contains
+  contains
     subroutine dealloc()
       deallocate(rho)
       deallocate(grad)
@@ -388,9 +516,14 @@ module m_vdWDF
       deallocate(q2ar)
       deallocate(theta_G_ab)
       if(.not.oneshot)then
-         deallocate(ualpha_g)
-         deallocate(ualpha_r)
+         !         deallocate(ualpha_g)
+         !         deallocate(ualpha_r)
+         deallocate(dFdrho)
+         deallocate(dFddrho)
       endif
+      deallocate(rkar)
+      !
+      if ( allocated( cgrad ) ) deallocate( cgrad )
     end subroutine dealloc
   end subroutine finalize_vdwdf
 
@@ -404,28 +537,32 @@ module m_vdWDF
     real(kind=DP) :: ddel,dphiD
     integer :: ierr
     integer :: id_sname=-1
+
     call tstatc0_begin('build_lookup_table ',id_sname,1)
+
     phidD = 0.d0
-    ddel = 1/dble(ndel)
+    ddel = 1.d0 /dble(ndel)
     dphiD = (q0max*eta1-q0min*etai)/(dble(nphiD))
+
     if(oneshot) then
        call reset_progress()
        call set_end(int(floor(dble(ndel+1)/dble(npes))))
     endif
+
     do cdel=0,ndel
        if(mod(cdel,npes)/=mype) cycle
        if(oneshot) call progress()
        do cphiD = -1,nphiD+1
           del = dble(cdel)*ddel
           phiD = q0min*etai + dble(cphiD)*dphiD
-          di = phiD*(1+del)
-          dk = phiD*(1-del)
+          di = phiD*(1.d0+del)
+          dk = phiD*(1.d0-del)
           Call kernel_phi(di,dk,tmp)
           phidD(cdel,cphiD) = tmp
        enddo
     enddo
     if(npes>1) &
-    & call mpi_allreduce(MPI_IN_PLACE,phidD,(ndel+1)*(nphiD+2),mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+         & call mpi_allreduce(MPI_IN_PLACE,phidD,(ndel+1)*(nphiD+2),mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
     call tstatc0_end(id_sname)
   end subroutine build_lookup_table
 
@@ -434,15 +571,18 @@ module m_vdWDF
     integer :: cqa,ierr
     integer :: id_sname = -1
     real(kind=DP),allocatable,dimension(:,:,:) :: tmpdr,tmpddr
+
     call tstatc0_begin('build_theta ',id_sname,1)
-!+++++++++++++++ Execute FFT for theta_R_ab ++++++++++++++++++++
+
+    !+++++++++++++++ Execute FFT for theta_R_ab ++++++++++++++++++++
     if(printable.and.oneshot) &
-    & write(nfout,'(a)') 'building theta (spline coefficient x rho) and their Fourier transforms ...'
+         & write(nfout,'(a)') 'building theta (spline coefficient x rho) and their Fourier transforms ...'
     if(oneshot)then
        call reset_progress()
        call set_end(int(floor(dble(nq0)/dble(npes))))
     endif
     theta_G_ab(:,:,:,:) = (0.d0,0.d0)
+
     if(.not.oneshot)then
        allocate(tmpdr(na,nb,nc));tmpdr=0.d0
        allocate(tmpddr(na,nb,nc));tmpddr=0.d0
@@ -451,32 +591,31 @@ module m_vdWDF
        allocate(tmpdr(1,1,1))
        allocate(tmpddr(1,1,1))
     endif
-    Do cqa = 1,nq0
-       if(mod(cqa,npes)/=mype) cycle
+    Do cqa = ista_nq,iend_nq
        if(oneshot) call progress()
        Call theta_ab(na,nb,nc,cqa,nq0,q0min,q0max,rho,grad,rhomin,theta_R,tmpdr,tmpddr)
        Call RtoG(na,nb,nc,theta_R,theta_G)
-       theta_G_ab(cqa,:,:,:) = theta_G(:,:,:)
+       if(sw_save_memory_vdw)then
+          theta_G_ab(map_z_nq(cqa),:,:,:) = theta_G(:,:,:)
+       else
+          theta_G_ab(cqa,:,:,:) = theta_G(:,:,:)
+       endif
        if(.not.oneshot)then
-          dtheta_R  (cqa,:,:,:) = tmpdr  (:,:,:)
-          ddtheta_R (cqa,:,:,:) = tmpddr (:,:,:)
+          dtheta_R  (map_z_nq(cqa),:,:,:) = tmpdr  (:,:,:)
+          ddtheta_R (map_z_nq(cqa),:,:,:) = tmpddr (:,:,:)
        endif
     Enddo
+
+    if(.not.sw_save_memory_vdw)then
+       call mpi_allreduce(MPI_IN_PLACE,theta_G_ab,nq0*na*nb*nc,mpi_double_complex, &
+            &    mpi_sum,mpi_comm_group,ierr) 
+    endif
     deallocate(tmpdr)
     deallocate(tmpddr)
-    if(npes>1) then
-      call mpi_allreduce(MPI_IN_PLACE,theta_G_ab(1,-(na/2-1),-(nb/2-1),-(nc/2-1)), &
-      &    nq0*na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
-      if(.not.oneshot)then
-         call mpi_allreduce(MPI_IN_PLACE,dtheta_R(1,1,1,1), &
-         &    nq0*na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
-         call mpi_allreduce(MPI_IN_PLACE,ddtheta_R(1,1,1,1), &
-         &    nq0*na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
-      endif
-    endif
+
     if(printable.and.oneshot) write(nfout,'(a)') '... done'
     call tstatc0_end(id_sname)
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   end subroutine build_theta
 
   subroutine phiab_by_interpl(ca,cb,nr12,phi_ab)
@@ -493,6 +632,7 @@ module m_vdWDF
 
     real(kind=DP) :: phi1u
     integer :: id_sname=-1
+
     call tstatc0_begin('phiab ',id_sname,1)
 
     phi_ab = 0.d0
@@ -501,8 +641,8 @@ module m_vdWDF
     qb = qar(cb)
     qab = DSQRT(qa**2+qb**2)
 
-! Coefficients phi2 and phi4 in the local part is determined to
-! match the non-local part in value and slope at d=d_s.
+    ! Coefficients phi2 and phi4 in the local part is determined to
+    ! match the non-local part in value and slope at d=d_s.
 
     rs = ds/qab
 
@@ -520,7 +660,7 @@ module m_vdWDF
     phi4 = (-1.d0/ds**4)*(phid_s-phi0) + (rs/(2.d0*ds**4))*d_phid_s
 
     i = DINT((ds/qab)/dr12)
-! Non-local part of phi_ab(r12)
+    ! Non-local part of phi_ab(r12)
     if(i.ge.nr12)return
     ddel = 1/dble(ndel)
     do cr12 = i+1,nr12
@@ -533,18 +673,18 @@ module m_vdWDF
        phiD = 0.5d0*(di+dk)
        cdel = DINT(del/ddel)
        cphiD = DINT(DBLE(nphiD)*(phiD-q0min*etai)/(q0max*eta1-q0min*etai))
-!     write(6,*) 'cdel, cphiD: ',cdel,cphiD
+       !     write(6,*) 'cdel, cphiD: ',cdel,cphiD
        if(cdel.ge.ndel.or.cphiD.ge.nphiD.or.cdel.lt.0.or.cphiD.lt.-1)cycle
        phix = del/ddel - dble(cdel)
        phiy = dble(nphiD)*(phiD-q0min*etai)/(q0max*eta1-q0min*etai) - dble(cphiD)
-       phi1u =  (1-phix)*(1-phiy) * phidD(cdel  ,cphiD  ) &
-&              +   phix *(1-phiy) * phidD(cdel+1,cphiD  ) &
-&              +(1-phix)*   phiy  * phidD(cdel  ,cphiD+1) &
-&              +   phix *   phiy  * phidD(cdel+1,cphiD+1)
+       phi1u =  (1.d0-phix)*(1.d0-phiy) * phidD(cdel  ,cphiD  ) &
+            &              +       phix *(1.d0-phiy) * phidD(cdel+1,cphiD  ) &
+            &              +(1.d0-phix)*   phiy  * phidD(cdel  ,cphiD+1) &
+            &              +      phix *   phiy  * phidD(cdel+1,cphiD+1)
        phi_ab(cr12) = phi1u
     enddo
 
-! Local part of phi_ab(r12)
+    ! Local part of phi_ab(r12)
     Do cr12 = 0,i
        r12 = DBLE(cr12)*dr12
 
@@ -556,20 +696,20 @@ module m_vdWDF
     call tstatc0_end(id_sname)
   end subroutine phiab_by_interpl
 
-!** SUBROUTINE phiab **************************************************************************
+  !** SUBROUTINE phiab **************************************************************************
   Subroutine phiab(ca,cb,nr12,phi_ab)
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer,intent(in) :: ca,cb,nr12
     Real(kind=DP),intent(out) :: phi_ab(0:nr12)
 
-! Internal valuables
+    ! Internal valuables
     Integer cr12,i
     Real(kind=DP) qab,qa,qb,r12,rs,phiD,dr12,phi2,phi4,phid_s,phid_s1,d_phid_s,d1,d2,abs_d
     Real(kind=DP) di,dk,tmp,dr
     Parameter(dr=0.001d0)
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
     integer :: id_sname=-1
     call tstatc0_begin('phiab ',id_sname,1)
 
@@ -577,9 +717,9 @@ module m_vdWDF
     qa = qar(ca)
     qb = qar(cb)
     qab = DSQRT(qa**2+qb**2)
- 
-! Coefficients phi2 and phi4 in the local part is determined to
-! match the non-local part in value and slope at d=d_s.
+
+    ! Coefficients phi2 and phi4 in the local part is determined to
+    ! match the non-local part in value and slope at d=d_s.
 
     rs = ds/qab
 
@@ -590,159 +730,439 @@ module m_vdWDF
     di = qa*(rs+dr)
     dk = qb*(rs+dr)
     Call kernel_phi(di,dk,phid_s1)
- 
+
     d_phid_s = (phid_s1 - phid_s)/dr
- 
+
     phi2 = ( 2.d0/ds**2)*(phid_s-phi0) - (rs/(2.d0*ds**2))*d_phid_s
     phi4 = (-1.d0/ds**4)*(phid_s-phi0) + (rs/(2.d0*ds**4))*d_phid_s
 
     i = DINT((ds/qab)/dr12)
-! Non-local part of phi_ab(r12)
+    ! Non-local part of phi_ab(r12)
     if(i.ge.nr12)return
+
     Do cr12 = i+1,nr12
        r12 = DBLE(cr12)*dr12
- 
+
        di = qa*r12
        dk = qb*r12
- 
+
        Call kernel_phi(di,dk,tmp)
        phi_ab(cr12) = tmp
- 
+
     Enddo
- 
-! Local part of phi_ab(r12)
+
+    ! Local part of phi_ab(r12)
     Do cr12 = 0,i
        r12 = DBLE(cr12)*dr12
- 
+
        d1 = qa*r12
        d2 = qb*r12
        phiD = qab*r12
        phi_ab(cr12) = phi0 + phi2*phiD**2 + phi4*phiD**4
     Enddo
- 
+
     call tstatc0_end(id_sname) 
   End subroutine phiab
-!** End SUBROUTINE phiab **********************************************************************
+  !** End SUBROUTINE phiab **********************************************************************
 
-  subroutine vdWdf_core()
+  subroutine vdWdf()
+    if(sw_save_memory_vdw) then
+       call vdWdf_core()
+       !call vdWdf_core2()
+    else
+       call vdWdf_core_org()
+    endif
+  end subroutine vdWdf
+
+  subroutine vdWdf_core_org()
     use progress_bar
     real(kind=DP) :: fac
-    integer :: cqa,cqb,ierr
-    integer,allocatable,dimension(:,:) :: ind
-    integer :: i,ic,i1,i2,i3
+    integer :: cqaa,cqa,cqb,ierr
+    integer :: i,ic,i2,i3
     complex(kind=DP), allocatable, dimension(:,:,:) :: tmpug,tmpur
     integer :: id_sname = -1
+    integer :: id_sname3 = -1
+
     call tstatc0_begin('vdWdf_core ',id_sname,1)
     Ecnl_12 = 0.0d0
     if(printable.and.oneshot) &
-    & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
-    allocate(ind(int(nq0*(nq0-1)*0.5+nq0),2));ind=0
-    ic=0
-!    do cqa=1,nq0
-!       do cqb=cqa,nq0
-!          ic=ic+1
-!          ind(ic,1) = cqa
-!          ind(ic,2) = cqb
-!       enddo
-!    enddo
+         & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
     if(oneshot)then
        call reset_progress()
-       !call set_end(int(floor(dble(ic)/dble(npes))))
        call set_end(int(floor(dble(nq0)/dble(npes))))
     endif
     if(.not.oneshot)then
        allocate(tmpug(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));tmpug=0.d0
-       ualpha_g = (0.d0,0.d0)
+       allocate(tmpur(na,nb,nc));tmpur=0.d0
+       !       ualpha_g = (0.d0,0.d0)
     else
        allocate(tmpug(1,1,1))
     endif
-!    do i=1,ic
-    do cqa=1,nq0
-    if(mod(cqa,npes)/=mype) cycle
-    theta_G_a(:,:,:) = theta_G_ab(cqa,:,:,:)
-    if(.not.oneshot) tmpug=(0.d0,0.d0)
-    if(oneshot) call progress()
-!    do cqb=cqa,nq0
-    do cqb=1,nq0
-       !cqa = ind(i,1)
-       !cqb = ind(i,2)
-       theta_G_b(:,:,:) = theta_G_ab(cqb,:,:,:)
-!       fac=2.d0
-!       if (cqa.eq.cqb) fac=1.d0
-       fac = 1.d0
-       if(eval_kernel_by_interpolation) then
-         call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
-       else
-         Call phiab(cqa,cqb,nr12,phi_ab)
+    if(.not.oneshot)then
+       dFdrho=0.d0;dFddrho=0.d0
+    endif
+
+    do cqaa=1,np_nq
+       cqa = real_index(cqaa)
+       theta_G_a(:,:,:) = theta_G_ab(cqa,:,:,:)
+       if (.not.oneshot)  tmpug=(0.d0,0.d0)
+       if (oneshot) call progress()
+       call tstatc0_begin('vdw_core_core ',id_sname3,1)
+       do cqb=1,nq0
+          theta_G_b(:,:,:) = theta_G_ab(cqb,:,:,:)
+          fac = 1.d0
+          if(eval_kernel_by_interpolation) then
+             call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
+          else
+             Call phiab(cqa,cqb,nr12,phi_ab)
+          endif
+          Call convolution_3d_by_fft(&
+               &    na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
+          Ecnl_12 = Ecnl_12 + Ecnl_12_ab
+       enddo
+       call tstatc0_end(id_sname3)
+       if(.not.oneshot)then
+          call GtoR(na,nb,nc,tmpur,tmpug)
+          dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(cqaa,:,:,:)
+          dFddrho(:,:,:) = dFddrho(:,:,:) + dble(tmpur(:,:,:))*ddtheta_R(cqaa,:,:,:)
        endif
-       Call convolution_3d_by_fft(na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
- 
-       Ecnl_12 = Ecnl_12 + Ecnl_12_ab
-    enddo
-    if(.not.oneshot) ualpha_g(cqa,:,:,:) = tmpug(:,:,:)
     enddo
 
     if(.not.oneshot)then
-       if(npes>1) &
-       & call mpi_allreduce(MPI_IN_PLACE,ualpha_g, &
-       &                    nq0*na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
+       if(npes>1) then
+          call mpi_allreduce(MPI_IN_PLACE,dFdrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          call mpi_allreduce(MPI_IN_PLACE,dFddrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          deallocate(tmpur)
+       endif
+       deallocate(tmpug)
+    endif
+
+    if(npes>1) then
+       call mpi_allreduce(MPI_IN_PLACE,Ecnl_12,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+    endif
+    if(printable.and.oneshot) write(nfout,'(a)') '... done'
+    call tstatc0_end(id_sname)
+
+  end subroutine vdWdf_core_org
+
+  subroutine vdWdf_core()
+    use progress_bar
+    real(kind=DP) :: fac, grho, rtmp, esum
+    integer :: cqaa,cqa,cqb,ierr
+    integer :: i,ic,i2,i3, cix, ciy ,ciz
+    complex(kind=DP), allocatable, dimension(:,:,:) :: tmpug,tmpur
+    complex(kind=DP), allocatable, dimension(:,:,:,:) :: theta_buf_s,theta_buf_r
+    integer :: ipos,i0,i1,isend,irecv,ireq,ireqr
+    integer, allocatable, dimension(:) :: ista
+    integer :: id_sname = -1, id_sname2 = -1, id_sname3 = -1
+
+    Ecnl_12 = 0.0d0
+    esum = 0.0d0
+
+    call tstatc0_begin('vdWdf_core ',id_sname,1)
+    if(printable.and.oneshot) &
+         & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
+
+    if(oneshot)then
+       call reset_progress()
+       call set_end(int(floor(dble(nq0)/dble(npes))))
+    endif
+
+    if(npes>1)then
+       allocate(ista(MPI_STATUS_SIZE))
+       allocate(theta_buf_r(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));theta_buf_r=(0.d0,0.d0)
     endif
 
     if(.not.oneshot)then
-       ualpha_r = (0.d0,0.d0)
+       allocate(tmpug(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));tmpug=0.d0
        allocate(tmpur(na,nb,nc));tmpur=0.d0
-       do i=1,nq0
-          if(mod(i,npes)/=mype) cycle
-          tmpug(:,:,:) = ualpha_g(i,:,:,:)
-          call GtoR(na,nb,nc,tmpur,tmpug)
-          ualpha_r(i,:,:,:) = tmpur(:,:,:)
-       enddo
-       deallocate(tmpur)
+       !       ualpha_g = (0.d0,0.d0)
+    else
+       allocate(tmpug(1,1,1))
     endif
+    if(.not.oneshot)then
+       dFdrho=0.d0;dFddrho=0.d0
+    endif
+
+    irecv = mype+1
+    if(irecv.ge.npes) irecv = irecv-npes
+    isend = mype-1
+    if(isend.lt.0)    isend = isend+npes
+
+    do cqaa=1,mp_nq
+       cqa = real_index(cqaa)
+       if (npes>1)        theta_buf_r(1:np_nq,:,:,:) = theta_G_ab(1:np_nq,:,:,:)
+       if (cqaa.le.np_nq) theta_G_a(:,:,:) = theta_G_ab(cqaa,:,:,:)
+
+       if (.not.oneshot)  tmpug=(0.d0,0.d0)
+       if (oneshot)       call progress()
+
+       call tstatc0_begin('vdw_core_core ',id_sname3,1)
+
+       do i0=0,npes-1
+          ipos = i0+mype
+          if(ipos.ge.npes) ipos = ipos-npes
+          if(ipos.lt.0)    ipos = ipos+npes
+
+          if(cqaa.le.np_nq)then
+             do i1=1,nel_nq(ipos)
+                cqb = real_index(i1,ipos)
+                if(npes>1)then
+                   theta_G_b(:,:,:) = theta_buf_r(i1,:,:,:)
+                else
+                   theta_G_b(:,:,:) = theta_G_ab(i1,:,:,:)
+                endif
+
+                fac = 1.d0
+                if(eval_kernel_by_interpolation) then
+                   call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
+                else
+                   Call phiab(cqa,cqb,nr12,phi_ab)
+                endif
+                Call convolution_3d_by_fft( na,nb,nc,cqa,cqb,nr12,phi_ab, &
+                     &                      theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
+                Ecnl_12 = Ecnl_12 + Ecnl_12_ab
+             enddo
+          endif
+          if(npes>1.and.i0.ne.npes-1)then
+             call tstatc0_begin('vdWdf_core (comm) ',id_sname2,1)
+             allocate(theta_buf_s(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+             theta_buf_s = theta_buf_r
+             call mpi_sendrecv(theta_buf_s,mp_nq*na*nb*nc,mpi_double_complex,isend,0, &
+                  &            theta_buf_r,mp_nq*na*nb*nc,mpi_double_complex,irecv,0, &
+                  &            mpi_comm_group,ista,ierr)
+             deallocate(theta_buf_s)
+             call tstatc0_end(id_sname2)
+          endif
+       enddo
+       call tstatc0_end(id_sname3)
 
-    deallocate(tmpug)
+       if(.not.oneshot.and.cqaa.le.np_nq)then
+          call GtoR(na,nb,nc,tmpur,tmpug)
+#if 1
+          dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(cqaa,:,:,:)
+          if ( .not. grad_rho_eq_0 ) then
+             do cix = 1,na
+                do ciy = 1,nb
+                   do ciz = 1,nc
+                      grho = grad(cix,ciy,ciz)
+                      rtmp = rho(cix,ciy,ciz)
+                      if ( grho > 1.0d-6 ) then
+                         dFddrho(cix,ciy,ciz) = dFddrho(cix,ciy,ciz) &
+                              &                + dble(tmpur(cix,ciy,ciz)) &
+                              &                 *ddtheta_R(cqaa,cix,ciy,ciz) /grho
+                      endif
+                   end do
+                end do
+             end do
+          endif
+#else
+          dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(cqaa,:,:,:)
+          if ( .not. grad_rho_eq_0 ) then
+             dFddrho(:,:,:) = dFddrho(:,:,:) + dble(tmpur(:,:,:))*ddtheta_R(cqaa,:,:,:)
+          endif
+#endif
+
+          if ( iprixc >= 2 ) then
+             do cix = 1,na
+                do ciy = 1,nb
+                   do ciz = 1,nc
+                      esum = esum +dble(tmpur(cix,ciy,ciz)) *theta_R(cix,ciy,ciz)
+                   end do
+                end do
+             end do
+          endif
+       endif
+    enddo
+
+    if(.not.oneshot)then
+       if(npes>1) then
+          call mpi_allreduce(MPI_IN_PLACE,dFdrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          call mpi_allreduce(MPI_IN_PLACE,dFddrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          deallocate(tmpur)
+       endif
+       deallocate(tmpug)
+    endif
 
     if(npes>1) then
        call mpi_allreduce(MPI_IN_PLACE,Ecnl_12,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
-       if(.not.oneshot) &
-       & call mpi_allreduce(MPI_IN_PLACE,ualpha_r,nq0*na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
     endif
-    deallocate(ind)
+
+    if ( iprixc >=2 ) then
+       esum = esum *univol *rinplw /2.0d0
+       call mpi_allreduce(MPI_IN_PLACE,Esum,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+       write(nfout,*) "vdw nonolocal E12 "
+       write(nfout,*) "evaluation in real       space : ", esum
+       write(nfout,*) "           in reciprocal space : ", ecnl_12
+    endif
+
+    if(npes>1)then
+       deallocate(ista)
+       deallocate(theta_buf_r)
+    endif
     if(printable.and.oneshot) write(nfout,'(a)') '... done'
     call tstatc0_end(id_sname)
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   end subroutine vdWdf_core
 
+  subroutine vdWdf_core2()
+    use progress_bar
+    real(kind=DP) :: fac
+    integer :: cqbb,cqa,cqb,ierr
+    integer :: i,ic,i2,i3
+    complex(kind=DP), allocatable, dimension(:,:,:) :: tmpug,tmpur
+    complex(kind=DP), allocatable, dimension(:,:,:,:) :: theta_buf_s,theta_buf_r
+    real(kind=DP), allocatable, dimension(:,:,:,:) :: dtheta_buf
+    integer :: ipos,i0,i1,isend,irecv,ireq,ireqr
+    integer, allocatable, dimension(:) :: ista
+    integer :: id_sname = -1
+    integer :: id_sname2 = -1
+
+    call tstatc0_begin('vdWdf_core2 ',id_sname,1)
+    Ecnl_12 = 0.0d0
+    if(printable.and.oneshot) &
+         & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
+    if(oneshot)then
+       call reset_progress()
+       call set_end(int(floor(dble(nq0)/dble(npes))))
+    endif
+    if(npes>1)then
+       allocate(ista(MPI_STATUS_SIZE))
+       allocate(theta_buf_r(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));theta_buf_r=(0.d0,0.d0)
+       if(.not.oneshot)then
+          allocate(dtheta_buf(mp_nq,na,nb,nc));dtheta_buf=0.d0
+       endif
+    endif
+    if(.not.oneshot)then
+       allocate(tmpug(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));tmpug=0.d0
+       allocate(tmpur(na,nb,nc));tmpur=0.d0
+       !       ualpha_g = (0.d0,0.d0)
+    else
+       allocate(tmpug(1,1,1))
+    endif
+    if(.not.oneshot)then
+       dFdrho=0.d0;dFddrho=0.d0
+    endif
+
+    irecv = mype+1
+    if(irecv.ge.npes) irecv = irecv-npes
+    isend = mype-1
+    if(isend.lt.0)    isend = isend+npes
+
+    if (npes>1) theta_buf_r(1:np_nq,:,:,:) = theta_G_ab(1:np_nq,:,:,:)
+    do i0=0,npes-1
+       ipos = i0+mype
+       if(ipos.ge.npes) ipos = ipos-npes
+       if(ipos.lt.0)    ipos = ipos+npes
+       do i1=1,nel_nq(ipos)
+          cqa = real_index(i1,ipos)
+          if(npes>1)then
+             theta_G_a(:,:,:) = theta_buf_r(i1,:,:,:)
+          else
+             theta_G_a(:,:,:) = theta_G_ab(i1,:,:,:)
+          endif
+          do cqbb=1,np_nq
+             theta_G_b(:,:,:) = theta_G_ab(cqbb,:,:,:)
+             fac = 1.d0
+             cqb = real_index(cqbb)
+             if(eval_kernel_by_interpolation) then
+                call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
+             else
+                Call phiab(cqa,cqb,nr12,phi_ab)
+             endif
+             Call convolution_3d_by_fft(&
+                  &    na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
+             Ecnl_12 = Ecnl_12 + Ecnl_12_ab
+          enddo
+          if(.not.oneshot)then
+             !             call mpi_allreduce(MPI_IN_PLACE,tmpug,na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
+             call GtoR(na,nb,nc,tmpur,tmpug)
+             dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(i1,:,:,:)
+             dFddrho(:,:,:) = dFddrho(:,:,:) + dble(tmpur(:,:,:))*ddtheta_R(i1,:,:,:)
+          endif
+       enddo
+       if(npes>1)then
+          call tstatc0_begin('vdWdf_core (comm) ',id_sname2,1)
+          allocate(theta_buf_s(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+          theta_buf_s = theta_buf_r
+          call mpi_sendrecv(theta_buf_s,mp_nq*na*nb*nc,mpi_double_complex,isend,0, &
+               &             theta_buf_r,mp_nq*na*nb*nc,mpi_double_complex,irecv,0, &
+               &             mpi_comm_group,ista,ierr)
+          if(.not.oneshot)then
+             dtheta_buf = dtheta_R
+             call mpi_sendrecv(dtheta_buf,mp_nq*na*nb*nc,mpi_double_precision,isend,0, &
+                  &             dtheta_R,  mp_nq*na*nb*nc,mpi_double_precision,irecv,0, &
+                  &             mpi_comm_group,ista,ierr)
+             dtheta_buf = ddtheta_R
+             call mpi_sendrecv(dtheta_buf,mp_nq*na*nb*nc,mpi_double_precision,isend,0, &
+                  &             ddtheta_R, mp_nq*na*nb*nc,mpi_double_precision,irecv,0, &
+                  &             mpi_comm_group,ista,ierr)
+          endif
+          deallocate(theta_buf_s)
+          call tstatc0_end(id_sname2)
+       endif
+    enddo
+
+    if(.not.oneshot)then
+       if(npes>1) then
+          call mpi_allreduce(MPI_IN_PLACE,dFdrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          call mpi_allreduce(MPI_IN_PLACE,dFddrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          deallocate(tmpur)
+       endif
+       deallocate(tmpug)
+    endif
+
+    if(npes>1) then
+       call mpi_allreduce(MPI_IN_PLACE,Ecnl_12,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+    endif
+    if(npes>1)then
+       deallocate(ista)
+       deallocate(theta_buf_r)
+       if(.not.oneshot) deallocate(dtheta_buf)
+    endif
+    if(printable.and.oneshot) write(nfout,'(a)') '... done'
+    call tstatc0_end(id_sname)
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  end subroutine vdWdf_core2
+
   subroutine corrections()
-    !!$if(printable) write(nfout,'(a)') 'calculating correction terms and contribution from the LDA ...'
+!!$if(printable) write(nfout,'(a)') 'calculating correction terms and contribution from the LDA ...'
     if(printable.and.oneshot) write(nfout,'(a)') 'calculating correction terms ...'
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     Call piDphi(Ecnl_3,Ecnl_3s)
 !!$    Call cLDA(na,nb,nc,rho,rhomin,dv,EcLDA)
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     if(printable.and.oneshot) write(nfout,'(a)') '... done'
   end subroutine corrections
 
   subroutine theta_ab(nx,ny,nz,ca,nq0,q0min,q0max,rho,grad,rhomin,theta_R,dtheta_R,ddtheta_R)
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer, intent(in) :: nx,ny,nz,ca,nq0
     Real(kind=DP), intent(in)  :: q0min,q0max,rhomin
     Real(kind=DP), intent(in)  :: rho(nx,ny,nz),grad(nx,ny,nz)
     real(kind=DP), intent(out) :: theta_R(nx,ny,nz),dtheta_R(nx,ny,nz),ddtheta_R(nx,ny,nz)
 
-! Internal valuables
+    ! Internal valuables
     Integer cir,cix,ciy,ciz
     Real(kind=DP) ni,dni,q0,dqdn,dqddn
 
     real(kind=DP),allocatable,dimension(:) :: y,y2tmp
     real(kind=DP) :: yout,y1out
+    integer :: id_sname = -1
 
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    call tstatc0_begin('theta_ab ',id_sname,1)
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
     allocate(y(nq0));y=0.d0;y(ca)=1.d0
     allocate(y2tmp(nq0));y2tmp(:) = q2ar(ca,:)
-! For the functions theta_R
+    ! For the functions theta_R
     Do cir = 1,nx*ny*nz
        cix = 1+(cir-1-MOD(cir-1+ny*nz,ny*nz))/(ny*nz)
        ciy = 1+(cir-ny*nz*(cix-1)-1-MOD(cir-1+nz,nz))/nz
@@ -762,23 +1182,24 @@ module m_vdWDF
     End do
     deallocate(y)
     deallocate(y2tmp)
+    call tstatc0_end(id_sname)
   end subroutine theta_ab
 
-!** End SUBROUTINE theta_ab *******************************************************************
+  !** End SUBROUTINE theta_ab *******************************************************************
 
-!** SUBROUTINE piDphi **********************************************************************************
+  !** SUBROUTINE piDphi **********************************************************************************
   Subroutine piDphi(Ecii,Ecii_s)
     implicit none
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     real(kind=DP), intent(out) :: Ecii,Ecii_s
 
     Real(kind=DP) da,db,a1,a2,dr
     Parameter (dr = 0.001d0)
     Integer  ci,cj,ck,ca,cb
 
-! Gauss-Legendre integration
+    ! Gauss-Legendre integration
     Integer cD,nD
     Parameter (nD=10)
     Real(kind=DP) maxD,minD,dD,LD,PLD,LDxi(nD),LDwi(nD)
@@ -792,23 +1213,23 @@ module m_vdWDF
     Real(kind=DP) x,nnx,nny,nnz,nn2,r
     Real(kind=DP) nxp,nxm,nyp,nym,nzp,nzm
     Real(kind=DP) zx(-3:3),zy(-3:3),zz(-3:3),rn(3,-3:3)
- 
+
     Real(kind=DP) phi
 
     Real(kind=DP) temp,rs,phid_s,phid_s1,d_phid_s,phi2,phi4
 
-! The table of phi1D
+    ! The table of phi1D
     Integer c1D,n1D,ierr
     Parameter(n1D = 1000)
     Real(kind=DP) d1D,max1D,D,phix,phiy
     Real(kind=DP) phi1D(0:n1D+1)
     real(kind=DP) :: q0,dqdn,dqddn
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
 
 
-!---------------------- Calculation Start ----------------------
-! Make the table of phi1D
+    !---------------------- Calculation Start ----------------------
+    ! Make the table of phi1D
     max1D = ds
     d1D = max1D/DBLE(n1D)
     Do c1D = 0,n1D+1
@@ -877,42 +1298,40 @@ module m_vdWDF
        phi4 = (-1.d0/ds**4)*(phid_s-phi0) + (rs/(2.d0*ds**4))*d_phid_s
 
        Ecii_s = Ecii_s + 0.5d0*4.d0*pi*dv*(n**2)*            &
-&                    (phi0*(rs**3)*(q0**0)/3.d0 +            &
-&                     phi2*(rs**5)*(q0**2)/5.d0 +            &
-&                     phi4*(rs**7)*(q0**4)/7.d0)
+            &                    (phi0*(rs**3)*(q0**0)/3.d0 +            &
+            &                     phi2*(rs**5)*(q0**2)/5.d0 +            &
+            &                     phi4*(rs**7)*(q0**4)/7.d0)
 
 
     Enddo
     if(npes>1) call mpi_allreduce(MPI_IN_PLACE,Ecii_s,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
   End Subroutine piDphi
-!** End SUBROUTINE piDphi **********************
+  !** End SUBROUTINE piDphi **********************
 
-!** SUBROUTINE cLDA ****************************
+  !** SUBROUTINE cLDA ****************************
   Subroutine cLDA(na,nb,nc,rho,rhomin,dv,EcLDA)
     Implicit none
 
-!************************ Note *********************************
-! This Algorism follows Dion's 1-shot method.
-!
-! This program is a subroutine.
-! This program calculates the correlation energy from LDA.
-! The formula is given at Eq.(58) (p.93) of 'Theory of the
-!   Inhomogeneous Electron Gas' Lundqvist, March.
-!
-!
-! Input
-!   rho(nrxyz,nsipn) : Total density
-!
-! Output
-!   EcLDA : Correlation energy from LDA.
-!
-!
-!                            Written by Youky Ono in 2009/Jul.
-!***************************************************************
-
-
+    !************************ Note *********************************
+    ! This Algorism follows Dion's 1-shot method.
+    !
+    ! This program is a subroutine.
+    ! This program calculates the correlation energy from LDA.
+    ! The formula is given at Eq.(58) (p.93) of 'Theory of the
+    !   Inhomogeneous Electron Gas' Lundqvist, March.
+    !
+    !
+    ! Input
+    !   rho(nrxyz,nsipn) : Total density
+    !
+    ! Output
+    !   EcLDA : Correlation energy from LDA.
+    !
+    !
+    !                            Written by Youky Ono in 2009/Jul.
+    !***************************************************************
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     integer,intent(in)  :: na,nb,nc
     real(kind=DP),intent(in)  :: rho(na,nb,nc)
     real(kind=DP),intent(in)  :: rhomin
@@ -926,11 +1345,9 @@ module m_vdWDF
 
     real(kind=DP)  e,m
     parameter (e=1.d0,m=1.d0) ! Hatree atomic unit
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
-
-
-!---------------------- Calculation Start ----------------------
+    !---------------------- Calculation Start ----------------------
     EcLDA=0
     Do cjr = 1,na*nb*nc
        if(mod(cjr,npes)/=mype) cycle
@@ -938,102 +1355,105 @@ module m_vdWDF
        cjy = 1+(cjr-nb*nc*(cjx-1)-1-MOD(cjr-1+nc,nc))/nc
        cjz = cjr-nc*(nb*(cjx-1)-1+cjy)
        n = MAX(rho(cjx,cjy,cjz),rhomin)
- 
+
        rs = ((3.d0/(4.d0*pi*n))**(1.d0/3.d0))/aB
        x = rs/11.4d0
        ec = -0.0666d0*0.5d0*((1.d0+x**3)*DLOG(1.d0+1.d0/x)-x**2+x/2.d0-1.d0/3.d0)
- 
+
        EcLDA = EcLDA + dv*n*ec
     Enddo
     if(npes>1) call mpi_allreduce(MPI_IN_PLACE,EcLDA,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
   End Subroutine cLDA
-!** End SUBROUTINE cLDA **********************
+  !** End SUBROUTINE cLDA **********************
 
-!** SUBROUTINE RtoG ***************************************************************************
-! Execute FFT and transform theta_R to theta_G
+  !** SUBROUTINE RtoG ***************************************************************************
+  ! Execute FFT and transform theta_R to theta_G
   Subroutine RtoG(na,nb,nc,theta_R,theta_G)
     Implicit none
 
     !!include "fftw3.f"
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer  na,nb,nc,cix,ciy,ciz,ca,cb,cc
     Real(kind=DP) rx,ry,rz,kx,ky,kz,ra,rb,rc,ka,kb,kc,rk,r12,x,y,z,term,term1
 
     Real(kind=DP) theta_R(na,nb,nc)
     Complex(kind=DP) theta_G(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
 
-! FFTW3 !!!
+    ! FFTW3 !!!
     integer(kind=DP) :: plan
     Complex(kind=DP),allocatable :: temp_R(:,:,:),temp_G(:,:,:)
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
 
     allocate(temp_R(na,nb,nc))
     allocate(temp_G(0:na-1,0:nb-1,0:nc-1))
 
-!***** FFT **************************************************
+    !***** FFT **************************************************
     Do cix = 1,na
-    Do ciy = 1,nb
-    Do ciz = 1,nc
-       temp_R(cix,ciy,ciz) = DCMPLX(theta_R(cix,ciy,ciz))
-    Enddo
-    Enddo
+       Do ciy = 1,nb
+          Do ciz = 1,nc
+             temp_R(cix,ciy,ciz) = DCMPLX(theta_R(cix,ciy,ciz))
+          Enddo
+       Enddo
     Enddo
 
-! FFTW3 !!!
+    ! FFTW3 !!!
     call dfftw_plan_dft_3d(plan,na,nb,nc,temp_R,temp_G,FFTW_FORWARD,FFTW_ESTIMATE)
     call dfftw_execute(plan)
     call dfftw_destroy_plan(plan)
 
     Do ca = -(na/2-1),na/2
-    Do cb = -(nb/2-1),nb/2
-    Do cc = -(nc/2-1),nc/2
-       cix = MOD(ca+na,na)
-       ciy = MOD(cb+nb,nb)
-       ciz = MOD(cc+nc,nc)
-       theta_G(ca,cb,cc) = temp_G(cix,ciy,ciz) / DBLE(na*nb*nc)
-    Enddo
-    Enddo
+       Do cb = -(nb/2-1),nb/2
+          Do cc = -(nc/2-1),nc/2
+             cix = MOD(ca+na,na)
+             ciy = MOD(cb+nb,nb)
+             ciz = MOD(cc+nc,nc)
+             theta_G(ca,cb,cc) = temp_G(cix,ciy,ciz) / DBLE(na*nb*nc)
+          Enddo
+       Enddo
     Enddo
-!***** END of FFT ******************************************
+    !***** END of FFT ******************************************
 
-    deallocate(temp_R)
-    deallocate(temp_G)
+    deallocate(temp_R);   deallocate(temp_G)
   End subroutine RtoG
-!** End SUBROUTINE RtoG ***********************************************************************
+  !** End SUBROUTINE RtoG ***********************************************************************
 
   subroutine GtoR(na,nb,nc,theta_R,theta_G)
-!    include "fftw3.f"
+    !    include "fftw3.f"
     integer,intent(in) :: na,nb,nc
     complex(kind=DP), dimension(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2), intent(in) :: theta_G
     complex(kind=DP), dimension(na,nb,nc), intent(out) :: theta_R
+
     integer :: ca,cb,cc,cix,ciy,ciz
     integer(kind=DP) :: plan
     complex(kind=DP),allocatable :: temp_R(:,:,:),temp_G(:,:,:)
     integer :: id_sname=-1
+
     call tstatc0_begin('GtoR ',id_sname,1)
+
     allocate(temp_R(na,nb,nc))
     allocate(temp_G(0:na-1,0:nb-1,0:nc-1))
+
     Do ca = -(na/2-1),na/2
-    Do cb = -(nb/2-1),nb/2
-    Do cc = -(nc/2-1),nc/2
-       cix = MOD(ca+na,na)
-       ciy = MOD(cb+nb,nb)
-       ciz = MOD(cc+nc,nc)
-       temp_G(cix,ciy,ciz) = theta_G(ca,cb,cc)
-    Enddo
-    Enddo
+       Do cb = -(nb/2-1),nb/2
+          Do cc = -(nc/2-1),nc/2
+             cix = MOD(ca+na,na)
+             ciy = MOD(cb+nb,nb)
+             ciz = MOD(cc+nc,nc)
+             temp_G(cix,ciy,ciz) = theta_G(ca,cb,cc)
+          Enddo
+       Enddo
     Enddo
 
-! FFTW3 !!!
+    ! FFTW3 !!!
     call dfftw_plan_dft_3d(plan,na,nb,nc,temp_G,temp_R,FFTW_BACKWARD,FFTW_ESTIMATE)
     call dfftw_execute(plan)
     call dfftw_destroy_plan(plan)
-!***** END of FFT ******************************************
+    !***** END of FFT ******************************************
 
-    theta_R = temp_R/dble(na*nb*nc)
-!    theta_R = temp_R
+    !    theta_R = temp_R/dble(na*nb*nc)
+    theta_R = temp_R
 
     deallocate(temp_R)
     deallocate(temp_G)
@@ -1041,7 +1461,7 @@ module m_vdWDF
   end subroutine GtoR
 
   subroutine get_phi_ab_g(nr12,phi_ab,phi_ab_g)
-!    include "fftw3.f"
+    !    include "fftw3.f"
     integer, intent(in) :: nr12
     real(kind=DP), dimension(0:nr12), intent(in)  :: phi_ab
     real(kind=DP), dimension(0:nr12), intent(out) :: phi_ab_g
@@ -1050,6 +1470,10 @@ module m_vdWDF
     integer :: cr,ck
     integer(kind=DP) :: plan
     real(kind=DP) :: dd,r12,term,rk
+    real(kind=DP) :: rr
+    complex(kind=DP) :: zsum
+    complex(kind=DP), parameter :: zi = ( 0.0d0, 1.0d0 )
+
     integer :: id_sname = -1
     call tstatc0_begin('get_phi_ab_g ',id_sname,1)
     allocate(cphiab_r(0:nr12));cphiab_r=0.d0
@@ -1058,20 +1482,27 @@ module m_vdWDF
     dd = r12max/dble(nr12)
 
     do cr=0,nr12
-       cphiab_r(cr) = dcmplx(phi_ab(cr)*dble(cr),0.d0)
+       rr = dble(cr) *dd
+       cphiab_r(cr) = dcmplx( phi_ab(cr)*rr, 0.d0 )
     enddo
+
+#if 0
     call dfftw_plan_dft_1d(plan,nr12+1,cphiab_r,cphiab_g,FFTW_BACKWARD,FFTW_ESTIMATE)
+#else
+    call dfftw_plan_dft_1d( plan, nr12, cphiab_r(0:nr12-1), cphiab_g(0:nr12-1), &
+         &                  FFTW_BACKWARD, FFTW_ESTIMATE )
+#endif
     call dfftw_execute(plan)
     call dfftw_destroy_plan(plan)
 
     do ck=1,nr12
        rk = dble(ck)/r12max
-       phi_ab_g(ck) = 2.d0*dd**2*dimag(cphiab_g(ck))/(dble(rk))
-    enddo 
+       phi_ab_g(ck) = 2.d0 *dd *dimag(cphiab_g(ck))/(dble(rk))
+    enddo
 
     Do ck = 0,0
        rk = dd * dble(ck)
-       term = 0
+       term = 0.0d0
        Do cr = 0,nr12
           r12 = dd*dble(cr)
           term = term + phi_ab(cr) * (r12**2)
@@ -1079,23 +1510,19 @@ module m_vdWDF
        phi_ab_g(ck) = 4.0d0*pi*dd * term
     End do
 
-    deallocate(cphiab_r)
-    deallocate(cphiab_g)
+    deallocate(cphiab_r);   deallocate(cphiab_g)
+
+    phi_ab_g = phi_ab_g / univol
 
-!    if(qa==1)then
-!       do ck=0,nr12
-!          rk = dble(ck)/r12max
-!          write(qa*35+qb,*) rk,phi_ab_g(ck)
-!       enddo
-!    endif
     call tstatc0_end(id_sname)
+
   end Subroutine get_phi_ab_g
 
   Subroutine convolution_3d_by_fft(na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
 
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     integer, intent(in) :: na,nb,nc,cqa,cqb,nr12
     complex(kind=DP), intent(in)  :: theta_G_a(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
     complex(kind=DP), intent(in)  :: theta_G_b(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
@@ -1113,64 +1540,79 @@ module m_vdWDF
 
     Real(kind=DP),allocatable :: core_G(:)
 
+    real*8 :: dd
+
     real(kind=DP) :: pi2rk
     integer :: id_sname = -1
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
     call tstatc0_begin('convolution_3d_by_fft ',id_sname,1)
     allocate(core_G(0:nr12));core_G=0.d0
     nabc = na*nb*nc
 
-
-    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
-    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
-    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
+    !      dd = r12max/dble(nr12)
 
     call get_phi_ab_g(nr12,phi_ab,core_G)
+
     dk = 1.d0/r12max
     temp_c = (0.0d0,0.0d0)
     !tmpug = 0.d0
-    Do ciz = -(nc/2-1),nc/2
-    Do ciy = -(nb/2-1),nb/2
-    Do cix = -(na/2-1),na/2
 
-       rk = DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
-       !If(rk.LT.maxk) Then
-          ck   = dint(rk/dk)
-          phix = (rk - dk*DBLE(ck))/dk
-          phiy = 1.0 - phix
-          term = phiy*core_G(ck) + phix*core_G(ck+1)
-       !Else
-       !   term = 0.0d0
-       !Endif
-
-       temp_c = temp_c +                        &
-&             DCONJG(theta_G_a(cix,ciy,ciz)) *  &
-&                    theta_G_b(cix,ciy,ciz)  *  &
-&                  term
-       if(.not.oneshot) tmpug(cix,ciy,ciz) = tmpug(cix,ciy,ciz)+theta_G_b(cix,ciy,ciz)*term*dv*nabc!*0.5d0*fac
-    Enddo
-    Enddo
-    Enddo
+    if(oneshot)then
+       Do ciz = -(nc/2-1),nc/2
+          Do ciy = -(nb/2-1),nb/2
+             Do cix = -(na/2-1),na/2
+                rk   = rkar(cix,ciy,ciz)
+                ck   = dint(rk/dk)
+
+                phix = (rk - dk*DBLE(ck))/dk
+                phiy = 1.0d0 -phix
+                term = phiy*core_G(ck) + phix*core_G(ck+1)
+
+                temp_c = temp_c + term *DCONJG(theta_G_a(cix,ciy,ciz))   &
+                     &           *             theta_G_b(cix,ciy,ciz)
+             Enddo
+          Enddo
+       Enddo
+    else
+       Do ciz = -(nc/2-1),nc/2
+          Do ciy = -(nb/2-1),nb/2
+             Do cix = -(na/2-1),na/2
+                rk   = rkar(cix,ciy,ciz)
+                ck   = dint(rk/dk)
+
+                phix = (rk - dk*DBLE(ck))/dk
+                phiy = 1.0d0 - phix
+                term = phiy*core_G(ck) + phix*core_G(ck+1)
+
+                temp_c = temp_c + term *DCONJG(theta_G_a(cix,ciy,ciz))   &
+                     &           *             theta_G_b(cix,ciy,ciz)
+
+                tmpug(cix,ciy,ciz) = tmpug(cix,ciy,ciz) &
+                     &              +theta_G_b(cix,ciy,ciz)*term *univol
+             Enddo
+          Enddo
+       Enddo
+    endif
 
-    temp_c = temp_c * dv*nabc
+    temp_c = temp_c * univol**2
     Ecnl_12_ab = 0.5d0*DBLE(temp_c)*fac
-    !tmpug(:,:,:) = tmpug(:,:,:)*dv*nabc*0.5d0*fac
 
-!***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
+    !***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
 
     deallocate(core_G)
 
     call tstatc0_end(id_sname)
+
   End subroutine convolution_3d_by_fft
-!** End SUBROUTINE convolution_3d *************************************************************
+  !** End SUBROUTINE convolution_3d *************************************************************
 
   Subroutine convolution_3d(na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,Ecnl_12_ab)
 
     Implicit none
 
-!    include "fftw3.f"
+    !    include "fftw3.f"
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     integer, intent(in) :: na,nb,nc,cqa,cqb,nr12
     complex(kind=DP), intent(in)  :: theta_G_a(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
     complex(kind=DP), intent(in)  :: theta_G_b(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
@@ -1189,21 +1631,21 @@ module m_vdWDF
     Real(kind=DP),allocatable :: phiab_r(:)
 
     real(kind=DP) :: pi2rk
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
 
     allocate(core_G(0:nk-1))
     allocate(phiab_r(0:nr12));phiab_r=0.d0
     nabc = na*nb*nc
 
-!    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
-!&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
-!&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
-
-    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
-    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
-    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
+    !    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
+    !&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
+    !&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
+
+    !    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
+    !    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
+    !    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
 
-!***** Make the core function and execute 3d-FFT by hand *****
+    !***** Make the core function and execute 3d-FFT by hand *****
     dr12 = r12max/dble(nr12)
     dk = maxk/dble(nk-1)
 
@@ -1232,71 +1674,72 @@ module m_vdWDF
        Enddo
        core_G(ck) = 2.d0 * dr12 * term / rk
     End do
-!***** END of Make the core function *************************
+    !***** END of Make the core function *************************
 
-!    if(cqa==1)then
-!       do ck=0,nr12
-!          rk = dk * dble(ck)
-!          write((cqa+1)*35+cqb,*) rk,core_G(ck)
-!       enddo
-!    endif
-!***** Calculate 'theta_G_a*core_G*theta_G_b'  ***************
+    !    if(cqa==1)then
+    !       do ck=0,nr12
+    !          rk = dk * dble(ck)
+    !          write((cqa+1)*35+cqb,*) rk,core_G(ck)
+    !       enddo
+    !    endif
+    !***** Calculate 'theta_G_a*core_G*theta_G_b'  ***************
     dk = maxk/dble(nk-1)
     temp_c = (0.0,0.0)
     Do cix = -(na/2-1),na/2
-    Do ciy = -(nb/2-1),nb/2
-    Do ciz = -(nc/2-1),nc/2
+       Do ciy = -(nb/2-1),nb/2
+          Do ciz = -(nc/2-1),nc/2
 
-       rk = DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
-       If(rk.LT.maxk) Then
-          ck = DINT(rk/dk)
-          phix = (rk - dk*DBLE(ck))/dk
-          phiy = 1.0 - phix
-          term = phiy*core_G(ck) + phix*core_G(ck+1)
-       Else
-          term = 0.0d0
-       Endif
-
-       temp_c = temp_c +                        &
-&             DCONJG(theta_G_a(cix,ciy,ciz)) *  &
-&                    theta_G_b(cix,ciy,ciz)  *  &
-&                  term
-    Enddo
-    Enddo
+             !rk = DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
+             rk = rkar(cix,ciy,ciz)
+             If(rk.LT.maxk) Then
+                ck = DINT(rk/dk)
+                phix = (rk - dk*DBLE(ck))/dk
+                phiy = 1.0 - phix
+                term = phiy*core_G(ck) + phix*core_G(ck+1)
+             Else
+                term = 0.0d0
+             Endif
+
+             temp_c = temp_c +                        &
+                  &             DCONJG(theta_G_a(cix,ciy,ciz)) *  &
+                  &                    theta_G_b(cix,ciy,ciz)  *  &
+                  &                  term
+          Enddo
+       Enddo
     Enddo
 
     temp_c = temp_c * dv*nabc
     Ecnl_12_ab = 0.5d0*DBLE(temp_c)
-!***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
+    !***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
 
     deallocate(core_G)
     deallocate(phiab_r)
 
 
   End subroutine convolution_3d
-!** End SUBROUTINE convolution_3d *************************************************************
+  !** End SUBROUTINE convolution_3d *************************************************************
 
   Subroutine d_q0(n,dn,q0min,q0max,q0,dqdn,dqddn)
     Implicit None
 
 
-!************************ Note *********************************
-! This program calculates q0.
-!
-! Input
-!   rho(nrxyz,nsipn) : Total density
-!
-! Output
-!   q0               : 
-!
-!
-!
-!                            Written by Youky Ono
-!***************************************************************
+    !************************ Note *********************************
+    ! This program calculates q0.
+    !
+    ! Input
+    !   rho(nrxyz,nsipn) : Total density
+    !
+    ! Output
+    !   q0               : 
+    !
+    !
+    !
+    !                            Written by Youky Ono
+    !***************************************************************
 
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     real(kind=DP), intent(in)  :: n,dn,q0min,q0max
     real(kind=DP), intent(out) :: q0,dqdn,dqddn
     Double Precision rs,x,nn2,r,term1,term2,term3,rnn2
@@ -1307,17 +1750,17 @@ module m_vdWDF
     integer :: i
     real(kind=DP) :: s2,s,qq
     real(kind=DP) :: drsdn,dxdn,dexcdn,dsdn,dkFdn
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
 
 
-!---------------------- Calculation Start ----------------------
+    !---------------------- Calculation Start ----------------------
     nn2 = dn*dn
     rnn2 = dsqrt(nn2)
     rs = (3.d0/(4.d0*pi*n))**(1.d0/3.d0)
 
-! Eq.(58), (59) (p.93-94) of Theory of the Inhomogeneous Electron gas.
-! S.Lundqvist and N.H.March 1983 Plenum Press, NY
+    ! Eq.(58), (59) (p.93-94) of Theory of the Inhomogeneous Electron gas.
+    ! S.Lundqvist and N.H.March 1983 Plenum Press, NY
     x = rs/11.4d0
     GxcLDA = 0.5d0*((1.d0+x**3)*DLog(1.d0+1.d0/x)-x**2+x/2.d0-1.d0/3.d0)
     excLDA = -0.458d0/rs-0.0666d0*GxcLDA
@@ -1329,22 +1772,37 @@ module m_vdWDF
 
     call hxxc(qq,q0max,q0,dq)
     q0 = max(q0,q0min)
-    
+
     if(q0.eq.q0min)then
-      dqdn = 0.d0;dqddn=0.d0
+       dqdn = 0.d0;dqddn=0.d0
     else
-      drsdn  = -(3.d0/(4.d0*PAI))**(1.d0/3.d0)/3.d0*n**(-4.d0/3.d0)
-      dxdn   = drsdn/11.4d0
-      dexcdn = (0.458d0/(rs*rs))*drsdn-0.0333d0*(3*x*x*dlog(1+1.d0/x)-(1+x**3)/(x*(x+1))-2*x+0.5d0)*dxdn
-      dkFdn  = (pi/kF)**2
-      dsdn   = -(rnn2/(4*kF*n*kF*n))*(dkFdn*n+kF)
-      dqdn   = -(4.d0*pi/3.d0)*dexcdn-(Zab/9.d0)*(2*s*dsdn*kF+s2*dkFdn)
-      dqddn  = -(Zab/9.d0)*s/n
-      dqdn   = dqdn*dq
-      dqddn  = dqddn*dq
+#if 0
+       drsdn  = -(3.d0/(4.d0*PAI))**(1.d0/3.d0)/3.d0*n**(-4.d0/3.d0)
+       dxdn   = drsdn/11.4d0
+       dexcdn = (0.458d0/(rs*rs))*drsdn &
+            &   -0.0333d0*(3*x*x*dlog(1+1.d0/x)-(1+x**3)/(x*(x+1))-2*x+0.5d0)*dxdn
+       dkFdn  = (pi/kF)**2
+!!!       dsdn   = -(rnn2/(4*kF*n*kF*n))*(dkFdn*n+kF)
+       dsdn   = -(rnn2/(2*kF*n*kF*n))*(dkFdn*n+kF)
+       dqdn   = -(4.d0*pi/3.d0)*dexcdn-(Zab/9.d0)*(2*s*dsdn*kF+s2*dkFdn)
+       dqddn  = -(Zab/9.d0)*s/n
+
+#else
+       drsdn  = -rs /3.0d0 /n
+       dxdn   = drsdn/11.4d0
+       dexcdn = (0.458d0/(rs*rs))*drsdn &
+            &   -0.0333d0*(3*x*x*dlog(1+1.d0/x)-(1+x**3)/(x*(x+1))-2*x+0.5d0)*dxdn
+       dkFdn  = kF /3.0d0 /n
+       dsdn   = -s /(kF*n) *(dkFdn*n+kF)
+       dqdn   = -(4.d0*pi/3.d0)*dexcdn-(Zab/9.d0)*(2*s*dsdn*kF+s2*dkFdn)
+!       dqddn  = -(Zab/9.d0)*s2 *2.0 /dn *KF
+       dqddn  = -(Zab/9.d0)*s/n
+#endif
+       dqdn   = dqdn*dq
+       dqddn  = dqddn*dq
     endif
   End Subroutine d_q0
-!** End SUBROUTINE d_q0 ***********************************************************************
+  !** End SUBROUTINE d_q0 ***********************************************************************
 
   subroutine hxxc(x,xc,hx,dhx)
     real(kind=DP), intent(in)  :: x,xc
@@ -1362,62 +1820,59 @@ module m_vdWDF
     dhx = xc*dexp(-summ)*dsumm
   end subroutine hxxc
 
-!** SUBROUTINE derivation *********************************************************************
+  !** SUBROUTINE derivation *********************************************************************
   Subroutine derivation(na,nb,nc,aa,rho,dv,grad)
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
 
-! The unit cell and the electron density information
+    ! The unit cell and the electron density information
     integer,intent(in) :: na,nb,nc
     real(kind=DP), intent(in) :: rho(na,nb,nc)
     Real(kind=DP), intent(in) :: aa(3,3)
     real(kind=DP), intent(in) :: dv
     real(kind=DP), intent(out) :: grad(na,nb,nc)
 
-! Integers
+    ! Integers
     Integer i,j,k,cx,cy,cz,nabc
 
-! Internal valuables
+    ! Internal valuables
     Integer zx(-3:3),zy(-3:3),zz(-3:3)
     Real(kind=DP) rn(3,-3:3),detr,bb(3,3)
     Real(kind=DP),allocatable ::  darho(:,:,:),dbrho(:,:,:),dcrho(:,:,:)
     real(kind=DP) :: dx,dy,dz
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
 
     allocate(darho(na,nb,nc));darho=0.d0
     allocate(dbrho(na,nb,nc));dbrho=0.d0
     allocate(dcrho(na,nb,nc));dcrho=0.d0
-!    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
-!&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
-!&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
+    !    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
+    !&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
+    !&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
 
     Do cx = 1,na
-    Do cy = 1,nb
-    Do cz = 1,nc
+       Do cy = 1,nb
+          Do cz = 1,nc
 
-    Do j = -3,3
-       zx(j) = MOD(2*na+(cx+j)-1,na)+1
-       zy(j) = MOD(2*nb+(cy+j)-1,nb)+1
-       zz(j) = MOD(2*nc+(cz+j)-1,nc)+1
-
-       rn(1,j) = rho(zx(j),cy,cz)
-       rn(2,j) = rho(cx,zy(j),cz)
-       rn(3,j) = rho(cx,cy,zz(j))
-    Enddo
-
-    darho(cx,cy,cz) = &
-&      (rn(1,3)-9.d0*rn(1,2)+45.d0*rn(1,1)-45.d0*rn(1,-1)+9.d0*rn(1,-2)-rn(1,-3))/(60.d0)
-    dbrho(cx,cy,cz) = &
-&      (rn(2,3)-9.d0*rn(2,2)+45.d0*rn(2,1)-45.d0*rn(2,-1)+9.d0*rn(2,-2)-rn(2,-3))/(60.d0)
-    dcrho(cx,cy,cz) = &
-&      (rn(3,3)-9.d0*rn(3,2)+45.d0*rn(3,1)-45.d0*rn(3,-1)+9.d0*rn(3,-2)-rn(3,-3))/(60.d0)
-    Enddo
-    Enddo
+             Do j = -3,3
+                zx(j) = MOD(2*na+(cx+j)-1,na)+1
+                zy(j) = MOD(2*nb+(cy+j)-1,nb)+1
+                zz(j) = MOD(2*nc+(cz+j)-1,nc)+1
+
+                rn(1,j) = rho(zx(j),cy,cz)
+                rn(2,j) = rho(cx,zy(j),cz)
+                rn(3,j) = rho(cx,cy,zz(j))
+             Enddo
+
+             darho(cx,cy,cz) = (rn(1,3)-9.d0*rn(1,2)+45.d0*rn(1,1)-45.d0*rn(1,-1)+9.d0*rn(1,-2)-rn(1,-3))/(60.d0)
+             dbrho(cx,cy,cz) = (rn(2,3)-9.d0*rn(2,2)+45.d0*rn(2,1)-45.d0*rn(2,-1)+9.d0*rn(2,-2)-rn(2,-3))/(60.d0)
+             dcrho(cx,cy,cz) = (rn(3,3)-9.d0*rn(3,2)+45.d0*rn(3,1)-45.d0*rn(3,-1)+9.d0*rn(3,-2)-rn(3,-3))/(60.d0)
+          Enddo
+       Enddo
     Enddo
 
     detr = (aa(1,1)*aa(2,2)*aa(3,3)+aa(1,2)*aa(2,3)*aa(3,1)+aa(1,3)*aa(2,1)*aa(3,2)) &
-&        - (aa(1,1)*aa(2,3)*aa(3,2)+aa(1,2)*aa(2,1)*aa(3,3)+aa(1,3)*aa(2,2)*aa(3,1))
+         &- (aa(1,1)*aa(2,3)*aa(3,2)+aa(1,2)*aa(2,1)*aa(3,3)+aa(1,3)*aa(2,2)*aa(3,1))
 
     bb(1,1) =  (aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2))/detr
     bb(2,1) = -(aa(2,1)*aa(3,3)-aa(2,3)*aa(3,1))/detr
@@ -1430,15 +1885,14 @@ module m_vdWDF
     bb(3,3) =  (aa(1,1)*aa(2,2)-aa(1,2)*aa(2,1))/detr
 
     Do cx = 1,na
-    Do cy = 1,nb
-    Do cz = 1,nc
-
-    dx = (bb(1,1)*darho(cx,cy,cz) + bb(1,2)*dbrho(cx,cy,cz) + bb(1,3)*dcrho(cx,cy,cz))
-    dy = (bb(2,1)*darho(cx,cy,cz) + bb(2,2)*dbrho(cx,cy,cz) + bb(2,3)*dcrho(cx,cy,cz))
-    dz = (bb(3,1)*darho(cx,cy,cz) + bb(3,2)*dbrho(cx,cy,cz) + bb(3,3)*dcrho(cx,cy,cz))
-    grad(cx,cy,cz) = dsqrt(dx**2+dy**2+dz**2)
-    Enddo
-    Enddo
+       Do cy = 1,nb
+          Do cz = 1,nc
+             dx = (bb(1,1)*darho(cx,cy,cz) + bb(1,2)*dbrho(cx,cy,cz) + bb(1,3)*dcrho(cx,cy,cz))
+             dy = (bb(2,1)*darho(cx,cy,cz) + bb(2,2)*dbrho(cx,cy,cz) + bb(2,3)*dcrho(cx,cy,cz))
+             dz = (bb(3,1)*darho(cx,cy,cz) + bb(3,2)*dbrho(cx,cy,cz) + bb(3,3)*dcrho(cx,cy,cz))
+             grad(cx,cy,cz) = dsqrt(dx**2+dy**2+dz**2)
+          Enddo
+       Enddo
     Enddo
 
     deallocate(darho)
@@ -1446,7 +1900,7 @@ module m_vdWDF
     deallocate(dcrho)
 
   End subroutine derivation
-!** End SUBROUTINE derivation *****************************************************************
+  !** End SUBROUTINE derivation *****************************************************************
 
   subroutine spline0(nq0,x,y2)
     integer, intent(in) :: nq0
@@ -1483,8 +1937,8 @@ module m_vdWDF
     real(kind=DP), intent(in) :: a,b
     real(kind=DP) :: res
     res = 2.d0*((3.d0-a*a)*b*DCOS(b)*DSIN(a) + (3.d0-b*b)*a*DCOS(a)*DSIN(b) &
-&       + (a*a+b*b-3.d0)*DSIN(a)*DSIN(b) &
-&       - 3.d0*a*b*DCOS(a)*DCOS(b))/((a*b)**3)
+         &       + (a*a+b*b-3.d0)*DSIN(a)*DSIN(b) &
+         &       - 3.d0*a*b*DCOS(a)*DCOS(b))/((a*b)**3)
   end function Wab
 
   function Twxyz(w,x,y,z) result (res)
@@ -1494,30 +1948,30 @@ module m_vdWDF
     res = 0.5d0*(1.d0/(w+x)+1.d0/(y+z))*(1.d0/((w+y)*(x+z))+1.d0/((w+z)*(y+x)))
   end function Twxyz
 
-!** SUBROUTINE kernel_phi *********************************************************************
+  !** SUBROUTINE kernel_phi *********************************************************************
   Subroutine kernel_phi(di,dk,phi)
     implicit none
 
-!************************ Note *********************************
-! This Algorism follows Dion's 1-shot method.
-!
-! This program is a subroutine.
-! This program calculates the kernel function phi.
-!
-! Input
-!
-!
-! Output
-!   phi              : 
-!                      
-!
-!
-!                            Written by Youky Ono in 2013/Jan.
-!***************************************************************
+    !************************ Note *********************************
+    ! This Algorism follows Dion's 1-shot method.
+    !
+    ! This program is a subroutine.
+    ! This program calculates the kernel function phi.
+    !
+    ! Input
+    !
+    !
+    ! Output
+    !   phi              : 
+    !                      
+    !
+    !
+    !                            Written by Youky Ono in 2013/Jan.
+    !***************************************************************
 
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     real(kind=DP), intent(in)  :: di,dk
     real(kind=DP), intent(out) :: phi
     Integer ca,cb,nb
@@ -1527,75 +1981,75 @@ module m_vdWDF
     real(kind=DP) :: dr
 
 
-! Gauss-Legendre integration
+    ! Gauss-Legendre integration
     Real(kind=DP),allocatable, dimension(:) :: xi,wi
     Real(kind=DP) :: fac
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
-!---------------------- Calculation Start ----------------------
-! Call gauleg for Gauss-Legendre integral
-   allocate(xi(na_gl));xi=0.d0
-   allocate(wi(na_gl));wi=0.d0
-   Call gauleg(a1,a2,na_gl,xi,wi)
-
-   phi = 0.d0
-   Do ca=1,na_gl
-   Do cb=1,ca
-      fac=2.d0
-      if(ca.eq.cb) fac=1.d0
-      a = xi(ca)
-      b = xi(cb)
-
-      v1 = (a**2)/(2*hofy(a,di))
-      v2 = (b**2)/(2*hofy(b,di))
-      v3 = (a**2)/(2*hofy(a,dk))
-      v4 = (b**2)/(2*hofy(b,dk))
-      phi = phi + fac*wi(ca)*wi(cb)*(a*a*b*b)*Wab(a,b)*Twxyz(v1,v2,v3,v4)
-   End Do
-   End Do
-
-   phi = phi * 2*m*(e**4)/(pi**2)
-   deallocate(xi)
-   deallocate(wi)
+    !---------------------- Calculation Start ----------------------
+    ! Call gauleg for Gauss-Legendre integral
+    allocate(xi(na_gl));xi=0.d0
+    allocate(wi(na_gl));wi=0.d0
+    Call gauleg(a1,a2,na_gl,xi,wi)
+
+    phi = 0.d0
+    Do ca=1,na_gl
+       Do cb=1,ca
+          fac=2.d0
+          if(ca.eq.cb) fac=1.d0
+          a = xi(ca)
+          b = xi(cb)
+
+          v1 = (a**2)/(2.d0*hofy(a,di))
+          v2 = (b**2)/(2.d0*hofy(b,di))
+          v3 = (a**2)/(2.d0*hofy(a,dk))
+          v4 = (b**2)/(2.d0*hofy(b,dk))
+          phi = phi + fac*wi(ca)*wi(cb)*(a*a*b*b)*Wab(a,b)*Twxyz(v1,v2,v3,v4)
+       End Do
+    End Do
+
+    phi = phi * 2.d0 *m *(e**4)/(pi**2)
+    deallocate(xi)
+    deallocate(wi)
   end Subroutine kernel_phi
-!** End SUBROUTINE kernel_phi *****************************************************************
+  !** End SUBROUTINE kernel_phi *****************************************************************
 
-!** SUBROUTINE gauleg **********************************************************************************
+  !** SUBROUTINE gauleg **********************************************************************************
   Subroutine gauleg(x1,x2,n,xi,wi)
     Implicit none
     real(kind=DP), intent(in) :: x1,x2
     integer, intent(in) :: n
     real(kind=DP), intent(out)  ::  xi(n),wi(n)
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer  m,j,i
     REAL(kind=DP)  z1,z,xm,xl,pp,p3,p2,p1,eta
     Parameter (eta=0.0000000001d0)
 
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
 
 
-!---------------------- Calculation Start ----------------------
+    !---------------------- Calculation Start ----------------------
     m=(n+1)/2
     xm=0.5d0*(x2+x1)
     xl=0.5d0*(x2-x1)
 
-! === DEBUG by tkato 2014/04/22 ================================================
+    ! === DEBUG by tkato 2014/04/22 ================================================
     z1 = 0.0d0
-! ==============================================================================
+    ! ==============================================================================
     Do i=1,m
        z=DCOS(pi*(i-0.25d0)/(n+0.5d0))
        Do While (ABS(z-z1).GT.eta)
           p1=1.0d0
           p2=0.0d0
- 
+
           Do j=1,n
              p3=p2
              P2=p1
              p1=((2.0d0*j-1.d0)*z*p2-(j-1.d0)*p3)/dble(j)
           Enddo
- 
+
           pp=n*(z*p1-p2)/(z*z-1.d0)
           z1=z
           z=z1-p1/pp
@@ -1607,50 +2061,52 @@ module m_vdWDF
        wi(n+1-i) = wi(i)
     Enddo
   End Subroutine gauleg
-!** End SUBROUTINE gauleg ******************************************************************************
+  !** End SUBROUTINE gauleg ******************************************************************************
 
-!** SUBROUTINE outputs ************************************************************************
+  !** SUBROUTINE outputs ************************************************************************
   Subroutine outputs()
     Implicit none
 
     Ecnl = Ecnl_12 + Ecnl_3 - Ecnl_3s
 
     if(printable)then
-    write(nfout,'(a)')  'Here are the results : '
-    Write(nfout,*)  ' '
-    Write(nfout,11) ExGGA
-    Write(nfout,*)  ' '
-    Write(nfout,12) EcLDA
-    Write(nfout,13) Ecnl
-    Write(nfout,14) EcLDA + Ecnl
-    Write(nfout,*)  ' '
-    Write(nfout,15) EcLDA + Ecnl + ExGGA
-    Write(nfout,*)  ' '
-11  Format('E_total(GGA exchange)      = ',F19.13)
-
-12  Format('Ec(LDA)                    = ',F19.13)
-13  Format('Ec(nl)                     = ',F19.13)
-14  Format('Ec (= Ec(LDA) + Ec(nl) )   = ',F19.13)
+       write(nfout,'(a)')  'Here are the results : '
+       Write(nfout,*)  ' '
+       Write(nfout,11) ExGGA
+       Write(nfout,*)  ' '
+       Write(nfout,12) EcLDA
+       Write(nfout,13) Ecnl
+       Write(nfout,14) EcLDA + Ecnl
+       Write(nfout,*)  ' '
+       Write(nfout,15) EcLDA + Ecnl + ExGGA
+       Write(nfout,*)  ' '
+11     Format('E_total(GGA exchange)      = ',F19.13)
+
+12     Format('Ec(LDA)                    = ',F19.13)
+13     Format('Ec(nl)                     = ',F19.13)
+14     Format('Ec (= Ec(LDA) + Ec(nl) )   = ',F19.13)
 
-15  Format('E_total(vdW-DF)            = ',F19.13)
+15     Format('E_total(vdW-DF)            = ',F19.13)
 
-    Write(nfout,*)  '                  Given in Hartree atomic units'
-    Write(nfout,*)  ' '
+       Write(nfout,*)  '                  Given in Hartree atomic units'
+       Write(nfout,*)  ' '
     endif
 
   End Subroutine outputs
-!** End SUBROUTINE outputs ********************************************************************
+  !** End SUBROUTINE outputs ********************************************************************
 
-  subroutine get_dFdrho_dFddrho(na,nb,nc,dFdrho,dFddrho)
+  subroutine get_dFdrho_dFddrho(na,nb,nc,dFdrho_,dFddrho_)
     integer, intent(in) :: na,nb,nc
-    real(kind=DP), dimension(na,nb,nc), intent(out) :: dFdrho,dFddrho
+    real(kind=DP), dimension(na,nb,nc), intent(out) :: dFdrho_,dFddrho_
     integer :: i,i1,i2,i3
-    dFdrho = 0.0d0;dFddrho = 0.0d0
-    do i=1,nq0
-       dFdrho (:,:,:) = dFdrho (:,:,:) + dble(ualpha_r(i,:,:,:))*dtheta_R (i,:,:,:)/(univol*rinplw)
-       dFddrho(:,:,:) = dFddrho(:,:,:) + dble(ualpha_r(i,:,:,:))*ddtheta_R(i,:,:,:)/(univol*rinplw)
-    enddo
- 
+    dFdrho_ = dFdrho
+    dFddrho_ = dFddrho
+    !    dFdrho = 0.0d0;dFddrho = 0.0d0 ! this can be calculated on the fly!!
+    !    do i=1,nq0
+    !       dFdrho (:,:,:) = dFdrho (:,:,:) + dble(ualpha_r(i,:,:,:))*dtheta_R (i,:,:,:)/(univol*rinplw)
+    !       dFddrho(:,:,:) = dFddrho(:,:,:) + dble(ualpha_r(i,:,:,:))*ddtheta_R(i,:,:,:)/(univol*rinplw)
+    !    enddo
+
   end subroutine get_dFdrho_dFddrho
 
 
diff -uprN phase0_2015.01/src_phase/mdmain0.F90 phase0_2015.01.01/src_phase/mdmain0.F90
--- phase0_2015.01/src_phase/mdmain0.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/mdmain0.F90	2016-07-12 12:51:19.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 482 $)
 !
 !  MAIN PROGRAM: PHASE
 !
@@ -32,7 +32,7 @@
 !  Consortium.
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
-!  $Id: mdmain0.F90 443 2015-08-03 13:03:48Z ktagami $
+!  $Id: mdmain0.F90 482 2016-04-08 08:40:45Z jkoga $
 !
 program PHASE
 #ifdef NEC_TIMER
@@ -42,7 +42,7 @@ program PHASE
   implicit none
   logical  :: ChargeDensity_is_Converged, TotalEnergy_is_Divergent
   logical  :: Already_Converged, Already_Converged2
-  logical  :: Positron_bulk, Positron_defect
+  logical  :: Positron_scf, Positron_nonscf
   logical  :: Hubbard_model
   logical  :: Forces_are_Converged, Ending_Time, Force_errors_are_tolerable,UnitCell_Converged
 !!$  logical  :: ChargeDensity_is_Fixed
@@ -101,6 +101,8 @@ program PHASE
 #endif
 
   call Ewald_and_Structure_Factor
+  if ( Positron_scf() ) call Initial_pWaveFunctions()
+
   call Initial_Electronic_Structure
 
   if(ChargeDensity_is_Fixed() .and. One_by_one_in_each_rank_k()) then ! icond=2, 3
@@ -133,6 +135,8 @@ program PHASE
 ! ============================================================ 5.0
 
                  call Renewal_of_WaveFunctions
+                 if ( Positron_scf() ) call Renewal_of_pWaveFunctions                 
+
                  call ChargeDensity_Construction(0)
                  call Potential_Construction
 
@@ -149,7 +153,8 @@ program PHASE
                  if ( PotentialMix() ) then
                  else
                     call Renewal_of_Potential
-                    if(Hubbard_model()) then
+                    if ( Positron_scf() ) call Renewal_of_pPotential
+                    if (Hubbard_model() ) then
                        call Renewal_of_Hubbard_Parameters
                        call Renewal_of_Hubbard_Potential
                     end if
@@ -167,6 +172,10 @@ program PHASE
                  call Move_Ions
                  call MDIterationNumber_Setting
                  call Ewald_and_Structure_Factor
+                 if ( Hubbard_model() ) then
+                    call Renewal_of_Hubbard_Parameters
+                    call Renewal_of_Hubbard_Potential
+                 end if
 !!$                 call MDIterationNumber_Setting
               end if
               if(BreakMD(force_conv))then
@@ -185,16 +194,21 @@ program PHASE
 #endif
      end if
 
-     if(Already_Converged2() .and. .not.Positron_defect() .and. Positron_bulk()) then
+     if ( Already_Converged2() ) then
+        if ( Positron_nonscf() ) then
 #ifdef NEC_ITER_REG
-        call FTRACE_REGION_BEGIN("POSITRON")
+           call FTRACE_REGION_BEGIN("POSITRON")
 #endif
-        call Renewal_of_pPotential()
-        call Solve_pWaveFunctions()
+           call Initial_pWaveFunctions()
+           call Renewal_of_pPotential()
+           call Solve_pWaveFunctions()
 #ifdef NEC_ITER_REG
-        call FTRACE_REGION_END("POSITRON")
+           call FTRACE_REGION_END("POSITRON")
 #endif
-     end if
+        else if ( Positron_scf() ) then
+           call Write_Positron_LifeTime
+        end if
+     endif
 
 #ifdef NEC_ITER_REG
      call FTRACE_REGION_BEGIN("FINAL")
@@ -577,4 +591,11 @@ contains
 #endif
      call m_CtrlP_set_init_status(.true.)
    end subroutine Array_Deallocate
+
+   subroutine Write_Positron_LifeTime
+     use m_Positron_Wave_Functions, only : m_pWF_wlifetime
+
+     call m_pWF_wlifetime()
+   end subroutine Write_Positron_LifeTime
+
 end program PHASE
diff -uprN phase0_2015.01/src_phase/mpi_dummy.F90 phase0_2015.01.01/src_phase/mpi_dummy.F90
--- phase0_2015.01/src_phase/mpi_dummy.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/mpi_dummy.F90	2016-07-12 12:51:19.000000000 +0900
@@ -169,6 +169,12 @@ subroutine mpi_copy( send_buf, recv_buf,
 	end if
 end
 
+subroutine mpi_sendrecv(sendbuf,sendcount,sendtype,dest,sendtag,recvbuf,recvcount,recvtype,source,recvtag,comm,status,ierr)
+        integer :: sendcount,sendtype,dest,sendtag,recvcount,recvtype,source,recvtag,comm,status,ierr
+        logical(4) :: sendbuf(sendcount*2),recvbuf(recvcount*2)
+        call mpi_copy( sendbuf, recvbuf, sendcount, sendtype, ierr )
+end
+
 #ifndef _NO_ARG_DUMMY_
 function iargc()
 	iargc = 0
diff -uprN phase0_2015.01/src_phase/vdW.F90 phase0_2015.01.01/src_phase/vdW.F90
--- phase0_2015.01/src_phase/vdW.F90	2015-08-05 14:45:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/vdW.F90	2016-07-12 12:51:19.000000000 +0900
@@ -48,7 +48,7 @@ subroutine vdW_scf(nspin,ispin,na,nb,nc,
   call build_theta()
   call vdWdf_core()
   call corrections()
-  ecnl = (Ecnl_12 + Ecnl_3 - Ecnl_3s)/(univol*rinplw)
+  ecnl = Ecnl_12 + Ecnl_3 - Ecnl_3s
   call get_dFdrho_dFddrho(na,nb,nc,dFdrho,dFddrho)
   call finalize_vdwdf()
   call tstatc0_end(id_sname)
diff -uprN phase0_2015.01/src_phase/version.h phase0_2015.01.01/src_phase/version.h
--- phase0_2015.01/src_phase/version.h	2015-09-15 12:16:21.000000000 +0900
+++ phase0_2015.01.01/src_phase/version.h	2016-07-12 12:51:19.000000000 +0900
@@ -1 +1 @@
-integer, parameter :: svn_revision = 460
+integer, parameter :: svn_revision = 511
diff -uprN phase0_2015.01/src_phase_3d/ChargeDensity_Construction.F90 phase0_2015.01.01/src_phase_3d/ChargeDensity_Construction.F90
--- phase0_2015.01/src_phase_3d/ChargeDensity_Construction.F90	2015-09-14 15:34:33.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/ChargeDensity_Construction.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 455 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 494 $)
 !
 !  SUBROUINE: ChargeDensity_Construction, FermiEnergyLevel, 
 !             CD_Softpart_plus_Hardpart
@@ -32,7 +32,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine ChargeDensity_Construction(ic)
-! $Id: ChargeDensity_Construction.F90 455 2015-09-07 08:04:26Z yamasaki $
+! $Id: ChargeDensity_Construction.F90 494 2016-06-02 00:54:16Z jkoga $
   use m_Total_Energy,         only : m_TE_total_energy, ehartr
   use m_Charge_Density,       only : m_CD_convergence_check &
        &                           , m_CD_softpart_3D, m_CD_hardpart &
@@ -64,7 +64,7 @@ subroutine ChargeDensity_Construction(ic
   use m_Control_Parameters,   only : nspin, kimg, af
   use m_Kpoints,              only : k_symmetry
   use m_Const_Parameters,     only : GAMMA
-  use m_Parallelization,      only : np_e, ista_k, iend_k, map_k, myrank_k,ista_kngp,iend_kngp
+  use m_Parallelization,      only : np_e, ista_k, iend_k, map_k, myrank_k,ista_kngp,iend_kngp,mpi_ke_world
   use m_PseudoPotential,      only : nlmta
 
 
@@ -77,11 +77,13 @@ subroutine ChargeDensity_Construction(ic
 !!  use m_Total_Energy,          only : m_TE_total_energy_noncl
 ! ========================================================================= 11.0
 
+  use m_PlaneWaveBasisSet,       only : kgp
   use m_FFT,                     only : fft_box_size_CD
 
 
 
 
+
   implicit none
   include 'mpif.h'
 
@@ -123,7 +125,7 @@ subroutine ChargeDensity_Construction(ic
 
 ! =================== added by K. Tagami ============ 5.0
   if ( sw_eval_energy_before_charge == ON ) then
-        call m_TE_total_energy(nfout,display_on,kv3)
+     call m_TE_total_energy(nfout,display_on,kv3)
   endif
 ! =================================================== 5.0
 
@@ -153,16 +155,20 @@ subroutine ChargeDensity_Construction(ic
   if(sw_esm==ON)then
      nfftcd = fft_box_size_CD(1,0)*fft_box_size_CD(2,0)*fft_box_size_CD(3,0)
      allocate(vhar(nfftcd));vhar=(0.d0,0.d0)
-     allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+!     allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+     allocate(chgc(1:kgp,nspin));chgc=(0.d0,0.d0)
      if(kimg==1)then
         do ig=ista_kngp,iend_kngp
-           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),0.d0)
+!           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),0.d0)
+           chgc(ig,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),0.d0)
         enddo
      else
         do ig=ista_kngp,iend_kngp
-           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),chgq_l(ig,2,1:nspin))
+!           chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),chgq_l(ig,2,1:nspin))
+           chgc(ig,1:nspin) = dcmplx(chgq_l(ig,1,1:nspin),chgq_l(ig,2,1:nspin))
         enddo
      endif
+     call mpi_allreduce(mpi_in_place,chgc,kgp*nspin,mpi_double_complex,mpi_sum,mpi_ke_world,ierr)
      call esm_hartree(chgc,ehartr,vhar)
      ehartr  = 0.5d0*ehartr  !Ry -> Ha
      deallocate(chgc)
@@ -172,7 +178,7 @@ subroutine ChargeDensity_Construction(ic
 
 
   if ( sw_eval_energy_before_charge == OFF ) then
-        call m_TE_total_energy(nfout,display_on,kv3)
+     call m_TE_total_energy(nfout,display_on,kv3)
   endif
 ! ======================================================== 5.0
 
@@ -207,7 +213,7 @@ contains
   end subroutine FermiEnergyLevel
 
   subroutine CD_Softpart_plus_Hardpart
-! $Id: ChargeDensity_Construction.F90 455 2015-09-07 08:04:26Z yamasaki $
+! $Id: ChargeDensity_Construction.F90 494 2016-06-02 00:54:16Z jkoga $
 !fj#ifdef __TIMER_SUB__
 !fj  call timer_sta(716)
 !fj#endif
diff -uprN phase0_2015.01/src_phase_3d/ChargeDensity_Mixing.F90 phase0_2015.01.01/src_phase_3d/ChargeDensity_Mixing.F90
--- phase0_2015.01/src_phase_3d/ChargeDensity_Mixing.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/ChargeDensity_Mixing.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 492 $)
 !
 !  SUBROUINE: ChargeDensity_Mixing
 !
@@ -31,7 +31,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine ChargeDensity_Mixing
-! $Id: ChargeDensity_Mixing.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: ChargeDensity_Mixing.F90 492 2016-05-31 03:06:04Z jkoga $
   use m_Const_Parameters,    only : DP,SIMPLE,BROYD1,BROYD2,DFP,PULAY,RMM2P,ON &
        &                          , FIXED_CHARGE, FIXED_CHARGE_CONTINUATION, CONTINUATION, SKIP
   use m_Charge_Density,      only : m_CD_check
@@ -49,7 +49,7 @@ subroutine ChargeDensity_Mixing
        &                          , m_CtrlP_set_rmx &
        &                          , m_CtrlP_waymix_now &
        &                          , m_CtrlP_set_mix_parameter &
-       &                          , sw_hubbard
+       &                          , sw_hubbard, sw_mix_occ_matrix
   use m_Files,               only : nfout
   use m_IterationNumbers,    only : iteration_electronic, iteration_ionic &
        &                          , m_Iter_cmix_reset
@@ -187,10 +187,10 @@ contains
     case (SIMPLE)
        call m_CD_simple_mixing( nfout,rmxt_tot )
        call m_CD_simple_mixing_hard( nfout, rmxt_hard )
-       if ( sw_hubbard == ON ) then
+       if ( sw_hubbard == ON .and.sw_mix_occ_matrix==OFF ) then
           call Renewal_of_OccMat( .false., ON )           ! hsr --> om 
-          call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
+       if(sw_hubbard==ON) call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
 
     case (BROYD1)
 !       call m_CD_mix_broyden1_with_hsr(rmxt_tot)
@@ -198,22 +198,21 @@ contains
        stop
 
     case (BROYD2)
-       call m_CD_mix_broyden2_with_hsr(nfout,rmxt_tot)
-       if ( sw_hubbard == ON ) then
+       call m_CD_mix_broyden2_with_hsr(nfout,rmxt_tot,sw_mix_occ_matrix==ON)
+       if ( sw_hubbard == ON .and. sw_mix_occ_matrix==OFF ) then
           call Renewal_of_OccMat(.false., ON )           ! hsr --> om 
-          call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
-
+       if(sw_hubbard==ON) call m_OP_cp_om_to_ommix( nfout, rmxt_hard )      ! om --> ommix
     case (DFP)
 !       call m_CD_mix_DFP(rmxt_tot)
        write(*,*) 'Not supported '
        stop
     case (PULAY)
-       call m_CD_mix_pulay_with_hsr(nfout,rmxt_tot)
-       if ( sw_hubbard == ON ) then
+       call m_CD_mix_pulay_with_hsr(nfout,rmxt_tot,sw_mix_occ_matrix==ON)
+       if ( sw_hubbard == ON .and. sw_mix_occ_matrix==OFF ) then
           call Renewal_of_OccMat(.false., ON )           ! hsr --> om 
-          call m_OP_cp_om_to_ommix( nfout, rmxt_hard )       ! om --> ommix
        endif
+       if(sw_hubbard==ON) call m_OP_cp_om_to_ommix( nfout, rmxt_hard )      ! om --> ommix
     case default
        stop ' ! waymix is invalid'
     end select mixing_way
diff -uprN phase0_2015.01/src_phase_3d/Convergence_Check.F90 phase0_2015.01.01/src_phase_3d/Convergence_Check.F90
--- phase0_2015.01/src_phase_3d/Convergence_Check.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Convergence_Check.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  FUNCTION:  Ending_Time(), ckiter(), ChargeDensity_is_Converged(), 
 !             TotalEnergy_is_Divergent(), Forces_are_Converged(), 
@@ -37,7 +37,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 logical function Ending_Time()
-! $Id: Convergence_Check.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Convergence_Check.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Const_Parameters,   only : INITIAL, CONTINUATION, FIXED_CHARGE, FIXED_CHARGE_CONTINUATION
   use m_Files,              only : nfstop,nfout,m_Files_open_nfstop,m_Files_close_nfstop
   use m_IterationNumbers,   only : iteration, iteration_ionic, iteration_electronic
@@ -45,6 +45,7 @@ logical function Ending_Time()
        &                         , max_TS_iteration_is_given, max_mdstep_is_given, printable &
        &                         , m_CtrlP_ckcput, m_CtrlP_rd_istop
   use m_Parallelization,    only : mype,mpi_comm_group
+
   implicit none
   include 'mpif.h'                                      ! MPI
 
@@ -209,6 +210,12 @@ logical function ChargeDensity_is_Conver
        &                             truncate_vxw_updating, sw_update_vxw, oneshot
 ! ======================================================================== 12.5Exp
 
+! === Postitron SCF === 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, positron_method
+  use m_Const_Parameters,   only : positron_GGGC
+  use m_Positron_Wave_Functions,  only : m_pWF_update_lifetime
+! ===================== 2015/11/28
+
   implicit none
   logical, save :: renew_wf_again = .false.
   logical :: EigenValues_are_Converged
@@ -320,6 +327,10 @@ logical function ChargeDensity_is_Conver
 ! <--
      end if
 
+     if ( sw_positron /= OFF ) then
+        if ( positron_method == positron_GGGC ) call m_pWF_update_lifetime
+     endif
+
 ! =========================== KT_Test ================= 12.5Exp
      if ( sw_hybrid_functional == ON ) then
         if ( truncate_vxw_updating .and. sw_update_vxw == OFF ) then
@@ -1095,6 +1106,38 @@ logical function Positron_Defect()
   end if
 end function Positron_Defect
 
+! ==== Positron SCF === 2015/11/28
+logical function Positron_scf()
+  use m_Const_Parameters, only :   OFF, Positron_CONV
+  use m_Control_Parameters, only : sw_positron, positron_method
+
+  if ( sw_positron /= OFF ) then
+     if ( positron_method == Positron_CONV ) then
+        Positron_scf = .false.
+     else
+        Positron_scf = .true.
+     end if
+  else
+     Positron_scf = .false.
+  endif
+end function Positron_scf
+
+logical function Positron_nonscf()
+  use m_Const_Parameters, only :   OFF, Positron_CONV
+  use m_Control_Parameters, only : sw_positron, positron_method
+
+  if ( sw_positron /= OFF ) then
+     if ( positron_method == Positron_CONV ) then
+        Positron_nonscf = .true.
+     else
+        Positron_nonscf = .false.
+     end if
+  else
+     Positron_nonscf = .false.
+  endif
+end function Positron_nonscf
+! ========== 2015/11/28
+
 logical function Structure_is_fixed()
   ! Coded by T. Yamasaki, 25 Jul. 2008
   use m_Const_Parameters, only :   ON, VERLET
diff -uprN phase0_2015.01/src_phase_3d/EsmPack/Esm.F90 phase0_2015.01.01/src_phase_3d/EsmPack/Esm.F90
--- phase0_2015.01/src_phase_3d/EsmPack/Esm.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/EsmPack/Esm.F90	2016-07-12 12:51:52.000000000 +0900
@@ -227,9 +227,9 @@ END SUBROUTINE esm_ggen_2d
 SUBROUTINE esm_hartree (rhog, ehart, aux )
   Use ESM_VARS
   IMPLICIT NONE
-#ifdef __MPI__
-  include 'mpif.h'
-#endif
+!#ifdef __MPI__
+!  include 'mpif.h'
+!#endif
   !
   COMPLEX(8) :: rhog(ngm,nspin)   !  n(G)
   REAL(8),    Intent(Out) :: ehart             !  Hartree energy
@@ -339,22 +339,22 @@ SUBROUTINE esm_hartree (rhog, ehart, aux
         
      vg3(1:nr3x,ng_2d)=vg2(1:nr3x)*2.d0
   enddo
-#ifdef __MPI__
-  n1h=nr1x/2;n2h=nr2x/2
-  allocate(vg3_mpi(1:nr3x,-n2h:n2h,-n1h:n1h));vg3_mpi(:,:,:)=(0.d0,0.d0)
-  do ng_2d=1,ngm_2d
-     k1 = mill_2d(1,ng_2d)
-     k2 = mill_2d(2,ng_2d)
-     vg3_mpi(:,k2,k1) = vg3(:,ng_2d)
-  enddo
-  call mpi_allreduce(MPI_IN_PLACE,vg3_mpi,nr3x*nr2x*nr1x,mpi_double_complex,mpi_sum,communicator,ierr)
-  do ng_2d=1,ngm_2d
-     k1 = mill_2d(1,ng_2d)
-     k2 = mill_2d(2,ng_2d)
-     vg3(:,ng_2d) = vg3_mpi(:,k2,k1)
-  enddo
-  deallocate(vg3_mpi)
-#endif
+!#ifdef __MPI__
+!  n1h=nr1x/2;n2h=nr2x/2
+!  allocate(vg3_mpi(1:nr3x,-n2h:n2h,-n1h:n1h));vg3_mpi(:,:,:)=(0.d0,0.d0)
+!  do ng_2d=1,ngm_2d
+!     k1 = mill_2d(1,ng_2d)
+!     k2 = mill_2d(2,ng_2d)
+!     vg3_mpi(:,k2,k1) = vg3(:,ng_2d)
+!  enddo
+!  call mpi_allreduce(MPI_IN_PLACE,vg3_mpi,nr3x*nr2x*nr1x,mpi_double_complex,mpi_sum,communicator,ierr)
+!  do ng_2d=1,ngm_2d
+!     k1 = mill_2d(1,ng_2d)
+!     k2 = mill_2d(2,ng_2d)
+!     vg3(:,ng_2d) = vg3_mpi(:,k2,k1)
+!  enddo
+!  deallocate(vg3_mpi)
+!#endif
   deallocate(vg2,vg2_in)
 !$omp end parallel  
 
@@ -507,9 +507,9 @@ SUBROUTINE esm_hartree (rhog, ehart, aux
      
      vg3(1:nr3x,ng_2d)=vg2(1:nr3x)*2.d0
      
-#ifdef __MPI__
-  call mpi_allreduce(MPI_IN_PLACE,vg3(1,ng_2d),nr3x,mpi_double_complex,mpi_sum,communicator,ierr)
-#endif
+!#ifdef __MPI__
+!  call mpi_allreduce(MPI_IN_PLACE,vg3(1,ng_2d),nr3x,mpi_double_complex,mpi_sum,communicator,ierr)
+!#endif
      deallocate(vg2,vg2_in)
   endif ! if( ng_2d > 0 )
   
@@ -537,9 +537,9 @@ SUBROUTINE esm_hartree (rhog, ehart, aux
 #ifdef __PARA
   call mp_sum( ehart, intra_pool_comm )
 #endif
-#ifdef __MPI__
-  call mpi_allreduce(MPI_IN_PLACE,ehart,1,mpi_double_precision,mpi_sum,communicator,ierr)
-#endif
+!#ifdef __MPI__
+!  call mpi_allreduce(MPI_IN_PLACE,ehart,1,mpi_double_precision,mpi_sum,communicator,ierr)
+!#endif
 
 ! Map to FFT mesh (nrxx)
   aux=0.0d0
@@ -594,7 +594,6 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
   !
   !    here the local variables
   !
-  real(8), external      :: qe_erfc, qe_erf
   real(8)                :: gp2, t(2), gp, sa, z1, z0, L
   integer                 :: k1, k2, k3, ipol, it1, it2, ng_2d
   real(8) :: tt, z, zp, kk1, kk2, g, cc1, cc2, arg1, arg2, t1, t2, ff, argmax, ew
@@ -640,7 +639,7 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
      
      tt=upf_zp(it1)*upf_zp(it2)*2.0*pi/sa
      
-     kk1=0.5d0*(-(z-zp)*qe_erf(g*(z-zp))-exp(-g**2*(z-zp)**2)/g/sqrt(pi))
+     kk1=0.5d0*(-(z-zp)*erf(g*(z-zp))-exp(-g**2*(z-zp)**2)/g/sqrt(pi))
      
      if (esm_bc.eq.'bc1') then
         kk2=0.d0
@@ -668,8 +667,8 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            cc1=cc1+(t1+t2)/4.d0/gp
            
            if (esm_bc.eq.'bc1') then
@@ -708,8 +707,8 @@ SUBROUTINE esm_ewald ( charge, alpha, ew
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            cc1=cc1+cos(ff)*(t1+t2)/4.d0/gp
            
            if (esm_bc.eq.'bc1') then
@@ -791,8 +790,6 @@ subroutine esm_local_(nrx,aux,natm,ngaus
   !    here the local variables
   !
   complex(8),allocatable :: vloc3(:,:),vg2(:),vg2_in(:)
-!  real(8), external      :: qe_erf, qe_erfc
-  real(8)      :: qe_erf, qe_erfc
   real(8)                :: t(3),tt,gp,gp2,sa,z1,z0,pp,cc,ss,t1,t2, &
                             z,zp,arg11,arg12,arg21,arg22,v0,tmp,L,argmax, &
                             z_l,z_r
@@ -849,8 +846,8 @@ subroutine esm_local_(nrx,aux,natm,ngaus
               arg21= gp*(z-zp)
               arg21=min(arg21,argmax)
               arg22= gp/2.d0/tmp+tmp*(z-zp)
-              t1=exp(arg11)*qe_erfc(arg12)
-              t2=exp(arg21)*qe_erfc(arg22)
+              t1=exp(arg11)*erfc(arg12)
+              t2=exp(arg21)*erfc(arg22)
               cc1=cc1+bgauss(it,ig)*cs*(t1+t2)/4.d0/gp
            enddo
            if (esm_bc.eq.'bc1') then
@@ -909,7 +906,7 @@ subroutine esm_local_(nrx,aux,natm,ngaus
            cc1=(0.d0,0.d0) 
            do ig=1,ngauss
               tmp=sqrt(agauss(it,ig))
-              cc1=cc1+bgauss(it,ig)*0.5d0*(-(z-zp)*qe_erf(tmp*(z-zp)) &
+              cc1=cc1+bgauss(it,ig)*0.5d0*(-(z-zp)*erf(tmp*(z-zp)) &
                    -exp(-tmp**2*(z-zp)**2)/tmp/sqrt(pi))
            enddo
            if (esm_bc.eq.'bc1') then
@@ -924,12 +921,12 @@ subroutine esm_local_(nrx,aux,natm,ngaus
      ! smoothing cell edge potential (avoiding unphysical oscillation)
         do ig=1,ngauss
            tmp=sqrt(agauss(it,ig))
-           f1=f1+tt*bgauss(it,ig)*0.5d0*(-(z_r-zp)*qe_erf(tmp*(z_r-zp)) &
+           f1=f1+tt*bgauss(it,ig)*0.5d0*(-(z_r-zp)*erf(tmp*(z_r-zp)) &
                 -exp(-tmp**2*(z_r-zp)**2)/tmp/sqrt(pi))
-           f2=f2+tt*bgauss(it,ig)*0.5d0*(-(z_l-zp)*qe_erf(tmp*(z_l-zp)) &
+           f2=f2+tt*bgauss(it,ig)*0.5d0*(-(z_l-zp)*erf(tmp*(z_l-zp)) &
                 -exp(-tmp**2*(z_l-zp)**2)/tmp/sqrt(pi))
-           f3=f3-tt*bgauss(it,ig)*0.5d0*qe_erf(tmp*(z_r-zp))
-           f4=f4-tt*bgauss(it,ig)*0.5d0*qe_erf(tmp*(z_l-zp))
+           f3=f3-tt*bgauss(it,ig)*0.5d0*erf(tmp*(z_r-zp))
+           f4=f4-tt*bgauss(it,ig)*0.5d0*erf(tmp*(z_l-zp))
         enddo
         if(esm_bc.eq.'bc1')then
            f1=f1+tt*0.d0
@@ -1004,7 +1001,6 @@ subroutine esm_force_ew ( alpha, forceio
   !
   !    here the local variables
   !
-  real(8), external      :: qe_erfc, qe_erf
   integer  :: it1, it2, ipol, k1, k2, k3, ng_2d
   integer  :: nth, ith, omp_get_num_threads, omp_get_thread_num
   real(8) :: t1_for, t2_for, z, zp, kk1_for, kk2_for, g, gp2, gp, z1, t(2), L
@@ -1052,7 +1048,7 @@ subroutine esm_force_ew ( alpha, forceio
      endif
      t2_for=upf_zp(it1)*upf_zp(it2)*fpi/sa
 
-     kk1_for=0.5d0*qe_erf(g*(z-zp))
+     kk1_for=0.5d0*erf(g*(z-zp))
      if (esm_bc.eq.'bc1') then
         kk2_for=0.d0
      else if (esm_bc.eq.'bc2') then
@@ -1078,8 +1074,8 @@ subroutine esm_force_ew ( alpha, forceio
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            cz1_for=0.d0
            if (esm_bc.eq.'bc1') then      
               cz2_for=0.d0
@@ -1119,8 +1115,8 @@ subroutine esm_force_ew ( alpha, forceio
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            
            cx1_for=cx1_for+sin(ff)*(t1+t2)/4.d0/gp*k1
            cy1_for=cy1_for+sin(ff)*(t1+t2)/4.d0/gp*k2
@@ -1175,8 +1171,8 @@ subroutine esm_force_ew ( alpha, forceio
            arg2= gp*(z-zp)
            arg1=min(arg1,argmax)
            arg2=min(arg2,argmax)
-           t1=exp(arg1)*qe_erfc(gp/2.d0/g-g*(z-zp))
-           t2=exp(arg2)*qe_erfc(gp/2.d0/g+g*(z-zp))
+           t1=exp(arg1)*erfc(gp/2.d0/g-g*(z-zp))
+           t2=exp(arg2)*erfc(gp/2.d0/g+g*(z-zp))
            
            cx1_for=cx1_for+sin(ff)*(t1+t2)/4.d0/gp*k1
            cy1_for=cy1_for+sin(ff)*(t1+t2)/4.d0/gp*k2
@@ -1276,7 +1272,6 @@ subroutine esm_force_lc_ (nrx, aux, natm
   !
   complex(8),allocatable :: vlocx(:), vlocy(:), vlocdz(:)
   real(8),allocatable    :: for(:,:),for_g(:,:)
-  real(8), external      :: qe_erf, qe_erfc
   real(8)                :: t(3),tt,gp,gp2,sa,z1,z0,pp,cc,ss,t1,t2,z,zp,L,forcelc2(3,nat)
   real(8)                :: arg11,arg12,arg21,arg22,tmp,r1,r2,fx1,fy1,fz1,fx2,fy2,fz2,argmax
   integer                 :: iz,ig,it,ipol,k1,k2,k3,ng,n1,n2,n3,ng_2d
@@ -1357,8 +1352,8 @@ subroutine esm_force_lc_ (nrx, aux, natm
               arg21= gp*(z-zp)
               arg21=min(arg21,argmax)
               arg22= gp/2.d0/tmp+tmp*(z-zp)
-              t1=exp(arg11)*qe_erfc(arg12)
-              t2=exp(arg21)*qe_erfc(arg22)
+              t1=exp(arg11)*erfc(arg12)
+              t2=exp(arg21)*erfc(arg22)
               cx1=cx1+bgauss(it,ig)*CMPLX(ss, -cc, kind=8) &
                    *(t1+t2)/4.d0/gp*k1
               cy1=cy1+bgauss(it,ig)*CMPLX(ss, -cc, kind=8) &
@@ -1440,7 +1435,7 @@ subroutine esm_force_lc_ (nrx, aux, natm
            cc1=(0.d0,0.d0)
            do ig=1,ngauss
               tmp=sqrt(agauss(it,ig))
-              cc1=cc1+bgauss(it,ig)*(0.5d0*qe_erf(tmp*(z-zp)))
+              cc1=cc1+bgauss(it,ig)*(0.5d0*erf(tmp*(z-zp)))
            enddo
            if (esm_bc.eq.'bc1') then
               cc2=(0.d0,0.d0)
diff -uprN phase0_2015.01/src_phase_3d/EsmPack/Makefile phase0_2015.01.01/src_phase_3d/EsmPack/Makefile
--- phase0_2015.01/src_phase_3d/EsmPack/Makefile	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/EsmPack/Makefile	2016-07-12 12:51:52.000000000 +0900
@@ -2,8 +2,8 @@
 
 FFLAG   = $(INCLUDE) $(OMPFLAG) $(MPIFLAG)
 
-OBJ = qe_erf.o fft.o vector.o Ewald.o  Esm.o EsmInterface.o
-OBJ_P = qe_erf.o fft.o vector.o Ewald.o  Esm.o EsmPack.o
+OBJ =  fft.o vector.o Ewald.o  Esm.o EsmInterface.o
+OBJ_P =  fft.o vector.o Ewald.o  Esm.o EsmPack.o
 LIBFLAG = -L/usr/local/lib -lfftw3 -lblas -llapack
 
 .f90.o:
diff -uprN phase0_2015.01/src_phase_3d/EsmPack/qe_erf.f90 phase0_2015.01.01/src_phase_3d/EsmPack/qe_erf.f90
--- phase0_2015.01/src_phase_3d/EsmPack/qe_erf.f90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/EsmPack/qe_erf.f90	1970-01-01 09:00:00.000000000 +0900
@@ -1,35 +0,0 @@
-! Copyright (c) 2012, Minoru Otani <minoru.otani@aist.go.jp> 
-! 
-! Permission is hereby granted, free of charge, to any person 
-! obtaining a copy of this software and associated documentation 
-! files (the "Software"), to deal in the Software without restriction, 
-! including without limitation the rights to use, copy, modify, merge, 
-! publish, distribute, sublicense, and/or sell copies of the Software, 
-! and to permit persons to whom the Software is furnished to do so, 
-! subject to the following conditions:
- 
-! The above copyright notice and this permission notice shall be 
-! included in all copies or substantial portions of the Software.
- 
-! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
-! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
-! OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-! NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-! HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-! WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-! DEALINGS IN THE SOFTWARE.
-
-Function qe_erf(x)
-  Implicit none
-  Real(8), Intent(In)  :: x
-  Real(8) :: qe_erf
-  qe_erf = erf(x)
-End Function qe_erf
-
-Function qe_erfc(x)
-  Implicit none
-  Real(8), Intent(In)  :: x
-  Real(8) :: qe_erfc
-  qe_erfc = erfc(x)
-End Function qe_erfc
diff -uprN phase0_2015.01/src_phase_3d/Finalization_of_mpi.F90 phase0_2015.01.01/src_phase_3d/Finalization_of_mpi.F90
--- phase0_2015.01/src_phase_3d/Finalization_of_mpi.F90	2015-09-14 15:35:37.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Finalization_of_mpi.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 449 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  SUBROUINE: Finalization_of_mpi
 !
@@ -14,7 +14,7 @@
 !
 !=======================================================================
 subroutine Finalization_of_mpi
-! $Id: Finalization_of_mpi.F90 449 2015-08-06 04:37:59Z jkoga $
+! $Id: Finalization_of_mpi.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Parallelization, only :       m_Parallel_end_mpi
   use m_PlaneWaveBasisSet, only :     m_pwBS_dealloc_ngpt_igfp_gr &
        &                            , m_pwBS_dealloc_ylm_l
@@ -30,7 +30,7 @@ subroutine Finalization_of_mpi
   use m_XC_Potential,         only : m_XC_dealloc_vxc_3D
 #endif
 #ifdef _POSITRON_
-  use m_epc_potential,        only : m_epc_dealloc
+  use m_epc_potential,        only : m_epc_dealloc, m_epc_dealloc_vlhxc_p
   use m_Positron_Wave_Functions,only:m_pWF_deallocate_pzaj_etc
 #endif
 
@@ -62,6 +62,7 @@ subroutine Finalization_of_mpi
 #endif
 #ifdef _POSITRON_
   call m_epc_dealloc()
+  call m_epc_dealloc_vlhxc_p
   call m_pWF_deallocate_pzaj_etc()
 #endif
   call m_ES_dealloc_Dhub()
diff -uprN phase0_2015.01/src_phase_3d/Initial_Electronic_Structure.F90 phase0_2015.01.01/src_phase_3d/Initial_Electronic_Structure.F90
--- phase0_2015.01/src_phase_3d/Initial_Electronic_Structure.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Initial_Electronic_Structure.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 481 $)
 !
 !  SUBROUINE: Initial_Electronic_Structure, Initial_WaveFunctions_ek
 !
@@ -31,7 +31,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine Initial_Electronic_Structure
-! $Id: Initial_Electronic_Structure.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: Initial_Electronic_Structure.F90 481 2016-03-25 02:51:57Z jkoga $
   use m_Const_Parameters,   only : Gauss_distrib_func, from_wave_functions&
        &                         , INITIAL, CONTINUATION, FIXED_CHARGE &
        &                         , FIXED_CHARGE_CONTINUATION, ON, OFF, EXECUT &
@@ -53,7 +53,9 @@ subroutine Initial_Electronic_Structure
        &                         , nfcntn_bin_paw &
        &                         , file_existance_contfiles &
        &                         , m_Files_check_nfzaj_existance, m_Files_check_nfchgt_existance &
-       &                         , m_Files_check_file_existance
+       &                         , m_Files_check_file_existance &
+       &                         , m_Files_open_nfcntn_bin_paw,m_Files_close_nfcntn_bin_paw &
+       &                         , m_Files_nfcntn_bin_paw_exists
   use m_Control_Parameters, only : ipri, iprichargedensity, initial_chg, icond, nspin, intzaj &
        &                         , evaluation_eko_diff &
        &                         , skip_alloc_phonon, sw_phonon, sw_calc_force, neg, neg_previous &
@@ -118,7 +120,7 @@ subroutine Initial_Electronic_Structure
   use m_PseudoPotential,      only : qitg_l ,  nqitg     &
  &                                 , psc_l               &
  &                                 , rhvg_l              &
- &                                 , modnrm, m_PP_gfqwei_3D, flg_paw, epc
+ &                                 , modnrm, m_PP_gfqwei_3D, flg_paw, epc, m_PP_rd_PAW_parameters
   use m_XC_Potential,         only : m_XC_cal_potential_3D
   use m_Control_Parameters,   only : nspin, af, kimg, from_PSEUDOPOTENTIAL_FILE  &
  &                                 , istress,sw_fine_STM_simulation
@@ -209,6 +211,10 @@ subroutine Initial_Electronic_Structure
 ! ========================================= 13.0D
 
 
+! ==== EXP_CELLOPT === 2015/09/24
+  use m_IterationNumbers,     only : iteration_unit_cell
+  use m_Control_Parameters,   only :  sw_read_nfchgt_prev_cell, sw_read_nfzaj_prev_cell
+! ==================== 2015/09/24
 
   implicit none
   integer :: iloop
@@ -384,7 +390,9 @@ subroutine Initial_Electronic_Structure
      end if
 
      !---- set wave functions ----
-     if(intzaj == by_random_numbers) then
+     if ( iteration_unit_cell > 1 .and. sw_read_nfzaj_prev_cell == ON ) then
+        call read_zaj( condition =-4 )
+     else if(intzaj == by_random_numbers) then
 #ifdef FJ_TIMER
 !                    call mpi_barrier(mpi_comm_group, ierr)
                     call timer_sta(40)
@@ -402,7 +410,7 @@ subroutine Initial_Electronic_Structure
      else if(intzaj == by_matrix_diagon) then
 
      else if(intzaj == FILE) then
-        call read_zaj()
+        call read_zaj( condition = 1 )
 ! === ik is not defined here!!! by tkato 2013/02/12 ============================
 !!$       call m_ES_betar_dot_WFs_3D(nfout,ik)         ! (fsrfsi)
        do ik = 1, kv3, af+1
@@ -550,8 +558,11 @@ contains
              end do
        else
              call m_CD_rd_chgq(nfout,nfchgt,F_CHGT_in_partitioned)
-          if ( flg_paw .and. read_charge_hardpart == YES ) then
+          if ( flg_paw .and. read_charge_hardpart == YES .and. m_Files_nfcntn_bin_paw_exists()) then
+             call m_Files_open_nfcntn_bin_paw()
+             call m_PP_rd_PAW_parameters(nfout,nfcntn_bin_paw)
              call m_CD_rd_hsr(nfcntn_bin_paw)
+             call m_Files_close_nfcntn_bin_paw()
           endif
        end if
 
@@ -562,7 +573,11 @@ contains
 !!$       if ( flg_paw .and. ) then
 !!$          call m_CD_rd_hsr(nfcntn_bin_paw)
 !!$       endif
+
+    else if(condition == -4) then      ! coordinate-continuation
+!
     end if
+
   end subroutine read_charge_density
  
   subroutine read_efermi()
@@ -623,12 +638,12 @@ contains
 
   subroutine EXX()
     use m_ES_ExactExchange,  only  : m_ES_EXX_gather_valence_states, m_ES_EXX_kernel &
-         &                         , m_ES_EXX_occup, sw_rspace_hyb &
+         &                         , m_ES_EXX_occup  &
          &                         , m_ES_EXX_init0 &
          &                         , m_ES_EXX_ylm, m_ES_EXX_crotylm
     call m_ES_EXX_init0()
     if(modnrm == EXECUT ) then
-       if(sw_rspace_hyb==OFF) call m_ES_EXX_ylm()
+       call m_ES_EXX_ylm()
        call m_ES_EXX_crotylm()
     end if
     call m_ES_EXX_occup(nfout)
@@ -641,9 +656,17 @@ contains
        call m_OP_rd_occ_mat(nfout) ! -> om
   end subroutine read_occ_mat
 
-  subroutine read_zaj()
+  subroutine read_zaj( condition )
+    integer, intent(in) :: condition
+
     call m_Files_open_nfzaj()
-       call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned)
+
+    if ( condition == 1 ) then
+          call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned)
+
+    else if ( condition == -4 ) then       ! coordinate-continuation
+    endif
+
   end subroutine read_zaj
 
   subroutine check_neg()
diff -uprN phase0_2015.01/src_phase_3d/Initialization.F90 phase0_2015.01.01/src_phase_3d/Initialization.F90
--- phase0_2015.01/src_phase_3d/Initialization.F90	2015-08-05 15:07:35.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Initialization.F90	2016-07-12 12:51:52.000000000 +0900
@@ -77,6 +77,7 @@ subroutine Initialization(init_mpi)
 ! $Id: Initialization.F90 440 2015-08-03 07:18:01Z ktagami $
   use m_Parallelization, only : m_Parallel_init_comm_world
   use m_Parallelization, only : m_Parallel_get_nproc_from_arg_3D
+  use m_Control_Parameters,only:  m_CtrlP_set_sw_scalapack
   use m_Timing,          only : m_Timing_wd_timenow, m_Timing_init_timer
   use m_Files,           only : nfout &
        &                      , m_Files_open_standardout &
@@ -107,6 +108,9 @@ subroutine Initialization(init_mpi)
   if(printable) call m_Timing_wd_timenow("program start")
 
   call m_Parallel_get_nproc_from_arg_3D(printable)
+#ifdef _USE_SCALAPACK_
+  call m_CtrlP_set_sw_scalapack(printable)
+#endif
   call aavers                      ! -(here)
   endif
   call m_Files_check_file_existance
@@ -160,7 +164,7 @@ contains
   subroutine aavers
     include 'version.h' ! svn_revision
     character(len=72) :: vers, system, codename
-    write(vers,'("phase/0 2015.01 Revision:",i5, " --- 3D_Parallel --")') svn_revision
+    write(vers,'("phase/0 2015.01.01 Revision:",i5," -- 3D_Parallel --")') svn_revision
     codename = 'phaseUnif'
     system = ''
 
diff -uprN phase0_2015.01/src_phase_3d/InputData_Analysis.F90 phase0_2015.01.01/src_phase_3d/InputData_Analysis.F90
--- phase0_2015.01/src_phase_3d/InputData_Analysis.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/InputData_Analysis.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 475 $)
 !
 !  SUBROUINE: cnstr_fcvect_work_alloc, cnstr_fcvect_work_dealloc, 
 !             get_CS_and_ionic_system_data, read_ntyp_natm_natm2, read_altv, 
@@ -34,7 +34,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine InputData_Analysis()
-! $Id: InputData_Analysis.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: InputData_Analysis.F90 475 2016-02-23 05:22:18Z jkoga $
   use m_Files, only :             nfinp, nfout, nfcntn, file_existance_contfiles, file_existance_3contfiles &
        &                        , m_Files_open_nfcntn &
        &                        , m_Files_check_file_names &
@@ -76,7 +76,7 @@ subroutine InputData_Analysis()
        &                        , sw_phonon, sw_positron &
        &                        , driver &
        &                        , sw_phonon, sw_wannier90 &
-       &                        , fixed_charge_k_parallel &
+       &                        , fixed_charge_k_parallel, neg_is_given &
        &                        , m_CtrlP_rd_parameters, m_CtrlP_rd_iconvergence &
        &                        , m_CtrlP_rd_iconv_ek &
        &                        , m_CtrlP_rd_numk_zajsaved &
@@ -149,12 +149,13 @@ subroutine InputData_Analysis()
   integer :: iret, f_closeInputFile
   logical :: ex
   logical,save :: first_call = .true.
-  logical :: initialization_required
+  logical :: initialization_required, inputfilestyle_is_new = .false.
 
   if(.not.initialization_required()) return
 
   if(m_CtrlP_check_inputfilestyle(nfinp) == NEW_) then
      if(printable) write(nfout,'(" !*--- input-file style = NEW")')
+     inputfilestyle_is_new = .true.
      call m_Files_reopen_nfinp(1)
      if(first_call)then
      call m_CtrlP_rd_printlevel(nfout)
@@ -165,10 +166,6 @@ subroutine InputData_Analysis()
      call m_Files_open_nfdynm_cif_initially()
      if(first_call) call m_CtrlP_rd_accuracy(nfout)
      call m_CS_rd_n(nfout)
-#ifndef _EMPIRICAL_
-     call m_CtrlP_rd_wfsolver(nfout)
-     call m_CtrlP_rd_chargemix(nfout)
-#endif
      call m_CtrlP_rd_struc_evol(nfout)
      call m_CtrlP_rd_postproc(nfout)
 #ifndef _EMPIRICAL_
@@ -178,6 +175,13 @@ subroutine InputData_Analysis()
      call m_IS_rd_n(nfout)
      call m_IS_set_ionic_mass(nfout)
 
+#ifndef _EMPIRICAL_
+     if(neg_is_given) then
+        call m_CtrlP_rd_wfsolver(nfout,natm2)
+        call m_CtrlP_rd_chargemix(nfout)
+     end if
+#endif
+
 
 #ifndef _EMPIRICAL_
      call m_PP_rd_window_param(nfout)
@@ -353,6 +357,25 @@ subroutine InputData_Analysis()
 
   iret = f_closeInputFile()
   first_call = .false. 
+
+  if(.not.neg_is_given) then
+     call Check_of_Pseudopotential()  !-(PseudoPotential_Construction.F90), set neg properly
+
+     if(inputfilestyle_is_new) then
+        call m_Files_reopen_nfinp(1)
+#ifndef _EMPIRICAL_
+!!$        call m_CtrlP_dealloc_wfsolver()
+        call m_CtrlP_rd_wfsolver(nfout,natm)
+        call m_CtrlP_rd_chargemix(nfout)
+#endif
+        iret = f_closeInputFile()
+     else
+        if(printable) then
+           write(nfout,'(" !** inputfilestyle = OLD, and you cannot omit number_of_bands in the inputfile")')
+        end if
+        stop  'inputfilestyle = OLD, and you cannot omit number_of_bands in the inputfile'
+     end if
+  end if
 contains
   subroutine cnstr_fcvect_work_alloc
     if(printable) write(nfout,'(" ! natm  = ",i6," << cnstr_fcvect_work_alloc >>")') natm
@@ -566,7 +589,7 @@ contains
 end subroutine InputData_Analysis
 
 subroutine InputData_Analysis_neb()
-! $Id: InputData_Analysis.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: InputData_Analysis.F90 475 2016-02-23 05:22:18Z jkoga $
   use m_Files, only :             nfout, nfcntn &
        &                        , m_Files_reopen_nfcntn
   use m_Ionic_System, only :      m_IS_rd_pos_and_v
diff -uprN phase0_2015.01/src_phase_3d/Makefile phase0_2015.01.01/src_phase_3d/Makefile
--- phase0_2015.01/src_phase_3d/Makefile	1970-01-01 09:00:00.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile	2016-07-12 13:15:25.000000000 +0900
@@ -0,0 +1,542 @@
+.SUFFIXES:
+.SUFFIXES: .o .F .f .F90 .f90 .c .mod
+
+# Platform    : GNU Linux (EM64T/AMD64)
+# Prog. model : MPI parallel
+# Compiler    : Intel Fortran compiler
+# BLAS/LAPACK : System-installed MKL
+# FFT         : System-installed FFTW3 library
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+F90 = mpif90
+CC  = gcc -m64
+CPP = 
+AR  = ar -vq
+LINK = mpif90
+F90FLAGS = -traceback
+F77FLAGS = -traceback
+CFLAGS = -O -DINTEL
+
+ESM = yes
+ifdef ESM
+CPPESM=-DENABLE_ESM_PACK
+LESM=-lesm
+else
+CPPESM=
+endif
+
+CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -DDISABLE_VDWDF -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DPAW3D -DUSE_NONBLK_COMM ${CPPESM}
+LFLAGS = 
+F90FLAGS_FIXED = -extend_source -Fl -fixed
+F90FLAGS_FREE = -extend_source -Fl
+MKLHOME=
+INCLUDE=
+LIBS = -L./ ${LESM}  -Wl,--start-group -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -Bdynamic -lfftw3  -lpthread
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+
+ifdef ESM
+ESM_LIB = libesm.a
+else
+ESM_LIB =
+endif
+LAPACK = 
+FFTOBJECT =  mpifft.o
+OBJ_INPUTPARSE = input_parse.o
+
+
+PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o #z_tool_timer.o
+
+PHASE_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_ES_occup.o \
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
+m_CD_Mag_Moment.o \
+m_epc_potential.o        \
+m_Positron_Wave_Functions.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_OP_Moment.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_Force.o                m_Stress.o               m_ES_WF_by_submat.o \
+m_constraints.o \
+m_ELF.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o            m_Phonon.o \
+m_Wannier.o  m_Replica.o Renewal_of_ChgCtrlParam.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o  \
+m_rttddft.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o
+
+PHASE_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o 
+
+PHASE_F_OTHERSUBS  = mdmain.o     constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+bottom_Subroutines.o              \
+spline.o \
+b_Crystal_Structure.o             b_Electronic_Structure.o \
+b_Words.o                         b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                         b_Kpoints.o \
+b_PlaneWaveBasisSet.o             b_Ionic_System.o \
+b_XC_Potential.o                  b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o\
+input_interface.o                 Initialization.o \
+WriteDownData_onto_Files.o        Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o \
+Renewal_of_WaveFunctions.o \
+Renewal_of_pWaveFunctions.o \
+IterationNumbers_Setting.o        ChargeDensity_Construction.o \
+ChargeDensity_Mixing.o            Renewal_of_Potential.o \
+Renewal_of_Hubbard_Potential.o    Renewal_of_OccMat.o \
+Renewal_of_pPotential.o           Renewal_of_Hubbard_Parameters.o \
+Convergence_Check.o               Forces.o \
+Move_Ions.o                       Initial_MD_Condition.o \
+Stress.o                          Postprocessing.o \
+Finalization_of_mpi.o             miscellaneous.o\
+b_BerryPhase.o \
+heap_sort.o                       real_spherical_harmonics.o \
+Real_space_integ.o                crotylm.o \
+screening_correction.o \
+mpi_dummy.o                            WriteDownData_onto_Files_ek.o \
+GaussLeg.o lib_int_deri_add.o  \
+rttddft_main.o \
+Potential_Construction.o        Potential_Mixing.o             ThomasFermiWeiz.o \
+Epsilon_postscf.o vdW.o
+
+# for vc_nl
+NLOBJ = vc_nl.o
+
+ifndef SX_DGEMM
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o \
+spg+tetra.o
+else
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+spg+tetra.o \
+dgemm__.o 
+endif
+
+EKCAL_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EKCAL_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_epc_potential.o \
+m_Positron_Wave_Functions.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
+m_CD_Mag_Moment.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_constraints.o \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o\
+m_ELF.o                  m_Wannier.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o               m_Phonon.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o 
+
+
+EKCAL_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EKCAL_F_OTHERSUBS  = ekmain.o    constraint_main.o            Preparation_for_mpi.o \
+scf_routines.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+vdW.o
+
+ifndef SX_DGEMM
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o
+else
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o 
+endif
+
+
+EPS_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EPS_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
+m_CD_Mag_Moment.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o          m_Stress.o      m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_KineticEnergy_Density.o  \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Wannier.o m_Wannier90.o \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o 
+
+EPS_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EPS_F_OTHERSUBS  = epsmain.o           constraint_main.o Preparation_for_mpi.o \
+scf_routines.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+ThomasFermiWeiz.o         Renewal_of_ChgCtrlParam.o  \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+vdW.o
+
+ifndef SX_DGEMM
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+TDLR_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+TDLR_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
+m_CD_Mag_Moment.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
+m_Wannier.o m_Wannier90.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Excitation.o \
+m_ES_occup_EPS.o        \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_LinearResponse_Tools.o     m_LinearResponse_Density.o  \
+m_LinearResponse_NonInt.o    m_LinearResponse_ALDA.o \
+m_LinearResponse_Kernel.o    m_LinearResponse_BS.o \
+m_LinearResponse_Spectrum.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o 
+
+TDLR_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o 
+
+TDLR_F_OTHERSUBS  = tdlrmain.o         constraint_main.o  Preparation_for_mpi.o \
+scf_routines.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o \
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+PseudoPotential_ek_Epsilon.o           \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+b_LinearResponse_Kernel.o  b_LinearResponse_exc.o \
+LinearResponse_Proc.o     LinearResponse_Spec.o \
+vdW.o
+
+ifndef SX_DGEMM
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+
+PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+all : phase epsmain
+
+ifdef ESM
+phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+else
+phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+endif
+
+ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ)
+	$(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ)
+	$(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ)
+	$(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+ifdef NO_MPI
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)"
+else
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)"
+endif
+
+liblapack.a:
+	cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+libblas.a:
+	cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h)
+	$(CC) -c $(CFLAGS) $(@:.o=.c)
+
+.f.o:
+	$(F90) -c $(F77FLAGS) $*.f
+
+.f90.o:
+	$(F90) -c $(F90FLAGS) $*.f90
+
+.F.o:
+	$(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F
+
+.F90.o:
+	$(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90
+
+clean:
+	\rm -f *.o *.mod *.a *.lib *.L *.list phase epsmain
+	\cd LAPACK; make clean
+	\cd BLAS; make clean
+	\cd EsmPack; make clean
+
+install: phase epsmain 
+	\mv -f phase ../bin/phase.3d
+	\mv -f epsmain ../bin/epsmain.3d
diff -uprN phase0_2015.01/src_phase_3d/Makefile.K phase0_2015.01.01/src_phase_3d/Makefile.K
--- phase0_2015.01/src_phase_3d/Makefile.K	2015-10-18 02:53:28.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile.K	2016-07-28 03:57:57.082513457 +0900
@@ -1,6 +1,9 @@
 .SUFFIXES:
 .SUFFIXES: .o .F .f .F90 .f90 .c .mod
 
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
 F90 = mpifrtpx
 CC  = fccpx
 CPP = 
@@ -10,7 +13,7 @@ F90FLAGS = -Nsetvalue -Kfast,parallel,op
 F77FLAGS = -Nsetvalue -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 -c -V -Qa,d,i,p,t,x -Koptmsg=2
 CFLAGS   = -DINTEL -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2
 
-ESM=YES
+ESM = yes
 ifdef ESM
 CPPESM=-DENABLE_ESM_PACK
 LESM=-lesm
@@ -18,16 +21,15 @@ else
 CPPESM=
 endif
 
+CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DPAW3D -DUSE_NONBLK_COMM ${CPPESM}
+LFLAGS = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
 INCLUDE=-I/home/apps/fftw/3.3/include
-
-CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DUSE_NONBLK_COMM ${CPPESM}
-
-LFLAGS   = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
-LIBS      = -L./ ${LESM} -Kopenmp -lm -SSL2MPI -SSL2BLAMP -SCALAPACK \
+LIBS = -L./ ${LESM} -Kopenmp -lm -SSL2MPI -SSL2BLAMP -SCALAPACK \
  -L/home/apps/fftw/3.3/lib64 -lfftw3 -lfftw3_omp -lfftw3_mpi $(KLINK) $(EIGEN_LIBS) #/opt/FJSVtclang/GM-1.2.0-13/lib64/libtofupa.o
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
 ###########################################################################
+
 ifdef ESM
 ESM_LIB = libesm.a
 else
@@ -39,7 +41,7 @@ OBJ_INPUTPARSE = input_parse.o
 
 
 PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
-m_IterationNumbers.o
+m_IterationNumbers.o #z_tool_timer.o
 
 PHASE_UPPER_MODULES = \
 m_Control_Parameters.o   m_Files.o                m_Timing.o \
@@ -57,7 +59,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -167,7 +169,7 @@ m_ES_occup.o             m_ES_WF_by_SDor
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
@@ -271,7 +273,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -286,7 +288,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -379,7 +380,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -395,7 +396,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase_3d/Makefile.frtpx phase0_2015.01.01/src_phase_3d/Makefile.frtpx
--- phase0_2015.01/src_phase_3d/Makefile.frtpx	2015-10-18 02:54:02.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile.frtpx	2016-07-28 03:58:08.816858850 +0900
@@ -1,14 +1,17 @@
 .SUFFIXES:
 .SUFFIXES: .o .F .f .F90 .f90 .c .mod
 
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
 F90 = mpifrtpx
 CC  = fccpx
 CPP = 
 AR  = ar qv
 LINK = mpifrtpx 
-F90FLAGS = -Nsetvalue -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 -c -V -Qa,d,i,p,t,x -Koptmsg=2
-F77FLAGS = -Nsetvalue -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 -c -V -Qa,d,i,p,t,x -Koptmsg=2
-CFLAGS   = -DINTEL -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2
+F90FLAGS = -Kparallel,ocl,preex,array_private,auto,simd=2,openmp -c -V -Qa,d,i,p,t,x -Koptmsg=2
+F77FLAGS = -Kparallel,ocl,preex,array_private,auto,simd=2,openmp -c -V -Qa,d,i,p,t,x -Koptmsg=2
+CFLAGS   = -DINTEL -Kfast,parallel,ocl,preex,array_private,auto,simd=2,openmp -DINTEL
 
 ESM=YES
 ifdef ESM
@@ -18,12 +21,10 @@ else
 CPPESM=
 endif
 
-INCLUDE=-I/usr/local/fftw/3.3/include
-
 CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DUSE_NONBLK_COMM ${CPPESM}
-
-LFLAGS   = -Kfast,parallel,openmp,ocl,preex,array_private,auto,simd=2,mfunc=2 #-W0,-zprl=Src2ex
-LIBS      = -L./ ${LESM} -L/usr/local/fftw/3.3/lib64 -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp #ScaLAPACK+FFTW3.3
+LFLAGS = -Kfast,parallel,ocl,preex,array_private,auto,simd=2,openmp,mfunc=2 -DINTEL #-W0,-zprl=Src2ex
+INCLUDE=-I/usr/local/fftw/3.3/lib64/../include
+LIBS = -L./ ${LESM} -L/usr/local/fftw/3.3/lib64 -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp #ScaLAPACK+FFTW3.3
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
 ###########################################################################
@@ -56,7 +57,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -166,7 +167,7 @@ m_ES_occup.o             m_ES_WF_by_SDor
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
@@ -270,7 +271,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -285,7 +286,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -378,7 +378,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -394,7 +394,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase_3d/Makefile.gfortran+scalapack+fftw+mpi phase0_2015.01.01/src_phase_3d/Makefile.gfortran+scalapack+fftw+mpi
--- phase0_2015.01/src_phase_3d/Makefile.gfortran+scalapack+fftw+mpi	2015-10-17 06:47:49.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile.gfortran+scalapack+fftw+mpi	2016-07-28 03:39:18.885026857 +0900
@@ -26,7 +26,7 @@ else
 CPPESM=
 endif
 
-CPPFLAGS = -DLinux -D_GNU_FORTRAN_ -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DUSE_NONBLK_COMM ${CPPESM}
+CPPFLAGS = -DLinux -D_GNU_FORTRAN_ -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DPAW3D -DUSE_NONBLK_COMM ${CPPESM}
 LFLAGS = 
 INCLUDE=-I/usr/local/fftw/include
 LAPACK_LIB = -L/usr/local/scalapack -lscalapack -L/usr/local/lapack -llapack -lblas
@@ -65,7 +65,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -175,7 +175,7 @@ m_ES_occup.o             m_ES_WF_by_SDor
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
@@ -279,7 +279,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -294,7 +294,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -387,7 +386,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -403,7 +402,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase_3d/Makefile.ifort+mpi phase0_2015.01.01/src_phase_3d/Makefile.ifort+mpi
--- phase0_2015.01/src_phase_3d/Makefile.ifort+mpi	2015-10-18 13:03:06.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile.ifort+mpi	2016-07-28 03:14:00.645108340 +0900
@@ -26,15 +26,17 @@ else
 CPPESM=
 endif
 
-CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DUSE_NONBLK_COMM ${CPPESM}
+CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DPAW3D -DUSE_NONBLK_COMM ${CPPESM}
 LFLAGS = -mkl
 MKLROOT=/opt/intel/mkl
 INCLUDE=-I$(MKLROOT)/include/fftw
 # Intel MKL (intel64)
 LAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_sequential
+#LAPACK_LIB = -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 -lmkl_sequential
 #LAPACK_LIB = -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core
 # Intel MKL (ia32)
 #LAPACK_LIB = -lmkl_scalapack_core -lmkl_blacs_intelmpi -lmkl_sequential
+#LAPACK_LIB = -lmkl_scalapack_core -lmkl_blacs_openmpi -lmkl_sequential
 #LAPACK_LIB = -L$(MKLROOT)/lib/ia32 -lmkl_scalapack_core -lmkl_blacs_intelmpi -lmkl_intel -lmkl_sequential -lmkl_core
 #FFT_LIB = -L$(MKLROOT)/interfaces/fftw3xf -lfftw3xf_intel
 LIBS = -L. ${LESM} $(LAPACK_LIB) $(FFT_LIB)
@@ -71,7 +73,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -181,7 +183,7 @@ m_ES_occup.o             m_ES_WF_by_SDor
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
@@ -285,7 +287,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -300,7 +302,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -393,7 +394,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -409,7 +410,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase_3d/Makefile.mpiifort+mkl phase0_2015.01.01/src_phase_3d/Makefile.mpiifort+mkl
--- phase0_2015.01/src_phase_3d/Makefile.mpiifort+mkl	2015-08-05 16:29:28.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile.mpiifort+mkl	2016-07-12 12:51:52.000000000 +0900
@@ -66,7 +66,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -176,7 +176,7 @@ m_ES_occup.o             m_ES_WF_by_SDor
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
@@ -280,7 +280,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o      m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -295,7 +295,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -388,7 +387,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o    m_Orbital_Population.o      m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -404,7 +403,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase_3d/Makefile.tmpl phase0_2015.01.01/src_phase_3d/Makefile.tmpl
--- phase0_2015.01/src_phase_3d/Makefile.tmpl	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Makefile.tmpl	2016-07-12 12:51:52.000000000 +0900
@@ -18,7 +18,7 @@ m_FiniteElectricField.o \
 m_ES_ExactExchange.o \
 m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o   m_Orbital_Population.o     m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_epc_potential.o        \
 m_Positron_Wave_Functions.o \
@@ -128,7 +128,7 @@ m_ES_occup.o             m_ES_WF_by_SDor
 m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
 m_epc_potential.o \
 m_Positron_Wave_Functions.o \
-m_Charge_Density.o       m_CD_mixing.o m_Dipole.o               m_Orbital_Population.o \
+m_Charge_Density.o    m_Orbital_Population.o    m_CD_mixing.o m_Dipole.o               \
 m_CD_Mag_Moment.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
@@ -232,7 +232,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -247,7 +247,6 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o       m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Wannier.o m_Wannier90.o \
 m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
@@ -340,7 +339,7 @@ m_ES_LHXC.o              m_ES_Intgr_Vlhx
 m_ES_dos.o               m_Hubbard.o \
 m_epc_potential.o \
 m_vdWDF.o \
-m_Charge_Density.o       m_CD_mixing.o m_XC_Potential.o \
+m_Charge_Density.o   m_Orbital_Population.o       m_CD_mixing.o m_XC_Potential.o \
 m_CD_Mag_Moment.o \
 string.o m_db.o \
 m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
@@ -356,7 +355,7 @@ m_Ldos.o                 m_ELF.o \
 m_constraints.o \
 m_BerryPhase.o           m_BP_Properties.o \
 m_Representation.o       m_Raman.o          m_Phonon.o               m_Dipole.o \
-m_Orbital_Population.o   m_Wannier.o m_Wannier90.o \
+m_Wannier.o m_Wannier90.o \
 m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
 m_Excitation.o \
 m_ES_occup_EPS.o        \
diff -uprN phase0_2015.01/src_phase_3d/Move_Ions.F90 phase0_2015.01.01/src_phase_3d/Move_Ions.F90
--- phase0_2015.01/src_phase_3d/Move_Ions.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Move_Ions.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 482 $)
 !
 !  SUBROUINE:  Move_Ions, wd_cps_and_forces
 !
@@ -33,11 +33,11 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine Move_Ions
-! $Id: Move_Ions.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Move_Ions.F90 482 2016-04-08 08:40:45Z jkoga $
   use m_Control_Parameters, only : iprimd, c_iteration2GDIIS &
        &                         , m_CtrlP_what_is_mdalg &
        &                         , m_CtrlP_set_gdiisoptmode &
-       &                         , sw_charge_predictor,sw_wf_predictor,sw_rspace
+       &                         , sw_charge_predictor,sw_wf_predictor,sw_rspace,af
   use m_Const_Parameters, only   : DP, TEMPERATURE_CONTROL, VERLET &
        &, BLUEMOON, QUENCHED_CONSTRAINT, QUENCHED_MD, NORMAL_MODE_ANALYSIS &
        &, HYPERPLANE_ADAPTIVE_COORDINATE, HAC, T_CONTROL, GDIIS, ORDINA &
@@ -50,7 +50,8 @@ subroutine Move_Ions
        &                   , m_IS_cp_cps2cpo,m_IS_wd_pos_and_v &
        &                   , m_IS_phonon_force, m_IS_cg, m_IS_cg2 &
        &                   , m_IS_evaluate_v_verlet &
-       &                   , m_IS_update_cps_history
+       &                   , m_IS_update_cps_history &
+       &                   , m_IS_force_af_symmetry
 !!$       &                   , forcmx_constraint_quench, almda, mdmode &
   use m_Force,        only : forc_l, forcmx
   use m_IterationNumbers, only : iteration_ionic,iteration
@@ -150,6 +151,8 @@ subroutine Move_Ions
   endif
 ! ============================================================== 13.0B
 
+  if (af/=0) call m_IS_force_af_symmetry(nfout)
+
   if(iprimd >= 2) call m_IS_wd_forc(forc_l)
 
   if(sw_rspace==ON)then
diff -uprN phase0_2015.01/src_phase_3d/NEB.F90 phase0_2015.01.01/src_phase_3d/NEB.F90
--- phase0_2015.01/src_phase_3d/NEB.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/NEB.F90	2016-07-12 12:51:52.000000000 +0900
@@ -106,6 +106,9 @@ subroutine do_neb()
         if (.not.pp_generated) then
            call PseudoPotential_Construction
            pp_generated = .true.
+#ifdef ENABLE_ESM_PACK
+           call Preparation_for_ESM()
+#endif
         else
            if ( flg_paw ) then
               if ( itr > 1 .or. neb%cond%condition == 1 ) then
diff -uprN phase0_2015.01/src_phase_3d/Postprocessing.F90 phase0_2015.01.01/src_phase_3d/Postprocessing.F90
--- phase0_2015.01/src_phase_3d/Postprocessing.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Postprocessing.F90	2016-07-12 12:51:52.000000000 +0900
@@ -50,7 +50,7 @@
 !
 !$$#ifndef PARA3D
 subroutine Postprocessing(ignore_convergence)
-! $Id: Postprocessing.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: Postprocessing.F90 489 2016-05-24 04:15:54Z ktagami $
   use m_Const_Parameters, only :   DP, ON, OFF, FORCE_CONVERGED, INITIAL, CONTINUATION &
        &                         , FIXED_CHARGE, FIXED_CHARGE_CONTINUATION &
        &                         , Gauss_distrib_func, EK, SCF &
@@ -86,7 +86,7 @@ subroutine Postprocessing(ignore_converg
   use m_Ionic_System,       only : natm, natm2, ityp, iwei, iatomn, pos &
        &                         , m_IS_pack_all_ions_in_uc &
        &                         , m_IS_set_natm_prim,m_IS_set_napt_prim
-  use m_PseudoPotential,    only : ival
+  use m_PseudoPotential,    only : ival, flg_paw
   use m_Files, only :              nfout,nfdos,nfchr,nfwfk,nfldos,nfelf &
        &                         , nfvlc, nfcntn_bin_stm &
        &                         , m_Files_open_nfdos, m_Files_open_nfchr &
@@ -184,7 +184,7 @@ subroutine Postprocessing(ignore_converg
 
 
   use m_Control_Parameters, only : sw_wf_squared_rspace, charge_filetype, &
-       &                           sw_wf_integ_moment
+       &                           sw_wf_integ_moment,  sw_calc_contact_density
 
   implicit none
 
@@ -253,7 +253,6 @@ subroutine Postprocessing(ignore_converg
         end if
      end if
 
-! ============== KT_add ====================== 13.0E
      if ( m_CtrlP_way_of_smearing() == Fermi_Dirac ) then
         if ( ekmode == ON ) then
            call m_ESoc_count_charge_belowEF_ek( nfout )
@@ -261,28 +260,19 @@ subroutine Postprocessing(ignore_converg
            call m_ESoc_count_charge_belowEF( nfout )
         endif
      endif
-! ============================================ 13.0E
 
 ! -------- Population ----
 
 ! --------- DOS ------
-!
-! ================================ modified by K. Tagami ============ 11.0
-!!     if(sw_dos == ON) call calc_totaldos()
-                       ! contained in this file, calculate total dos
-!
      if (sw_dos == ON) then
            call m_ESdos_alloc_dos_weight()
            call calc_totaldos()
      end if
-! =================================================================== 11.0
 
 ! ---- LDOS ----------
 !
      if(sw_ldos == ON) then
-! =================== added by K. Tagami =============== 11.0 
            call calc_localdos()
-! ======================================================= 11.0
      end if
 
 ! ------ Charge Distrib ----
@@ -301,7 +291,6 @@ subroutine Postprocessing(ignore_converg
 ! ------ STM or Work function ---
 !
      if (sw_fine_STM_simulation == ON) then
-!!$           call write_potential_for_STM_noncl
            call write_potential_for_STM
      endif
 
@@ -445,7 +434,6 @@ contains
   end subroutine calc_totaldos
 
 
-! ===================== added by K. Tagami ====================== 11.0
   subroutine calc_localdos()
 
     if (ekmode == OFF) then
@@ -526,9 +514,7 @@ contains
 
   end subroutine calc_localdos
 
-! ============================================================== 11.0
 
-! ========================= added by K. Tagami =============== 11.0
   subroutine calc_spatial_chg_distrib
     integer :: iloop, iloop2
 
@@ -663,9 +649,8 @@ contains
 
   end subroutine calc_partial_charge
 
-! ============================================================ 11.0
 
-! ============================ added by K. Tagami ================= 11.0
+
   subroutine write_potential_for_STM
     integer :: ismax
 
@@ -720,14 +705,9 @@ contains
   end subroutine write_potential_for_STM
 
 
-! ================================================================= 11.0
 
-!================================== modified by K. Tagami ========== 11.0
-!  subroutine wd_fine_STM_parameters()
-!
   subroutine wd_fine_STM_parameters( ismax )
     integer, intent(in) :: ismax
-! ================================================================= 11.0
 
     integer :: ik
     write(nfout,'(" !!STM:    kg(kng)   = ",i8)') kg
@@ -770,12 +750,8 @@ contains
     end do
   end subroutine check_neordr_nrvf_ordr
 
-! ====================================== modified by K. Tagami ========== 11.0
-!  subroutine wd_ArraySize_Parameters_For_STM(nf_bin)
-!
   subroutine wd_ArraySize_Parameters_For_STM( nf_bin, ismax )
     integer, intent(in) :: ismax
-! ======================================================================== 11.0
 
     integer, intent(in) :: nf_bin
     if(mype == 0) then
diff -uprN phase0_2015.01/src_phase_3d/Preparation.F90 phase0_2015.01.01/src_phase_3d/Preparation.F90
--- phase0_2015.01/src_phase_3d/Preparation.F90	2015-09-14 15:36:00.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Preparation.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 458 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 474 $)
 !
 !  SUBROUINE:  fft_box_finding_way, Preparation, Preparation_ek
 !
@@ -36,8 +36,17 @@
 #define _INCLUDE_EXX_
 #endif
 
+#ifdef FJ_TIMER
+#   define __TIMER_FJ_START_w_BARRIER(str,a)   call mpi_barrier(str,ierr) ;   call timer_sta(a)
+#   define __TIMER_FJ_START(a)   call timer_sta(a)
+#   define __TIMER_FJ_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_FJ_START(a)
+#   define __TIMER_FJ_STOP(a)
+#endif
+
 subroutine Preparation()
-! $Id: Preparation.F90 458 2015-09-09 06:05:42Z ktagami $
+! $Id: Preparation.F90 474 2016-02-09 09:32:53Z yamasaki $
   use m_Const_Parameters,  only:DP,FILE,GENERAL,OUTER,INNER,SIMPLE_CUBIC &
        &                       ,HEXAGONAL,TETRAHEDRON &
        &                       ,INITIAL,CONTINUATION,FIXED_CHARGE &
@@ -190,6 +199,7 @@ subroutine Preparation()
 
   use m_ES_WF_by_MatDiagon, only : m_ESmat_set_reduced_basis_mode
 
+
   implicit none
   integer :: outer_or_inner
   include 'mpif.h'
@@ -231,15 +241,10 @@ subroutine Preparation()
   if(symmetry_method == AUTOMATIC) then
      call m_CS_SG_auto_gnrt_sym_op(.true.,nfout) ! -(m_CS_SpaceGroup) -> nopr,af
   else
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(31)
-#endif
+                                                     __TIMER_FJ_START(31)
      call m_CS_gnrt_symmetry_operations(.true.,nfout) ! -(m_Crystal_Structure) -> nopr,af
 !!$     call m_CS_gnrt_symm_operators_tl(.true.,nfout) ! -(m_Crystal_Structure) -> nopr,af
-#ifdef FJ_TIMER
-                    call timer_end(31)
-#endif
+                                                     __TIMER_FJ_STOP(31)
   end if
   call m_CS_alloc_op_tau(nfout)
   call m_CS_alloc_op_tau_tl(nfout)
@@ -247,15 +252,10 @@ subroutine Preparation()
      call m_CS_SG_auto_gnrt_sym_op(paramset,nfout) ! paramset == .false.
      call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos
   else
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(31)
-#endif
+                                                     __TIMER_FJ_START(31)
      call m_CS_gnrt_symmetry_operations(paramset,nfout) ! paramset == .false.
      call m_CS_gnrt_symm_operators_tl(paramset,nfout) ! -(m_Crystal_Structure) -> nopr,af
-#ifdef FJ_TIMER
-                    call timer_end(31)
-#endif
+                                                     __TIMER_FJ_STOP(31)
   end if
 
   call m_CS_SG_print_space_group_name(nfout)
@@ -284,15 +284,10 @@ subroutine Preparation()
      call m_IS_inv_sym_off(nfout) ! -> inversion_symmetry
   end if
   call m_IS_alloc_napt()
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(31)
-#endif
+                                                     __TIMER_FJ_START(31)
   call m_CS_wd_op_and_tau(nfout)
   call m_IS_symm_check_of_pos()
-#ifdef FJ_TIMER
-                    call timer_end(31)
-#endif
+                                                     __TIMER_FJ_STOP(31)
 
   if(ekmode /= GRID .and. sw_phonon == ON) then
      call m_Phonon_alloc_qvec()
@@ -338,14 +333,9 @@ subroutine Preparation()
 ! ============================================================ 13.0F
 #endif
 
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(41)
-#endif
+                                                     __TIMER_FJ_START(41)
   call m_pwBS_generate_G_vectors_3D()    ! ->n_rGv,n_rGpv ->kgp
-#ifdef FJ_TIMER
-                    call timer_end(41)
-#endif
+                                                     __TIMER_FJ_STOP(41)
   call m_Parallel_init_mpi_kngp_3D(nfout,ipriparallel,kgp)  ! -(m_Parallelization) ->ista_kngp,iend_kngp
   call m_pwBS_set_ngabc_kngp_l_3D
 
@@ -354,14 +344,9 @@ subroutine Preparation()
        & call m_Files_open_kpoint_files(way_ksample,nbztyp_spg)  
 
   call m_pwBS_alloc_ngpt_igfp_gr_3D()
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
   call m_pwBS_calc_length_of_G_3D()         ! -> gr_l
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
   call m_pwBS_G_trans_functions_3D()   ! -> ngpt_l: Set of G-vectors translated according to symmetry operations
 
   ggacmp_parallel_rev = ggacmp_parallel
@@ -398,37 +383,22 @@ subroutine Preparation()
 
      if(icond == PREPARATION_ONLY .or. icond == INITIAL .or. icond == CONTINUATION .or. &
         & icond==COORDINATE_CONTINUATION) then
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
         call m_pwBS_for_each_WF(preallocation=paramset) ! -> kg1, nbase,iba (when paramset==.false.)
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
      else if(icond == FIXED_CHARGE .or. icond == FIXED_CHARGE_CONTINUATION) then
 !!$        if(ekmode == OFF)&
 !!$             & stop ' ! combination of ekmode and icond is illegal (Preparation)'
         call m_Files_close_files_initial0()
         call m_Files_open_nfeng(icond)
         if(ekmode == OFF .and. fixed_charge_k_parallel == ALL_AT_ONCE) then
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
            call m_pwBS_for_each_WF(preallocation=paramset) ! -> kg1, nbase,iba (when paramset==.false.)
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
         else 
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
            call m_pwBS_for_each_WF(preallocation=.true.) ! -> kg1, iba
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
            call m_Kp_alloc_kpoints_ek()  ! -> kv3_ek (=kv3), allocate(vkxyz_ek,qwgt_ek)
            call m_Kp_cp_vkxyz_to_vkxyz_ek()
            if(fixed_charge_k_parallel == ONE_BY_ONE) then
@@ -440,14 +410,9 @@ subroutine Preparation()
            call m_pwBS_cp_iba_to_iba_ek()
            if(icond == FIXED_CHARGE_CONTINUATION) &
                 & call m_ES_cp_iconv(numk_tmp,iconv_ek_tmp)
-#ifdef FJ_TIMER
-!                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(28)
-#endif
+                                                     __TIMER_FJ_START(28)
            call m_pwBS_for_each_WF(preallocation=.false.) ! -> kg1, iba
-#ifdef FJ_TIMER
-                    call timer_end(28)
-#endif
+                                                     __TIMER_FJ_STOP(28)
         end if
      else
         stop ' icond is illegal (Preparation)'
@@ -463,6 +428,9 @@ subroutine Preparation()
         call m_pwBS_exxCD()
      endif
 ! ========== ======================================= 13.0F
+     if(ipri>=1) write(nfout,'(" _INCLUDE_EXX_ is defined")')
+#else
+     if(ipri>=1) write(nfout,'(" _INCLUDE_EXX_ is not defined")')
 #endif
 
      if(sw_ldos == ON) then
diff -uprN phase0_2015.01/src_phase_3d/Preparation_for_ESM.F90 phase0_2015.01.01/src_phase_3d/Preparation_for_ESM.F90
--- phase0_2015.01/src_phase_3d/Preparation_for_ESM.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Preparation_for_ESM.F90	2016-07-12 12:51:52.000000000 +0900
@@ -9,7 +9,7 @@ subroutine Preparation_for_ESM()
     use m_FFT, only : fft_box_size_CD
     use m_Crystal_Structure, only : altv
     use m_Parallelization, only : npes,mype, ista_kngp, iend_kngp, mpi_ke_world
-    use m_PlaneWaveBasisSet, only : ngabc_kngp_l,igfp_l
+    use m_PlaneWaveBasisSet, only : ngabc_kngp_l,igfp_l,kgp
 
     implicit none
     include 'mpif.h'
@@ -32,25 +32,29 @@ subroutine Preparation_for_ESM()
     do i=1,natm
        ival_at(i) = ival(ityp(i))
     enddo
-    allocate(ngabc_esm(3,1:iend_kngp-ista_kngp+1));ngabc_esm=0
+!    allocate(ngabc_esm(3,1:iend_kngp-ista_kngp+1));ngabc_esm=0
+    allocate(ngabc_esm(3,1:kgp));ngabc_esm=0
     do i=1,3
        do j=ista_kngp,iend_kngp
-          ngabc_esm(i,j-ista_kngp+1) = ngabc_kngp_l(j,i)
+          ngabc_esm(i,j) = ngabc_kngp_l(j,i)
        enddo
     enddo
+    call mpi_allreduce(mpi_in_place,ngabc_esm,3*kgp,mpi_integer,mpi_sum,mpi_ke_world,ierr)
     allocate(cps_tmp(3,natm))
     do i=1,3
        do j=1,natm
           cps_tmp(i,j) = cps(j,i)
        enddo
     enddo
-    allocate(igfp_l_esm(1:iend_kngp-ista_kngp+1));igfp_l_esm=0
+    allocate(igfp_l_esm(1:kgp));igfp_l_esm=0
     do i=ista_kngp,iend_kngp
-       igfp_l_esm(i-ista_kngp+1) = igfp_l(i)
+       igfp_l_esm(i) = igfp_l(i)
     enddo
+    call mpi_allreduce(mpi_in_place,igfp_l_esm,kgp,mpi_integer,mpi_sum,mpi_ke_world,ierr)
+
     call Esm_interface_map_parameters(natm,ival_at,cps_tmp,1.0d0,altv, &
     & fft_box_size_CD(1,0),fft_box_size_CD(2,0),fft_box_size_CD(3,0),  &
-    & esm_bc_c,.false.,iend_kngp-ista_kngp+1,nspin,ngabc_esm,          &
+    & esm_bc_c,.false.,kgp,nspin,ngabc_esm,                            &
     & igfp_l_esm,igfp_l_esm,esm_w,2.0d0*esm_e_field, esm_izwall,       &
     & esm_z_wall, esm_bar_height,esm_bar_width)
     call Esm_interface_set_communicator(mpi_ke_world)
diff -uprN phase0_2015.01/src_phase_3d/Preparation_for_mpi.F90 phase0_2015.01.01/src_phase_3d/Preparation_for_mpi.F90
--- phase0_2015.01/src_phase_3d/Preparation_for_mpi.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Preparation_for_mpi.F90	2016-07-12 12:51:52.000000000 +0900
@@ -40,7 +40,7 @@ subroutine Preparation_for_mpi(prepare_c
   use m_Files,                only : nfout
   use m_Control_Parameters,   only : ipriparallel,nspin,neg,printable,ngnode_nbmx &
        &                           , flag_mpi_g_dot_r,flag_mpi_g_dot_r_k &
-       &                           , icond, ekmode, fixed_charge_k_parallel, sw_rsb &
+       &                           , icond, ekmode, fixed_charge_k_parallel, sw_rsb, neg_is_given &
        &                           , m_CtrlP_flag_mpi_G_dot_R, fftbox_divide_cube &
        &                           , fftbox_3ddiv_1, fftbox_3ddiv_2, fftbox_3ddiv_3 &
        &                           , fftbox_div_1, fftbox_div_2, sw_fft_xzy
@@ -100,10 +100,12 @@ subroutine Preparation_for_mpi(prepare_c
 
   if(prepare_communicators==ON)then
 
-     call m_Parallel_init_mpi_elec_3D(nfout,ipriparallel,printable,neg,kv3,nspin,kg1,iba)
-     call make_index_band_3D(nfout,ipriparallel,printable,kv3,neg &
-          & , nblocksize_mgs,nblocksize_mgs_is_given,nblocksize_mgs_default)
-     call make_index_band_for_Gdiv_3D(neg, nblocksize_mgs,nblocksize_mgs_is_given,nblocksize_mgs_default)
+!!$     if(neg_is_given) then
+        call m_Parallel_init_mpi_elec_3D(nfout,ipriparallel,printable,neg,kv3,nspin,kg1,iba)
+        call make_index_band_3D(nfout,ipriparallel,printable,kv3,neg &
+             & , nblocksize_mgs,nblocksize_mgs_is_given,nblocksize_mgs_default)
+        call make_index_band_for_Gdiv_3D(neg, nblocksize_mgs,nblocksize_mgs_is_given,nblocksize_mgs_default)
+!!$     end if
 #ifdef FFT_3D_DIVISION
      call m_Parallel_mpi_fft_box_3div(nfout,ipriparallel,printable,fft_box_size_WF,kimg, &
     &                            fftbox_3ddiv_1, fftbox_3ddiv_2,fftbox_3ddiv_3)
diff -uprN phase0_2015.01/src_phase_3d/PseudoPotential_Construction.F90 phase0_2015.01.01/src_phase_3d/PseudoPotential_Construction.F90
--- phase0_2015.01/src_phase_3d/PseudoPotential_Construction.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/PseudoPotential_Construction.F90	2016-07-12 12:51:52.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 483 $)
 !
 !  SUBROUINE:  check_gncpp_type, PP_construction_paramset
 !             PseudoPotential_Construction, PseudoPotential_ek
@@ -38,7 +38,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 subroutine PseudoPotential_Construction
-! $Id: PseudoPotential_Construction.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: PseudoPotential_Construction.F90 483 2016-04-27 04:02:48Z ktagami $
   use m_PseudoPotential,      only : m_PP_alloc0_ps_ntyp,   m_PP_alloc_ps_ntyp &
        &                           , m_PP_dealloc_ps_ntyp  &
        &                           , m_PP_set_mmesh,        m_PP_set_nloc &
@@ -63,6 +63,7 @@ subroutine PseudoPotential_Construction
        &                           , m_PP_check_file_format
   use m_PseudoPotential,      only : nlmta,nlmt,lmta,ilmt
   use m_Parallelization,      only : m_Parallel_init_mpi_nlmta_3D
+  use m_Parallelization,      only : m_Parallel_init_mpi_kngp_exx
   use m_PseudoPotential, only : ae_wavefunctions_are_detected
   use m_PseudoPotential, only : m_PP_alloc_qitg_wan, m_PP_set_dk_wan &
                               , m_PP_alloc_qitg_fef, m_PP_set_dk_fef
@@ -88,8 +89,8 @@ subroutine PseudoPotential_Construction
        &                           , num_projectors, projector_type, intzaj &
        &                           , sw_wannier, sw_berry_phase, corecharge_cntnbin, sw_fef &
        &                           , ekmode, fixed_charge_k_parallel, continuation_using_ppdata &
-       &                           , m_CtrlP_set_ppprinted,sw_rspace, sw_rspace_hyb, sw_hybrid_functional &
-       &                           , sw_ldos, sw_rspace_ldos, m_CtrlP_rspace_integ_all_OK
+       &                           , m_CtrlP_set_ppprinted,sw_rspace, sw_hybrid_functional &
+       &                           , sw_ldos, sw_rspace_ldos, m_CtrlP_rspace_integ_all_OK,nmax_G_hyb
   use m_PlaneWaveBasisSet,    only : kgp,gr_l,ngshell,ngshell_range
   use m_Const_Parameters,     only : OLD,ON, SPHERICAL_HARMONICS, NO
   use m_Files,                only : m_Files_open_ps_files,m_Files_close_ps_files,nfcntn_bin &
@@ -132,7 +133,7 @@ subroutine PseudoPotential_Construction
        &                          , m_OP_Qnum_alloc_orb_ind_data
 ! ============== 2014/08/01
 
-  use m_Realspace, only : m_RS_resolve_mesh_soft
+  use m_Realspace, only : m_RS_resolve_mesh_soft,m_RS_resolve_mesh_hard, m_RS_build_qr_clm_ylm
 
   use m_ES_ExactExchange, only : m_ES_EXX_set_nmax_G_hyb
 
@@ -154,6 +155,7 @@ subroutine PseudoPotential_Construction
   if(sw_hybrid_functional == ON) then
      call m_CtrlP_set_hybrid_parameters
      call m_ES_EXX_set_nmax_G_hyb
+     call m_Parallel_init_mpi_kngp_exx(nfout,ipriparallel,nmax_G_hyb)
   endif
 ! ======================================================== 12.5Exp
 
@@ -357,26 +359,26 @@ subroutine PseudoPotential_Construction
           &  .and. ekmode==ON ) then
 ! =================13.0S
 
-! ========================================== modified by K. Tagami ========= 0.2
-!        call m_NLP_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl
-! ==========================================================================
+#ifdef EXP_WFN_PROJ
+        call m_NLP_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl
+#endif
         if(sw_orb_popu == ON) then
            call m_PP_make_qorb(nfout)
-! ========================================== modified by K. Tagami ========= 0.2
-!           call m_NLP_phir_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> phig
-! ===========================================================================
+#ifdef EXP_WFN_PROJ
+           call m_NLP_phir_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> phig
+#endif
            call m_PP_cnstrct_crotylm(nfout) !-> crotylm
         end if
 
         if(sw_use_add_proj == ON) then
-! ========================================== modified by K. Tagami ========= 0.2
-!           call m_NLP_add_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl_add
-! ===========================================================================
+#ifdef EXP_WFN_PROJ
+           call m_NLP_add_betar_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> snl_add
+#endif
         end if
         if(intzaj == by_pseudo_atomic_orbitals) then
-! ========================================== modified by K. Tagami ========= 0.2
-!           call m_NLP_paor_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> paog
-! ===========================================================================
+#ifdef EXP_WFN_PROJ
+           call m_NLP_paor_dot_PWs(nfout,kv3,vkxyz) !(kbint) --> paog
+#endif
         end if
 
         if(num_projectors > 0) then
@@ -570,4 +572,95 @@ contains
         
 end subroutine PseudoPotential_Construction
 
+subroutine Check_of_Pseudopotential
+  use m_Const_Parameters,     only : DP, OLD
+  use m_Files,                only : nfout,nfpot &
+       &                           , m_Files_open_ps_file, m_Files_close_ps_file
+#ifndef ENABLE_ESM_PACK
+  use m_Control_Parameters,   only : ipripp &
+#else
+  use m_Control_Parameters,   only : ipripp, esm_qbac &
+#endif
+      & , neg,m_CtrlP_set_neg_properly, m_CntrlP_set_neg, m_CntrlP_set_meg
+  use m_Crystal_Structure,    only : additional_charge
+  use m_Ionic_System,         only : ntyp, iatom, iatomn, ivan, zeta1, qex
+  use m_PseudoPotential,      only : m_PP_rd_ival
+  use m_Parallelization,      only : mpi_comm_group
+  use m_Ionic_System,         only : natm,ionic_charge_atomtyp, ionic_charge_atoms &
+       &                           , mag_moment0_atoms_is_defined
+  implicit none
+  include 'mpif.h'                                      ! MPI
+  integer :: it, ierror, nfpp
+  real(kind=DP),allocatable,dimension(:)   :: ivalt  ! d(ntyp) #valence electrons
+  real(kind=DP) :: totch_t, ival
+  allocate(ivalt(ntyp))
+  nfpp = 0
+  do it = 1, ntyp
+     call m_Files_open_ps_file(ivan,iatomn,ntyp,it,ierror)
+     if(ierror /= 0) call mpi_stop(nfout)
+     if(ipripp >= 2) write(nfout,'(" !! PP_Construction: it = ",i3)') it
+     if(ivan(it) /= OLD) then
+        nfpp = nfpp + 1
+        call m_PP_rd_ival(nfpot(nfpp),it,nfout,ival) ! -> ival
+     else if(ivan(it) == OLD) then
+     endif
+
+     call mpi_bcast(ival,1,mpi_double_precision,0,mpi_comm_group,ierror)
+     ivalt(it) = ival
+     call m_Files_close_ps_file(it)
+  end do
+  totch_t = 0.d0
+  do it = 1, ntyp
+     totch_t = totch_t + ivalt(it)*iatom(it) + qex(it)
+     write(nfout,'(" ## it = ",i5,", ivalt = ",f8.4, " iatom = ",f8.4," qex = ",f8.4)') it, ivalt(it),iatom(it),qex(it)
+  end do
+  call mpi_bcast(totch_t,1,mpi_double_precision,0,mpi_comm_group,ierror)
+#ifdef ENABLE_ESM_PACK
+  totch_t = totch_t - esm_qbac
+#endif
+! ===== KT_add === 2014/06/08
+  totch_t = totch_t - additional_charge      ! totch is num. of electrons
+! ================ 2014/06/08
+#ifdef INIT_CHARGE_ATOM_BY_ATOM
+    if ( mag_moment0_atoms_is_defined ) then
+       do ia=1, natm
+          totch_t = totch_t -ionic_charge_atoms(ia)
+       end do
+    else
+       do it = 1, ntyp
+          totch_t = totch_t -ionic_charge_atomtyp(it)
+       end do
+    endif
+#else
+    do it = 1, ntyp
+       totch_t = totch_t -ionic_charge_atomtyp(it)
+    end do
+#endif
+
+  if(totch_t >= neg*2.0) then
+     if(ipripp >= 0) then
+        write(nfout,'(" ### Warning(1309): Number of bands(neg) is insufficient:")')
+        write(nfout,'("                totch = ",f10.3," >= neg*2.0 = ",f10.3)') totch_t, neg*2.0
+     end if
+     if(dabs(sum(zeta1)) > 0.d0) then
+        call m_CtrlP_set_neg_properly(1.3*totch_t) ! -> neg
+     else
+        call m_CtrlP_set_neg_properly(totch_t) ! -> neg
+     end if
+     call m_CntrlP_set_meg(neg)
+!!$     call m_CntrlP_set_neg(neg)
+     if(ipripp >= 0) then
+        write(nfout,'(" ### Reset value of neg = ",i8)') neg
+        write(nfout,'("                totch = ",f10.3," <= neg*2.0 = ",f10.3)') totch_t, neg*2.0
+     end if
+  else
+     if(ipripp >= 0) then
+        write(nfout,'(" ###  Number of bands(neg) is sufficient:")')
+        write(nfout,'("                totch = ",f10.3," < neg*2.0 = ",f10.3)') totch_t, neg*2.0
+     end if
+  end if
+
+  deallocate(ivalt)
+end subroutine Check_of_Pseudopotential
+
 
diff -uprN phase0_2015.01/src_phase_3d/Renewal_of_ChgCtrlParam.f90 phase0_2015.01.01/src_phase_3d/Renewal_of_ChgCtrlParam.f90
--- phase0_2015.01/src_phase_3d/Renewal_of_ChgCtrlParam.f90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Renewal_of_ChgCtrlParam.f90	2016-07-12 12:51:53.000000000 +0900
@@ -10,7 +10,8 @@ subroutine Renewal_of_Chg_Ctrl_Param
   use m_Const_Parameters,  only : MSD, CG, SD, LMSD, LMCG,lmeazyCG, lmmsd
   use m_Control_Parameters,  only :  m_CtrlP_solver_for_WFs_now, intzaj
 
-  use  m_IterationNumbers,    only : iteration, iteration_ionic, iteration_electronic
+  use  m_IterationNumbers,    only : iteration, iteration_ionic, iteration_electronic, &
+       &                             iteration_unit_cell
 
   use m_Control_Parameters,       only : sw_update_charge_hsr, eval_energy_before_charge
   use m_Control_Parameters,     only : sw_recomposing, sw_force_simple_mixing
@@ -41,7 +42,10 @@ subroutine Renewal_of_Chg_Ctrl_Param
        &                           nmax_intermid_lambda, &
        &                           edelta_change_lambda_first, &
        &                           edelta_change_lambda_last, &
-       &                           max_iterations_mag_constraint
+       &                           max_iter_elec_mag_constraint, &
+       &                           max_iter_ion_mag_constraint, &
+       &                           max_iter_cell_mag_constraint, &
+       &                           sw_fix_charge_after_constraint
 
 ! ================= KT_add =================== 13.0XX
   use m_Control_Parameters,   only : sw_calc_ekin_density, &
@@ -59,17 +63,19 @@ subroutine Renewal_of_Chg_Ctrl_Param
        &                             edelta_start_wf_mixing
 ! =========================== 13.0U3
 
+  use m_Parallelization,  only : mype
+
   implicit none
 
   real(kind=DP) :: edeltab_per_atom, edeltb_per_atom, edelta
 
-! ----------------------- UUU
-  if ( sw_monitor_atomcharge == ON ) then
-     if ( iteration_ionic >1 .and. iteration_electronic ==1 ) then
-        call m_CD_set_rad_cov_default
-        call m_CD_set_rad_cov_now
-     endif
-  endif
+! -----------------------
+!  if ( sw_monitor_atomcharge == ON ) then
+!     if ( iteration_ionic >1 .and. iteration_electronic ==1 ) then
+!        call m_CD_set_rad_cov_default
+!        call m_CD_set_rad_cov_now
+!     endif
+!  endif
 
 ! ------------------
   edelta = m_TE_what_is_edeltb_now()
@@ -290,25 +296,50 @@ contains
     logical, save :: First = .true.
     logical, save :: mag_constraint_is_over = .false.
     logical, save :: lambda_is_changed 
-    real(kind=DP), save :: lambda_org, lambda_old
+    integer, save :: sw_fix_charge_after_constr_org
+
+    real(kind=DP), save :: lambda_org, lambda_old, lambda_00
 
     integer :: i, nn
     real(kind=DP) :: c1, ratio
 
+    if ( First ) then
+       lambda_org = mag_constraint_lambda; 
+       lambda_old = mag_constraint_lambda
+       sw_fix_charge_after_constr_org = sw_fix_charge_after_constraint
+       istep = 1
+       First = .false.
+    else 
+!       if ( iteration_unit_cell > 1 .and. iteration_ionic == 1 &
+!            &                       .and. iteration_electronic == 1 ) then
+!          mag_constraint_is_over = .false.
+!          mag_constraint_lambda = lambda_org
+!!          istep = 1;  count = 0
+!       endif
+       if ( iteration_electronic == 1 ) then
+          mag_constraint_is_over = .false.
+          mag_constraint_lambda = lambda_org
+          sw_fix_charge_after_constraint = sw_fix_charge_after_constr_org
+          istep = 1;  count = 0
+       endif
+    endif
+
     if ( damping_method_mag_constraint == 0 )  return
-    if ( mag_constraint_is_over ) return
 
-    if ( iteration_ionic > 1 ) then
+    if ( iteration_ionic > max_iter_ion_mag_constraint ) then
+       mag_constraint_is_over = .true.
+       mag_constraint_lambda = 0.0d0;  
+    endif
+    if ( iteration_unit_cell > max_iter_cell_mag_constraint ) then
        mag_constraint_is_over = .true.
        mag_constraint_lambda = 0.0d0;  
-       return
     endif
 
-    if ( First ) then
-       lambda_org = mag_constraint_lambda; 
-       lambda_old = mag_constraint_lambda
-       istep = 1
-       First = .false.
+    if ( mag_constraint_is_over ) then
+       if ( sw_fix_charge_after_constraint == ON ) then
+          sw_update_charge_total = OFF
+       endif
+       return
     endif
 
     lambda_is_changed = .false.
@@ -346,17 +377,28 @@ contains
        endif
 
     case (ABRUPT)
-       if ( iteration_electronic > max_iterations_mag_constraint ) then
+       if ( iteration_electronic > max_iter_elec_mag_constraint ) then
           mag_constraint_lambda = 0.0d0
           lambda_is_changed = .true.
           mag_constraint_is_over = .true.
        endif
 
+       c1 = abs( edeltab_per_atom )
+
+       if ( c1 <  edelta_change_lambda_last ) then
+          count = count + 1
+          if ( count == succession ) then
+             mag_constraint_lambda = 0.0d0
+             mag_constraint_is_over = .true.
+             lambda_is_changed = .true.
+          endif
+       endif
+
     case (LINEAR)
-       ratio = dble( iteration_electronic ) / dble(max_iterations_mag_constraint)
+       ratio = dble( iteration_electronic ) / dble(max_iter_elec_mag_constraint)
        ratio = 1.0D0 -ratio
 
-       if ( iteration_electronic <= max_iterations_mag_constraint ) then
+       if ( iteration_electronic <= max_iter_elec_mag_constraint ) then
           mag_constraint_lambda = lambda_org *ratio
           lambda_is_changed = .true.
        else
@@ -364,6 +406,14 @@ contains
           mag_constraint_is_over = .true.
        endif
 
+       c1 = abs( edeltab_per_atom )
+       if ( c1 <  edelta_change_lambda_first ) then
+          count = count + 1
+          if ( count == succession ) then
+             mag_constraint_is_over = .true.
+             lambda_is_changed = .true.
+          endif
+       endif
     end select
 
     if ( lambda_is_changed ) then
@@ -373,7 +423,7 @@ contains
     endif
 
     lambda_old = mag_constraint_lambda
-
+!
   end subroutine update_lambda_mag_constraint
 
 ! === KT_add === 13.0U3
diff -uprN phase0_2015.01/src_phase_3d/Renewal_of_pPotential.f90 phase0_2015.01.01/src_phase_3d/Renewal_of_pPotential.f90
--- phase0_2015.01/src_phase_3d/Renewal_of_pPotential.f90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Renewal_of_pPotential.f90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  SUBROUINE: Renewal_of_pPotential
 !
diff -uprN phase0_2015.01/src_phase_3d/Renewal_of_pWaveFunctions.F90 phase0_2015.01.01/src_phase_3d/Renewal_of_pWaveFunctions.F90
--- phase0_2015.01/src_phase_3d/Renewal_of_pWaveFunctions.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/Renewal_of_pWaveFunctions.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  SUBROUINE: Renewal_of_pWaveFunctions
 !
diff -uprN phase0_2015.01/src_phase_3d/b_PseudoPotential_EXX.F90 phase0_2015.01.01/src_phase_3d/b_PseudoPotential_EXX.F90
--- phase0_2015.01/src_phase_3d/b_PseudoPotential_EXX.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/b_PseudoPotential_EXX.F90	2016-07-12 12:51:53.000000000 +0900
@@ -2,19 +2,20 @@ subroutine alloc_qitg_exx()
   use m_PseudoPotential,    only : nqitg
   use m_PlaneWaveBasisSet,  only : kgp
   use m_ES_ExactExchange,   only : nqmk, qitg_exx
-  use m_Control_Parameters, only : nmax_G_hyb
-  use m_Parallelization,    only : ista_kngp, iend_kngp
+  use m_Const_Parameters,   only : ON
+  use m_Parallelization,    only : ista_kngp, iend_kngp, ista_kngp_exx,iend_kngp_exx
   implicit none
-  if(.not.allocated(qitg_exx)) allocate(qitg_exx(ista_kngp:iend_kngp,nqitg,nqmk))
+  if(.not.allocated(qitg_exx)) then
+    allocate(qitg_exx(kgp,nqitg,nqmk))
+  endif
 end subroutine alloc_qitg_exx
 
 subroutine qitgft_qmk(it,nmm_il3,mm_il3,qrsps_mm,lcmax,h)
-  use m_Const_Parameters,   only : DP, PAI4, DELTA
-  use m_Control_Parameters, only : nmax_G_hyb
+  use m_Const_Parameters,   only : DP, PAI4, DELTA, ON
   use m_Crystal_Structure,  only : rltv
   use m_PlaneWaveBasisSet,  only : ngabc_kngp_l,kgp
   use m_PseudoPotential,    only : mmesh,nmesh,rmax,radr,wos,nqitg_sp
-  use m_Parallelization,    only : mpi_comm_group,ista_kngp,iend_kngp,npes,ierr
+  use m_Parallelization,    only : mpi_comm_group,ista_kngp,iend_kngp,npes,ierr,ista_kngp_exx,iend_kngp_exx
   use m_ES_ExactExchange,   only : nqmk, qmk, qitg_exx
   use m_Timing,             only : tstatc0_begin, tstatc0_end
   use m_Parallelization,    only : mpi_ke_world
@@ -51,7 +52,7 @@ subroutine qitgft_qmk(it,nmm_il3,mm_il3,
      mm0 = mm0 + nqitg_sp(i)
   end do
   iend_kngp0 = iend_kngp
-  if(iend_kngp0.gt.nmax_G_hyb) iend_kngp0 = nmax_G_hyb
+
   do ik=1,nqmk
      do i = ista_kngp, iend_kngp0
         kg(1:3) = qmk(ik,1:3) + ngabc_kngp_l(i,1:3)
@@ -86,11 +87,12 @@ subroutine qitgft_qmk(it,nmm_il3,mm_il3,
            end do
         end do
      end do
-        do iq=1,nqitg_sp(it)
-           do i=ista_kngp,iend_kngp
-              qitg_exx(i,mm0+iq,ik) = qitg_exx_l(i,iq)
-           end do
+     do iq=1,nqitg_sp(it)
+        do i=ista_kngp,iend_kngp
+           qitg_exx(i,mm0+iq,ik) = qitg_exx_l(i,iq)
         end do
+     end do
+
   end do ! ik
 
   deallocate(qitg_exx_l)
diff -uprN phase0_2015.01/src_phase_3d/configure phase0_2015.01.01/src_phase_3d/configure
--- phase0_2015.01/src_phase_3d/configure	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/configure	2016-07-12 12:51:53.000000000 +0900
@@ -343,6 +343,7 @@ fi
 bllib_netlib="Netlib BLAS/LAPACK"
 bllib_acml="AMD Core Math Library (ACML)"
 bllib_mkl="Intel Math Kernel Library (MKL)"
+bllib_mkl_sys="System-installed MKL"
 bllib_sunperf="Sun Performance Library"
 bllib_scsl="SGI Cray Scientific Library (SCSL)"
 bllib_essl="IBM Engineering and Scientific Subroutine Library (ESSL)"
@@ -357,6 +358,7 @@ add_option "${bllib_netlib}"
 case ${sel_comp} in
 "${gnu_compiler}"|"${g95_compiler}"|"${intel_compiler}")
 add_option "${bllib_mkl}"
+add_option "${bllib_mkl_sys}"
 	;;
 esac
 	;;
@@ -477,6 +479,20 @@ esac
 	;;
 esac
 	;;
+"${bllib_mkl_sys}")
+case ${sel_plat} in
+"${gnu_linux_ia32}")
+bldir=""
+bllib="-Wl,--start-group -lmkl_scalapack_core -lmkl_blacs_intelmpi -lmkl_intel -lmkl_sequential -lmkl_core -Wl,--end-group -Bdynamic"
+use_pthread="yes"
+	;;
+"${gnu_linux_amd64}")
+bldir=""
+bllib="-Wl,--start-group -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -Bdynamic"
+use_pthread="yes"
+	;;
+esac
+	;;
 "${bllib_mkl}")
 case ${sel_plat} in
 "${gnu_linux_ia32}")
@@ -609,6 +625,7 @@ esac
 # FFT library selection
 fftlib_jrcat="Built-in FFT subroutnes"
 fftlib_fftw3="FFTW3 library"
+fftlib_fftw3_sys="System-installed FFTW3 library"
 fftlib_mkl_fftw3="Intel MKL with FFTW3 interface"
 #fftlib_mkl_fftw3_gnu="Intel MKL with FFTW3 interface library built by gcc"
 fftlib_acml="AMD Core Math Library (ACML)"
@@ -624,10 +641,11 @@ unset list
 case ${sel_plat} in
 "${gnu_linux_ia32}"|"${gnu_linux_amd64}"|"${windows_mingw_ia32}"|"${windows_mingw_amd64}"|"${windows_sua_ia32}"|"${windows_sua_amd64}")
 add_option "${fftlib_fftw3}"
+add_option "${fftlib_fftw3_sys}"
 #add_option "${fftlib_acml}"
 case ${sel_comp} in
 "${gnu_compiler}"|"${g95_compiler}"|"${intel_compiler}")
-if [ "${sel_bllib}" = "${bllib_mkl}" ];then
+if [ "${sel_bllib}" = "${bllib_mkl}" -o "${sel_bllib}" = "${bllib_mkl_sys}" ];then
 add_option "${fftlib_mkl_fftw3}"
 #add_option "${fftlib_mkl_fftw3_gnu}"
 fi
@@ -678,127 +696,132 @@ fftdir="/usr/local/lib"
 	;;
 "${fftlib_acml}")
 fftlib="-lacml"
-case ${sel_comp} in
-"${gnu_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/gfortran32/lib"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/gfortran64/lib"
-	;;
-esac
-	;;
-"${intel_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/ifort32/lib"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/ifort64/lib"
-	;;
-"${windows_mingw_ia32}")
-fftlib="libacml_dll.lib"
-fftdir="/c/progra~1/AMD/acml5.3.0/ifort32/lib"
-	;;
-"${windows_mingw_amd64}")
-fftlib="libacml_dll.lib"
-fftdir="/c/acml5.3.0/ifort64/lib"
-	;;
-esac
-	;;
-"${pgi_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/pgi32/lib"
 	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/pgi64/lib"
-	;;
-"${windows_mingw_ia32}")
-fftlib="libacml_dll.lib"
-fftdir="/c/progra~1/AMD/acml5.3.0/pgi32/lib"
-	;;
-"${windows_mingw_amd64}")
-fftlib="libacml_dll.lib"
-fftdir="/c/acml5.3.0/win64/lib"
-	;;
-esac
-	;;
-"${pathscale_compiler}")
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/acml5.3.0/pathscale32/lib"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/acml5.3.0/pathscale64/lib"
-	;;
-esac
-	;;
-"${sun_studio}")
-case ${sel_plat} in
-"${sun_solaris_ia32}")
-fftdir="/opt/acml5.3.0/sun32/lib"
-	;;
-"${sun_solaris_amd64}")
-fftdir="/opt/acml5.3.0/sun64/lib"
-	;;
-esac
-	;;
-esac
-	;;
-"${fftlib_mkl}")
-fftlib=""
-fftdir="/opt/intel/mkl/interfaces/fftw3xf"
-case ${sel_plat} in
-"${gnu_linux_ia32}")
-fftdir="/opt/intel/mkl/9.1/lib/32"
-fftlib="${fftlib} -lmkl_ia32 -lguide"
-use_pthread="yes"
-	;;
-"${gnu_linux_amd64}")
-fftdir="/opt/intel/mkl/9.1/lib/em64t"
-fftlib="${fftlib} -lmkl_em64t -lguide"
-use_pthread="yes"
-	;;
-"${gnu_linux_ipf}")
-fftdir="/opt/intel/mkl/9.1/lib/64"
-fftlib="${fftlib} -lmkl_ipf -lguide"
-use_pthread="yes"
-	;;
-"${intel_mac_32bit}")
-fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/32"
-fftlib="${fftlib} -lmkl_ia32"
-	;;
-"${intel_mac_64bit}")
-fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/em64t"
-fftlib="${fftlib} -lmkl_em64t"
-	;;
-"${windows_mingw_ia32}")
-fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/ia32/lib"
-fftlib="fftw3xf_ms.lib mkl_c_dll.lib"
-	;;
-"${windows_mingw_amd64}")
-fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/em64t/lib"
-fftlib="fftw3xf_ms.lib mkl_dll.lib"
-	;;
-esac
-	;;
-"${fftlib_scsl}")
-fftdir=""
-fftlib="-lscs"
-	;;
-"${fftlib_matrix}")
-fftdir=""
-fftlib="-lmatmpp_sc"
-	;;
-"${fftlib_ssl2}")
-fftdir=""
-fftlib="-lssl2vp"
-	;;
-"${fftlib_asl}")
+"${fftlib_fftw3_sys}")
+fftlib="-lfftw3"
 fftdir=""
-fftlib="-lasl"
+
+#case ${sel_comp} in
+#"${gnu_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/gfortran32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/gfortran64/lib"
+#	;;
+#esac
+#	;;
+#"${intel_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/ifort32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/ifort64/lib"
+#	;;
+#"${windows_mingw_ia32}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/progra~1/AMD/acml5.3.0/ifort32/lib"
+#	;;
+#"${windows_mingw_amd64}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/acml5.3.0/ifort64/lib"
+#	;;
+#esac
+#	;;
+#"${pgi_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/pgi32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/pgi64/lib"
+#	;;
+#"${windows_mingw_ia32}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/progra~1/AMD/acml5.3.0/pgi32/lib"
+#	;;
+#"${windows_mingw_amd64}")
+#fftlib="libacml_dll.lib"
+#fftdir="/c/acml5.3.0/win64/lib"
+#	;;
+#esac
+#	;;
+#"${pathscale_compiler}")
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/acml5.3.0/pathscale32/lib"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/acml5.3.0/pathscale64/lib"
+#	;;
+#esac
+#	;;
+#"${sun_studio}")
+#case ${sel_plat} in
+#"${sun_solaris_ia32}")
+#fftdir="/opt/acml5.3.0/sun32/lib"
+#	;;
+#"${sun_solaris_amd64}")
+#fftdir="/opt/acml5.3.0/sun64/lib"
+#	;;
+#esac
+#	;;
+#esac
+#	;;
+#"${fftlib_mkl}")
+#fftlib=""
+#fftdir="/opt/intel/mkl/interfaces/fftw3xf"
+#case ${sel_plat} in
+#"${gnu_linux_ia32}")
+#fftdir="/opt/intel/mkl/9.1/lib/32"
+#fftlib="${fftlib} -lmkl_ia32 -lguide"
+#use_pthread="yes"
+#	;;
+#"${gnu_linux_amd64}")
+#fftdir="/opt/intel/mkl/9.1/lib/em64t"
+#fftlib="${fftlib} -lmkl_em64t -lguide"
+#use_pthread="yes"
+#	;;
+#"${gnu_linux_ipf}")
+#fftdir="/opt/intel/mkl/9.1/lib/64"
+#fftlib="${fftlib} -lmkl_ipf -lguide"
+#use_pthread="yes"
+#	;;
+#"${intel_mac_32bit}")
+#fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/32"
+#fftlib="${fftlib} -lmkl_ia32"
+#	;;
+#"${intel_mac_64bit}")
+#fftdir="/Library/Frameworks/Intel_MKL.framework/Versions/9.1/lib_serial/em64t"
+#fftlib="${fftlib} -lmkl_em64t"
+#	;;
+#"${windows_mingw_ia32}")
+#fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/ia32/lib"
+#fftlib="fftw3xf_ms.lib mkl_c_dll.lib"
+#	;;
+#"${windows_mingw_amd64}")
+#fftdir="/c/progra~1/Intel/MKL/9.1/lib_serial/em64t/lib"
+#fftlib="fftw3xf_ms.lib mkl_dll.lib"
+#	;;
+#esac
+#	;;
+#"${fftlib_scsl}")
+#fftdir=""
+#fftlib="-lscs"
+#	;;
+#"${fftlib_matrix}")
+#fftdir=""
+#fftlib="-lmatmpp_sc"
+#	;;
+#"${fftlib_ssl2}")
+#fftdir=""
+#fftlib="-lssl2vp"
+#	;;
+#"${fftlib_asl}")
+#fftdir=""
+#fftlib="-lasl"
 	;;
 esac
 
@@ -867,7 +890,7 @@ use_dgemm="yes"
 
 #ESM
 use_esm=""
-if [ "${sel_fftlib}" = "${fftlib_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" ];then
+if [ "${sel_fftlib}" = "${fftlib_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3}" -o "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" -o "${sel_fftlib}" = "${fftlib_fftw3_sys}" ];then
 echo "Do you want to enable the ESM feature? (yes/no) [yes]"
 read use_esm
 #echo "${use_esm}" >> config
@@ -1172,7 +1195,7 @@ cppflags="$cppflags -DJRCATFFT_WS -DCD_J
 	;;
 esac
 	;;
-"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}")
+"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}"|"${fftlib_fftw3_sys}")
 cppflags="$cppflags -DFFTW3"
 esm="YES"
 	;;
@@ -1225,7 +1248,7 @@ cppflags="$cppflags -DDISABLE_VDWDF"
 fi
 
 # flags applied to all cases
-cppflags="$cppflags -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D"
+cppflags="$cppflags -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -Dforsafe -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DPAW3D"
 
 # append "-WF," to cpp options.
 if [ "${sel_comp}" = "${ibm_xl}" ]; then
@@ -1455,17 +1478,20 @@ esac
 ########################################
 
 case ${sel_fftlib} in
-"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}")
+"${fftlib_fftw3}"|"${fftlib_mkl_fftw3}"|"${fftlib_mkl_fftw3_gnu}"|"${fftlib_fftw3_sys}")
 libs="-L./ \${LESM} ${libs}"
 esac
 
-if [ "${sel_fftlib}" = "${fftlib_fftw3}" ];then
-fftincludedir="${fftdir}/../include"
-fi
-if [ "${sel_fftlib}" = "${fftlib_mkl_fftw3}" ];then
-fftincludedir="${bldir}/../../include/fftw"
-elif [ "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" ];then
-fftincludedir="${bldir}/../../include/fftw"
+fftincludedir="INCLUDE="
+if [ "${sel_fftlib}" = "${fftlib_fftw3}" ] && [ -n "${fftdir}" ];then
+fftincludedir="INCLUDE=-I${fftdir}/../include"
+fi
+if [ "${sel_fftlib}" = "${fftlib_mkl_fftw3}" ] && [ -n "${bldir}" ];then
+fftincludedir="INCLUDE=-I${bldir}/../../include/fftw"
+elif [ "${sel_fftlib}" = "${fftlib_mkl_fftw3_gnu}" ] && [ -n "${bldir}" ];then
+fftincludedir="INCLUDE=-I${bldir}/../../include/fftw"
+elif [ "${sel_fftlib}" = "${fftlib_fftw3_sys}" ];then
+fftincludedir="INCLUDE="
 fi
 
 cat <<here > Makefile
@@ -1502,7 +1528,7 @@ LFLAGS = ${lflags}
 F90FLAGS_FIXED = ${f90flags_fixed}
 F90FLAGS_FREE = ${f90flags_free}
 MKLHOME=${bldir}
-INCLUDE=-I${fftincludedir}
+${fftincludedir}
 LIBS = ${libs}
 ###########################################################################
 ###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
diff -uprN phase0_2015.01/src_phase_3d/constraint_main.F90 phase0_2015.01.01/src_phase_3d/constraint_main.F90
--- phase0_2015.01/src_phase_3d/constraint_main.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/constraint_main.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 494 $)
 !
 !  SUBROUINE: constrained_dynamics
 !
@@ -45,6 +45,11 @@ subroutine constrained_dynamics()
   use m_Control_Parameters, only : printable, icond, icond_org
   use m_Files, only : nfout
   use m_Parallelization, only : nrank_conf, mype_conf, conf_para
+#ifdef PAW3D
+  use m_PseudoPotential, only : mmesh
+  use m_Ionic_System,    only : natm
+  use m_Parallelization, only : m_Parallel_init_mpi_paw_3D
+#endif
 
   implicit none
 
diff -uprN phase0_2015.01/src_phase_3d/input_interface.F90 phase0_2015.01.01/src_phase_3d/input_interface.F90
--- phase0_2015.01/src_phase_3d/input_interface.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/input_interface.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 475 $)
 !
 !  FUNCTION:  getUnitId, setUnit, updateUnits, clearUnitFlag, setDefaultUnits,
 !            realConvByUnit, f_openInputFile, f_closeInputFile, f_selectTop,
@@ -17,7 +17,7 @@
 !
 !
 !=======================================================================
-! $Id: input_interface.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: input_interface.F90 475 2016-02-23 05:22:18Z jkoga $
 !
 !!$!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!$! interface functions
@@ -376,7 +376,8 @@ integer function f_selectBlock( blocktag
 	    if( iret < 0 ) then
 	        f_selectBlock  = iret
                 iret = f_selectParentBlock()
-                print '( "A unit with the same dimension as [", a, "] has been already given in the block [", a, "]." )', trim(unit), trim(blocktag)
+                print '( "A unit with the same dimension as [", a, "] has been already given in the block [", a, "]." )'&
+              & , trim(unit), trim(blocktag)
  		return
 	    end if
 	end do
diff -uprN phase0_2015.01/src_phase_3d/m_CD_Mag_Moment.f90 phase0_2015.01.01/src_phase_3d/m_CD_Mag_Moment.f90
--- phase0_2015.01/src_phase_3d/m_CD_Mag_Moment.f90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_CD_Mag_Moment.f90	2016-07-12 12:51:53.000000000 +0900
@@ -1,5 +1,5 @@
 module m_CD_Mag_Moment
-! $Id: m_CD_Mag_Moment.f90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_CD_Mag_Moment.f90 476 2016-03-10 08:30:50Z yamasaki $
   use m_Control_Parameters,    only : noncol, ndim_magmom, kimg, iprimagmom, ON, OFF
   use m_Const_Parameters,     only : DP, PAI4, Bohr
 
@@ -514,7 +514,7 @@ contains
        ik = ista_k
        call new_radr_and_wos(ik,it)                 ! --> radr, wos
 
-       rcut = rad_cov_default( iatomn(it) )
+       rcut = rad_cov_default( nint(iatomn(it) ))  ! Revised according to a report from ASMS Co.ltd, 10 March 2016.
 
        Do il1=1, lpsmax(it)
 !          if ( il1 == iloc(it) ) cycle
diff -uprN phase0_2015.01/src_phase_3d/m_CD_mixing.F90 phase0_2015.01.01/src_phase_3d/m_CD_mixing.F90
--- phase0_2015.01/src_phase_3d/m_CD_mixing.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_CD_mixing.F90	2016-07-12 12:51:53.000000000 +0900
@@ -62,7 +62,7 @@
 #endif
 
 module m_CD_mixing
-! $Id: m_CD_mixing.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_CD_mixing.F90 492 2016-05-31 03:06:04Z jkoga $
   use m_Const_Parameters,    only : BUCS, DP, OFF &
        &                          , EXECUT,SIMPLE_CUBIC,BOHR,NO,ANTIFERRO &
        &                          , ANEW,RENEW,ON, SIMPLE,BROYD1,BROYD2,DFP,PULAY &
@@ -95,8 +95,10 @@ module m_CD_mixing
 ! ==============================================================================
 ! === Added by tkato 2011/11/09 ================================================
   use m_Control_Parameters,  only : sw_mix_bothspins_sametime &
-                                  , sw_recomposing_hsr, sw_force_simple_mixing_hsr
-  use m_Ionic_System,        only : ityp, natm
+                                  , sw_recomposing_hsr, sw_force_simple_mixing_hsr &
+                                  , num_proj_elems, proj_group, proj_attribute, num_projectors &
+                                  , max_projs
+  use m_Ionic_System,        only : ityp, natm,iproj_group
   use m_PseudoPotential,     only : ilmt, nlmt
   use m_Charge_Density,      only : hsr, hsro
 !===============================================================================
@@ -116,6 +118,8 @@ module m_CD_mixing
   use m_Control_Parameters,    only : sw_mix_charge_hardpart, sw_mix_charge_with_ekindens
 ! =============== 2014/09/16
 
+  use m_Orbital_Population, only : om,omold,ommix
+
   implicit none
 ! --> T. Yamasaki  03 Aug. 2009
     real(kind=DP), allocatable, dimension(:):: rmxtrc ! d(nspin_m)
@@ -186,7 +190,10 @@ module m_CD_mixing
 
 ! ========================== adde by K. Tagami ========================== 5.0
   integer :: nsize_rho_hsr
+  integer :: nsize_rho_hsr0
+  integer :: nsize_rho_om
   integer, private, allocatable :: imap_hsr(:)    ! d(nsize_rho_hsr)
+  integer, private, allocatable :: imap_om(:)    ! d(nsize_rho_hsr)
   real(kind=DP),private,allocatable, dimension(:,:) ::   rho_hsr, rhoo_hsr
                                                   ! d(nsize_rho_hsr,nspin)
 
@@ -199,6 +206,7 @@ module m_CD_mixing
   real(DP),private,allocatable,target,dimension(:,:,:,:) :: urec_hsr
 
   real(DP),private,allocatable,dimension(:,:,:) :: d0_hsr_h
+  logical,allocatable,dimension(:) :: diag_elem
 
   logical, save :: first = .true.
 
@@ -246,6 +254,16 @@ module m_CD_mixing
   real(kind=DP), pointer :: ekinq_l(:,:,:), ekinqo_l(:,:,:)
 ! ============== 2014/09/19
 
+  integer, public, allocatable :: i2lp(:) ! d(num_projectors)
+
+! ================================ modified by K. Tagami ================ 11.0
+!  integer, private :: max2lp ! max. of i2lp
+  integer, public :: max2lp ! max. of i2lp
+! ======================================================================= 11.0
+
+  integer, private :: l1max ! max. of l1
+  integer, private :: nyymax
+
 ! --- contained subroutines ---
 !   7. m_CD_prepare_precon       <-(ChargeDensity_Mixing)
 !  10. m_CD_simple_mixing        <-(ChargeDensity_Mixing)
@@ -2523,9 +2541,10 @@ contains
   end subroutine m_CD_mix_broyden2
 
 ! ===================== added by K. Tagami ============================== 5.0
-  subroutine m_CD_mix_broyden2_with_hsr(nfout,rmx)
+  subroutine m_CD_mix_broyden2_with_hsr(nfout,rmx,mixocc)
     integer, intent(in) :: nfout
     real(DP),intent(in) :: rmx
+    logical, intent(in) :: mixocc
 
     integer   :: iter,j,mxiter,icr,jcr
 !!$    real(DP)  :: v_dF(nspin),vF(nspin)
@@ -2539,9 +2558,10 @@ contains
     if (previous_waymix /= BROYD2.or.force_dealloc) then
        force_dealloc = .false.
        if ( first ) then
-          call create_map_func(.true.)
-          call alloc_rho_hsr
-          call create_map_func(.false.)
+          if(mixocc) call set_i2lp_max2lp()
+          call create_map_func(.true.,mixocc)
+          call alloc_rho_hsr(mixocc)
+          call create_map_func(.false.,mixocc)
           first = .false.
        endif
        call mix_dealloc_previous()
@@ -2560,6 +2580,10 @@ contains
     else
        call map_hsr_to_rho( hsr, rho_hsr )
        call map_hsr_to_rho( hsro,rhoo_hsr )
+       if(mixocc)then
+          call map_om_to_rho( om, rho_hsr )
+          call map_om_to_rho( omold,rhoo_hsr )
+       endif
     endif
 ! ========================================================================= 11.0
 
@@ -2649,7 +2673,6 @@ contains
 
        call mix_broyden_dealloc2                      !-(m_CD)
        call mix_broyden_dealloc2_hsr
-
     endif
 
 ! ============================== modified by K. Tagami ================= 11.0
@@ -2677,6 +2700,7 @@ contains
        call map_rho_to_hsr_noncl( hsr, hsi, rho_hsr )
     else
        call map_rho_to_hsr( hsr, rho_hsr )
+       if(mixocc) call map_rho_to_om( om, rho_hsr )
     endif
 
     deallocate(rmxtrc)
@@ -3639,10 +3663,11 @@ contains
   end subroutine m_CD_mix_pulay
 
 !!$ 11.07 AS Pulay version of 'sw_mix_charge_hardpart'
-  subroutine m_CD_mix_pulay_with_hsr(nfout,rmx)
+  subroutine m_CD_mix_pulay_with_hsr(nfout,rmx,mixocc)
     integer, parameter  :: iRho = 1, iResid = 2
     integer, intent(in) :: nfout
     real(DP),intent(in) :: rmx
+    logical, intent(in) :: mixocc
     integer   :: iter, mxiter
     real(DP),pointer,dimension(:)  :: e_wk, f_wk, ww1, finv
     integer, pointer,dimension(:)  :: ip
@@ -3656,9 +3681,10 @@ contains
     if(previous_waymix /= PULAY.or.force_dealloc) then
        force_dealloc = .false.
        if ( first ) then
-          call create_map_func(.true.)
-          call alloc_rho_hsr
-          call create_map_func(.false.)
+          if(mixocc) call set_i2lp_max2lp()
+          call create_map_func(.true.,mixocc)
+          call alloc_rho_hsr(mixocc)
+          call create_map_func(.false.,mixocc)
           first = .false.
        endif
        call mix_dealloc_previous()
@@ -3677,6 +3703,10 @@ contains
     else
        call map_hsr_to_rho( hsr, rho_hsr )
        call map_hsr_to_rho( hsro,rhoo_hsr )
+       if(mixocc)then
+          call map_om_to_rho( om, rho_hsr )
+          call map_om_to_rho( omold,rhoo_hsr )
+       endif
     endif
 ! ========================================================================= 11.0
 
@@ -3791,6 +3821,7 @@ contains
        call map_rho_to_hsr_noncl( hsr, hsi, rho_hsr )
     else
        call map_rho_to_hsr( hsr, rho_hsr )
+       if(mixocc) call map_rho_to_om( om, rho_hsr )
     endif
 
     deallocate(rmxtrc)
@@ -4208,10 +4239,11 @@ contains
 !!$ 11.07 AS Pulay version of 'sw_mix_charge_hardpart'
 
 ! ========================= added by K. Tagami ===================== 5.0
-  subroutine create_map_func(paramset)
-    logical :: paramset
+  subroutine create_map_func(paramset,mixocc)
+    logical :: paramset,mixocc
     integer :: n, ia, it
     integer :: lmt1, lmt2
+    integer :: ig,m2,m1,i,ip
 
     n=0
     do ia=1,natm
@@ -4221,13 +4253,36 @@ contains
           do lmt2 = lmt1, ilmt(it)
              n=n+1
 
-             if(.not.paramset) &
-                  & imap_hsr(n) = ia + natm *(lmt1-1) + natm*nlmt*( lmt2 -1 )
+             if(.not.paramset) then
+                imap_hsr(n) = ia + natm *(lmt1-1) + natm*nlmt*( lmt2 -1 )
+                diag_elem(n) = lmt1.eq.lmt2
+             endif
           end do
        end do
     end do
-    nsize_rho_hsr = n
+    nsize_rho_hsr0 = n
+    nsize_rho_hsr = nsize_rho_hsr0
 ! ================================ added by K. Tagami ============ 11.0
+    if(mixocc)then
+      n=0
+      do ia=1,natm
+         ig = iproj_group(ia)
+         if(ig<1) cycle
+         do i=1,num_proj_elems(ig)
+            ip=proj_group(i,ig)
+            it = proj_attribute(ip)%ityp
+            do m2=1,i2lp(ip)
+               do m1=m2,i2lp(ip)
+                  n=n+1
+                  if(.not.paramset) &
+                  & imap_om(n) = m1 + max2lp*(m2-1 + max2lp*( i-1 + max_projs*( ia-1 ) ) )
+               end do
+            end do
+         end do
+      enddo
+      nsize_rho_om = n
+      nsize_rho_hsr = nsize_rho_hsr0+nsize_rho_om
+    endif 
     if ( noncol ) then
        nsize_rho_hsr_realpart = n
 
@@ -4248,20 +4303,69 @@ contains
 ! ================================================================= 11.0
   end subroutine create_map_func
 
-  subroutine alloc_rho_hsr
+  subroutine alloc_rho_hsr(mixocc)
+    logical, intent(in) :: mixocc
+    if ( noncol ) then
+       nspin_m  = ndim_magmom
+    else
+       nspin_m = nspin/(af+1)
+    endif
+    if ( noncol ) then
+       nspin_m  = ndim_magmom
+       allocate( rho_hsr( nsize_rho_hsr,ndim_magmom)); rho_hsr = 0.0d0
+       allocate( rhoo_hsr(nsize_rho_hsr,ndim_magmom));rhoo_hsr = 0.0d0
+    else
+       nspin_m  = nspin/(af+1)
+       allocate( rho_hsr( nsize_rho_hsr,nspin_m)); rho_hsr = 0.0d0
+       allocate( rhoo_hsr(nsize_rho_hsr,nspin_m));rhoo_hsr = 0.0d0
+    endif
 ! ================================ modified by K. Tagami ============== 11.0
 !    allocate( rho_hsr( nsize_rho_hsr,nspin)); rho_hsr = 0.0d0
 !    allocate( rhoo_hsr(nsize_rho_hsr,nspin));rhoo_hsr = 0.0d0
-    allocate( rho_hsr( nsize_rho_hsr,ndim_magmom)); rho_hsr = 0.0d0
-    allocate( rhoo_hsr(nsize_rho_hsr,ndim_magmom));rhoo_hsr = 0.0d0
 ! ====================================================================== 11.0
-    allocate( imap_hsr(nsize_rho_hsr) ); imap_hsr = 0
+    allocate( imap_hsr(nsize_rho_hsr0) ); imap_hsr = 0
+    allocate(diag_elem(nsize_rho_hsr0));diag_elem=.false.
+    if(mixocc)then
+        allocate( imap_om(nsize_rho_om) ); imap_om = 0
+    endif
   end subroutine alloc_rho_hsr
 
+  subroutine set_i2lp_max2lp()
+    integer :: it,ip
+    integer, parameter :: ntau0=2
+
+! =========================== added by K. Tagami ====================== 11.0
+    integer :: nsize
+! ===================================================================== 11.0
+
+    allocate(i2lp(num_projectors))
+    do ip=1,num_projectors
+       i2lp(ip) = 2*proj_attribute(ip)%l+1
+    end do
+    max2lp = 0
+    do ip=1,num_projectors
+       if(i2lp(ip) > max2lp) then
+          max2lp = i2lp(ip)
+          l1max  = proj_attribute(ip)%l+1
+       end if
+    end do
+
+! =========================== modified by K. Tagami ====================== 11.0
+!!
+!!    nyymax = ntau0*l1max**2*(l1max**2+1)/2
+!
+    nsize = ntau0*( 2*( l1max -1 )+1 )
+    nyymax = nsize *( nsize +1 ) /2
+
+  end subroutine set_i2lp_max2lp
+
   subroutine dealloc_rho_hsr
     deallocate(rho_hsr)
     deallocate(rhoo_hsr)
     deallocate(imap_hsr)
+    deallocate(diag_elem)
+    if(allocated(imap_om)) deallocate(imap_om)
+    if(allocated(i2lp))    deallocate(i2lp)
   end subroutine dealloc_rho_hsr
 
   subroutine map_hsr_to_rho( hsr,rho )
@@ -4272,7 +4376,7 @@ contains
 
     do is=1,nspin,(af+1)
        rho(:,is)=0.d0
-       do i=1,nsize_rho_hsr
+       do i=1,nsize_rho_hsr0
           rho(i,is) = hsr( imap_hsr(i),is )
        end do
     end do
@@ -4300,6 +4404,55 @@ contains
   end subroutine map_hsr_to_rho_noncl
 ! ===================================================================== 11.0
 
+  subroutine map_om_to_rho(om,rho)
+    real(kind=DP), intent(in) :: om(max2lp*max2lp*max_projs*natm,nspin)
+    real(kind=DP), intent(out) :: rho(nsize_rho_hsr,nspin)
+
+    integer :: i,is
+
+    do is=1,nspin,(af+1)
+       do i=nsize_rho_hsr0+1,nsize_rho_hsr
+          rho(i,is) = om(imap_om(i-nsize_rho_hsr0),is)
+       end do
+    end do
+  end subroutine map_om_to_rho
+
+  subroutine map_rho_to_om(om,rho)
+    real(kind=DP), intent(out) :: om(max2lp*max2lp*max_projs*natm,nspin)
+    real(kind=DP), intent(in) :: rho(nsize_rho_hsr,nspin)
+    integer :: i,is,ia,ig,ip,it,m1,m2
+
+    do is=1,nspin,(af+1)
+       do i=nsize_rho_hsr0+1,nsize_rho_hsr
+          om(imap_om(i-nsize_rho_hsr0),is) = rho(i,is)
+       end do
+    end do
+    call symmetrize(om)
+
+  contains
+
+    subroutine symmetrize(om)
+      real(kind=DP), intent(inout) :: om(max2lp,max2lp,max_projs,natm,nspin)
+
+      do is=1,nspin,(af+1)
+         do ia=1,natm
+            ig = iproj_group(ia)
+            if(ig<1) cycle
+            do i=1,num_proj_elems(ig)
+               ip=proj_group(i,ig)
+               it = proj_attribute(ip)%ityp
+               do m2=1,i2lp(ip)
+                  do m1=m2,i2lp(ip)
+                     if(m1/=m2) om(m2,m1,i,ia,is) = om(m1,m2,i,ia,is)
+                  end do
+               end do
+            end do
+         end do
+      end do
+    end subroutine symmetrize
+
+  end subroutine map_rho_to_om
+
   subroutine map_rho_to_hsr( hsr,rho )
     real(kind=DP), intent(out) :: hsr( natm *nlmt *nlmt, nspin )
     real(kind=DP), intent(in) :: rho( nsize_rho_hsr,nspin )
@@ -4308,8 +4461,8 @@ contains
 
 
     do is=1,nspin,(af+1)
-       hsr(1:nsize_rho_hsr,is) = 0.0d0
-       do i=1,nsize_rho_hsr
+       hsr(1:nsize_rho_hsr0,is) = 0.0d0
+       do i=1,nsize_rho_hsr0
           hsr(imap_hsr(i),is) = rho(i,is)
        end do
     end do
@@ -4517,4 +4670,27 @@ contains
                                                   __TIMER_SUB_STOP(1144)
   end subroutine m_CD_simple_mixing_hsr
 
+  subroutine m_CD_hsr_diff(nfout)
+    integer, intent(in) :: nfout
+    integer :: i,j,ndiag,nnondiag
+    real(kind=DP) :: sumhsr_diag,sumhsr_nondiag
+    sumhsr_diag = 0.d0
+    sumhsr_nondiag = 0.d0
+    ndiag = 0
+    nnondiag = 0
+    do i=1,nspin_m
+       do j=1,nsize_rho_hsr
+          if(diag_elem(j))then
+             sumhsr_diag = sumhsr_diag+abs(rhoo_hsr(j,i)-rho_hsr(j,i))
+             ndiag = ndiag+1
+          else
+             sumhsr_nondiag = sumhsr_nondiag+abs(rhoo_hsr(j,i)-rho_hsr(j,i))
+             nnondiag = nnondiag+1
+          endif
+       enddo
+    enddo
+    if(printable) write(nfout,'(a,f15.10)') '!** dhsr_diag   ',sumhsr_diag/dble(ndiag)
+    if(printable) write(nfout,'(a,f15.10)') '!** dhsr_nondiag',sumhsr_nondiag/dble(nnondiag)
+  end subroutine m_CD_hsr_diff
+
 end module m_CD_mixing
diff -uprN phase0_2015.01/src_phase_3d/m_CS_Magnetic.F90 phase0_2015.01.01/src_phase_3d/m_CS_Magnetic.F90
--- phase0_2015.01/src_phase_3d/m_CS_Magnetic.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_CS_Magnetic.F90	2016-07-12 12:51:53.000000000 +0900
@@ -21,7 +21,7 @@ module m_CS_Magnetic
 ! ============= 2014/08/14
 
 ! == KT_add === 2014/08/26
-  use m_Control_Parameters,   only : SpinOrbit_mode, noncol
+  use m_Control_Parameters,   only : SpinOrbit_mode, noncol, iprisym
   use m_Const_Parameters,    only : Neglected
   use m_Ionic_System,  only :   mag_moment0_atoms, ionic_charge_atoms, &
        &                        mag_moment0_atoms_is_defined
@@ -46,6 +46,8 @@ module m_CS_Magnetic
   integer, allocatable :: magmom_dir_inversion_opr_flag(:)
 ! ============= 2014/08/14
 
+  complex(kind=CMPLDP), allocatable :: op_spinor(:,:,:)
+
 contains
 
 ! ------------------------------------------------------------------------
@@ -1082,4 +1084,104 @@ contains
 
   end subroutine m_CS_set_inverse_operation
 
+  subroutine m_CS_set_op_spinor
+    integer :: i, j
+    real(kind=DP) :: k1, k2, k3, ux, uy, uz, c1, c2, c3
+    real(kind=DP) :: sinth, costh, theta, s1, determinant
+!
+    real(kind=DP), parameter :: delta = 1.0D-4
+    complex(kind=CMPLDP), parameter :: zi = ( 0.0d0, 1.0d0 )
+!
+    real(kind=DP), allocatable :: op_work(:,:,:)
+!
+    allocate( op_work(3,3,nopr) );     op_work = op
+    if ( .not. allocated( op_spinor ) ) allocate( op_spinor(2,2,nopr) )
+
+    Do i=1, nopr
+       call calc_determinant( op(:,:,i), determinant )
+       if ( determinant < 0 ) then
+          op_work(1,1,i) = -op_work(1,1,i)
+          op_work(2,2,i) = -op_work(2,2,i)
+          op_work(3,3,i) = -op_work(3,3,i)
+       endif
+
+       costh = ( op_work(1,1,i) +op_work(2,2,i) +op_work(3,3,i) -1.0d0 ) /2.0d0
+       sinth = sqrt( 1.0d0 -costh**2 )
+
+       theta = acos( costh )
+!
+       if ( sinth > delta ) then
+          k1 = ( op_work(3,2,i) -op_work(2,3,i) ) /2.0d0
+          k2 = ( op_work(1,3,i) -op_work(3,1,i) ) /2.0d0
+          k3 = ( op_work(2,1,i) -op_work(1,2,i) ) /2.0d0
+
+          ux = k1 /sinth;       uy = k2 /sinth;         uz = k3 /sinth
+       else
+          if ( costh > 1.0 -delta ) then
+             ux = 0.0d0;  uy = 0.0d0;   uz = 1.0d0
+             theta = 0.0d0
+          else if ( costh < -1.0 +delta ) then
+             k1 = ( op_work(1,1,i) + 1.d0 )/2.0d0
+             k2 = ( op_work(2,2,i) + 1.d0 )/2.0d0
+             k3 = ( op_work(3,3,i) + 1.d0 )/2.0d0
+
+             ux = sqrt(k1);   uy = sqrt(k2);     uz = sqrt(k3)
+
+             if ( uz > delta ) then
+                if ( op_work(1,3,i) < 0 )  ux = -ux
+                if ( op_work(2,3,i) < 0 )  uy = -uy
+             else
+                if ( op_work(1,2,i) < 0 )  uy = -uy
+             endif
+
+          endif
+       endif
+
+#if 0
+       write(*,*) "i = ", i, "costh = ", costh, "sinth = ", sinth
+       write(*,'(4(A,F10.2))') "Axis: ux = ", ux,  ", uy = ", uy,  ", uz = ", uz, &
+            &                  ", Angle = ", theta /PAI *180.0d0
+       write(*,*)
+#endif
+
+       c1 = cos( theta /2.0d0 );   s1 = sin( theta /2.0d0 );
+
+       op_spinor(1,1,i) = c1 -zi *uz *s1
+       op_spinor(1,2,i) = ( -zi *ux -uy ) *s1
+       op_spinor(2,1,i) = ( -zi *ux +uy ) *s1
+       op_spinor(2,2,i) = c1 +zi *uz *s1
+!
+       if ( determinant < 0 ) op_spinor(:,:,i) = zi *op_spinor(:,:,i)
+    End Do
+!
+    if ( iprisym > 1 ) then
+       write(nfout,*) '*** symmetry operation for spinor ***'
+       Do i=1, nopr
+          write(nfout,*) ' #symmetry op. = ', i
+          write(nfout,'(A,F8.4,A,F8.4,2A,F8.4,A,F8.4,A)') &
+               &                '( ', real(op_spinor(1,1,i)), ', ', &
+               &                      aimag(op_spinor(1,1,i)), ' I ) ',  &
+               &                '( ',real(op_spinor(1,2,i)), ', ', &
+               &                      aimag(op_spinor(1,2,i)), ' I )'
+          write(nfout,'(A,F8.4,A,F8.4,2A,F8.4,A,F8.4,A)') &
+               &                '( ', real(op_spinor(2,1,i)), ', ', &
+               &                      aimag(op_spinor(2,1,i)), ' I ) ',  &
+               &                '( ',real(op_spinor(2,2,i)), ', ', &
+               &                      aimag(op_spinor(2,2,i)), ' I )'
+       End Do
+    endif
+    deallocate( op_work )
+
+  contains
+
+    subroutine calc_determinant( a, determinant )
+      real(kind=DP), intent(in) :: a(3,3)
+      real(kind=DP), intent(out) :: determinant
+      determinant = a(1,1)*( a(2,2)*a(3,3) -a(2,3)*a(3,2) ) &
+          &       -a(1,2)*( a(2,1)*a(3,3) -a(2,3)*a(3,1) ) &
+           &       +a(1,3)*( a(2,1)*a(3,2) -a(2,2)*a(3,1) )
+    end subroutine calc_determinant
+
+  end subroutine m_CS_set_op_spinor
+
 end module m_CS_Magnetic
diff -uprN phase0_2015.01/src_phase_3d/m_Charge_Density.F90 phase0_2015.01.01/src_phase_3d/m_Charge_Density.F90
--- phase0_2015.01/src_phase_3d/m_Charge_Density.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Charge_Density.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 478 $)
 !
 !  MODULE: m_Charge_Density
 !
@@ -86,7 +86,7 @@
 #endif
 
 module m_Charge_Density
-! $Id: m_Charge_Density.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Charge_Density.F90 478 2016-03-12 12:28:48Z ktagami $
   use m_Const_Parameters,    only : BUCS, DP, PAI2, DIRECT,OFF,zi,SKIP &
        &                          , EXECUT,SIMPLE_CUBIC,BOHR,NO,ANTIFERRO &
        &                          , ANEW,RENEW,ON, SIMPLE,BROYD1,BROYD2,DFP,PULAY &
@@ -3033,6 +3033,7 @@ contains
   end subroutine m_CD_rd_chgq
 
 
+
   subroutine m_CD_wd_chgq(nfchgt,F_CHGT_partitioned)
     integer, intent(in) :: nfchgt
     logical, intent(in) :: F_CHGT_partitioned
@@ -5002,6 +5003,27 @@ contains
 ! =================================================================== 11.0
 
 
+  subroutine m_CD_keep_retrieve_hsr(keep)
+    logical, intent(in) :: keep
+    real(kind=DP),allocatable,dimension(:,:,:,:),save   :: hsr_tmp
+    real(kind=DP),allocatable,dimension(:,:,:,:),save   :: hsi_tmp
+    if(keep)then
+      if ( noncol ) then
+        if (.not.allocated(hsr_tmp)) allocate(hsr_tmp(natm,nlmt,nlmt,ndim_magmom)); hsr_tmp = hsr
+        if (.not.allocated(hsi_tmp)) allocate(hsi_tmp(natm,nlmt,nlmt,ndim_magmom)); hsi_tmp = hsi_tmp
+      else
+        if(.not.allocated(hsr_tmp)) allocate(hsr_tmp(natm,nlmt,nlmt,nspin)); hsr_tmp = hsr
+      endif
+    else
+      hsr = hsr_tmp
+      deallocate(hsr_tmp)
+      if ( noncol ) then
+        hsi = hsi_tmp
+        deallocate(hsi_tmp)
+      endif
+    endif
+  end subroutine m_CD_keep_retrieve_hsr
+
   subroutine m_CD_keep_chgq_l()
 ! =============================== modified y K. Tagami =============== 11.0
 !!    allocate(chgq_tmp(ista_kngp:iend_kngp,kimg,nspin)); chgq_tmp = 0.d0
diff -uprN phase0_2015.01/src_phase_3d/m_Const_Parameters.F90 phase0_2015.01.01/src_phase_3d/m_Const_Parameters.F90
--- phase0_2015.01/src_phase_3d/m_Const_Parameters.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Const_Parameters.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE: m_Const_Parameters
 !
@@ -32,7 +32,7 @@
 !
 !***************************************************************
 module m_Const_Parameters
-! $Id: m_Const_Parameters.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Const_Parameters.F90 472 2015-11-28 09:01:17Z ktagami $
   implicit none
 
   integer, parameter :: DRIVER_GENERAL=0, DRIVER_CONSTRAINT=1, DRIVER_NEB=2, DRIVER_PHONON=3 &
@@ -363,6 +363,11 @@ end type unitlist
 
 integer, parameter :: BULK   = 1
 integer, parameter :: DEFECT = 2
+integer, parameter :: Positron_CONV = 1  ! zero-density limit, non-scf
+                                         !    calc p- wfns once
+integer, parameter :: Positron_GGGC = 2  ! zero-density limit, scf
+                                         !    update e- and p- wfns successively
+integer, parameter :: Positron_PSN  = 3  ! fully two-component scf
 
 ! ========================== KT_mod =================== 13.0B 
 !integer, parameter :: unit_list_size = 50
diff -uprN phase0_2015.01/src_phase_3d/m_Control_Parameters.F90 phase0_2015.01.01/src_phase_3d/m_Control_Parameters.F90
--- phase0_2015.01/src_phase_3d/m_Control_Parameters.F90	2015-09-14 15:38:12.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Control_Parameters.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 453 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 492 $)
 !
 !  MODULE: m_Control_Parameters
 !
@@ -49,7 +49,7 @@
 !
 module m_Control_Parameters
 !     (m_CtrlP)
-! $Id: m_Control_Parameters.F90 453 2015-09-01 05:22:55Z ktagami $
+! $Id: m_Control_Parameters.F90 492 2016-05-31 03:06:04Z jkoga $
 !
 ! This module "m_Control_Parameters" holds parameters that give
 ! methods and calculational conditions in jobs.
@@ -96,8 +96,7 @@ module m_Control_Parameters
        &, BARE, PE1, PE2 &
        &, MASK_FUNCTION, PREFITTING
 ! ==============================================================================
-  use m_Parallelization, only   : mpi_comm_group,npes,mype,ierr,nrank_e
-
+  use m_Parallelization, only   : mpi_comm_group,npes,mype,ierr,nrank_e,nrank_k
 ! ====================================added by K. Tagami =================5.0
   use m_Const_Parameters, only : OccMat_type1, OccMat_Type2, &
        &                         Ueff_From_First, Ueff_Gradually
@@ -109,13 +108,10 @@ module m_Control_Parameters
 ! ============================================================== 11.0
   use m_ErrorMessages,        only : INVALID_CHARGE_MIXING
 
-! ====== KT_add ========================================= 13.0E
-  use m_Const_Parameters,     only : FERMI_DIRAC, CONST_kB
-! ========= ============================================= 13.0E
-
-! ====== KT_add ========================================= 13.0U3
-  use m_Const_Parameters,     only : STEPWISE
-! ========= ============================================= 13.0U3
+! ====== KT_add ========================================= 13.0E, 13.0U3, positron
+  use m_Const_Parameters,     only : FERMI_DIRAC, CONST_kB, STEPWISE, &
+       &                             Positron_CONV, Positron_GGGC, Positron_PSN
+! ======================================================= 13.0E, 13.0U3, positron
 
   implicit none
   include 'mpif.h'
@@ -195,6 +191,7 @@ module m_Control_Parameters
   character(len("convergence_ek")),private, parameter :: tag_convergence_ek = "convergence_ek"
 
   integer ::            neg = 1       ! number of eigen values for each k-point
+  logical ::            neg_is_given = .false.
   integer ::            neg_previous  ! number of eigen values for each k-point in the previous job
   integer ::            neg_fixed = 0 ! number of eigen values that are fixed
   integer ::            num_extra_bands = 0 ! number of extra eigen values for each k-point.
@@ -230,6 +227,15 @@ module m_Control_Parameters
   real(kind=DP) ::      epsilon_ele
   integer ::            sw_positron_file = ON
   integer ::            positron_filetype = CUBE
+
+  integer ::  positron_method = Positron_CONV
+
+  character(len("positron_method")),private,parameter :: &
+       &             tag_positron_method    = "positron_method"
+  character(len("conv")),private,parameter ::         tag_conv    = "conv"
+  character(len("gggc")),private,parameter ::         tag_gggc    = "gggc"
+  character(len("psn")),private,parameter ::          tag_psn     = "psn"
+
   character(len=LEN_TITLE) ::    positron_title(5)
   data positron_title / &
        &  "positron density", "valence electron density", "e-p pair density" &
@@ -589,6 +595,7 @@ module m_Control_Parameters
 #endif
 
 ! === KT_add === 13.1R
+  integer            :: sw_raman = OFF
   integer            :: sw_phonon_with_epsilon = OFF
   integer            :: sw_calc_dielectric_tensor = OFF
 ! ============== 13.1R
@@ -676,6 +683,19 @@ module m_Control_Parameters
   integer :: sw_neglect_stress_offdiagonal = OFF
 ! ==== 2014/11/22
 
+! ==== EXP_CELLOPT === 2015/09/24
+! -----------------------
+! read nfchgt.data of previous cell
+! -----------------------
+  character(len("sw_read_nfchgt_prev_cell")),private,parameter :: &
+       &                 tag_sw_read_nfchgt_prev_cell = "sw_read_nfchgt_prev_cell"
+  character(len("sw_read_nfzaj_prev_cell")),private,parameter :: &
+       &                 tag_sw_read_nfzaj_prev_cell = "sw_read_nfzaj_prev_cell"
+
+  integer :: sw_read_nfchgt_prev_cell = OFF
+  integer :: sw_read_nfzaj_prev_cell = OFF
+! ==================== 2015/09/24
+
 ! -------------------
 ! symmetry during optimization
 ! ------------------
@@ -889,6 +909,7 @@ module m_Control_Parameters
   character(len("none")),private,parameter ::             tag_none = "none"
   character(len("0")),private,parameter ::              tag_0 = "0"
   character(len("unit_matrix")),private,parameter :: tag_unit_matrix = "unit_matrix"
+  character(len("spin_polarized")),private,parameter :: tag_spin_polarized = "spin_polarized"
 ! === For restart lm+MSD! by tkato 2012/02/16 ==================================
   character(len("dtim_previous")), private, parameter :: tag_dtim_previous = "dtim_previous"
 ! ==============================================================================
@@ -1131,6 +1152,7 @@ module m_Control_Parameters
 !!$  real(kind=DP),private ::      edelta_change_to_rmm   = 1.d-7
   real(kind=DP),private ::      edelta_change_to_rmm   = 1.d-3
   real(kind=DP),private ::      edelta_change_to_rmm_md  = 1.d-3
+  logical,private :: edelta_rmm_given = .false.
   integer,public ::            rmm_save_memory_mode = OFF
   character(len("rmm")),private,parameter ::                 tag_rmm               = "rmm"
   character(len("imGSrmm")),private,parameter ::             tag_imGSrmm           = "imgsrmm"
@@ -1180,11 +1202,17 @@ module m_Control_Parameters
   real(kind=DP) ::      damp = 1.d0
   integer ::            submat_period = 1
   real(kind=DP) ::      submat_critical_ratio = 1.d-15
+#ifdef _USE_SCALAPACK_
+  integer :: sw_scalapack = ON
+#else
   integer :: sw_scalapack = OFF
+#endif
 #ifdef _DEFAULT_HOUSEHOLDER_
   integer :: method_scalapack = HOUSEHOLDER
-#else
+#elif _DEFAULT_DIVIDEandCONQUER_
   integer :: method_scalapack = DIVIDEandCONQUER
+#else
+  integer :: method_scalapack = HOUSEHOLDER
 #endif
 !finteger :: block_size = 64
   integer :: block_size = 0
@@ -1475,12 +1503,16 @@ module m_Control_Parameters
                          & tag_sw_mix_charge_hardpart = "sw_mix_charge_hardpart"
   character(len("sw_mix_bothspins_sametime")), private, parameter :: &
                          & tag_sw_mix_bothspins_sametime = "sw_mix_bothspins_sametime"
+  character(len("sw_mix_occ_matrix")),private,parameter :: &
+                         & tag_sw_mix_occ_matrix = "sw_mix_occ_matrix"
 !!$  integer :: sw_mix_charge_hardpart = OFF
 !!$  integer :: sw_mix_bothspins_sametime = OFF
   integer :: sw_mix_charge_hardpart = OFF
   integer :: sw_mix_bothspins_sametime = ON
 !
 !
+  integer :: sw_mix_occ_matrix = OFF
+
   character(len("sw_force_simple_mixing_hsr")), private, parameter :: &
                  & tag_sw_force_simplemix_hsr = "sw_force_simple_mixing_hsr"
   character(len("sw_recomposing_hsr")), private, parameter :: &
@@ -1675,6 +1707,7 @@ module m_Control_Parameters
   integer,public ::        dos_method = Gauss_distrib_func
   integer,public ::        sw_dos_gaussdistrib = OFF
   integer,public ::        dos_subroutine = 5
+
   real(kind=DP),public ::  deltaE_dos = 1.d-4
   real(kind=DP),public ::  variance_dos_GaussD = 1.d-6
   integer,public ::        nwd_dos_window_width = 10
@@ -1793,7 +1826,17 @@ module m_Control_Parameters
   character(len("ry")),private,parameter :: tag_ry = "ry"
   character(len("rz")),private,parameter :: tag_rz = "rz"
 
-! ---- WaveFuction Squared
+! ---- WaveFunction Orb-projection
+  integer :: sw_print_wf_orb_projection = OFF
+  integer :: wf_orb_proj_print_format = 0
+  character(len("wf_orb_projection")),private,parameter ::  &
+       &            tag_wf_orb_projection       = "wf_orb_projection"
+  character(len("sw_print_wf_orb_projection")),private,parameter :: &
+       &       tag_sw_print_wf_orb_projection   = "sw_print_wf_orb_projection"
+  character(len("wf_orb_proj_print_format")),private,parameter :: &
+       &       tag_wf_orb_proj_print_format = "wf_orb_proj_print_format"
+
+! ---- WaveFunction Squared
   integer :: sw_wf_squared_rspace = OFF
   integer,public ::  wf_squared_filetype = CUBE
   integer,public ::  ik_wf_squared = 1
@@ -1838,7 +1881,7 @@ module m_Control_Parameters
   ! --- approximate DFT+U : Hubbard model ---
   integer :: sw_hubbard = OFF
   integer :: sw_constraint = OFF
-  integer :: initial_occmat = SPIN_POLARIZED
+  integer :: initial_occmat = OFF
   real(kind=DP) :: initial_occmat_factor=1.d0
   integer :: const_site = 0
   real(kind=DP) :: const_alpha = 0.d0
@@ -2120,7 +2163,6 @@ module m_Control_Parameters
   integer :: sw_remove_pcc_from_pawpot = off
 ! ================== 13.0Y
 
-  integer :: sw_rspace_hyb = OFF
   character(len("sw_eval_vexx")), private, parameter :: tag_sw_eval_vexx = "sw_eval_vexx"
   character(len("sw_retard_eigval_evaluation")), private, parameter :: &
    & tag_sw_retard_eigval_evaluation = "sw_retard_eigval_evaluation"
@@ -2355,6 +2397,13 @@ module m_Control_Parameters
   integer :: lmax_rsb = 3
   real(kind=DP) :: eps_rsb = 1.d-2
 
+! --- msb effect
+  character(len("msb")), private, parameter :: &
+       &                     tag_msb = "msb"
+  character(len("sw_calc_contact_density")), private, parameter :: &
+       &                     tag_sw_calc_contact_density = "sw_calc_contact_density"
+  integer :: sw_calc_contact_density = off
+
 ! ================= KT_add === 13.0S
 !-- CoreLevels
 !
@@ -2470,6 +2519,7 @@ module m_Control_Parameters
   integer :: nr12=3000,nk=1500
   real(kind=DP) :: maxk=10.d0,r12max=30.0d0
   logical :: oneshot = .true.
+  logical :: sw_save_memory_vdw = .true.
 
   logical :: force_exx_energy1=.false.
 
@@ -2568,6 +2618,19 @@ contains
     if(mype == 0 .or. ipriparadeb /= 0) printable = .true.
   end subroutine m_CtrlP_set_printable
 
+#ifdef _USE_SCALAPACK_
+  subroutine m_CtrlP_set_sw_scalapack(printable)
+    logical, intent(in) :: printable
+    if(nrank_k >=2) then
+       sw_scalapack = OFF
+       ! This is a tentative default setting until scalapack parallelization is completed for nrank_k>=2
+       if(printable) write(6,'(a)') ' default parameter of sw_scalapack is set off'
+    else
+       if(printable) write(6,'(a," : sw_scalapack = ",i3)') ' default parameter of sw_scalapack is not changed', sw_scalapack
+    end if
+  end subroutine m_CtrlP_set_sw_scalapack
+#endif
+
 #ifndef _EMPIRICAL_
   subroutine m_CtrlP_check_matm(nfout,natm)
     integer, intent(in) :: nfout,natm
@@ -2576,6 +2639,10 @@ contains
        if(printable) &
             & write(nfout,'(" !** rmm_precal_phase_matm(redefined) = ",i10," <<m_CtrlP_check_matm>>")') rmm_precal_phase_matm
     end if
+    if(.not.edelta_rmm_given)then
+       edelta_change_to_rmm = 1.d-3/dble(natm)
+       edelta_change_to_rmm_md = 1.d-3/dble(natm)
+    endif
   end subroutine m_CtrlP_check_matm
 
   subroutine alloc_w_solver(n)
@@ -3220,12 +3287,12 @@ contains
       logical :: tf
       call strncmp0(trim(rstr), tag_bulk, tf)
       if(tf) then
-         sw_positron = BULK
+         sw_positron = BULK;   positron_method = Positron_CONV
          goto 1001
       end if
       call strncmp0(trim(rstr), tag_defect, tf)
       if(tf) then
-         sw_positron = DEFECT
+         sw_positron = DEFECT;   positron_method = Positron_GGGC
          goto 1001
       end if
 1001  continue
@@ -3284,13 +3351,18 @@ contains
           gmaxp = gmax*2.d0
        end if
        if(sw_positron /= OFF) call getgmax_positron()
-       if( f_getIntValue( tag_num_bands, iret ) == 0) neg = iret
+       if( f_getIntValue( tag_num_bands, iret ) == 0) then
+          neg = iret
+          neg_is_given = .true.
+       end if
        if( f_selectBlock( tag_smearing) == 0) then
           if( f_getStringValue( tag_smearing_method, rstr,LOWER) == 0) then
              call set_smearing_method(rstr) ! way_of_smearing
           end if
-          if( f_getRealValue( tag_smearing_width, dret, "hartree") == 0) width = dret
-          if(way_of_smearing == TETRAHEDRON) width_tetra = width
+          if( f_getRealValue( tag_smearing_width, dret, "hartree") == 0) then
+             width = dret
+             if(way_of_smearing == TETRAHEDRON) width_tetra = width
+          endif
           if( f_selectBlock( tag_tetrahedron) == 0) then
              if( f_getIntValue( tag_dimension, iret ) == 0) idimtetra = iret
              if( f_getIntValue( tag_sw_correction, iret ) == 0) sw_correction = iret
@@ -3416,12 +3488,33 @@ contains
                 if( f_getIntValue( tag_f3, iret ) == 0) reduction_factor_exx(3) = iret
                 iret = f_selectParentBlock()
              end if
+             if(f_getStringValue(tag_functional_type,rstr,LOWER)==0)then
+                if(rstr.eq.tag_pbe0) then
+                   write(nfout,'(a)') ' !** functional_type : PBE0'
+                   alpha_exx = 0.25d0
+                   sw_screened_exchange = OFF
+                   sw_exchange_only = OFF
+                else if (rstr.eq.tag_hse06) then
+                   write(nfout,'(a)') ' !** functional_type : HSE06'
+                   alpha_exx = 0.25d0 
+                   omega_exx = 0.106d0
+                   sw_screened_exchange = ON
+                   sw_exchange_only = OFF
+                else if (rstr.eq.tag_hf) then
+                   write(nfout,'(a)') ' !** functional_type : HF (Hartree-Fock)'
+                   alpha_exx = 1.d0
+                   sw_screened_exchange = OFF
+                   sw_exchange_only = ON
+                else
+                   write(nfout,'(a)') ' !** WARNING : invalid functional_type : '//trim(rstr)
+                endif
+             endif
+
              if( f_getRealValue( tag_alpha, dret, "") == 0) alpha_exx = dret
              if( f_getRealValue( tag_omega, dret, "") == 0) omega_exx = dret
              if( f_getRealValue( tag_omega_hf, dret, "") == 0)  omega_exx = dret
              if( f_getRealValue( tag_omega_pbe, dret, "") == 0) omega_exx_pbe = dret
 
-             if( f_getIntValue(tag_sw_rspace,iret)==0 ) sw_rspace_hyb = iret
              if( f_getIntValue(tag_sw_eval_vexx,iret)==0 ) sw_eval_vexx = iret
              if( f_getIntValue(tag_sw_retard_eigval_evaluation,iret)==0 ) sw_retard_eigval_evaluation = iret
              if( f_getIntValue(tag_sw_precalculate,iret)==0 ) sw_precalculate = iret
@@ -3439,8 +3532,6 @@ contains
                 endif
              endif
 
-! ============================= KT_Test ============================ 12.5Exp
-! ------
              if( f_getIntValue( tag_truncate_vxw_updating, iret ) == 0 ) then
                 if(iret==ON) truncate_vxw_updating = .true.
              endif
@@ -3455,9 +3546,6 @@ contains
              hybrid_calc_is_active = .true.
 
              gmax_exx = gmax;   gmaxp_exx = gmaxp
-             if(nmax_G_hyb==-1) gmaxp_exx = gmax*2.d0
-             if(nmax_G_hyb==-2) gmaxp_exx = gmax*4.d0**(1.d0/3.d0)
-             if(nmax_G_hyb==-3) gmaxp_exx = gmax
 ! -----
              if ( f_getRealValue( tag_gmax_exx_ratio, dret, "") == 0 ) then
                 if( dret < 0.2D0 .or. dret > 1.0D0 ) then
@@ -3518,27 +3606,6 @@ contains
                 if(rstr.eq.tag_minimal)  potential_update = 2
              endif
 
-             if(f_getStringValue(tag_functional_type,rstr,LOWER)==0)then
-                if(rstr.eq.tag_pbe0) then
-                   write(nfout,'(a)') ' !** functional_type : PBE0'
-                   alpha_exx = 0.25d0
-                   sw_screened_exchange = OFF
-                   sw_exchange_only = OFF
-                else if (rstr.eq.tag_hse06) then
-                   write(nfout,'(a)') ' !** functional_type : HSE06'
-                   alpha_exx = 0.25d0 
-                   omega_exx = 0.106d0
-                   sw_screened_exchange = ON
-                   sw_exchange_only = OFF
-                else if (rstr.eq.tag_hf) then
-                   write(nfout,'(a)') ' !** functional_type : HF (Hartree-Fock)'
-                   alpha_exx = 1.d0
-                   sw_screened_exchange = OFF
-                   sw_exchange_only = ON
-                else
-                   write(nfout,'(a)') ' !** WARNING : invalid functional_type : '//trim(rstr)
-                endif
-             endif
              if(ipriinputfile>=1) then
                 write(nfout,'(" <<< Hybrid functional method >>>")')
                 if(sw_exchange_only==ON) &
@@ -3919,6 +3986,20 @@ contains
        end if
 
   ! ------- Positron start
+       iret = f_getStringValue(tag_positron_method,rstr,LOWER)
+       if( rstr == tag_CONV ) then
+          positron_method = Positron_CONV;          sw_positron = BULK
+       else if( rstr == tag_GGGC ) then
+          positron_method = Positron_GGGC;          sw_positron = DEFECT
+       else if( rstr == tag_PSN ) then
+          positron_method = Positron_PSN;           sw_positron = DEFECT
+          stop "Positron-PSN : Not implemented"
+       end if
+       if ( sw_positron /= OFF ) then
+          write(nfout,*) "!** Positron_method is set to ", positron_method
+          if ( gmax_positron < 0.01 ) call getgmax_positron()
+       endif
+
        if(sw_positron /= OFF) then
           npeg = 1
           num_extra_pev = 0
@@ -4541,6 +4622,7 @@ write(nfout,'(" !** sw_screening_correct
          if(f_getRealValue(tag_a1,dret,'')==0)     a1     = dret
          if(f_getRealValue(tag_a2,dret,'')==0)     a2     = dret
          if(f_getIntValue(tag_eval_kernel_by_interpolation,iret)==0) eval_kernel_by_interpolation = iret == ON
+         if(f_getIntValue(tag_save_memory_mode,iret)==0) sw_save_memory_vdw = iret == ON
          iret = f_selectParentBlock()
        endif
 
@@ -4576,7 +4658,6 @@ write(nfout,'(" !** sw_screening_correct
     subroutine set_initial_occmat(rstr)
       character(len=FMAXVALLEN),intent(in) :: rstr
       logical :: tf
-      initial_occmat = SPIN_POLARIZED
       call strncmp2(rstr, FMAXVALLEN, tag_off, len(tag_off), tf)
       if(.not.tf) call strncmp0(trim(rstr),tag_off, tf)
       if(.not.tf) call strncmp0(trim(rstr),tag_none,tf)
@@ -4592,6 +4673,12 @@ write(nfout,'(" !** sw_screening_correct
          goto 1001
       endif
 
+      call strncmp0(trim(rstr), tag_spin_polarized, tf)
+      if(tf) then
+         initial_occmat = SPIN_POLARIZED
+         goto 1001
+      endif
+
       call strncmp0(trim(rstr), tag_initial_es, tf)
       if(tf) then
          sw_initial_es = ON
@@ -4795,8 +4882,8 @@ write(nfout,'(" !** sw_screening_correct
   end subroutine m_CtrlP_rd_accuracy
 
 #ifndef _EMPIRICAL_
-  subroutine m_CtrlP_rd_wfsolver(nfout)
-    integer, intent(in) :: nfout
+  subroutine m_CtrlP_rd_wfsolver(nfout,natm)
+    integer, intent(in) :: nfout,natm
     integer :: f_selectBlock, f_getIntValue, f_getRealValue, f_getStringValue
     integer :: f_selectParentBlock, f_selectTop
     integer :: iret,i,ba
@@ -4821,10 +4908,8 @@ write(nfout,'(" !** sw_screening_correct
 
     ! determine the default value for the davidson-related variables
     if(neg/nrank_e<4) then
-       sw_divide_subspace=OFF
        sw_divide_subspace_changed = .true.
        sw_npartition_changed = .true.
-       if(printable) write(nfout,'(" !** REMARK: sw_divide_subspace is set to OFF ")')
     else
        npartition_david = neg/(nblock*nrank_e)
        if (npartition_david<1) npartition_david = 1
@@ -4834,7 +4919,7 @@ write(nfout,'(" !** sw_screening_correct
 
     if(.not.explict_solver)then
        tag_solver_of_WF_is_found = .true.
-       call configure_wf_solver(solver_set)
+       call configure_wf_solver(solver_set,natm)
        meg = neg
     endif
 
@@ -4991,6 +5076,11 @@ write(nfout,'(" !** sw_screening_correct
        end if
 
        ! ---- rmm ---
+       if( explict_solver )then
+          edelta_change_to_rmm = 1.e-3/dble(natm)
+          edelta_change_to_rmm_md = 1.e-3/dble(natm)
+       endif
+
        if( f_selectBlock( tag_rmm) == 0) then
           if(ipriinputfile >= 2 .and. printable) write(nfout,'(" !** -- tag_rmm is found --")')
           if( f_getIntValue(tag_imGSrmm, iret) == 0) imGSrmm = iret
@@ -5001,9 +5091,11 @@ write(nfout,'(" !** sw_screening_correct
           if( f_getRealValue(tag_edelta_change_to_rmm,dret,'hartree')==0) then
              edelta_change_to_rmm = dret
              edelta_change_to_rmm_md = dret
+             edelta_rmm_given = .true.
           endif
           if(f_getRealValue(tag_edelta_change_to_rmm_md,dret,'hartree')==0)then
              edelta_change_to_rmm_md = dret
+             edelta_rmm_given = .true.
           endif
           if( f_getIntValue(tag_save_memory_mode, iret) == 0) rmm_save_memory_mode = iret
           iret = f_selectParentBlock()
@@ -5073,6 +5165,11 @@ write(nfout,'(" !** sw_screening_correct
                 write(nfout,'(" !** before_renewal= OFF")')
              end if
           end if
+
+!!$!BRANCH_P 3D_Parallel
+!!$          if(nrank_k>=2) sw_scalapack = OFF
+!!$             ! This is a tentative default setting until scalapack parallelization is completed for nrank_k>=2
+!!$!BRANCH_P_END 3D_Parallel
           if( f_selectBlock( tag_scalapack) == 0) then
              if( f_getIntValue(tag_sw_scalapack, iret) == 0) sw_scalapack = iret
 #ifndef _USE_SCALAPACK_
@@ -5202,8 +5299,11 @@ write(nfout,'(" !** sw_screening_correct
           iret = f_selectParentBlock()
        else
           max_subspace_size = 4*neg ! default value
-          if(ipriinputfile >= 2 .and. printable) &
-               & write(nfout,'(" !* tag_davidson is not found")')
+!!$          if(ipriinputfile >= 2 .and. printable) &
+          if(ipriinputfile >= 1 .and. printable) then
+             write(nfout,'(" !* tag_davidson is not found")')
+             write(nfout,'(" !** max_subspace_size = ",i6)') max_subspace_size
+          end if
        end if
        
        ! ---- Modified Davidson ---      
@@ -5321,19 +5421,19 @@ write(nfout,'(" !** sw_screening_correct
 1001  continue
     end subroutine set_energy_evaluation
 
-    subroutine configure_wf_solver(solver_set)
-       integer, intent(in) :: solver_set
+    subroutine configure_wf_solver(solver_set,natm)
+       integer, intent(in) :: solver_set,natm
        integer :: i
        if(solver_set == LMM_RMM)then
           call alloc_w_solver(2)
           w_solver(1)%solver = lmMSD
           w_solver(1)%subspace_rotation = ON
-          w_solver(1)%till_n_iter = 2
+          w_solver(1)%till_n_iter = 5
           w_solver(2)%solver = RMM3
           w_solver(2)%till_n_iter = -1
           w_solver(2)%subspace_rotation = ON
-          edelta_change_to_rmm = 1.d-4
-          edelta_change_to_rmm_md = 1.d-4
+          edelta_change_to_rmm = 1.d-4/dble(natm)
+          edelta_change_to_rmm_md = 1.d-4/dble(natm)
           n_WF_solvers_before = 2
           n_WF_solvers_after = 0
           n_WF_solvers_all = n_WF_solvers_before + n_WF_solvers_after
@@ -5342,18 +5442,25 @@ write(nfout,'(" !** sw_screening_correct
           call alloc_w_solver(4)
           if(icond==INITIAL .or. icond==CONTINUATION .or. icond==AUTOMATIC)then
              w_solver(1)%solver = MDDAVIDSON
+             w_solver(3)%solver = MDDAVIDSON
           else
              w_solver(1)%solver = MDKOSUGI
+             w_solver(3)%solver = MDKOSUGI
+          endif
+          if(sw_hubbard==ON) then
+             w_solver(1)%solver = MDKOSUGI
+             w_solver(3)%solver = MDKOSUGI
           endif
-          if(sw_hubbard==ON) w_solver(1)%solver = MDKOSUGI
-
 ! === KT_add === 2015/01/05
-          if ( noncol ) w_solver(1)%solver = MDDAVIDSON
+          if ( noncol ) then
+             w_solver(1)%solver = MDDAVIDSON
+             w_solver(3)%solver = MDDAVIDSON
+          endif
 ! ============== 2015/01/05
 
 !          w_solver(1)%solver = MDKOSUGI
           w_solver(1)%subspace_rotation = ON
-          w_solver(1)%till_n_iter = 2
+          w_solver(1)%till_n_iter = 5
           w_solver(1)%precon = ON
           w_solver(1)%before_or_after_convergence = BEFORE
           w_solver(2)%solver = RMM3
@@ -5361,9 +5468,9 @@ write(nfout,'(" !** sw_screening_correct
           w_solver(2)%till_n_iter = -1
           w_solver(2)%subspace_rotation = ON
           w_solver(2)%before_or_after_convergence = BEFORE
-          w_solver(3)%solver = MDDAVIDSON
+          !!w_solver(3)%solver = MDDAVIDSON
           w_solver(3)%subspace_rotation = ON
-          w_solver(3)%till_n_iter = 1
+          w_solver(3)%till_n_iter = 5
           w_solver(3)%precon = ON
           w_solver(3)%before_or_after_convergence = AFTER
           w_solver(4)%solver = RMM3
@@ -5371,8 +5478,8 @@ write(nfout,'(" !** sw_screening_correct
           w_solver(4)%till_n_iter = -1
           w_solver(4)%subspace_rotation = ON
           w_solver(4)%before_or_after_convergence = AFTER
-          edelta_change_to_rmm = 1.d-3
-          edelta_change_to_rmm_md = 1.d-3
+          edelta_change_to_rmm = 1.d-3/dble(natm)
+          edelta_change_to_rmm_md = 1.d-3/dble(natm)
           n_WF_solvers_before = 2
           n_WF_solvers_after = 2
           n_WF_solvers_all = n_WF_solvers_before + n_WF_solvers_after
@@ -5416,15 +5523,15 @@ write(nfout,'(" !** sw_screening_correct
           !!$if (sw_hubbard==ON.or.nspin>1) sw_divide_subspace=OFF
           if (printable) write(nfout,'(" !** applied wavefunction solver set : davidson")')
        endif
-       if (n_WF_solvers_before>1)then
-          if(intzaj == by_matrix_diagon)then
-             do i=1,n_WF_solvers_before-1
-                w_solver(i)%till_n_iter = i+1
-             enddo
-          else
-             w_solver(1)%till_n_iter = 1
-          endif
-       endif
+       !if (n_WF_solvers_before>1)then
+       !   if(intzaj == by_matrix_diagon)then
+       !      do i=1,n_WF_solvers_before-1
+       !         w_solver(i)%till_n_iter = i+4
+       !      enddo
+       !   else
+       !      w_solver(1)%till_n_iter = 5
+       !   endif
+       !endif
     end subroutine configure_wf_solver
 
     subroutine set_wfsolvers(prealloc,msol,nbase,ba,iret)
@@ -5947,9 +6054,19 @@ write(nfout,'(" !** sw_screening_correct
              write(nfout,*) '!** sw_neglect_stress_offdiagonal is  ', iret
           endif
 ! === 2014/11/22
-
 ! ======== 13.1AS
 
+! === EXP_CELLOPT === 2015/09/24
+          if ( f_getIntValue( tag_sw_read_nfchgt_prev_cell, iret ) ==0 ) then
+             sw_read_nfchgt_prev_cell = iret
+             write(nfout,*) '!** sw_read_nfchgt_prev_cell is  ', iret
+          endif
+          if ( f_getIntValue( tag_sw_read_nfzaj_prev_cell, iret ) ==0 ) then
+             sw_read_nfzaj_prev_cell = iret
+             write(nfout,*) '!** sw_read_nfzaj_prev_cell is  ', iret
+          endif
+! ================== 2015/09/24
+
           iret = f_selectParentBlock()
        endif
 
@@ -6179,6 +6296,8 @@ write(nfout,'(" !** sw_screening_correct
        endif
 ! ======================================================================= 5.0
 
+       if(f_getIntValue(tag_sw_mix_occ_matrix,iret)==0) sw_mix_occ_matrix = iret
+
 ! ================================ added by K. Tagami ================== 11.0
        if (f_getIntValue( tag_sw_mix_imaginary_hardpart,iret ) == 0 )  then
           sw_mix_imaginary_hardpart = iret
@@ -7311,6 +7430,23 @@ write(nfout,'(" !** sw_screening_correct
           iret = f_selectParentBlock()
        end if
 
+       if ( f_selectBlock( tag_wf_orb_projection ) == 0 ) then
+          if ( f_getIntValue( tag_sw_print_wf_orb_projection, iret ) == 0 ) then
+             sw_print_wf_orb_projection = iret
+          endif
+          if ( f_getIntValue( tag_wf_orb_proj_print_format, iret ) == 0 ) then
+             if ( iret < 0 .or. iret > 1 ) iret = 0
+             wf_orb_proj_print_format = iret
+          endif
+          if (ipriinputfile >= 1) then
+             write(nfout,'(A,I6)') " !** sw_print_wf_orb_projection = ", &
+                  &                   sw_print_wf_orb_projection
+             write(nfout,'(A,i6,A)') "!** wf_orb_proj_print_format = ", &
+                  &        wf_orb_proj_print_format, " ( 0: {l m t}, 1: {j l mj t} )"
+          endif
+          iret = f_selectParentBlock()
+       endif
+
        if( f_selectBlock( tag_wf_squared ) == 0 ) then
           if( f_getIntValue( tag_sw_wf_squared_rspace, iret) == 0) &
                &                             sw_wf_squared_rspace = iret
@@ -7340,6 +7476,14 @@ write(nfout,'(" !** sw_screening_correct
           iret = f_selectParentBlock()
        endif
 
+       if( f_selectBlock( tag_msb ) == 0) then
+          if( f_getIntValue( tag_sw_calc_contact_density, iret) == 0) then
+             sw_calc_contact_density = iret
+             write(nfout,*) "!** sw_calc_contact_density is ", iret
+          endif
+          iret = f_selectParentBlock()
+       endif
+
        if( f_selectBlock( tag_elf) == 0) then
           if(ipriinputfile >= 2) write(nfout,'(" !*  tag_elf")')
           if( f_getIntValue( tag_sw_elf, iret) == 0) sw_elf = iret
@@ -8960,9 +9104,9 @@ write(nfout,'(" !** sw_screening_correct
        if(printable) write(6,'(" iconvergence_previous_job is reset " &
             & ,i2,", because neg_previous < neg")') iconvergence_previous_job
     end if
-    if(sw_optimize_lattice==ON)then
-       iconvergence_previous_job = 0
-    endif
+    !if(sw_optimize_lattice==ON)then
+    !   iconvergence_previous_job = 0
+    !endif
   end subroutine m_CtrlP_rd_iconvergence
 
   subroutine m_CtrlP_reset_iconvergence
@@ -10862,6 +11006,7 @@ write(nfout,'(" !** sw_screening_correct
     else
        neg = int(t*(1+t**p))
     end if
+    neg_is_given = .true.
   end subroutine m_CtrlP_set_neg_properly
 
   subroutine m_CtrlP_flag_mpi_G_dot_R(nfout,nbmx)
diff -uprN phase0_2015.01/src_phase_3d/m_Crystal_Structure.F90 phase0_2015.01.01/src_phase_3d/m_Crystal_Structure.F90
--- phase0_2015.01/src_phase_3d/m_Crystal_Structure.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Crystal_Structure.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 464 $)
 !
 !  MODULE: m_Crystal_Structure
 !
@@ -33,7 +33,7 @@
 !
 module m_Crystal_Structure
 !      (m_CS)
-! $Id: m_Crystal_Structure.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Crystal_Structure.F90 464 2015-09-23 14:29:53Z ktagami $
 !!$  use m_Files,              only : nfout,nfopgr,nfmatbp &
   use m_Timing,             only : tstatc0_begin, tstatc0_end
   use m_Control_Parameters, only : ipri, af, m_CtrlP_set_af, m_CtrlP_set_nspin_and_af &
@@ -272,8 +272,10 @@ module m_Crystal_Structure
   character(len("level_of_projection_paw_charge")), private, parameter :: &
        &             tag_level_projection_paw_charge = "level_of_projection_paw_charge"
 
+! --- magnetic moment
   character(len("axis")),private,parameter :: tag_axis = "axis"
   character(len("direction")),private,parameter :: tag_direction = "direction"
+  character(len("moment")),private,parameter :: tag_moment = "moment"
   character(len("magnetic_moment")),private,parameter :: tag_magnetic_moment &
        &                                       = "magnetic_moment"
 
@@ -384,18 +386,31 @@ module m_Crystal_Structure
   character(len("edelta_change_lambda_last")), private, parameter :: &
        &                 tag_edelta_change_lambda_last = "edelta_change_lambda_last"
 !
-  character(len("max_iterations_constraint")), private, parameter :: &
-       &                 tag_max_iterations_constraint = "max_iterations_constraint"
+  character(len("max_iter_elec_mag_constraint")), private, parameter :: &
+       &                 tag_max_iter_elec_mag_constr = "max_iter_elec_mag_constraint"
+  character(len("max_iter_ion_mag_constraint")), private, parameter :: &
+       &                 tag_max_iter_ion_mag_constr = "max_iter_ion_mag_constraint"
+  character(len("max_iter_cell_mag_constraint")), private, parameter :: &
+       &                 tag_max_iter_cell_mag_constr = "max_iter_cell_mag_constraint"
 !
+  character(len("sw_fix_charge_after_constraint")), private, parameter :: &
+       &              tag_sw_fix_charge_after_constr = "sw_fix_charge_after_constraint"
+
   integer, parameter :: nmax_intermid_lambda = 100
 !
   integer :: sw_magnetic_constraint = OFF
   integer :: mag_constraint_type = 0
-  integer :: damping_method_mag_constraint = 0
-  integer :: num_intermid_lambda = 0
-  integer :: max_iterations_mag_constraint = 0
+  integer :: damping_method_mag_constraint = ABRUPT
+  integer :: num_intermid_lambda = 2
+!
+  integer :: max_iter_elec_mag_constraint = 50
+  integer :: max_iter_ion_mag_constraint = 1
+!  integer :: max_iter_cell_mag_constraint = 100
+  integer :: max_iter_cell_mag_constraint = 1
+
+  integer :: sw_fix_charge_after_constraint = OFF
 !
-  real(kind=DP) :: mag_constraint_lambda = 0.0d0
+  real(kind=DP) :: mag_constraint_lambda = 0.20d0
   real(kind=DP) :: edelta_change_lambda_first = 1.0D-4     ! hartree
   real(kind=DP) :: edelta_change_lambda_last = 1.0D-4     ! hartree
 ! ======================================================== 13.0U
@@ -1294,9 +1309,9 @@ contains
       if ( damping_method_mag_constraint == ABRUPT .or. &
            &  damping_method_mag_constraint == LINEAR ) then
 
-         if ( f_getIntValue( tag_max_iterations_constraint, iret ) == 0 ) then
-            if ( iret < 0 ) max_iterations_mag_constraint = 0
-            max_iterations_mag_constraint = iret
+         if ( f_getIntValue( tag_max_iter_elec_mag_constr, iret ) == 0 ) then
+            if ( iret < 0 ) max_iter_elec_mag_constraint = 0
+            max_iter_elec_mag_constraint = iret
          endif
       endif
 
@@ -1313,10 +1328,27 @@ contains
 
       if ( damping_method_mag_constraint == ABRUPT .or. &
            &  damping_method_mag_constraint == LINEAR ) then
-         write(nfout,*) '! max_iterations_mag_constraint is ', &
-              &            max_iterations_mag_constraint
+         write(nfout,*) '! max_iter_elec_mag_constraint is ', &
+              &            max_iter_elec_mag_constraint
       endif
 
+      if ( f_getIntValue( tag_max_iter_ion_mag_constr, iret ) == 0 ) then
+         if ( iret < 0 ) max_iter_ion_mag_constraint = 0
+         max_iter_ion_mag_constraint = iret
+      endif
+      if ( f_getIntValue( tag_max_iter_cell_mag_constr, iret ) == 0 ) then
+         if ( iret < 0 ) max_iter_cell_mag_constraint = 0
+         max_iter_cell_mag_constraint = iret
+      endif
+
+      if ( f_getIntValue( tag_sw_fix_charge_after_constr, iret ) == 0 ) then
+         sw_fix_charge_after_constraint = iret
+         write(nfout,*) '! sw_fix_charge_after_constraint is ', &
+              &             sw_fix_charge_after_constraint
+      endif
+
+      write(nfout,*) '! max_iter_ion_mag_constraint is ', max_iter_ion_mag_constraint
+      write(nfout,*) '! max_iter_cell_mag_constraint is ', max_iter_cell_mag_constraint
 
       write(nfout,*) '! *********************************************** '
 
@@ -1356,6 +1388,9 @@ contains
                if ( f_getRealValue( tag_norm, dret, "" ) == 0 ) then
                   norm = dret
                end if
+               if ( f_getRealValue( tag_moment, dret, "" ) == 0 ) then
+                  norm = dret
+               end if
                if ( f_getRealValue( tag_theta, dret, "" ) == 0 ) then
                   theta = dret
                end if
@@ -1373,8 +1408,10 @@ contains
 
          else
             if ( f_getRealValue( tag_norm, dret, "" ) == 0 ) then
-               norm = dret
-               mag_moment0_global(1) = norm
+               norm = dret;   mag_moment0_global(1) = norm
+            end if
+            if ( f_getRealValue( tag_moment, dret, "" ) == 0 ) then
+               norm = dret;   mag_moment0_global(1) = norm
             end if
 
          endif
@@ -1410,7 +1447,8 @@ contains
       mag_direc0_global(2) = 0.0d0
       mag_direc0_global(3) = 1.0d0
 
-      if ( f_selectBlock( tag_direction ) == 0 ) then
+      if ( f_selectBlock( tag_direction ) == 0 .or. &
+           &  f_selectBlock( tag_magnetic_moment ) == 0 ) then
 
          if( f_getRealValue( tag_mdx, dret, '') == 0 ) then
             mdx = dret;   Flag = 1
@@ -1421,6 +1459,16 @@ contains
          if( f_getRealValue( tag_mdz, dret, '') == 0 ) then
             mdz = dret;   Flag = 1
          endif
+
+         if( f_getRealValue( tag_mx, dret, '') == 0 ) then
+            mdx = dret;   Flag = 1
+         endif
+         if( f_getRealValue( tag_my, dret, '') == 0 ) then
+            mdy = dret;   Flag = 1
+         endif
+         if( f_getRealValue( tag_mz, dret, '') == 0 ) then
+            mdz = dret;   Flag = 1
+         endif
          cnorm = sqrt( mdx**2 + mdy**2 + mdz**2 )
 
          if ( abs(cnorm) > 1.0E-4 ) then
diff -uprN phase0_2015.01/src_phase_3d/m_ES_ExactExchange.F90 phase0_2015.01.01/src_phase_3d/m_ES_ExactExchange.F90
--- phase0_2015.01/src_phase_3d/m_ES_ExactExchange.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_ExactExchange.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,7 +1,7 @@
 #define HYBRID_DGEMM
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 485 $)
 !
 !  MODULE: m_ES_ExactExchange
 !
@@ -36,13 +36,14 @@ module m_ES_ExactExchange
   use m_Electronic_Structure,only: totch,zaj_l,occup_l,neordr,eko_l,vnlph_l,fsr_l,fsi_l
   use m_NonLocal_Potential, only : snl
   use m_PlaneWaveBasisSet,  only : ngabc,igf,kg1,kg,kgp,nbase,nbmx,iba,m_pwBS_kinetic_energies &
-       &                         , nbase_gamma,igfp_l,ngpt_l,m_pwBS_sphrp_exx,kgp_exx, igfp_exx
+       &                         , nbase_gamma,igfp_l,ngpt_l,m_pwBS_sphrp_exx,kgp_exx, igfp_exx, n_rGpv
   use m_Kpoints,            only : kv3,vkxyz,kv3_ek,vkxyz_ek,k_symmetry,qwgt,qwgt_ek,mp_index &
        &                         , m_Kp_sample_mesh, kshift
   use m_FFT,                only : nfft,fft_box_size_WF &
        &                         , m_FFT_alloc_WF_work &
        &                         , m_FFT_dealloc_WF_work &
        &                         , m_FFT_WF, fft_box_size_CD, m_FFT_CD0 &
+       &                         , m_FFT_CD0_exx &
        &                         , nfftp_exx_nonpara, fft_box_size_CD_exx
   use m_Ionic_System,       only : zeta1, qex, iatom, ntyp, natm, ityp, pos, iwei, napt
   use m_PseudoPotential,    only : qitg_l, iqitg, nqitg, m_PP_include_vanderbilt_pot &
@@ -50,14 +51,14 @@ module m_ES_ExactExchange
        &                         , nlmta,lmta,lmtt,ltp,mtp,taup,nloc,m_PP_find_maximum_l &
        &                         , m_PP_tell_lmtt_l_m_tau &
        &                         , m_PP_set_index_arrays1,m_PP_set_index_arrays2 &
-       &                         , radr,xh,rmax,nmesh,mmesh, qrspspw
+       &                         , radr,xh,rmax,nmesh,mmesh, qrspspw, nlmtt
  
   use m_Files,              only : nfout
   use m_Timing,             only : tstatc0_begin,tstatc0_end
   use m_Control_Parameters, only : nspin,ipri,kimg,neg,printable,af,ekmode &
        &                         , alpha_exx,omega_exx,sw_screened_exchange  &
        &                         , sw_singular_correction,reduction_factor_exx &
-       &                         , way_ksample,m_CtrlP_cachesize,sw_rspace_hyb &
+       &                         , way_ksample,m_CtrlP_cachesize &
        &                         , sw_precalculate,sw_rsb,potential_update 
   use m_Crystal_Structure,  only : univol,altv,rltv,nopr,tau,op
   ! nopr : # of operations
@@ -70,6 +71,8 @@ module m_ES_ExactExchange
        &                         , nrank_e,nrank_k,myrank_e,map_e,ista_e,iend_e,istep_e,idisp_e &
        &                         , map_z,np_e,mpi_k_world,mpi_e_world,myrank_k,map_k,ista_k,iend_k &
        &                         , ista_kg1_k, np_kg1_k, mp_kg1_k &
+!       &                         , m_Parallel_mpi_nval,np_nval, mp_nval,ista_kngp_exx,iend_kngp_exx &
+       &                         , ista_kngp_exx,iend_kngp_exx &
 #ifdef TRANSPOSE
        &                         , ierr,mp_e,nis_e,nie_e,nel_e
 #else
@@ -85,13 +88,17 @@ module m_ES_ExactExchange
                               , ista_fs, iend_fs, np_fs, nrank_g, nel_fft_x, nel_fft_y, nel_fft_z &
                               , xyz_fft_x, xyz_fft_y, xyz_fft_z, mp_g1k, kg1_ext, myrank_g, map_fft_x, map_fft_y &
        &                          , fft_X_x_nel, fft_X_y_nel, fft_X_z_nel &
-                              , is_kngp, ie_kngp, mp_fft_x, nel_kngp, np_kngp
+                              , is_kngp, ie_kngp, mp_fft_x, nel_kngp, np_kngp &
+       &                      , m_Parallel_init_mpi_nval &
+       &                      , ista_nval,iend_nval,np_nval,mp_nval,map_nval,map_z_nval,myrank_nval
   use m_FFT, only : m_FFT_Inverse_3D, m_FFT_Direct_3D
 ! === FFT xzy ==================================================================
   use m_FFT, only : m_FFT_Inverse_XYZ_3D, m_FFT_Direct_XYZ_3D
   use m_Control_Parameters,  only : sw_fft_xzy
 ! ==============================================================================
 
+  use m_Realspace, only : nmesh_rs_aug_max,nmesh_rs_aug,meshx_rs_aug,meshy_rs_aug,meshz_rs_aug,meshxyz_rs_aug &
+  & , qr_clm_ylm,dqr_clm_ylm,plmt1,plmt2,nlmtpair
 
   implicit none
 
@@ -100,7 +107,7 @@ module m_ES_ExactExchange
   integer :: nval_old
   integer :: nfftwf
   integer :: ntrs ! ntrs = 1, if TRS is used. Otherwise, ntrs=0
-  integer, allocatable :: ngpt_exx(:,:,:) ! d(kgp,nopr,0:ntrs)
+  integer, allocatable :: ngpt_exx(:,:,:) ! d(kg,nopr,0:ntrs)
   real(kind=DP), allocatable :: qitg_exx(:,:,:) ! d(kgp,nqitg,nqmk)
   real(kind=DP), allocatable :: ylm_exx(:,:,:) ! d(kgp,maxylm,nqmk)
   real(kind=DP), allocatable :: vc(:,:) ! d(kgp,nqmk)
@@ -195,6 +202,8 @@ module m_ES_ExactExchange
   real(kind=DP), allocatable, dimension(:,:,:,:,:) :: fsrqm,fsiqm
 
   real(kind=DP), allocatable, dimension(:,:,:,:) :: exx_potential
+  integer, save :: id_sname_cdfft = -1
+  integer, allocatable, dimension(:,:) :: ngabc_red
   include 'mpif.h'                                      ! MPI
 
 contains
@@ -470,6 +479,7 @@ contains
   end subroutine m_ES_EXX_move_k_into_fbz
 
   subroutine m_ES_EXX_init0
+    integer :: ii,ie
     if(potential_update>0) then
        if(.not.allocated(exx_potential)) &
        & allocate(exx_potential(maxval(np_g1k),np_e,ista_k:iend_k,kimg))
@@ -479,7 +489,7 @@ contains
 
   subroutine m_ES_EXX_init
     implicit none
-    integer :: ik, ikbz, ii,ierr
+    integer :: ik, ikbz, ii,ierr,ie
     real(kind=DP) :: dk(3)
     integer,save  :: id_sname = -1
     call tstatc0_begin('m_ES_EXX_init ',id_sname,level=1)
@@ -636,10 +646,8 @@ contains
     deallocate(occup_val)
     deallocate(ngpt_exx)
     if(modnrm == EXECUT) then
-       if(sw_rspace_hyb==OFF) then
-         deallocate(ylm_exx)
-         deallocate(qitg_exx)
-       endif
+       deallocate(ylm_exx)
+       deallocate(qitg_exx)
        deallocate(fsr_exx)
        deallocate(fsi_exx)
     end if
@@ -658,11 +666,18 @@ contains
     real(kind=DP), allocatable :: efsr_l(:,:) ! d(np_e,nlmta)
     real(kind=DP), allocatable :: efsi_l(:,:) ! d(np_e,nlmta)
 
+    real(kind=DP), allocatable :: zaj_buf(:,:),zaj_buf2(:,:),wfvv(:,:,:,:)
+    integer :: ig,iadd
+    integer :: ip0,ip1
+
     logical :: trans
+    integer, allocatable, dimension(:) :: ista
     integer,save  :: id_sname = -1
 
     if(sw_update_wfv==OFF) return
 
+    allocate(ista(MPI_STATUS_SIZE))
+
     trans = .true.
     if(present(transform)) trans = transform
     call tstatc0_begin('m_ES_EXX_gather_valence_states ',id_sname,level=1)
@@ -693,6 +708,7 @@ contains
        nval = ibm
     end if
 
+
     if(allocated(wfv)) deallocate(wfv)
     if(allocated(occup_val)) deallocate(occup_val)
     allocate(wfv(maxval(np_g1k),nval,kv3,kimg))
@@ -729,41 +745,37 @@ contains
        deallocate(efsi_l)
     end if
 
-    if(npes>1) then
-       allocate(wfv_mpi(maxval(np_g1k),nval,kv3,kimg))
-       allocate(occup_val_mpi(nval,kv3))
-       call mpi_allreduce(wfv,wfv_mpi,maxval(np_g1k)*nval*kv3*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
-       wfv = wfv_mpi
-       call mpi_allreduce(wfv,wfv_mpi,maxval(np_g1k)*nval*kv3*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
-       call mpi_allreduce(occup_val,occup_val_mpi,nval*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
-       occup_val = occup_val_mpi
-       call mpi_allreduce(occup_val,occup_val_mpi,nval*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
-       wfv = wfv_mpi
-       occup_val = occup_val_mpi
-       deallocate(wfv_mpi)
-       deallocate(occup_val_mpi)
-       if(modnrm == EXECUT) then
-          allocate(fsr_mpi(nval,nlmta,kv3))
-          allocate(fsi_mpi(nval,nlmta,kv3))
-          call mpi_allreduce(fsr_exx,fsr_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
-          fsr_exx = fsr_mpi
-          call mpi_allreduce(fsr_exx,fsr_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
-          call mpi_allreduce(fsi_exx,fsi_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
-          fsi_exx = fsi_mpi
-          call mpi_allreduce(fsi_exx,fsi_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
-          fsr_exx = fsr_mpi
-          fsi_exx = fsi_mpi
-          deallocate(fsr_mpi)
-          deallocate(fsi_mpi)
-       end if
+    allocate(occup_val_mpi(nval,kv3))
+    allocate(wfv_mpi(maxval(np_g1k),nval,kv3,kimg))
+    call mpi_allreduce(wfv,wfv_mpi,maxval(np_g1k)*nval*kv3*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
+    wfv = wfv_mpi
+    call mpi_allreduce(wfv,wfv_mpi,maxval(np_g1k)*nval*kv3*kimg,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
+    wfv = wfv_mpi
+    call mpi_allreduce(occup_val,occup_val_mpi,nval*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
+    occup_val = occup_val_mpi
+    call mpi_allreduce(occup_val,occup_val_mpi,nval*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
+    occup_val = occup_val_mpi
+    deallocate(wfv_mpi)
+    deallocate(occup_val_mpi)
+    if(modnrm == EXECUT) then
+       allocate(fsr_mpi(nval,nlmta,kv3))
+       allocate(fsi_mpi(nval,nlmta,kv3))
+       call mpi_allreduce(fsr_exx,fsr_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
+       fsr_exx = fsr_mpi
+       call mpi_allreduce(fsr_exx,fsr_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
+       call mpi_allreduce(fsi_exx,fsi_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_kg_world,ierr)
+       fsi_exx = fsi_mpi
+       call mpi_allreduce(fsi_exx,fsi_mpi,nval*nlmta*kv3,MPI_DOUBLE_PRECISION,MPI_SUM,mpi_ge_world,ierr)
+       fsr_exx = fsr_mpi
+       fsi_exx = fsi_mpi
+       deallocate(fsr_mpi)
+       deallocate(fsi_mpi)
     end if
 
-
-
+    deallocate(ista)
     call tstatc0_end(id_sname)
   end subroutine m_ES_EXX_gather_valence_states
 
-
   subroutine m_ES_EXX_kernel(nfout)
     implicit none
     integer, intent(in) :: nfout
@@ -771,12 +783,14 @@ contains
     integer :: ik,ikbz,ig,kgs
     real(kind=DP) :: fac, wi, kg(3), vzero, g2
     real(kind=DP), dimension(6) :: ttr
-
+    integer :: igs,ige
     integer,save  :: id_sname = -1
     call tstatc0_begin('m_ES_EXX_kernel ',id_sname,level=1)
 
     if(.not.allocated(vc)) allocate(vc(ista_kngp:iend_kngp,nqmk))
 
+    vc = 0.d0
+
     call getttr(rltv,ttr)
 
     fac = PAI4/univol
@@ -795,7 +809,9 @@ contains
              kgs=2
              if(ista_kngp == 1) vc(1,ik) = vzero
           end if
-          do ig=max(kgs,ista_kngp),iend_kngp
+          igs = max(kgs,ista_kngp);ige=iend_kngp
+          if(ige.gt.nmax_G_hyb) ige=nmax_G_hyb
+          do ig=igs,ige
              kg(1:3) = qmk(ik,1:3) + ngabc_kngp_l(ig,1:3)
              g2          = ttr(1)*kg(1)*kg(1) &
              &           + ttr(2)*kg(2)*kg(2) &
@@ -814,7 +830,9 @@ contains
              kgs=2
              if(ista_kngp == 1) vc(1,ik) = vzero
           end if
-          do ig=max(kgs,ista_kngp),iend_kngp
+          igs = max(kgs,ista_kngp);ige=iend_kngp
+          if(ige.gt.nmax_G_hyb) ige=nmax_G_hyb
+          do ig=igs,ige
              kg(1:3) = qmk(ik,1:3) + ngabc_kngp_l(ig,1:3)
              g2          = ttr(1)*kg(1)*kg(1) &
              &           + ttr(2)*kg(2)*kg(2) &
@@ -872,7 +890,7 @@ contains
       real(kind=DP) :: chig
       real(kind=DP), intent(in) :: gam
 
-      integer :: ig, ikbz, kgs
+      integer :: ig, ikbz, kgs, igs, ige
       real(kind=DP) :: sumg, sumgk
 
       sumg = 0.d0
@@ -967,6 +985,8 @@ contains
     real(kind=DP) :: ene
     integer :: ig,iadd
     logical :: store_p
+    real(kind=DP), allocatable, dimension(:) :: zajbuf_r,zajbuf_i
+    integer :: kgw,kgv
     integer :: id_sname=-1
     call tstatc0_begin('m_ES_EXX_potential ',id_sname,level=1)
     store_p = .true.
@@ -979,7 +999,7 @@ contains
     iup = 2
     if(present(iupdate)) iup = iupdate
     if(iup.lt.potential_update)then
-       vxw(:,1:kimg) = exx_potential(:,ib,ik,:)
+       vxw(1:np_g1k(ik),1:kimg) = exx_potential(1:np_g1k(ik),ib,ik,1:kimg)
        if(present(exx))then
          exx=0.d0
          if(kimg==1)then
@@ -1016,21 +1036,20 @@ contains
     allocate(efsr_l(nlmta));efsr_l=0.d0
     allocate(efsi_l(nlmta));efsi_l=0.d0
     if(modnrm == EXECUT) call get_expkt_fs_b(ik,ib,fsr,fsi,efsr_l,efsi_l)
+    allocate(zajbuf_r(maxval(np_g1k)));zajbuf_r(1:np_g1k(ik))=zaj_l(1:np_g1k(ik),ib,ik,1)
+    allocate(zajbuf_i(maxval(np_g1k)));zajbuf_i(1:np_g1k(ik))=zaj_l(1:np_g1k(ik),ib,ik,kimg)
+    kgw = maxval(np_g1k);kgv=maxval(np_g1k)
+
     if(present(exx))then
-       if(kimg==1)then
-          call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,1), efsr_l, efsr_l, vxw, ene, eo )
-       else
-          call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,2), efsr_l, efsi_l, vxw, ene, eo )
-       endif
+       call apply_Vx_to_WF( ispin, ib, ik, kgw, kgv, zajbuf_r, zajbuf_i, efsr_l, efsi_l, vxw, ene, eo )
     else
-       if(kimg==1)then
-           call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,1), efsr_l, efsr_l, vxw)
-       else
-           call apply_Vx_to_WF( ispin, ib, ik, zaj_l(1,ib,ik,1), zaj_l(1,ib,ik,2), efsr_l, efsi_l, vxw)
-       endif
+       call apply_Vx_to_WF( ispin, ib, ik, kgw, kgv, zajbuf_r, zajbuf_i, efsr_l, efsi_l, vxw )
     endif
+
     deallocate(efsr_l)
     deallocate(efsi_l)
+    deallocate(zajbuf_r)
+    deallocate(zajbuf_i)
     if(present(exx)) exx = ene
     if(.not.eo.and.potential_update>0.and.store_p) &
     &  exx_potential(1:np_g1k(ik),ib,ik,1:kimg) = vxw(1:np_g1k(ik),1:kimg)
@@ -1116,6 +1135,7 @@ contains
     real(kind=DP), allocatable, dimension(:,:,:,:) :: vxdi_t
 
     integer,save  :: id_sname = -1
+
     call tstatc0_begin('m_ES_EXX_Diagonal_part ',id_sname,level=1)
 
 #ifdef FFT_3D_DIVISION
@@ -1281,12 +1301,12 @@ contains
     end if 
   end subroutine m_ES_Vexx_add_vexx
 
-  subroutine apply_Vx_to_WF(ispin,ib,ik,wfr,wfi,bdwr,bdwi,vxw,eexx,eonly,force_l,dbdwr,dbdwi)
+  subroutine apply_Vx_to_WF(ispin,ib,ik,kgw,kgv,wfr,wfi,bdwr,bdwi,vxw,eexx,eonly,force_l,dbdwr,dbdwi)
     implicit none
-    integer, intent(in)                       :: ispin, ib,ik
-    real(kind=DP), intent(in), dimension(maxval(np_g1k)) :: wfr, wfi
+    integer, intent(in)                       :: ispin, ib,ik,kgw,kgv
+    real(kind=DP), intent(in), dimension(kgw) :: wfr, wfi
     real(kind=DP), intent(in), dimension(nlmta) :: bdwr, bdwi
-    real(kind=DP), intent(out), optional, dimension(maxval(np_g1k),kimg) :: vxw
+    real(kind=DP), intent(out), optional, dimension(kgv,kimg) :: vxw
     real(kind=DP), intent(out), optional :: eexx
     logical, intent(in), optional :: eonly
 
@@ -2176,6 +2196,70 @@ contains
     end if
   end subroutine map_RHOG_on_FFT_box
 
+  subroutine map_RHOG_on_FFT_box_hard(rhor,rhoi,afft)
+    implicit none
+    real(kind=DP), intent(in), dimension(nfftp_exx_nonpara/2) :: rhor, rhoi
+    real(kind=DP), intent(out), dimension(nfftp_exx_nonpara) :: afft
+
+    integer :: i,i1,i2
+
+!    afft(:) = 0.d0
+    do i = 1, nfftp_exx_nonpara/2
+!       i1 = (igfp_l(i)-1)*kimg+1
+       i1 = (i-1)*kimg+1
+       afft(i1)   = rhor(i)
+       afft(i1+1) = rhoi(i)
+    end do
+  end subroutine map_RHOG_on_FFT_box_hard
+
+  subroutine map_RHOG_on_FFT_box_hard_inv(rhor,rhoi,afft)
+    implicit none
+    real(kind=DP), intent(in), dimension(nfftp_exx_nonpara/2) :: rhor, rhoi
+    real(kind=DP), intent(out), dimension(nfftp_exx_nonpara) :: afft
+    integer :: i,i1,i2
+
+    afft(:) = 0.d0
+
+    do i = 1, kgp_exx
+       i1 = (igfp_exx(i)-1)*kimg+1
+       afft(i1) = rhor(i)
+       afft(i1+1) = rhoi(i)
+    end do
+  end subroutine map_RHOG_on_FFT_box_hard_inv
+
+  subroutine map_FFT_box_on_RHOG_hard(rhor,rhoi,afft)
+    implicit none
+    real(kind=DP), intent(in), dimension(nfftp_exx_nonpara) :: afft
+    real(kind=DP), intent(out), dimension(nfftp_exx_nonpara/2) :: rhor, rhoi
+    real(kind=DP) :: rinplw
+    integer :: i,i1,i2
+
+    rinplw = 1.d0/product(fft_box_size_CD_exx(1:3,1))
+    rhor(:)=0.d0
+    rhoi(:)=0.d0
+    do i = 1, kgp_exx
+       i1 = (igfp_exx(i)-1)*kimg+1
+       rhor(i) = afft(i1)*rinplw
+       rhoi(i) = afft(i1+1)*rinplw
+    end do
+  end subroutine map_FFT_box_on_RHOG_hard
+
+  subroutine map_FFT_box_on_RHOG_hard_inv(rhor,rhoi,afft)
+    implicit none
+    real(kind=DP), intent(in), dimension(nfftp_exx_nonpara) :: afft
+    real(kind=DP), intent(out), dimension(nfftp_exx_nonpara/2) :: rhor, rhoi
+    real(kind=DP) :: rinplw
+    integer :: i,i1,i2
+
+    rinplw = 1.d0/product(fft_box_size_CD_exx(1:3,1))
+!    rhor(:)=0.d0
+!    rhoi(:)=0.d0
+    do i = 1, nfftp_exx_nonpara/2
+       i1 = (i-1)*kimg+1
+       rhor(i) = afft(i1)*rinplw 
+       rhoi(i) = afft(i1+1)*rinplw
+    end do
+  end subroutine map_FFT_box_on_RHOG_hard_inv
 
   subroutine m_ES_EXX_ngpt()
     implicit none
@@ -2183,72 +2267,131 @@ contains
     integer :: i,j,iopr,ii
     integer :: ia,ib,ic
     integer :: namin,namax,nbmin,nbmax,ncmin,ncmax
-    integer, allocatable, dimension(:,:)   :: ngpt_t
     integer, allocatable, dimension(:,:,:) :: g_list
+    integer, allocatable, dimension(:) :: ngpt_exx_tmp, ngpt_exx0_tmp
     integer,save  :: id_sname = -1
     call tstatc0_begin('m_ES_EXX_ngpt ',id_sname,level=1)
 
-    allocate(ngpt_exx(kgp,nopr,0:ntrs)); ngpt_exx = 0
+!!$    allocate(ngpt_exx(kgp,nopr,0:ntrs)); ngpt_exx = 0
+    allocate(ngpt_exx(kg,nopr,0:ntrs)); ngpt_exx = 0
+
+    !! Time reversal symmetry
+
+    if(ntrs>0) then
+
+       namax = n_rGpv(1); nbmax = n_rGpv(2); ncmax = n_rGpv(3)
+       namin = -namax   ; nbmin = -nbmax   ; ncmin = -ncmax
+       allocate(g_list(namin:namax,nbmin:nbmax,ncmin:ncmax)); g_list = 0
+
+       do i = ista_kngp, iend_kngp
+          ia = ngabc_kngp_l(i,1)
+          ib = ngabc_kngp_l(i,2)
+          ic = ngabc_kngp_l(i,3)
+          g_list(ia,ib,ic) = i
+       end do
+       call mpi_allreduce(MPI_IN_PLACE, g_list,(namax-namin+1)*(nbmax-nbmin+1)*(ncmax-ncmin+1), &
+       &  mpi_integer, mpi_sum,mpi_ke_world,ierr)
+
+    end if
 
     if(npes > 1) then
-       allocate(ngpt_t(kgp,nopr)); ngpt_t = 0
+       allocate(ngpt_exx0_tmp(kgp))
        do iopr=1,nopr
+          ngpt_exx0_tmp = 0
           do i = ista_kngp, iend_kngp
-             ngpt_t(i,iopr) = ngpt_l(i,iopr)
+             ngpt_exx0_tmp(i) = ngpt_l(i,iopr)
           end do
+          call mpi_allreduce(MPI_IN_PLACE,ngpt_exx0_tmp,kgp,mpi_integer,mpi_sum,mpi_ke_world,ierr)
+          ngpt_exx(1:kg,iopr,0) = ngpt_exx0_tmp(1:kg)
+
+          if(ntrs>0) then
+             allocate(ngpt_exx_tmp(kg))
+             ngpt_exx_tmp = 0
+             do i=1,kg
+                ii = ngpt_exx0_tmp(i)
+                if(ista_kngp<=ii .and. ii<=iend_kngp) then
+                   ia = -ngabc_kngp_l(ii,1)
+                   ib = -ngabc_kngp_l(ii,2)
+                   ic = -ngabc_kngp_l(ii,3)
+                   ngpt_exx_tmp(i) = g_list(ia,ib,ic)
+                end if
+             end do
+             call mpi_allreduce(MPI_IN_PLACE, ngpt_exx_tmp, kg, mpi_integer, mpi_sum,mpi_ke_world,ierr)
+             ngpt_exx(1:kg,iopr,1) = ngpt_exx_tmp(1:kg)
+          end if
        end do
-       call mpi_allreduce(ngpt_t,ngpt_exx,kgp*nopr,mpi_integer,mpi_sum,mpi_ke_world,ierr)
-       deallocate(ngpt_t)
+       deallocate(ngpt_exx0_tmp)
     else
        do iopr=1,nopr
-          do i = ista_kngp, iend_kngp
-             ngpt_exx(i,iopr,0) = ngpt_l(i,iopr)
-          end do
+          if(ntrs==0) then
+             do i = 1, kg
+                ngpt_exx(i,iopr,0) = ngpt_l(i,iopr)
+             end do
+          else
+             do i = 1, kg
+                ngpt_exx(i,iopr,0) = ngpt_l(i,iopr)
+                ii = ngpt_l(i,iopr)
+                ia = -ngabc_kngp_l(ii,1)
+                ib = -ngabc_kngp_l(ii,2)
+                ic = -ngabc_kngp_l(ii,3)
+                ngpt_exx(i,iopr,1) = g_list(ia,ib,ic)
+             end do
+          end if
        end do
     end if
 
-    !! Time reversal symmetry
-
-   if(ntrs>0) then
-
-    namin = 0; namax = 0
-    nbmin = 0; nbmax = 0
-    ncmin = 0; ncmax = 0
-    do i=1,kgp
-       ia = ngabc(i,1)
-       ib = ngabc(i,2)
-       ic = ngabc(i,3)
-       namin = min(ia,namin)
-       namax = max(ia,namax)
-       nbmin = min(ib,nbmin)
-       nbmax = max(ib,nbmax)
-       ncmin = min(ic,ncmin)
-       ncmax = max(ic,ncmax)
-    end do
-    allocate(g_list(namin:namax,nbmin:nbmax,ncmin:ncmax))
-    do i=1,kgp
-       ia = ngabc(i,1)
-       ib = ngabc(i,2)
-       ic = ngabc(i,3)
-       g_list(ia,ib,ic) = i
-    end do
-    do iopr=1,nopr
-       do i=1,kgp
-          ii = ngpt_exx(i,iopr,0)
-          ia = -ngabc(ii,1)
-          ib = -ngabc(ii,2)
-          ic = -ngabc(ii,3)
-          ngpt_exx(i,iopr,1) = g_list(ia,ib,ic)
+!!$    allocate(ngpt_exx_tmp(kgp))
+!!$    do iopr=1,nopr
+!!$       ngpt_exx_tmp = 0
+!!$       do i=1,kgp
+!!$          ii = ngpt_exx(i,iopr,0)
+!!$          if(ista_kngp<=ii .and. ii<=iend_kngp) then
+!!$             ia = -ngabc_kngp_l(ii,1)
+!!$             ib = -ngabc_kngp_l(ii,2)
+!!$             ic = -ngabc_kngp_l(ii,3)
+!!$             ngpt_exx_tmp(i) = g_list(ia,ib,ic)
+!!$          end if
+!!$       end do
+!!$       call mpi_allreduce(MPI_IN_PLACE, ngpt_exx_tmp, kgp, mpi_integer, mpi_sum,mpi_ke_world,ierr)
+!!$       ngpt_exx(:,iopr,1) = ngpt_exx_tmp(:)
+!!$    end do
+!!$    deallocate(ngpt_exx_tmp)
+!!$    
+!!$          ia = -ngabc(ii,1)
+!!$          ib = -ngabc(ii,2)
+!!$          ic = -ngabc(ii,3)
+!!$          ngpt_exx(i,iopr,1) = g_list(ia,ib,ic)
+!!$       end do
+!!$    end do
+    if(ntrs>0) deallocate(g_list)
+
+    j = 0
+    do ii = 0, ntrs
+       do iopr = 1, nopr
+          do i = 1, kg
+             if(ngpt_exx(i,iopr,ii) <= 0) j = j + 1
+          end do
        end do
     end do
-    deallocate(g_list)
-   end if
+    if(j >= 1) then
+       write(nfout,'(" !! check of ngpt_exx")')
+       do ii = 0, ntrs
+          do iopr = 1, nopr
+             do i = 1, kg
+                if(ngpt_exx(i,iopr,ii) <= 0) write(nfout,'(" ngpt_exx(",i8,",",i8,",",i8,") = ",i20)') &
+                & i,iopr,ii, ngpt_exx(i,iopr,ii)
+             end do
+          end do
+       end do
+       write(nfout,'(" !! total number of negative values for ngpt_exx = ",i8)') j
+       write(nfout,'(" !! out of check of ngpt_exx")')
+    end if
 
 ! === Make FFT box index arrays. ===============================================
-   call m_Parallel_wf_onto_fft_exx_3D(nfout,fft_box_size_WF,igf,nbase,nbase_gamma, &
+   call Parallelize_wf_onto_fft_exx_3D(nfout,fft_box_size_WF,igf,nbase,nbase_gamma, &
   &                                   k_symmetry,GAMMA,kg,kg_gamma,kv3)
-   call m_Parallel_fft_onto_wf_rhog_3D(nfout,igf,kg,nfft)
-   call m_Parallel_wf_onto_fft_rhog_3D(nfout,fft_box_size_WF,igf,kg)
+   call Parallelize_fft_onto_wf_rhog_3D(nfout,igf,kg,nfft)
+   call Parallelize_wf_onto_fft_rhog_3D(nfout,fft_box_size_WF,igf,kg)
 ! ==============================================================================
     call tstatc0_end(id_sname)
     !!!stop 'Check: G_list'
@@ -2281,10 +2424,14 @@ contains
     occup_l(1:np_e,ista_k:iend_k) = 0.d0
     do ik=1,kv3,af+1
        if(map_k(ik) /= myrank_k) cycle ! MPI
-       do ib=ista_e,iend_e,istep_e
-          !!if(ib>nval) cycle
+       do ib = 1, neg
           if(neordr(ib,ik)>nval) cycle
-          occup_l(map_z(ib),ik) = kv3*qwgt(ik)
+          if(map_e(ib) == myrank_e) then
+!!$       do ib=ista_e,iend_e,istep_e
+!!$          !!if(ib>nval) cycle
+!!$          if(neordr(ib,ik)>nval) cycle
+             occup_l(map_z(ib),ik) = kv3*qwgt(ik)
+          end if
        end do
     end do
     call tstatc0_end(id_sname)
@@ -2404,6 +2551,7 @@ contains
     implicit none
 
     integer :: i,n,ik,ig
+    real(kind=DP), allocatable, dimension(:) :: ylm_t ! d(kngp,n)
     real(kind=DP), allocatable, dimension(:,:) :: gqmk ! d(ista_kngp:iend_kngp,3)
     real(kind=DP), allocatable, dimension(:) :: gqmkr ! d(ista_kngp:iend_kngp)
 
@@ -2416,7 +2564,9 @@ contains
     call m_PP_find_maximum_l(n)    ! n-1: maximum l
     n = (n-1) + (n-1) + 1
     n = n*n
-    if(.not.allocated(ylm_exx)) allocate(ylm_exx(ista_kngp:iend_kngp,n,nqmk))
+    if(.not.allocated(ylm_exx)) then
+      allocate(ylm_exx(ista_kngp:iend_kngp,n,nqmk))
+    endif
 
     allocate(gqmk(ista_kngp:iend_kngp,3))
     allocate(gqmkr(ista_kngp:iend_kngp))
@@ -2432,9 +2582,10 @@ contains
           gqmk(ig,1:3) = kg(1:3)
           gqmkr(ig) = sqrt(g2)
        end do
-          do i=1,n
-             call m_pwBS_sphrp_exx(i,rltv,ista_kngp,iend_kngp,gqmk,gqmkr,ylm_exx(ista_kngp,i,ik))
-          end do
+       do i=1,n
+          call m_pwBS_sphrp_exx(i,rltv,ista_kngp,iend_kngp,gqmk,gqmkr,ylm_exx(ista_kngp,i,ik))
+       end do
+
     end do
     !!stop 'm_ES_EXX_ylm'
     deallocate(gqmk)
@@ -2445,16 +2596,19 @@ contains
 
   subroutine check_qitg()
     implicit none
-    integer :: iq 
+    integer :: iq, ips , ipe
+
+    ips = 1
+    ipe = nmax_G_hyb
+    ips = ista_kngp
+    ipe = iend_kngp
 
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"qitg_l=",f20.5,1x,"qitg_exx=",f20.5,1x)') &
-         & iq, qitg_l(ista_kngp,iq), qitg_exx(ista_kngp,iq,1)/univol
+       write(nfout,'("iq=",i5,1x,"qitg_l=",f20.5,1x,"qitg_exx=",f20.5,1x)') iq, qitg_l(ips,iq), qitg_exx(ips,iq,1)/univol
     end do
 
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"diff=",f20.5)') &
-         & iq, sum(qitg_l(ista_kngp:iend_kngp,iq)-qitg_exx(ista_kngp:iend_kngp,iq,1)/univol)
+       write(nfout,'("iq=",i5,1x,"diff=",f20.5)') iq, sum(qitg_l(ips:ipe,iq)-qitg_exx(ips:ipe,iq,1)/univol)
     end do
 
     stop 'check_qitg'
@@ -2462,19 +2616,22 @@ contains
 
   subroutine check_qitg_qmk()
     implicit none
-    integer :: iq, ik
+    integer :: iq, ik, ips, ipe
+
+    ips = 1
+    ipe = nmax_G_hyb
+    ips = ista_kngp
+    ipe = iend_kngp
 
     do ik=1,nqmk
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"qitg_exx=",f20.5,1x)') &
-         & iq, qitg_exx(ista_kngp,iq,ik)/univol
+       write(nfout,'("iq=",i5,1x,"qitg_exx=",f20.5,1x)') iq, qitg_exx(ips,iq,ik)/univol
     end do
     end do
 
     do ik=1,nqmk
     do iq=1,nqitg
-       write(nfout,'("iq=",i5,1x,"sum=",f20.5)') &
-         & iq, sum(qitg_exx(ista_kngp:iend_kngp,iq,ik)/univol)
+       write(nfout,'("iq=",i5,1x,"sum=",f20.5)') iq, sum(qitg_exx(ips:ipe,iq,ik)/univol)
     end do
     end do
 
@@ -2483,23 +2640,26 @@ contains
 
   subroutine check_ylm_exx()
     implicit none
-    integer :: i, n, ik
+    integer :: i, n, ik, ips, ipe
 
     call m_PP_find_maximum_l(n)    ! n-1: maximum l
     n = (n-1) + (n-1) + 1
     n = n*n
 
+    ips = 1
+    ipe = nmax_G_hyb
+    ips = ista_kngp
+    ipe = iend_kngp
+
     do ik=1,nqmk
     do i=1,n
-       write(nfout,'("i=",i5,1x,"ylm_exx=",f20.5,1x)') &
-         & i, ylm_exx(ista_kngp,i,ik)
+       write(nfout,'("i=",i5,1x,"ylm_exx=",f20.5,1x)') i, ylm_exx(ips,i,ik)
     end do
     end do
 
     do ik=1,nqmk
     do i=1,n
-       write(nfout,'("i=",i5,1x,"sum=",f20.5)') &
-         & i, sum(qitg_exx(ista_kngp:iend_kngp,i,ik))
+       write(nfout,'("i=",i5,1x,"sum=",f20.5)')  i, sum(qitg_exx(ips:ipe,i,ik))
     end do
     end do
 
@@ -2566,7 +2726,7 @@ contains
     real(kind=DP), allocatable :: qitg_red(:,:),ylm_red(:,:)
     real(kind=DP), allocatable :: rhogr_red(:),rhogi_red(:)
     real(kind=DP) :: yr,yi
-    integer :: ibl1,ibl2,iq,inn,ip,m
+    integer :: ibl1,ibl2,iq,inn,ip,m, ips, ipe
 
     integer,save  :: id_sname = -1
 
@@ -2584,13 +2744,15 @@ contains
     allocate(rhogr_red(ibsize))
     allocate(rhogi_red(ibsize))
 ! --
-    do ibl1=ista_kngp,iend_kngp,ibsize
-       rhogr_red=0.d0;  rhogi_red=0.d0
+    ips = 1
+    ipe = nmax_G_hyb
+    ips = ista_kngp
+    ipe = iend_kngp
 
-       ibl2=ibl1+ibsize-1
-
-       if(ibl2.gt.iend_kngp) ibl2=iend_kngp
+    do ibl1=ips, ipe, ibsize
+       rhogr_red=0.d0;  rhogi_red=0.d0
 
+       ibl2=min(ipe,ibl1+ibsize-1)
        do iq=1,nqitg
           do i=1,ibl2-ibl1+1
              qitg_red(i,iq) = qitg_exx(i+ibl1-1,iq,iqmk)
@@ -2854,6 +3016,139 @@ contains
     call tstatc0_end(id_sname)
   end subroutine add_RHOG_hard_part_
 #endif
+  subroutine add_RHOG_hard_part_rs2(iqmk,rhor,rhoi,qmfnr,qmfni,fmr,fmi)
+    integer, intent(in) :: iqmk
+    real(kind=DP), intent(out) :: rhor(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(out) :: rhoi(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(in) :: qmfnr(nmesh_rs_aug_max,nlmta)
+    real(kind=DP), intent(in) :: qmfni(nmesh_rs_aug_max,nlmta)
+    real(kind=DP), intent(in) :: fmr(nlmta)
+    real(kind=DP), intent(in) :: fmi(nlmta)
+    real(kind=DP), dimension(:), allocatable :: cosqmkr,sinqmkr
+    real(kind=DP), allocatable, dimension(:) :: rtmp,itmp
+    real(kind=DP) :: fr,fi,co,si,qmr,qmi,rr,ii
+    integer :: ia,it,lmt1,ilmta1,ind,imesh,nma
+    integer,save  :: id_sname = -1
+    call tstatc0_begin('add_RHOG_hard_part_rs2 ',id_sname,1)
+    allocate(cosqmkr(nmesh_rs_aug_max));cosqmkr=0.d0
+    allocate(sinqmkr(nmesh_rs_aug_max));sinqmkr=0.d0
+    allocate(rtmp(nmesh_rs_aug_max));rtmp=0.d0
+    allocate(itmp(nmesh_rs_aug_max));itmp=0.d0
+    rhor(:) = 0.d0;rhoi(:) = 0.d0
+    do ia=1,natm
+       it = ityp(ia)
+       if( m_PP_include_vanderbilt_pot(it) == SKIP) cycle
+       call qmk_dot_r(iqmk,ia,cosqmkr,sinqmkr)
+       nma = nmesh_rs_aug(ia)
+       rtmp=0.d0;itmp=0.d0
+       do lmt1=1,ilmt(it)
+          ilmta1 = lmta(lmt1,ia)
+          fr =  fmr(ilmta1)
+          fi = -fmi(ilmta1) ! c.c. of fsr
+          do imesh=1,nma
+             co =  cosqmkr(imesh)
+             si = -sinqmkr(imesh) ! c.c. of exp(i(G+q-k))
+             qmr = qmfnr(imesh,ilmta1)
+             qmi = qmfni(imesh,ilmta1)
+             rr = co*qmr-si*qmi
+             ii = co*qmi+si*qmr
+             rtmp(imesh) = rtmp(imesh) + rr*fr-ii*fi
+             itmp(imesh) = itmp(imesh) + ii*fr+rr*fi
+          enddo
+       enddo
+       do imesh=1,nma
+          ind = meshxyz_rs_aug(imesh,ia)
+          rhor(ind) = rhor(ind) + rtmp(imesh)
+          rhoi(ind) = rhoi(ind) + itmp(imesh)
+       enddo
+    enddo
+    deallocate(cosqmkr) 
+    deallocate(sinqmkr) 
+    deallocate(rtmp)
+    deallocate(itmp)
+    call tstatc0_end(id_sname)
+  end subroutine add_RHOG_hard_part_rs2
+
+  subroutine add_RHOG_hard_part_rs(iqmk,rhor,rhoi,fnr,fni,fmr,fmi)
+    implicit none
+    integer, intent(in) :: iqmk
+    real(kind=DP), intent(out) :: rhor(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(out) :: rhoi(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(in) :: fnr(nlmta)
+    real(kind=DP), intent(in) :: fni(nlmta)
+    real(kind=DP), intent(in) :: fmr(nlmta)
+    real(kind=DP), intent(in) :: fmi(nlmta)
+    integer :: n,it,ia,imesh,lmt1,lmt2,il1,il2,tau1,tau2,ilmta1,ilmta2,lmtp
+    integer :: nma
+    real(kind=DP), dimension(:), allocatable :: cosqmkr,sinqmkr
+    real(kind=DP) :: rr,ii,si,co,qm,rr0,ii0
+    integer :: ind
+    real(kind=DP), allocatable, dimension(:) :: rtmp,itmp
+    real(kind=DP) :: fac
+    integer,save  :: id_sname = -1
+    call tstatc0_begin('add_RHOG_hard_part_rs ',id_sname,1)
+    allocate(cosqmkr(nmesh_rs_aug_max));cosqmkr=0.d0
+    allocate(sinqmkr(nmesh_rs_aug_max));sinqmkr=0.d0
+    allocate(rtmp(nmesh_rs_aug_max));rtmp=0.d0
+    allocate(itmp(nmesh_rs_aug_max));itmp=0.d0
+    rhor(:) = 0.d0;rhoi(:) = 0.d0
+    do ia=1,natm
+       it = ityp(ia)
+       if( m_PP_include_vanderbilt_pot(it) == SKIP) cycle
+       call qmk_dot_r(iqmk,ia,cosqmkr,sinqmkr)
+       nma = nmesh_rs_aug(ia)
+       rtmp=0.d0;itmp=0.d0
+       do lmtp = 1,nlmtpair(ia)
+          lmt1 = plmt1(lmtp,ia)
+          lmt2 = plmt2(lmtp,ia)
+          ilmta1 = lmta(lmt1,ia)
+          ilmta2 = lmta(lmt2,ia)
+          rr = fnr(ilmta1)*fmr(ilmta2) + fni(ilmta1)*fmi(ilmta2)
+          ii = fnr(ilmta1)*fmi(ilmta2) - fni(ilmta1)*fmr(ilmta2)
+          if(lmt1.ne.lmt2)then
+            rr = rr+fnr(ilmta2)*fmr(ilmta1) + fni(ilmta2)*fmi(ilmta1)
+            ii = ii+fnr(ilmta2)*fmi(ilmta1) - fni(ilmta2)*fmr(ilmta1)
+          endif
+          do imesh=1,nma
+             co = cosqmkr(imesh)
+             si = sinqmkr(imesh)
+             qm = qr_clm_ylm(imesh,ia,lmtp)
+             rtmp(imesh) = rtmp(imesh) + qm*(rr*co+ii*si)
+             itmp(imesh) = itmp(imesh) + qm*(ii*co-rr*si)
+          enddo
+       enddo
+       do imesh=1,nma
+          ind = meshxyz_rs_aug(imesh,ia)
+          rhor(ind) = rhor(ind) + rtmp(imesh)
+          rhoi(ind) = rhoi(ind) + itmp(imesh)
+       enddo
+    enddo
+    deallocate(cosqmkr)
+    deallocate(sinqmkr)
+    deallocate(rtmp)
+    deallocate(itmp)
+    call tstatc0_end(id_sname)
+  end subroutine add_RHOG_hard_part_rs
+
+  subroutine qmk_dot_r(iqmk,ia,zc_ar,zs_ar)
+    integer, intent(in) :: iqmk,ia 
+    real(kind=DP),dimension(nmesh_rs_aug_max),intent(out) :: zc_ar,zs_ar
+    integer :: i
+    real(kind=DP) :: inl,inm,inn
+    real(kind=DP) :: rx,ry,rz,kdr
+    integer :: id_sname = -1
+    inl = 1.d0/dble(fft_box_size_CD_exx(1,1))
+    inm = 1.d0/dble(fft_box_size_CD_exx(2,1))
+    inn = 1.d0/dble(fft_box_size_CD_exx(3,1))
+    do i=1,nmesh_rs_aug(ia)
+       rx = dble(meshx_rs_aug(i,ia))*inl
+       ry = dble(meshy_rs_aug(i,ia))*inm
+       rz = dble(meshz_rs_aug(i,ia))*inn
+       kdr = (rx*qmk(iqmk,1)+ry*qmk(iqmk,2)+rz*qmk(iqmk,3))*PAI2
+       zc_ar(i) = dcos(kdr)
+       zs_ar(i) = dsin(kdr)
+    enddo
+  end subroutine qmk_dot_r
 
 ! ================================== KT_Test ========================= 12.5Exp
   subroutine integrate_QijVnm(iqmk,potr,poti,fmr,fmi,qvr,qvi,dfmr,dfmi,dqvr,dqvi,gqvr,gqvi)
@@ -2880,7 +3175,7 @@ contains
     real(kind=DP), allocatable :: zsr(:), zsi(:)
 
     real(kind=DP), allocatable :: qitg_red(:,:),ylm_red(:,:)
-    integer :: ibl1,ibl2,iq,inn,ip,m
+    integer :: ibl1,ibl2,iq,inn,ip,m, ips, ipe
     logical :: force_mode = .false. 
     real(kind=DP), allocatable :: gvec(:,:)
     real(kind=DP) :: er(3), ei(3)
@@ -2901,9 +3196,14 @@ contains
     allocate(qitg_red(ibsize,nqitg))
     allocate(ylm_red(ibsize,n*n))
     if(force_mode) allocate(gvec(ibsize,3))
-    do ibl1=ista_kngp,iend_kngp,ibsize
-       ibl2=ibl1+ibsize-1
-       if(ibl2.gt.iend_kngp) ibl2=iend_kngp
+
+    ips = 1
+    ipe = nmax_G_hyb
+    ips = ista_kngp
+    ipe = iend_kngp
+
+    do ibl1=ips, ipe, ibsize
+       ibl2=min(ipe,ibl1+ibsize-1)
        do iq=1,nqitg
           do i=1,ibl2-ibl1+1
              qitg_red(i,iq) = qitg_exx(i+ibl1-1,iq,iqmk)
@@ -3312,6 +3612,181 @@ contains
 #endif
 ! ========================================================================= 125.Exp
 
+  subroutine integrate_QijVnm_rs2(iqmk,potr,poti,fmrq,fmiq,qvr,qvi)
+    integer, intent(in) :: iqmk
+    real(kind=DP), intent(in) :: potr(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(in) :: poti(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(in) :: fmrq(nmesh_rs_aug_max,nlmta)
+    real(kind=DP), intent(in) :: fmiq(nmesh_rs_aug_max,nlmta)
+    real(kind=DP), intent(out) :: qvr(nlmta)
+    real(kind=DP), intent(out) :: qvi(nlmta)
+    real(kind=DP), dimension(:), allocatable :: cosqmkr,sinqmkr
+    real(kind=DP), allocatable, dimension(:) :: rra,iia
+    integer :: ia,it,lmt1,ilmta1
+    integer :: imesh,nma,ind
+    real(kind=DP) :: co,si
+    real(kind=DP) :: qr,qi,qmr,qmi,rr,ii
+    integer :: id_sname=-1
+    call tstatc0_begin('integrate_QijVnm_rs2 ',id_sname,1)
+    allocate(cosqmkr(nmesh_rs_aug_max));cosqmkr=0.d0
+    allocate(sinqmkr(nmesh_rs_aug_max));sinqmkr=0.d0
+    allocate(rra(nmesh_rs_aug_max));rra=0.d0
+    allocate(iia(nmesh_rs_aug_max));iia=0.d0
+    qvr(:) = 0.d0
+    qvi(:) = 0.d0
+    do ia=1,natm
+       it = ityp(ia)
+       if( m_PP_include_vanderbilt_pot(it) == SKIP) cycle
+       call qmk_dot_r(iqmk,ia,cosqmkr,sinqmkr)
+       nma = nmesh_rs_aug(ia)
+       do imesh=1,nma
+          ind = meshxyz_rs_aug(imesh,ia)
+          co  =  cosqmkr(imesh)
+          si  = -sinqmkr(imesh)
+          rra(imesh) =  potr(ind)*co+poti(ind)*si
+          iia(imesh) = -poti(ind)*co+potr(ind)*si
+       enddo
+       do lmt1=1,ilmt(it)
+          ilmta1 = lmta(lmt1,ia)
+          qr=0.d0;qi=0.d0
+          do imesh=1,nma
+             qmr = fmrq(imesh,ilmta1)
+             qmi = fmiq(imesh,ilmta1)
+             rr = rra(imesh)
+             ii = iia(imesh)
+             qr = qr+qmr*rr-qmi*ii
+             qi = qi+qmr*ii+qmi*rr
+          enddo
+          qvr(ilmta1) = qr
+          qvi(ilmta1) = qi
+       enddo
+    enddo
+    deallocate(cosqmkr)
+    deallocate(sinqmkr)
+    deallocate(rra,iia)
+    call tstatc0_end(id_sname)
+  end subroutine integrate_QijVnm_rs2
+
+  subroutine integrate_QijVnm_rs(iqmk,potr,poti,fmr,fmi,qvr,qvi,dfmr,dfmi,dqvr,dqvi,gqvr,gqvi)
+    integer, intent(in) :: iqmk
+    real(kind=DP), intent(in) :: potr(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(in) :: poti(nfftp_exx_nonpara/2)
+    real(kind=DP), intent(in) :: fmr(nlmta)
+    real(kind=DP), intent(in) :: fmi(nlmta)
+    real(kind=DP), intent(out) :: qvr(nlmta)
+    real(kind=DP), intent(out) :: qvi(nlmta)
+    real(kind=DP), intent(in),optional  :: dfmr(nlmta,3)
+    real(kind=DP), intent(in),optional  :: dfmi(nlmta,3)
+    real(kind=DP), intent(out),optional :: dqvr(nlmta,3)
+    real(kind=DP), intent(out),optional :: dqvi(nlmta,3)
+    real(kind=DP), intent(out),optional :: gqvr(nlmta,3)
+    real(kind=DP), intent(out),optional :: gqvi(nlmta,3)
+    integer :: n,it,ia,imesh,lmt1,lmt2,il1,il2,tau1,tau2,ilmta1,ilmta2,lmtp
+    real(kind=DP), dimension(:), allocatable :: cosqmkr,sinqmkr
+    real(kind=DP) :: co,si,rr,ii,rrr,iii,qr,qi,qqr,qqi,qm,rr0,ii0
+    real(kind=DP), allocatable, dimension(:) :: rra,iia
+    integer :: nma,ind
+    real(kind=DP) :: er(3), ei(3),dqm(3),dqqr(3),dqqi(3),der(3),dei(3),drr(3),dii(3),drr0(3),dii0(3)
+    real(kind=DP) :: dqqr0(3),dqqi0(3),der0(3),dei0(3)
+    logical :: force_mode = .false.
+    integer :: id_sname=-1
+    call tstatc0_begin('integrate_QijVnm_rs ',id_sname,1)
+    force_mode = present(dfmr).and.present(dfmi).and. &
+               & present(dqvr).and.present(dqvi).and. &
+               & present(gqvr).and.present(gqvi)
+    allocate(cosqmkr(nmesh_rs_aug_max));cosqmkr=0.d0
+    allocate(sinqmkr(nmesh_rs_aug_max));sinqmkr=0.d0
+    allocate(rra(nmesh_rs_aug_max));rra=0.d0
+    allocate(iia(nmesh_rs_aug_max));iia=0.d0
+    qvr(:) = 0.d0
+    qvi(:) = 0.d0
+    if(force_mode)then
+       dqvr(1:nlmta,1:3) = 0.d0
+       dqvi(1:nlmta,1:3) = 0.d0
+       gqvr(1:nlmta,1:3) = 0.d0
+       gqvi(1:nlmta,1:3) = 0.d0
+    endif
+    do ia=1,natm
+       it = ityp(ia)
+       if( m_PP_include_vanderbilt_pot(it) == SKIP) cycle
+       call qmk_dot_r(iqmk,ia,cosqmkr,sinqmkr)
+       nma = nmesh_rs_aug(ia)
+       do imesh=1,nma
+          ind = meshxyz_rs_aug(imesh,ia)
+          co  =  cosqmkr(imesh)
+          si  = -sinqmkr(imesh)
+          rra(imesh) =  potr(ind)*co+poti(ind)*si
+          iia(imesh) = -poti(ind)*co+potr(ind)*si
+       enddo
+       do lmtp=1,nlmtpair(ia)
+          lmt1 = plmt1(lmtp,ia)
+          lmt2 = plmt2(lmtp,ia)
+          ilmta1 = lmta(lmt1,ia)
+          ilmta2 = lmta(lmt2,ia)
+          rr0 =  fmr(ilmta1)
+          ii0 =  fmi(ilmta1)
+          qqr=0.d0;qqi=0.d0
+          qr=0.d0;qi=0.d0
+          rr =  fmr(ilmta2)
+          ii =  fmi(ilmta2)
+          if(force_mode)then
+            dqqr=0.d0;dqqi=0.d0
+            dqqr0=0.d0;dqqi0=0.d0
+            der=0.d0;dei=0.d0
+            der0=0.d0;dei0=0.d0
+            drr(1:3) =  dfmr(ilmta2,1:3)
+            dii(1:3) =  dfmi(ilmta2,1:3)
+            er=0.d0;ei=0.d0
+          endif
+          do imesh=1,nma
+             qm = qr_clm_ylm(imesh,ia,lmtp)
+             qr = qr+qm*rra(imesh)
+             qi = qi+qm*iia(imesh)
+          enddo
+          if(force_mode)then
+             do imesh=1,nma
+                dqm(1:3) = dqr_clm_ylm(imesh,ia,lmtp,1:3)
+                er(1:3) = er(1:3)+dqm(1:3)*rra(imesh)
+                ei(1:3) = ei(1:3)+dqm(1:3)*iia(imesh)
+             enddo
+          endif
+          qqr = qr*rr-qi*ii
+          qqi = qr*ii+qi*rr
+          qvr(ilmta1) = qvr(ilmta1)+qqr
+          qvi(ilmta1) = qvi(ilmta1)+qqi
+          if(force_mode)then
+             dqqr(1:3)  = qr*drr(1:3)-qi*dii(1:3)
+             dqqi(1:3)  = qr*dii(1:3)+qi*drr(1:3)
+             der(1:3)   = er(1:3)*rr-ei(1:3)*ii
+             dei(1:3)   = er(1:3)*ii+ei(1:3)*rr
+             dqvr(ilmta1,1:3) = dqvr(ilmta1,1:3)+dqqr(1:3)
+             dqvi(ilmta1,1:3) = dqvi(ilmta1,1:3)+dqqi(1:3)
+             gqvr(ilmta1,1:3) = gqvr(ilmta1,1:3)+der(1:3)
+             gqvi(ilmta1,1:3) = gqvi(ilmta1,1:3)+dei(1:3)
+          endif
+          if(lmt1.ne.lmt2)then
+             qvr(ilmta2) = qvr(ilmta2)+(qr*rr0-qi*ii0)
+             qvi(ilmta2) = qvi(ilmta2)+(qr*ii0+qi*rr0)
+          endif
+          if(force_mode.and.lmt1.ne.lmt2)then
+             drr0(1:3) =  dfmr(ilmta1,1:3)
+             dii0(1:3) =  dfmi(ilmta1,1:3)
+             dqqr0(1:3) = qr*drr0(1:3)-qi*dii0(1:3)
+             dqqi0(1:3) = qr*dii0(1:3)+qi*drr0(1:3)
+             der0(1:3)   = er(1:3)*rr0-ei(1:3)*ii0
+             dei0(1:3)   = er(1:3)*ii0+ei(1:3)*rr0
+             dqvr(ilmta2,1:3) = dqvr(ilmta2,1:3)+dqqr0(1:3)
+             dqvi(ilmta2,1:3) = dqvi(ilmta2,1:3)+dqqi0(1:3)
+             gqvr(ilmta2,1:3) = gqvr(ilmta2,1:3)+der0(1:3)
+             gqvi(ilmta2,1:3) = gqvi(ilmta2,1:3)+dei0(1:3)
+          endif
+       enddo
+    enddo
+    deallocate(cosqmkr)
+    deallocate(sinqmkr)
+    deallocate(rra,iia)
+    call tstatc0_end(id_sname)
+  end subroutine integrate_QijVnm_rs
 
   subroutine add_Vx_hard_part(ik,vxw,sumqvr,sumqvi)
   !!subroutine add_Vx_hard_part(ik,vxw1,sumqvr,sumqvi)
@@ -3527,13 +4002,15 @@ contains
     implicit none
     real(kind=DP), intent(out) :: force(natm,3)
 
-    integer :: ik,ib,ig,ispin,ia
+    integer :: ik,ib,ig,ispin,ia,iadd
     real(kind=DP), allocatable :: force_l(:,:) ! d(natm,3)
     real(kind=DP), allocatable :: force_mpi(:,:) ! d(natm,3)
     real(kind=DP), allocatable :: efsr_l(:,:) ! d(np_e,nlmta)
     real(kind=DP), allocatable :: efsi_l(:,:) ! d(np_e,nlmta)
     real(kind=DP), allocatable :: defsr_l(:,:,:) ! d(np_e,nlmta,3)
     real(kind=DP), allocatable :: defsi_l(:,:,:) ! d(np_e,nlmta,3)
+    real(kind=DP), allocatable, dimension(:) :: zajbuf_r,zajbuf_i
+    integer :: kgw,kgv
     integer,save  :: id_sname = -1
 
     if(modnrm /= EXECUT) then
@@ -3567,16 +4044,22 @@ contains
           do ib=1,np_e   ! MPI
              if(occup_l(ib,ik) < DELTA) cycle
              force_l = 0.d0
+             allocate(zajbuf_r(maxval(np_g1k)));zajbuf_r(1:np_g1k(ik))=zaj_l(1:np_g1k(ik),ib,ik,1)
+             allocate(zajbuf_i(maxval(np_g1k)));zajbuf_i(1:np_g1k(ik))=zaj_l(1:np_g1k(ik),ib,ik,kimg)
+             kgw = maxval(np_g1k);kgv=maxval(np_g1k)
+
              if(kimg==1) then
-                call apply_Vx_to_WF(ispin,ib,ik,zaj_l(1,ib,ik,1),zaj_l(1,ib,ik,1) &
+                call apply_Vx_to_WF(ispin,ib,ik,kgw,kgv,zajbuf_r,zajbuf_r &
                     & ,efsr_l(ib,1:nlmta),efsr_l(ib,1:nlmta) &
                     & ,dbdwr=dfsr_l(ib,1:nlmta,ik,1:3),dbdwi=dfsr_l(ib,1:nlmta,ik,1:3),force_l=force_l)
              else
-                call apply_Vx_to_WF(ispin,ib,ik,zaj_l(1,ib,ik,1),zaj_l(1,ib,ik,2) &
+                call apply_Vx_to_WF(ispin,ib,ik,kgw,kgv,zajbuf_r,zajbuf_i &
                     & ,efsr_l(ib,1:nlmta),efsi_l(ib,1:nlmta) &
                     & ,dbdwr=dfsr_l(ib,1:nlmta,ik,1:3),dbdwi=dfsi_l(ib,1:nlmta,ik,1:3),force_l=force_l)
              end if
              force = force + occup_l(ib,ik) * force_l
+             deallocate(zajbuf_r)
+             deallocate(zajbuf_i)
           end do
        end do
     end do
@@ -3782,7 +4265,7 @@ contains
 ! ==============================================================================
 ! === Make FFT box index arrays. ===============================================
 ! ==============================================================================
-  subroutine m_Parallel_wf_onto_fft_exx_3D(nfout,fft_box_size_WF,igf,nbase,nbase_gamma, &
+  subroutine Parallelize_wf_onto_fft_exx_3D(nfout,fft_box_size_WF,igf,nbase,nbase_gamma, &
  &                                         k_symmetry,GAMMA,kg,kg_gamma,kv3)
     integer, intent(in)  :: nfout, kg, kg_gamma, kv3, GAMMA
     integer, intent(in)  :: fft_box_size_WF(1:3,0:1)
@@ -3813,7 +4296,7 @@ contains
        call mpi_irecv(xyz(1,1,i), 6, mpi_integer, &
       &               i, itag, mpi_ke_world, req_r(i), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_irecv error'
+          write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_irecv error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,170,ierr)
        endif
@@ -3822,20 +4305,20 @@ contains
        call mpi_isend(xyz_fft_x, 6, mpi_integer, &
       &               i, itag, mpi_ke_world, req_s(i), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_isend error'
+          write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_isend error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,171,ierr)
        endif
     enddo
     call mpi_waitall(nrank_g, req_r, sta_r, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world,172,ierr)
     endif
     call mpi_waitall(nrank_g, req_s, sta_s, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world,173,ierr)
     endif
@@ -3844,7 +4327,7 @@ contains
     call MPI_ALLGATHER(xyz_fft_x, 6, mpi_integer, &
    &                   rbuf,6, mpi_integer, mpi_ke_world, ierr )
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_allgather error'
+       write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_allgather error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 174, ierr)
     endif
@@ -3958,6 +4441,9 @@ contains
                 enddo B_3
              enddo B_1
           else
+!!$             write(nfout,'(" ista_g1k, iend_g1k for " ,i8," = ",2i8)') ik, ista_g1k(ik), iend_g1k(ik)
+!!$             write(nfout,'(" iopr, itrs = ",2i8)') iopr, itrs
+             call flush(6)
       B_11 : do j = ista_g1k(ik), iend_g1k(ik)
                 iadd = j-ista_g1k(ik)+1
                 i1 = igf(ngpt_exx(nbase(j,ik),iopr,itrs))
@@ -4000,7 +4486,7 @@ contains
           call mpi_irecv(wf_fft_recv_exx(1,1,i,iopr,itrs), max_mp_g1k*len*klen, mpi_integer, &
          &               i, itag, mpi_ke_world, req_r(i), ierr)
           if(ierr /= 0) then
-             write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_irecv error'
+             write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_irecv error'
              call flush(nfout)
              call mpi_abort(mpi_comm_world,174,ierr)
           endif
@@ -4009,20 +4495,20 @@ contains
           call mpi_isend(wf_fft_send_exx(1,1,i,iopr,itrs), max_mp_g1k*len*klen, mpi_integer, &
          &               i, itag, mpi_ke_world, req_s(i), ierr)
           if(ierr /= 0) then
-             write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_isend error'
+             write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_isend error'
              call flush(nfout)
              call mpi_abort(mpi_comm_world,175,ierr)
           endif
        enddo
        call mpi_waitall(nrank_g, req_r, sta_r, ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_waitall error'
+          write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_waitall error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,176,ierr)
        endif
        call mpi_waitall(nrank_g, req_s, sta_s, ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_waitall error'
+          write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_waitall error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,177,ierr)
        endif
@@ -4031,7 +4517,7 @@ contains
       &                  wf_fft_recv_exx(1,1,0,iopr,itrs), max_mp_g1k*len*klen, mpi_integer, &
       &                  mpi_ke_world, ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_exx_3D :  mpi_alltoall error'
+          write(nfout,*)' Parallelize_wf_onto_fft_exx_3D :  mpi_alltoall error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world, 178, ierr)
        endif
@@ -4093,7 +4579,7 @@ contains
     deallocate(work)
 
     deallocate(wf_fft_send_exx)
-  end subroutine m_Parallel_wf_onto_fft_exx_3D
+  end subroutine Parallelize_wf_onto_fft_exx_3D
 
   subroutine m_Parallel_wf_onto_fft_dealloc_exx_3D
     deallocate(wf_fft_rcnt_exx)
@@ -4105,7 +4591,7 @@ contains
     deallocate(wf_fft_maxsend_exx)
   end subroutine m_Parallel_wf_onto_fft_dealloc_exx_3D
 
-  subroutine m_Parallel_fft_onto_wf_rhog_3D(nfout,igf,kg,nfft)
+  subroutine Parallelize_fft_onto_wf_rhog_3D(nfout,igf,kg,nfft)
     integer, intent(in)  :: nfout, kg, nfft
     integer, intent(in)  :: igf(kg)
     integer, dimension(0:nrank_g-1)                       ::req_r,req_s
@@ -4127,7 +4613,7 @@ contains
     allocate(fft_wf_dist_rhog(fft_l_size), stat=ierr)
     allocate(fft_wf_index_rhog(fft_l_size), stat=ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  Not allocate '
+       write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  Not allocate '
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 204, ierr)
     endif
@@ -4140,7 +4626,7 @@ contains
 
     allocate(fftigf(nfft,2), stat=ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  Not allocate '
+       write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  Not allocate '
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 206, ierr)
     endif
@@ -4175,7 +4661,7 @@ contains
        call mpi_irecv(fft_wf_recv_rhog(1,lrank), isrsize, mpi_integer, &
       &               lrank, itag, mpi_ke_world, req_r(lrank), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  mpi_irecv error'
+          write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  mpi_irecv error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world, 207, ierr)
        endif
@@ -4188,7 +4674,7 @@ contains
        call mpi_isend(fft_wf_send_rhog(1,lrank), isrsize, mpi_integer, &
       &               lrank, itag, mpi_ke_world, req_s(lrank), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  mpi_isend error'
+          write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  mpi_isend error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world, 208, ierr)
        endif
@@ -4196,13 +4682,13 @@ contains
 
     call mpi_waitall(nrank_g, req_r, sta_r, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 209, ierr)
     endif
     call mpi_waitall(nrank_g, req_s, sta_s, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 210, ierr)
     endif
@@ -4211,7 +4697,7 @@ contains
    &                  fft_wf_recv_rhog, isrsize, mpi_integer, &
    &                  mpi_ke_world, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_fft_onto_wf_rhog_3D :  mpi_alltoall error'
+       write(nfout,*)' Parallelize_fft_onto_wf_rhog_3D :  mpi_alltoall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 211, ierr)
     endif
@@ -4244,7 +4730,7 @@ contains
     end do
     deallocate(work)
     deallocate(fft_wf_send_rhog)
-  end subroutine m_Parallel_fft_onto_wf_rhog_3D
+  end subroutine Parallelize_fft_onto_wf_rhog_3D
 
   subroutine m_Parallel_fft_onto_wf_dealloc_rhog_3D
     deallocate(fft_wf_rcnt_rhog)
@@ -4254,7 +4740,7 @@ contains
     deallocate(fft_wf_dist_rhog)
   end subroutine m_Parallel_fft_onto_wf_dealloc_rhog_3D
 
-  subroutine m_Parallel_wf_onto_fft_rhog_3D(nfout,fft_box_size_WF,igf,kg)
+  subroutine Parallelize_wf_onto_fft_rhog_3D(nfout,fft_box_size_WF,igf,kg)
     integer, intent(in)  :: nfout, kg
     integer, intent(in)  :: fft_box_size_WF(1:3,0:1)
     integer, intent(in)  :: igf(kg)
@@ -4280,7 +4766,7 @@ contains
        call mpi_irecv(xyz(1,1,i), 6, mpi_integer, &
       &               i, itag, mpi_ke_world, req_r(i), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_irecv error'
+          write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_irecv error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,170,ierr)
        endif
@@ -4289,20 +4775,20 @@ contains
        call mpi_isend(xyz_fft_x, 6, mpi_integer, &
       &               i, itag, mpi_ke_world, req_s(i), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_isend error'
+          write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_isend error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,171,ierr)
        endif
     enddo
     call mpi_waitall(nrank_g, req_r, sta_r, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world,172,ierr)
     endif
     call mpi_waitall(nrank_g, req_s, sta_s, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world,173,ierr)
     endif
@@ -4311,7 +4797,7 @@ contains
     call MPI_ALLGATHER(xyz_fft_x, 6, mpi_integer, &
    &                   rbuf, 6, mpi_integer, mpi_ke_world, ierr )
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_allgather error'
+       write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_allgather error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 174, ierr)
     endif
@@ -4375,7 +4861,7 @@ B_11 : do j = ista_kngp, min(kg,iend_kng
        call mpi_irecv(wf_fft_recv_rhog(1,i), max_mp_g1k*len, mpi_integer, &
       &               i, itag, mpi_ke_world, req_r(i), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_irecv error'
+          write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_irecv error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,174,ierr)
        endif
@@ -4384,20 +4870,20 @@ B_11 : do j = ista_kngp, min(kg,iend_kng
        call mpi_isend(wf_fft_send_rhog(1,i), max_mp_g1k*len, mpi_integer, &
       &               i, itag, mpi_ke_world, req_s(i), ierr)
        if(ierr /= 0) then
-          write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_isend error'
+          write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_isend error'
           call flush(nfout)
           call mpi_abort(mpi_comm_world,175,ierr)
        endif
     enddo
     call mpi_waitall(nrank_g, req_r, sta_r, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world,176,ierr)
     endif
     call mpi_waitall(nrank_g, req_s, sta_s, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_waitall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_waitall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world,177,ierr)
     endif
@@ -4406,7 +4892,7 @@ B_11 : do j = ista_kngp, min(kg,iend_kng
    &                  wf_fft_recv_rhog, max_mp_g1k*len, mpi_integer, &
    &                  mpi_ke_world, ierr)
     if(ierr /= 0) then
-       write(nfout,*)' m_Parallel_wf_onto_fft_rhog_3D :  mpi_alltoall error'
+       write(nfout,*)' Parallelize_wf_onto_fft_rhog_3D :  mpi_alltoall error'
        call flush(nfout)
        call mpi_abort(mpi_comm_world, 178, ierr)
     endif
@@ -4440,7 +4926,7 @@ B_11 : do j = ista_kngp, min(kg,iend_kng
     deallocate(work)
 
     deallocate(wf_fft_send_rhog)
-  end subroutine m_Parallel_wf_onto_fft_rhog_3D
+  end subroutine Parallelize_wf_onto_fft_rhog_3D
 
   subroutine m_Parallel_wf_onto_fft_dealloc_rhog_3D
     deallocate(wf_fft_rcnt_rhog)
diff -uprN phase0_2015.01/src_phase_3d/m_ES_IO.F90 phase0_2015.01.01/src_phase_3d/m_ES_IO.F90
--- phase0_2015.01/src_phase_3d/m_ES_IO.F90	2015-09-14 15:38:24.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_IO.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 459 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 487 $)
 !
 !  MODULE: m_ES_IO
 !
@@ -58,7 +58,7 @@
 
 !
 module m_ES_IO
-! $Id: m_ES_IO.F90 459 2015-09-10 08:50:04Z yamasaki $
+! $Id: m_ES_IO.F90 487 2016-05-17 05:20:42Z ktagami $
   use m_Electronic_Structure, only : zaj_l,neordr,nrvf_ordr,eko_l,occup_l,efermi,efermi_spin,totch&
        &                            ,vnlph_l,vlhxc_l,eko_ek
   use m_PlaneWaveBasisSet,    only : kgp,kg1,ngabc,nbase,iba
@@ -96,19 +96,35 @@ module m_ES_IO
 
   use m_Control_Parameters, only : ndim_magmom, ik_wf_squared, &
        &                           ib1_wf_squared, ib2_wf_squared, &
-       &                           wf_squared_filetype
+       &                           wf_squared_filetype,  max_projs, proj_attribute, &
+       &                           ndim_chgpot, SpinOrbit_Mode, &
+       &                           wf_orb_proj_print_format, proj_group, num_proj_elems
+  use m_Const_parameters,     only : Neglected
   use m_Files,              only :  nfwfk_sq, m_Files_open_nfwfksq_noncl, &
        &                            nfwfk_integ_mom, &
        &                            m_Files_open_nfwfk_integ_mom, &
-       &                            m_Files_close_nfwfk_integ_mom
-  use m_PseudoPotential,   only : nlmt, ilmt, lmta, q
+       &                            m_Files_close_nfwfk_integ_mom, &
+       &                            m_Files_open_nfwfk_orb_proj, &
+       &                            m_Files_close_nfwfk_orb_proj, &
+       &                            nfwfk_orb_proj
+  use m_PseudoPotential,   only : nlmt, ilmt, lmta, q, &
+       &                          nlmta_phi, nlmtt_phi, qorb, m_PP_tell_iorb_lmt, &
+       &                          m_PP_tell_iorb_ia_l_m_tau, ilmt_phi, &
+       &                          mtp_phi, lmta_phi, ltp_phi, taup_phi
+  use m_Nonlocal_Potential,   only : norm_phig
   use m_Charge_Density,    only : chgq_l, hsr, hsi, &
        &                          m_CD_alloc_rspace_charge, &
        &                          m_CD_dealloc_rspace_charge, &
        &                          m_CD_rspace_charge_noncl
-  use m_Ionic_System,      only : ityp
-  use m_Electronic_Structure,  only : fsr_l, fsi_l
+  use m_Ionic_System,      only : ityp, iproj_group
+  use m_Electronic_Structure,  only : fsr_l, fsi_l, compr_l, compi_l
   use m_ES_Noncollinear,   only : m_ES_set_Pauli_Matrix
+  use m_SpinOrbit_Potential,  only :  MatU_ylm_RC_L0,  MatU_ylm_RC_L1,  MatU_ylm_RC_L2, &
+       &                              MatU_ylm_RC_L3
+
+! ==== EXP_CELLOPT ==== 2015/09/24
+  use m_PlaneWaveBasisSet,    only : kg1_prev
+! ===================== 2015/09/24
 
   implicit none
   include 'mpif.h'
@@ -731,13 +747,15 @@ contains
                 if(mode == EIGEN_VALUES) then
                    write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, (e_mpi(nb,ik),nb=ie1,ie2)
                 else if(mode == OCCUPATIONS) then
-                   write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, (o_mpi(nb,ik)/(qwgt(ik)*kv3),nb=ie1,ie2)
+                   write(nf,'(" ik = ",i4,"  ",8f12.6)') ik, &
+                        &           (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb=ie1,ie2)
                 end if
              else
                 if(mode == EIGEN_VALUES) then
                    write(nf,'(12x,8f12.6)') (e_mpi(nb,ik),nb=ie1,ie2)
                 else if(mode == OCCUPATIONS) then
-                   write(nf,'(12x,8f12.6)') (o_mpi(nb,ik)/(qwgt(ik)*kv3),nb=ie1,ie2)
+                   write(nf,'(12x,8f12.6)') &
+                        &      (o_mpi(nb,ik)/(qwgt(ik)*kv3/ndim_spinor),nb=ie1,ie2)
                 end if
              end if
           end do
diff -uprN phase0_2015.01/src_phase_3d/m_ES_LHXC.F90 phase0_2015.01.01/src_phase_3d/m_ES_LHXC.F90
--- phase0_2015.01/src_phase_3d/m_ES_LHXC.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_LHXC.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 494 $)
 !
 !  MODULE: m_ES_LHXC
 !
@@ -47,7 +47,7 @@
 #endif
 
 module m_ES_LHXC
-! $Id: m_ES_LHXC.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_ES_LHXC.F90 494 2016-06-02 00:54:16Z jkoga $
   use m_Electronic_Structure, only : vlhxc_l, vloc_esm
   use m_PlaneWaveBasisSet,    only : kg,kgp,gr_l
   use m_PseudoPotential,      only : psc_l, ival
@@ -62,7 +62,7 @@ module m_ES_LHXC
                                    , sw_screening_correction, sw_external_potential 
 #endif
   use m_Const_Parameters,     only : DP, PAI4, UP, DOWN, ON, OFF, CMPLDP
-  use m_Parallelization,      only : ista_kngp, iend_kngp, mype, myrank_g, mpi_comm_group
+  use m_Parallelization,      only : ista_kngp, iend_kngp, mype, myrank_g, mpi_ke_world
   use m_Crystal_Structure,    only : univol
   use m_Dipole,               only : vdip_l, vext_l
   use m_Screening,            only : screening
@@ -77,7 +77,13 @@ module m_ES_LHXC
   use m_FFT,                  only : fft_box_size_CD
   use m_PlaneWaveBasisSet,    only : igfp_l
 
-implicit none
+! === POSITRON SCF === 2015/11/28
+  use m_Control_Parameters,   only : sw_positron, positron_method
+  use m_Const_Parameters,     only : positron_GGGC
+  use m_Positron_Wave_Functions,        only :  pchg_l
+! ==================== 2015/11/28
+
+  implicit none
   include 'mpif.h'
 !  61. m_ESlhxc_potential
 contains
@@ -110,16 +116,19 @@ contains
     if(sw_esm==ON) then
        nfftcd = fft_box_size_CD(1,0)*fft_box_size_CD(2,0)*fft_box_size_CD(3,0)
        allocate(vhar(nfftcd));vhar=(0.d0,0.d0)
-       allocate(chgc(iend_kngp-ista_kngp+1,nspin));chgc=(0.d0,0.d0)
+       allocate(chgc(kgp,nspin));chgc=(0.d0,0.d0)
        if(kimg==1)then
           do ig=ista_kngp,iend_kngp
-             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),0.d0)
+!             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),0.d0)
+             chgc(ig,1:nspin) = dcmplx(chg(ig,1,1:nspin),0.d0)
           enddo
        else
           do ig=ista_kngp,iend_kngp
-             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),chg(ig,2,1:nspin))
+!             chgc(ig-ista_kngp+1,1:nspin) = dcmplx(chg(ig,1,1:nspin),chg(ig,2,1:nspin))
+             chgc(ig,1:nspin) = dcmplx(chg(ig,1,1:nspin),chg(ig,2,1:nspin))
           enddo
        endif
+       call mpi_allreduce(mpi_in_place,chgc,kgp*nspin,mpi_double_complex,mpi_sum,mpi_ke_world,ierr)
        call esm_hartree(chgc,ehar,vhar)
        vhar(:) = 0.5d0*vhar(:) !Ry -> Ha
        deallocate(chgc)
diff -uprN phase0_2015.01/src_phase_3d/m_ES_Mag_Constraint.f90 phase0_2015.01.01/src_phase_3d/m_ES_Mag_Constraint.f90
--- phase0_2015.01/src_phase_3d/m_ES_Mag_Constraint.f90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_Mag_Constraint.f90	2016-07-12 12:51:53.000000000 +0900
@@ -1,5 +1,5 @@
 module m_ES_Mag_Constraint
-! $Id: m_ES_Mag_Constraint.f90 409 2014-10-27 09:24:52Z jkoga $
+! $Id: m_ES_Mag_Constraint.f90 469 2015-09-30 03:06:13Z ktagami $
 
   use m_Parallelization,      only : ista_kngp, iend_kngp, ierr, npes, mype, &
        &                             mpi_comm_group
@@ -242,8 +242,152 @@ contains
 
     end subroutine case_constraint_moment_local
 
-    subroutine case_constraint_direc_local            ! only for noncol
-      stop "Not supported"
+    subroutine case_constraint_direc_local        
+      real(kind=DP) :: MagMom(3), cfactor(3), MagDirec(3)
+      real(kind=DP) :: rad1, fac1r, fac1i, fac2
+      real(kind=DP) :: VecG(3), normG, normG3, gr, d1
+      real(kind=DP) :: c1, c2, c3, cnorm, cnorm2,  cnorm4
+
+      real(kind=DP), parameter :: cnorm_lower_limit = 1.0D-10
+!      real(kind=DP), parameter :: cnorm_lower_limit = 1.0D-4
+
+      real(kind=DP), allocatable :: zfcos(:), zfsin(:)
+      real(kind=DP), allocatable :: RhoMag_on_atom_mpi(:,:)
+
+      integer :: i, j, ia, it, is, ixyz, ixyz_max, ist
+
+      allocate(zfcos(ista_kngp:iend_kngp)); zfcos = 0.d0
+      allocate(zfsin(ista_kngp:iend_kngp)); zfsin = 0.d0
+
+      if ( noncol ) then
+         ixyz_max = 3
+      else
+         ixyz_max = 1
+      endif
+
+      if ( allocated( MagField_constrain_local ) ) then
+         deallocate( MagField_constrain_local )
+      endif
+
+      allocate( MagField_constrain_local(ista_kngp:iend_kngp, kimg, ixyz_max ))
+      MagField_constrain_local = 0.0d0
+!
+      Do ia=1, natm
+         it = ityp(ia)
+         rad1 = rad_cov(ia)
+
+         MagMom = 0.0d0
+         Do ixyz=1, ixyz_max
+            if ( noncol ) then
+               MagMom( ixyz ) = RhoMag_on_atom( ia, ixyz +1 )
+            else
+               MagMom( ixyz ) = RhoMag_on_atom( ia, 1 ) -RhoMag_on_atom( ia, 2 )
+            endif
+         End do
+
+         cnorm2 = 0.0d0
+         Do ixyz=1, ixyz_max
+            cnorm2 = cnorm2 + MagMom(ixyz)**2
+         End do
+         cnorm = sqrt( cnorm2 )
+
+         if ( cnorm < cnorm_lower_limit ) cycle
+
+         MagDirec = MagMom / cnorm
+!
+
+         cnorm4 = 0.0d0
+         Do ixyz=1, ixyz_max
+            cnorm4 = cnorm4 +mag_moment0_atomtyp(it,ixyz)**2
+         End do
+         cnorm4 = sqrt( cnorm4 )
+!
+         Do ixyz=1, ixyz_max
+            if ( noncol ) then
+               c1 = cnorm2 - MagMom(ixyz)**2;    c2 = cnorm**3
+               if ( cnorm4 > 0.0 ) then
+                  c3 = mag_constraint_lambda /univol *c1 /c2 &
+                       &         *( MagDirec(ixyz) -mag_direction0_atomtyp(it,ixyz) )
+               else
+                  c3 = 0.0d0
+               endif
+               cfactor( ixyz ) = c3
+            else
+               if ( cnorm4 > 0.0 ) then
+                  if ( mag_moment0_atomtyp(it,ixyz) > 0.0 ) then
+                     c3 = 1.0d0
+                  else
+                     c3 = -1.0d0
+                  endif
+                  cfactor( ixyz ) = mag_constraint_lambda /univol &
+                       &               *( MagDirec(ixyz) -c3 )
+               else
+                  cfactor( ixyz ) = 0.0d0
+               endif
+            endif
+
+!            write(2000+mype,*) "ia cfac = ", ia, cfactor(ixyz), MagDirec(1), c3
+!            write(2100+mype,*) "ia  = ", ia, MagMom(ixyz), cnorm, cnorm2
+
+            call calc_phase2( natm, pos, ia, kgp, ngabc, ista_kngp, iend_kngp, &
+                 &            zfcos, zfsin )
+
+            if ( ista_kngp == 1 ) then
+               ist = 2
+            else
+               ist = ista_kngp
+            endif
+
+            Do i=ist, iend_kngp
+               VecG(1) = rltv(1,1)*ngabc(i,1) +rltv(1,2)*ngabc(i,2) +rltv(1,3)*ngabc(i,3)
+               VecG(2) = rltv(2,1)*ngabc(i,1) +rltv(2,2)*ngabc(i,2) +rltv(2,3)*ngabc(i,3)
+               VecG(3) = rltv(3,1)*ngabc(i,1) +rltv(3,2)*ngabc(i,2) +rltv(3,3)*ngabc(i,3)
+            
+               normG = sqrt( VecG(1)**2 +VecG(2)**2 +VecG(3)**2 )
+               normG3 = normG**3
+               
+               fac1r = zfcos(i); fac1i = zfsin(i)
+!
+               gr = normG *rad1
+               fac2 = -gr *cos(gr) + sin(gr)
+               fac2 = fac2 *PAI4 /normG3
+!
+               MagField_constrain_local( i, 1, ixyz ) &
+                    & = MagField_constrain_local( i, 1, ixyz ) &
+                    &   +fac1r *fac2 *cfactor( ixyz )
+               if ( kimg == 2 ) then
+                  MagField_constrain_local( i, 2, ixyz ) &
+                       & = MagField_constrain_local( i, 2, ixyz ) &
+                       &  -fac1i *fac2 *cfactor( ixyz )
+               endif
+            End Do
+
+            if ( mype == 0 ) then
+               fac2 = PAI4 /3.0d0 *rad1**3
+
+               MagField_constrain_local( 1, 1, ixyz ) &
+                    & = MagField_constrain_local( 1, 1, ixyz ) &
+                    &   +fac2 *cfactor( ixyz )
+            end if
+         End Do
+      End Do
+!
+      if ( noncol ) then
+         Do i=ista_kngp, iend_kngp
+            Do is=2, ndim_magmom
+               vlhxc_l(i,1,is) = vlhxc_l(i,1,is) +MagField_constrain_local(i,1,is-1)
+               vlhxc_l(i,2,is) = vlhxc_l(i,2,is) +MagField_constrain_local(i,2,is-1)
+            End do
+         End Do
+      else
+         Do i=ista_kngp, iend_kngp
+            Do j=1, kimg
+               vlhxc_l(i,j,1) = vlhxc_l(i,j,1) +MagField_constrain_local(i,j,1)
+               vlhxc_l(i,j,2) = vlhxc_l(i,j,2) -MagField_constrain_local(i,j,1)
+            End Do
+         End Do
+      endif
+
     end subroutine case_constraint_direc_local
 
   end subroutine m_ES_add_MagConstraintPot_chgql
@@ -518,11 +662,100 @@ contains
     end subroutine case_constraint_direc_global
 
     subroutine case_constraint_direc_local
-      stop "kt: Not supported"
+      real(kind=DP) :: MagMom(3), MagDirec(3), c1, c2, c3
+      integer :: i, j, ia, it, is, ixyz
+
+      real(kind=DP) :: cnorm, cnorm2, cnorm4
+      real(kind=DP) :: csum1, csum2
+
+      real(kind=DP), parameter :: cnorm_lower_limit = 1.0D-10
+
+      csum1 = 0.0d0; csum2 = 0.0d0
+
+      if ( noncol ) then
+         Do i=ista_kngp, iend_kngp
+            Do j=1, kimg
+               Do ixyz=1, 3
+                  csum1 = csum1 + MagField_constrain_local(i,j,ixyz)*chgq_l(i,j,ixyz+1)
+               End Do
+            End Do
+         End Do
+
+         Do ia=1, natm
+            it = ityp(ia)
+            Do ixyz=1, 3
+               MagMom(ixyz) = RhoMag_on_Atom( ia,ixyz+1 )
+            End do
+
+            cnorm2 = 0.0d0
+            Do ixyz=1, 3
+               cnorm2 = cnorm2 + MagMom(ixyz)**2
+            End do
+            cnorm = sqrt( cnorm2 )
+
+            cnorm4 = 0.0d0
+            Do ixyz=1, 3
+               cnorm4 = cnorm4 +mag_moment0_atomtyp(it,ixyz)**2
+            End do
+            cnorm4 = sqrt( cnorm4 )
+
+            if ( cnorm4 > 0.0 .and. cnorm > cnorm_lower_limit ) then
+               MagDirec = MagMom / cnorm
+               Do ixyz=1, 3
+                  csum2 = csum2 + ( MagDirec(ixyz) -mag_direction0_atomtyp(it,ixyz) )**2
+               End Do
+            endif
+         End do
+
+      else
+         Do i=ista_kngp, iend_kngp
+            Do j=1, kimg
+               csum1 = csum1 + MagField_constrain_local(i,j,1) &
+                    &         *( chgq_l(i,j,1) -chgq_l(i,j,2) )
+            End Do
+         End Do
+
+         Do ia=1, natm
+            it = ityp(ia)
+            MagMom(1) = RhoMag_on_Atom( ia,1 ) -RhoMag_on_Atom( ia,2 )
+
+            cnorm2 = 0.0d0
+            Do ixyz=1, 1
+               cnorm2 = cnorm2 + MagMom(ixyz)**2
+            End do
+            cnorm = sqrt( cnorm2 )
+
+            cnorm4 = 0.0d0
+            Do ixyz=1, 1
+               cnorm4 = cnorm4 +mag_moment0_atomtyp(it,ixyz)**2
+            End do
+            cnorm4 = sqrt( cnorm4 )
+
+            if ( cnorm4 > 0.0 .and. cnorm > cnorm_lower_limit ) then
+               MagDirec = MagMom / cnorm
+               if ( mag_moment0_atomtyp(it,1) > 0.0 ) then
+                  c3 = 1.0d0
+               else
+                  c3 = -1.0d0
+               endif
+               csum2 = csum2 +( MagDirec(1) -c3 )**2
+            endif
+         End do
+      endif
+
+      if ( npes > 1 ) then
+         call mpi_allreduce( csum1, c1, 1, mpi_double_precision, &
+              &              mpi_sum, mpi_comm_group, ierr )
+         csum1 = c1
+      endif
+
+      ene_double_counting = csum1 *univol
+      ene_mag_constraint = csum2 *mag_constraint_lambda/ 2.0d0
+
     end subroutine case_constraint_direc_local
 
     subroutine case_constraint_moment_local
-      real(kind=DP) :: MagMom(3), MagDirec(3), c1, c2
+      real(kind=DP) :: MagMom(3), c1, c2
       integer :: i, j, ia, it, is, ixyz
 
       real(kind=DP) :: csum1, csum2
@@ -542,8 +775,8 @@ contains
             it = ityp(ia)
             Do ixyz=1, 3
                MagMom(ixyz) = RhoMag_on_Atom( ia,ixyz+1 )
+               csum2 = csum2 + ( MagMom(ixyz) -mag_moment0_atomtyp(it,ixyz) )**2
             End do
-            csum2 = csum2 + ( MagMom(ixyz) -mag_moment0_atomtyp(it,ixyz) )**2
          End do
 
       else
@@ -557,7 +790,7 @@ contains
          Do ia=1, natm
             it = ityp(ia)
             MagMom(1) = RhoMag_on_Atom( ia,1 ) -RhoMag_on_Atom( ia,2 )
-            csum2 = ( MagMom(1) -mag_moment0_atomtyp(it,1) )**2
+            csum2 = csum2 +( MagMom(1) -mag_moment0_atomtyp(it,1) )**2
          End do
       endif
 
diff -uprN phase0_2015.01/src_phase_3d/m_ES_NonCollinear.f90 phase0_2015.01.01/src_phase_3d/m_ES_NonCollinear.f90
--- phase0_2015.01/src_phase_3d/m_ES_NonCollinear.f90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_NonCollinear.f90	2016-07-12 12:51:53.000000000 +0900
@@ -1,5 +1,5 @@
 module m_ES_NonCollinear
-! $Id: m_ES_NonCollinear.f90 409 2014-10-27 09:24:52Z jkoga $
+! $Id: m_ES_NonCollinear.f90 487 2016-05-17 05:20:42Z ktagami $
   use m_Const_Parameters,   only : DP, CMPLDP, zi, yes, PAI, &
        &                            BuiltIn, ByProjector, ByPawPot, ZeffApprox, &
        &                           BUCS, CARTS, CRDTYP, DELTA10, ReadFromPP
diff -uprN phase0_2015.01/src_phase_3d/m_ES_WF_by_RMM.F90 phase0_2015.01.01/src_phase_3d/m_ES_WF_by_RMM.F90
--- phase0_2015.01/src_phase_3d/m_ES_WF_by_RMM.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_WF_by_RMM.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1986,10 +1986,12 @@ STOP_TIMER(1620)
             end if
          end do
 
-         do iap = 1, n_ialist0
-            ia = n_ialist0*(lmt1-1) + iap
-            bWr_lmt(ia,:) = bWr_tmp(iap,:)
-            bWi_lmt(ia,:) = bWi_tmp(iap,:)
+         do ib = 1, np_e
+            do iap = 1, n_ialist0
+               ia = n_ialist0*(lmt1-1) + iap
+               bWr_lmt(ia,ib) = bWr_tmp(iap,ib)
+               bWi_lmt(ia,ib) = bWi_tmp(iap,ib)
+            end do
          end do
 
          do iap = 1, n_ialist0
diff -uprN phase0_2015.01/src_phase_3d/m_ES_WF_by_submat.F90 phase0_2015.01.01/src_phase_3d/m_ES_WF_by_submat.F90
--- phase0_2015.01/src_phase_3d/m_ES_WF_by_submat.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_WF_by_submat.F90	2016-07-12 12:51:53.000000000 +0900
@@ -841,6 +841,7 @@ contains
 
 ! (make matrix elements )
 #ifdef _USE_SCALAPACK_
+    if(iprisubmat>=2) write(nfout,'(" sw_scalapack = ",i3," <<evolve_WFs_in_subspace>>")') sw_scalapack
     if(sw_scalapack == ON) then
        if(submat_uncalled) then
           if(iprisubmat >= 2) write(nfout,'("first nprow,npcol=",2i5)') nprow,npcol
diff -uprN phase0_2015.01/src_phase_3d/m_ES_dos.F90 phase0_2015.01.01/src_phase_3d/m_ES_dos.F90
--- phase0_2015.01/src_phase_3d/m_ES_dos.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ES_dos.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 492 $)
 !
 !  MODULE: m_ES_dos
 !
@@ -34,14 +34,14 @@
 !
 module m_ES_dos
 !     (m_ESdos)
-! $Id: m_ES_dos.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_ES_dos.F90 492 2016-05-31 03:06:04Z jkoga $
 !
 ! This module was originally coded by T. Yamasaki (FUJITSU Laboratories) in 2001.
 ! And this is transferred as match to PHASE by T. Yamasaki, 18th May. 2003.
 !
   use m_Kpoints, only :              kv3, kv3_ek, qwgt,vkxyz_ek &
        &                           , np0,np2,ip20,iwt,ip2cub,nxyz_tetra,trmat &
-       &                           , m_Kp_sample_mesh
+       &                           , m_Kp_sample_mesh, qwgt_ek
   use m_Files, only :                nfout
 !!$  use m_Files, only :                nfdos, nfout
   use m_Timing, only :               tstatc0_begin, tstatc0_end
@@ -54,7 +54,8 @@ module m_ES_dos
        &                            ,ipriinputfile, printable
   use m_Const_Parameters, only :     DP,Hartree,BUCS,EK,SCF, ALDOS, LAYERDOS, ON, OFF, TOTAL, PAI2
   use m_Parallelization, only :      mpi_comm_group,map_ek,mype,map_e,map_k,myrank_e,myrank_k &
-       &                            ,ierr,np_e,map_z,ista_e,npes,mpi_kg_world,mpi_ge_world
+       &                            ,ierr,np_e,map_z,ista_e,npes,mpi_kg_world &
+       &                            , mpi_ge_world, nrank_e
   use m_PseudoPotential, only :      nlmta_phi,nlmtt_phi &
        &                            ,m_PP_tell_iorb_ia_l_m_tau,qorb &
        &                            ,m_PP_tell_iorb_lmt
@@ -570,6 +571,7 @@ contains
                &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
           call mpi_allreduce( norm_phig_mpi, norm_phig_mpi2, nlmtt_phi*kv/ndim_spinor, &
                &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
+          norm_phig_mpi2 = norm_phig_mpi2 / dble(nrank_e)
        end if
 
        eko = eko_mpi
@@ -751,14 +753,15 @@ contains
   subroutine dealloc_dos()
     deallocate(dos)
     deallocate(sumdos)
-!!$    if(sw_pdos == ON) then
-!!$       if(allocated(pdos)) deallocate(pdos)
-!!$       if(allocated(sumpdos)) deallocate(sumpdos)
-!!$    end if
+    if(sw_pdos == ON) then
+       if(allocated(pdos)) deallocate(pdos)
+       if(allocated(sumpdos)) deallocate(sumpdos)
+    end if
   end subroutine dealloc_dos
 
-  subroutine make_dos_with_GaussianDistrib(kv,iwsc)
-    integer, intent(in) :: kv,iwsc
+  subroutine make_dos_with_GaussianDistrib( kv, iwsc, kpt_weight )
+    integer, intent(in) :: kv, iwsc
+    real(kind=DP), intent(in) :: kpt_weight( kv )
 
     integer ::             i, ik, is, ie, id, ispin, iorb, iopr, lmt, iksnl
     real(kind=DP) ::       Es, e, El, Eu, tl, tu, w, DeltaE
@@ -802,41 +805,42 @@ contains
                 tu = (Eu - eko(i,ik))*sqrtdVI
                 !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
                 dos(id+1,ispin) = dos(id+1,ispin) &
-                     & + w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
+                     & + w *2 *(derf(tu) - derf(tl)) *0.5d0 /DeltaE *kpt_weight(ik)
              end do
              if(iwsc == TOTAL .and. sw_pdos == ON) then
                 do iorb = 1,nlmta_phi
-                call m_PP_tell_iorb_lmt(iorb,lmt)
-                porb = 0.d0
+                   call m_PP_tell_iorb_lmt(iorb,lmt)
+                   porb = 0.d0
 !!$ASASASASAS
 !!$                do iopr=1,nopr
 !!$                   porb = porb + (compr(i,iorb,iopr,ik)**2 &
 !!$                        &       + compi(i,iorb,iopr,ik)**2) &
 !!$                        &     *(1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl))
 !!$                end do
-                if ( k_symmetry(ik) == GAMMA ) then
-                   do iopr=1,nopr
-                      porb = porb + compr(i,iorb,iopr,ik)**2  /2.0 &
-                   &     *(1.d0+qorb(iorb)/(norm_phig_mpi(lmt,iksnl)*2.) )
-                   end do
-                else
-                   do iopr=1,nopr
-                      porb = porb + (compr(i,iorb,iopr,ik)**2 &
-                           &       + compi(i,iorb,iopr,ik)**2) &
-                           &     *(1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl))
-                   end do
-                endif
+                   if ( k_symmetry(ik) == GAMMA ) then
+                      do iopr=1,nopr
+                         porb = porb + compr(i,iorb,iopr,ik)**2  /2.0 &
+                              &     *(1.d0+qorb(iorb)/(norm_phig_mpi(lmt,iksnl)*2.) )
+                      end do
+                   else
+                      do iopr=1,nopr
+                         porb = porb + (compr(i,iorb,iopr,ik)**2 &
+                              &       + compi(i,iorb,iopr,ik)**2) &
+                              &     *(1.d0+qorb(iorb)/norm_phig_mpi(lmt,iksnl))
+                      end do
+                   endif
 !!$ASASASASAS
-                porb = porb/dble(nopr)
-                do id = is, ie
-                   El = Es + id*DeltaE
-                   Eu = El + DeltaE
-                   tl = (El - eko(i,ik))*sqrtdVI
-                   tu = (Eu - eko(i,ik))*sqrtdVI
-                   !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
-                   pdos(id+1,iorb,ispin) = pdos(id+1,iorb,ispin) &
-                        & + porb * w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
-                end do
+                   porb = porb/dble(nopr)
+                   do id = is, ie
+                      El = Es + id*DeltaE
+                      Eu = El + DeltaE
+                      tl = (El - eko(i,ik))*sqrtdVI
+                      tu = (Eu - eko(i,ik))*sqrtdVI
+                      !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
+                      pdos(id+1,iorb,ispin) = pdos(id+1,iorb,ispin) &
+                           & + porb *w *2 *(derf(tu) - derf(tl)) *0.5d0 /DeltaE &
+                           &        *kpt_weight(ik)
+                   end do
                 end do
              end if
           end do
@@ -857,8 +861,9 @@ contains
   end subroutine make_dos_with_GaussianDistrib
 
 ! ==================================== added by K. Tagami ============== 11.0
-  subroutine mkdos_with_GaussDistrib_noncl(kv,iwsc)
-    integer, intent(in) :: kv,iwsc
+  subroutine mkdos_with_GaussDistrib_noncl( kv, iwsc, kpt_weight )
+    integer, intent(in) :: kv, iwsc
+    real(kind=DP), intent(in) :: kpt_weight( kv )
 
     integer :: i, ik, is, ie, id, ispin, iorb, iopr, lmt, iksnl
     integer :: is1, is2, istmp, ismax
@@ -916,7 +921,7 @@ contains
                 tu = (Eu - eko(i,iksnl))*sqrtdVI
                 !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
                 dos(id+1,istmp) = dos(id+1,istmp) &
-                     & + w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
+                     & + w *(derf(tu) - derf(tl)) *0.5d0 /DeltaE *kpt_weight(ik)
              end do
           End do
 
@@ -962,7 +967,8 @@ contains
                    tu = (Eu - eko(i,iksnl))*sqrtdVI
                    !  d = (derf(tl) - derf(tu))*0.5d0/(DeltaE*kv)
                    pdos(id+1,iorb,:) = pdos(id+1,iorb,:) &
-                        & + porb(iorb,:) * w * 2*(derf(tu) - derf(tl))*0.5d0/(DeltaE*kv)
+                        & + porb(iorb,:) *w *(derf(tu) - derf(tl)) &
+                        &                *0.5d0 /DeltaE *kpt_weight(ik)
                 end do
              End do
 
@@ -989,8 +995,9 @@ contains
 ! =================================================================== 11.0
 
 ! ====================== KT_add ======================= 13.0E
-  subroutine make_dos_with_FDiracDistrib(kv,iwsc)
-    integer, intent(in) :: kv,iwsc
+  subroutine make_dos_with_FDiracDistrib( kv, iwsc, kpt_weight )
+    integer, intent(in) :: kv, iwsc
+    real(kind=DP), intent(in) :: kpt_weight( kv )
 
     integer ::             i, ik, is, ie, id, ispin, iorb, iopr, lmt, iksnl
     real(kind=DP) ::       Es, e, Ene1, c1, c2, w, DeltaE
@@ -1031,7 +1038,8 @@ contains
              do id = is, ie
                 ene1 = Es +id*DeltaE
                 call width_fermi_dirac( ene1, eko(i,ik), smearing_width_fdirac, c1, c2 )
-                dos(id+1,ispin) = dos(id+1,ispin) + w *c1 *2.0d0 /dble(kv)
+!!!                dos(id+1,ispin) = dos(id+1,ispin) + w *c1 *2.0d0 /dble(kv)
+                dos(id+1,ispin) = dos(id+1,ispin) + w *c1 *2.0d0 *kpt_weight(ik)
              end do
              if(iwsc == TOTAL .and. sw_pdos == ON) then
                 do iorb = 1,nlmta_phi
@@ -1062,7 +1070,8 @@ contains
                       call width_fermi_dirac( ene1, eko(i,ik), &
                            &                  smearing_width_fdirac, c1, c2  )
                       pdos(id+1,iorb,ispin) = pdos(id+1,iorb,ispin) &
-                           & + porb *w *c1 *2.0d0 /dble(kv)
+!!!                           & + porb *w *c1 *2.0d0 /dble(kv)
+                           & + porb *w *c1 *2.0d0 *kpt_weight(ik)
                    end do
                 end do
              end if
@@ -1404,7 +1413,7 @@ contains
     if ( iproj_group(ia)== 0 ) return
 
 ! ---
-    write(nf,'("PDOS: ia=",i0," l=",i3," m=",i3," t=",i3)') ia,il-1,im,tau
+    write(nf,'("PDOS: ia= ",i0," l=",i3," m=",i3," t=",i3)') ia,il-1,im,tau
 
     write(nf,'(2x,A,5x,A,4x,A)') "No.      E(eV)",&
          &     "  dos_chg(eV)    dos_mx(eV)    dos_my(eV)    dos_mz(eV)", &
@@ -1437,7 +1446,7 @@ contains
     call alloc_eko_and_substitution(kv3) ! eko_l -> eko
     call find_Erange(eko,neg,kv3)
     call alloc_dos(1,icomponent)
-    call make_dos_with_GaussianDistrib(kv3,icomponent)
+    call make_dos_with_GaussianDistrib( kv3, icomponent, qwgt )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
     if(mype == 0) call write_dos(nfdos)
 !!$    call write_dos(nfout)
@@ -1456,7 +1465,7 @@ contains
     call find_Erange( eko, neg, kv3/ndim_spinor )
     call alloc_dos(1,icomponent)
     
-    call mkdos_with_GaussDistrib_noncl(kv3,icomponent)
+    call mkdos_with_GaussDistrib_noncl( kv3, icomponent, qwgt )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
 
     if(mype == 0) then
@@ -1482,7 +1491,7 @@ contains
     call alloc_eko_and_substitution_ek(kv3_ek) ! eko_ek -> eko
     call find_Erange(eko,neg,kv3_ek)
     call alloc_dos(1,icomponent)
-    call make_dos_with_GaussianDistrib(kv3_ek,icomponent)
+    call make_dos_with_GaussianDistrib( kv3_ek, icomponent, qwgt_ek )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
     if(mype == 0) call write_dos(nfdos)
     call dealloc_eko()
@@ -1497,7 +1506,7 @@ contains
     call find_Erange(eko,neg,kv3_ek/ndim_spinor)
     call alloc_dos(1,icomponent)
 
-    call mkdos_with_GaussDistrib_noncl(kv3_ek,icomponent)
+    call mkdos_with_GaussDistrib_noncl( kv3_ek, icomponent, qwgt_ek )
     if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
 
     if(mype == 0) then
@@ -1521,7 +1530,7 @@ contains
     call find_Erange_fermidirac(eko,neg,kv3)
 
     call alloc_dos(1,icomponent)
-    call make_dos_with_FDiracDistrib(kv3,icomponent)
+    call make_dos_with_FDiracDistrib( kv3, icomponent, qwgt )
 
 !!!    if(icomponent == TOTAL) call get_VBM(totch,DeltaDVBM)
     if(icomponent == TOTAL) then
@@ -1544,7 +1553,7 @@ contains
     call find_Erange_fermidirac(eko,neg,kv3_ek)
 
     call alloc_dos(1,icomponent)
-    call make_dos_with_FDiracDistrib(kv3_ek,icomponent)
+    call make_dos_with_FDiracDistrib( kv3_ek, icomponent, qwgt_ek )
 
     if(icomponent == TOTAL) then
        ValenceBandMaximum = efermi
@@ -2190,7 +2199,7 @@ contains
                  &              mpi_double_precision, mpi_sum, mpi_comm_group, ierr )
 
             compr = compr_mpi;    compi = compi_mpi
-            norm_phig_mpi = norm_phig_mpi2
+            norm_phig_mpi = norm_phig_mpi2 / dble(nrank_e)
             deallocate(compr_mpi,compi_mpi,norm_phig_mpi2)
           end if
 
diff -uprN phase0_2015.01/src_phase_3d/m_Electronic_Structure.F90 phase0_2015.01.01/src_phase_3d/m_Electronic_Structure.F90
--- phase0_2015.01/src_phase_3d/m_Electronic_Structure.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Electronic_Structure.F90	2016-07-12 12:51:53.000000000 +0900
@@ -4533,7 +4533,7 @@ contains
     else
 #endif
                                                  __TIMER_SUB_START(304)
-    call tstatc0_begin('m_ES_WF_in_Rspace_3D(1) ',id_sname)
+    call tstatc0_begin('m_ES_WF_in_Rspace_3D(1) ',id_sname,1)
 
     if(ipri>=2 .and. ik==1 .and. ib1==1) write(nfout,'(" !### zaj_l --(FFT)--> bfft")')
 
diff -uprN phase0_2015.01/src_phase_3d/m_Epsilon_ek.F90 phase0_2015.01.01/src_phase_3d/m_Epsilon_ek.F90
--- phase0_2015.01/src_phase_3d/m_Epsilon_ek.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Epsilon_ek.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,7 +1,7 @@
 #define NEC_TUNE
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 447 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 470 $)
 !
 !  MODULE: m_Epsilon
 !
@@ -32,7 +32,7 @@
 ! ================================================================
 
 module m_Epsilon_ek
-! $Id: m_Epsilon_ek.F90 447 2015-08-05 02:38:25Z jkoga $
+! $Id: m_Epsilon_ek.F90 470 2015-10-05 03:15:15Z ktagami $
 !
 ! Universal Virtual Spectroscope for Optoelectronics Research (UVSOR) ver 3.00
 ! UVSOR module for electronic dielectric function calculation
@@ -6994,14 +6994,23 @@ ppc_data(ntyp)
 ! set omega
              if(nbi==nbj) then
                 omega = deg_omega            ! -> intra_band case
+
+             else if(abs(omega) < 1.d-14) then ! -> degenerate case
+                if(omega >= 0.d0) then
+                   omega = 1.d-14
+                else
+                   omega = -1.d-14
+                end if
+! ====
              else
-                if(abs(omega) < 1.d-14) then ! -> degenerate case
-                   if(omega >= 0.d0) then
-                      omega = 1.d-14
+                if ( sw_scissor_renormalization == ON ) then
+                   if ( omega > 0.0 ) then
+                      omega = omega + scissor
                    else
-                      omega = -1.d-14
-                   end if
+                      omega = omega - scissor
+                   endif
                 end if
+! ====
              end if
 
 ! add correction term
@@ -13046,6 +13055,16 @@ ppc_data(ntyp)
                    else
                        e21 = -delta_omega
                    end if
+! ==
+                else
+                   if ( sw_scissor_renormalization == ON ) then
+                      if ( e21 > 0.0 ) then
+                         e21 = e21 + scissor
+                      else
+                         e21 = e21 - scissor
+                      endif
+                   endif
+! ==
                 end if
 ! ========================== 13.0R
              end if
diff -uprN phase0_2015.01/src_phase_3d/m_Excitation.F90 phase0_2015.01.01/src_phase_3d/m_Excitation.F90
--- phase0_2015.01/src_phase_3d/m_Excitation.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Excitation.F90	2016-07-12 12:51:53.000000000 +0900
@@ -2439,7 +2439,7 @@ contains
     deallocate( zrho_work ); deallocate( ztrm2 )
     deallocate( eko_wk ); deallocate( occ_wk )
 
-    SpectrFn_vv = SpectrFn_vv /dble(kv3_fbz) /univol
+    SpectrFn_vv = SpectrFn_vv /dble(kv3_fbz/nspin) /univol
     if ( nspin == 1 ) SpectrFn_vv = SpectrFn_vv *2.0d0
 
 #if 0
@@ -2653,7 +2653,7 @@ contains
        endif
     End Do
 
-    SpectrTensor_vv = SpectrTensor_vv /dble(kv3_fbz) /univol
+    SpectrTensor_vv = SpectrTensor_vv /dble(kv3_fbz/nspin) /univol
     if ( nspin == 1 ) SpectrTensor_vv = SpectrTensor_vv *2.0d0
 
 #if 0
@@ -2821,7 +2821,7 @@ contains
        deallocate( work )
     endif
 
-    SpectrFn_vc = SpectrFn_vc /dble(kv3_fbz) /univol
+    SpectrFn_vc = SpectrFn_vc /dble(kv3_fbz/nspin) /univol
     if ( nspin == 1 ) SpectrFn_vc = SpectrFn_vc *2.0d0
 
 #if 0
diff -uprN phase0_2015.01/src_phase_3d/m_FFT.F90 phase0_2015.01.01/src_phase_3d/m_FFT.F90
--- phase0_2015.01/src_phase_3d/m_FFT.F90	2015-09-14 15:39:20.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_FFT.F90	2016-07-12 12:51:53.000000000 +0900
@@ -3,7 +3,7 @@
 ! ==============================================================================
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 449 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 481 $)
 !
 !  MODULE: m_FFT
 !
@@ -39,7 +39,7 @@
 !
 
 module m_FFT
-! $Id: m_FFT.F90 449 2015-08-06 04:37:59Z jkoga $
+! $Id: m_FFT.F90 481 2016-03-25 02:51:57Z jkoga $
 !
   use m_Timing,            only : tstatc0_begin, tstatc0_end
   use m_Files,             only : nfout
@@ -925,6 +925,226 @@ include "m_FFT_type9_ffte.F90"
     if(mod(nfft_pstrn,2) == 1) bfft(nfft_pstrn) = afft(nfft_pstrn)*bfft(nfft_pstrn)
   end subroutine m_FFT_Vlocal_pW
 
+! === EXP_CELLOPT ==== 2015/09/24
+  subroutine m_FFT_coef_CD_integration_kt(ista,iend,f2or1)
+                                            ! from m_XC_Potential.F90,  for electron ?
+    integer, intent(in) :: ista, iend
+    real(kind=DP),intent(out) :: f2or1( ista:iend )
+
+    integer                  :: idp,nlp,nmp,nnp,nd2p,nd3p,ip, idph, nlph
+
+    nlp  = fft_box_size_CD(1,1)
+    nmp  = fft_box_size_CD(2,1)
+    nnp  = fft_box_size_CD(3,1)
+#ifdef _MPIFFT_
+    idp  = fft_box_size_CD_c(1,0)
+    nd2p = fft_box_size_CD_c(2,0)
+    nd3p = fft_box_size_CD_c(3,0)
+#else
+    idp  = fft_box_size_CD(1,0)
+    nd2p = fft_box_size_CD(2,0)
+    nd3p = fft_box_size_CD(3,0)
+#endif
+
+    call set_f2or1( npes, ista, iend, f2or1 )
+
+  contains
+
+    subroutine set_f2or1(npes,ista,iend,f2or1)
+      integer, intent(in) :: npes,ista, iend
+      real(kind=DP), intent(out), dimension(ista:iend) :: f2or1
+      integer :: idph,nlph,ip,i,j,k
+
+      if(kimg == 1) then
+         idph = idp/2
+         nlph = nlp/2
+#ifdef _MPIFFT_
+
+         f2or1 = 0.d0
+!!$         do j = 1, min(nz_d,nnp-nz_d*myrank_cdfft)*nmp
+!!$         do j = 1, min(nz_d,nnp-nz_d*myrank_cdfft)*nd2p
+         do k = 1, min(nz_d, nnp-nz_d*myrank_cdfft)
+            do j = 1, nmp
+               do i = 1, nlph
+!!$                  ip = i + idph*(j-1) + idph*ly*lz_d*myrank_cdfft
+                  ip = i + idph*(j-1) + idph*ly*(k-1) + idph*ly*lz_d*myrank_cdfft
+                  f2or1(ip) = 2.d0
+               end do
+               ip = 1 + idph*(j-1) + idph*ly*(k-1) + idph*ly*lz_d*myrank_cdfft
+               f2or1(ip) = 1.d0
+               ip = nlph+1 + idph*(j-1) + idph*ly*(k-1) + idph*ly*lz_d*myrank_cdfft
+               f2or1(ip) = 1.d0
+            end do
+         end do
+!!$            ip = idph*(j-1) + 1 + idph*ly*lz_d*myrank_cdfft
+!!$            f2or1(ip) = 1.d0
+!!$            ip = idph*(j-1)+ nlph + 1 + idph*ly*lz_d*myrank_cdfft
+!!$            f2or1(ip) = 1.d0
+!!$         end do
+#else
+         f2or1 = 2.d0
+         if(npes >= 2) then
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + 1
+               if(ip>= ista .and. ip <= iend) f2or1(ip) = 1.d0
+            end do
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + nlph + 1
+               if(ip>= ista .and. ip <= iend) f2or1(ip) = 1.d0
+            end do
+            do j = nlph+2, idph
+               do i = 1,nd2p*nnp
+                  ip = idph*(i-1)+j
+                  if(ip>= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+            do j = nmp+1, nd2p
+               do k = 1, nnp
+                  do i = 1, nlph
+                     ip = i + idph*(j-1) + idph*nd2p*(k-1)
+                     if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p
+               do i = 1, idph*nd2p
+                  ip = i + idph*nd2p*(k-1)
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+         else
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + 1
+               f2or1(ip) = 1.d0
+            end do
+            do i = 1, nd2p*nnp
+               ip = idph*(i-1) + nlph + 1
+               f2or1(ip) = 1.d0
+            end do
+            do j = nlph+2, idph
+               do i = 1,nd2p*nnp
+                  ip = idph*(i-1)+j
+                  f2or1(ip) = 0.d0
+               end do
+            end do
+            do j = nmp+1, nd2p
+               do k = 1, nnp
+                  do i = 1, nlph
+                     ip = i + idph*(j-1) + idph*nd2p*(k-1)
+                     f2or1(ip) = 0.d0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p
+               do i = 1, idph*nd2p
+                  ip = i + idph*nd2p*(k-1)
+                  f2or1(ip) = 0.d0
+               end do
+            end do
+         end if
+!!$       do i = ista, iend
+!!$          if(mod(i*2,idp) == 2 .or. mod(i*2,idp) == 0) f2or1(i) = 1.d0
+!!$       end do
+#endif
+      else
+#ifdef _MPIFFT_
+!!$         f2or1 = 0.d0                 ! f2or1 works to the fft data in Rspace.
+!!$         do k = 1, min(nz_d,nnp-nz_d*myrank_cdfft)
+!!$            do j = 1, nmp     ! nmp = fft_box_size_CD(2,1)
+!!$               do i = 1, nlp  ! nlp = fft_box_size_CD(1,1)
+!!$                  ip = i+(j-1)*idp+(k-1)*idp*nd2p+idp*nd2p*lz_d*myrank_cdfft
+!!$                  f2or1(ip) = 1.d0
+!!$               end do
+!!$            end do
+!!$         end do
+!         if(iprixc >= 2 ) write(nfout,'(" ix kimg = 2 <<set_f2or1>>")')
+         f2or1 = 1.d0
+         do j = nlp+1, idp      ! x
+            do i = 1, ly*nz_d
+               ip = idp*(i-1)+j+ista-1
+               f2or1(ip) = 0.d0
+            end do
+         end do
+!         if(iprixc >= 2 ) write(nfout,'(" iy kimg = 2 <<set_f2or1>>")')
+         do j = nmp+1,ly         ! y
+            do k = 1, nz_d
+               do i = 1, nlp
+                  ip = i + idp*(j-1) + idp*ly*(k-1) + ista-1
+                  f2or1(ip) = 0.d0
+               end do
+            end do
+         end do
+!         if(iprixc >= 2 ) write(nfout,'(" iz kimg = 2 <<set_f2or1>>")')
+         do  k = nz_d+1, lz_d   ! z
+            do i = 1, idp*ly
+               ip = i + idp*ly*(k-1) + ista-1
+               f2or1(ip) = 0.d0
+            end do
+         end do
+#else
+         f2or1 = 1.d0
+         if(npes >= 2) then
+            do j = nlp+1, idp    ! x
+               do i = 1, nd2p*nnp
+                  ip = idp*(i-1)+j
+                  if(ip>= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+            do j = nmp+1, nd2p   ! y
+               do k = 1, nnp
+                  do i = 1, nlp
+                     ip = i + idp*(j-1) + idp*nd2p*(k-1)
+                     if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p   ! z
+               do i = 1, idp*nd2p
+                  ip = i + idp*nd2p*(k-1)
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+               end do
+            end do
+         else
+            do j = nlp+1, idp    ! x
+               do i = 1, nd2p*nnp
+                  ip = idp*(i-1)+j
+! ================================ modifed by K. Tagami ====( uncertain )== 11.0
+!                  f2or1(ip) = 0.d0
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+! ===================================================================== 11.0
+               end do
+            end do
+            do j = nmp+1, nd2p   ! y
+               do k = 1, nnp
+                  do i = 1, nlp
+                     ip = i + idp*(j-1) + idp*nd2p*(k-1)
+! ================================ modifed by K. Tagami ====( uncertain )== 11.0
+!                     f2or1(ip) = 0.d0
+                     if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+! ===================================================================== 11.0
+                  end do
+               end do
+            end do
+! for SX6 ASL 20040817
+            do k = nnp+1, nd3p   ! z
+               do i = 1, idp*nd2p
+                  ip = i + idp*nd2p*(k-1)
+! ================================ modifed by K. Tagami ====( uncertain )== 11.0
+!                  f2or1(ip) = 0.d0
+                  if(ip >= ista .and. ip <= iend) f2or1(ip) = 0.d0
+! ===================================================================== 11.0
+               end do
+            end do
+         end if
+#endif
+      end if
+    end subroutine set_f2or1
+  end subroutine m_FFT_coef_CD_integration_kt
+! ==================== 2015/09/24
+
   subroutine m_FFT_coef_CD_integration(f2or1)
     real(kind=DP),intent(out), dimension(ista_sfftph:iend_sfftph) :: f2or1
     integer :: idp, nlp, nmp, nnp, nd2p, idph, nlph, ip, i, j, k
@@ -1182,6 +1402,31 @@ include "m_FFT_type9_ffte.F90"
     call tstatc0_end(id_sname)
   end subroutine m_FFT_CD0
 
+  subroutine m_FFT_CD0_exx(nfout,afftp,inverse_or_direct)  ! R space --> G space
+    integer, intent(in)          :: nfout, inverse_or_direct
+    real(kind=DP), intent(inout) :: afftp(nfftp_exx_nonpara)
+    integer :: id_sname = -1
+    integer, dimension(2) :: flag_mklfft = (/-1, 1/)
+#if defined(JRCATFFT_WS) || defined(FFTW3)
+    if(ipri >= 2) write(nfout,'(" <<m_FFT_CD0_exx>>")')
+    call tstatc0_begin('m_FFT_CD0_exx ', id_sname, 1)
+    if(CD_setup_is_done_exx == NO) then
+#ifdef _INCLUDE_EXX_
+       if(ipri >= 1) write(nfout,'(" <<CDFFT_setup_exx>>")')
+       call CDFFT_setup_exx()
+#endif
+    end if
+    if(inverse_or_direct == DIRECT) then
+       call fft_CD_direct_core_exx(afftp)
+    else if(inverse_or_direct == INVERSE) then
+       call fft_CD_inverse_core_exx(afftp)
+    end if
+
+    call tstatc0_end(id_sname)
+#else
+    stop 'EXX in rspace can only be used with JRCAT FFT or FFTW3'
+#endif
+  end subroutine m_FFT_CD0_exx
 
 
 
diff -uprN phase0_2015.01/src_phase_3d/m_Files.F90 phase0_2015.01.01/src_phase_3d/m_Files.F90
--- phase0_2015.01/src_phase_3d/m_Files.F90	2015-09-14 15:39:27.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Files.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  SOFTWARE NAME : PHASE ($Revision: 454 $)
+!  SOFTWARE NAME : PHASE ($Revision: 483 $)
 !
 !  MODULE: m_Files
 !
@@ -36,7 +36,7 @@
 !
 !
 module m_Files
-!  $Id: m_Files.F90 454 2015-09-07 07:58:39Z yamasaki $
+!  $Id: m_Files.F90 483 2016-04-27 04:02:48Z ktagami $
 !
 !  Operations concerning to files as "open", and "close",
 !  should be done in this module.
@@ -103,7 +103,9 @@ module m_Files
 ! ===================================== 13.0S
        &, nfdynm_cif & ! CIF output
        &, nfwfk_sq   &   ! squared wf
-       &, nfwfk_integ_mom    ! moment of wf ( integerated over space )
+       &, nfwfk_integ_mom  &  ! moment of wf ( integerated over space )
+       &, nfwfk_orb_proj      &      ! orbital-projection of wf
+       &, nfband_spectr_wght         ! spectral weight for band unfolding 
 
   data  &
        &   nfinp,nfpot &                                               ! 31,(37,38,39,40,45,46,11-19,36)
@@ -140,6 +142,8 @@ module m_Files
        &  ,nfdynm_cif &
        &  ,nfwfk_sq &
        &  ,nfwfk_integ_mom &
+       &  ,nfwfk_orb_proj &
+       &  ,nfband_spectr_wght &
 
        &    /31,37,38,39,40,45,46 &
        & ,11,12,13,14,15,16,17,18,19,36 &
@@ -169,9 +173,11 @@ module m_Files
 ! ================================== 13.0S
        &, 500 &
        &, 350 & 
-       &, 351 /
+       &, 351 &
+       &, 352 &
+       &, 345 /
 
-  integer,private,parameter :: number_of_all_files = 76 + MAXNSP
+  integer,private,parameter :: number_of_all_files = 78 + MAXNSP
 
   integer,private, dimension(number_of_all_files) :: n_file
   data n_file &
@@ -202,7 +208,7 @@ module m_Files
 ! ================= KT_add ======== 13.0S
        & ,370, 371, 372 &
 ! ================================= 13.0S
-       & ,500, 350, 351 /
+       & ,500, 350, 351, 352, 345 /
 
   integer,private,parameter :: stringlength_of_filenames = 260
   character(len=stringlength_of_filenames) ::  &
@@ -238,7 +244,8 @@ module m_Files
 ! ================= KT_add ====== 13.0S
        &, F_CORE_ENERGY_OUT, F_CORE_ENERGY_INITIAL,  F_CORE_ENERGY_FINAL &
 ! =============================== 13.0S
-       &, F_DYNM_CIF, F_WFk_Squared, F_WFk_IntegMoment
+       &, F_DYNM_CIF, F_WFk_Squared, F_WFk_IntegMoment &
+       &, F_WFK_ORB_PROJ, F_BAND_SPECTR_WGHT
 
   namelist/fnames/ &
        &  F_INP, F_POT, F_PKB, F_PD, F_PPC, F_STOP, F_OPGR &
@@ -272,7 +279,8 @@ module m_Files
 ! ================= KT_add ====== 13.0S
        &, F_CORE_ENERGY_OUT, F_CORE_ENERGY_INITIAL,  F_CORE_ENERGY_FINAL &
 ! =============================== 13.0S
-       &, F_DYNM_CIF, F_WFk_Squared, F_Wfk_IntegMoment
+       &, F_DYNM_CIF, F_WFk_Squared, F_Wfk_IntegMoment &
+       &, F_WFK_ORB_PROJ, F_BAND_SPECTR_WGHT
 
 
   logical ::             F_ZAJ_partitioned      = .false.
@@ -594,6 +602,12 @@ contains
     if(mype==0) close(nfcntn_berry)
   end subroutine m_Files_close_nfcntn_berry
 
+  logical function m_Files_nfcntn_bin_paw_exists()
+    logical :: ex
+    inquire(file=F_CNTN_BIN_PAW,exist=ex) 
+    m_Files_nfcntn_bin_paw_exists = ex
+  end function m_Files_nfcntn_bin_paw_exists
+
   subroutine m_Files_open_nfcntn_bin_paw
     logical open
     inquire(unit = nfcntn_bin_paw, opened = open)
@@ -890,6 +904,9 @@ contains
 
     F_WFk_Squared  = "./wfnsq.cube"
     F_WFk_IntegMoment   = "./wfn_integ_moment.data"
+    F_WFK_ORB_PROJ    = "./wfn_orb_proj.data"
+
+    F_BAND_SPECTR_WGHT = "./nfband_spectr_wght.data"
 
   end subroutine m_Files_set_default_filenames
       
@@ -1582,6 +1599,76 @@ contains
     end if
   end subroutine m_Files_close_nfeng
 
+! ===
+  subroutine m_Files_open_nfwfk_orb_proj(icond)
+    integer, intent(in) :: icond
+    logical :: open
+
+    if (mype==0) then
+       inquire(unit = nfwfk_orb_proj, opened = open)
+
+       if (open) close(nfwfk_orb_proj)
+
+       if (icond == FIXED_CHARGE .or. &
+            & (icond==FIXED_CHARGE_CONTINUATION &
+            &   .and. fixed_charge_k_parallel==ALL_AT_ONCE)) then
+          call open0( nfwfk_orb_proj, F_WFK_ORB_PROJ, 'F_WFK_ORB_PROJ',&
+               &      unknown, formatted, check_file_name_on )
+       else if (icond==FIXED_CHARGE_CONTINUATION &
+            &        .and. fixed_charge_k_parallel==ONE_BY_ONE) then
+          call open1( nfwfk_orb_proj, F_WFK_ORB_PROJ, 'F_WFK_ORB_PROJ', &
+               &      unknown,formatted, check_file_name_on )
+       else
+          call open1( nfwfk_orb_proj, F_WFK_ORB_PROJ, 'F_WFK_ORB_PROJ', &
+               &      unknown,formatted, check_file_name_on )
+       end if
+    end if
+  end subroutine m_Files_open_nfwfk_orb_proj
+
+  subroutine m_Files_close_nfwfk_orb_proj()
+    logical :: open
+
+    if (mype==0) then
+       inquire( unit=nfwfk_orb_proj, opened = open )
+       if(open) close( nfwfk_orb_proj )
+    end if
+  end subroutine m_Files_close_nfwfk_orb_proj
+
+  subroutine m_Files_open_nfband_spwt(icond)
+    integer, intent(in) :: icond
+    logical :: open
+
+    if (mype==0) then
+       inquire(unit = nfband_spectr_wght, opened = open)
+
+       if (open) close(nfband_spectr_wght)
+
+       if (icond == FIXED_CHARGE .or. &
+            & (icond==FIXED_CHARGE_CONTINUATION &
+            &   .and. fixed_charge_k_parallel==ALL_AT_ONCE)) then
+          call open0( nfband_spectr_wght, F_BAND_SPECTR_WGHT, 'F_BAND_SPECTR_WGHT',&
+               &      unknown, formatted, check_file_name_on )
+       else if (icond==FIXED_CHARGE_CONTINUATION &
+            &        .and. fixed_charge_k_parallel==ONE_BY_ONE) then
+          call open1( nfband_spectr_wght, F_BAND_SPECTR_WGHT, 'F_BAND_SPECTR_WGHT ', &
+               &      unknown,formatted, check_file_name_on )
+       else
+          call open1( nfband_spectr_wght, F_BAND_SPECTR_WGHT, 'F_BAND_SPECTR_WGHT', &
+               &      unknown,formatted, check_file_name_on )
+       end if
+    end if
+  end subroutine m_Files_open_nfband_spwt
+
+  subroutine m_Files_close_nfband_spwt()
+    logical :: open
+
+    if (mype==0) then
+       inquire( unit=nfband_spectr_wght, opened = open )
+       if(open) close( nfband_spectr_wght )
+    end if
+  end subroutine m_Files_close_nfband_spwt
+! ====
+
   subroutine m_Files_close_nfinp()
     logical :: op
     if(mype == 0) then
diff -uprN phase0_2015.01/src_phase_3d/m_Force.F90 phase0_2015.01.01/src_phase_3d/m_Force.F90
--- phase0_2015.01/src_phase_3d/m_Force.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Force.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE: m_Force
 !
@@ -49,7 +49,7 @@
 #endif
 
 module m_Force
-! $Id: m_Force.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Force.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_IterationNumbers,     only : iteration_ionic
   use m_Charge_Density,       only : chgq_l, hsr
   use m_Electronic_Structure, only : zaj_l,vlhxcQ,occup_l,eko_l &
@@ -117,6 +117,12 @@ module m_Force
 
   use m_FFT,                  only : fft_box_size_CD,nfftps
 
+! === Positron ==== 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, positron_method
+  use m_Const_Parameters,    only : Positron_GGGC, OFF
+  use  m_Positron_Wave_Functions,  only : pchg_l
+! ================= 2015/11/28
+
   implicit none
   include 'mpif.h'
 
diff -uprN phase0_2015.01/src_phase_3d/m_Ionic_System.F90 phase0_2015.01.01/src_phase_3d/m_Ionic_System.F90
--- phase0_2015.01/src_phase_3d/m_Ionic_System.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Ionic_System.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 442 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 490 $)
 !
 !  MODULE: m_Ionic_System
 !
@@ -53,7 +53,7 @@
 
 module m_Ionic_System
 !     (m_IS)
-!  $Id: m_Ionic_System.F90 442 2015-08-03 08:52:00Z jkoga $
+!  $Id: m_Ionic_System.F90 490 2016-05-27 04:49:30Z jkoga $
 !
 !  This module is for structure factor, ewald energy,
 !  and motions of atoms.
@@ -297,6 +297,7 @@ module m_Ionic_System
   integer         ::                                  nrsv = 1  ! number of heat bath
   integer         ::                                  set_initial_velocity = ON  ! (by J. Koga)
   integer         ::                                  sw_read_velocities = OFF  ! (by T. Yamamoto)
+  integer         ::                                  sw_shift_velocities = OFF
   real(kind=DP),private ::                            tk_initial = 0.d0
 !!$  real(kind=DP),private,allocatable,dimension(:)   :: qmass,tkb,cprv,frsv ! d(nrsv)
   real(kind=DP),allocatable,dimension(:)   :: qmass,tkb,cprv,frsv ! d(nrsv)
@@ -360,6 +361,7 @@ module m_Ionic_System
   character(len("set_initial_velocity")),private,parameter :: tag_set_initial_velocity = "set_initial_velocity" ! (by J. Koga)
   character(len("sw_read_velocities")),private,parameter :: tag_sw_read_velocities = "sw_read_velocities"
   character(len("initial_temperature")),private,parameter :: tag_initial_temperature = "initial_temperature"
+  character(len("sw_shift_velocities")),private,parameter :: tag_sw_shift_velocities = "sw_shift_velocities"
 
   !     constraint             sigma                       sgmc
   ! 1. BONDLENGTH_FIX_1 
@@ -838,7 +840,10 @@ contains
   end subroutine m_IS_put_lattice_system
 
   subroutine alloc_normal_hypervector()
-    allocate(normal_hypervector(natm,3,PUCV:CARTS))
+    if ( .not. allocated(normal_hypervector) ) then
+       allocate(normal_hypervector(natm,3,PUCV:CARTS))
+    endif
+    normal_hypervector = 0.0d0
   end subroutine alloc_normal_hypervector
 
   subroutine set_normal_hypervector()
@@ -1075,7 +1080,11 @@ contains
 
        deallocate(work)
 
+       constraints_exist = .false.
+
        ! --- constraint ---
+       call initialize_constraint_param     ! use this for cell optimization
+
        if( f_selectBlock( tag_constraint) == 0) then
           if(ipriinputfile >= 1) write(nfout,'(" !** -- tag_constraint is found --")')
           if(imdalg == CG_STROPT) then
@@ -1116,7 +1125,7 @@ contains
              iret = f_selectParentBlock()
           end if
           ! --- setting bondlength fix sets ---
-          call alloc_bondlength_fix_set()  ! allocate(bondlength_fix_set(2,num_fixed_bonds))
+          if (tf) call alloc_bondlength_fix_set()  ! allocate(bondlength_fix_set(2,num_fixed_bonds))
           tf = f_selectBlock( tag_fixed_bond) == 0
           if(.not.tf) tf = f_selectBlock( tag_fix_bondlength) == 0
           if(.not.tf) tf = f_selectBlock( tag_bondlength_fix) == 0
@@ -1759,7 +1768,9 @@ contains
              if ( f_getIntValue(tag_sw_read_velocities,iret) == 0 ) then
                  sw_read_velocities = iret
              endif
-             
+             if ( f_getIntValue(tag_sw_shift_velocities,iret) == 0 ) then
+                 sw_shift_velocities = iret
+             endif 
              if( f_getRealValue(tag_initial_temperature,dret,"") == 0 ) then
                 tk_initial = dret * CONST_kB
                 if(ipriinputfile >= 1 .and. printable) then
@@ -4379,7 +4390,8 @@ contains
             pcom(i) = 0.d0
             do ia=1,natm
                ir = icnstrnt_typ(imdt(ia),imdalg_t)
-               if ((imode == 1 .and. ir >= 1 .and. imdtyp(ia).ne.0).or.(imode==2 .and. ir == 1.and.imdtyp(ia).ne.0)) then
+               if ((imode == 1 .and. ir >= 1 .and. imdtyp(ia).ne.0).or. &
+             &     (imode == 2 .and. ir == 1 .and. imdtyp(ia).ne.0)) then
                   cpd_l(ia,i) = random(ir,ia,i)
                   if(iprimd >= 2) write(nfout,'(" !!! ia, ir = ",2i8," cpd_l(ia,",i3,") = " &
                        & ,f12.6, " <<set_initial_velocities>>")') ia, ir, i, cpd_l(ia,i)
@@ -4509,6 +4521,79 @@ contains
 
   end subroutine m_IS_rd_n
 
+  subroutine shift_velocities(imode)
+      integer, intent(in) :: imode
+      integer :: i,j,ia,ir
+      real(kind=DP)   :: mcom
+      integer,dimension(natm) :: imdt
+      integer :: icnstrnt_typ
+      integer :: imdalg_t
+      real(kind=DP),dimension(3)   :: pcom
+      real(kind=DP), parameter :: eps = 1.d-12
+      if(imode == 1) then
+         do i=1,natm
+            if ( imdtyp(i) .le. NOSE_HOOVER ) then
+               imdt(i) = NOSE_HOOVER + 1
+            else 
+               imdt(i) = imdtyp(i)
+            endif
+         enddo
+         imdalg_t = T_CONTROL
+      else
+         do i=1, natm
+            imdt(i) = imdtyp(i)
+         end do
+         imdalg_t = VERLET
+      end if
+
+      do i=1,3
+         pcom(i) = 0.d0
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if ((imode == 1 .and. ir >= 1 .and. imdtyp(ia).ne.0).or. &
+          &     (imode == 2 .and. ir == 1 .and. imdtyp(ia).ne.0)) then
+              pcom(i) = pcom(i) + amion(ityp(ia))*cpd_l(ia,i)
+            endif
+         enddo
+      enddo
+
+      mcom = 0.d0
+      if(imode == 1) then
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (  ir >= 1 .and. imdtyp(ia).ne.0 ) then
+               mcom = mcom + amion(ityp(ia))
+            end if
+         end do
+      else
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (  ir == 1 .and. imdtyp(ia).ne.0 ) then
+               mcom = mcom + amion(ityp(ia))
+            end if
+         end do
+      end if
+
+      ! shift velocity
+      if(mcom.gt.eps) pcom(:) = pcom(:)/mcom
+
+      if(imode == 1) then
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (   ir >= 1 .and. imdtyp(ia).ne.0 ) then
+               cpd_l(ia,:) = cpd_l(ia,:) - pcom(:)
+            endif
+         enddo
+      else
+         do ia=1,natm
+            ir = icnstrnt_typ(imdt(ia),imdalg_t)
+            if (   ir == 1 .and. imdtyp(ia).ne.0 ) then
+               cpd_l(ia,:) = cpd_l(ia,:) - pcom(:)
+            endif
+         enddo
+      end if
+  end subroutine shift_velocities
+
 !!$ 2011.06.06
   subroutine scale_velocity()
      integer :: ia,ir,irp
@@ -5642,6 +5727,7 @@ contains
        stop ' Invalid value of mdmode <<m_IS_md>>'
     end if
 
+    if (sw_shift_velocities==ON.and.mdalg==VERLET) call shift_velocities(1)
     deallocate(forc_l)
     call md1_dealloc()
     call tstatc0_end(id_sname)
@@ -6348,6 +6434,12 @@ contains
   end function m_IS_force_check_md_nhp
 ! <--
 
+  subroutine initialize_constraint_param
+    forcmx_constraint_quench = 1.d+2
+    forc_norm_hyperplane_vert = 1.d+2
+    forcmx_hyperplane_vert = 1.d+2
+  end subroutine initialize_constraint_param
+
   subroutine evolve_velocities(mdalg,forc_l)
     integer, intent(in) ::                          mdalg
     real(kind=DP), intent(inout), dimension(natm,3) :: forc_l
@@ -7312,7 +7404,7 @@ contains
     if(t_ctrl_method == VELOCITY_SCALING)then
        call scale_velocity()
     endif
-
+    if (sw_shift_velocities==ON) call shift_velocities(1)
     call md2_dealloc                                    !-(m_Ionic_System)
     call tstatc0_end(id_sname)
   contains
@@ -7900,6 +7992,10 @@ contains
     integer ::              id_sname = -1
     call tstatc0_begin('m_IS_gdiis ',id_sname)
 
+    if(absolute_convergence_of_forc(forc_l_in))then
+       write(nfout,'(a)') ' m_IS_gdiis: forces are absolutely converged!! nothing to do...'
+       return
+    endif
 
     if(mdmode == ORDINA) then
        allocate(forc_l(natm,3))
@@ -9090,6 +9186,10 @@ contains
     integer ::              id_sname = -1
     call tstatc0_begin('m_IS_cg2',id_sname)
 
+     if(absolute_convergence_of_forc(forc_l_in))then
+       write(nfout,'(a)') ' CG2: forces are absolutely converged!! nothing to do...'
+       return
+     endif
 
     if(.not.allocated(vec_g)) allocate(vec_g(natm,3))
     if(.not.allocated(vec_h)) allocate(vec_h(natm,3))
@@ -9099,6 +9199,7 @@ contains
     if(.not.allocated(cps2)) allocate(cps2(natm,3))
     if(.not.allocated(f_total2)) allocate(f_total2(natm,3))
 
+
     f_total0(:,:) = forc_l_in(:,:)
 
 
@@ -9950,6 +10051,80 @@ contains
 #endif
   end subroutine m_IS_symmetrize_atom_pos
 
+  subroutine m_IS_force_af_symmetry(nfout)
+#ifdef SX
+!CDIR BEGIN NOVECTOR
+#endif
+    integer, intent(in) :: nfout
+
+    real(kind=DP), dimension(natm2,3) :: cps_wk,cps_wk2
+    real(kind=DP), dimension(natm,3) :: cpso,poso
+    real(kind=DP), dimension(3,3) :: rltv_t
+    real(kind=DP), dimension(3) :: p,di,dimin
+    real(kind=DP) :: df,dfmin
+    integer, dimension(natm2) :: ityp_wk,ityp_af
+    integer :: i,n,ia,ja,iia
+    cps_wk(1:natm,1:3)  = cps(1:natm,1:3)
+    do i=1,natm
+       ityp_af(i) = nint(iatomn(ityp(i)))
+    end do
+    n = natm
+    do i=1,natm
+       if(iwei(i)==1) cycle
+       n = n + 1
+       cps_wk(n,1:3) = -cps(i,1:3)
+       ityp_af(n) = nint(iatomn(ityp(i)))
+    end do
+    cps_wk2 = 0.d0
+    do ia=1,natm2
+       p = matmul(op(1:3,1:3,nopr+af),cps_wk(ia,1:3)) + tau(1:3,nopr+af,CARTS)
+       iia = 0
+       dfmin = 1.d10
+       do ja=1,natm2
+          if(ityp_af(ia) /= ityp_af(ja)) cycle
+          di = matmul(transpose(rltv),(p - cps_wk(ja,1:3)))
+          df = sum(abs(cos(di(1:3))-1.d0))
+          if(df < dfmin) then
+             iia = ja
+             dfmin = df
+             dimin = di/PAI2
+          end if
+       end do
+       if(iia == 0) stop 'm_IS_symmetrize_atom_pos: error iia=0'
+       p = p - matmul(altv,nint(dimin))
+       cps_wk2(iia,1:3) = cps_wk2(iia,1:3) + p(1:3)
+       cps_wk(iia,1:3) = cps_wk2(iia,1:3)
+    end do
+
+    cpso = cps
+    poso = pos
+    cps = cps_wk2(1:natm,1:3)
+    rltv_t = transpose(rltv)/PAI2
+    call change_of_coordinate_system(rltv_t,cps,natm,natm,pos) !-(b_I.S.) cps -> pos
+    if(printable.and.ipri>1) then
+       write(nfout,*) 'Atomic coordinates were symmetrized.'
+       !!$write(nfout,'(20x,"Inputted Cartesian coordinate -> symmetrized Cartesian coordinate")')
+       !!$do ia=1,natm
+       !!$   write(nfout,'(i4,3f15.8," -> ",3f15.8)') ia,cpso(ia,1:3),cps(ia,1:3)
+       !!$end do
+       !!$write(nfout,'(20x,"Inputted internal coordinate  -> symmetrized internal coordinate")')
+       !!$do ia=1,natm
+       !!$   write(nfout,'(i4,3f15.8," -> ",3f15.8)') ia,poso(ia,1:3),pos(ia,1:3)
+       !!$end do
+       write(nfout,'(" === Symmetrized Cartesian coordinates and errors===")')
+       do ia=1,natm
+          write(nfout,'(i4,7f18.9)') ia,cpso(ia,1:3),cps(ia,1:3),sqrt(sum((cps(ia,1:3)-cpso(ia,1:3))**2))
+       end do
+       write(nfout,'(" === Symmetrized internal coordinates ===")')
+       do ia=1,natm
+          write(nfout,'(i4,6f18.9)') ia,poso(ia,1:3),pos(ia,1:3)
+       end do
+    end if
+#ifdef SX
+!CDIR END
+#endif
+  end subroutine m_IS_force_af_symmetry
+
   subroutine m_IS_dealloc(neb_mode)
     logical, intent(in), optional :: neb_mode
     logical :: neb
@@ -11535,4 +11710,19 @@ contains
      deallocate(pos_full)
   end subroutine m_IS_dump_cif
 
+  logical function absolute_convergence_of_forc(forc_l_in)
+    real(kind=DP), intent(in), dimension(natm,3) :: forc_l_in
+    real(kind=DP),allocatable,dimension(:,:) :: forct
+    integer :: ia
+    allocate(forct(natm,3));forct=forc_l_in
+    absolute_convergence_of_forc = .false.
+    do ia=1,natm
+       if(imdtyp(ia)==0) forct(ia,:)=0.d0
+    enddo
+    if(sum(forct(1:natm,1:3)**2).lt.1e-15)then
+        absolute_convergence_of_forc = .true.
+    endif
+    deallocate(forct)
+  end function absolute_convergence_of_forc
+
 end module m_Ionic_System
diff -uprN phase0_2015.01/src_phase_3d/m_Kpoints.F90 phase0_2015.01.01/src_phase_3d/m_Kpoints.F90
--- phase0_2015.01/src_phase_3d/m_Kpoints.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Kpoints.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 486 $)
 !
 !  MODULE: m_Kpoint
 !
@@ -37,7 +37,7 @@
 !
 module m_Kpoints
 !     (m_Kp)
-!  $Id: m_Kpoints.F90 440 2015-08-03 07:18:01Z ktagami $
+!  $Id: m_Kpoints.F90 486 2016-05-15 13:59:14Z ktagami $
 ! 
   use m_Crystal_Structure,   only : il,imag,inv,ngen,igen,jgen, a,b,c,ca,cb,cc &
        &                          , altv, rltv, nbztyp , nbztyp_spg, n1_sc, n2_sc, n3_sc
@@ -237,7 +237,7 @@ contains
        end if
     end if
     if(npes > 1) call mpi_bcast(kv3_previous,1,mpi_integer,0,mpi_comm_group,ierr)
-    if(printable) write(nfout,'(i5, " : kv3_previous_job")') kv3_previous
+    if(printable) write(nfout,'(i8, " : kv3_previous_job")') kv3_previous
   end subroutine m_Kp_rd_kv3
 
   subroutine m_Kp_rd_n(nfout)
@@ -560,7 +560,7 @@ contains
        allocate(vkxyz(kv3,3,CRDTYP)); vkxyz = 0.d0
        if(ipri_kp >= 2) then
           write(nfout,'(" !kp vkxyz is allocated now <<m_Kp_alloc_kpoints>>")')
-          write(nfout,'(" !kp kv3 = ", i5," CRDTYP = ",i3)') kv3,CRDTYP
+          write(nfout,'(" !kp kv3 = ", i8," CRDTYP = ",i3)') kv3,CRDTYP
        end if
     else
        if(ipri_kp >= 2) write(nfout,'(" !kp vkxyz is already allocated <<m_Kp_alloc_kpoints>>")')
@@ -692,9 +692,9 @@ contains
 ! ================================================================ 11.0
 
     if(ipri_kp >=1 ) then
-       write(nfout,'(" !kp ek_group = ",i5)') ek_group
-       write(nfout,'(" !kp kv3_ek   = ",i5)') kv3_ek
-       write(nfout,'(" !kp kv3      = ",i5)') kv3
+       write(nfout,'(" !kp ek_group = ",i8)') ek_group
+       write(nfout,'(" !kp kv3_ek   = ",i8)') kv3_ek
+       write(nfout,'(" !kp kv3      = ",i8)') kv3
     end if
   end subroutine m_Kp_set_ek_group
 
@@ -783,7 +783,7 @@ contains
          read(nfinp,'(a132)') str
          if(ipri_kp>=2) write(nfout,'(" ! str : ",a50)') str(1:50)
          call read_nkpnt(str,len_str,ikpnt) !-(b_Kpoints)
-         if(ipri_kp>=2) write(nfout,'(" ! ikpnt(#skipped lines) = ",i5)') ikpnt
+         if(ipri_kp>=2) write(nfout,'(" ! ikpnt(#skipped lines) = ",i8)') ikpnt
          read(nfinp,'(a132)') str
          call read_coordsystem(str,len_str,sw_k_coord_system) !-(b_Kpoints)
          if(sw_k_coord_system == NODATA) ikpnt = ikpnt - 1
@@ -976,7 +976,7 @@ contains
 ! =============== 2014/09/30
 
     else
-       if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i5, " nspin = ", i5)') kv3,nspin
+       if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i8, " nspin = ", i5)') kv3,nspin
 
 ! ======================= added by K. Tagami ================== 11.0
        if(ipri_kp>=1) write(nfout,*) 'ndim_spinor = ', ndim_spinor
@@ -1794,7 +1794,7 @@ contains
       call first_bz(b1,b2,b3,nkmesh,kmesh,nmp_kmesh,mp_kmesh,nface,face)
 
       if(printable .and. printlevel .ge. 0) &
-      &  write(output,'(1x,"number of k-points in MP mesh        = ",i5)') nmp_kmesh
+      &  write(output,'(1x,"number of k-points in MP mesh        = ",i8)') nmp_kmesh
 
       ! debug
       !do i=1,nmp_kmesh
@@ -1826,7 +1826,7 @@ contains
       call first_bz(b1,b2,b3,nkmesh,kmesh,nfbz_kmesh,fbz_kmesh,nface,face)
 
       if(printable .and. printlevel .ge. 0) &
-      &  write(output,'(1x,"number of k-points in full BZ        = ",i5)') nfbz_kmesh
+      &  write(output,'(1x,"number of k-points in full BZ        = ",i8)') nfbz_kmesh
 
       ! debug
       !do i=1,nfbz_kmesh
@@ -2272,7 +2272,8 @@ contains
       integer :: i,j,n
       integer :: nequiv(nkpmax)
       real(kind=DP) :: g(3),q(3)
-      real(kind=DP), parameter :: eps = 1.d-3
+!      real(kind=DP), parameter :: eps = 1.d-3
+      real(kind=DP), parameter :: eps = 1.d-5
       logical :: not_included(nfbz_kmesh)
 
       not_included(1:nfbz_kmesh) = .true.
@@ -2334,7 +2335,8 @@ contains
       integer :: i,j,n
       integer :: nequiv(nkpmax)
       real(kind=DP) :: g(3), q(3), vec_tmp(3)
-      real(kind=DP), parameter :: eps = 1.d-3
+!      real(kind=DP), parameter :: eps = 1.d-3
+      real(kind=DP), parameter :: eps = 1.d-5
       logical :: not_included(nfbz_kmesh)
 
       not_included(1:nfbz_kmesh) = .true.
@@ -2436,7 +2438,7 @@ contains
 
 
       if(printable .and. printlevel .ge. 0) &
-      &  write(output,'(1x,"number of k-points in irreducible BZ = ",i5)') nkpoint
+      &  write(output,'(1x,"number of k-points in irreducible BZ = ",i8)') nkpoint
 
 
       call gen_rgrid(a1,a2,a3,rgrid)
@@ -2515,7 +2517,7 @@ contains
       if( n == nshell ) stop 'accuracy: n == nshell, accuracy check failure'
 
       if(printable .and. printlevel .ge. 0 ) then
-        write(output,'(1x,"Index of the first none zero shell = ",i5)') none_zero
+        write(output,'(1x,"Index of the first none zero shell = ",i8)') none_zero
         write(output,'(1x,"|R| of the first none zero shell   = ",f10.5)') length_nz
         write(output,'(1x,"phase sum = ",f10.5)') sm_nz
         write(output,'(1x,"efficiency = ",f8.2)') dble(none_zero)/dble(nkpoint)
@@ -2524,7 +2526,7 @@ contains
       if(printable .and. printlevel .ge. 1) then
         write(output,'(5x,4(1x,a10))') 'k1','k2','k3','weight'
         do i=1,nkpoint
-          write(output,'(i5,4(1x,f10.5))') i,kpoint(1:3,i),weight(i)
+          write(output,'(i8,4(1x,f10.5))') i,kpoint(1:3,i),weight(i)
         end do
       end if
 
@@ -2869,7 +2871,7 @@ contains
   subroutine m_Kp_set_kv3(nk)
     integer, intent(in) ::nk
     kv3 = nk
-    if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i5," <<m_kp_set_kv3>>")') kv3
+    if(ipri_kp>=1) write(nfout,'(" !kp kv3 = ",i8," <<m_kp_set_kv3>>")') kv3
   end subroutine m_Kp_set_kv3
 
   subroutine m_Kp_set_mesh_super
diff -uprN phase0_2015.01/src_phase_3d/m_Ldos.F90 phase0_2015.01.01/src_phase_3d/m_Ldos.F90
--- phase0_2015.01/src_phase_3d/m_Ldos.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Ldos.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 471 $)
 !
 !  MODULE: m_Ldos
 !
@@ -31,7 +31,7 @@
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
 module m_Ldos
-! $Id: m_Ldos.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: m_Ldos.F90 471 2015-11-13 01:14:05Z jkoga $
   use m_Const_Parameters, only   : DP, CMPLDP, REGULAR_INTERVALS, BY_ATOMIC_POSITIONS &
        &                         , DELTA10, EXECUT, ON, SOFTPART, HARDPART, DIRECT, PAI2 &
        &                         , ALDOS, LAYERDOS, NO, OFF, ELECTRON
@@ -68,7 +68,8 @@ module m_Ldos
        &                         , m_CD_restore_chgq &
        &                         , m_CD_map_chgq_to_fft_box &
        &                         , m_CD_set_ylm_enl_etc &
-       &                         , m_CD_dealloc_ylm_enl_etc 
+       &                         , m_CD_dealloc_ylm_enl_etc &
+       &                         , m_CD_keep_retrieve_hsr
 
 
 ! ============================== added by K. Tagami ============== 11.0
@@ -665,6 +666,7 @@ contains
     call make_map()
     denom = 1.d0/product(fft_box_size_WF(1:3,1))
 
+    call m_CD_keep_retrieve_hsr(.true.)
        if(modnrm == EXECUT) call m_CD_set_ylm_enl_etc()
        if(sw_save_ldos_weight == ON) call m_Ldos_wd_natm2_and_totch()
 
@@ -789,6 +791,8 @@ contains
        end if
 
 
+    call m_CD_keep_retrieve_hsr(.false.)
+
     deallocate(mapg2ly)
     deallocate(mapg2lx)
     if(modnrm == EXECUT) call m_CD_dealloc_ylm_enl_etc()
diff -uprN phase0_2015.01/src_phase_3d/m_OP_Moment.F90 phase0_2015.01.01/src_phase_3d/m_OP_Moment.F90
--- phase0_2015.01/src_phase_3d/m_OP_Moment.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_OP_Moment.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,5 +1,5 @@
 module m_OP_Moment
-! $Id: m_OP_Moment.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_OP_Moment.F90 479 2016-03-12 12:30:51Z ktagami $
 
 ! =========== Contributions ===================================
 !
@@ -159,6 +159,8 @@ contains
                 istmp = ( is1 -1 )*ndim_spinor + is2
                 Do m1=1, size1
                    Do m2=1, size1
+                      if ( m1 /= m2 ) cycle
+
                       spinmom(1) = spinmom(1) +dmmat_ssrep(m1,m2,istmp) &
                            &                  *PauliMatrix(2,is2,is1)
                       spinmom(2) = spinmom(2) +dmmat_ssrep(m1,m2,istmp) &
@@ -191,8 +193,10 @@ contains
 
       write(nfout,*) '!   ia   l   type          mx             my             mz  '
 !
-      write(nfout,'(I7,I4,3X,A5,3F15.8)') ia, l, "orb  ", orbmom(1), orbmom(2), orbmom(3)
-      write(nfout,'(14X,     A5,3F15.8)')     "spin ", spinmom(1), spinmom(2), spinmom(3)
+      write(nfout,'(I7,I4,3X,A5,3F15.8)') ia, l, &
+           &                              "spin ", spinmom(1), spinmom(2), spinmom(3)
+      write(nfout,'(14X,     A5,3F15.8)') &
+           &                              "orb  ", orbmom(1), orbmom(2), orbmom(3)
 !
       write(nfout,*) '! ------ '
 
@@ -742,7 +746,7 @@ contains
 
       Do it=1, ntyp
          call new_radr_and_wos(ista_k,it)
-         rad1 = rad_cov_default( iatomn(it) )
+         rad1 = rad_cov_default( nint(iatomn(it)) ) ! Revised according to a report from ASMS Co.ltd, 10 March 2016.
 
          do lmt1=1,ilmt(it)
             il1=ltp(lmt1,it); im1=mtp(lmt1,it); it1=taup(lmt1,it)
@@ -1210,29 +1214,13 @@ contains
     integer :: ja
     
     integer :: ixyz1, ixyz2, is_tmp
-    real(kind=DP) :: ctmp1, weight, ctmp2
+    real(kind=DP) :: ctmp1, weight, ctmp2, weight2, determinant
       
     allocate(hsr_tmp(natm,nlmt,nlmt,ndim_magmom)); hsr_tmp = 0.0d0
     allocate(hsi_tmp(natm,nlmt,nlmt,ndim_magmom)); hsi_tmp = 0.0d0
 !
     hsr_tmp = rho_ylm1_ylm2_r;  hsi_tmp = rho_ylm1_ylm2_i
 
-#if 0
-    Do ia=1, natm
-       it = ityp(ia)
-       do lmt1 = 1, ilmt(it)
-          do lmt2 = 1, ilmt(it)
-             do is_tmp=1, ndim_magmom
-                write(850+mype,*) ' ia lmt1 lmt2 istmp = ', ia, lmt1,lmt2,is_tmp
-                write(850+mype,*) 'A ', hsr_tmp(ia,lmt1,lmt2,is_tmp), hsr_tmp(ia,lmt2,lmt1,is_tmp)
-                write(850+mype,*) 'B ', hsi_tmp(ia,lmt1,lmt2,is_tmp), hsi_tmp(ia,lmt2,lmt1,is_tmp)
-             end do
-          end do
-       end do
-    end Do
-    close( 850+mype )
-#endif
-
     do ia=1,natm
        it=ityp(ia)
        do is =1, ndim_magmom
@@ -1254,6 +1242,13 @@ contains
           weight = 1.0d0
        endif
        
+       call calc_determinant( op(:,:,iopr), determinant )
+       if ( determinant > 0 ) then
+          weight2 = 1.0d0
+       else
+          weight2 = -1.0d0
+       endif
+
        do ia = 1, natm
           it = ityp(ia)
           ja=abs(ia2ia_symmtry_op_inv(ia,iopr))
@@ -1298,7 +1293,7 @@ contains
                                  crotylm_paw(m,jj,iopr,ia)
                             rho_ylm1_ylm2_i(ia,lmt1,lmt2,1) = &
                                  rho_ylm1_ylm2_i(ia,lmt1,lmt2,1) + &
-                                 weight * &
+                                 weight * weight2 * &
                                  hsi_tmp(ja,lmt3,lmt4,1)* &
                                  crotylm_paw(n,ii,iopr,ia)* &
                                  crotylm_paw(m,jj,iopr,ia)
@@ -1306,7 +1301,8 @@ contains
                             Do ixyz1=1, 3
                                Do ixyz2=1, 3
                                   ctmp1 = op(ixyz2, ixyz1, iopr) *weight
-                                  
+                                  ctmp2 = op(ixyz2, ixyz1, iopr) *weight2
+
                                   rho_ylm1_ylm2_r(ia,lmt1,lmt2,ixyz2+1) &
                                        & = rho_ylm1_ylm2_r(ia,lmt1,lmt2,ixyz2+1)  &
                                        &  + ctmp1 &
@@ -1315,7 +1311,7 @@ contains
                                        &    *crotylm_paw(m,jj,iopr,ia)
                                   rho_ylm1_ylm2_i(ia,lmt1,lmt2,ixyz2+1) &
                                        & = rho_ylm1_ylm2_i(ia,lmt1,lmt2,ixyz2+1)  &
-                                       &  + op(ixyz2, ixyz1, iopr) &
+                                       &  + ctmp2 &
                                        &    *hsi_tmp(ja,lmt3,lmt4,ixyz1+1) &
                                        &    *crotylm_paw(n,ii,iopr,ia)  &
                                        &    *crotylm_paw(m,jj,iopr,ia)
@@ -1335,22 +1331,6 @@ contains
       
     rho_ylm1_ylm2_r = rho_ylm1_ylm2_r/nopr; rho_ylm1_ylm2_i = rho_ylm1_ylm2_i/nopr; 
       
-#if 0
-    Do ia=1, natm
-       it = ityp(ia)
-       do lmt1 = 1, ilmt(it)
-          do lmt2 = 1, ilmt(it)
-             do is_tmp=1, ndim_magmom
-                write(860+mype,*) ' ia lmt1 lmt2 istmp = ', ia, lmt1,lmt2,is_tmp
-                write(860+mype,*) 'A ', rho_ylm1_ylm2_r(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_r(ia,lmt2,lmt1,is_tmp)
-                write(860+mype,*) 'B ', rho_ylm1_ylm2_i(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_i(ia,lmt2,lmt1,is_tmp)
-             end do
-          end do
-       end do
-    end Do
-    close( 860+mype )
-#endif
-
     do ia=1,natm
        it=ityp(ia)
        do is =1, ndim_magmom
@@ -1363,24 +1343,19 @@ contains
        end do
     end do
     
-#if 0
-    Do ia=1, natm
-       it = ityp(ia)
-       do lmt1 = 1, ilmt(it)
-          do lmt2 = 1, ilmt(it)
-             do is_tmp=1, ndim_magmom
-                write(870+mype,*) ' ia lmt1 lmt2 istmp = ', ia, lmt1,lmt2,is_tmp
-                write(870+mype,*) 'A ', rho_ylm1_ylm2_r(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_r(ia,lmt2,lmt1,is_tmp)
-                write(870+mype,*) 'B ', rho_ylm1_ylm2_i(ia,lmt1,lmt2,is_tmp), rho_ylm1_ylm2_i(ia,lmt2,lmt1,is_tmp)
-             end do
-          end do
-       end do
-    end Do
-    close( 870+mype )
-#endif
-
     deallocate(hsr_tmp);  deallocate(hsi_tmp)
       
+  contains
+    subroutine calc_determinant( a, determinant )
+      real(kind=DP), intent(in) :: a(3,3)
+      real(kind=DP), intent(out) :: determinant
+
+      determinant = a(1,1)*( a(2,2)*a(3,3) -a(2,3)*a(3,2) ) &
+           &       -a(1,2)*( a(2,1)*a(3,3) -a(2,3)*a(3,1) ) &
+           &       +a(1,3)*( a(2,1)*a(3,2) -a(2,2)*a(3,1) )
+
+    end subroutine calc_determinant
+
   end subroutine symmetrz_rho_ylm1_ylm2
 ! ============= 2014/08/26
 
diff -uprN phase0_2015.01/src_phase_3d/m_Orbital_Population.F90 phase0_2015.01.01/src_phase_3d/m_Orbital_Population.F90
--- phase0_2015.01/src_phase_3d/m_Orbital_Population.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Orbital_Population.F90	2016-07-12 12:51:53.000000000 +0900
@@ -29,7 +29,7 @@
 !
 !
 module m_Orbital_Population
-! $Id: m_Orbital_Population.F90 416 2014-12-17 04:11:16Z jkoga $
+! $Id: m_Orbital_Population.F90 492 2016-05-31 03:06:04Z jkoga $
   use m_Const_Parameters,     only : DP,ON,ANEW,RENEW,SIMPLE,BROYD1,BROYD2,DFP,PULAY,OFF,UNIT_MATRIX
   use m_Files,                only : nfout, nfoccmat, m_Files_open_nfoccmat
   use m_IterationNumbers,     only : iteration_for_cmix
@@ -37,7 +37,9 @@ module m_Orbital_Population
      &                             , proj_attribute &
      &                             , proj_group, num_proj_elems, max_projs &
      &                             , iprihubbard &
-     &                             , hownew,nbxmix,istrbr,ipripulay, sw_force_simple_mixing_hub
+     &                             , hownew,nbxmix,istrbr,ipripulay, sw_force_simple_mixing_hub &
+     &                             , sw_metric_diff, alpha_pulay, sw_recomposing &
+     &                             , sw_mix_bothspins_sametime, sw_force_simple_mixing
   use m_Ionic_System,         only : natm,ntyp,ityp,iproj_group,zeta1
   use m_PseudoPotential,      only : prodphi,ilmt,ltp,mtp,taup,nlmt,ntau,nlmtt
   use m_Crystal_Structure,    only : op,nopr
@@ -140,6 +142,11 @@ module m_Orbital_Population
   integer, private :: sw_mix_imaginary_component = ON
 ! ===================================================================== 11.0
 
+  real(DP),private,allocatable,target,dimension(:,:,:,:) :: urec_l
+  real(DP),private,allocatable,dimension(:,:) :: d0_l,u_l,v_l
+  real(kind=DP), allocatable, dimension(:,:) :: ynorm
+  real(DP),private,allocatable,dimension(:) :: f_p !d(ista_kgpm:iend_kgpm)
+
   include 'mpif.h'
 
 contains
@@ -186,6 +193,56 @@ contains
     !!$write(nfout,*) 'nyymax=',nyymax
   end subroutine m_OP_set_i2lp_max2lp
 
+  subroutine m_OP_store_om()
+    omold = om
+  end subroutine m_OP_store_om
+
+  subroutine m_OP_om_diff()
+    integer :: is,ia,ja,it,ilmt1,ilmt2
+    integer :: l1,l2,m1,m2,t1,t2,m1r,m2r
+    integer :: iyy,iopr,mm1,mm2
+    integer :: ig,ip,i
+    
+    integer :: ilp
+    real(kind=DP) :: diff,sumdiff
+    integer :: icount
+    sumdiff = 0.d0
+    icount=0
+    do is=1,nspin,af+1
+
+       do ia=1,natm
+          ig = iproj_group(ia)
+          if(ig<1) cycle
+
+          do i=1,num_proj_elems(ig)
+             ip=proj_group(i,ig)
+             it = proj_attribute(ip)%ityp
+             ilp = proj_attribute(ip)%l+1
+
+             do ilmt1 = 1, ilmt(it)
+                l1 = ltp(ilmt1,it); m1 = mtp(ilmt1,it)
+                t1 = taup(ilmt1,it)
+                if ( l1 /= ilp ) cycle
+
+                do ilmt2 = 1, ilmt(it)
+                   l2 = ltp(ilmt2,it); m2 = mtp(ilmt2,it)
+                   t2 = taup(ilmt2,it)
+                   if( l2 /= ilp ) cycle
+
+                   diff = abs(omold(m1,m2,i,ia,is)-om(m1,m2,i,ia,is))
+                   sumdiff = sumdiff+diff
+                   icount = icount+1
+                end do
+             end do
+! =============================================================
+          end do
+          !!if(printable) write(nfout,'(a,i5,a,f20.12)') 'omdiff for atom ',ia,' : ',sumdiff/dble(icount)
+          sumdiff = 0.d0
+          icount = 0
+       end do
+    end do
+  end subroutine m_OP_om_diff
+
   subroutine m_OP_alloc
     allocate(i2lp(num_projectors))
     call m_OP_set_i2lp_max2lp
@@ -2318,36 +2375,430 @@ contains
     else
        nspin_m  = nspin/(af+1)
     endif
-! ====================================================================== 11.0
-    allocate(din(nsize_rho,nspin_m))
-    allocate(dout(nsize_rho,nspin_m))
-    allocate(urec(nsize_rho,nspin_m,nbxmix,2))
+! ================================================================= 11.0
+
+! =========================================== Modified by K. Tagami =========
+!    allocate(f_p(ista_kgpm:iend_kgpm)); call precon_4_mult(f_p) !-(m_CD)
+    allocate(f_p(1:nsize_rho)); f_p = 0
+! ============================================================================
+
+    allocate(din(1:nsize_rho,nspin_m))
+    allocate(dout(1:nsize_rho,nspin_m))
+    allocate(urec_l(1:nsize_rho,nspin_m,nbxmix,2))
     allocate(uuf_p(nbxmix,nspin_m))
     allocate(f(nbxmix,nbxmix,nspin_m))
     allocate(g_p(nbxmix,nspin_m))
     allocate(ncrspd(nbxmix))
+
+    allocate(ynorm(nbxmix,nspin_m));ynorm=1.d0
+! ======================================= Added by K. Tagami ===========
+    din = 0.0d0; dout = 0.0d0; urec_l = 0.0d0; uuf_p = 0.0d0; f = 0.0d0
+    g_p = 0.0d0;  ncrspd = 0
+! ======================================================================
   end subroutine mix_pulay_allocate
 
   subroutine mix_pulay_deallocate
+    if(allocated(f_p)) deallocate(f_p)
     if(allocated(din)) deallocate(din)
     if(allocated(dout)) deallocate(dout)
-    if(allocated(urec)) deallocate(urec)
+    if(allocated(urec_l)) deallocate(urec_l)
     if(allocated(uuf_p)) deallocate(uuf_p)
     if(allocated(f)) deallocate(f)
     if(allocated(g_p)) deallocate(g_p)
     if(allocated(ncrspd)) deallocate(ncrspd)
+    if (allocated(ynorm)) deallocate(ynorm)
   end subroutine mix_pulay_deallocate
 
   subroutine mix_pulay_alloc2
-    allocate(d0(nsize_rho,nspin_m))
+    allocate(d0_l(nsize_rho,nspin_m))
+    d0_l = 0.0d0
   end subroutine mix_pulay_alloc2
 
   subroutine mix_pulay_dealloc2
-    deallocate(d0)
+    deallocate(d0_l)
   end subroutine mix_pulay_dealloc2
 
   subroutine m_OP_mix_pulay(rmx)
-    real(kind=DP), intent(in) :: rmx
+    integer, parameter  :: iRho = 1, iResid = 2
+    real(DP),intent(in) :: rmx
+    integer   :: iter, mxiter
+    real(DP),pointer,dimension(:)  :: e_wk, f_wk, ww1, finv
+    integer, pointer,dimension(:)  :: ip
+! --> T. Yamasaki  03 Aug. 2009
+    real(kind=DP), allocatable, dimension(:):: rmxtrc ! d(nspin_m)
+!   real(kind=DP), allocatable, dimension(:,:,:) :: chgqstore_l, chgqostore_l
+! <--
+    real(kind=DP) :: rmxtt
+    integer   :: id_sname = -1
+    call tstatc0_begin('m_OP_mix_pulay ',id_sname,1)
+
+    if(previous_waymix /= PULAY) then
+       if(first) then
+          call create_map_func(.true.)
+          call alloc_rho
+          call create_map_func(.false.)
+          first = .false.
+       end if
+       call mix_dealloc_previous()
+       call mix_pulay_allocate()
+    end if
+
+    if ( noncol ) then
+       call map_om_to_rho_noncl(om,   om_aimag,   rho)
+       call map_om_to_rho_noncl(omold,omold_aimag,rhoo)
+    else
+       call map_om_to_rho(om,rho)
+       call map_om_to_rho(omold,rhoo)
+    endif
+
+    iter = iter_from_reset()                 !-(m_OP)
+
+    nspin_m  = ndim_magmom/(af+1)
+    allocate( rmxtrck(nspin_m) )
+    if ( noncol ) then
+       rmxtrck = rmx
+    else
+       if ( sw_recomposing_occmat == YES .and. af == 0 .and. nspin == 2 ) then
+          call alloc_rhostore_recomp( rmx, rmxtrck )
+       else
+          rmxtrck = rmx
+       endif
+    end if
+! ========================================================================= 11.0
+
+    if((iter-istrbr+1) <= 1) then
+! ===================================== modified by K. Tagami ======== 5.0
+!!       call simple_mix(rmx)                  !-(m_OP)
+       call simple_mix_kt( rmxtrck )                  !-(m_OP)
+! ==================================================================== 5.0
+    else
+       call mix_pulay_alloc2   !-(m_CD) d0_l,u_l, and w_l are allocated
+       call set_ncrspd_mxiter(nbxmix,iter-istrbr,mxiter) ! -> ncrspd, mxiter
+!!$       call mix_pulay_alloc3(nbxmix,iter-istrbr)   !-(c.h.) e_wk,f_wk,ww1,finv,ip
+       call mix_pulay_alloc3(mxiter)   !-(c.h.) e_wk,f_wk,ww1,finv,ip
+
+       call Resid_and_dd_into_urec(mxiter) !-(c.h.)
+       !                               dF ->urec_l; dd ->urec_l; d0_l,din,dout
+       call Ri_dot_Rj(mxiter)          !-(c.h.) <R(i)|R(j)>->f
+       call get_finv_lapack(nbxmix,mxiter,f)  !-(c.h.) f -> f^{-1}= <R(i)|R(j)>^{-1}
+
+       call Rj_dot_d(mxiter)           !-(c.h.) <R(j)|d>,(j=1,iter-istrb) -> uuf_p
+
+       call get_gmatrix(mxiter)        !-(c.h.) (f,uuf_p)->g
+       call renew_d_using_g(mxiter,rmxtrck)     !-(c.h.)
+
+       call mix_pulay_dealloc3                    !-(c.h.)
+       call mix_pulay_dealloc2                    !-(m_CD)
+    endif
+
+    deallocate(rmxtrck)
+
+    if ( noncol ) then
+       call map_rho_to_om_noncl( ommix,ommix_aimag,rho )
+    else
+       call map_rho_to_om( ommix,rho )
+    endif
+
+    previous_waymix = PULAY
+    call tstatc0_end(id_sname)
+  contains
+    subroutine mix_pulay_alloc3(m)
+      integer, intent(in) :: m
+      allocate(e_wk(m*m)); allocate(f_wk(m*m)); allocate(ww1(m)); allocate(finv(m*m))
+      allocate(ip(m))
+! ===================================== Added by K. Tagami ============
+      e_wk = 0; f_wk = 0; ww1 = 0; finv = 0; ip = 0
+! =====================================================================
+    end subroutine mix_pulay_alloc3
+
+    subroutine set_ncrspd_mxiter(n,iter,m)
+      integer, intent(in)  :: n, iter
+      integer, intent(out) :: m
+      integer :: i, nx
+      if(hownew == ANEW) then
+         m = iter
+!!$         ncrspd(:) = (/(i,i=1,m)/)
+         do i=1,iter
+            ncrspd(i) = i
+         end do
+      else ! hownew == RENEW
+         if(iter <= n) then
+            m = iter
+!!$            ncrspd(:) = (/(i,i=1,m)/)
+            do i=1,iter
+               ncrspd(i) = i
+            end do
+         else
+            m = n
+            nx = ncrspd(1)
+            do i = 1, m-1
+               ncrspd(i) = ncrspd(i+1)
+            end do
+            ncrspd(m) = nx
+         end if
+      end if
+    end subroutine set_ncrspd_mxiter
+
+    subroutine mix_pulay_dealloc3
+      deallocate(e_wk); deallocate(f_wk); deallocate(ww1); deallocate(finv)
+      deallocate(ip)
+    end subroutine mix_pulay_dealloc3
+
+    subroutine Resid_and_dd_into_urec(iter)
+      integer, intent(in) :: iter
+      integer             :: itc,itc0,itc1
+      integer :: i,j,k,imix
+      real(kind=DP) :: sum1,sum2
+      itc = ncrspd(iter)
+      urec_l(:,:,itc,iResid) = rho(:,:) - rhoo(:,:) - (dout(:,:) - din(:,:)) ! =dF(=delta F^i)
+      urec_l(:,:,itc,iRho  ) = rhoo(:,:) - din(:,:)                ! =dd
+      d0_l(:,:) = rho(:,:) - rhoo(:,:)
+      din(:,:)  = rhoo(:,:)
+      dout(:,:) = rho(:,:)
+      ynorm(itc,:)=0.d0 
+      do i=1,nspin_m
+         do k=1,nsize_rho
+            ynorm(itc,i) = ynorm(itc,i)+urec_l(k,i,itc,iResid)*urec_l(k,i,itc,iResid)
+         enddo
+      enddo
+      ynorm(itc,:) = 1.d0/sqrt(ynorm(itc,:))
+    end subroutine Resid_and_dd_into_urec
+
+    subroutine Ri_dot_Rj(n)
+      integer, intent(in) :: n
+      integer  :: it,jt,itc,jtc
+      real(DP) :: ff1(nspin_m),ff1tmp
+
+      do it = 1, n
+         itc = ncrspd(it)
+         do jt = it, n
+            jtc = ncrspd(jt)
+            if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+               call mult1s10_reduce_spin(urec_l,nbxmix,2,itc,iResid,urec_l,jtc,iResid,f_p,ff1tmp)   ! <delta F^i|delta F^j>
+               ff1(1)=ff1tmp;ff1(2)=ff1tmp
+            else
+               call mult1s10(urec_l,nbxmix,2,itc,iResid,urec_l,jtc,iResid,f_p,ff1)   ! <delta F^i|delta F^j>
+            endif
+
+            if ( noncol ) then
+               call mult1s10_reduce_spin( urec_l, nbxmix, 2, itc, iResid, &
+                    &                     urec_l, jtc, iResid, f_p, ff1tmp )
+                                                        ! <delta F^i|delta F^j>
+               ff1(:) = ff1tmp
+            endif
+            f(it,jt,1:nspin_m) = ff1(1:nspin_m)
+            if(jt /= it) f(jt,it,1:nspin_m) = f(it,jt,1:nspin_m)
+         end do
+      end do
+    end subroutine Ri_dot_Rj
+
+    subroutine Rj_dot_d(n)
+      integer, intent(in) :: n
+      integer  :: jt, jtc
+      real(DP) :: ff1(nspin_m),ff1tmp
+
+      do jt = 1, n
+         jtc = ncrspd(jt)
+         if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+            call mult1s5_reduce_spin(urec_l,nbxmix,2,jtc,iResid,d0_l,f_p,ff1tmp)
+            ff1(1) = ff1tmp;ff1(2)=ff1tmp
+         else
+            call mult1s5(urec_l,nbxmix,2,jtc,iResid,d0_l,f_p,ff1)
+         endif
+
+         if ( noncol ) then
+            call mult1s5_reduce_spin(urec_l,nbxmix,2,jtc,iResid,d0_l,f_p,ff1tmp)
+            ff1(:) = ff1tmp
+         endif
+
+         uuf_p(jt,1:nspin_m) = ff1(1:nspin_m)
+      end do
+    end subroutine Rj_dot_d
+
+    subroutine get_finv_lapack(m,n,f)
+      integer,intent(in)                             :: m,n
+      real(DP),intent(inout),dimension(m,m,nspin_m) :: f
+      real(DP), allocatable,dimension(:,:) :: fwork
+      integer :: is,inf,it,jt,kt,nnspin
+      real(DP) :: div,tmp
+      allocate(fwork(n,n))
+      nnspin = nspin
+      if(sw_mix_bothspins_sametime==ON .or. af==1) nnspin=1
+
+      if ( noncol ) then
+         nnspin = 1
+      end if
+
+      do is=1,nnspin
+         if(ipripulay >= 2) then
+            write(nfout,600) n,(('(',it,jt,')',f(it,jt,is),jt=1,n),it=1,n)
+600         format(//11x,"**input matrix**"/12x &
+                 & ,"horder=",I5/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+         fwork=0
+         do it=1,n
+            do jt=1,n
+               fwork(jt,it) = f(jt,it,is)*ynorm(jt,is)*ynorm(it,is)
+               if(it==jt) fwork(jt,it)=fwork(jt,it)+alpha_pulay
+            enddo
+         enddo
+         call dpotrf('U',n,fwork,n,inf)
+         call dpotri('U',n,fwork,n,inf)
+         do it=1,n-1
+            do jt=it+1,n
+               fwork(jt,it) = fwork(it,jt)
+            enddo
+         enddo
+         do it=1,n
+            do jt=1,n
+               f(jt,it,is) = fwork(jt,it)*ynorm(jt,is)*ynorm(it,is)
+            enddo
+         enddo
+         if(ipripulay >= 2) then
+            write(nfout,630) (('(',it,jt,')',f(it,jt,is),it=1,n),jt=1,n)
+630         format(/11x, "**inverse matrix**" &
+                 & ,/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+      enddo
+      if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+         do it=1,n
+            do jt=1,n
+               f(jt,it,2) = f(jt,it,1)
+            enddo
+         enddo
+      endif
+! ============================== added by K. Tagami ========== 11.0
+      if ( noncol ) then
+         do it=1,n
+            do jt=1,n
+               f(jt,it,:) = f(jt,it,1)
+            enddo
+         end do
+      endif
+! ============================================================ 11.0
+      deallocate(fwork)
+
+    end subroutine get_finv_lapack
+
+    subroutine get_finv(m,n,f)
+      integer,intent(in)                             :: m,n
+      real(DP),intent(inout),dimension(m,m,nspin_m) :: f
+
+      integer                        :: icount,is,jt,it,icon
+      real(DP)                       :: div
+
+      e_wk = 0.d0
+      do it = 1, n
+         e_wk(it*it) = 1.d0
+      end do
+
+! ======================================= modified by K. Tagami =========== 11.0
+!      do is = 1, nspin, af+1
+      do is = 1, ndim_magmom, af+1
+! ========================================================================== 11.0
+         div = 1.d0/f(1,1,is)
+         icount = 1
+         do jt = 1, n
+            do it = 1, n
+               f_wk(icount) = f(it,jt,is)*div
+               icount = icount + 1
+            end do
+         end do
+         if(ipripulay >= 1) then
+            write(nfout,600) n,(('(',it,jt,')',f(it,jt,is)*div,jt=1,n),it=1,n)
+600         format(//11x,"**input matrix**"/12x &
+                 & ,"horder=",I5/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+         call rdecomp(n,f_wk,ww1,ip,icon)
+         if(icon /= 0) then
+            stop 'LU decomposition is impossible.'
+         else
+            call rsolve(n,n,f_wk,e_wk,finv,ip)
+         endif
+
+         icount = 1
+         do jt = 1, n
+            do it = 1, n
+               f(it,jt,is) = finv(icount)
+               icount = icount + 1
+            end do
+         end do
+         if(ipripulay >= 1) then
+            write(nfout,630) (('(',it,jt,')',f(it,jt,is),it=1,n),jt=1,n)
+630         format(/11x, "**inverse matrix**" &
+                 & ,/(2x,4(1x,1a,i2,",",i2,1a,e14.6)))
+         end if
+      end do
+    end subroutine get_finv
+
+    subroutine get_gmatrix(n)
+      integer,intent(in) :: n
+      integer :: is, it, jt, nnspin
+      nnspin = nspin
+      if(sw_mix_bothspins_sametime==ON .or. af==1) nnspin=1
+
+! ============================ added by K. Tagami ============= 11.0
+      if ( noncol ) nnspin = 1
+! ============================================================== 11.0      
+
+      g_p = 0.d0
+      do is = 1, nnspin
+         do it = 1, n
+            do jt = 1, n
+               g_p(it,is) = g_p(it,is) - f(jt,it,is)*uuf_p(jt,is)
+            end do
+         end do
+         if(ipripulay >= 2) then
+            write(nfout,'(" -- g_p(1:",i3,") --")') n
+            write(nfout,'(8f20.12)') (g_p(it,is),it=1,n)
+         end if
+      end do
+      if(sw_mix_bothspins_sametime==ON .and. nspin_m>1)then
+         do it = 1,n
+            g_p(it,2) = g_p(it,1)
+         enddo
+      endif
+! ============================== added by K. Tagami ============ 11.0
+      if ( noncol ) then
+         do it = 1,n
+            g_p(it,:) = g_p(it,1)
+         enddo
+      endif
+! ============================================================== 11.0
+
+    end subroutine get_gmatrix
+
+    subroutine renew_d_using_g(n,p)
+      integer, intent(in)                                :: n
+      real(DP),intent(in),dimension(nspin_m) :: p
+      integer    :: is, k, i, it, itc, ns
+
+!!$      do is = 1, nspin, af+1
+      ns = nspin_for_qnewton()
+      do is = 1, ns,af+1
+         do i = 1,nsize_rho
+            rho(i,is)  = rhoo(i,is) + p(is)*d0_l(i,is)
+         end do
+         do it = 1, n
+            itc = ncrspd(it)
+            do i = 1,nsize_rho
+               rho(i,is) = rho(i,is) + g_p(it,is)* &
+                    &        (urec_l(i,is,itc,iRho) + p(is)*urec_l(i,is,itc,iResid))
+            end do
+         end do
+      end do
+
+    end subroutine renew_d_using_g
+
+    integer function nspin_for_qnewton()
+      if ( noncol ) then
+         nspin_for_qnewton=ndim_magmom
+      else
+         nspin_for_qnewton=nspin
+         if (sw_force_simple_mixing==ON .and. sw_recomposing==ON) nspin_for_qnewton=1
+      endif
+    end function nspin_for_qnewton
+
   end subroutine m_OP_mix_pulay
 
   subroutine create_map_func(paramset)
@@ -2597,6 +3048,13 @@ contains
 ! =========================================================== 11.0
   end subroutine m_OP_cp_ommix_to_omold
 
+  subroutine m_OP_cp_ommix_to_om
+    om = ommix
+! ========================= added by K. Tagami ============== 11.0
+    if ( noncol ) om_aimag = ommix_aimag
+! =========================================================== 11.0
+  end subroutine m_OP_cp_ommix_to_om
+
   subroutine m_OP_simple_mixing(nfout,rmxt)
     integer, intent(in) :: nfout
     real(kind=DP), intent(in) :: rmxt
@@ -2691,6 +3149,119 @@ contains
 
   end subroutine alloc_rhostore_recomp
 
+  subroutine mult1s5(u,mb,muv,j,iuv,v,f_q,fmult)
+    integer,intent(in) :: mb,muv,j,iuv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m) :: v
+    real(DP),intent(in), dimension(1:nsize_rho) :: f_q
+    real(DP),intent(out),dimension(nspin_m)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,i
+
+    fmult = 0.d0
+    do is = 1, ndim_magmom, af+1
+       p = 0.d0
+       fac=1.0d0
+       do i = 1,nsize_rho
+          if ( noncol ) then
+             fac=f_q(i)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(i)
+             endif
+          end if
+          p = p + fac*u(i,is,j,iuv)*v(i,is)
+       end do
+       fmult(is) = p
+    enddo
+  end subroutine mult1s5
+
+  subroutine mult1s5_reduce_spin(u,mb,muv,j,iuv,v,f_q,fmult)
+    integer,intent(in) :: mb,muv,j,iuv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m) :: v
+    real(DP),intent(in), dimension(1:nsize_rho):: f_q
+    real(DP),intent(out)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,i
+
+    fmult = 0.d0
+    p = 0.d0
+
+    do is = 1, ndim_magmom, af+1
+       fac = 1.0d0
+       do i = 1,nsize_rho
+          if ( noncol ) then
+             fac=f_q(i)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(i)
+             endif
+          end if
+          p = p + fac*u(i,is,j,iuv)*v(i,is)
+       end do
+    enddo
+    fmult = p
+  end subroutine mult1s5_reduce_spin
+
+  subroutine mult1s10(u,mb,muv,i,iu,v,j,iv,f_q,fmult)
+    integer,intent(in) :: mb,muv,i,iu,j,iv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u,v
+    real(DP),intent(in), dimension(1:nsize_rho):: f_q
+    real(DP),intent(out),dimension(nspin_m)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,ig
+    fmult = 0.d0
+
+    do is = 1, ndim_magmom, af+1
+       p = 0.d0
+       fac = 1.0d0
+       do ig = 1,nsize_rho
+          if ( noncol ) then
+             fac=f_q(ig)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(ig)
+             endif
+          end if
+          p = p + fac*u(ig,is,i,iu)*v(ig,is,j,iv)
+       end do
+       fmult(is) = p
+    enddo
+  end subroutine mult1s10
+
+  subroutine mult1s10_reduce_spin(u,mb,muv,i,iu,v,j,iv,f_q,fmult)
+    integer,intent(in) :: mb,muv,i,iu,j,iv
+    real(DP),intent(in), dimension(1:nsize_rho,nspin_m,mb,muv) :: u,v
+    real(DP),intent(in), dimension(1:nsize_rho):: f_q
+    real(DP),intent(out)            :: fmult
+
+    real(DP) :: p,  fac
+    integer  :: is,ig
+
+    fmult = 0.d0
+    p = 0.d0
+
+    do is = 1, ndim_magmom, af+1
+       fac = 1.0d0
+       do ig = 1,nsize_rho
+
+          if ( noncol ) then
+             fac=f_q(ig)
+          else
+             if (is==1 .or. sw_recomposing==OFF .or. sw_metric_diff==ON) then
+                fac=f_q(ig)
+             endif
+          end if
+          p = p + fac*u(ig,is,i,iu)*v(ig,is,j,iv)
+       end do
+    enddo
+    fmult = p
+  end subroutine mult1s10_reduce_spin
+
 ! ================================================================= 5.0
 
 end module m_Orbital_Population
diff -uprN phase0_2015.01/src_phase_3d/m_PAW_ChargeDensity.F90 phase0_2015.01.01/src_phase_3d/m_PAW_ChargeDensity.F90
--- phase0_2015.01/src_phase_3d/m_PAW_ChargeDensity.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_PAW_ChargeDensity.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 488 $)
 !
 !  MODULE: m_PAW_ChargeDensity
 !
@@ -33,10 +33,10 @@
 !
 module m_PAW_ChargeDensity
   use m_db,                   only : getIntDB,getStringDB_TB,getIntDB_TB
-  use m_Const_Parameters,     only : DP,PAI2,PAI4,BUCS,SphericalHarmonicsExpansion, GaussLegendre, LOWER
+  use m_Const_Parameters,     only : DP,PAI2,PAI4,BUCS,SphericalHarmonicsExpansion, GaussLegendre, LOWER, Bohr
   use m_Control_Parameters,   only : kimg,nspin,af,ipripaw,ipriinputfile,printable
   use m_Files,                only : nfout
-  use m_Ionic_System,         only : ityp,natm,ntyp,pos,iwei, speciesname
+  use m_Ionic_System,         only : ityp,natm,ntyp,pos,iwei, speciesname, amion
   use m_Charge_Density,       only : hsr,chgq_l,chgsoft
   use m_PlaneWaveBasisSet,    only : igfp_l
   use m_PseudoPotential,      only : psirpw,phirpw,qrspspw &
@@ -45,7 +45,7 @@ module m_PAW_ChargeDensity
        &                            ,il2p,isph,dl2p,iqitg &
        &                            ,m_PP_find_maximum_l &
        &                            ,ipaw,wf_mnrc,flg_symmtry &
-       &                            ,mmesh
+       &                            ,mmesh, nmesh
   use m_FFT,                  only : fft_box_size_CD,fft_box_size_CD_c, nfftp &
        , m_FFT_CD_inverse0 &
        , m_FFT_check_of_negative_CD &
@@ -167,6 +167,7 @@ module m_PAW_ChargeDensity
   public:: calcGaussLegendreIntegration
   public:: calcSphericalHarmonicsExpansion
   
+
 contains
 
   subroutine m_PAW_dealloc()
@@ -2313,7 +2314,7 @@ contains
   subroutine set_ia2ia_symmtry_op
     integer:: ia,it,no,ja,jt
     integer:: i,j,k
-    real(DP):: pos0(3),pos1(3),pos2(3)
+    real(DP):: pos0(3),pos1(3),pos2(3),pos3(3)
     real(DP):: distance
     
     allocate(op_pr(3,3,nopr+af))
@@ -2329,14 +2330,15 @@ contains
           pos1(:)=matmul(op_pr(:,:,no),pos0(:))+tau(:,no,BUCS)
 !print *,'pos1=',pos1
           pos1(:) = pos1(:) - floor(pos1(:))
-          KLoop: do k=-2,1
-             do j=-2,1
-                do i=-2,1
+          KLoop: do k=-1,1
+             do j=-1,1
+                do i=-1,1
                    do ja=1,natm
                       jt=ityp(ja)
                       if(it/=jt) cycle
-                      
-                      pos2(1:3)=pos(ja,1:3)+(/dble(i),dble(j),dble(k)/)
+                      pos3(1:3) = pos(ja,1:3) 
+                      pos3(:) = pos3(:) - floor(pos3(:))
+                      pos2(1:3)=pos3(1:3)+(/dble(i),dble(j),dble(k)/)
 !print *,'pos2=',pos2                             
                       distance=abs(pos1(1)-pos2(1))+abs(pos1(2)-pos2(2)) &
                            +abs(pos1(3)-pos2(3))
@@ -2355,7 +2357,7 @@ contains
                       end if
                         
                       if(kimg==1 .and. iwei(ja)==2) then
-                         pos2(1:3)=-pos(ja,1:3)+(/dble(i),dble(j),dble(k)/)
+                         pos2(1:3)=-pos3(1:3)+(/dble(i),dble(j),dble(k)/)
                          distance=abs(pos1(1)-pos2(1))+abs(pos1(2)-pos2(2)) &
                               +abs(pos1(3)-pos2(3))
                          if(distance < 1.d-5) then
@@ -3850,4 +3852,5 @@ contains
 ! ==============================================================================
 #endif
   
+
 end module m_PAW_ChargeDensity
diff -uprN phase0_2015.01/src_phase_3d/m_PAW_XC_Potential.F90 phase0_2015.01.01/src_phase_3d/m_PAW_XC_Potential.F90
--- phase0_2015.01/src_phase_3d/m_PAW_XC_Potential.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_PAW_XC_Potential.F90	2016-07-12 12:51:53.000000000 +0900
@@ -7,7 +7,7 @@
 #endif
 !=======================================================================
 !
-!  SOFTWARE NAME : PHASE ($Revision: 416 $)
+!  SOFTWARE NAME : PHASE ($Revision: 494 $)
 !
 !  MODULE: m_PAW_XC_Potential
 !
@@ -637,7 +637,7 @@ contains
         dnps_dph = 0.d0
         ddnps_ddr = 0.d0
                
-        if(xctype == 'ldapw91' .or. xctype == 'ldapbe ') then
+        if(xctype == 'ldapw91' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf' ) then
             grad_nae=0.d0;grad_tnae=0.d0
             grad_nps=0.d0;grad_tnps=0.d0
         else
@@ -668,7 +668,7 @@ contains
                                 ,grad_tnps(1:nrc),wos(1:nrc),texc &
                                 ,dF_dnps(1:nrc,1:nspin))
          exc_ps=exc_ps+texc*omg_wght(ith)
-      else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe ') then
+      else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf') then
          call ex_ggapbe (nspin,nspin,1,nrc,nae(1:nrc,1:nspin) &
                                     ,grad_nae(1:nrc,1:nspin) &
                                     ,wos(1:nrc),texc &
@@ -2105,7 +2105,7 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
 ! ==============================================================================
         if(af /= 0) then
            do ia = 1, natm
-              if(flg_done(ia) .eq. .true.) cycle
+              if(flg_done(ia)) cycle
               ja = ia2ia_symmtry_op(ia,nopr+af)
               if(ja > ia) flg_done(ja) = .true.
            enddo
@@ -2398,7 +2398,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
                         dFc_dab_ps(ir)*nanb_ps_sph(ir,1)   
                 end if
 
-                if(check_of_xctype()==GGA .and. xctype /= 'ldapw91' .and. xctype /= 'ldapbe ') then 
+                if(check_of_xctype()==GGA .and. xctype /= 'ldapw91' .and. xctype /= 'ldapbe ' &
+                & .and. xctype /= 'vdwdf') then 
                     if(dabs(grad_nae(ir,1)) < DELTA10) cycle
                     if(dabs(grad_tnae(ir)) < DELTA10) cycle
                     if(dabs(grad_nps(ir,1)) < DELTA10) cycle
@@ -2446,7 +2447,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
             if(vflag == VXC_AND_EXC) then
             
                 if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                            .and. xctype /= 'ldapbe ') then
+                                            .and. xctype /= 'ldapbe ' &
+                                            .and. xctype /= 'vdwdf' ) then
 ! === For nrc decomposion. by takto 2012/12/05 =================================
 #ifndef PAW3D
 ! ==============================================================================
@@ -2579,7 +2581,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
 !                dFxcdnb_ae_sph=0.d0
 !                dFxcdnb_ps_sph=0.d0
                 if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                            .and. xctype /= 'ldapbe ') then
+                                            .and. xctype /= 'ldapbe ' &
+                                            .and. xctype /= 'vdwdf' ) then
                     num_isph_2tm=num_isph_g_g
                     isph_2tm=isph_g_g
                 else
@@ -2620,7 +2623,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
                         end if
                         
                         if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                                    .and. xctype /= 'ldapbe ') then 
+                                                    .and. xctype /= 'ldapbe ' &
+                                                    .and. xctype /= 'vdwdf' ) then 
                             sum1 = sum1 + &
                                 0.5d0*dFadgaga_ae(ir)*gaga_ae_sph(ir,nsph) + &
                                 0.5d0*dFadgg_ae(ir)*gg_ae_sph(ir,nsph) + &
@@ -2731,7 +2735,8 @@ START_TIMER('m_PAW_XC_cal_potential_sphe
                         end if
                         
                         if(check_of_xctype()==GGA   .and. xctype /= 'ldapw91' &
-                                                    .and. xctype /= 'ldapbe ') then
+                                                    .and. xctype /= 'ldapbe ' &
+                                                    .and. xctype /= 'vdwdf' ) then
                             sum1 = sum1 + &
                                 0.5d0*dGadgaga_ae(ir)*gaga_ae_sph(ir,nsph) + &
                                 dGadnaga_ae(ir)*naga_ae_sph(ir,nsph) 
@@ -3852,7 +3857,7 @@ contains
     !        dnps_dph = 0.d0
     !        ddnps_ddr = 0.d0
                    
-            if(xctype == 'ldapw91' .or. xctype == 'ldapbe ') then
+            if(xctype == 'ldapw91' .or. xctype == 'ldapbe ' .or. xctype == 'vdwdf') then
                 grad_nae2_sph=0.d0;grad_tnae2_sph=0.d0
                 grad_nps2_sph=0.d0;grad_tnps2_sph=0.d0
                 grad_nae=0.d0;grad_tnae=0.d0
@@ -3936,7 +3941,7 @@ contains
                                     ,dFc_dagg_ps(1:nrc) &
                                     ,dFc_dbgg_ps(1:nrc) &
                                     ,dFc_dabg_ps(1:nrc))
-          else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe ') then
+          else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf') then
               call ex_ggapbe_paw_drv2(nrc,dnr,nspin &
                                     ,nae_sph(1:nrc,1:nspin,1) &
                                     ,grad_nae(1:nrc,1:nspin) &
@@ -4173,7 +4178,7 @@ contains
             dF_dnps = 0.d0
             dF_dgradnps = 0.d0
 
-            if(xctype == 'ldapw91' .or. xctype == 'ldapbe ') then
+            if(xctype == 'ldapw91' .or. xctype == 'ldapbe '.or. xctype == 'vdwdf' ) then
                 grad_nae2_sph=0.d0;grad_tnae2_sph=0.d0
                 grad_nps2_sph=0.d0;grad_tnps2_sph=0.d0
                 grad_nae=0.d0;grad_tnae=0.d0
@@ -4261,7 +4266,7 @@ contains
                                     ,dFc_dbgg_ps           &
                                     ,dFc_dabg_ps           &
                                     ,ista_nrc, iend_nrc, ist, ien)
-          else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe ') then
+          else if(xctype == 'ggapbe ' .or. xctype == 'ldapbe ' .or. xctype == 'vdwdf') then
               call ex_ggapbe_paw_drv2_3D(nrc,dnr,nspin     &
                                     ,nae_sph(ista_nrc,1,1) &
                                     ,grad_nae              &
diff -uprN phase0_2015.01/src_phase_3d/m_Parallelization.F90 phase0_2015.01.01/src_phase_3d/m_Parallelization.F90
--- phase0_2015.01/src_phase_3d/m_Parallelization.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Parallelization.F90	2016-07-12 12:51:53.000000000 +0900
@@ -3,7 +3,7 @@
 #endif
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 482 $)
 !
 !  MODULE: m_Parallelization
 !
@@ -65,7 +65,7 @@
 
 module m_Parallelization
 !   (m_Parallel)
-! $Id: m_Parallelization.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_Parallelization.F90 482 2016-04-08 08:40:45Z jkoga $
   use m_Const_Parameters, only       : ON, OFF, tag_npes_etc
   use m_ErrorMessages
 ! === For nrc decomposion. by takto 2012/12/07 =================================
@@ -214,6 +214,8 @@ module m_Parallelization
 ! ==============================================================================
   integer                            :: ista_kngp, iend_kngp, np_kngp, mp_kngp
   integer, allocatable, dimension(:) :: is_kngp, ie_kngp, nel_kngp
+  integer                            :: ista_kngp_exx, iend_kngp_exx, np_kngp_exx, mp_kngp_exx
+  integer, allocatable, dimension(:) :: is_kngp_exx, ie_kngp_exx, nel_kngp_exx
 ! natm for fxyzew_l
   integer                            :: ista_atm, iend_atm, np_atm, mp_atm
   integer, allocatable, dimension(:) :: is_atm, ie_atm, nel_atm
@@ -225,6 +227,12 @@ module m_Parallelization
   integer                            :: ista_nn,iend_nn,np_nn,mp_nn
   integer, allocatable, dimension(:) :: is_nn, ie_nn, nel_nn
 
+  integer                            :: ista_nq, iend_nq, np_nq, mp_nq
+  integer, allocatable, dimension(:) :: is_nq, ie_nq, nel_nq, map_nq, map_z_nq
+
+!  integer                            :: ista_nval,iend_nval,np_nval,mp_nval
+!  integer, allocatable, dimension(:) :: is_nval, ie_nval, nel_nval
+
   integer                            :: ista_atm_B, iend_atm_B, np_atm_B, mp_atm_B
   integer, allocatable, dimension(:) :: is_atm_B, ie_atm_B, nel_atm_B
   integer, allocatable, dimension(:) :: mem_atm_B ! (1:natm)
@@ -296,6 +304,12 @@ module m_Parallelization
 ! ==============================================================================
 #endif
 
+  integer :: nrank_nval,myrank_nval
+  integer :: ista_nval,iend_nval,np_nval,mp_nval
+  integer, allocatable, dimension(:) :: nis_nval,nie_nval,nel_nval
+  integer, allocatable, dimension(:) :: map_nval,map_z_nval
+  logical,private :: mpi_nval_enabled = .false.
+
 ! for nfft
   integer                            :: ista_ffth, iend_ffth, np_ffth, mp_ffth
   integer, allocatable, dimension(:) :: is_ffth, ie_ffth, nel_ffth
@@ -1674,6 +1688,31 @@ contains
                                                   __TIMER_SUB_STOP(1238)
   end subroutine m_Parallel_init_mpi_rspace_aug
 
+  subroutine m_Parallel_init_mpi_nq(nfout,ipri,printable,nq)
+    integer, intent(in) :: nfout, ipri,nq
+    logical, intent(in) :: printable
+    integer :: i,j,ip
+
+    allocate(is_nq(0:npes-1));ista_nq=nq+1
+    allocate(ie_nq(0:npes-1));iend_nq=0
+    allocate(nel_nq(0:npes-1));nel_nq=0
+    allocate(map_nq(nq));map_nq=0
+    allocate(map_z_nq(nq));map_z_nq=0
+
+    call set_block_range(nq,npes,nel_nq,is_nq,ie_nq,.true.,map_nq)
+    ista_nq = is_nq(mype)
+    iend_nq = ie_nq(mype)
+    np_nq = nel_nq(mype)
+    mp_nq = maxval(nel_nq)
+    j = 0
+    do ip = 1, npes
+       do i = 1, nel_nq(ip-1)
+          j = j + 1
+          map_z_nq(j) = i
+       end do
+    end do
+  end subroutine m_Parallel_init_mpi_nq
+
   subroutine m_Parallel_init_mpi_atm(nfout,ipri,printable,natm)
     integer, intent(in) :: nfout,ipri,natm
     logical, intent(in) :: printable
@@ -2056,14 +2095,22 @@ contains
     else
        NB = nblocksize_mgs_default
     end if
+    if (.not.nblocksize_mgs_is_given.and. &
+    &   mod(int(real(neg)/real(nrank_e)),NB)/=0.and. &
+    &   mod(neg,nrank_e)==0) then
+       NB = int(real(neg)/real(nrank_e))
+       if(NB<1) NB=1
+       nblocksize_mgs = NB
+       nblocksize_mgs_is_given = .true.
+    endif
 ! === Change nblocksize_mgs if it's too large. by tkato 2014/===================
     if(nrank_e /= 1) then
-       max_block_size = int(real(neg)/real(nrank_e - 1))
+       max_block_size = int(real(neg)/real(nrank_e))
        if(NB > max_block_size) then
           if(mype == 0) then
              write(0,'(a)') '=== WARNING!!! =============================================='
              write(0,'(a)') 'Block size for block-cyclic division on band is too large!'
-             write(0,'(a,i5,a)') 'Block size should be not greater than ', max_block_size, '!'
+             write(0,'(a,i5,a)') 'Block size should not be greater than ', max_block_size, '!'
              write(0,'(a,i5,a)') 'So, nblocksize_mgs is changed into ', max_block_size, '!'
              write(0,'(a)') 'FYI: '
              write(0,'(a,i8)') '   neg: ', neg
@@ -2161,7 +2208,6 @@ contains
   ista_e = nis_e(myrank_e)
   iend_e = nie_e(myrank_e)
   mp_e = maxval(nel_e)
-
 ! wk_sta(:) = nis_e(:) - 1
   wk_sta(0:nrank_e-1) = nis_e(0:nrank_e-1) - 1
   call mpi_allgatherv(neg_g, nel_e(myrank_e), mpi_integer, &
@@ -3161,9 +3207,9 @@ contains
       nrank_e = ne
       nrank_k = nk
       nrank_g = ng
-      if(printable) write(6,'(" nrank_e = ",i3)') nrank_e
-      if(printable) write(6,'(" nrank_k = ",i3)') nrank_k
-      if(printable) write(6,'(" nrank_g = ",i3)') nrank_g
+      if(printable) write(6,'(" nrank_e = ",i6)') nrank_e
+      if(printable) write(6,'(" nrank_k = ",i6)') nrank_k
+      if(printable) write(6,'(" nrank_g = ",i6)') nrank_g
 !      if( mype == 0 .and. nk/=1 ) then
 !        write(6,*) '[ERROR] nk =', nk
 !        call mpi_abort(mpi_comm_world, 2, err)
@@ -3685,6 +3731,61 @@ contains
 
 !===============================================================================
 
+  subroutine m_Parallel_init_mpi_nval(nfout,ipri,printable,nval)
+    integer, intent(in) :: nfout, ipri,nval
+    logical, intent(in) :: printable
+    integer :: i,j,ip
+    nrank_nval = nrank_g
+    myrank_nval = myrank_g
+    if(mpi_nval_enabled) call m_Parallel_dealloc_mpi_nval()
+    call m_Parallel_alloc_mpi_nval(nval)
+    call set_block_range(nval,nrank_nval,nel_nval,nis_nval,nie_nval,.true.,map_nval)
+    mpi_nval_enabled = .true.
+    if(ipri>=2) call wd_val_range_3D()
+    ista_nval = nis_nval(myrank_nval)
+    iend_nval = nie_nval(myrank_nval)
+    np_nval = nel_nval(myrank_nval)
+    mp_nval = maxval(nel_nval)
+
+    j = 0
+    do ip = 1, nrank_nval
+       do i = 1, nel_nval(ip-1)
+          j = j + 1
+          map_z_nval(j) = i
+       end do
+    end do
+
+    contains
+
+    subroutine wd_val_range_3D()
+      if(printable) then
+         write(nfout,'(" !|| nrank_nval")')
+         write(nfout,'(" !||    i    : ",20i4)')(i,i=0,nrank_nval-1)
+         write(nfout,'(" !|| ista_nval   : ",20i4)')(nis_nval(i),i=0,nrank_nval-1)
+         write(nfout,'(" !|| iend_nval   : ",20i4)')(nie_nval(i),i=0,nrank_nval-1)
+         write(nfout,'(" !|| nel_nval    : ",20i4)')(nel_nval(i),i=0,nrank_nval-1)
+      end if
+    end subroutine wd_val_range_3D
+  end subroutine m_Parallel_init_mpi_nval
+
+  subroutine m_Parallel_alloc_mpi_nval(nval)
+    integer, intent(in) :: nval
+    allocate(nis_nval(0:nrank_nval-1));ista_nval=nval+1
+    allocate(nie_nval(0:nrank_nval-1));iend_nval=0
+    allocate(nel_nval(0:nrank_nval-1));nel_nval=0
+    allocate(map_nval(nval));map_nval=0
+    allocate(map_z_nval(nval));map_z_nval=0
+  end subroutine m_Parallel_alloc_mpi_nval
+
+  subroutine m_Parallel_dealloc_mpi_nval()
+    deallocate(nis_nval)
+    deallocate(nie_nval)
+    deallocate(nel_nval)
+    deallocate(map_nval)
+    deallocate(map_z_nval)
+    mpi_nval_enabled = .false.
+  end subroutine m_Parallel_dealloc_mpi_nval
+
   subroutine m_Parallel_init_mpi_iba_3D(nfout,ipri,printable,kv3,iba)
     integer, intent(in) :: nfout,ipri, kv3, iba(kv3)
     logical, intent(in) :: printable
@@ -3931,6 +4032,35 @@ contains
   end subroutine m_Parallel_init_mpi_kngp_3D
 !===============================================================================
 
+  subroutine m_Parallel_init_mpi_kngp_exx(nfout,ipri,nmax_G_hyb)
+    integer, intent(in) :: nfout,ipri,nmax_G_hyb
+    integer :: iwork, i
+    integer :: npes, mype
+
+    npes = nrank_g
+    mype = myrank_g
+
+    allocate(is_kngp_exx(0:npes-1))
+    allocate(ie_kngp_exx(0:npes-1))
+    allocate(nel_kngp_exx(0:npes-1))
+    iwork = ( nmax_G_hyb - 1 ) / npes + 1
+    if(ipri >= 1) then
+       write(nfout,'(" << m_Parallel_init_mpi_kngp_exx >>")')
+       write(nfout,'(" !|| nmax_G_hyb = ",i12)') nmax_G_hyb
+       write(nfout,'(" !|| -- is_kngp_exx(ista_kngp_exx),  ie_kngp_exx(iend_kngp_exx) --")')
+    end if
+    do i = 0, npes-1
+       is_kngp_exx(i) = min(i*iwork+1, nmax_G_hyb+1)
+       ie_kngp_exx(i) = min(is_kngp_exx(i)+iwork-1, nmax_G_hyb)
+       nel_kngp_exx(i) = ie_kngp_exx(i) - is_kngp_exx(i) + 1
+       if(ipri >= 1) write(nfout,'(" !|| ",2i10)') is_kngp_exx(i),ie_kngp_exx(i)
+    enddo
+    ista_kngp_exx = is_kngp_exx(mype)
+    iend_kngp_exx = ie_kngp_exx(mype)
+    np_kngp_exx   = nel_kngp_exx(mype)
+    mp_kngp_exx   = maxval(nel_kngp_exx)
+  end subroutine m_Parallel_init_mpi_kngp_exx
+
   subroutine m_Parallel_init_mpi_kngp_B_3D(nfout,ipri,kngp)
     integer, intent(in) :: nfout,ipri,kngp
     integer :: iwork, i
diff -uprN phase0_2015.01/src_phase_3d/m_Phonon.F90 phase0_2015.01.01/src_phase_3d/m_Phonon.F90
--- phase0_2015.01/src_phase_3d/m_Phonon.F90	2015-09-15 12:16:40.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Phonon.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  SOFTWARE NAME : PHASE ($Revision: 409 $)
+!  SOFTWARE NAME : PHASE ($Revision: 460 $)
 !
 !  MODULE: m_Phonon
 !
@@ -32,7 +32,7 @@
 !
 !
 module m_Phonon
-! $Id: m_Phonon.F90 409 2014-10-27 09:24:52Z jkoga $
+! $Id: m_Phonon.F90 460 2015-09-15 02:53:17Z jkoga $
 use m_Const_parameters,   only : DP, ON, OFF, NOCONV, FMAXVALLEN, PAI, PAI2, PAI4 &
                          &     , PHONON_GAMMA, PHONON_BAND, PHONON_DOS &
                          &     , LOWER, CARTS, UNIT_PIEZO_CONST &
diff -uprN phase0_2015.01/src_phase_3d/m_PlaneWaveBasisSet.F90 phase0_2015.01.01/src_phase_3d/m_PlaneWaveBasisSet.F90
--- phase0_2015.01/src_phase_3d/m_PlaneWaveBasisSet.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_PlaneWaveBasisSet.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 481 $)
 !
 !  MODULE:  m_PlaneWaveBasisSet
 !
@@ -34,9 +34,41 @@
 !
 !   Revised for the GAMMA point (k=(0,0,0)) by T. Yamasaki, April 2006.
 !
+
+#ifdef __TIMER_SUB__
+#   define __TIMER_SUB_START(a)  call timer_sta(a)
+#   define __TIMER_SUB_STOP(a)   call timer_end(a)
+#else
+#   define __TIMER_SUB_START(a)
+#   define __TIMER_SUB_STOP(a)
+#endif
+#ifdef __TIMER_DO__
+#   define __TIMER_DO_START(a)   call timer_sta(a)
+#   define __TIMER_DO_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_DO_START(a)
+#   define __TIMER_DO_STOP(a)
+#endif
+#ifdef FJ_TIMER
+#   define __TIMER_FJ_START_w_BARRIER(str,a)   call mpi_barrier(str,ierr) ;   call timer_sta(a)
+#   define __TIMER_FJ_START(a)   call timer_sta(a)
+#   define __TIMER_FJ_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_FJ_START_w_BARRIER(str,a)
+#   define __TIMER_FJ_START(a)
+#   define __TIMER_FJ_STOP(a)
+#endif
+#ifdef __TIMER_INIDO__
+#   define __TIMER_INIDO_START(a)   call timer_sta(a)
+#   define __TIMER_INIDO_STOP(a)    call timer_end(a)
+#else
+#   define __TIMER_INIDO_START(a)
+#   define __TIMER_INIDO_STOP(a)
+#endif
+
 module m_PlaneWaveBasisSet
 !    ( m_pwBS )
-! $Id: m_PlaneWaveBasisSet.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_PlaneWaveBasisSet.F90 481 2016-03-25 02:51:57Z jkoga $
   use m_Crystal_Structure,  only : nopr, altv,rltv, m_CS_op_in_PUCV
   use m_Kpoints,            only : kv3,kv3_ek,vkxyz,qwgt, k_symmetry
   use m_FFT,                only : fft_box_size_WF, fft_box_size_CD, fft_box_size_pWF &
@@ -106,6 +138,11 @@ module m_PlaneWaveBasisSet
   integer :: kg_tfw = 0
 ! ========================== 13.0U2
 
+! ===== EXP_CELLOPT ==== 2015/09/24
+  integer :: kg1_prev = 0
+  integer :: kgp_prev = 0
+! ====================== 2015/09/24
+
   integer, dimension(3)                 :: n_rGv
   integer, dimension(3)                 :: n_rGpv
   integer, dimension(3)                 :: n_rGpv_reduced
@@ -289,6 +326,10 @@ contains
     if(sw_positron /= OFF) deallocate(igf_pstrn)
     ! ------- Positron end
 
+! ========== KT_add ========================== 13.0F
+    if (sw_hybrid_functional /= OFF .and. use_fft_exx ) deallocate(igf_exx)
+    if (sw_hybrid_functional /= OFF ) deallocate(igfp_exx)
+! ============================================ 13.0F
 
   end subroutine m_pwBS_dealloc_ngpt_igfp_gr
 
@@ -440,10 +481,7 @@ contains
     integer, parameter :: CRITICAL_VECTOR_LENGTH = 10000
 
     integer          :: id_sname = -1
-#ifdef __TIMER_SUB__
-  call timer_sta(1221)
-#endif
-
+                                                     __TIMER_SUB_START(1221)
     call tstatc0_begin('m_pwBS_for_each_WF ',id_sname)
 
     if(ipri >= 2) then
@@ -753,9 +791,7 @@ contains
     end if
 
     call tstatc0_end(id_sname)
-#ifdef __TIMER_SUB__
-  call timer_end(1221)
-#endif
+                                                     __TIMER_SUB_STOP(1221)
   contains
     subroutine wd_ngshell_range
       integer :: jg, j,n1,n2,n3
@@ -833,9 +869,7 @@ contains
     kg1_exx = 0
 
     do j = 1, kg_exx
-       ga = ngabc(j,1)
-       gb = ngabc(j,2)
-       gc = ngabc(j,3)
+       ga = ngabc(j,1);    gb = ngabc(j,2);    gc = ngabc(j,3)
        grvv = dsqrt(ttr(1)*ga*ga + ttr(2)*gb*gb + ttr(3)*gc*gc &
             &     + ttr(4)*ga*gb + ttr(5)*gb*gc + ttr(6)*gc*ga )
        if(grvv <= gmax_exx) then
@@ -859,9 +893,7 @@ contains
     kg1p_exx = 0
 
     do j = 1, kgp_exx
-       ga = ngabc(j,1)
-       gb = ngabc(j,2)
-       gc = ngabc(j,3)
+       ga = ngabc(j,1);    gb = ngabc(j,2);    gc = ngabc(j,3)
        grvv = dsqrt(ttr(1)*ga*ga + ttr(2)*gb*gb + ttr(3)*gc*gc &
             &     + ttr(4)*ga*gb + ttr(5)*gb*gc + ttr(6)*gc*ga )
        if(grvv <= gmaxp_exx) then
@@ -1590,6 +1622,8 @@ contains
      if(allocated(nbase)) deallocate(nbase)
  
      if(allocated(ylm_l)) deallocate(ylm_l)
+
+     if(allocated(nbase_gamma)) deallocate(nbase_gamma)
  
 #ifdef _MPIFFT_
      if(allocated(igfp_l_c)) deallocate(igfp_l_c)
@@ -1648,6 +1682,16 @@ contains
       allocate(igf_pstrn(kg_pwf)) ;                        igf_pstrn = 0
    end if
     ! ------- Positron end
+
+! ========== KT_add ========================== 13.0F
+    if (sw_hybrid_functional /= OFF .and. use_fft_exx ) then
+       allocate(igf_exx(kg_exx)) ; igf_exx = 0
+    end if
+    if (sw_hybrid_functional /= OFF) then
+       allocate(igfp_exx(kgp_exx)) ; igfp_exx = 0
+    endif
+! ============================================ 13.0F
+
   end subroutine m_pwBS_alloc_ngpt_igfp_gr_3D
 
 !------------------------------------------------------------------------------
@@ -1658,9 +1702,7 @@ contains
     integer :: i, k, ngs
     real(kind=DP) :: length_start, length
 
-#ifdef __TIMER_SUB__
-  call timer_sta(1220)
-#endif
+                                                     __TIMER_SUB_START(1220)
 
     call tstatc0_begin('m_pwBS_calc_length_of_G_3D ',id_sname)
 
@@ -1735,9 +1777,7 @@ contains
     end if
 
     call tstatc0_end(id_sname)
-#ifdef __TIMER_SUB__
-  call timer_end(1220)
-#endif
+                                                     __TIMER_SUB_STOP(1220)
   end subroutine m_pwBS_calc_length_of_G_3D
 
 !------------------------------------------------------------------------------
@@ -1774,30 +1814,22 @@ contains
     integer :: i,ni
     real(kind=DP) a,b,c,d,e,f,gx,gy,gz
     real(kind=DP) b1x,b1y,b1z,b2x,b2y,b2z,b3x,b3y,b3z
-#ifdef __TIMER_SUB__
-  call timer_sta(1059)
-#endif
+                                                     __TIMER_SUB_START(1059)
 
     b1x = rltv(1,1);  b1y = rltv(2,1);  b1z = rltv(3,1)
     b2x = rltv(1,2);  b2y = rltv(2,2);  b2z = rltv(3,2)
     b3x = rltv(1,3);  b3y = rltv(2,3);  b3z = rltv(3,3)
 
     ni = ista_ylm
-#ifdef __TIMER_DO__
-  call timer_sta(1081)
-#endif
+                                                     __TIMER_DO_START(1081)
     if(ngabc(1,1) == 0 .and. ngabc(1,2) == 0 .and. ngabc(1,3) == 0) then
        if(ni == 1) then
           ni = 2
           ylm(1) = dsqrt(1.d0/PAI4)
        endif
     end if
-#ifdef __TIMER_DO__
-  call timer_end(1081)
-#endif
-#ifdef __TIMER_DO__
-  call timer_sta(1082)
-#endif
+                                                     __TIMER_DO_STOP(1081)
+                                                     __TIMER_DO_START(1082)
     if(is == 1) then
        a = dsqrt(1.d0/PAI4)
        do i = ni, iend_ylm  !for mpi
@@ -2037,12 +2069,8 @@ contains
           ylm(i) = a*(b-c)*gx*gy/f
        end do
     end if
-#ifdef __TIMER_DO__
-  call timer_end(1082)
-#endif
-#ifdef __TIMER_SUB__
-  call timer_end(1059)
-#endif
+                                                     __TIMER_DO_STOP(1082)
+                                                     __TIMER_SUB_STOP(1059)
   end subroutine m_pwBS_sphrp2_3D
 
 !------------------------------------------------------------------------------
@@ -2520,6 +2548,27 @@ contains
        endif
     end if
 
+    if ( sw_hybrid_functional /= OFF ) then
+       if(    fft_box_size_CD(1,1) == fft_box_size_CD_exx(1,1) .and. &
+            & fft_box_size_CD(2,1) == fft_box_size_CD_exx(2,1) .and. &
+            & fft_box_size_CD(3,1) == fft_box_size_CD_exx(3,1) .and. &
+            & kgp == kgp_exx ) then
+          igfp_exx(ista_kngp:iend_kngp) = igfp_l(ista_kngp:iend_kngp)
+          call mpi_allreduce(MPI_IN_PLACE,igfp_exx,kgp_exx,mpi_integer,mpi_sum,mpi_ke_world,ierr)
+       else
+          id = fft_box_size_CD_exx(1,0)
+          do i = 1, kgp_exx
+             igf1 = ngabc(i,1) + 1
+             igf2 = ngabc(i,2) + 1
+             igf3 = ngabc(i,3) + 1
+             if(ngabc(i,1) <= -1) igf1 = igf1 + fft_box_size_CD_exx(1,1)
+             if(ngabc(i,2) <= -1) igf2 = igf2 + fft_box_size_CD_exx(2,1)
+             if(ngabc(i,3) <= -1) igf3 = igf3 + fft_box_size_CD_exx(3,1)
+             igfp_exx(i) = igf1 + (igf2-1)*id + (igf3-1)*id*fft_box_size_CD_exx(2,0)
+          end do
+       end if
+    endif
+
     call tstatc0_end(id_sname)
   end subroutine m_pwBS_setup_FFTmapfunctions_3D
 
@@ -2977,6 +3026,9 @@ contains
     real(kind=DP) :: rd
     real(kind=DP), allocatable, dimension(:)     :: gmaxp_length
     integer       :: kgp_wk,i,nn,ierr
+! =========== KT_add ================================ 13.0F
+    real(kind=DP)         :: gmax2_exx
+! =================================================== 13.0F
 
       allocate(gmaxp_length(0:nrank_g))
       rd = gmaxp/(DBLE(nrank_g)**(1.0d0/3.0d0))
@@ -2994,16 +3046,15 @@ contains
 
     gmax2 = gmax*2
     gmax2_pstrn = gmax_positron*2
+! =========== KT_add ================================ 13.0F
+    gmax2_exx = gmax_exx*2
+! =================================================== 13.0F
     call adjust_n_rGv_to_2l3m5n  ! -(contained here) fft_box_size_WF,CD -> n_rGV,n_rGpv
 
     allocate(G_length_in_cube(2*n_rGpv(1)+3,2*n_rGpv(2)+3, 2*n_rGpv(3)+3))
-#ifdef FJ_TIMER
-                    call timer_sta(42)
-#endif
+                                                     __TIMER_FJ_START(42)
     call count_Gvectors_in_spheres
-#ifdef FJ_TIMER
-                    call timer_end(42)
-#endif
+                                                     __TIMER_FJ_STOP(42)
 
     call G_vectors_alloc           ! ngabc
 
@@ -3012,20 +3063,13 @@ contains
 
 !f    allocate(gr_t(kgp))
 
-#ifdef FJ_TIMER
-                    call timer_sta(43)
-#endif
+                                                     __TIMER_FJ_START(43)
     call get_Gvectors_in_a_gmaxp_sphere
-#ifdef FJ_TIMER
-                    call timer_end(43)
-#endif
+                                                     __TIMER_FJ_STOP(43)
 
 !comm
 ! interface for ngabc
-#ifdef FJ_TIMER
-                    call mpi_barrier(mpi_comm_group, ierr)
-                    call timer_sta(44)
-#endif
+                                                     __TIMER_FJ_START_w_BARRIER(mpi_comm_group,44)
     call mpi_allgather(kgp_wk,1,mpi_integer,kgp_buff(0),1,mpi_integer,mpi_ke_world,ierr)
       do i = 1,nrank_g-1
         kgp_addr(i) = kgp_buff(i-1) + kgp_addr(i-1)
@@ -3034,9 +3078,7 @@ contains
    call mpi_allgatherv(ngabc_wk(1,1),kgp_wk,mpi_integer,ngabc(1,1),kgp_buff,kgp_addr,mpi_integer,mpi_ke_world,ierr)
    call mpi_allgatherv(ngabc_wk(1,2),kgp_wk,mpi_integer,ngabc(1,2),kgp_buff,kgp_addr,mpi_integer,mpi_ke_world,ierr)
    call mpi_allgatherv(ngabc_wk(1,3),kgp_wk,mpi_integer,ngabc(1,3),kgp_buff,kgp_addr,mpi_integer,mpi_ke_world,ierr)
-#ifdef FJ_TIMER
-                    call timer_end(44)
-#endif
+                                                     __TIMER_FJ_STOP(44)
 
     deallocate(kgp_buff,kgp_addr,ngabc_wk,gr_t)
 !f    deallocate(gr_t)
@@ -3059,9 +3101,7 @@ contains
       kg = 0
       kgp_wk = 0
 !!$      if(sw_positron /= OFF) kg_pwf = 0
-#ifdef __TIMER_INIDO__
-  call timer_sta(1347)
-#endif
+                                                     __TIMER_INIDO_START(1347)
       do k = -n_rGpv(3)-1, n_rGpv(3)+1
          do j = -n_rGpv(2)-1, n_rGpv(2)+1
             do i = -n_rGpv(1)-1, n_rGpv(1)+1
@@ -3093,9 +3133,7 @@ contains
             enddo
          enddo
       enddo
-#ifdef __TIMER_INIDO__
-  call timer_end(1347)
-#endif
+                                                     __TIMER_INIDO_STOP(1347)
       if(ipri >= 2) then
          write(nfout,'(" -- gr_t -- <<m_pwBS_generate_G_vectors>>")') 
          do i = 1, min(20, kgp_wk)
@@ -3112,13 +3150,9 @@ contains
 !!$      call shellsort(nfout,ipri,ttr,kgp,kgp,ngabc)
       !!$call sort_gvec_heap2(ttr,kgp,kgp,ngabc,gr_t)
 !f      call sort_gvec_heap3(ttr,kgp,kgp,ngabc,gr_t)
-#ifdef __TIMER_INIDO__
-  call timer_sta(1348)
-#endif
+                                                     __TIMER_INIDO_START(1348)
       call sort_gvec_heap3(ttr,kgp_wk,kgp_wk,ngabc_wk,gr_t)
-#ifdef __TIMER_INIDO__
-  call timer_end(1348)
-#endif
+                                                     __TIMER_INIDO_STOP(1348)
       if(ipri >= 2) then
          write(nfout,'(" -- gr after (sort_gvec_heap2) -- <<m_pwBS_generate_G_vectors>>")') 
          do i = 1, min(20, kgp_wk)
@@ -3129,22 +3163,14 @@ contains
          end do
       end if
 !f      call shellsort2(nfout,ipri,ttr,kgp,kgp,ngabc,gr_t)
-#ifdef __TIMER_INIDO__
-  call timer_sta(1349)
-#endif
+                                                     __TIMER_INIDO_START(1349)
       call shellsort2(nfout,ipri,ttr,kgp_wk,kgp_wk,ngabc_wk,gr_t)
-#ifdef __TIMER_INIDO__
-  call timer_end(1349)
-#endif
+                                                     __TIMER_INIDO_STOP(1349)
 #elif _SIMPLE_SORT_
      ! Sorting G simply (M.Okamoto)
-#ifdef __TIMER_INIDO__
-  call timer_sta(1350)
-#endif
+                                                     __TIMER_INIDO_START(1350)
       call sort_gvec_simple(ttr,kgp,kgp,ngabc)
-#ifdef __TIMER_INIDO__
-  call timer_end(1350)
-#endif
+                                                     __TIMER_INIDO_STOP(1350)
 #else
      ! _BLUGEL_SORT_ ! Sorting G by Blugel
 !!$      call gsort(ttr,kgp,kgp,ngabc)
@@ -3171,14 +3197,24 @@ contains
       call getttr(rltv,ttr)
       kg = 0
       kgp = 0
+      kgp_reduced = 0
       kg0 = 0
 !f-----
       kgp_wk = 0
 !------
+! =========== KT_add ================================ 13.0F
+      if (sw_hybrid_functional /= OFF .and. use_fft_exx ) then
+         kg_exx = 0
+      endif
+      if (sw_hybrid_functional /= OFF ) kgp_exx = 0
+! =================================================== 13.0F
+
+! ==== KT_add ============= 13.0U2
+      if ( sw_modified_TFW_functional /= OFF ) kg_tfw = 0
+! ========================= 13.0U2
+
       if(sw_positron /= OFF) kg_pwf = 0
-#ifdef __TIMER_INIDO__
-  call timer_sta(1351)
-#endif
+                                                     __TIMER_INIDO_START(1351)
       do k = -n_rGpv(3)-1, n_rGpv(3)+1
          do j = -n_rGpv(2)-1, n_rGpv(2)+1
             do i = -n_rGpv(1)-1, n_rGpv(1)+1
@@ -3188,6 +3224,27 @@ contains
                if(length_of_Gvector <= gmax2) kg = kg + 1
                if(length_of_Gvector <= gmax)  kg0 = kg0 + 1
                if(sw_positron/=OFF .and. length_of_Gvector<=gmax2_pstrn) kg_pwf=kg_pwf+1
+! =========== KT_add ================================ 13.0F
+               if(sw_hybrid_functional/=OFF .and. use_fft_exx ) then
+                  if ( length_of_Gvector<=gmax2_exx) then
+                     kg_exx = kg_exx +1
+                  endif
+               endif
+               if(sw_hybrid_functional /= OFF) then
+                  if( length_of_Gvector <= gmaxp_exx) then
+                     kgp_exx = kgp_exx+1
+                  endif
+               endif
+! =================================================== 13.0F
+
+! ============ KT_add ========== 13.0U2
+               if ( sw_modified_TFW_functional /= OFF ) then
+                  if ( length_of_Gvector<= gmaxp/2 ) then
+                     kg_tfw = kg_tfw +1
+                  endif
+               endif
+! ============================== 13.0U2
+
                if(length_of_Gvector <= gmaxp) kgp = kgp + 1
                G_length_in_cube(i+n_rGpv(1)+2,j+n_rGpv(2)+2, k+n_rGpv(3)+2) = length_of_Gvector
 !f-----
@@ -3206,9 +3263,7 @@ contains
             enddo
          enddo
       enddo
-#ifdef __TIMER_INIDO__
-  call timer_end(1351)
-#endif
+                                                     __TIMER_INIDO_STOP(1351)
       if(ipri >= 1) then
          write(nfout,'(" !pwBS kg0, kg, kgp = ",3i10)') kg0, kg, kgp
          write(nfout,'(" !pwBS   kg0 = (#G(<=Gmax)), kg = (#G(<=2Gmax)), kgp = (#G(<=Gmaxp))")')
@@ -3718,4 +3773,14 @@ contains
     if(allocated(ngabc)) deallocate(ngabc)
   end subroutine m_pwBS_dealloc_ngabc
 
+! ==== EXP_CELLOPT === 2015/09/24
+  subroutine m_pwBS_store_prev_kg1_kgp
+    kg1_prev = kg1;   kgp_prev = kgp
+    if ( mype == 0 ) then
+       write(nfout,*) '** kg1_prev is ', kg1_prev
+       write(nfout,*) '** kgp_prev is ', kgp_prev
+    endif
+  end subroutine m_pwBS_store_prev_kg1_kgp
+! =================== 2015/09/24
+
 end module m_PlaneWaveBasisSet
diff -uprN phase0_2015.01/src_phase_3d/m_Positron_Wave_Functions.F90 phase0_2015.01.01/src_phase_3d/m_Positron_Wave_Functions.F90
--- phase0_2015.01/src_phase_3d/m_Positron_Wave_Functions.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Positron_Wave_Functions.F90	2016-07-12 12:51:53.000000000 +0900
@@ -15,9 +15,9 @@
 !
 !=======================================================================
 module m_Positron_Wave_Functions
-! $Id: m_Positron_Wave_Functions.F90 417 2014-12-22 11:44:44Z yamasaki $
+! $Id: m_Positron_Wave_Functions.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Const_Parameters, only   : DP,POSITRON,OFF,ON,DIRECT,INVERSE, DELTAevdff, SD, MSD &
-       &                         , DENSITY_ONLY, VTK, CUBE
+       &                         , DENSITY_ONLY, VTK, CUBE, SmallestPositiveNumber
   use m_Control_Parameters, only : af, nspin, npeg,kimg,ipripositron &
        &                         , delta_pev, evaluation_pev_diff &
        &                         , num_extra_pev, sw_gga_p, sw_epsilon_ele, epsilon_ele &
@@ -37,7 +37,7 @@ module m_Positron_Wave_Functions
   use m_Crystal_Structure, only  : univol
   use m_PlaneWaveBasisSet, only  : kg_pwf, kg1_pwf, igf_pstrn, igfp_l, igfp_nonpara &
        &                         , m_pwBS_pstrn_kinetic_energies
-  use m_Electronic_Structure,only: vlhxc_l
+!!  use m_Electronic_Structure,only: vlhxc_l
   use m_Parallelization, only :    mype, npes, ista_kngp, iend_kngp &
        &                         , ista_sfftph, iend_sfftph, ierr, mpi_comm_group
   use m_FFT, only   :              nfft, nfftp, nfftp_nonpara, nfft_pstrn, fft_box_size_pWF &
@@ -49,7 +49,7 @@ module m_Positron_Wave_Functions
   use m_Crystal_Structure,  only : altv
   use m_Ionic_System,         only : ntyp,ityp,zfm3_l,natm2, iatomn &
        &                         , m_IS_pack_all_ions_in_uc
-  use m_epc_potential, only      : tchgr_l, grad_tchgr_l
+  use m_epc_potential, only      : tchgr_l, grad_tchgr_l, vlhxc_p
   use m_PseudoPotential, only    : rhchg_l, ival
 
   implicit none
@@ -58,6 +58,7 @@ module m_Positron_Wave_Functions
   real(kind=DP),allocatable,dimension(:,:,:) :: pzaj   ! positron wave functions d(kg1_pwf,npeg,kimg)
   real(kind=DP),allocatable,dimension(:,:,:) :: pzaj_old ! d(kg1_pwf,npeg,kimg)
   real(kind=DP),allocatable,dimension(:,:) ::   pchg_l ! positron charge in g-space, d(ista_kngp:iend_kngp,kimg)
+  real(kind=DP),allocatable,dimension(:,:) ::   pchgo_l ! positron charge in g-space, d(ista_kngp:iend_kngp,kimg)
   real(kind=DP),allocatable,dimension(:) ::     pchr_l ! positron charge in r-space, d(ista_sfftph:iend_sfftph)
   integer, allocatable, dimension(:) ::         npeordr, nprvf_ordr !d(npeg)
   real(kind=DP),allocatable,dimension(:) ::     pev, pev1   ! d(npeg)
@@ -89,12 +90,14 @@ contains
     allocate(pev1(1:npeg)); pev1 = 0.d0
     allocate(pevdff(3))
     allocate(pchg_l(ista_kngp:iend_kngp,kimg))
+    allocate(pchgo_l(ista_kngp:iend_kngp,kimg))
     allocate(pchr_l(ista_sfftph:iend_sfftph))
   end subroutine m_pWF_allocate_pzaj_etc
 
   subroutine m_pWF_deallocate_pzaj_etc()
     if(allocated(pchr_l)) deallocate(pchr_l)
     if(allocated(pchg_l)) deallocate(pchg_l)
+    if(allocated(pchgo_l)) deallocate(pchgo_l)
     if(allocated(pevdff)) deallocate(pevdff)
     if(allocated(pev)) deallocate(pev)
     if(allocated(pev1)) deallocate(pev1)
@@ -213,8 +216,12 @@ contains
           evr = pzaj(i,ib,1)
           devr = (ekin(i) - pev(ib))*evr + VlocalpW(i1)*denom
           wdi = ekin(i) + vlhxc0 - pev(ib)
-          fdexp = dexp( -p(i) * wdi * dtim)
-          pzaj(i,ib,1) = (fdexp - 1) * devr/wdi + evr
+          if (dabs(wdi) < SmallestPositiveNumber) then
+             pzaj(i,ib,1) = -p(i)*devr*dtim + evr
+          else
+             fdexp = dexp( -p(i) * wdi * dtim)
+             pzaj(i,ib,1) = (fdexp - 1) * devr/wdi + evr
+          endif
        end do
     else if(kimg==2) then
        do i = 1, kg1_pwf
@@ -224,9 +231,14 @@ contains
           devr = e1*evr+VlocalpW(2*i1-1)*denom
           devi = e1*evi+VlocalpW(2*i1  )*denom
           wdi  = ekin(i) + vlhxc0 - pev(ib)
-          fdexp = dexp( -p(i) * wdi * dtim)
-          pzaj(i,ib,1)    = (fdexp -1)*devr/wdi + evr
-          pzaj(i,ib,kimg) = (fdexp -1)*devi/wdi + evi
+          if (dabs(wdi) < SmallestPositiveNumber) then
+             pzaj(i,ib,1) = -p(i)*devr*dtim + evr
+             pzaj(i,ib,2) = -p(i)*devi*dtim + evi
+          else
+             fdexp = dexp( -p(i) * wdi * dtim)
+             pzaj(i,ib,1)    = (fdexp -1)*devr/wdi + evr
+             pzaj(i,ib,kimg) = (fdexp -1)*devi/wdi + evi
+          endif
        end do
     end if
   end subroutine modified_sd_p
@@ -575,6 +587,10 @@ contains
 !    core_annihilation_rate=sss/dsqrt(4.d0*3.1415926d0)
      core_annihilation_rate=sss/2.d0*univol
 
+#ifdef SINGLE_POSITRON
+     core_annihilation_rate = sss *univol
+#endif
+
   end subroutine m_pWF_core_annihilation
 
   subroutine m_pWF_valence_annihilation()
@@ -920,6 +936,11 @@ contains
 
      if(nspin==1) then
        valence_annihilation_rate=sss/2.d0
+
+#ifdef SINGLE_POSITRON
+       valence_annihilation_rate = sss
+#endif
+
        ssk=((0.5292d0)**3)*1.d4/(2.8d0**2)/3.d0/3.1415926d0
        ssk=ssk
        sss=core_annihilation_rate+valence_annihilation_rate
@@ -1086,6 +1107,11 @@ subroutine m_pWF_wlifetime()
   end if
 end subroutine m_pWF_wlifetime
 
+  subroutine m_pWF_update_lifetime
+    if(ipripositron >= 1) write(nfout,*)'lifetime: ',p_old_lifetime,p_new_lifetime
+    p_old_lifetime=p_new_lifetime
+  end subroutine m_pWF_update_lifetime
+
   subroutine m_pWF_wd_pzaj(nfout,comment,nc)
     integer,        intent(in) :: nfout, nc
     character(len=nc), intent(in) :: comment
diff -uprN phase0_2015.01/src_phase_3d/m_PseudoPotential.F90 phase0_2015.01.01/src_phase_3d/m_PseudoPotential.F90
--- phase0_2015.01/src_phase_3d/m_PseudoPotential.F90	2015-09-14 15:39:35.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_PseudoPotential.F90	2016-07-12 12:51:53.000000000 +0900
@@ -3,7 +3,7 @@
 #define _PAW_CONTINUE_DATA_PREVIOUS_BEFORE_201403_STYLE_
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 454 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 461 $)
 !
 !  MODULE: m_PseudoPotential
 !
@@ -72,7 +72,7 @@
 
 module m_PseudoPotential
 !     (m_PP)
-!  $Id: m_PseudoPotential.F90 454 2015-09-07 07:58:39Z yamasaki $
+!  $Id: m_PseudoPotential.F90 461 2015-09-15 04:27:48Z ktagami $
 !
 !  The original subroutine name was "pspot", which had been coded by
 !  Y. Morikawa (JRCAT-NAIR) in 1993 or ealier.
@@ -103,7 +103,7 @@ module m_PseudoPotential
 ! === DEBUG by tkato 2011/09/22 ================================================
 !      &                           , nspin, af, printable, PAW_switch
        &                           , nspin, af, printable, PAW_switch, sw_fef &
-       &                           , omega_exx, sw_rspace_hyb &
+       &                           , omega_exx &
 ! ==============================================================================
        &                           , sw_remove_pcc_from_pawpot, sw_wannier90 &
        &                           , sw_optimize_lattice   &
@@ -1753,8 +1753,10 @@ contains
                 comment_statement = .false.
              endif
           enddo
+
           select case (fn_number_of_words(str))
-          case (9)
+!          case (9)
+          case (4,9)
              is_gncpp = pp_GNCPP1 ! = 1
              pptype_char = "GNCPP1"
 !!$             if(ipripp>=1 .and. .not.ppprinted) write(nfout,'(" !PP PP type --> GNCPP1")')
@@ -2488,7 +2490,7 @@ contains
       if(.not.paramset) then
          call coef_simpson_integration(nmesh(it),nmesh(it),xh(it),radr&
               & ,wos)
-         if(sw_hybrid_functional==ON.and.sw_rspace_hyb==OFF) call alloc_qitg_exx()
+         call alloc_qitg_exx()
       end if
       call rd_qrsps_then_iqitg_and_qitgft(is_gncpp,nfp,it,nfout,gr_l,ngshell,ngshell_range &
            & ,paramset,mmt) ! m_PP
@@ -2599,7 +2601,7 @@ contains
       if(.not.paramset) then
          call coef_simpson_integration(nmesh(it),nmesh(it),xh(it),radr&
               & ,wos)
-         if(sw_hybrid_functional==ON.and.sw_rspace_hyb==OFF) call alloc_qitg_exx()   ! ????????????
+         call alloc_qitg_exx()   ! ????????????
       end if
 
       call rd_qrsps_iqitg_and_qitgft_soc( is_gncpp,nfp,it,nfout,gr_l, &
@@ -7346,7 +7348,7 @@ contains
     endif
 ! ================ 2014/09/19
 
-    if(.not.paramset .and. sw_hybrid_functional==ON .and.  m_PP_include_vanderbilt_pot(it)/=SKIP.and.sw_rspace_hyb==OFF) &
+    if(.not.paramset .and. sw_hybrid_functional==ON .and.  m_PP_include_vanderbilt_pot(it)/=SKIP) &
       & call qitgft_qmk(it,nmm_il3,mm_il3,qrsps_mm,lcmax,h) ! in b_PseudoPotential_EXX
 
     if(itpcc(it) == 1) then
@@ -8111,7 +8113,7 @@ contains
 
     if(.not.paramset .and. iflag_ft == 1) call qitgft_mm() ! contained here
 
-    if(.not.paramset .and. sw_hybrid_functional==ON .and.  m_PP_include_vanderbilt_pot(it)/=SKIP.and.sw_rspace_hyb==OFF) &
+    if(.not.paramset .and. sw_hybrid_functional==ON .and.  m_PP_include_vanderbilt_pot(it)/=SKIP) &
       & call qitgft_qmk(it,nmm_il3,mm_il3,qrsps_mm,lcmax,h) ! in b_PseudoPotential_EXX
 
 
@@ -15435,4 +15437,53 @@ contains
        if(allocated(radr)) deallocate(radr)
     end subroutine m_PP_dealloc_radr
 
+    subroutine m_PP_rd_ival(nfp,it,nfout,ival)
+      integer, intent(in) :: nfp,it,nfout
+      real(kind=DP), intent(out) :: ival
+      call mpi_barrier(mpi_comm_group,ierr)
+      call read_natomn_ival()
+
+      contains
+        subroutine read_natomn_ival()
+          integer :: natomn, iloc,itpcc
+          if(mype == 0) then
+             rewind(nfp)
+             if(ipripp >= 2) write(nfout,'(" !PP  READING POTENTIAL FILE ",i5)') nfp
+             call skip_commentlines_pp(nfp)
+             read(nfp,*)      natomn,ival,iloc,itpcc
+             if(ipripp >= 2) write(nfout,110) natomn,ival,iloc,itpcc
+110          FORMAT(' !PP ',2f8.4,2I4,'  : NATOMN, IVAL, ILOC, ITPCC ')
+             if( abs(iatomn(it) - natomn) > 1.d-8)  then
+                if(ipripp >= 2) write(nfout,*) ' !PP iatomn.ne.natomn ',iatomn(it),natomn
+                if(iatomn(it)>=1) call phase_execution_error(INVALID_ATOMIC_NUMBER)
+                write(nfout,'(a,i4,a,f7.3)') ' !PP atomic number for element',it,':',natomn
+             end if
+          end if
+        end subroutine read_natomn_ival
+
+        subroutine skip_commentlines_pp(nf)
+          integer, intent(in) :: nf
+          integer, parameter  ::    len_str = 80
+          character(len=len_str) :: str
+          logical :: comment_statement
+          comment_statement = .true.
+          do while(comment_statement)
+             read(nfp,'(a80)',end=1000) str
+             if(str(1:1) == '#'.or. str(1:1) == '$' .or. str(1:1) == '!' &
+                  & .or. str(1:1) == '%' .or. str(1:1) == '*') then
+                if(ipripp >= 1) write(nfout,'(a80)') str
+             else if(len(trim(str)) == 0) then
+                if(ipripp >= 1) write(nfout,'(a80)') str
+             else
+                comment_statement = .false.
+             endif
+          enddo
+          backspace(nfp)
+          return
+1000      write(nfout,'(" eof is reached")')
+          stop ' eof is reached in PP reading'
+        end subroutine skip_commentlines_pp
+
+    end subroutine m_PP_rd_ival
+
 end module m_PseudoPotential
diff -uprN phase0_2015.01/src_phase_3d/m_Raman.F90 phase0_2015.01.01/src_phase_3d/m_Raman.F90
--- phase0_2015.01/src_phase_3d/m_Raman.F90	2015-09-14 15:39:43.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Raman.F90	2016-07-12 12:51:53.000000000 +0900
@@ -16,7 +16,8 @@ module m_Raman
   use m_Control_Parameters, only : sw_lo_to_splitting, sw_phonon_oneshot, &
        &                           printable, ipriphonon, &
        &                           sw_phonon_with_epsilon, sw_calc_dielectric_tensor, &
-       &                           num_phonon_calc_mode, sw_excitation
+       &                           num_phonon_calc_mode, sw_excitation, &
+       &                           sw_phonon, sw_calc_force, sw_use_add_proj, sw_raman
 
   use m_Const_Parameters, only : DP, ON, OFF, FMAXVALLEN, LOWER, PAI4, CMPLDP
 
@@ -53,6 +54,8 @@ module m_Raman
   character(len("classical") ), private, parameter :: &
        &                     tag_classical = "classical"
 !
+  character(len("sw_raman") ), private, parameter :: &
+       &                    tag_sw_raman = "sw_raman"
   character(len("sw_phonon_with_epsilon") ), private, parameter :: &
        &                    tag_sw_phonon_with_epsilon = "sw_phonon_with_epsilon"
   character(len("sw_calc_dielectric_tensor") ), private, parameter :: &
@@ -208,12 +211,23 @@ contains
     logical :: tf
 
     if( f_selectBlock( tag_raman ) == 0) then
+! === 2015/10/19
+       if ( f_getIntValue( tag_sw_raman, iret) == 0 ) sw_raman = iret
+       if ( sw_raman == OFF ) return
+
+       if ( sw_phonon == ON ) sw_phonon_with_epsilon = on
+! === 2015/10/19
+
        if ( f_getIntValue( tag_sw_phonon_with_epsilon, iret) == 0) & 
             &  sw_phonon_with_epsilon = iret
        
        if ( sw_phonon_with_epsilon == ON ) then
           sw_excitation = ON
           write(nfout,*) "!** sw_excitation is turned on"
+! === 2015/10/19
+          sw_use_add_proj = ON
+          write(nfout,*) "!** sw_use_add_proj is turned on"
+! === 2015/10/19
        endif
 
        if ( f_getStringValue( tag_raman_calc_scheme, rstr, LOWER) == 0 ) then
@@ -221,6 +235,12 @@ contains
           if(tf) sw_phonon_with_epsilon = on
        endif
        
+! === 2015/10/19
+       if ( sw_phonon_with_epsilon == ON ) then
+          if ( sw_calc_force == ON ) sw_calc_dielectric_tensor = ON
+       endif
+! === 2015/10/19
+
        if ( f_getIntValue( tag_sw_calc_dielectric_tensor, iret) == 0) & 
             &  sw_calc_dielectric_tensor = iret
        
@@ -323,8 +343,9 @@ contains
        endif
 
        if( f_selectBlock( tag_spectrum ) == 0) then
-          if( f_getRealValue( tag_hwhm, dret,'') == 0 ) then
-             raman_spectra_hwhm = dret            ! in cm-1
+          if( f_getRealValue( tag_hwhm, dret,'') == 0 &
+               &  .or. f_getRealValue( tag_linewidth, dret,'') == 0 ) then
+             raman_spectra_hwhm = dret                                   ! in cm-1
              if ( raman_spectra_hwhm < 0.0 ) raman_spectra_hwhm = 5.0d0
           endif
 
@@ -375,6 +396,7 @@ contains
   end subroutine m_Raman_initialize
 
   subroutine m_Raman_print_param
+    if ( sw_raman == OFF ) return
     if ( sw_phonon_with_epsilon == OFF ) return
 
     write(nfout,'(A)') '!** ----- RAMAN setup ----- '
diff -uprN phase0_2015.01/src_phase_3d/m_ThomasFermiW_Potential.F90 phase0_2015.01.01/src_phase_3d/m_ThomasFermiW_Potential.F90
--- phase0_2015.01/src_phase_3d/m_ThomasFermiW_Potential.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_ThomasFermiW_Potential.F90	2016-07-12 12:51:53.000000000 +0900
@@ -84,7 +84,7 @@ contains
        ik = ista_k
        call new_radr_and_wos(ik,it)                 ! --> radr, wos
 
-       rcut = rad_cov_default( iatomn(it) )
+       rcut = rad_cov_default( nint(iatomn(it)) ) ! Revised according to a report from ASMS Co.ltd, 10 March 2016.
 
        Do il1=1, lpsmax(it)
           if ( il1 == iloc(it) ) cycle
diff -uprN phase0_2015.01/src_phase_3d/m_Total_Energy.F90 phase0_2015.01.01/src_phase_3d/m_Total_Energy.F90
--- phase0_2015.01/src_phase_3d/m_Total_Energy.F90	2015-09-14 15:39:53.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Total_Energy.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 454 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE:  m_Total_Energy
 !
@@ -33,7 +33,7 @@
 !
 module m_Total_Energy
 !    ( m_TE )
-! $Id: m_Total_Energy.F90 454 2015-09-07 07:58:39Z yamasaki $
+! $Id: m_Total_Energy.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Charge_Density,       only : chgq_l, chgqo_l, hsr
 !fj$$  use m_XC_Potential,         only : vxc_l, exc, m_XC_cal_potential
   use m_XC_Potential,         only : vxc_l, exc,eex,ecor
@@ -148,6 +148,15 @@ module m_Total_Energy
 
   use m_Control_Parameters,  only : m_CtrlP_get_isolver_now
 
+! === Positron SCF ==== 2015/11/28
+  use m_Control_Parameters,  only : sw_positron, npeg, positron_method
+  use m_Const_Parameters,   only :  positron_GGGC
+  use m_epc_potential,  only : ecorr_pztr => epc, m_epc_cal_potential, vepc_l
+  use m_PlaneWaveBasisSet, only : kg1_pwf
+  use m_Positron_Wave_Functions,  only : pzaj, nprvf_ordr, pchg_l, pchgo_l, pev
+  use m_PlaneWaveBasisSet,    only : m_pwBS_pstrn_kinetic_energies
+! ==================== 2015/11/28
+
   implicit none
   include 'mpif.h'
 
@@ -218,6 +227,10 @@ module m_Total_Energy
   real(kind=DP),private          :: espinorb_old, espinorb_now
 ! ========================================================================== 11.0
 
+! === positron
+  real(kind=DP) :: ekin_pztr, elocal_pztr, ehartr_ep, eohxc_pztr
+! ===
+
   integer,private, parameter     :: len_str = 132
   character(len=len_str),private    ::  str
 
@@ -341,6 +354,7 @@ contains
   end subroutine get_kinetic_energy_directly
 ! ==============================================================================
 
+
   
   subroutine get_xc_and_HE_of_old_CD_paw_sym
     integer :: ispin, it,lmt1, lmt2, il1, im1, il2, im2, ia
@@ -542,6 +556,14 @@ contains
     endif
 ! ================= 13.0S
 
+! ==== POSITRON SCF == 2015/11/28
+    if ( sw_positron /= OFF ) then
+       if ( positron_method == Positron_GGGC ) then
+          etotal0 = etotal0 +ekin_pztr +elocal_pztr +ecorr_pztr +ehartr_ep
+       endif
+    endif
+! =================== 2015/11/28
+
     if(sw_dipole_correction == ON) etotal0 = etotal0 + edip
     if(sw_hubbard == ON) etotal0 = etotal0 + ehub0
     if(sw_hybrid_functional == ON) etotal0 = etotal0 + eexx
diff -uprN phase0_2015.01/src_phase_3d/m_Wannier90.F90 phase0_2015.01.01/src_phase_3d/m_Wannier90.F90
--- phase0_2015.01/src_phase_3d/m_Wannier90.F90	2015-09-14 15:40:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_Wannier90.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,4 +1,6 @@
 #define WAN90_SAVE_MEMORY
+#define WAN90_SKIP_FFT
+#define WAN90_SPN_FORMATTED
 !=======================================================================
 !
 !  PROGRAM  PHASE/0 2014.01 ($Rev: 110 $)
@@ -48,16 +50,22 @@ module m_Wannier90
        &                         , nrank_e,myrank_e,map_e,ista_e,iend_e,istep_e,idisp_e &
        &                         , map_z,np_e,mpi_k_world,myrank_k,map_k,ista_k,iend_k &
        &                         , ista_snl, iend_snl, ierr, map_ek, nrank_k
-! === KT_add === 2015/02/23, 08/31
+! === KT_add === 2015/02/23, 09/02
   use m_Const_Parameters,   only : Delta07, zi, ON, UP, DOWN
   use m_Control_Parameters,  only : ndim_spinor, sw_use_hardpart_wan90, noncol, &
        &                            spin_component_wan90
   use m_Ionic_System,  only : natm, ityp, cps, pos
   use m_PseudoPotential,   only : dk_wan, nloc, nlmta, m_PP_include_vanderbilt_pot, &
        &                          ltp, mtp, taup, ilmt, lmta, il2p, isph, iqitg, dl2p, &
-       &                          qitg_wan, nqitg, phirpw, psirpw
+       &                          qitg_wan, nqitg, phirpw, psirpw, q
   use m_Electronic_Structure,only: fsr_l, fsi_l
-! ============== 2015/02/23, 08/31
+! ============== 2015/02/23, 09/02
+
+! === KT_add === 2015/09/14
+  use m_Const_Parameters,  only : ELECTRON, DIRECT, INVERSE
+  use m_FFT,  only : m_FFT_WF
+  use m_PlaneWaveBasisSet,  only : kg1, kg
+! ============== 2015/09/14
 
   implicit none
 
@@ -77,10 +85,18 @@ module m_Wannier90
   integer, allocatable :: ib_inc(:) ! d(num_bands)
 
   real(kind=DP), allocatable :: projfunc(:,:,:,:) ! d(kg1,n_proj,ista_snl:iend_snl,kimg)
-! ==== KT_add === 2015/04/13
+
+! ==== KT_add === 2015/04/13 & 09/02
   real(kind=DP), allocatable :: dk_unit(:,:)
   logical, allocatable :: centre_on_atom(:)
-! =============== 2015/04/13
+
+  integer, allocatable :: spn_index(:)
+  real(kind=DP), allocatable :: spn_quant_dir(:,:)
+! =============== 2015/04/13 & 09/02
+
+! ==== KT_add === 2015/09/14
+  integer, allocatable :: igf_wan90(:,:,:,:)
+! =============== 2015/09/14
 
   include 'mpif.h'                                      ! MPI
   integer istatus(mpi_status_size)                      ! MPI
@@ -146,10 +162,26 @@ contains
     allocate(zaxis(3,n_proj))
     allocate(xaxis(3,n_proj))
     allocate(zona(n_proj))
-    do i=1,n_proj
-       read(nfwannier,*) centre(1:3,i), lang(i), mr(i), irf(i)
-       read(nfwannier,*) zaxis(1:3,i), xaxis(1:3,i), zona(i)
-    end do
+
+    if ( noncol ) then
+       allocate( spn_index(n_proj) )
+       allocate( spn_quant_dir(3,n_proj) )
+    endif
+
+    if ( noncol ) then
+       do i=1,n_proj
+          read(nfwannier,*) centre(1:3,i), lang(i), mr(i), irf(i)
+          read(nfwannier,*) zaxis(1:3,i), xaxis(1:3,i), zona(i)
+!
+          read(nfwannier,*) spn_index(i), spn_quant_dir(1:3,i)
+       end do
+    else
+       do i=1,n_proj
+          read(nfwannier,*) centre(1:3,i), lang(i), mr(i), irf(i)
+          read(nfwannier,*) zaxis(1:3,i), xaxis(1:3,i), zona(i)
+       end do
+    endif
+
     read(nfwannier,*)
 
     read(nfwannier,*)
@@ -183,7 +215,7 @@ contains
 
     close(nfwannier)
 
-! == KT add == 2015/02/23
+! == KT add == 2015/02/23 & 09/02
 100 continue
     if ( npes > 1 ) then
        call mpi_bcast( calc_only_A, 1, mpi_logical, 0, mpi_comm_group, ierr )
@@ -214,6 +246,17 @@ contains
        call mpi_bcast( zona,     n_proj, mpi_double_precision, 0, &
             &          mpi_comm_group, ierr )
 
+       if ( noncol ) then
+          if ( mype /= 0 ) then
+             allocate( spn_index(n_proj) )
+             allocate( spn_quant_dir(3,n_proj) )
+          endif
+          call mpi_bcast( spn_index, n_proj, mpi_integer, 0, &
+               &          mpi_comm_group, ierr )
+          call mpi_bcast( spn_quant_dir, 3*n_proj, mpi_double_precision, 0, &
+               &          mpi_comm_group, ierr )
+       endif
+
        call mpi_bcast( lang, n_proj, mpi_integer, 0, mpi_comm_group, ierr )
        call mpi_bcast( mr,   n_proj, mpi_integer, 0, mpi_comm_group, ierr )
        call mpi_bcast( irf,  n_proj, mpi_integer, 0, mpi_comm_group, ierr )
@@ -231,7 +274,7 @@ contains
        endif
        call mpi_bcast( exclude_bands, n_exclude, mpi_integer, 0, mpi_comm_group, ierr )
     endif
-! ============= 2015/02/23
+! ============= 2015/02/23 & 09/02
 
     num_bands = nb_wan90 - n_exclude
     allocate(ib_inc(num_bands))
@@ -469,7 +512,10 @@ contains
     end subroutine gather_matrix
 
     subroutine print_mat
-      integer :: ik ,m, n, ik_start, iktmp
+      integer :: ik ,m, n, ik_start, iktmp, is
+      real(kind=DP) :: theta, phi
+      complex(kind=CMPLDP) :: ztmp, z1
+      complex(kind=CMPLDP), allocatable :: spn_weight(:,:)
 
       if ( mype /= 0 ) return
 
@@ -487,15 +533,37 @@ contains
       write(nfwannier,*) num_bands, kv3/nspin, n_proj
 
       if ( noncol ) then
-         do ik = 1, kv3, nspin
-            iktmp = ( ik -1 )/nspin +1
+         allocate( spn_weight(2,n_proj) ); spn_weight = 0.0d0
+
+         Do n=1, n_proj
+            theta = acos( spn_quant_dir(3,n) )
+            phi = atan2( spn_quant_dir(2,n), spn_quant_dir(1,n) )
+!            
+            if ( spn_index(n) == 1 ) then
+               spn_weight(1,n) = exp( zi *phi /2.0d0 ) *cos( theta /2.0d0 )
+               spn_weight(2,n) = -exp( zi *phi /2.0d0 ) *sin( theta /2.0d0 )
+            else
+               spn_weight(1,n) = exp( -zi *phi /2.0d0 ) *sin( theta /2.0d0 )
+               spn_weight(2,n) = exp( -zi *phi /2.0d0 ) *cos( theta /2.0d0 )
+            endif
+         End do
+
+         do ik = 1, kv3, ndim_spinor
+            iktmp = ( ik -1 )/ndim_spinor +1
             do n=1,n_proj
                do m=1,num_bands
+                  ztmp = 0.0d0
+                  Do is=1, ndim_spinor
+                     z1 = dcmplx( a_mat(m,n,ik+is-1,1), a_mat(m,n,ik+is-1,2 ) )
+                     ztmp = ztmp +spn_weight(is,n) *z1
+                  End do
+
                   write(nfwannier,'(3(1x,i5),2(1x,f18.12))') m, n, iktmp, &
-                       &         a_mat(m,n,ik,1:2) +a_mat(m,n,ik+1,1:2)
+                       &         real(ztmp), aimag(ztmp)
                end do
             end do
          end do
+         deallocate( spn_weight )
 
       else
          ik_start = 1
@@ -1306,4 +1374,345 @@ contains
   end subroutine deompose_sphr_into_lm
 ! =========== 2015/02/23
 
+! ==== KT_add === 2015/09/04
+  subroutine m_Wan90_gen_mat_spn
+    integer :: npauli = 3
+
+    complex(kind=CMPLDP), allocatable :: spn_mat(:,:,:,:) ! d(neg,neg,kv3/nspin,3)
+
+    allocate( spn_mat( npauli, num_bands, num_bands, kv3/ndim_spinor) ); spn_mat = 0.d0
+
+    call contrib_softpart
+    if ( sw_use_hardpart_wan90 == ON ) call contrib_hardpart
+
+    call gather_matrix
+    call print_mat
+
+    deallocate( spn_mat )
+
+  contains
+
+    subroutine contrib_softpart
+      integer :: ik0, ik1, ik2, iktmp, is1, is2
+      integer :: ig
+      integer :: m, mi, n, ni, ib1, ib2
+      complex(kind=CMPLDP) :: zsum
+
+      real(kind=DP), allocatable :: wk_zaj(:,:)
+      real(kind=DP) :: c1, c2
+      complex(kind=CMPLDP) :: z1, z2
+
+      allocate( wk_zaj( kg1, kimg ) ); wk_zaj = 0.0d0
+
+      do ik0=1, kv3, ndim_spinor
+         if ( map_k(ik0) /= myrank_k ) cycle
+
+         iktmp = ( ik0 -1 )/ndim_spinor +1
+
+         Do is2=1, ndim_spinor
+            ik2 = ik0 +is2 -1
+
+            Do n=1, num_bands
+               ni = ib_inc(n)
+               ib2 = neordr(ni,ik2)
+
+               if ( map_e(ib2) == myrank_e ) then
+                  wk_zaj(1:iba(ik2),1:kimg) = zaj_l(1:iba(ik2),map_z(ib2),ik2,1:kimg)
+               endif
+               call mpi_bcast( wk_zaj, kg1*kimg, mpi_double_precision, map_e(ib2), &
+                    &          mpi_k_world(myrank_k), ierr )
+
+               Do is1=1, ndim_spinor
+                  ik1 = ik0 +is1 -1
+               
+                  do m=1,num_bands
+                     mi = ib_inc(m)
+                     ib1 = neordr(mi,ik1)
+
+                     if ( map_e(ib1) /= myrank_e ) cycle
+
+                     zsum = 0.0d0
+
+                     if ( kimg == 1 ) then
+                        Do ig=1, iba(ik0)
+                           c1 = zaj_l(ig,map_z(ib1),ik1,1)
+                           c2 = c1 *wk_zaj(ig,1)
+                           zsum = zsum +c2
+                        End do
+                     else
+                        Do ig=1, iba(ik0)
+                           z1 = dcmplx( zaj_l(ig,map_z(ib1),ik1,1), &
+                                &       zaj_l(ig,map_z(ib1),ik1,2) )
+                           z2 = conjg(z1) *dcmplx( wk_zaj(ig,1), wk_zaj(ig,2) )
+                           zsum = zsum +z2
+                        End Do
+                     endif
+!
+                     if ( is1 == 1 .and. is2 == 1 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) +zsum
+                     else if ( is1 == 1 .and. is2 == 2 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) -zsum *zi
+                     else if ( is1 == 2 .and. is2 == 1 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) +zsum *zi
+                     else if ( is1 == 2 .and. is2 == 2 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) -zsum
+                     endif
+
+                  end do
+               end Do
+            end do
+         end do
+      end do
+
+    end subroutine contrib_softpart
+
+    subroutine contrib_hardpart
+      integer :: ik0, ik1, ik2, iktmp, is1, is2
+      integer :: mi, m, ni, n, ib1, ib2
+      integer :: ia, it, mdvdb, lmt1, lmt2, il1, il2, it1, it2
+      integer :: lmta1, lmta2
+      complex(kind=CMPLDP) :: zsum, wf1, wf2
+      complex(kind=CMPLDP), allocatable :: wk_fsri(:)
+
+      allocate( wk_fsri(nlmta) ); wk_fsri = 0.0d0
+
+! -- start
+      Do ik0=1, kv3, ndim_spinor
+         if ( map_k(ik0) /= myrank_k ) cycle
+
+         iktmp = ( ik0 -1 )/ndim_spinor +1
+
+         Do is2=1, ndim_spinor
+            ik2 = ik0 +is2 -1
+
+            do n=1,num_bands
+               ni = ib_inc(n)
+               ib2 = neordr(ni,ik2)
+
+               if ( map_e(ib2) == mype ) then
+                  wk_fsri(:) = dcmplx( fsr_l( map_z(ib2),:,ik2 ), &
+                       &               fsi_l( map_z(ib2),:,ik2 ) )
+               endif
+               call mpi_bcast( wk_fsri, 2*nlmta, mpi_double_precision, &
+                    &          map_e(ib2), mpi_k_world(myrank_k), ierr )
+
+               Do is1=1, ndim_spinor
+                  ik1 = ik0 +is1 -1
+
+                  do m=1,num_bands
+                     mi = ib_inc(m)
+                     ib1 = neordr(mi,ik1)
+
+                     if ( map_e(ib1) /= myrank_e ) cycle
+                     
+                     zsum = 0.d0
+
+                     Do ia=1, natm
+                        it = ityp(ia)
+                        mdvdb = m_PP_include_vanderbilt_pot(it)
+                        if ( mdvdb == SKIP ) cycle
+
+                        Do lmt1=1, ilmt(it)
+                           il1 = ltp(lmt1,it); it1 = taup(lmt1,it)
+                           lmta1 = lmta( lmt1,ia )
+                           wf1 = dcmplx( fsr_l( map_z(ib1), lmta1, ik1 ), &
+                                &        fsi_l( map_z(ib1), lmta1, ik1 ) )
+
+                           Do lmt2=1, ilmt(it)
+                              il2 = ltp(lmt2,it); it2 = taup(lmt2,it)
+                              lmta2 = lmta( lmt2,ia )
+                              wf2 = wk_fsri(lmta2)
+
+                              zsum = zsum +conjg(wf1) * wf2 *q(lmt1,lmt2,it)
+                           End do
+                        End Do
+                     End Do
+
+                     if ( is1 == 1 .and. is2 == 1 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) +zsum
+                     else if ( is1 == 1 .and. is2 == 2 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) -zsum *zi
+                     else if ( is1 == 2 .and. is2 == 1 ) then
+                        spn_mat(1,m,n,iktmp) = spn_mat(1,m,n,iktmp) +zsum
+                        spn_mat(2,m,n,iktmp) = spn_mat(2,m,n,iktmp) +zsum *zi
+                     else if ( is1 == 2 .and. is2 == 2 ) then
+                        spn_mat(3,m,n,iktmp) = spn_mat(3,m,n,iktmp) -zsum
+                     endif
+
+                  End do
+               End Do
+            End DO
+         End Do
+      End Do
+
+      deallocate( wk_fsri )
+
+    end subroutine contrib_hardpart
+
+    subroutine gather_matrix
+      complex(kind=CMPLDP), allocatable :: spn_mat_mpi(:,:,:,:) ! d(neg,neg,kv3/nspin,3)
+
+      if (npes>1) then
+         allocate(spn_mat_mpi(npauli,num_bands,num_bands,kv3/nspin) )
+         spn_mat_mpi = spn_mat
+         spn_mat = 0.d0
+         call mpi_allreduce( spn_mat_mpi, spn_mat, &
+              &              num_bands*num_bands*kv3/nspin*npauli*2, &
+              &              mpi_double_precision, mpi_sum, mpi_comm_group,ierr )
+         deallocate(spn_mat_mpi)
+      end if
+
+      call mpi_barrier( mpi_comm_world, ierr )
+
+    end subroutine gather_matrix
+
+    subroutine print_mat
+      integer :: ik, mi, ni, ispn, num, nsize
+      complex(kind=CMPLDP), allocatable :: work(:,:)
+
+      if ( mype /= 0 ) return
+
+      open(nfwannier,file=trim(wan90_seedname)//".spn",form="formatted")
+
+#ifdef WAN90_SPN_FORMATTED
+      write(nfwannier,*) 'Generated by PHASE noncol'
+      write(nfwannier,*) num_bands, kv3/nspin
+
+      do ik=1, kv3/nspin
+         Do mi=1, num_bands
+            Do ni=1, mi
+               Do ispn=1, 3
+                  write(nfwannier,'(2f16.12)') spn_mat(ispn,mi,ni,ik)
+               End Do
+            End Do
+         End do
+      end do
+#else
+      write(nfwannier) 'Generated by PHASE noncol'
+      write(nfwannier) num_bands, kv3/nspin
+      
+      nsize = num_bands *( num_bands +1 ) /2
+      allocate( work(3, nsize) ); work = 0.0d0
+
+      Do ik=1, kv3/nspin
+         num = 0
+         Do mi=1, num_bands
+            Do ni=1, mi
+               num = num +1
+               work(:,num) = spn_mat(:,mi,ni,ik)
+            End Do
+         End Do
+         write(nfwannier) ( (work(ispn,mi),ispn=1,3), mi=1, nsize )
+      End do
+      deallocate( work  )
+#endif
+
+      close(nfwannier)
+
+    end subroutine print_mat
+
+  end subroutine m_Wan90_gen_mat_spn
+! ================== 2015/09/04
+
+! ==== KT_add ==== 2015/09/14
+  subroutine m_Wan90_set_igf_wan90
+    integer :: id, i
+    integer :: igf1, igf2, igf3
+    integer :: nx, ny, nz
+    integer :: nxmin, nymin, nzmin, nxmax, nymax, nzmax
+!
+    nxmin = minval( nncell(1,:,:) );   nxmax = maxval( nncell(1,:,:) );  
+    nymin = minval( nncell(2,:,:) );   nymax = maxval( nncell(2,:,:) );  
+    nzmin = minval( nncell(3,:,:) );   nzmax = maxval( nncell(3,:,:) );  
+
+    allocate( igf_wan90( kg, nxmin:nxmax, nymin:nymax, nzmin:nzmax ) )
+    igf_wan90 = 0
+!
+    id = fft_box_size_WF(1,0)
+
+    Do nx=nxmin, nxmax
+       Do ny=nymin, nymax
+          Do nz=nzmin, nzmax
+
+             do i = 1, kg
+                igf1 = ngabc(i,1) + 1 -nx
+                igf2 = ngabc(i,2) + 1 -ny
+                igf3 = ngabc(i,3) + 1 -nz
+!
+                if ( igf1 <= 0 ) igf1 = igf1 + fft_box_size_WF(1,1)
+                if ( igf2 <= 0 ) igf2 = igf2 + fft_box_size_WF(2,1)
+                if ( igf3 <= 0 ) igf3 = igf3 + fft_box_size_WF(3,1)
+
+                igf_wan90(i,nx,ny,nz) = igf1 + (igf2-1)*id &
+                     &                 + (igf3-1)*id*fft_box_size_WF(2,0)
+             enddo
+          End Do
+       End Do
+    End Do
+  end subroutine m_Wan90_set_igf_wan90
+
+  subroutine m_Wan90_map_WF_on_fftmesh(k1,k2,ik,psi_l,bfft,igf_in)
+    integer, intent(in) :: k1,k2,ik
+    integer, intent(in) :: igf_in(kg1)
+    real(kind=DP), intent(in),dimension(kg1,1,k1:k2,kimg) :: psi_l
+    real(kind=DP), intent(inout), dimension(nfft) :: bfft
+
+    integer :: i,i1,ri, j, i2, ii
+
+    bfft = 0.d0
+    if(k_symmetry(ik) == GAMMA) then
+       if(kimg == 1) then
+          i1 = igf_in(1)
+          bfft(i1) = psi_l(1,1,ik,1)
+#ifdef NEC_TUNE_SMP
+!CDIR NODEP
+#endif
+          do ii = 2, iba(ik)
+!!$             i = nbase(ii,1)
+             i = nbase(ii,ik)
+             i1 = igf_in(i)
+             bfft(i1) = psi_l(ii,1,ik,1)
+             j = nbase_gamma(ii,2)
+             i2 = igf_in(j)
+             bfft(i2) =   psi_l(ii,1,ik,1)
+          end do
+       else if(kimg == 2) then
+          i1 = 2*igf_in(1) - 1
+          bfft(i1)   = psi_l(1,1,ik,1)
+          bfft(i1+1) = psi_l(1,1,ik,2)
+#ifdef NEC_TUNE_SMP
+!CDIR NODEP
+#endif
+          do ii = 2, iba(ik)
+!!$             i = nbase(ii,1)
+             i = nbase(ii,ik)
+             i1 = 2*igf_in(i)-1
+             bfft(i1  ) = psi_l(ii,1,ik,1)
+             bfft(i1+1) = psi_l(ii,1,ik,2)
+             j = nbase_gamma(ii,2)
+             i2 = 2*igf_in(j)-1
+             bfft(i2  ) = psi_l(ii,1,ik,1)
+             bfft(i2+1) = -psi_l(ii,1,ik,2)
+          end do
+       end if
+    else
+#ifdef NEC_TUNE_SMP
+!CDIR NOLOOPCHG
+#endif
+       do ri = 1, kimg
+#ifdef NEC_TUNE_SMP
+!CDIR NODEP
+#endif
+          do i = 1, iba(ik)
+             i1 = kimg*igf_in(nbase(i,ik)) + (ri - kimg)
+             bfft(i1) = psi_l(i,1,ik,ri)   ! MPI
+          end do
+       end do
+    end if
+  end subroutine m_Wan90_map_WF_on_fftmesh
+! ===== 2015/09/14
+
 end module m_Wannier90
diff -uprN phase0_2015.01/src_phase_3d/m_XC_Potential.F90 phase0_2015.01.01/src_phase_3d/m_XC_Potential.F90
--- phase0_2015.01/src_phase_3d/m_XC_Potential.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_XC_Potential.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 493 $)
 !
 !  MODULE: m_XC_Potential
 !
@@ -69,7 +69,7 @@
 
 #define XC_PACK_FFT
 module m_XC_Potential
-! $Id: m_XC_Potential.F90 443 2015-08-03 13:03:48Z ktagami $
+! $Id: m_XC_Potential.F90 493 2016-06-01 04:57:01Z ktagami $
 !
 !  Upgraded on 23rd Aug. 2006 by T. Yamasaki
 !    Differentials of the charge density function in GGA calculation are
@@ -89,7 +89,7 @@ module m_XC_Potential
   use m_Crystal_Structure,    only : rltv,univol
   use m_Ionic_System,         only : ntyp,natm,iwei,ityp,pos,zfm3_l  
   use m_FFT,                  only : fft_box_size_CD, fft_box_size_CD_c, nfftp &
-       &                           , m_FFT_check_of_negative_CD
+       &                           , m_FFT_check_of_negative_CD, fft_box_size_CD_nonpara
 #ifdef _MPIFFT_
   use m_FFT,                  only : m_FFT_set_cdata  &
        &                           , lx,ly,lz,ly_d,lz_d, ny_d,nz_d
diff -uprN phase0_2015.01/src_phase_3d/m_epc_potential.F90 phase0_2015.01.01/src_phase_3d/m_epc_potential.F90
--- phase0_2015.01/src_phase_3d/m_epc_potential.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_epc_potential.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 472 $)
 !
 !  MODULE: m_epc_potential
 !
@@ -24,7 +24,7 @@
 !
 !
 module m_epc_potential
-! $Id: m_epc_potential.F90 440 2015-08-03 07:18:01Z ktagami $
+! $Id: m_epc_potential.F90 472 2015-11-28 09:01:17Z ktagami $
   use m_Electronic_Structure, only : vlhxc_l
 !$$#ifdef PARA3D
   use m_PlaneWaveBasisSet,    only : ngabc,gr_l,igfp_l,igfp_nonpara,kg,kgp,kgp_reduced,ylm_l
@@ -56,10 +56,19 @@ module m_epc_potential
        &                           , nis_sfftp,nie_sfftp,nel_sfftp,idisp_sfftp,np_sfftp,mp_sfftp &
        &                           , npes_cdfft
 
+! === Postitron SCF === 2015/11/28
+  use m_Control_Parameters,  only : positron_method
+  use m_Const_Parameters,   only : positron_CONV
+  use m_Charge_Density,     only : chgq_l
+  use m_PseudoPotential,      only : psc_l
+! ===================== 2015/11/28
+
   implicit none
 
   include 'mpif.h'
 
+  real(kind=DP), pointer :: vlhxc_p( :,:,: )
+
   real(kind=DP), allocatable, dimension(:,:,:) :: tchgq_l  ! d(ista_kngp:iend_kngp,kimg)
   real(kind=DP), allocatable, dimension(:,:) ::   tchgr_l  ! d(ista_sfftph:iend_sfftph,nspin)
 !!$  real(kind=DP), allocatable, dimension(:) ::     p_potential_l ! d(ista_kngp:iend_kngp)
@@ -98,6 +107,16 @@ contains
     integer                  :: np0, j0, i2, j2, k2, n0
 #endif
 
+    if ( allocated(tchgq_l) ) deallocate( tchgq_l )
+    if ( allocated(tchgr_l) ) deallocate( tchgr_l )
+    if ( allocated( grad_tchgr_l ) ) deallocate( grad_tchgr_l )
+    if ( allocated( chden_l ) ) deallocate( chden_l )
+    if ( allocated( inx ) ) deallocate( inx )
+    if ( allocated( jnx ) ) deallocate( jnx )
+    if ( allocated( knx ) ) deallocate( knx )
+    if ( allocated( vepc_l ) ) deallocate( vepc_l )
+    if ( allocated( f2or1) ) deallocate( f2or1 )
+
     allocate(tchgq_l(ista_kngp:iend_kngp,kimg,nspin));  tchgq_l = 0.d0
     allocate(tchgr_l(ista_sfftph:iend_sfftph,nspin));     tchgr_l = 0.d0
     if(sw_gga_p == ON) then
@@ -227,6 +246,7 @@ contains
 !!$       tchgq_l(:,:,1) = tchgq_l(:,:,2)
 !!$    end if
 
+    tchgq_l = 0.0d0
     do ispin = 1, nspin
        do j = 1, kimg
           do i = ista_kngp, iend_kngp
@@ -673,7 +693,26 @@ contains
     if(npes >= 2) call tstatc0_end(id_sname)
   end subroutine afft_allgatherv
 
-subroutine m_epc_ESlhxc_potential(nfout)
+! ==== POSITRON SCF ===== 2015/11/28
+  subroutine m_epc_alloc_vlhxc_p
+    if ( positron_method == positron_CONV ) then
+       vlhxc_p => vlhxc_l
+    else
+       if ( .not. associated( vlhxc_p ) ) then
+          allocate( vlhxc_p(ista_kngp:iend_kngp,kimg,nspin) )
+          vlhxc_p = 0.0d0
+       endif
+    endif
+  end subroutine m_epc_alloc_vlhxc_p
+  
+  subroutine m_epc_dealloc_vlhxc_p
+    if ( positron_method /= positron_CONV ) then
+       if ( associated( vlhxc_p ) ) deallocate( vlhxc_p )
+    endif
+  end subroutine m_epc_dealloc_vlhxc_p
+! ======================= 2015/11/28
+
+  subroutine m_epc_ESlhxc_potential(nfout)
     integer, intent(in)       :: nfout
 !    real(kind=DP), intent(in) :: chg(ista_kngp:iend_kngp,kimg,nspin)
 !    real(kind=DP), intent(in) :: vxc(ista_kngp:iend_kngp,kimg,nspin)
@@ -684,26 +723,26 @@ subroutine m_epc_ESlhxc_potential(nfout)
 
 !    call tstatc0_begin('m_ESlhxc_potential ',id_sname)
 
-    vlhxc_l = 0.d0
+    vlhxc_p = 0.d0
     ist = ista_kngp
     if(ist == 1) ist = 2
     do is = 1, nspin
        do ik = 1, kimg
-          if(mype==0) vlhxc_l(1,ik,is)   = vepc_l(1,ik,is)
+          if(mype==0) vlhxc_p(1,ik,is)   = vepc_l(1,ik,is)
           if(nspin == 1) then
              do i = ist, iend_kngp  !for mpi
-                vlhxc_l(i,ik,is) = vepc_l(i,ik,is) -PAI4*tchgq_l(i,ik,is)&
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) -PAI4*tchgq_l(i,ik,is)&
                      &/gr_l(i)**2
              end do
           else if(nspin == 2) then
              do i = ist, iend_kngp  !for mpi
-                vlhxc_l(i,ik,is) = vepc_l(i,ik,is) -PAI4*(tchgq_l(i,ik,UP)&
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) -PAI4*(tchgq_l(i,ik,UP)&
                      &+tchgq_l(i,ik,DOWN))/gr_l(i)**2
              end do
           endif
 !          do it    = 1,ntyp
 !             do i = ista_kngp, iend_kngp  !for mpi
-!                vlhxc_l(i,ik,is) !                     & = vlhxc_l(i,ik,is)
+!                vlhxc_p(i,ik,is) !                     & = vlhxc_p(i,ik,is)
           !+psc_l(i,it)*zfm3_l(i,it,ik)
 !             end do
 !          end do
@@ -711,17 +750,57 @@ subroutine m_epc_ESlhxc_potential(nfout)
     end do
 !    call tstatc0_end(id_sname)
     if(ipripositron >= 3) then
-       write(nfout,'(" -- vepc_l tchgq_l vlhxc_l << m_epc_ESlhxc_potential>&
+       write(nfout,'(" -- vepc_l tchgq_l vlhxc_p << m_epc_ESlhxc_potential>&
             &>")')
        do is = 1, nspin
           if(nspin == 2) write(nfout,'("  ispin = ",i5)') is
           do ik = 1, kimg
              do i = ist, min(ist+20,iend_kngp)
                 write(nfout,'(3f10.4)') vepc_l(i,ik,is),tchgq_l(i,ik,is)&
-                     &,vlhxc_l(i,ik,is)
+                     &,vlhxc_p(i,ik,is)
              end do
           end do
        end do
     end if
   end subroutine m_epc_ESlhxc_potential
+
+! ==== POSITRON SCF ===== 2015/11/28
+  subroutine m_epc_ESlhxc_potential_mod(nfout)    ! for scf
+    integer, intent(in)       :: nfout
+
+    integer :: is,ik,i,it
+    integer :: ist
+    integer :: id_sname = -1
+
+    vlhxc_p = 0.d0
+    ist = ista_kngp
+
+    if(ist == 1) ist = 2
+    do is = 1, nspin
+       do ik = 1, kimg
+          if(mype==0) vlhxc_p(1,ik,is)   = vepc_l(1,ik,is)
+          if(nspin == 1) then
+             do i = ist, iend_kngp  !for mpi
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) &
+                     &            -PAI4 *chgq_l(i,ik,is) /gr_l(i)**2
+             end do
+          else if(nspin == 2) then
+             do i = ist, iend_kngp  !for mpi
+                vlhxc_p(i,ik,is) = vepc_l(i,ik,is) &
+                     &           -PAI4*( chgq_l(i,ik,UP)&
+                     &                  +chgq_l(i,ik,DOWN) )/gr_l(i)**2
+             end do
+          endif
+          do it = 1,ntyp
+             do i = ista_kngp, iend_kngp  !for mpi
+                vlhxc_p(i,ik,is) = vlhxc_p(i,ik,is) &
+                     &            -psc_l(i,it) *zfm3_l(i,it,ik)
+             end do
+          end do
+       end do
+    end do
+
+  end subroutine m_epc_ESlhxc_potential_mod
+! ============== 2015/11/28
+
 end module m_epc_potential
diff -uprN phase0_2015.01/src_phase_3d/m_vdWDF.F90 phase0_2015.01.01/src_phase_3d/m_vdWDF.F90
--- phase0_2015.01/src_phase_3d/m_vdWDF.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/m_vdWDF.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 (rev.375)
+!  PROGRAM  PHASE/0 2014.01 (rev.375)
 !
 !  "First-principles Electronic Structure Calculation Program"
 !
@@ -29,7 +29,7 @@
 !  Consortium. 
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
-
+!
 !********************************** Note ******************************************
 ! This program calculates the non-local correlation energy (Ecnl) and 
 ! the local correlation energy (EcLDA) as the post calculation by utilizing
@@ -47,7 +47,23 @@
 ! Periodic systems are assumed.
 ! The atomic units (Hartree) are used.
 !
+! ======= modification =====
+!
+!  2016/06/06 : by asms
+!         The FFT normalization parameters are changed in order to be consistent
+!         with the other subroutines.
+!
+!         rho(G) = 1/V *Int rho(r) exp(-iGr);   rho(r) = sum_{G} rho(G) exp(iGr)
+!        
+!         In the dicretized grids, rho(G) = FFT[rho(r)] /(na*nb*nc);
+!                                  rho(r) = FFT[rho(G)]
+!
+!         By this, the nonlocal vdW energy is written as
+!             Encl = V**2 /2 sum_{ij} sum_{G} conjg(theta(i,G))*phi(i,j,G)*theta(i,G)
+!         and 
+!             phi(i,j,G) = 1/V Int phi(i,j,r) exp(-iGr)
 !
+! =========================
 !
 ! ++++++++ List of subroutines ++++++
 ! All subroutine files listed below are included in this file.
@@ -110,107 +126,106 @@
 !**********************************************************************************
 #ifndef DISABLE_VDWDF
 module progress_bar
-   implicit none
-   logical,private :: printable = .false.
-   integer,private :: iend=10
-   integer,private :: uni=6
-   integer,private :: j=0
-   logical, dimension(0:9),private :: done
-
-   contains
-
-   subroutine set_printable(pri)
-      logical, intent(in) :: pri
-      printable = pri
-   end subroutine set_printable
-
-   subroutine reset_progress()
-      j = 0
-      done = .false.
-      if(printable) write(unit=uni,fmt="(a10)") "0%    100%"
-   end subroutine reset_progress
-
-   subroutine set_unit(un)
-      integer, intent(in) :: un
-      uni = un
-   end subroutine set_unit
-
-   subroutine set_end(en)
-      integer, intent(in) :: en
-      iend = en
-   end subroutine set_end
-
-   subroutine progress()
-     use m_Const_Parameters, only : DP
-     implicit none
-     integer(kind=4)::k
-     character(len=17)::bar="???% |          |"
-     integer :: jj
-     real(kind=DP) :: jjj
-     j = j+1
-     if(j.gt.iend) j=iend
-     jj = int(10*(dble(j)/dble(iend))*0.999999d0)
-     if(.not.done(jj).and.j<=iend) then
-        if(printable) write(unit=uni,fmt="(a1,$)") '*'
-        call flush(uni)
-        done(jj) = .true.
-     endif
-     if(j==iend.and.printable) write(unit=uni,fmt=*)
-!     jjj = (dble(j)/dble(iend))
-!     write(unit=bar(1:3),fmt="(i3)") int(100*jjj)
-!     do k=1, jj
-!       bar(6+k:6+k)="*"
-!     enddo
-!     ! print the progress bar.
-!     write(unit=uni,fmt="(a1,a17,$)") char(13), bar
-!!     write(unit=uni,fmt="(a1,a17)") char(13), bar
-!     if (j/=iend) then
-!       flush(unit=6)
-!     else
-!       write(unit=6,fmt=*)
-!       do k=1,jj
-!          bar(6+k:6+k) = ""
-!       enddo
-!     endif
-     return
-   end subroutine progress
+  implicit none
+  logical,private :: printable = .false.
+  integer,private :: iend=10
+  integer,private :: uni=6
+  integer,private :: j=0
+  logical, dimension(0:9),private :: done
+
+contains
+
+  subroutine set_printable(pri)
+    logical, intent(in) :: pri
+    printable = pri
+  end subroutine set_printable
+
+  subroutine reset_progress()
+    j = 0
+    done = .false.
+    if(printable) write(unit=uni,fmt="(a10)") "0%    100%"
+  end subroutine reset_progress
+
+  subroutine set_unit(un)
+    integer, intent(in) :: un
+    uni = un
+  end subroutine set_unit
+
+  subroutine set_end(en)
+    integer, intent(in) :: en
+    iend = en
+  end subroutine set_end
+
+  subroutine progress()
+    use m_Const_Parameters, only : DP
+    implicit none
+    integer(kind=4)::k
+    character(len=17)::bar="???% |          |"
+    integer :: jj
+    real(kind=DP) :: jjj
+    j = j+1
+    if(j.gt.iend) j=iend
+    jj = int(10*(dble(j)/dble(iend))*0.999999d0)
+    if(.not.done(jj).and.j<=iend) then
+       if(printable) write(unit=uni,fmt="(a1,$)") '*'
+       call flush(uni)
+       done(jj) = .true.
+    endif
+    if(j==iend.and.printable) write(unit=uni,fmt=*)
+    !     jjj = (dble(j)/dble(iend))
+    !     write(unit=bar(1:3),fmt="(i3)") int(100*jjj)
+    !     do k=1, jj
+    !       bar(6+k:6+k)="*"
+    !     enddo
+    !     ! print the progress bar.
+    !     write(unit=uni,fmt="(a1,a17,$)") char(13), bar
+    !!     write(unit=uni,fmt="(a1,a17)") char(13), bar
+    !     if (j/=iend) then
+    !       flush(unit=6)
+    !     else
+    !       write(unit=6,fmt=*)
+    !       do k=1,jj
+    !          bar(6+k:6+k) = ""
+    !       enddo
+    !     endif
+    return
+  end subroutine progress
 
 end module progress_bar
 
 module m_vdWDF
-
-  use m_Const_Parameters, only : DP, ON, PAI, FMAXVALLEN, LOWER
-  use m_Control_Parameters, only : printable,nspin,eval_kernel_by_interpolation,na_gl   &
-  &                              , a1,a2,dq_vdw,lambda,q0cut,ds,ndel,nphiD,nr12,nk,maxk &
-  &                              , r12max,oneshot
+  use m_Const_Parameters, only : DP, ON, PAI, FMAXVALLEN, LOWER, CMPLDP
+  use m_Control_Parameters, only : printable,nspin,eval_kernel_by_interpolation, &
+       &                           na_gl,a1,a2,dq_vdw,lambda,q0cut,ds,ndel,nphiD, &
+       &                           nr12,nk,maxk, r12max,oneshot, ipri, &
+       &                           sw_save_memory_vdw, kimg, iprixc
   use m_Files, only : nfout
   use m_Charge_Density, only : m_CD_get_rspace_charge
 
   use m_FFT, only : fft_box_size_CD,fft_box_size_CD_nonpara
-  use m_Crystal_Structure, only : altv,univol
-
-  use m_Parallelization, only : npes,mype,mpi_comm_group
+  use m_Crystal_Structure, only : altv,univol, rltv
 
+  use m_Parallelization, only : npes,mype,mpi_comm_group,m_Parallel_init_mpi_nq,  &
+       &                        ista_nq,iend_nq,np_nq,mp_nq,is_nq,ie_nq,nel_nq, &
+       &                        map_z_nq
   use m_Timing, only : tstatc0_begin,tstatc0_end
 
   implicit none
-
   include 'mpif.h'
 
   real(kind=DP),parameter :: pi=PAI
 
-! Physical values
+  ! Physical values
   Real(kind=DP) ExGGA,Ecnl,Ecnl_12,Ecnl_12_ab,Ecnl_3,Ecnl_3s,EcLDA
 
-  Integer na,nb,nc,nabc
-  Real(kind=DP) aa(3,3),dv
-  Real(kind=DP), Allocatable :: rho(:,:,:),grad(:,:,:)
-
   real(kind=DP) :: phi0=2.77d0 
- 
+
+  Integer na,nb,nc,nabc
+  Real(kind=DP) :: aa(3,3),dv
+  Real(kind=DP), Allocatable :: rho(:,:,:),grad(:,:,:), cgrad(:,:,:,:)
   real(kind=DP),allocatable,dimension(:) :: phi_ab
 
-! Grid points
+  ! Grid points
 !!!! Spline curves
   Integer nq0
   real(kind=DP) :: qa,qb,q0max,q0min
@@ -218,12 +233,12 @@ module m_vdWDF
 !!!! The table of phidD
   real(kind=DP), allocatable, dimension(:,:) :: phidD
 
-! Internal parameters
+  ! Internal parameters
   Real(kind=DP),parameter :: rhomin=1.d-9
 
-! Real-space and reciprocal-space Functions
+  ! Real-space and reciprocal-space Functions
   Complex*16, Allocatable :: theta_G(:,:,:),     &
-&                            theta_G_ab(:,:,:,:),theta_G_a(:,:,:),theta_G_b(:,:,:)
+       &                            theta_G_ab(:,:,:,:),theta_G_a(:,:,:),theta_G_b(:,:,:)
   Real(kind=DP), Allocatable :: theta_R(:,:,:),dtheta_R(:,:,:,:),ddtheta_R(:,:,:,:)
 
   complex(kind=DP), allocatable, dimension(:,:,:,:) :: ualpha_g,ualpha_r
@@ -239,51 +254,97 @@ module m_vdWDF
   integer, parameter :: FFTW_ESTIMATE = 64
   integer, parameter :: FFTW_FORWARD  = -1
   integer, parameter :: FFTW_BACKWARD = +1
-  contains
+
+  real(kind=DP), allocatable, dimension(:,:,:) :: dFdrho, dFddrho
+  real(kind=DP), allocatable, dimension(:,:,:) :: rkar
+
+  real(kind=DP) :: s_cnl1(3,3), s_cnl2(3,3), ecnl_vdwdf
+  real(kind=DP) :: s_cnl1_pc(3,3), s_cnl2_pc(3,3)
+
+  !  logical :: grad_rho_eq_0 = .true.
+  logical :: grad_rho_eq_0 = .false.
+
+contains
 
   subroutine print_vdw_parameters()
     if(printable)then
-    write(nfout,'(a)')            '-- parameters for the vdW-DF calculations --'
-    write(nfout,'(a,2f15.10,i8)') '   rmax, kmax, nmesh     : ',r12max,maxk,nr12
-    write(nfout,'(a,3f15.10)')    '   dq, lambda, and q0cut : ',dq_vdw,lambda,q0cut
-    write(nfout,'(a,2f15.10,i5)') '   q0min, q0max, nq0     : ',q0min,q0max,nq0
-    write(nfout,'(a,f15.10)' )    '   ds                    : ',ds
-    write(nfout,'(a,i5,2f15.10)') '   na_gl, a1, a2         : ',na_gl,a1,a2
-    if(eval_kernel_by_interpolation)then
-    write(nfout,'(a)')            '   kernel evaluation     : by interpolation'
-    write(nfout,'(a,2i8)')        '   ndel, nphiD           : ',ndel,nphiD
-    else
-    write(nfout,'(a)')            '   kernel evaluation     : direct'
-    endif
+       write(nfout,'(a)')            '-- parameters for the vdW-DF calculations --'
+       write(nfout,'(a,2f15.10,i8)') '   rmax, kmax, nmesh     : ',r12max,maxk,nr12
+       write(nfout,'(a,3f15.10)')    '   dq, lambda, and q0cut : ',dq_vdw,lambda,q0cut
+       write(nfout,'(a,2f15.10,i5)') '   q0min, q0max, nq0     : ',q0min,q0max,nq0
+       write(nfout,'(a,f15.10)' )    '   ds                    : ',ds
+       write(nfout,'(a,i5,2f15.10)') '   na_gl, a1, a2         : ',na_gl,a1,a2
+       if(eval_kernel_by_interpolation)then
+          write(nfout,'(a)')            '   kernel evaluation     : by interpolation'
+          write(nfout,'(a,2i8)')        '   ndel, nphiD           : ',ndel,nphiD
+       else
+          write(nfout,'(a)')            '   kernel evaluation     : direct'
+       endif
     endif
   end subroutine print_vdw_parameters
 
   subroutine initialize_vdwdf_scf(nspin,ispin,na,nb,nc,chgr,grad_rho)
     use progress_bar, only : set_printable
+
     integer, intent(in) :: nspin,ispin,na,nb,nc
     real(kind=DP), dimension(na*nb*nc), intent(in) :: chgr,grad_rho
+
     integer :: i,cix,ciy,ciz,nrxyz
+    integer :: idp, mmp, nlphf, cix2, ciy2, ciz2
     real(kind=DP) :: q
-    call set_printable(printable)
 
+    call set_printable(printable)
     call do_cell_params()
+
+    idp = fft_box_size_CD_nonpara(1,0)
+    mmp = fft_box_size_CD_nonpara(2,0)
+
+    if(kimg == 1) then
+       nlphf = idp/2
+    else
+       nlphf = idp
+    end if
+
     rinplw = 1.d0/(dble(na*nb*nc))
     q0max = q0cut*1.01d0
     q0min = 0.09d0
     nq0 = DINT(dLOG((q0max-q0min)*(lambda-1.d0)/dq_vdw+1.d0)/dLOG(lambda))+1
     maxk = dble(nr12)/r12max
 
+    if(firstcall) call m_Parallel_init_mpi_nq(nfout,ipri,printable,nq0)
     if(firstcall) call print_vdw_parameters()
     call alloc_vdw()
-    do cix = 1,na
-       do ciy = 1,nb
-          do ciz = 1,nc
-             nrxyz=(ciz-1)*nb*na+(ciy-1)*na+cix
-             rho(cix,ciy,ciz) = chgr(nrxyz)
-             grad(cix,ciy,ciz) = grad_rho(nrxyz)
+
+    if ( kimg == 1 ) then
+       do cix = 1,na
+          do ciy = 1,nb
+             do ciz = 1,nc
+                if ( cix > nlphf ) then
+                   cix2 = idp -cix
+                   ciy2 = nb +2 -ciy
+                   ciz2 = nc +2 -ciz
+                   if ( ciy2 > nb ) ciy2 = ciy2 -nb
+                   if ( ciz2 > nc ) ciz2 = ciz2 -nc
+                else
+                   cix2 = cix;  ciy2 = ciy; ciz2 = ciz
+                endif
+                nrxyz=(ciz2-1)*mmp*nlphf +(ciy2-1)*nlphf +cix2
+                rho(cix,ciy,ciz) = chgr(nrxyz)
+                grad(cix,ciy,ciz) = grad_rho(nrxyz)
+             end do
+          end do
+       end do
+    else
+       do cix = 1,na
+          do ciy = 1,nb
+             do ciz = 1,nc
+                nrxyz = (ciz-1)*nb*na +(ciy-1)*na +cix
+                rho(cix,ciy,ciz) = chgr(nrxyz)
+                grad(cix,ciy,ciz) = grad_rho(nrxyz)
+             end do
           end do
        end do
-    end do
+    endif
 
     do i=1,nq0
        q = q0min + dq_vdw*(lambda**DBLE(i-1)-1.d0)/(lambda-1.d0)
@@ -291,7 +352,9 @@ module m_vdWDF
     enddo
     call spline0(nq0,qar,q2ar)
 
-    if(eval_kernel_by_interpolation.and.firstcall) call build_lookup_table(ndel,nphiD,phidD)
+    if (eval_kernel_by_interpolation.and.firstcall) then
+       call build_lookup_table(ndel,nphiD,phidD)
+    endif
     firstcall = .false.
   end subroutine initialize_vdwdf_scf
 
@@ -310,6 +373,7 @@ module m_vdWDF
     q0min = 0.09d0
     nq0 = DINT(dLOG((q0max-q0min)*(lambda-1.d0)/dq_vdw+1.d0)/dLOG(lambda))+1
     maxk = dble(nr12)/r12max
+    if(firstcall) call m_Parallel_init_mpi_nq(nfout,ipri,printable,nq0)
 
     call alloc_vdw()
     call m_CD_get_rspace_charge(nfout,na,nb,nc,rho,is)
@@ -324,9 +388,9 @@ module m_vdWDF
     call spline0(nq0,qar,q2ar)
 
     if(eval_kernel_by_interpolation)then
-      if(printable) write(nfout,'(a)') 'building the lookup table for the kernel function ...'
-      call build_lookup_table(ndel,nphiD,phidD) 
-      if(printable) write(nfout,'(a)') '... done'
+       if(printable) write(nfout,'(a)') 'building the lookup table for the kernel function ...'
+       call build_lookup_table(ndel,nphiD,phidD) 
+       if(printable) write(nfout,'(a)') '... done'
     endif
     if(printable) write(nfout,'(a)') '... done initialization'
 
@@ -341,18 +405,42 @@ module m_vdWDF
        aa(i,1:3) = altv(1:3,i)/dble(fft_box_size_CD(i,1))
     enddo
     dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
-&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
-&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
+         &      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
+         &      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
   end subroutine do_cell_params
 
+  function real_index(iq,pe) result(res)
+    integer, intent(in) :: iq
+    integer, intent(in),optional :: pe
+    integer :: res
+    integer :: ii,mpe
+    mpe = mype
+    if(present(pe))then
+       mpe = pe
+    endif
+    if(mpe==0)then
+       res = iq
+       return
+    endif
+    res = 0
+    do ii=0,mpe-1
+       res = res+nel_nq(ii) 
+    enddo
+    res = res+iq
+    return
+  end function real_index
+
   subroutine alloc_vdw()
+    integer :: i,cix,ciy,ciz
+    real(kind=DP) :: Ta,Tb,Tc, vec(3), bb(3,3)
+
     Allocate(rho(na,nb,nc));rho=0.d0
     allocate(grad(na,nb,nc));grad=0.d0
     Allocate(theta_G(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
     Allocate(theta_R(na,nb,nc))
     if(.not.oneshot)then
-       Allocate(dtheta_R(nq0,na,nb,nc))
-       Allocate(ddtheta_R(nq0,na,nb,nc))
+       Allocate(dtheta_R(np_nq,na,nb,nc))
+       Allocate(ddtheta_R(np_nq,na,nb,nc))
     endif
     Allocate(theta_G_a(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
     Allocate(theta_G_b(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
@@ -360,17 +448,57 @@ module m_vdWDF
     if(eval_kernel_by_interpolation.and.firstcall) allocate(phidD(0:ndel,-1:nphiD+1))
     allocate(qar(nq0));qar = 0.d0
     allocate(q2ar(nq0,nq0));q2ar = 0.d0
-    allocate(theta_G_ab(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    !    allocate(theta_G_ab(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    if(sw_save_memory_vdw) then
+       allocate(theta_G_ab(np_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    else
+       allocate(theta_G_ab(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+    endif
     if(.not.oneshot)then
-       allocate(ualpha_g(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
-       allocate(ualpha_r(nq0,na,nb,nc))
+       !       allocate(ualpha_g(nq0,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+       !       allocate(ualpha_r(nq0,na,nb,nc))
+       allocate(dFdrho(na,nb,nc))
+       allocate(dFddrho(na,nb,nc))
     endif
+
+    allocate(rkar(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+
+#if 0
+    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
+    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
+    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
+    Do ciz = -(nc/2-1),nc/2
+       Do ciy = -(nb/2-1),nb/2
+          Do cix = -(na/2-1),na/2
+             rkar(cix,ciy,ciz) = &
+                  &    DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
+          Enddo
+       Enddo
+    Enddo
+#else
+    Do i=1, 3
+       bb(1,i) = rltv(i,1) /PAI /2.0d0
+       bb(2,i) = rltv(i,2) /PAI /2.0d0
+       bb(3,i) = rltv(i,3) /PAI /2.0d0
+    End Do
+
+    Do ciz = -(nc/2-1),nc/2
+       Do ciy = -(nb/2-1),nb/2
+          Do cix = -(na/2-1),na/2
+             vec(1:3) = dble(cix) *bb(1,1:3) &
+                  &    +dble(ciy) *bb(2,1:3) &
+                  &    +dble(ciz) *bb(3,1:3)
+             rkar(cix,ciy,ciz) = DSQRT( vec(1)**2 +vec(2)**2 +vec(3)**2 )
+          End Do
+       End Do
+    End Do
+#endif
   end subroutine alloc_vdw
 
   subroutine finalize_vdwdf()
     call dealloc()
 
-    contains
+  contains
     subroutine dealloc()
       deallocate(rho)
       deallocate(grad)
@@ -388,9 +516,14 @@ module m_vdWDF
       deallocate(q2ar)
       deallocate(theta_G_ab)
       if(.not.oneshot)then
-         deallocate(ualpha_g)
-         deallocate(ualpha_r)
+         !         deallocate(ualpha_g)
+         !         deallocate(ualpha_r)
+         deallocate(dFdrho)
+         deallocate(dFddrho)
       endif
+      deallocate(rkar)
+      !
+      if ( allocated( cgrad ) ) deallocate( cgrad )
     end subroutine dealloc
   end subroutine finalize_vdwdf
 
@@ -404,28 +537,32 @@ module m_vdWDF
     real(kind=DP) :: ddel,dphiD
     integer :: ierr
     integer :: id_sname=-1
+
     call tstatc0_begin('build_lookup_table ',id_sname,1)
+
     phidD = 0.d0
-    ddel = 1/dble(ndel)
+    ddel = 1.d0 /dble(ndel)
     dphiD = (q0max*eta1-q0min*etai)/(dble(nphiD))
+
     if(oneshot) then
        call reset_progress()
        call set_end(int(floor(dble(ndel+1)/dble(npes))))
     endif
+
     do cdel=0,ndel
        if(mod(cdel,npes)/=mype) cycle
        if(oneshot) call progress()
        do cphiD = -1,nphiD+1
           del = dble(cdel)*ddel
           phiD = q0min*etai + dble(cphiD)*dphiD
-          di = phiD*(1+del)
-          dk = phiD*(1-del)
+          di = phiD*(1.d0+del)
+          dk = phiD*(1.d0-del)
           Call kernel_phi(di,dk,tmp)
           phidD(cdel,cphiD) = tmp
        enddo
     enddo
     if(npes>1) &
-    & call mpi_allreduce(MPI_IN_PLACE,phidD,(ndel+1)*(nphiD+2),mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+         & call mpi_allreduce(MPI_IN_PLACE,phidD,(ndel+1)*(nphiD+2),mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
     call tstatc0_end(id_sname)
   end subroutine build_lookup_table
 
@@ -434,15 +571,18 @@ module m_vdWDF
     integer :: cqa,ierr
     integer :: id_sname = -1
     real(kind=DP),allocatable,dimension(:,:,:) :: tmpdr,tmpddr
+
     call tstatc0_begin('build_theta ',id_sname,1)
-!+++++++++++++++ Execute FFT for theta_R_ab ++++++++++++++++++++
+
+    !+++++++++++++++ Execute FFT for theta_R_ab ++++++++++++++++++++
     if(printable.and.oneshot) &
-    & write(nfout,'(a)') 'building theta (spline coefficient x rho) and their Fourier transforms ...'
+         & write(nfout,'(a)') 'building theta (spline coefficient x rho) and their Fourier transforms ...'
     if(oneshot)then
        call reset_progress()
        call set_end(int(floor(dble(nq0)/dble(npes))))
     endif
     theta_G_ab(:,:,:,:) = (0.d0,0.d0)
+
     if(.not.oneshot)then
        allocate(tmpdr(na,nb,nc));tmpdr=0.d0
        allocate(tmpddr(na,nb,nc));tmpddr=0.d0
@@ -451,32 +591,31 @@ module m_vdWDF
        allocate(tmpdr(1,1,1))
        allocate(tmpddr(1,1,1))
     endif
-    Do cqa = 1,nq0
-       if(mod(cqa,npes)/=mype) cycle
+    Do cqa = ista_nq,iend_nq
        if(oneshot) call progress()
        Call theta_ab(na,nb,nc,cqa,nq0,q0min,q0max,rho,grad,rhomin,theta_R,tmpdr,tmpddr)
        Call RtoG(na,nb,nc,theta_R,theta_G)
-       theta_G_ab(cqa,:,:,:) = theta_G(:,:,:)
+       if(sw_save_memory_vdw)then
+          theta_G_ab(map_z_nq(cqa),:,:,:) = theta_G(:,:,:)
+       else
+          theta_G_ab(cqa,:,:,:) = theta_G(:,:,:)
+       endif
        if(.not.oneshot)then
-          dtheta_R  (cqa,:,:,:) = tmpdr  (:,:,:)
-          ddtheta_R (cqa,:,:,:) = tmpddr (:,:,:)
+          dtheta_R  (map_z_nq(cqa),:,:,:) = tmpdr  (:,:,:)
+          ddtheta_R (map_z_nq(cqa),:,:,:) = tmpddr (:,:,:)
        endif
     Enddo
+
+    if(.not.sw_save_memory_vdw)then
+       call mpi_allreduce(MPI_IN_PLACE,theta_G_ab,nq0*na*nb*nc,mpi_double_complex, &
+            &    mpi_sum,mpi_comm_group,ierr) 
+    endif
     deallocate(tmpdr)
     deallocate(tmpddr)
-    if(npes>1) then
-      call mpi_allreduce(MPI_IN_PLACE,theta_G_ab(1,-(na/2-1),-(nb/2-1),-(nc/2-1)), &
-      &    nq0*na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
-      if(.not.oneshot)then
-         call mpi_allreduce(MPI_IN_PLACE,dtheta_R(1,1,1,1), &
-         &    nq0*na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
-         call mpi_allreduce(MPI_IN_PLACE,ddtheta_R(1,1,1,1), &
-         &    nq0*na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
-      endif
-    endif
+
     if(printable.and.oneshot) write(nfout,'(a)') '... done'
     call tstatc0_end(id_sname)
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   end subroutine build_theta
 
   subroutine phiab_by_interpl(ca,cb,nr12,phi_ab)
@@ -493,6 +632,7 @@ module m_vdWDF
 
     real(kind=DP) :: phi1u
     integer :: id_sname=-1
+
     call tstatc0_begin('phiab ',id_sname,1)
 
     phi_ab = 0.d0
@@ -501,8 +641,8 @@ module m_vdWDF
     qb = qar(cb)
     qab = DSQRT(qa**2+qb**2)
 
-! Coefficients phi2 and phi4 in the local part is determined to
-! match the non-local part in value and slope at d=d_s.
+    ! Coefficients phi2 and phi4 in the local part is determined to
+    ! match the non-local part in value and slope at d=d_s.
 
     rs = ds/qab
 
@@ -520,7 +660,7 @@ module m_vdWDF
     phi4 = (-1.d0/ds**4)*(phid_s-phi0) + (rs/(2.d0*ds**4))*d_phid_s
 
     i = DINT((ds/qab)/dr12)
-! Non-local part of phi_ab(r12)
+    ! Non-local part of phi_ab(r12)
     if(i.ge.nr12)return
     ddel = 1/dble(ndel)
     do cr12 = i+1,nr12
@@ -533,18 +673,18 @@ module m_vdWDF
        phiD = 0.5d0*(di+dk)
        cdel = DINT(del/ddel)
        cphiD = DINT(DBLE(nphiD)*(phiD-q0min*etai)/(q0max*eta1-q0min*etai))
-!     write(6,*) 'cdel, cphiD: ',cdel,cphiD
+       !     write(6,*) 'cdel, cphiD: ',cdel,cphiD
        if(cdel.ge.ndel.or.cphiD.ge.nphiD.or.cdel.lt.0.or.cphiD.lt.-1)cycle
        phix = del/ddel - dble(cdel)
        phiy = dble(nphiD)*(phiD-q0min*etai)/(q0max*eta1-q0min*etai) - dble(cphiD)
-       phi1u =  (1-phix)*(1-phiy) * phidD(cdel  ,cphiD  ) &
-&              +   phix *(1-phiy) * phidD(cdel+1,cphiD  ) &
-&              +(1-phix)*   phiy  * phidD(cdel  ,cphiD+1) &
-&              +   phix *   phiy  * phidD(cdel+1,cphiD+1)
+       phi1u =  (1.d0-phix)*(1.d0-phiy) * phidD(cdel  ,cphiD  ) &
+            &              +       phix *(1.d0-phiy) * phidD(cdel+1,cphiD  ) &
+            &              +(1.d0-phix)*   phiy  * phidD(cdel  ,cphiD+1) &
+            &              +      phix *   phiy  * phidD(cdel+1,cphiD+1)
        phi_ab(cr12) = phi1u
     enddo
 
-! Local part of phi_ab(r12)
+    ! Local part of phi_ab(r12)
     Do cr12 = 0,i
        r12 = DBLE(cr12)*dr12
 
@@ -556,20 +696,20 @@ module m_vdWDF
     call tstatc0_end(id_sname)
   end subroutine phiab_by_interpl
 
-!** SUBROUTINE phiab **************************************************************************
+  !** SUBROUTINE phiab **************************************************************************
   Subroutine phiab(ca,cb,nr12,phi_ab)
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer,intent(in) :: ca,cb,nr12
     Real(kind=DP),intent(out) :: phi_ab(0:nr12)
 
-! Internal valuables
+    ! Internal valuables
     Integer cr12,i
     Real(kind=DP) qab,qa,qb,r12,rs,phiD,dr12,phi2,phi4,phid_s,phid_s1,d_phid_s,d1,d2,abs_d
     Real(kind=DP) di,dk,tmp,dr
     Parameter(dr=0.001d0)
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
     integer :: id_sname=-1
     call tstatc0_begin('phiab ',id_sname,1)
 
@@ -577,9 +717,9 @@ module m_vdWDF
     qa = qar(ca)
     qb = qar(cb)
     qab = DSQRT(qa**2+qb**2)
- 
-! Coefficients phi2 and phi4 in the local part is determined to
-! match the non-local part in value and slope at d=d_s.
+
+    ! Coefficients phi2 and phi4 in the local part is determined to
+    ! match the non-local part in value and slope at d=d_s.
 
     rs = ds/qab
 
@@ -590,159 +730,439 @@ module m_vdWDF
     di = qa*(rs+dr)
     dk = qb*(rs+dr)
     Call kernel_phi(di,dk,phid_s1)
- 
+
     d_phid_s = (phid_s1 - phid_s)/dr
- 
+
     phi2 = ( 2.d0/ds**2)*(phid_s-phi0) - (rs/(2.d0*ds**2))*d_phid_s
     phi4 = (-1.d0/ds**4)*(phid_s-phi0) + (rs/(2.d0*ds**4))*d_phid_s
 
     i = DINT((ds/qab)/dr12)
-! Non-local part of phi_ab(r12)
+    ! Non-local part of phi_ab(r12)
     if(i.ge.nr12)return
+
     Do cr12 = i+1,nr12
        r12 = DBLE(cr12)*dr12
- 
+
        di = qa*r12
        dk = qb*r12
- 
+
        Call kernel_phi(di,dk,tmp)
        phi_ab(cr12) = tmp
- 
+
     Enddo
- 
-! Local part of phi_ab(r12)
+
+    ! Local part of phi_ab(r12)
     Do cr12 = 0,i
        r12 = DBLE(cr12)*dr12
- 
+
        d1 = qa*r12
        d2 = qb*r12
        phiD = qab*r12
        phi_ab(cr12) = phi0 + phi2*phiD**2 + phi4*phiD**4
     Enddo
- 
+
     call tstatc0_end(id_sname) 
   End subroutine phiab
-!** End SUBROUTINE phiab **********************************************************************
+  !** End SUBROUTINE phiab **********************************************************************
 
-  subroutine vdWdf_core()
+  subroutine vdWdf()
+    if(sw_save_memory_vdw) then
+       call vdWdf_core()
+       !call vdWdf_core2()
+    else
+       call vdWdf_core_org()
+    endif
+  end subroutine vdWdf
+
+  subroutine vdWdf_core_org()
     use progress_bar
     real(kind=DP) :: fac
-    integer :: cqa,cqb,ierr
-    integer,allocatable,dimension(:,:) :: ind
-    integer :: i,ic,i1,i2,i3
+    integer :: cqaa,cqa,cqb,ierr
+    integer :: i,ic,i2,i3
     complex(kind=DP), allocatable, dimension(:,:,:) :: tmpug,tmpur
     integer :: id_sname = -1
+    integer :: id_sname3 = -1
+
     call tstatc0_begin('vdWdf_core ',id_sname,1)
     Ecnl_12 = 0.0d0
     if(printable.and.oneshot) &
-    & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
-    allocate(ind(int(nq0*(nq0-1)*0.5+nq0),2));ind=0
-    ic=0
-!    do cqa=1,nq0
-!       do cqb=cqa,nq0
-!          ic=ic+1
-!          ind(ic,1) = cqa
-!          ind(ic,2) = cqb
-!       enddo
-!    enddo
+         & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
     if(oneshot)then
        call reset_progress()
-       !call set_end(int(floor(dble(ic)/dble(npes))))
        call set_end(int(floor(dble(nq0)/dble(npes))))
     endif
     if(.not.oneshot)then
        allocate(tmpug(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));tmpug=0.d0
-       ualpha_g = (0.d0,0.d0)
+       allocate(tmpur(na,nb,nc));tmpur=0.d0
+       !       ualpha_g = (0.d0,0.d0)
     else
        allocate(tmpug(1,1,1))
     endif
-!    do i=1,ic
-    do cqa=1,nq0
-    if(mod(cqa,npes)/=mype) cycle
-    theta_G_a(:,:,:) = theta_G_ab(cqa,:,:,:)
-    if(.not.oneshot) tmpug=(0.d0,0.d0)
-    if(oneshot) call progress()
-!    do cqb=cqa,nq0
-    do cqb=1,nq0
-       !cqa = ind(i,1)
-       !cqb = ind(i,2)
-       theta_G_b(:,:,:) = theta_G_ab(cqb,:,:,:)
-!       fac=2.d0
-!       if (cqa.eq.cqb) fac=1.d0
-       fac = 1.d0
-       if(eval_kernel_by_interpolation) then
-         call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
-       else
-         Call phiab(cqa,cqb,nr12,phi_ab)
+    if(.not.oneshot)then
+       dFdrho=0.d0;dFddrho=0.d0
+    endif
+
+    do cqaa=1,np_nq
+       cqa = real_index(cqaa)
+       theta_G_a(:,:,:) = theta_G_ab(cqa,:,:,:)
+       if (.not.oneshot)  tmpug=(0.d0,0.d0)
+       if (oneshot) call progress()
+       call tstatc0_begin('vdw_core_core ',id_sname3,1)
+       do cqb=1,nq0
+          theta_G_b(:,:,:) = theta_G_ab(cqb,:,:,:)
+          fac = 1.d0
+          if(eval_kernel_by_interpolation) then
+             call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
+          else
+             Call phiab(cqa,cqb,nr12,phi_ab)
+          endif
+          Call convolution_3d_by_fft(&
+               &    na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
+          Ecnl_12 = Ecnl_12 + Ecnl_12_ab
+       enddo
+       call tstatc0_end(id_sname3)
+       if(.not.oneshot)then
+          call GtoR(na,nb,nc,tmpur,tmpug)
+          dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(cqaa,:,:,:)
+          dFddrho(:,:,:) = dFddrho(:,:,:) + dble(tmpur(:,:,:))*ddtheta_R(cqaa,:,:,:)
        endif
-       Call convolution_3d_by_fft(na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
- 
-       Ecnl_12 = Ecnl_12 + Ecnl_12_ab
-    enddo
-    if(.not.oneshot) ualpha_g(cqa,:,:,:) = tmpug(:,:,:)
     enddo
 
     if(.not.oneshot)then
-       if(npes>1) &
-       & call mpi_allreduce(MPI_IN_PLACE,ualpha_g, &
-       &                    nq0*na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
+       if(npes>1) then
+          call mpi_allreduce(MPI_IN_PLACE,dFdrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          call mpi_allreduce(MPI_IN_PLACE,dFddrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          deallocate(tmpur)
+       endif
+       deallocate(tmpug)
+    endif
+
+    if(npes>1) then
+       call mpi_allreduce(MPI_IN_PLACE,Ecnl_12,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+    endif
+    if(printable.and.oneshot) write(nfout,'(a)') '... done'
+    call tstatc0_end(id_sname)
+
+  end subroutine vdWdf_core_org
+
+  subroutine vdWdf_core()
+    use progress_bar
+    real(kind=DP) :: fac, grho, rtmp, esum
+    integer :: cqaa,cqa,cqb,ierr
+    integer :: i,ic,i2,i3, cix, ciy ,ciz
+    complex(kind=DP), allocatable, dimension(:,:,:) :: tmpug,tmpur
+    complex(kind=DP), allocatable, dimension(:,:,:,:) :: theta_buf_s,theta_buf_r
+    integer :: ipos,i0,i1,isend,irecv,ireq,ireqr
+    integer, allocatable, dimension(:) :: ista
+    integer :: id_sname = -1, id_sname2 = -1, id_sname3 = -1
+
+    Ecnl_12 = 0.0d0
+    esum = 0.0d0
+
+    call tstatc0_begin('vdWdf_core ',id_sname,1)
+    if(printable.and.oneshot) &
+         & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
+
+    if(oneshot)then
+       call reset_progress()
+       call set_end(int(floor(dble(nq0)/dble(npes))))
+    endif
+
+    if(npes>1)then
+       allocate(ista(MPI_STATUS_SIZE))
+       allocate(theta_buf_r(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));theta_buf_r=(0.d0,0.d0)
     endif
 
     if(.not.oneshot)then
-       ualpha_r = (0.d0,0.d0)
+       allocate(tmpug(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));tmpug=0.d0
        allocate(tmpur(na,nb,nc));tmpur=0.d0
-       do i=1,nq0
-          if(mod(i,npes)/=mype) cycle
-          tmpug(:,:,:) = ualpha_g(i,:,:,:)
-          call GtoR(na,nb,nc,tmpur,tmpug)
-          ualpha_r(i,:,:,:) = tmpur(:,:,:)
-       enddo
-       deallocate(tmpur)
+       !       ualpha_g = (0.d0,0.d0)
+    else
+       allocate(tmpug(1,1,1))
     endif
+    if(.not.oneshot)then
+       dFdrho=0.d0;dFddrho=0.d0
+    endif
+
+    irecv = mype+1
+    if(irecv.ge.npes) irecv = irecv-npes
+    isend = mype-1
+    if(isend.lt.0)    isend = isend+npes
+
+    do cqaa=1,mp_nq
+       cqa = real_index(cqaa)
+       if (npes>1)        theta_buf_r(1:np_nq,:,:,:) = theta_G_ab(1:np_nq,:,:,:)
+       if (cqaa.le.np_nq) theta_G_a(:,:,:) = theta_G_ab(cqaa,:,:,:)
+
+       if (.not.oneshot)  tmpug=(0.d0,0.d0)
+       if (oneshot)       call progress()
+
+       call tstatc0_begin('vdw_core_core ',id_sname3,1)
+
+       do i0=0,npes-1
+          ipos = i0+mype
+          if(ipos.ge.npes) ipos = ipos-npes
+          if(ipos.lt.0)    ipos = ipos+npes
+
+          if(cqaa.le.np_nq)then
+             do i1=1,nel_nq(ipos)
+                cqb = real_index(i1,ipos)
+                if(npes>1)then
+                   theta_G_b(:,:,:) = theta_buf_r(i1,:,:,:)
+                else
+                   theta_G_b(:,:,:) = theta_G_ab(i1,:,:,:)
+                endif
+
+                fac = 1.d0
+                if(eval_kernel_by_interpolation) then
+                   call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
+                else
+                   Call phiab(cqa,cqb,nr12,phi_ab)
+                endif
+                Call convolution_3d_by_fft( na,nb,nc,cqa,cqb,nr12,phi_ab, &
+                     &                      theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
+                Ecnl_12 = Ecnl_12 + Ecnl_12_ab
+             enddo
+          endif
+          if(npes>1.and.i0.ne.npes-1)then
+             call tstatc0_begin('vdWdf_core (comm) ',id_sname2,1)
+             allocate(theta_buf_s(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+             theta_buf_s = theta_buf_r
+             call mpi_sendrecv(theta_buf_s,mp_nq*na*nb*nc,mpi_double_complex,isend,0, &
+                  &            theta_buf_r,mp_nq*na*nb*nc,mpi_double_complex,irecv,0, &
+                  &            mpi_comm_group,ista,ierr)
+             deallocate(theta_buf_s)
+             call tstatc0_end(id_sname2)
+          endif
+       enddo
+       call tstatc0_end(id_sname3)
 
-    deallocate(tmpug)
+       if(.not.oneshot.and.cqaa.le.np_nq)then
+          call GtoR(na,nb,nc,tmpur,tmpug)
+#if 1
+          dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(cqaa,:,:,:)
+          if ( .not. grad_rho_eq_0 ) then
+             do cix = 1,na
+                do ciy = 1,nb
+                   do ciz = 1,nc
+                      grho = grad(cix,ciy,ciz)
+                      rtmp = rho(cix,ciy,ciz)
+                      if ( grho > 1.0d-6 ) then
+                         dFddrho(cix,ciy,ciz) = dFddrho(cix,ciy,ciz) &
+                              &                + dble(tmpur(cix,ciy,ciz)) &
+                              &                 *ddtheta_R(cqaa,cix,ciy,ciz) /grho
+                      endif
+                   end do
+                end do
+             end do
+          endif
+#else
+          dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(cqaa,:,:,:)
+          if ( .not. grad_rho_eq_0 ) then
+             dFddrho(:,:,:) = dFddrho(:,:,:) + dble(tmpur(:,:,:))*ddtheta_R(cqaa,:,:,:)
+          endif
+#endif
+
+          if ( iprixc >= 2 ) then
+             do cix = 1,na
+                do ciy = 1,nb
+                   do ciz = 1,nc
+                      esum = esum +dble(tmpur(cix,ciy,ciz)) *theta_R(cix,ciy,ciz)
+                   end do
+                end do
+             end do
+          endif
+       endif
+    enddo
+
+    if(.not.oneshot)then
+       if(npes>1) then
+          call mpi_allreduce(MPI_IN_PLACE,dFdrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          call mpi_allreduce(MPI_IN_PLACE,dFddrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          deallocate(tmpur)
+       endif
+       deallocate(tmpug)
+    endif
 
     if(npes>1) then
        call mpi_allreduce(MPI_IN_PLACE,Ecnl_12,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
-       if(.not.oneshot) &
-       & call mpi_allreduce(MPI_IN_PLACE,ualpha_r,nq0*na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
     endif
-    deallocate(ind)
+
+    if ( iprixc >=2 ) then
+       esum = esum *univol *rinplw /2.0d0
+       call mpi_allreduce(MPI_IN_PLACE,Esum,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+       write(nfout,*) "vdw nonolocal E12 "
+       write(nfout,*) "evaluation in real       space : ", esum
+       write(nfout,*) "           in reciprocal space : ", ecnl_12
+    endif
+
+    if(npes>1)then
+       deallocate(ista)
+       deallocate(theta_buf_r)
+    endif
     if(printable.and.oneshot) write(nfout,'(a)') '... done'
     call tstatc0_end(id_sname)
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   end subroutine vdWdf_core
 
+  subroutine vdWdf_core2()
+    use progress_bar
+    real(kind=DP) :: fac
+    integer :: cqbb,cqa,cqb,ierr
+    integer :: i,ic,i2,i3
+    complex(kind=DP), allocatable, dimension(:,:,:) :: tmpug,tmpur
+    complex(kind=DP), allocatable, dimension(:,:,:,:) :: theta_buf_s,theta_buf_r
+    real(kind=DP), allocatable, dimension(:,:,:,:) :: dtheta_buf
+    integer :: ipos,i0,i1,isend,irecv,ireq,ireqr
+    integer, allocatable, dimension(:) :: ista
+    integer :: id_sname = -1
+    integer :: id_sname2 = -1
+
+    call tstatc0_begin('vdWdf_core2 ',id_sname,1)
+    Ecnl_12 = 0.0d0
+    if(printable.and.oneshot) &
+         & write(nfout,'(a)') 'performing the core operation : Ecnl = sum theta_a x phi_ab x theta_b ...'
+    if(oneshot)then
+       call reset_progress()
+       call set_end(int(floor(dble(nq0)/dble(npes))))
+    endif
+    if(npes>1)then
+       allocate(ista(MPI_STATUS_SIZE))
+       allocate(theta_buf_r(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));theta_buf_r=(0.d0,0.d0)
+       if(.not.oneshot)then
+          allocate(dtheta_buf(mp_nq,na,nb,nc));dtheta_buf=0.d0
+       endif
+    endif
+    if(.not.oneshot)then
+       allocate(tmpug(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2));tmpug=0.d0
+       allocate(tmpur(na,nb,nc));tmpur=0.d0
+       !       ualpha_g = (0.d0,0.d0)
+    else
+       allocate(tmpug(1,1,1))
+    endif
+    if(.not.oneshot)then
+       dFdrho=0.d0;dFddrho=0.d0
+    endif
+
+    irecv = mype+1
+    if(irecv.ge.npes) irecv = irecv-npes
+    isend = mype-1
+    if(isend.lt.0)    isend = isend+npes
+
+    if (npes>1) theta_buf_r(1:np_nq,:,:,:) = theta_G_ab(1:np_nq,:,:,:)
+    do i0=0,npes-1
+       ipos = i0+mype
+       if(ipos.ge.npes) ipos = ipos-npes
+       if(ipos.lt.0)    ipos = ipos+npes
+       do i1=1,nel_nq(ipos)
+          cqa = real_index(i1,ipos)
+          if(npes>1)then
+             theta_G_a(:,:,:) = theta_buf_r(i1,:,:,:)
+          else
+             theta_G_a(:,:,:) = theta_G_ab(i1,:,:,:)
+          endif
+          do cqbb=1,np_nq
+             theta_G_b(:,:,:) = theta_G_ab(cqbb,:,:,:)
+             fac = 1.d0
+             cqb = real_index(cqbb)
+             if(eval_kernel_by_interpolation) then
+                call phiab_by_interpl(cqa,cqb,nr12,phi_ab)
+             else
+                Call phiab(cqa,cqb,nr12,phi_ab)
+             endif
+             Call convolution_3d_by_fft(&
+                  &    na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
+             Ecnl_12 = Ecnl_12 + Ecnl_12_ab
+          enddo
+          if(.not.oneshot)then
+             !             call mpi_allreduce(MPI_IN_PLACE,tmpug,na*nb*nc,mpi_double_complex,mpi_sum,mpi_comm_group,ierr)
+             call GtoR(na,nb,nc,tmpur,tmpug)
+             dFdrho(:,:,:) = dFdrho(:,:,:) + dble(tmpur(:,:,:))*dtheta_R(i1,:,:,:)
+             dFddrho(:,:,:) = dFddrho(:,:,:) + dble(tmpur(:,:,:))*ddtheta_R(i1,:,:,:)
+          endif
+       enddo
+       if(npes>1)then
+          call tstatc0_begin('vdWdf_core (comm) ',id_sname2,1)
+          allocate(theta_buf_s(1:mp_nq,-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2))
+          theta_buf_s = theta_buf_r
+          call mpi_sendrecv(theta_buf_s,mp_nq*na*nb*nc,mpi_double_complex,isend,0, &
+               &             theta_buf_r,mp_nq*na*nb*nc,mpi_double_complex,irecv,0, &
+               &             mpi_comm_group,ista,ierr)
+          if(.not.oneshot)then
+             dtheta_buf = dtheta_R
+             call mpi_sendrecv(dtheta_buf,mp_nq*na*nb*nc,mpi_double_precision,isend,0, &
+                  &             dtheta_R,  mp_nq*na*nb*nc,mpi_double_precision,irecv,0, &
+                  &             mpi_comm_group,ista,ierr)
+             dtheta_buf = ddtheta_R
+             call mpi_sendrecv(dtheta_buf,mp_nq*na*nb*nc,mpi_double_precision,isend,0, &
+                  &             ddtheta_R, mp_nq*na*nb*nc,mpi_double_precision,irecv,0, &
+                  &             mpi_comm_group,ista,ierr)
+          endif
+          deallocate(theta_buf_s)
+          call tstatc0_end(id_sname2)
+       endif
+    enddo
+
+    if(.not.oneshot)then
+       if(npes>1) then
+          call mpi_allreduce(MPI_IN_PLACE,dFdrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          call mpi_allreduce(MPI_IN_PLACE,dFddrho, &
+               &             na*nb*nc,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+          deallocate(tmpur)
+       endif
+       deallocate(tmpug)
+    endif
+
+    if(npes>1) then
+       call mpi_allreduce(MPI_IN_PLACE,Ecnl_12,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
+    endif
+    if(npes>1)then
+       deallocate(ista)
+       deallocate(theta_buf_r)
+       if(.not.oneshot) deallocate(dtheta_buf)
+    endif
+    if(printable.and.oneshot) write(nfout,'(a)') '... done'
+    call tstatc0_end(id_sname)
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+  end subroutine vdWdf_core2
+
   subroutine corrections()
-    !!$if(printable) write(nfout,'(a)') 'calculating correction terms and contribution from the LDA ...'
+!!$if(printable) write(nfout,'(a)') 'calculating correction terms and contribution from the LDA ...'
     if(printable.and.oneshot) write(nfout,'(a)') 'calculating correction terms ...'
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     Call piDphi(Ecnl_3,Ecnl_3s)
 !!$    Call cLDA(na,nb,nc,rho,rhomin,dv,EcLDA)
-!+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     if(printable.and.oneshot) write(nfout,'(a)') '... done'
   end subroutine corrections
 
   subroutine theta_ab(nx,ny,nz,ca,nq0,q0min,q0max,rho,grad,rhomin,theta_R,dtheta_R,ddtheta_R)
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer, intent(in) :: nx,ny,nz,ca,nq0
     Real(kind=DP), intent(in)  :: q0min,q0max,rhomin
     Real(kind=DP), intent(in)  :: rho(nx,ny,nz),grad(nx,ny,nz)
     real(kind=DP), intent(out) :: theta_R(nx,ny,nz),dtheta_R(nx,ny,nz),ddtheta_R(nx,ny,nz)
 
-! Internal valuables
+    ! Internal valuables
     Integer cir,cix,ciy,ciz
     Real(kind=DP) ni,dni,q0,dqdn,dqddn
 
     real(kind=DP),allocatable,dimension(:) :: y,y2tmp
     real(kind=DP) :: yout,y1out
+    integer :: id_sname = -1
 
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    call tstatc0_begin('theta_ab ',id_sname,1)
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
     allocate(y(nq0));y=0.d0;y(ca)=1.d0
     allocate(y2tmp(nq0));y2tmp(:) = q2ar(ca,:)
-! For the functions theta_R
+    ! For the functions theta_R
     Do cir = 1,nx*ny*nz
        cix = 1+(cir-1-MOD(cir-1+ny*nz,ny*nz))/(ny*nz)
        ciy = 1+(cir-ny*nz*(cix-1)-1-MOD(cir-1+nz,nz))/nz
@@ -762,23 +1182,24 @@ module m_vdWDF
     End do
     deallocate(y)
     deallocate(y2tmp)
+    call tstatc0_end(id_sname)
   end subroutine theta_ab
 
-!** End SUBROUTINE theta_ab *******************************************************************
+  !** End SUBROUTINE theta_ab *******************************************************************
 
-!** SUBROUTINE piDphi **********************************************************************************
+  !** SUBROUTINE piDphi **********************************************************************************
   Subroutine piDphi(Ecii,Ecii_s)
     implicit none
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     real(kind=DP), intent(out) :: Ecii,Ecii_s
 
     Real(kind=DP) da,db,a1,a2,dr
     Parameter (dr = 0.001d0)
     Integer  ci,cj,ck,ca,cb
 
-! Gauss-Legendre integration
+    ! Gauss-Legendre integration
     Integer cD,nD
     Parameter (nD=10)
     Real(kind=DP) maxD,minD,dD,LD,PLD,LDxi(nD),LDwi(nD)
@@ -792,23 +1213,23 @@ module m_vdWDF
     Real(kind=DP) x,nnx,nny,nnz,nn2,r
     Real(kind=DP) nxp,nxm,nyp,nym,nzp,nzm
     Real(kind=DP) zx(-3:3),zy(-3:3),zz(-3:3),rn(3,-3:3)
- 
+
     Real(kind=DP) phi
 
     Real(kind=DP) temp,rs,phid_s,phid_s1,d_phid_s,phi2,phi4
 
-! The table of phi1D
+    ! The table of phi1D
     Integer c1D,n1D,ierr
     Parameter(n1D = 1000)
     Real(kind=DP) d1D,max1D,D,phix,phiy
     Real(kind=DP) phi1D(0:n1D+1)
     real(kind=DP) :: q0,dqdn,dqddn
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
 
 
-!---------------------- Calculation Start ----------------------
-! Make the table of phi1D
+    !---------------------- Calculation Start ----------------------
+    ! Make the table of phi1D
     max1D = ds
     d1D = max1D/DBLE(n1D)
     Do c1D = 0,n1D+1
@@ -877,42 +1298,40 @@ module m_vdWDF
        phi4 = (-1.d0/ds**4)*(phid_s-phi0) + (rs/(2.d0*ds**4))*d_phid_s
 
        Ecii_s = Ecii_s + 0.5d0*4.d0*pi*dv*(n**2)*            &
-&                    (phi0*(rs**3)*(q0**0)/3.d0 +            &
-&                     phi2*(rs**5)*(q0**2)/5.d0 +            &
-&                     phi4*(rs**7)*(q0**4)/7.d0)
+            &                    (phi0*(rs**3)*(q0**0)/3.d0 +            &
+            &                     phi2*(rs**5)*(q0**2)/5.d0 +            &
+            &                     phi4*(rs**7)*(q0**4)/7.d0)
 
 
     Enddo
     if(npes>1) call mpi_allreduce(MPI_IN_PLACE,Ecii_s,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
   End Subroutine piDphi
-!** End SUBROUTINE piDphi **********************
+  !** End SUBROUTINE piDphi **********************
 
-!** SUBROUTINE cLDA ****************************
+  !** SUBROUTINE cLDA ****************************
   Subroutine cLDA(na,nb,nc,rho,rhomin,dv,EcLDA)
     Implicit none
 
-!************************ Note *********************************
-! This Algorism follows Dion's 1-shot method.
-!
-! This program is a subroutine.
-! This program calculates the correlation energy from LDA.
-! The formula is given at Eq.(58) (p.93) of 'Theory of the
-!   Inhomogeneous Electron Gas' Lundqvist, March.
-!
-!
-! Input
-!   rho(nrxyz,nsipn) : Total density
-!
-! Output
-!   EcLDA : Correlation energy from LDA.
-!
-!
-!                            Written by Youky Ono in 2009/Jul.
-!***************************************************************
-
-
+    !************************ Note *********************************
+    ! This Algorism follows Dion's 1-shot method.
+    !
+    ! This program is a subroutine.
+    ! This program calculates the correlation energy from LDA.
+    ! The formula is given at Eq.(58) (p.93) of 'Theory of the
+    !   Inhomogeneous Electron Gas' Lundqvist, March.
+    !
+    !
+    ! Input
+    !   rho(nrxyz,nsipn) : Total density
+    !
+    ! Output
+    !   EcLDA : Correlation energy from LDA.
+    !
+    !
+    !                            Written by Youky Ono in 2009/Jul.
+    !***************************************************************
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     integer,intent(in)  :: na,nb,nc
     real(kind=DP),intent(in)  :: rho(na,nb,nc)
     real(kind=DP),intent(in)  :: rhomin
@@ -926,11 +1345,9 @@ module m_vdWDF
 
     real(kind=DP)  e,m
     parameter (e=1.d0,m=1.d0) ! Hatree atomic unit
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
-
-
-!---------------------- Calculation Start ----------------------
+    !---------------------- Calculation Start ----------------------
     EcLDA=0
     Do cjr = 1,na*nb*nc
        if(mod(cjr,npes)/=mype) cycle
@@ -938,102 +1355,105 @@ module m_vdWDF
        cjy = 1+(cjr-nb*nc*(cjx-1)-1-MOD(cjr-1+nc,nc))/nc
        cjz = cjr-nc*(nb*(cjx-1)-1+cjy)
        n = MAX(rho(cjx,cjy,cjz),rhomin)
- 
+
        rs = ((3.d0/(4.d0*pi*n))**(1.d0/3.d0))/aB
        x = rs/11.4d0
        ec = -0.0666d0*0.5d0*((1.d0+x**3)*DLOG(1.d0+1.d0/x)-x**2+x/2.d0-1.d0/3.d0)
- 
+
        EcLDA = EcLDA + dv*n*ec
     Enddo
     if(npes>1) call mpi_allreduce(MPI_IN_PLACE,EcLDA,1,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)
   End Subroutine cLDA
-!** End SUBROUTINE cLDA **********************
+  !** End SUBROUTINE cLDA **********************
 
-!** SUBROUTINE RtoG ***************************************************************************
-! Execute FFT and transform theta_R to theta_G
+  !** SUBROUTINE RtoG ***************************************************************************
+  ! Execute FFT and transform theta_R to theta_G
   Subroutine RtoG(na,nb,nc,theta_R,theta_G)
     Implicit none
 
     !!include "fftw3.f"
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer  na,nb,nc,cix,ciy,ciz,ca,cb,cc
     Real(kind=DP) rx,ry,rz,kx,ky,kz,ra,rb,rc,ka,kb,kc,rk,r12,x,y,z,term,term1
 
     Real(kind=DP) theta_R(na,nb,nc)
     Complex(kind=DP) theta_G(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
 
-! FFTW3 !!!
+    ! FFTW3 !!!
     integer(kind=DP) :: plan
     Complex(kind=DP),allocatable :: temp_R(:,:,:),temp_G(:,:,:)
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
 
     allocate(temp_R(na,nb,nc))
     allocate(temp_G(0:na-1,0:nb-1,0:nc-1))
 
-!***** FFT **************************************************
+    !***** FFT **************************************************
     Do cix = 1,na
-    Do ciy = 1,nb
-    Do ciz = 1,nc
-       temp_R(cix,ciy,ciz) = DCMPLX(theta_R(cix,ciy,ciz))
-    Enddo
-    Enddo
+       Do ciy = 1,nb
+          Do ciz = 1,nc
+             temp_R(cix,ciy,ciz) = DCMPLX(theta_R(cix,ciy,ciz))
+          Enddo
+       Enddo
     Enddo
 
-! FFTW3 !!!
+    ! FFTW3 !!!
     call dfftw_plan_dft_3d(plan,na,nb,nc,temp_R,temp_G,FFTW_FORWARD,FFTW_ESTIMATE)
     call dfftw_execute(plan)
     call dfftw_destroy_plan(plan)
 
     Do ca = -(na/2-1),na/2
-    Do cb = -(nb/2-1),nb/2
-    Do cc = -(nc/2-1),nc/2
-       cix = MOD(ca+na,na)
-       ciy = MOD(cb+nb,nb)
-       ciz = MOD(cc+nc,nc)
-       theta_G(ca,cb,cc) = temp_G(cix,ciy,ciz) / DBLE(na*nb*nc)
-    Enddo
-    Enddo
+       Do cb = -(nb/2-1),nb/2
+          Do cc = -(nc/2-1),nc/2
+             cix = MOD(ca+na,na)
+             ciy = MOD(cb+nb,nb)
+             ciz = MOD(cc+nc,nc)
+             theta_G(ca,cb,cc) = temp_G(cix,ciy,ciz) / DBLE(na*nb*nc)
+          Enddo
+       Enddo
     Enddo
-!***** END of FFT ******************************************
+    !***** END of FFT ******************************************
 
-    deallocate(temp_R)
-    deallocate(temp_G)
+    deallocate(temp_R);   deallocate(temp_G)
   End subroutine RtoG
-!** End SUBROUTINE RtoG ***********************************************************************
+  !** End SUBROUTINE RtoG ***********************************************************************
 
   subroutine GtoR(na,nb,nc,theta_R,theta_G)
-!    include "fftw3.f"
+    !    include "fftw3.f"
     integer,intent(in) :: na,nb,nc
     complex(kind=DP), dimension(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2), intent(in) :: theta_G
     complex(kind=DP), dimension(na,nb,nc), intent(out) :: theta_R
+
     integer :: ca,cb,cc,cix,ciy,ciz
     integer(kind=DP) :: plan
     complex(kind=DP),allocatable :: temp_R(:,:,:),temp_G(:,:,:)
     integer :: id_sname=-1
+
     call tstatc0_begin('GtoR ',id_sname,1)
+
     allocate(temp_R(na,nb,nc))
     allocate(temp_G(0:na-1,0:nb-1,0:nc-1))
+
     Do ca = -(na/2-1),na/2
-    Do cb = -(nb/2-1),nb/2
-    Do cc = -(nc/2-1),nc/2
-       cix = MOD(ca+na,na)
-       ciy = MOD(cb+nb,nb)
-       ciz = MOD(cc+nc,nc)
-       temp_G(cix,ciy,ciz) = theta_G(ca,cb,cc)
-    Enddo
-    Enddo
+       Do cb = -(nb/2-1),nb/2
+          Do cc = -(nc/2-1),nc/2
+             cix = MOD(ca+na,na)
+             ciy = MOD(cb+nb,nb)
+             ciz = MOD(cc+nc,nc)
+             temp_G(cix,ciy,ciz) = theta_G(ca,cb,cc)
+          Enddo
+       Enddo
     Enddo
 
-! FFTW3 !!!
+    ! FFTW3 !!!
     call dfftw_plan_dft_3d(plan,na,nb,nc,temp_G,temp_R,FFTW_BACKWARD,FFTW_ESTIMATE)
     call dfftw_execute(plan)
     call dfftw_destroy_plan(plan)
-!***** END of FFT ******************************************
+    !***** END of FFT ******************************************
 
-    theta_R = temp_R/dble(na*nb*nc)
-!    theta_R = temp_R
+    !    theta_R = temp_R/dble(na*nb*nc)
+    theta_R = temp_R
 
     deallocate(temp_R)
     deallocate(temp_G)
@@ -1041,7 +1461,7 @@ module m_vdWDF
   end subroutine GtoR
 
   subroutine get_phi_ab_g(nr12,phi_ab,phi_ab_g)
-!    include "fftw3.f"
+    !    include "fftw3.f"
     integer, intent(in) :: nr12
     real(kind=DP), dimension(0:nr12), intent(in)  :: phi_ab
     real(kind=DP), dimension(0:nr12), intent(out) :: phi_ab_g
@@ -1050,6 +1470,10 @@ module m_vdWDF
     integer :: cr,ck
     integer(kind=DP) :: plan
     real(kind=DP) :: dd,r12,term,rk
+    real(kind=DP) :: rr
+    complex(kind=DP) :: zsum
+    complex(kind=DP), parameter :: zi = ( 0.0d0, 1.0d0 )
+
     integer :: id_sname = -1
     call tstatc0_begin('get_phi_ab_g ',id_sname,1)
     allocate(cphiab_r(0:nr12));cphiab_r=0.d0
@@ -1058,20 +1482,27 @@ module m_vdWDF
     dd = r12max/dble(nr12)
 
     do cr=0,nr12
-       cphiab_r(cr) = dcmplx(phi_ab(cr)*dble(cr),0.d0)
+       rr = dble(cr) *dd
+       cphiab_r(cr) = dcmplx( phi_ab(cr)*rr, 0.d0 )
     enddo
+
+#if 0
     call dfftw_plan_dft_1d(plan,nr12+1,cphiab_r,cphiab_g,FFTW_BACKWARD,FFTW_ESTIMATE)
+#else
+    call dfftw_plan_dft_1d( plan, nr12, cphiab_r(0:nr12-1), cphiab_g(0:nr12-1), &
+         &                  FFTW_BACKWARD, FFTW_ESTIMATE )
+#endif
     call dfftw_execute(plan)
     call dfftw_destroy_plan(plan)
 
     do ck=1,nr12
        rk = dble(ck)/r12max
-       phi_ab_g(ck) = 2.d0*dd**2*dimag(cphiab_g(ck))/(dble(rk))
-    enddo 
+       phi_ab_g(ck) = 2.d0 *dd *dimag(cphiab_g(ck))/(dble(rk))
+    enddo
 
     Do ck = 0,0
        rk = dd * dble(ck)
-       term = 0
+       term = 0.0d0
        Do cr = 0,nr12
           r12 = dd*dble(cr)
           term = term + phi_ab(cr) * (r12**2)
@@ -1079,23 +1510,19 @@ module m_vdWDF
        phi_ab_g(ck) = 4.0d0*pi*dd * term
     End do
 
-    deallocate(cphiab_r)
-    deallocate(cphiab_g)
+    deallocate(cphiab_r);   deallocate(cphiab_g)
+
+    phi_ab_g = phi_ab_g / univol
 
-!    if(qa==1)then
-!       do ck=0,nr12
-!          rk = dble(ck)/r12max
-!          write(qa*35+qb,*) rk,phi_ab_g(ck)
-!       enddo
-!    endif
     call tstatc0_end(id_sname)
+
   end Subroutine get_phi_ab_g
 
   Subroutine convolution_3d_by_fft(na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,fac,Ecnl_12_ab,tmpug)
 
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     integer, intent(in) :: na,nb,nc,cqa,cqb,nr12
     complex(kind=DP), intent(in)  :: theta_G_a(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
     complex(kind=DP), intent(in)  :: theta_G_b(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
@@ -1113,64 +1540,79 @@ module m_vdWDF
 
     Real(kind=DP),allocatable :: core_G(:)
 
+    real*8 :: dd
+
     real(kind=DP) :: pi2rk
     integer :: id_sname = -1
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
     call tstatc0_begin('convolution_3d_by_fft ',id_sname,1)
     allocate(core_G(0:nr12));core_G=0.d0
     nabc = na*nb*nc
 
-
-    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
-    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
-    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
+    !      dd = r12max/dble(nr12)
 
     call get_phi_ab_g(nr12,phi_ab,core_G)
+
     dk = 1.d0/r12max
     temp_c = (0.0d0,0.0d0)
     !tmpug = 0.d0
-    Do ciz = -(nc/2-1),nc/2
-    Do ciy = -(nb/2-1),nb/2
-    Do cix = -(na/2-1),na/2
 
-       rk = DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
-       !If(rk.LT.maxk) Then
-          ck   = dint(rk/dk)
-          phix = (rk - dk*DBLE(ck))/dk
-          phiy = 1.0 - phix
-          term = phiy*core_G(ck) + phix*core_G(ck+1)
-       !Else
-       !   term = 0.0d0
-       !Endif
-
-       temp_c = temp_c +                        &
-&             DCONJG(theta_G_a(cix,ciy,ciz)) *  &
-&                    theta_G_b(cix,ciy,ciz)  *  &
-&                  term
-       if(.not.oneshot) tmpug(cix,ciy,ciz) = tmpug(cix,ciy,ciz)+theta_G_b(cix,ciy,ciz)*term*dv*nabc!*0.5d0*fac
-    Enddo
-    Enddo
-    Enddo
+    if(oneshot)then
+       Do ciz = -(nc/2-1),nc/2
+          Do ciy = -(nb/2-1),nb/2
+             Do cix = -(na/2-1),na/2
+                rk   = rkar(cix,ciy,ciz)
+                ck   = dint(rk/dk)
+
+                phix = (rk - dk*DBLE(ck))/dk
+                phiy = 1.0d0 -phix
+                term = phiy*core_G(ck) + phix*core_G(ck+1)
+
+                temp_c = temp_c + term *DCONJG(theta_G_a(cix,ciy,ciz))   &
+                     &           *             theta_G_b(cix,ciy,ciz)
+             Enddo
+          Enddo
+       Enddo
+    else
+       Do ciz = -(nc/2-1),nc/2
+          Do ciy = -(nb/2-1),nb/2
+             Do cix = -(na/2-1),na/2
+                rk   = rkar(cix,ciy,ciz)
+                ck   = dint(rk/dk)
+
+                phix = (rk - dk*DBLE(ck))/dk
+                phiy = 1.0d0 - phix
+                term = phiy*core_G(ck) + phix*core_G(ck+1)
+
+                temp_c = temp_c + term *DCONJG(theta_G_a(cix,ciy,ciz))   &
+                     &           *             theta_G_b(cix,ciy,ciz)
+
+                tmpug(cix,ciy,ciz) = tmpug(cix,ciy,ciz) &
+                     &              +theta_G_b(cix,ciy,ciz)*term *univol
+             Enddo
+          Enddo
+       Enddo
+    endif
 
-    temp_c = temp_c * dv*nabc
+    temp_c = temp_c * univol**2
     Ecnl_12_ab = 0.5d0*DBLE(temp_c)*fac
-    !tmpug(:,:,:) = tmpug(:,:,:)*dv*nabc*0.5d0*fac
 
-!***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
+    !***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
 
     deallocate(core_G)
 
     call tstatc0_end(id_sname)
+
   End subroutine convolution_3d_by_fft
-!** End SUBROUTINE convolution_3d *************************************************************
+  !** End SUBROUTINE convolution_3d *************************************************************
 
   Subroutine convolution_3d(na,nb,nc,cqa,cqb,nr12,phi_ab,theta_G_a,theta_G_b,Ecnl_12_ab)
 
     Implicit none
 
-!    include "fftw3.f"
+    !    include "fftw3.f"
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     integer, intent(in) :: na,nb,nc,cqa,cqb,nr12
     complex(kind=DP), intent(in)  :: theta_G_a(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
     complex(kind=DP), intent(in)  :: theta_G_b(-(na/2-1):na/2,-(nb/2-1):nb/2,-(nc/2-1):nc/2)
@@ -1189,21 +1631,21 @@ module m_vdWDF
     Real(kind=DP),allocatable :: phiab_r(:)
 
     real(kind=DP) :: pi2rk
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
 
     allocate(core_G(0:nk-1))
     allocate(phiab_r(0:nr12));phiab_r=0.d0
     nabc = na*nb*nc
 
-!    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
-!&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
-!&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
-
-    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
-    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
-    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
+    !    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
+    !&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
+    !&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
+
+    !    Ta = na*DSQRT(aa(1,1)**2 + aa(1,2)**2 + aa(1,3)**2)
+    !    Tb = nb*DSQRT(aa(2,1)**2 + aa(2,2)**2 + aa(2,3)**2)
+    !    Tc = nc*DSQRT(aa(3,1)**2 + aa(3,2)**2 + aa(3,3)**2)
 
-!***** Make the core function and execute 3d-FFT by hand *****
+    !***** Make the core function and execute 3d-FFT by hand *****
     dr12 = r12max/dble(nr12)
     dk = maxk/dble(nk-1)
 
@@ -1232,71 +1674,72 @@ module m_vdWDF
        Enddo
        core_G(ck) = 2.d0 * dr12 * term / rk
     End do
-!***** END of Make the core function *************************
+    !***** END of Make the core function *************************
 
-!    if(cqa==1)then
-!       do ck=0,nr12
-!          rk = dk * dble(ck)
-!          write((cqa+1)*35+cqb,*) rk,core_G(ck)
-!       enddo
-!    endif
-!***** Calculate 'theta_G_a*core_G*theta_G_b'  ***************
+    !    if(cqa==1)then
+    !       do ck=0,nr12
+    !          rk = dk * dble(ck)
+    !          write((cqa+1)*35+cqb,*) rk,core_G(ck)
+    !       enddo
+    !    endif
+    !***** Calculate 'theta_G_a*core_G*theta_G_b'  ***************
     dk = maxk/dble(nk-1)
     temp_c = (0.0,0.0)
     Do cix = -(na/2-1),na/2
-    Do ciy = -(nb/2-1),nb/2
-    Do ciz = -(nc/2-1),nc/2
+       Do ciy = -(nb/2-1),nb/2
+          Do ciz = -(nc/2-1),nc/2
 
-       rk = DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
-       If(rk.LT.maxk) Then
-          ck = DINT(rk/dk)
-          phix = (rk - dk*DBLE(ck))/dk
-          phiy = 1.0 - phix
-          term = phiy*core_G(ck) + phix*core_G(ck+1)
-       Else
-          term = 0.0d0
-       Endif
-
-       temp_c = temp_c +                        &
-&             DCONJG(theta_G_a(cix,ciy,ciz)) *  &
-&                    theta_G_b(cix,ciy,ciz)  *  &
-&                  term
-    Enddo
-    Enddo
+             !rk = DSQRT((dble(cix)/Ta)**2 + (dble(ciy)/Tb)**2 + (dble(ciz)/Tc)**2)
+             rk = rkar(cix,ciy,ciz)
+             If(rk.LT.maxk) Then
+                ck = DINT(rk/dk)
+                phix = (rk - dk*DBLE(ck))/dk
+                phiy = 1.0 - phix
+                term = phiy*core_G(ck) + phix*core_G(ck+1)
+             Else
+                term = 0.0d0
+             Endif
+
+             temp_c = temp_c +                        &
+                  &             DCONJG(theta_G_a(cix,ciy,ciz)) *  &
+                  &                    theta_G_b(cix,ciy,ciz)  *  &
+                  &                  term
+          Enddo
+       Enddo
     Enddo
 
     temp_c = temp_c * dv*nabc
     Ecnl_12_ab = 0.5d0*DBLE(temp_c)
-!***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
+    !***** End Calculate 'theta_G_a*core_G*theta_G_b'  ***********
 
     deallocate(core_G)
     deallocate(phiab_r)
 
 
   End subroutine convolution_3d
-!** End SUBROUTINE convolution_3d *************************************************************
+  !** End SUBROUTINE convolution_3d *************************************************************
 
   Subroutine d_q0(n,dn,q0min,q0max,q0,dqdn,dqddn)
     Implicit None
 
 
-!************************ Note *********************************
-! This program calculates q0.
-!
-! Input
-!   rho(nrxyz,nsipn) : Total density
-!
-! Output
-!   q0               : 
-!
-!
-!
-!                            Written by Youky Ono
-!***************************************************************
+    !************************ Note *********************************
+    ! This program calculates q0.
+    !
+    ! Input
+    !   rho(nrxyz,nsipn) : Total density
+    !
+    ! Output
+    !   q0               : 
+    !
+    !
+    !
+    !                            Written by Youky Ono
+    !***************************************************************
 
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     real(kind=DP), intent(in)  :: n,dn,q0min,q0max
     real(kind=DP), intent(out) :: q0,dqdn,dqddn
     Double Precision rs,x,nn2,r,term1,term2,term3,rnn2
@@ -1307,17 +1750,17 @@ module m_vdWDF
     integer :: i
     real(kind=DP) :: s2,s,qq
     real(kind=DP) :: drsdn,dxdn,dexcdn,dsdn,dkFdn
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
 
 
-!---------------------- Calculation Start ----------------------
+    !---------------------- Calculation Start ----------------------
     nn2 = dn*dn
     rnn2 = dsqrt(nn2)
     rs = (3.d0/(4.d0*pi*n))**(1.d0/3.d0)
 
-! Eq.(58), (59) (p.93-94) of Theory of the Inhomogeneous Electron gas.
-! S.Lundqvist and N.H.March 1983 Plenum Press, NY
+    ! Eq.(58), (59) (p.93-94) of Theory of the Inhomogeneous Electron gas.
+    ! S.Lundqvist and N.H.March 1983 Plenum Press, NY
     x = rs/11.4d0
     GxcLDA = 0.5d0*((1.d0+x**3)*DLog(1.d0+1.d0/x)-x**2+x/2.d0-1.d0/3.d0)
     excLDA = -0.458d0/rs-0.0666d0*GxcLDA
@@ -1329,22 +1772,37 @@ module m_vdWDF
 
     call hxxc(qq,q0max,q0,dq)
     q0 = max(q0,q0min)
-    
+
     if(q0.eq.q0min)then
-      dqdn = 0.d0;dqddn=0.d0
+       dqdn = 0.d0;dqddn=0.d0
     else
-      drsdn  = -(3.d0/(4.d0*PAI))**(1.d0/3.d0)/3.d0*n**(-4.d0/3.d0)
-      dxdn   = drsdn/11.4d0
-      dexcdn = (0.458d0/(rs*rs))*drsdn-0.0333d0*(3*x*x*dlog(1+1.d0/x)-(1+x**3)/(x*(x+1))-2*x+0.5d0)*dxdn
-      dkFdn  = (pi/kF)**2
-      dsdn   = -(rnn2/(4*kF*n*kF*n))*(dkFdn*n+kF)
-      dqdn   = -(4.d0*pi/3.d0)*dexcdn-(Zab/9.d0)*(2*s*dsdn*kF+s2*dkFdn)
-      dqddn  = -(Zab/9.d0)*s/n
-      dqdn   = dqdn*dq
-      dqddn  = dqddn*dq
+#if 0
+       drsdn  = -(3.d0/(4.d0*PAI))**(1.d0/3.d0)/3.d0*n**(-4.d0/3.d0)
+       dxdn   = drsdn/11.4d0
+       dexcdn = (0.458d0/(rs*rs))*drsdn &
+            &   -0.0333d0*(3*x*x*dlog(1+1.d0/x)-(1+x**3)/(x*(x+1))-2*x+0.5d0)*dxdn
+       dkFdn  = (pi/kF)**2
+!!!       dsdn   = -(rnn2/(4*kF*n*kF*n))*(dkFdn*n+kF)
+       dsdn   = -(rnn2/(2*kF*n*kF*n))*(dkFdn*n+kF)
+       dqdn   = -(4.d0*pi/3.d0)*dexcdn-(Zab/9.d0)*(2*s*dsdn*kF+s2*dkFdn)
+       dqddn  = -(Zab/9.d0)*s/n
+
+#else
+       drsdn  = -rs /3.0d0 /n
+       dxdn   = drsdn/11.4d0
+       dexcdn = (0.458d0/(rs*rs))*drsdn &
+            &   -0.0333d0*(3*x*x*dlog(1+1.d0/x)-(1+x**3)/(x*(x+1))-2*x+0.5d0)*dxdn
+       dkFdn  = kF /3.0d0 /n
+       dsdn   = -s /(kF*n) *(dkFdn*n+kF)
+       dqdn   = -(4.d0*pi/3.d0)*dexcdn-(Zab/9.d0)*(2*s*dsdn*kF+s2*dkFdn)
+!       dqddn  = -(Zab/9.d0)*s2 *2.0 /dn *KF
+       dqddn  = -(Zab/9.d0)*s/n
+#endif
+       dqdn   = dqdn*dq
+       dqddn  = dqddn*dq
     endif
   End Subroutine d_q0
-!** End SUBROUTINE d_q0 ***********************************************************************
+  !** End SUBROUTINE d_q0 ***********************************************************************
 
   subroutine hxxc(x,xc,hx,dhx)
     real(kind=DP), intent(in)  :: x,xc
@@ -1362,62 +1820,59 @@ module m_vdWDF
     dhx = xc*dexp(-summ)*dsumm
   end subroutine hxxc
 
-!** SUBROUTINE derivation *********************************************************************
+  !** SUBROUTINE derivation *********************************************************************
   Subroutine derivation(na,nb,nc,aa,rho,dv,grad)
     Implicit none
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
 
-! The unit cell and the electron density information
+    ! The unit cell and the electron density information
     integer,intent(in) :: na,nb,nc
     real(kind=DP), intent(in) :: rho(na,nb,nc)
     Real(kind=DP), intent(in) :: aa(3,3)
     real(kind=DP), intent(in) :: dv
     real(kind=DP), intent(out) :: grad(na,nb,nc)
 
-! Integers
+    ! Integers
     Integer i,j,k,cx,cy,cz,nabc
 
-! Internal valuables
+    ! Internal valuables
     Integer zx(-3:3),zy(-3:3),zz(-3:3)
     Real(kind=DP) rn(3,-3:3),detr,bb(3,3)
     Real(kind=DP),allocatable ::  darho(:,:,:),dbrho(:,:,:),dcrho(:,:,:)
     real(kind=DP) :: dx,dy,dz
-!+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
+    !+++++++++++++++++++++ End VARIABLES +++++++++++++++++++++++++++
 
     allocate(darho(na,nb,nc));darho=0.d0
     allocate(dbrho(na,nb,nc));dbrho=0.d0
     allocate(dcrho(na,nb,nc));dcrho=0.d0
-!    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
-!&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
-!&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
+    !    dv = aa(1,1)*(aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2)) &
+    !&      + aa(1,2)*(aa(2,3)*aa(3,1)-aa(2,1)*aa(3,3)) &
+    !&      + aa(1,3)*(aa(2,1)*aa(3,2)-aa(2,2)*aa(3,1))
 
     Do cx = 1,na
-    Do cy = 1,nb
-    Do cz = 1,nc
+       Do cy = 1,nb
+          Do cz = 1,nc
 
-    Do j = -3,3
-       zx(j) = MOD(2*na+(cx+j)-1,na)+1
-       zy(j) = MOD(2*nb+(cy+j)-1,nb)+1
-       zz(j) = MOD(2*nc+(cz+j)-1,nc)+1
-
-       rn(1,j) = rho(zx(j),cy,cz)
-       rn(2,j) = rho(cx,zy(j),cz)
-       rn(3,j) = rho(cx,cy,zz(j))
-    Enddo
-
-    darho(cx,cy,cz) = &
-&      (rn(1,3)-9.d0*rn(1,2)+45.d0*rn(1,1)-45.d0*rn(1,-1)+9.d0*rn(1,-2)-rn(1,-3))/(60.d0)
-    dbrho(cx,cy,cz) = &
-&      (rn(2,3)-9.d0*rn(2,2)+45.d0*rn(2,1)-45.d0*rn(2,-1)+9.d0*rn(2,-2)-rn(2,-3))/(60.d0)
-    dcrho(cx,cy,cz) = &
-&      (rn(3,3)-9.d0*rn(3,2)+45.d0*rn(3,1)-45.d0*rn(3,-1)+9.d0*rn(3,-2)-rn(3,-3))/(60.d0)
-    Enddo
-    Enddo
+             Do j = -3,3
+                zx(j) = MOD(2*na+(cx+j)-1,na)+1
+                zy(j) = MOD(2*nb+(cy+j)-1,nb)+1
+                zz(j) = MOD(2*nc+(cz+j)-1,nc)+1
+
+                rn(1,j) = rho(zx(j),cy,cz)
+                rn(2,j) = rho(cx,zy(j),cz)
+                rn(3,j) = rho(cx,cy,zz(j))
+             Enddo
+
+             darho(cx,cy,cz) = (rn(1,3)-9.d0*rn(1,2)+45.d0*rn(1,1)-45.d0*rn(1,-1)+9.d0*rn(1,-2)-rn(1,-3))/(60.d0)
+             dbrho(cx,cy,cz) = (rn(2,3)-9.d0*rn(2,2)+45.d0*rn(2,1)-45.d0*rn(2,-1)+9.d0*rn(2,-2)-rn(2,-3))/(60.d0)
+             dcrho(cx,cy,cz) = (rn(3,3)-9.d0*rn(3,2)+45.d0*rn(3,1)-45.d0*rn(3,-1)+9.d0*rn(3,-2)-rn(3,-3))/(60.d0)
+          Enddo
+       Enddo
     Enddo
 
     detr = (aa(1,1)*aa(2,2)*aa(3,3)+aa(1,2)*aa(2,3)*aa(3,1)+aa(1,3)*aa(2,1)*aa(3,2)) &
-&        - (aa(1,1)*aa(2,3)*aa(3,2)+aa(1,2)*aa(2,1)*aa(3,3)+aa(1,3)*aa(2,2)*aa(3,1))
+         &- (aa(1,1)*aa(2,3)*aa(3,2)+aa(1,2)*aa(2,1)*aa(3,3)+aa(1,3)*aa(2,2)*aa(3,1))
 
     bb(1,1) =  (aa(2,2)*aa(3,3)-aa(2,3)*aa(3,2))/detr
     bb(2,1) = -(aa(2,1)*aa(3,3)-aa(2,3)*aa(3,1))/detr
@@ -1430,15 +1885,14 @@ module m_vdWDF
     bb(3,3) =  (aa(1,1)*aa(2,2)-aa(1,2)*aa(2,1))/detr
 
     Do cx = 1,na
-    Do cy = 1,nb
-    Do cz = 1,nc
-
-    dx = (bb(1,1)*darho(cx,cy,cz) + bb(1,2)*dbrho(cx,cy,cz) + bb(1,3)*dcrho(cx,cy,cz))
-    dy = (bb(2,1)*darho(cx,cy,cz) + bb(2,2)*dbrho(cx,cy,cz) + bb(2,3)*dcrho(cx,cy,cz))
-    dz = (bb(3,1)*darho(cx,cy,cz) + bb(3,2)*dbrho(cx,cy,cz) + bb(3,3)*dcrho(cx,cy,cz))
-    grad(cx,cy,cz) = dsqrt(dx**2+dy**2+dz**2)
-    Enddo
-    Enddo
+       Do cy = 1,nb
+          Do cz = 1,nc
+             dx = (bb(1,1)*darho(cx,cy,cz) + bb(1,2)*dbrho(cx,cy,cz) + bb(1,3)*dcrho(cx,cy,cz))
+             dy = (bb(2,1)*darho(cx,cy,cz) + bb(2,2)*dbrho(cx,cy,cz) + bb(2,3)*dcrho(cx,cy,cz))
+             dz = (bb(3,1)*darho(cx,cy,cz) + bb(3,2)*dbrho(cx,cy,cz) + bb(3,3)*dcrho(cx,cy,cz))
+             grad(cx,cy,cz) = dsqrt(dx**2+dy**2+dz**2)
+          Enddo
+       Enddo
     Enddo
 
     deallocate(darho)
@@ -1446,7 +1900,7 @@ module m_vdWDF
     deallocate(dcrho)
 
   End subroutine derivation
-!** End SUBROUTINE derivation *****************************************************************
+  !** End SUBROUTINE derivation *****************************************************************
 
   subroutine spline0(nq0,x,y2)
     integer, intent(in) :: nq0
@@ -1483,8 +1937,8 @@ module m_vdWDF
     real(kind=DP), intent(in) :: a,b
     real(kind=DP) :: res
     res = 2.d0*((3.d0-a*a)*b*DCOS(b)*DSIN(a) + (3.d0-b*b)*a*DCOS(a)*DSIN(b) &
-&       + (a*a+b*b-3.d0)*DSIN(a)*DSIN(b) &
-&       - 3.d0*a*b*DCOS(a)*DCOS(b))/((a*b)**3)
+         &       + (a*a+b*b-3.d0)*DSIN(a)*DSIN(b) &
+         &       - 3.d0*a*b*DCOS(a)*DCOS(b))/((a*b)**3)
   end function Wab
 
   function Twxyz(w,x,y,z) result (res)
@@ -1494,30 +1948,30 @@ module m_vdWDF
     res = 0.5d0*(1.d0/(w+x)+1.d0/(y+z))*(1.d0/((w+y)*(x+z))+1.d0/((w+z)*(y+x)))
   end function Twxyz
 
-!** SUBROUTINE kernel_phi *********************************************************************
+  !** SUBROUTINE kernel_phi *********************************************************************
   Subroutine kernel_phi(di,dk,phi)
     implicit none
 
-!************************ Note *********************************
-! This Algorism follows Dion's 1-shot method.
-!
-! This program is a subroutine.
-! This program calculates the kernel function phi.
-!
-! Input
-!
-!
-! Output
-!   phi              : 
-!                      
-!
-!
-!                            Written by Youky Ono in 2013/Jan.
-!***************************************************************
+    !************************ Note *********************************
+    ! This Algorism follows Dion's 1-shot method.
+    !
+    ! This program is a subroutine.
+    ! This program calculates the kernel function phi.
+    !
+    ! Input
+    !
+    !
+    ! Output
+    !   phi              : 
+    !                      
+    !
+    !
+    !                            Written by Youky Ono in 2013/Jan.
+    !***************************************************************
 
 
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     real(kind=DP), intent(in)  :: di,dk
     real(kind=DP), intent(out) :: phi
     Integer ca,cb,nb
@@ -1527,75 +1981,75 @@ module m_vdWDF
     real(kind=DP) :: dr
 
 
-! Gauss-Legendre integration
+    ! Gauss-Legendre integration
     Real(kind=DP),allocatable, dimension(:) :: xi,wi
     Real(kind=DP) :: fac
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
-!---------------------- Calculation Start ----------------------
-! Call gauleg for Gauss-Legendre integral
-   allocate(xi(na_gl));xi=0.d0
-   allocate(wi(na_gl));wi=0.d0
-   Call gauleg(a1,a2,na_gl,xi,wi)
-
-   phi = 0.d0
-   Do ca=1,na_gl
-   Do cb=1,ca
-      fac=2.d0
-      if(ca.eq.cb) fac=1.d0
-      a = xi(ca)
-      b = xi(cb)
-
-      v1 = (a**2)/(2*hofy(a,di))
-      v2 = (b**2)/(2*hofy(b,di))
-      v3 = (a**2)/(2*hofy(a,dk))
-      v4 = (b**2)/(2*hofy(b,dk))
-      phi = phi + fac*wi(ca)*wi(cb)*(a*a*b*b)*Wab(a,b)*Twxyz(v1,v2,v3,v4)
-   End Do
-   End Do
-
-   phi = phi * 2*m*(e**4)/(pi**2)
-   deallocate(xi)
-   deallocate(wi)
+    !---------------------- Calculation Start ----------------------
+    ! Call gauleg for Gauss-Legendre integral
+    allocate(xi(na_gl));xi=0.d0
+    allocate(wi(na_gl));wi=0.d0
+    Call gauleg(a1,a2,na_gl,xi,wi)
+
+    phi = 0.d0
+    Do ca=1,na_gl
+       Do cb=1,ca
+          fac=2.d0
+          if(ca.eq.cb) fac=1.d0
+          a = xi(ca)
+          b = xi(cb)
+
+          v1 = (a**2)/(2.d0*hofy(a,di))
+          v2 = (b**2)/(2.d0*hofy(b,di))
+          v3 = (a**2)/(2.d0*hofy(a,dk))
+          v4 = (b**2)/(2.d0*hofy(b,dk))
+          phi = phi + fac*wi(ca)*wi(cb)*(a*a*b*b)*Wab(a,b)*Twxyz(v1,v2,v3,v4)
+       End Do
+    End Do
+
+    phi = phi * 2.d0 *m *(e**4)/(pi**2)
+    deallocate(xi)
+    deallocate(wi)
   end Subroutine kernel_phi
-!** End SUBROUTINE kernel_phi *****************************************************************
+  !** End SUBROUTINE kernel_phi *****************************************************************
 
-!** SUBROUTINE gauleg **********************************************************************************
+  !** SUBROUTINE gauleg **********************************************************************************
   Subroutine gauleg(x1,x2,n,xi,wi)
     Implicit none
     real(kind=DP), intent(in) :: x1,x2
     integer, intent(in) :: n
     real(kind=DP), intent(out)  ::  xi(n),wi(n)
 
-!++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
+    !++++++++++++++++++++++++++ VARIABLES ++++++++++++++++++++++++++
     Integer  m,j,i
     REAL(kind=DP)  z1,z,xm,xl,pp,p3,p2,p1,eta
     Parameter (eta=0.0000000001d0)
 
-!++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
+    !++++++++++++++++++++++++ end VARIABLES ++++++++++++++++++++++++
 
 
 
-!---------------------- Calculation Start ----------------------
+    !---------------------- Calculation Start ----------------------
     m=(n+1)/2
     xm=0.5d0*(x2+x1)
     xl=0.5d0*(x2-x1)
 
-! === DEBUG by tkato 2014/04/22 ================================================
+    ! === DEBUG by tkato 2014/04/22 ================================================
     z1 = 0.0d0
-! ==============================================================================
+    ! ==============================================================================
     Do i=1,m
        z=DCOS(pi*(i-0.25d0)/(n+0.5d0))
        Do While (ABS(z-z1).GT.eta)
           p1=1.0d0
           p2=0.0d0
- 
+
           Do j=1,n
              p3=p2
              P2=p1
              p1=((2.0d0*j-1.d0)*z*p2-(j-1.d0)*p3)/dble(j)
           Enddo
- 
+
           pp=n*(z*p1-p2)/(z*z-1.d0)
           z1=z
           z=z1-p1/pp
@@ -1607,50 +2061,52 @@ module m_vdWDF
        wi(n+1-i) = wi(i)
     Enddo
   End Subroutine gauleg
-!** End SUBROUTINE gauleg ******************************************************************************
+  !** End SUBROUTINE gauleg ******************************************************************************
 
-!** SUBROUTINE outputs ************************************************************************
+  !** SUBROUTINE outputs ************************************************************************
   Subroutine outputs()
     Implicit none
 
     Ecnl = Ecnl_12 + Ecnl_3 - Ecnl_3s
 
     if(printable)then
-    write(nfout,'(a)')  'Here are the results : '
-    Write(nfout,*)  ' '
-    Write(nfout,11) ExGGA
-    Write(nfout,*)  ' '
-    Write(nfout,12) EcLDA
-    Write(nfout,13) Ecnl
-    Write(nfout,14) EcLDA + Ecnl
-    Write(nfout,*)  ' '
-    Write(nfout,15) EcLDA + Ecnl + ExGGA
-    Write(nfout,*)  ' '
-11  Format('E_total(GGA exchange)      = ',F19.13)
-
-12  Format('Ec(LDA)                    = ',F19.13)
-13  Format('Ec(nl)                     = ',F19.13)
-14  Format('Ec (= Ec(LDA) + Ec(nl) )   = ',F19.13)
+       write(nfout,'(a)')  'Here are the results : '
+       Write(nfout,*)  ' '
+       Write(nfout,11) ExGGA
+       Write(nfout,*)  ' '
+       Write(nfout,12) EcLDA
+       Write(nfout,13) Ecnl
+       Write(nfout,14) EcLDA + Ecnl
+       Write(nfout,*)  ' '
+       Write(nfout,15) EcLDA + Ecnl + ExGGA
+       Write(nfout,*)  ' '
+11     Format('E_total(GGA exchange)      = ',F19.13)
+
+12     Format('Ec(LDA)                    = ',F19.13)
+13     Format('Ec(nl)                     = ',F19.13)
+14     Format('Ec (= Ec(LDA) + Ec(nl) )   = ',F19.13)
 
-15  Format('E_total(vdW-DF)            = ',F19.13)
+15     Format('E_total(vdW-DF)            = ',F19.13)
 
-    Write(nfout,*)  '                  Given in Hartree atomic units'
-    Write(nfout,*)  ' '
+       Write(nfout,*)  '                  Given in Hartree atomic units'
+       Write(nfout,*)  ' '
     endif
 
   End Subroutine outputs
-!** End SUBROUTINE outputs ********************************************************************
+  !** End SUBROUTINE outputs ********************************************************************
 
-  subroutine get_dFdrho_dFddrho(na,nb,nc,dFdrho,dFddrho)
+  subroutine get_dFdrho_dFddrho(na,nb,nc,dFdrho_,dFddrho_)
     integer, intent(in) :: na,nb,nc
-    real(kind=DP), dimension(na,nb,nc), intent(out) :: dFdrho,dFddrho
+    real(kind=DP), dimension(na,nb,nc), intent(out) :: dFdrho_,dFddrho_
     integer :: i,i1,i2,i3
-    dFdrho = 0.0d0;dFddrho = 0.0d0
-    do i=1,nq0
-       dFdrho (:,:,:) = dFdrho (:,:,:) + dble(ualpha_r(i,:,:,:))*dtheta_R (i,:,:,:)/(univol*rinplw)
-       dFddrho(:,:,:) = dFddrho(:,:,:) + dble(ualpha_r(i,:,:,:))*ddtheta_R(i,:,:,:)/(univol*rinplw)
-    enddo
- 
+    dFdrho_ = dFdrho
+    dFddrho_ = dFddrho
+    !    dFdrho = 0.0d0;dFddrho = 0.0d0 ! this can be calculated on the fly!!
+    !    do i=1,nq0
+    !       dFdrho (:,:,:) = dFdrho (:,:,:) + dble(ualpha_r(i,:,:,:))*dtheta_R (i,:,:,:)/(univol*rinplw)
+    !       dFddrho(:,:,:) = dFddrho(:,:,:) + dble(ualpha_r(i,:,:,:))*ddtheta_R(i,:,:,:)/(univol*rinplw)
+    !    enddo
+
   end subroutine get_dFdrho_dFddrho
 
 
diff -uprN phase0_2015.01/src_phase_3d/mdmain.F90 phase0_2015.01.01/src_phase_3d/mdmain.F90
--- phase0_2015.01/src_phase_3d/mdmain.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/mdmain.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,4 +1,4 @@
-!#define POST3D
+#define POST3D
 !=======================================================================
 !
 !  PROGRAM  PHASE/0 2015.01 ($Rev: 440 $)
diff -uprN phase0_2015.01/src_phase_3d/mdmain0.F90 phase0_2015.01.01/src_phase_3d/mdmain0.F90
--- phase0_2015.01/src_phase_3d/mdmain0.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/mdmain0.F90	2016-07-12 12:51:53.000000000 +0900
@@ -1,6 +1,6 @@
 !=======================================================================
 !
-!  PROGRAM  PHASE/0 2015.01 ($Rev: 443 $)
+!  PROGRAM  PHASE/0 2015.01 ($Rev: 482 $)
 !
 !  MAIN PROGRAM: PHASE
 !
@@ -32,7 +32,7 @@
 !  Consortium.
 !   The activity of development of this program set has been supervised by Takahisa Ohno.
 !
-!  $Id: mdmain0.F90 443 2015-08-03 13:03:48Z ktagami $
+!  $Id: mdmain0.F90 482 2016-04-08 08:40:45Z jkoga $
 !
 program PHASE
 #ifdef NEC_TIMER
@@ -48,7 +48,7 @@ program PHASE
   implicit none
   logical  :: ChargeDensity_is_Converged, TotalEnergy_is_Divergent
   logical  :: Already_Converged, Already_Converged2
-  logical  :: Positron_bulk, Positron_defect
+  logical  :: Positron_scf, Positron_nonscf
   logical  :: Hubbard_model
   logical  :: Forces_are_Converged, Ending_Time, Force_errors_are_tolerable,UnitCell_Converged
 !!$  logical  :: ChargeDensity_is_Fixed
@@ -110,6 +110,8 @@ program PHASE
 #endif
 
   call Ewald_and_Structure_Factor
+  if ( Positron_scf() ) call Initial_pWaveFunctions()
+
   call Initial_Electronic_Structure
 
   if(ChargeDensity_is_Fixed() .and. One_by_one_in_each_rank_k()) then ! icond=2, 3
@@ -142,6 +144,8 @@ program PHASE
 ! ============================================================ 5.0
 
                  call Renewal_of_WaveFunctions
+                 if ( Positron_scf() ) call Renewal_of_pWaveFunctions                 
+
                  call ChargeDensity_Construction(0)
                  call Potential_Construction
 
@@ -158,7 +162,8 @@ program PHASE
                  if ( PotentialMix() ) then
                  else
                     call Renewal_of_Potential
-                    if(Hubbard_model()) then
+                    if ( Positron_scf() ) call Renewal_of_pPotential
+                    if (Hubbard_model() ) then
                        call Renewal_of_Hubbard_Parameters
                        call Renewal_of_Hubbard_Potential
                     end if
@@ -176,6 +181,10 @@ program PHASE
                  call Move_Ions
                  call MDIterationNumber_Setting
                  call Ewald_and_Structure_Factor
+                 if ( Hubbard_model() ) then
+                    call Renewal_of_Hubbard_Parameters
+                    call Renewal_of_Hubbard_Potential
+                 end if
 !!$                 call MDIterationNumber_Setting
               end if
               if(BreakMD(force_conv))then
@@ -194,16 +203,21 @@ program PHASE
 #endif
      end if
 
-     if(Already_Converged2() .and. .not.Positron_defect() .and. Positron_bulk()) then
+     if ( Already_Converged2() ) then
+        if ( Positron_nonscf() ) then
 #ifdef NEC_ITER_REG
-        call FTRACE_REGION_BEGIN("POSITRON")
+           call FTRACE_REGION_BEGIN("POSITRON")
 #endif
-        call Renewal_of_pPotential()
-        call Solve_pWaveFunctions()
+           call Initial_pWaveFunctions()
+           call Renewal_of_pPotential()
+           call Solve_pWaveFunctions()
 #ifdef NEC_ITER_REG
-        call FTRACE_REGION_END("POSITRON")
+           call FTRACE_REGION_END("POSITRON")
 #endif
-     end if
+        else if ( Positron_scf() ) then
+           call Write_Positron_LifeTime
+        end if
+     endif
 
 #ifdef NEC_ITER_REG
      call FTRACE_REGION_BEGIN("FINAL")
@@ -470,4 +484,11 @@ contains
 #endif
      call m_CtrlP_set_init_status(.true.)
    end subroutine Array_Deallocate
+
+   subroutine Write_Positron_LifeTime
+     use m_Positron_Wave_Functions, only : m_pWF_wlifetime
+
+     call m_pWF_wlifetime()
+   end subroutine Write_Positron_LifeTime
+
 end program PHASE
diff -uprN phase0_2015.01/src_phase_3d/mpi_dummy.F90 phase0_2015.01.01/src_phase_3d/mpi_dummy.F90
--- phase0_2015.01/src_phase_3d/mpi_dummy.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/mpi_dummy.F90	2016-07-12 12:51:53.000000000 +0900
@@ -169,6 +169,12 @@ subroutine mpi_copy( send_buf, recv_buf,
 	end if
 end
 
+subroutine mpi_sendrecv(sendbuf,sendcount,sendtype,dest,sendtag,recvbuf,recvcount,recvtype,source,recvtag,comm,status,ierr)
+        integer :: sendcount,sendtype,dest,sendtag,recvcount,recvtype,source,recvtag,comm,status,ierr
+        logical(4) :: sendbuf(sendcount*2),recvbuf(recvcount*2)
+        call mpi_copy( sendbuf, recvbuf, sendcount, sendtype, ierr )
+end
+
 #ifndef _NO_ARG_DUMMY_
 function iargc()
 	iargc = 0
diff -uprN phase0_2015.01/src_phase_3d/vdW.F90 phase0_2015.01.01/src_phase_3d/vdW.F90
--- phase0_2015.01/src_phase_3d/vdW.F90	2015-08-05 14:58:03.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/vdW.F90	2016-07-12 12:51:53.000000000 +0900
@@ -48,7 +48,7 @@ subroutine vdW_scf(nspin,ispin,na,nb,nc,
   call build_theta()
   call vdWdf_core()
   call corrections()
-  ecnl = (Ecnl_12 + Ecnl_3 - Ecnl_3s)/(univol*rinplw)
+  ecnl = Ecnl_12 + Ecnl_3 - Ecnl_3s
   call get_dFdrho_dFddrho(na,nb,nc,dFdrho,dFddrho)
   call finalize_vdwdf()
   call tstatc0_end(id_sname)
diff -uprN phase0_2015.01/src_phase_3d/version.h phase0_2015.01.01/src_phase_3d/version.h
--- phase0_2015.01/src_phase_3d/version.h	2015-09-15 12:16:46.000000000 +0900
+++ phase0_2015.01.01/src_phase_3d/version.h	2016-07-12 12:51:53.000000000 +0900
@@ -1 +1 @@
-integer, parameter :: svn_revision = 460
+integer, parameter :: svn_revision = 511