!#define POST3D
!=======================================================================
!
!  PROGRAM  PHASE/0 2014.01 (rev.375)
!
!  "First-principles Electronic Structure Calculation Program"
!
!  MAIN PROGRAM: PHASE
!
!  AUTHOR(S): T. Yamasaki   August/20/2003
!  
!  FURTHER MODIFICATION: T. Yamasaki, January/13/2004
!  
!  Contact address :  Phase System Consortium
!                     E-mail: phase_system@nims.go.jp URL https://azuma.nims.go.jp
!  
!
!
!=======================================================================
!
!   Since 2002, this program set had been intensively developed as a part of the following 
!  national projects supported by the Ministry of Education, Culture, Sports, Science and 
!  Technology (MEXT) of Japan; "Frontier Simulation Software for Industrial Science 
!  (FSIS)" from 2002 to 2005, "Revolutionary Simulation Software (RSS21)" from 2006 to 
!  2008. "Research and Development of Innovative Simulation Software (RISS)" from 2008 
!  to 2013. These projects is lead by the Center for Research on Innovative Simulation 
!  Software (CISS), the Institute of Industrial Science (IIS), the University of Tokyo.
!   Since 2013, this program set has been further developed centering on PHASE System 
!  Consortium. 
!   The activity of development of this program set has been supervised by Takahisa Ohno.
!
!  $Id: mdmain.F90 339 2013-09-11 13:53:00Z yamasaki $
!
program PHASE

#ifdef PARA3D
  use m_Parallelization,     only : myrank_k_3D  , map_k_3D, np_e   &
 &                                , ista_k       , iend_k           &
 &                                , ista_kngp    , iend_kngp        &
 &                                , ista_k_3D    , iend_k_3D        &
 &                                , ista_kngp_3D , iend_kngp_3D     &
 &                                , ista_snl, iend_snl &
 &                                , np_fs_3D     , np_e_3D , np_g1k_3D 
  use m_Electronic_Structure,only : zaj_l, zaj_l_3D       &
 &                                , eko_l, eko_l_3D       &
 &                                , fsr_l, fsr_l_3D       &
 &                                , fsi_l, fsi_l_3D       &
 &                                , occup_l, occup_l_3D    &
 &                                , vlhxc_l, vlhxc_l_3D   &
 &                                , nrvf_ordr,neordr      &
 &                                , vlhxcQ
  use z_interface_3D, only        : decomp_eko_l_3D       &
       &                          , decomp_eko_l_3D_2     &
       &                          , decomp_eko_l_r_3D     &
       &                          , decomp_eko_l_r_3D_2   &
       &                          , decomp_fsr_l_3D       &
       &                          , decomp_fsr_l_3D_ik    &
       &                          , decomp_snl_l_3D       &
       &                          , decomp_snl_l_r_3D     &
       &                          , decomp_snl_l_3D_2     &
       &                          , decomp_zaj_l_3D       &
       &                          , decomp_zaj_l_3D_ik    &
       &                          , decomp_fsr_l_r_3D     &
       &                          , decomp_fsr_l_r_3D_ik  &
       &                          , decomp_vnlph_l_r_3D   &
       &                          , decomp_vnlph_l_3D     &
       &                          , decomp_zaj_l_r_3D     &
       &                          , decomp_zaj_l_r_3D_ik  &
       &                          , decomp_vlhxc_l_3D     &
       &                          , decomp_vlhxc_l_r_3D   &
       &                          , decomp_wfsd_l_3D      &
       &                          , decomp_wfsd_l_r_3D    &
       &                          , decomp_zfm3_l_3D      &
       &                          , decomp_zfm3_l_r_3D    &
       &                          , decomp_psc_l_3D       &
       &                          , decomp_psc_l_r_3D     &
       &                          , decomp_qitg_l_3D      &
       &                          , decomp_qitg_l_r_3D    &
       &                          , decomp_gr_l_3D        &
       &                          , decomp_gr_l_r_3D      &
       &                          , decomp_occup_l_3D     &
       &                          , decomp_occup_l_r_3D   &
       &                          , decomp_ngpt_l_3D      &
       &                          , decomp_ngpt_l_r_3D    &
       &                          , decomp_igfp_l_3D      &
       &                          , decomp_igfp_l_r_3D    &
       &                          , replacement_zaj_ball_eigenvalue  &
       &                          , replacement_zaj_ball_sequence
  use m_Parallelization,    only  : mype
  use m_IterationNumbers,   only  : iteration
  use m_Control_Parameters,  only : nspin,kimg,neg,af,istress,sw_fine_STM_simulation, ON &
       &                          , initial_chg, sw_positron
  use m_Crystal_Structure,  only : nopr
  use m_Const_Parameters,    only : GAMMA, from_PSEUDOPOTENTIAL_FILE, DEFECT, BULK
  use m_PlaneWaveBasisSet,   only : kg1,kg,kgp           &
  &                               , gr_l   , gr_l_3D     &
  &                               , ngpt_l , ngpt_l_3D   &
  &                               , igfp_l , igfp_l_3D
  use m_Kpoints,             only : kv3, k_symmetry
  use m_PseudoPotential,     only : nlmta   , nlmtt            &
 &                                , qitg_l  , qitg_l_3D  , nqitg   &
 &                                , psc_l   , psc_l_3D             &
 &                                , rhpcg_l , rhpcg_l_3D , ntpcc   &
 &                                , qitg_diff_l  , qitg_diff_l_3D  &
 &                                , psc_diff_l   , psc_diff_l_3D   &
 &                                , rhpcg_diff_l , rhpcg_diff_l_3D &
 &                                , rhcg_l  , rhcg_l_3D   &
 &                                , rhceg_l , rhceg_l_3D  &
 &                                , rhchg_l , rhchg_l_3D  &
 &                                , rhvg_l  , rhvg_l_3D
  use m_NonLocal_Potential,  only : snl, snl_l_3D
  use m_Charge_Density,      only : chgq_l , chgq_l_3D              &
       &                          , chgqo_l, chgqo_l_3D             &
       &                          , chgsoft, chgsoft_3D
  use m_XC_Potential,        only : vxc_l  , vxc_l_3D               &
       &                          , vxcpc_l, vxcpc_l_3D
  use m_Ionic_System,        only : zfm3_l, zfm3_l_3D, ntyp
#endif
#ifdef PAW3D
  use m_Files,                only : nfout
  use m_Ionic_System,         only : natm
  use m_PseudoPotential,      only : mmesh
  use m_Parallelization,      only : m_Parallel_init_mpi_paw_3D
#endif

  use m_Const_Parameters,    only : DRIVER_NEB,DRIVER_CONSTRAINT,DRIVER_MTD
  use m_Parallelization,     only : mpi_comm_group

! === TIMERTIMERTIMER ==========================================================
#ifdef __TIMER_FFT__
use mod_timer
use m_Parallelization, only  : mype
#endif
! === TIMERTIMERTIMER ==========================================================

  implicit none
  logical  :: ChargeDensity_is_Converged, TotalEnergy_is_Divergent
  logical  :: Already_Converged, Already_Converged2
  logical  :: Positron_bulk, Positron_defect, Structure_is_fixed
  logical  :: Hubbard_model
  logical  :: Forces_are_Converged, Ending_Time, Force_errors_are_tolerable
  logical  :: from_Initialize
!!$  logical  :: ChargeDensity_is_Fixed
   interface
     subroutine Initialization(init_mpi)
       integer, intent(in), optional :: init_mpi
     end subroutine Initialization
   end interface
#ifdef NEC_ITER_REG
  integer  :: count_for_ftrace
#endif

  integer :: ispin, ik, iksnl, ierR

! 121212
  integer :: initmpi
  logical :: confpara
  integer :: driver
! 121212
  
#ifdef NEC_ITER_REG
     count_for_ftrace = 0
     call FTRACE_REGION_BEGIN("INITIAL")
#endif

! 121212
  confpara = Resolve_Config_Parallel()
  initmpi=1
  if(confpara) initmpi=0

!  call Initialization(init_mpi=1)
  call Initialization(initmpi)
! 121212

#ifdef FJ_TIMER
                    call timer_end(21)
#endif
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(22)
#endif
  call InputData_Analysis

! 121212
  driver   = Resolve_Driver()
  if (driver==DRIVER_NEB)then
#ifndef DISABLE_CONSTRAINTS
      call do_neb()
  else if (driver==DRIVER_CONSTRAINT) then
      call constrained_dynamics()
  else if (driver==DRIVER_MTD) then
      call meta_dynamics()
#endif
  else
! 121212

#ifdef FJ_TIMER
                    call timer_end(22)
#endif
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(23)
#endif
  call Preparation                   ! Basis set, symmetry check etc.
#ifdef FJ_TIMER
                    call timer_end(23)
#endif
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(24)
#endif
  call Preparation_for_mpi(1)        ! mpi
#ifdef FJ_TIMER
                    call timer_end(24)
#endif
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(25)
#endif
  call PseudoPotential_Construction
#ifdef FJ_TIMER
                    call timer_end(25)
#endif
#ifdef PAW3D
  call  m_Parallel_init_mpi_paw_3D(nfout,natm,mmesh)
#endif
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(26)
#endif

#ifdef ENABLE_ESM
  call Preparation_for_ESM
#endif

  call Ewald_and_Structure_Factor
#ifdef FJ_TIMER
                    call timer_end(26)
#endif
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(27)
#endif
  call Initial_Electronic_Structure
#ifdef FJ_TIMER
                    call timer_end(27)
#endif

#ifdef __TIMER__
                    call timer_end(17)
#endif

!$#ifdef PARA3D
!!  from_Initialize = .false.
!$$  from_Initialize = .true.
!$$  if(from_Initialize) call Para3d_Entry()
!$#endif
 
     call Initial_MD_Condition
#ifdef NEC_ITER_REG
     call FTRACE_REGION_END("INITIAL")
#endif

     if(.not.Already_Converged()) then
#ifdef NEC_ITER_REG
        call FTRACE_REGION_BEGIN("SOLVE-FIRST")
#endif
        StressLoop: do
           AtomicConfiguration: do
!$$#ifdef PARA3D
!$$!              if(.not. from_Initialize) then
!$$!                 call PARA_2Dto3D()     !!!!!!!!!! 2D $B"*(B 3D !!!!!!!!!!
!$$!              end if
!$$!                 from_Initialize = .false.
!$$#endif
              ChargeDensity:    do

#ifdef NEC_ITER_REG
                 count_for_ftrace = count_for_ftrace + 1
                 if(count_for_ftrace .eq. 2) then
                    call FTRACE_REGION_END("SOLVE-FIRST")
                    call FTRACE_REGION_BEGIN("SOLVE-CORE")
                 end if
#endif
                 call IterationNumber_Setting
#ifdef __TIMER__
                 call mpi_barrier( mpi_comm_group, ierr)
                 call timer_sta(16)
#endif
!!$                 call ReadCheckPointData_if_needed
                 call Renewal_of_WaveFunctions
#ifdef __TIMER__
                 call mpi_barrier( mpi_comm_group, ierr)
                 call timer_sta(11)
#endif
                 call ChargeDensity_Construction(0)
                 call ChargeDensity_Mixing
#ifdef __TIMER__
                 call timer_end(11)
#endif
                 if(Ending_Time()) then
#ifdef __TIMER__
                    call timer_end(16)
#endif
                    exit StressLoop
                 end if
                 if(TotalEnergy_is_Divergent()) then
#ifdef __TIMER__
                    call timer_end(16)
#endif
                    exit StressLoop
                 end if
#ifdef __TIMER__
                 call mpi_barrier( mpi_comm_group, ierr)
                 call timer_sta(11)
#endif
                 call Renewal_of_Potential_3D
#ifdef __TIMER__
                 call timer_end(11)
#endif
                 if(Hubbard_model()) then
                    call Renewal_of_Hubbard_Potential
                 end if
!!$                 call WriteCheckPointData  ! if necessary
                 if(ChargeDensity_is_Converged()) then
#ifdef __TIMER__
                    call timer_end(16)
#endif
                    exit ChargeDensity
                 end if
!!$                 call WriteCheckPointData
#ifdef __TIMER__
                 call timer_end(16)
#endif
              enddo ChargeDensity

!$$#ifdef PARA3D
!$$!              call PARA_3Dto2D()     !!!!!!!!!! 3D $B"*(B 2D !!!!!!!!!!
!$$#endif

              if(Structure_is_fixed()) then
                  exit StressLoop
              end if
!$$!              call PARA_2Dto3D()     !!!!!!!!!! 3D $B"*(B 2D !!!!!!!!!!
              call Forces
!$$!              call PARA_3Dto2D()     !!!!!!!!!! 3D $B"*(B 2D !!!!!!!!!!
              if(Forces_are_Converged()) then
                exit AtomicConfiguration
              endif
              if(Force_errors_are_tolerable()) then
                 call Move_Ions
!$$!              call PARA_2Dto3D()     !!!!!!!!!! 3D $B"*(B 2D !!!!!!!!!!
                 call Ewald_and_Structure_Factor
!$$!              call PARA_3Dto2D()     !!!!!!!!!! 3D $B"*(B 2D !!!!!!!!!!
                 call MDIterationNumber_Setting
              end if
           enddo AtomicConfiguration
           exit StressLoop
        end do StressLoop

!$$!        call PARA_3Dto2D()     !!!!!!!!!! 3D $B"*(B 2D !!!!!!!!!!


#ifdef NEC_ITER_REG
        if(count_for_ftrace .eq. 1) then
           call FTRACE_REGION_END("SOLVE-FIRST")
        else
           call FTRACE_REGION_END("SOLVE-CORE")
        end if
#endif
     end if


#ifdef NEC_ITER_REG
     call FTRACE_REGION_BEGIN("FINAL")
#endif

!$$!        call PARA_2Dto3D()
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(39)
#endif
#ifdef POST3D
! === Add postprocessing to 3D Version by tkato ================================
     call Postprocessing(.false.)
! ==============================================================================
#endif
     call rttddft_main
     call WriteDownData_onto_Files(.true.)
#ifdef FJ_TIMER
                    call timer_end(39)
#endif

! 121212
  endif
! 121212

  if(driver/=DRIVER_NEB) call Finalization_of_mpi           ! mpi
#ifdef NEC_ITER_REG
  call FTRACE_REGION_END("FINAL")
#endif
! === TIMERTIMERTIMER ==========================================================
#ifdef __TIMER_FFT__
call print_timer(mype)
#endif
! === TIMERTIMERTIMER ==========================================================
contains

  logical function ChargeDensity_is_Fixed()
    use m_Control_Parameters, only : icond
    use m_Const_Parameters,   only : FIXED_CHARGE, FIXED_CHARGE_CONTINUATION
    if(icond == FIXED_CHARGE .or. icond == FIXED_CHARGE_CONTINUATION) then
       ChargeDensity_is_Fixed = .true.
    else
       ChargeDensity_is_Fixed = .false.
    end if
  end function ChargeDensity_is_Fixed

  logical function One_by_one_in_each_rank_k()
    use m_Control_Parameters, only : fixed_charge_k_parallel
    use m_Const_Parameters,   only : ONE_BY_ONE
    if(fixed_charge_k_parallel == ONE_BY_ONE) then
       One_by_one_in_each_rank_k = .true.
    else
       One_by_one_in_each_rank_k = .false.
    end if
  end function One_by_one_in_each_rank_k

#ifdef PARA3D
  subroutine Para3d_Entry()
    if (allocated(vlhxc_l))    deallocate(vlhxc_l)
    if (allocated(chgq_l))     deallocate(chgq_l)
    if (allocated(chgqo_l))    deallocate(chgqo_l)
    if(istress == ON .or. sw_fine_STM_simulation == ON) then
       if (allocated(chgsoft)) deallocate(chgsoft)
    end if
    if (allocated(vxc_l))      deallocate(vxc_l)
    if (allocated(vxcpc_l))    deallocate(vxcpc_l)
    if (allocated(snl))        deallocate(snl)
    if (allocated(eko_l))      deallocate(eko_l)
    if (allocated(zaj_l))      deallocate(zaj_l)
    if (allocated(fsr_l))      deallocate(fsr_l)
    if(.not.(kv3/nspin == 1 .and. k_symmetry(1) == GAMMA .and. kimg == 2)) then
       if (allocated(fsi_l))   deallocate(fsi_l)
    end if
    if (allocated(occup_l))    deallocate(occup_l)
    if (allocated(zfm3_l))     deallocate(zfm3_l)
    if (allocated(gr_l))       deallocate(gr_l)
    if (allocated(psc_l))      deallocate(psc_l)
    if (allocated(qitg_l))     deallocate(qitg_l)
    if (allocated(rhpcg_l))    deallocate(rhpcg_l)
    if(istress==ON) then
       if (allocated(psc_diff_l))      deallocate(psc_diff_l)
       if (allocated(qitg_diff_l))     deallocate(qitg_diff_l)
       if (allocated(rhpcg_diff_l))    deallocate(rhpcg_diff_l)
    endif
    if (allocated(ngpt_l))     deallocate(ngpt_l)
    if (allocated(igfp_l))    deallocate(igfp_l)
  end subroutine Para3d_Entry

  subroutine PARA_2Dto3D()
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(37)
#endif

    allocate(vlhxc_l_3D(ista_kngp_3D:iend_kngp_3D,kimg,nspin));             vlhxc_l_3D = 0.0d0
    do ispin = 1, nspin, (af+1)
       call decomp_vlhxc_l_3D(vlhxc_l,vlhxc_l_3D,ispin)
    end do
    deallocate(vlhxc_l)

    allocate(chgq_l_3D(ista_kngp_3D:iend_kngp_3D,kimg,nspin));              chgq_l_3D = 0.d0
    do ispin = 1, nspin, (af+1)
       call decomp_vlhxc_l_3D(chgq_l,chgq_l_3D,ispin)
    end do
    deallocate(chgq_l)

    allocate(chgqo_l_3D(ista_kngp_3D:iend_kngp_3D,kimg,nspin));             chgqo_l_3D = 0.d0
    do ispin = 1, nspin, (af+1)
       call decomp_vlhxc_l_3D(chgqo_l,chgqo_l_3D,ispin)
    end do
    deallocate(chgqo_l)

    if(istress == ON .or. sw_fine_STM_simulation == ON) then
       allocate(chgsoft_3D(ista_kngp_3D:iend_kngp_3D,kimg,nspin));          chgsoft_3D = 0.0d0
       do ispin = 1, nspin, (af+1)
          call decomp_vlhxc_l_3D(chgsoft,chgsoft_3D,ispin)
       end do
       deallocate(chgsoft)
    end if

    allocate(vxc_l_3D(ista_kngp_3D:iend_kngp_3D,kimg,nspin));               vxc_l_3D = 0.d0
    do ispin = 1, nspin, (af+1)
       call decomp_vlhxc_l_3D(vxc_l,vxc_l_3D,ispin)
    end do
    deallocate(vxc_l)

    allocate(vxcpc_l_3D(ista_kngp_3D:iend_kngp_3D,kimg));                   vxcpc_l_3D = 0.d0
    deallocate(vxcpc_l)

    allocate(snl_l_3D(maxval(np_g1k_3D),nlmtt,ista_snl:iend_snl));          snl_l_3D = 0.d0
    do ispin = 1, nspin, (af+1)
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
              call decomp_snl_l_3D_2(snl,snl_l_3D,ik)
          end if
       end do
    end do
    deallocate(snl)

    allocate(eko_l_3D(np_e_3D,ista_k_3D:iend_k_3D));                        eko_l_3D = 0.d0
    do ispin = 1, nspin, (af+1)
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
              call decomp_eko_l_3D_2(eko_l,eko_l_3D,ik,nrvf_ordr,'sort')
          end if
       end do
    end do
    deallocate(eko_l)

    allocate(zaj_l_3D(maxval(np_g1k_3D),np_e_3D,ista_k_3D:iend_k_3D,kimg)); zaj_l_3D = 0.0d0
    do ispin = 1, nspin, (af+1)
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
              call decomp_zaj_l_3D_ik(zaj_l,zaj_l_3D,ik,nrvf_ordr,"sort")
          end if
       end do
    end do
    deallocate(zaj_l)

    allocate(fsr_l_3D(np_e_3D,np_fs_3D,ista_k_3D:iend_k_3D));               fsr_l_3D = 0.0d0
    do ispin = 1, nspin, (af+1)
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
              call decomp_fsr_l_3D_ik(fsr_l,fsr_l_3D,ik,nrvf_ordr,'sort')
          end if
       end do
    end do
    deallocate(fsr_l)

    if(.not.(kv3/nspin == 1 .and. k_symmetry(1) == GAMMA .and. kimg == 2)) then
       allocate(fsi_l_3D(np_e_3D,np_fs_3D,ista_k_3D:iend_k_3D));            fsi_l_3D = 0.d0
       do ispin = 1, nspin, (af+1)
          do ik = ispin, kv3-nspin+ispin, nspin
             if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
                call decomp_fsr_l_3D_ik(fsi_l,fsi_l_3D,ik,nrvf_ordr,'sort')
             end if
          end do
       end do
       deallocate(fsi_l)
    end if

    allocate(occup_l_3D(np_e_3D,ista_k_3D:iend_k_3D));                      occup_l_3D = 0.d0
    do ispin = 1, nspin, (af+1)
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
              call decomp_occup_l_3D(occup_l,occup_l_3D,ik,nrvf_ordr,'sort')
          end if
       end do
    end do
    deallocate(occup_l)

    allocate(zfm3_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp,kimg));               zfm3_l_3D = 0.d0
    call decomp_zfm3_l_3D(zfm3_l,zfm3_l_3D)
    deallocate(zfm3_l)

    allocate(gr_l_3D(ista_kngp_3D:iend_kngp_3D));                           gr_l_3D =0.0d0
    call decomp_gr_l_3D(gr_l,gr_l_3D)
    deallocate(gr_l)

    allocate(psc_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp));                     psc_l_3D =0.0d0
    call decomp_psc_l_3D(psc_l,psc_l_3D)
    deallocate(psc_l)

    allocate(qitg_l_3D(ista_kngp_3D:iend_kngp_3D,nqitg));                   qitg_l_3D =0.0d0
    call decomp_qitg_l_3D(qitg_l,qitg_l_3D,nqitg)
    deallocate(qitg_l)

    allocate(rhpcg_l_3D(ista_kngp_3D:iend_kngp_3D,ntpcc));                  rhpcg_l_3D =0.0d0
    call decomp_qitg_l_3D(rhpcg_l,rhpcg_l_3D,ntpcc)
    deallocate(rhpcg_l)

    if(istress==ON) then
       allocate(psc_diff_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp));              psc_diff_l_3D =0.0d0
       call decomp_psc_l_3D(psc_diff_l,psc_diff_l_3D)
       deallocate(psc_diff_l)

       allocate(qitg_diff_l_3D(ista_kngp_3D:iend_kngp_3D,nqitg));            qitg_diff_l_3D =0.0d0
       call decomp_qitg_l_3D(qitg_diff_l,qitg_diff_l_3D,nqitg)
       deallocate(qitg_diff_l)

       allocate(rhpcg_diff_l_3D(ista_kngp_3D:iend_kngp_3D,ntpcc));           rhpcg_diff_l_3D =0.0d0
       call decomp_qitg_l_3D(rhpcg_diff_l,rhpcg_diff_l_3D,ntpcc)
       deallocate(rhpcg_diff_l)
    endif

    allocate(ngpt_l_3D(ista_kngp_3D:iend_kngp_3D,nopr+af));         ngpt_l_3D =0.0d0
    call decomp_ngpt_l_3D(ngpt_l,ngpt_l_3D,nopr+af)
    deallocate(ngpt_l)

    allocate(igfp_l_3D(ista_kngp_3D:iend_kngp_3D));                 igfp_l_3D =0.0d0
    call decomp_igfp_l_3D(igfp_l,igfp_l_3D)
    deallocate(igfp_l)

   if(initial_chg == from_PSEUDOPOTENTIAL_FILE) then
      allocate(rhvg_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp));                     rhvg_l_3D =0.0d0
      call decomp_psc_l_3D(rhvg_l,rhvg_l_3D)
      deallocate(rhvg_l)
   endif

   if(sw_positron == BULK .or. sw_positron == DEFECT) then
      allocate(rhcg_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp));                     rhcg_l_3D =0.0d0
      allocate(rhceg_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp));                    rhceg_l_3D =0.0d0
      allocate(rhchg_l_3D(ista_kngp_3D:iend_kngp_3D,ntyp));                    rhchg_l_3D =0.0d0
      call decomp_psc_l_3D(rhcg_l,rhcg_l_3D)
      call decomp_psc_l_3D(rhceg_l,rhceg_l_3D)
      call decomp_psc_l_3D(rhchg_l,rhchg_l_3D)
      deallocate(rhcg_l)
      deallocate(rhceg_l)
      deallocate(rhchg_l)
   endif

#ifdef FJ_TIMER
                    call timer_end(37)
#endif
  end subroutine PARA_2Dto3D

  subroutine PARA_3Dto2D()
#ifdef FJ_TIMER
                    call mpi_barrier( mpi_comm_group, ierr)
                    call timer_sta(38)
#endif

    allocate(vlhxc_l(ista_kngp:iend_kngp,kimg,nspin)); vlhxc_l = 0.0d0
    do ispin = 1, nspin, af + 1
       call decomp_vlhxc_l_r_3D(vlhxc_l,vlhxc_l_3D,ispin)
    end do
    deallocate(vlhxc_l_3D)

    allocate(chgq_l(ista_kngp:iend_kngp,kimg,nspin)); chgq_l = 0.d0
    do ispin = 1, nspin, af + 1
       call decomp_vlhxc_l_r_3D(chgq_l,chgq_l_3D,ispin)
    end do
    deallocate(chgq_l_3D)

    allocate(chgqo_l(ista_kngp:iend_kngp,kimg,nspin)); chgqo_l = 0.d0
    do ispin = 1, nspin, af + 1
       call decomp_vlhxc_l_r_3D(chgqo_l,chgqo_l_3D,ispin)
    end do
    deallocate(chgqo_l_3D)

    if(istress == ON .or. sw_fine_STM_simulation == ON) then
       allocate(chgsoft(ista_kngp:iend_kngp,kimg,nspin)) ; chgsoft = 0.0d0
       do ispin = 1, nspin, af + 1
          call decomp_vlhxc_l_r_3D(chgsoft,chgsoft_3D,ispin)
       end do
       deallocate(chgsoft_3D)
    end if

    allocate(vxc_l(ista_kngp:iend_kngp,kimg,nspin)); vxc_l = 0.d0
    do ispin = 1, nspin, af + 1
       call decomp_vlhxc_l_r_3D(vxc_l,vxc_l_3D,ispin)
    end do
    deallocate(vxc_l_3D)

    allocate(vxcpc_l(ista_kngp:iend_kngp,kimg)); vxcpc_l = 0.d0
    deallocate(vxcpc_l_3D)

    allocate(snl(kg1,nlmtt,ista_snl:iend_snl)); snl = 0.d0
    call decomp_snl_l_r_3D(snl_l_3D,snl)
    deallocate(snl_l_3D)

    allocate(eko_l(np_e,ista_k:iend_k));                   eko_l = 0.d0
    do ispin = 1, nspin, af + 1
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
             call decomp_eko_l_r_3D_2(eko_l,eko_l_3D,ik,neordr,'sort')
          end if
       end do
    end do
    deallocate(eko_l_3D)

    allocate(zaj_l(kg1,np_e,ista_k:iend_k,kimg));          zaj_l = 0.0d0
    do ispin = 1, nspin, af + 1
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
             call decomp_zaj_l_r_3D_ik(zaj_l,zaj_l_3D,ik,neordr,"sort")
          end if
       end do
    end do
    deallocate(zaj_l_3D)

    allocate(fsr_l(np_e,nlmta,ista_k:iend_k));             fsr_l = 0.0d0
    do ispin = 1, nspin, af + 1
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
             call decomp_fsr_l_r_3D_ik(fsr_l,fsr_l_3D,ik,neordr,"sort",0)
          end if
       end do
    end do
    deallocate(fsr_l_3D)

    if(.not.(kv3/nspin == 1 .and. k_symmetry(1) == GAMMA .and. kimg == 2)) then
       allocate(fsi_l(np_e,nlmta,ista_k:iend_k));          fsi_l = 0.d0
       do ispin = 1, nspin, af + 1
          do ik = ispin, kv3-nspin+ispin, nspin
             if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
                call decomp_fsr_l_r_3D_ik(fsi_l,fsi_l_3D,ik,neordr,"sort",0)
             end if
          end do
       end do
       deallocate(fsi_l_3D)
    end if

    allocate(occup_l(np_e,ista_k:iend_k));                 occup_l = 0.d0
    do ispin = 1, nspin, af + 1
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
             iksnl = (ik-1)/nspin + 1
             call decomp_occup_l_r_3D(occup_l,occup_l_3D,ik,neordr,'sort')
          end if
       end do
    end do
    deallocate(occup_l_3D)

    allocate(zfm3_l(ista_kngp:iend_kngp,ntyp,kimg));       zfm3_l = 0.d0
    call decomp_zfm3_l_r_3D(zfm3_l,zfm3_l_3D)
    deallocate(zfm3_l_3D)

    allocate(gr_l(ista_kngp:iend_kngp));                   gr_l =0.0d0
    call decomp_gr_l_r_3D(gr_l,gr_l_3D)
    deallocate(gr_l_3D)

    allocate(psc_l(ista_kngp:iend_kngp,ntyp));             psc_l =0.0d0
    call decomp_psc_l_r_3D(psc_l,psc_l_3D)
    deallocate(psc_l_3D)

    allocate(qitg_l(ista_kngp:iend_kngp,nqitg));           qitg_l =0.0d0
    call decomp_qitg_l_r_3D(qitg_l,qitg_l_3D,nqitg)
    deallocate(qitg_l_3D)

    allocate(rhpcg_l(ista_kngp:iend_kngp,ntpcc));          rhpcg_l =0.0d0
    call decomp_qitg_l_r_3D(rhpcg_l,rhpcg_l_3D,ntpcc)
    deallocate(rhpcg_l_3D)

    if(istress==ON) then
       allocate(psc_diff_l(ista_kngp:iend_kngp,ntyp));             psc_diff_l =0.0d0
       call decomp_psc_l_r_3D(psc_diff_l,psc_diff_l_3D)
       deallocate(psc_diff_l_3D)

       allocate(qitg_diff_l(ista_kngp:iend_kngp,nqitg));           qitg_diff_l =0.0d0
       call decomp_qitg_l_r_3D(qitg_diff_l,qitg_diff_l_3D,nqitg)
       deallocate(qitg_diff_l_3D)

       allocate(rhpcg_diff_l(ista_kngp:iend_kngp,ntpcc));          rhpcg_diff_l =0.0d0
       call decomp_qitg_l_r_3D(rhpcg_diff_l,rhpcg_diff_l_3D,ntpcc)
       deallocate(rhpcg_diff_l_3D)
    endif

    allocate(ngpt_l(ista_kngp:iend_kngp,nopr+af));         ngpt_l =0.0d0
    call decomp_ngpt_l_r_3D(ngpt_l,ngpt_l_3D,nopr+af)
    deallocate(ngpt_l_3D)

    allocate(igfp_l(ista_kngp:iend_kngp));                 igfp_l =0.0d0
    call decomp_igfp_l_r_3D(igfp_l,igfp_l_3D)
    deallocate(igfp_l_3D)

   if(initial_chg == from_PSEUDOPOTENTIAL_FILE) then
      allocate(rhvg_l(ista_kngp:iend_kngp,ntyp));                     rhvg_l =0.0d0
      call decomp_psc_l_r_3D(rhvg_l,rhvg_l_3D)
      deallocate(rhvg_l_3D)
   endif

   if(sw_positron == BULK .or. sw_positron == DEFECT) then
      allocate(rhcg_l(ista_kngp:iend_kngp,ntyp));                     rhcg_l =0.0d0
      allocate(rhceg_l(ista_kngp:iend_kngp,ntyp));                    rhceg_l =0.0d0
      allocate(rhchg_l(ista_kngp:iend_kngp,ntyp));                    rhchg_l =0.0d0
      call decomp_psc_l_r_3D(rhcg_l,rhcg_l_3D)
      call decomp_psc_l_r_3D(rhceg_l,rhceg_l_3D)
      call decomp_psc_l_r_3D(rhchg_l,rhchg_l_3D)
      deallocate(rhcg_l_3D)
      deallocate(rhceg_l_3D)
      deallocate(rhchg_l_3D)
   endif

#ifdef FJ_TIMER
                    call timer_end(38)
#endif
  end subroutine PARA_3Dto2D

  subroutine output_zaj()
   integer::ispin, ik

    if (.not. allocated(zaj_l)) then
       allocate(zaj_l(kg1,np_e,ista_k:iend_k,kimg))
    end if
    zaj_l = 0.0d0

    do ispin = 1, nspin, af + 1
       do ik = ispin, kv3-nspin+ispin, nspin
          if(map_k_3D(ik) == myrank_k_3D) then           ! MPI
             call decomp_zaj_l_r_3D_ik(zaj_l,zaj_l_3D,ik,neordr,"sort")
          end if
       end do
    end do

    call dump_zaj(zaj_l)

    deallocate(zaj_l)

  endsubroutine output_zaj
#endif

! 121212
  integer function Resolve_Driver()
    use m_Control_Parameters, only : driver
    Resolve_Driver = driver
  end function Resolve_Driver

  logical function Resolve_Config_Parallel()
    use m_Parallelization, only : m_Parallel_resolve_conf_para
    implicit none
    Resolve_Config_Parallel = m_Parallel_resolve_conf_para()
  end function Resolve_Config_Parallel
! 121212

end program PHASE
