コンテンツに飛ぶ | ナビゲーションに飛ぶ

パーソナルツール

Navigation

現在位置: ホーム / Downloads / PHASE System Download / phase0_2020.01.01.patch

phase0_2020.01.01.patch

differences between files icon phase0_2020.01.01.patch — differences between files, 129 KB (132599 bytes)

ファイルコンテンツ

diff --git a/src_phase/ChargeDensity_Mixing.F90 b/src_phase/ChargeDensity_Mixing.F90
index 4367fec..83ec997 100644
--- a/src_phase/ChargeDensity_Mixing.F90
+++ b/src_phase/ChargeDensity_Mixing.F90
@@ -108,7 +108,7 @@ subroutine ChargeDensity_Mixing
   use m_Control_Parameters,  only : sw_calc_ekin_density, ekin_density_type, &
        &                            sw_mix_charge_with_ekindens
   use m_CD_mixing,  only : m_CD_mix_broyden2_intg, m_CD_mix_pulay_intg, &
-       &                   m_CD_simple_mixing_intg
+       &                   m_CD_simple_mixing_intg, m_CD_force_dealloc
   use m_KE_mixing,  only : m_KE_simple_mixing, m_KE_mix_broyden2, m_KE_mix_pulay, &
        &                   m_KE_prepare_precon, &
        &                   m_KE_simple_mixing_00, m_KE_kerker_mixing, &
@@ -121,7 +121,7 @@ subroutine ChargeDensity_Mixing
   real(kind=DP) :: edeltb_per_atom
   integer,save  :: waymix_at_CDmix_previous = 0
   integer       :: waymix_at_CDmix
-  logical       :: ini = .false.
+  logical       :: ini = .false., mixer_changed
   real(kind=DP) :: rmxt_tot, rmxt_hard, rmxt_occmat
 #ifdef __TIMER_SUB__
     call timer_sta(1101)
@@ -134,7 +134,8 @@ subroutine ChargeDensity_Mixing
 
   edeltb_per_atom = m_TE_what_is_edeltb_now()/natm 
   waymix_at_CDmix = m_CtrlP_waymix_now(iteration_electronic, iteration_ionic &
-       &                             , edeltb_per_atom)
+       &                             , edeltb_per_atom,mixer_changed)
+  if(mixer_changed) call m_CD_force_dealloc()
   if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then
      if(waymix_at_CDmix_previous /= waymix_at_CDmix) call m_Iter_cmix_reset()
   end if
diff --git a/src_phase/Initial_Electronic_Structure.F90 b/src_phase/Initial_Electronic_Structure.F90
index dfb0211..6a1dd25 100644
--- a/src_phase/Initial_Electronic_Structure.F90
+++ b/src_phase/Initial_Electronic_Structure.F90
@@ -85,8 +85,12 @@ subroutine Initial_Electronic_Structure
        &                         , m_ES_alloc_eko_ek, m_ES_alloc_eko1 &
        &                         , m_ES_cpeko &
        &                         , m_ES_alloc_Dhub,vlhxc_l &
+#ifdef FFTW3
        &                         , m_ES_cp_zaj_prev_to_zaj &
        &                         , m_ES_gen_zaj_from_prev_zaj
+#else
+       &                         , m_ES_cp_zaj_prev_to_zaj
+#endif
   use m_ES_ortho,           only : m_ES_modified_gram_schmidt
   use m_ES_wf_extrpl,       only : m_ES_wf_extrpl_alloc
   use m_ES_nonlocal,        only : m_ES_betar_dot_WFs
@@ -106,8 +110,12 @@ subroutine Initial_Electronic_Structure
        &                         , m_CD_rd_chgq  &
        &                         , m_CD_initial_CD_by_file_rspace &
        &                         , m_CD_adjust_spindensity  &
+#ifdef FFTW3
        &                         , m_CD_cp_chgq_prev_to_chgq &
        &                         , m_CD_gen_chgq_from_prev_chgq
+#else
+       &                         , m_CD_cp_chgq_prev_to_chgq
+#endif
   use m_Crystal_Structure, only  : sw_fix_total_spin, sw_magnetic_constraint,  univol
 ! <--
   use m_XC_Potential,      only  : vxc_l, exc
@@ -288,7 +296,10 @@ subroutine Initial_Electronic_Structure
      call m_PP_gfqwei(nfout)  ! -> modnrm, fqwei, nlmta1, nlmta2
      call m_CtrlP_set_init_status(.false.)
      call UpdateUeff()
-     if(sw_hybrid_functional == ON) call EXX()
+     if(sw_hybrid_functional == ON) then
+       call m_ES_EXX_init()
+       call EXX()
+     endif
      return
   endif
 
@@ -618,7 +629,11 @@ subroutine Initial_Electronic_Structure
         call m_ES_cp_zaj_prev_to_zaj()
      else if((iteration_unit_cell > 1 .or. iteration_stress_correction>1) .and. &
      &  kg1_prev .ne. kg1 .and. sw_interpolate_wfs == ON) then
+#ifdef FFTW3
         call m_ES_gen_zaj_from_prev_zaj(nfout)
+#else
+        call m_ESIW_by_randomnumbers(nfout,kv3,1,neg)      ! (rndzaj) -> zaj_l
+#endif
      else if(intzaj == by_random_numbers) then
         call m_ESIW_by_randomnumbers(nfout,kv3,1,neg)      ! (rndzaj) -> zaj_l
      else if(intzaj == by_pseudo_atomic_orbitals) then
@@ -706,10 +721,12 @@ subroutine Initial_Electronic_Structure
        call m_ES_alloc_zaj_l_prev(kg1_prev)
      endif
      call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned,read_pwbs)
+#ifdef FFTW3
      if(read_pwbs)then
        call m_ES_gen_zaj_from_prev_zaj(nfout)
        call modified_gramschmidt()
      endif
+#endif
      if(neg_previous < neg) then
         call m_ESIW_by_randomnumbers(nfout,kv3,neg_previous+1,neg)
      end if
@@ -979,10 +996,12 @@ contains
             call m_ES_alloc_zaj_l_prev(kg1_prev)
           endif
           call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned,read_pwbs)
+#ifdef FFTW3
           if(read_pwbs)then
             call m_ES_gen_zaj_from_prev_zaj(nfout)
             call modified_gramschmidt()
           endif
+#endif
        endif
 
     else if ( condition == -4 ) then       ! coordinate-continuation
diff --git a/src_phase/Initialization.F90 b/src_phase/Initialization.F90
index 91617c1..2d40114 100644
--- a/src_phase/Initialization.F90
+++ b/src_phase/Initialization.F90
@@ -174,7 +174,7 @@ contains
   subroutine aavers
     include 'version.h' ! svn_revision
     character(len=72) :: vers, system, codename
-    write(vers,'("phase/0 2020.01 Revision:",i5," -- ORG_Parallel --")') svn_revision
+    write(vers,'("phase/0 2020.01.01 Revision:",i5," -- ORG_Parallel --")') svn_revision
     codename = 'phaseUnif'
     system = ''
 
diff --git a/src_phase/Makefile.M1Mac b/src_phase/Makefile.M1Mac
new file mode 100644
index 0000000..be731d9
--- /dev/null
+++ b/src_phase/Makefile.M1Mac
@@ -0,0 +1,595 @@
+.SUFFIXES:
+.SUFFIXES: .o .F .f .F90 .f90 .c .mod
+
+# Platform    : GNU Linux (EM64T/AMD64)
+# Prog. model : MPI parallel
+# Compiler    : GNU compiler collection (gfortran)
+# BLAS/LAPACK : Netlib BLAS/LAPACK
+# FFT         : FFTW3 library
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+F90 = mpif90
+CC  = gcc
+CPP = 
+AR  = ar -vq
+LINK = mpif90
+F90FLAGS = -O2 -ffree-form -fallow-argument-mismatch
+F77FLAGS = -O2 -ffixed-form -fallow-argument-mismatch
+CFLAGS = -DINTEL -DDARWIN
+
+ESM = yes
+ifdef ESM
+CPPESM=-DENABLE_ESM_PACK
+LESM=-lesm
+else
+CPPESM=
+endif
+
+CPPFLAGS = -DLinux -D_GNU_FORTRAN_ -DFFTW3 -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM}
+LFLAGS = 
+F90FLAGS_FIXED = -ffixed-form
+F90FLAGS_FREE = -ffree-form
+MKLHOME=./
+INCLUDE=-I/opt/homebrew/opt/fftw/include
+LIBS = -L./ ${LESM}  -L./ -llapack -lblas -L/opt/homebrew/opt/fftw/lib -lfftw3 
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+
+ifdef ESM
+ESM_LIB = libesm.a
+else
+ESM_LIB =
+endif
+LAPACK = liblapack.a libblas.a
+FFTOBJECT = 
+OBJ_INPUTPARSE = input_parse.o
+
+PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+PHASE_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_ES_occup.o \
+m_ES_RSB.o \
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_Charge_Density.o  m_CD_Mag_Moment.o m_Orbital_Population.o      m_Dipole.o  m_CD_mixing.o \
+m_PS_opencore.o \
+m_epc_potential.o        \
+m_Positron_Wave_Functions.o \
+m_ES_WF_mixing.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_Representation.o m_OP_rotation.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_OP_Moment.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_Force.o                m_Stress.o               m_ES_WF_by_submat.o \
+m_UnitCell.o \
+m_constraints.o \
+m_ELF.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Raman.o            m_Phonon.o \
+m_Wannier.o m_Wannier90.o m_Replica.o Renewal_of_ChgCtrlParam.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_Excitation.o \
+m_CLS_dipquad.o m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Qpt.o  \
+m_rttddft.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o  \
+m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \
+m_Fcp.o m_Band_Unfolding.o
+
+PHASE_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o \
+
+PHASE_F_OTHERSUBS  = mdmain0.o    constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o              \
+spline.o \
+b_Crystal_Structure.o             b_Electronic_Structure.o \
+b_Words.o                         b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                         b_Kpoints.o \
+b_PlaneWaveBasisSet.o             b_Ionic_System.o \
+b_XC_Potential.o                  b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o\
+input_interface.o                 Initialization.o \
+WriteDownData_onto_Files.o        Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o \
+Renewal_of_WaveFunctions.o \
+Renewal_of_pWaveFunctions.o \
+Uramping.o \
+IterationNumbers_Setting.o        ChargeDensity_Construction.o \
+ChargeDensity_Mixing.o            Renewal_of_Potential.o \
+Renewal_of_Hubbard_Potential.o    Renewal_of_OccMat.o \
+Renewal_of_pPotential.o           Renewal_of_Hubbard_Parameters.o \
+Convergence_Check.o               Forces.o \
+Move_Ions.o                       Initial_MD_Condition.o \
+Stress.o                          Postprocessing.o \
+Finalization_of_mpi.o             miscellaneous.o\
+b_BerryPhase.o \
+heap_sort.o                       real_spherical_harmonics.o \
+Real_space_integ.o                crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+mpi_dummy.o                            WriteDownData_onto_Files_ek.o \
+GaussLeg.o lib_int_deri_add.o  \
+rttddft_main.o \
+Potential_Construction.o        Potential_Mixing.o             ThomasFermiWeiz.o \
+Epsilon_postscf.o vdW.o Epsilon_Paramset.o memsize.o scdft_routines.o \
+lbfgs.o
+
+# for vc_nl
+NLOBJ = vc_nl.o
+
+ifndef SX_DGEMM
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o \
+spg+tetra.o
+else
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+spg+tetra.o \
+dgemm__.o 
+endif
+
+EKCAL_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EKCAL_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_epc_potential.o \
+m_Positron_Wave_Functions.o \
+m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o      m_Dipole.o  m_CD_mixing.o \
+m_PS_opencore.o \
+m_ES_WF_mixing.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_Representation.o m_OP_rotation.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_constraints.o \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o\
+m_UnitCell.o \
+m_ELF.o                  m_Wannier.o \
+m_Wannier90.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Raman.o               m_Phonon.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o \
+m_Excitation.o \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \
+m_Fcp.o m_CLS_dipquad.o m_Band_Unfolding.o
+
+
+EKCAL_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EKCAL_F_OTHERSUBS  = ekmain.o     constraint_main.o          Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+Uramping.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o vdW.o scdft_routines.o memsize.o lbfgs.o
+
+ifndef SX_DGEMM
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o
+else
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o 
+endif
+
+
+EPS_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EPS_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_OP_rotation.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o m_CD_Mag_Moment.o  m_Orbital_Population.o     m_Dipole.o m_CD_mixing.o \
+m_XC_Potential.o \
+m_PS_opencore.o \
+m_ES_WF_mixing.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o          m_Stress.o      m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o       m_Phonon.o               \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Wannier.o \
+m_Wannier90.o \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_Excitation.o \
+m_CLS_dipquad.o m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \
+m_Fcp.o m_Band_Unfolding.o
+
+EPS_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EPS_F_OTHERSUBS  = epsmain.o      constraint_main.o     Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+ThomasFermiWeiz.o         Renewal_of_ChgCtrlParam.o  \
+mpi_dummy.o \
+Uramping.o \
+GaussLeg.o lib_int_deri_add.o vdW.o memsize.o scdft_routines.o lbfgs.o
+
+ifndef SX_DGEMM
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+TDLR_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+TDLR_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_RSB.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_Representation.o m_OP_rotation.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \
+m_XC_Potential.o \
+m_PS_opencore.o \
+m_ES_WF_mixing.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Raman.o          m_Phonon.o               \
+m_Wannier.o \
+m_Wannier90.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Excitation.o \
+m_CLS_dipquad.o m_ES_occup_EPS.o        \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o  m_CoreLevel_Spectrum.o   \
+m_Band_Unfolding.o \
+m_LinearResponse_Qpt.o \
+m_LinearResponse_Tools.o     m_LinearResponse_Density.o  \
+m_LinearResponse_NonInt.o    m_LinearResponse_ALDA.o \
+m_LinearResponse_Kernel.o    m_LinearResponse_BS.o \
+m_LinearResponse_Spectrum.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \
+
+TDLR_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o \
+
+TDLR_F_OTHERSUBS  = tdlrmain.o     constraint_main.o    Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+UnitaryTransform_WF.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o \
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+PseudoPotential_ek_Epsilon.o           \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+b_LinearResponse_Kernel.o  b_LinearResponse_exc.o \
+Uramping.o \
+LinearResponse_Proc.o     LinearResponse_Spec.o vdW.o scdft_routines.o memsize.o lbfgs.o
+
+ifndef SX_DGEMM
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+all : phase ekcal epsmain tdlrmain
+
+ifdef ESM
+phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+else
+phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+endif
+
+ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ)
+	$(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ)
+	$(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ)
+	$(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+ifdef NO_MPI
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)"
+else
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90) -fallow-argument-mismatch" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)"
+endif
+
+liblapack.a:
+	cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+libblas.a:
+	cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h)
+	$(CC) -c $(CFLAGS) $(@:.o=.c)
+
+.f.o:
+	$(F90) -c $(F77FLAGS) $*.f
+
+.f90.o:
+	$(F90) -c $(F90FLAGS) $*.f90
+
+.F.o:
+	$(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F
+
+.F90.o:
+	$(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90
+
+clean:
+	\rm -f *.o *.mod *.a *.lib *.L *.list phase ekcal epsmain tdlrmain
+	\cd LAPACK; make clean
+	\cd BLAS; make clean
+	\cd EsmPack; make clean
+
+install: phase ekcal epsmain tdlrmain
+	\mv -f phase ../bin/
+	\mv -f ekcal ../bin/
+	\mv -f epsmain ../bin/
+	\mv -f tdlrmain ../bin/
diff --git a/src_phase/Preparation.F90 b/src_phase/Preparation.F90
index ac293bd..54910f4 100644
--- a/src_phase/Preparation.F90
+++ b/src_phase/Preparation.F90
@@ -391,7 +391,7 @@ subroutine Preparation(imode)
   call m_CS_alloc_op_tau_tl(nfout)
   if(symmetry_method == AUTOMATIC) then
      call m_CS_SG_auto_gnrt_sym_op(paramset,nfout) ! paramset == .false.
-     call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos
+     if(.not. m_Stress_in_correction()) call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos
   else
                                                      __TIMER_FJ_START(31)
      call m_CS_gnrt_symmetry_operations(paramset,nfout) ! paramset == .false.
diff --git a/src_phase/input_parse.h b/src_phase/input_parse.h
index 3ccdeeb..a7c921c 100644
--- a/src_phase/input_parse.h
+++ b/src_phase/input_parse.h
@@ -26,6 +26,7 @@ last modification: 2002.12.26 by K. Mae
 #include	<malloc.h>
 #else
 #include        <malloc/malloc.h>
+#include	<ctype.h>
 #endif
 /*#include	<math.h>*/
 #include	<string.h>
diff --git a/src_phase/m_CD_mixing.F90 b/src_phase/m_CD_mixing.F90
index 3e120b3..7eaba99 100644
--- a/src_phase/m_CD_mixing.F90
+++ b/src_phase/m_CD_mixing.F90
@@ -4009,11 +4009,17 @@ contains
       integer :: i, nx
       if(hownew == ANEW) then
          m = iter
-         ncrspd(:) = (/(i,i=1,m)/)
+!         ncrspd(:) = (/(i,i=1,m)/)
+         do i=1,m
+           ncrspd(i) = i
+         enddo
       else ! hownew == RENEW
          if(iter <= n) then
             m = iter
-            ncrspd(:) = (/(i,i=1,m)/)
+!            ncrspd(:) = (/(i,i=1,m)/)
+            do i=1,m
+              ncrspd(i) = i
+            enddo
          else
             m = n
             nx = ncrspd(1)
@@ -5870,11 +5876,17 @@ contains
       integer :: i, nx
       if(hownew == ANEW) then
          m = iter
-         ncrspd(:) = (/(i,i=1,m)/)
+!         ncrspd(:) = (/(i,i=1,m)/)
+         do i=1,m
+           ncrspd(i) = i
+         enddo
       else ! hownew == RENEW
          if(iter <= n) then
             m = iter
-            ncrspd(:) = (/(i,i=1,m)/)
+!            ncrspd(:) = (/(i,i=1,m)/)
+            do i=1,m
+              ncrspd(i) = i
+            enddo
          else
             m = n
             nx = ncrspd(1)
diff --git a/src_phase/m_Charge_Density.F90 b/src_phase/m_Charge_Density.F90
index 368d389..7140858 100644
--- a/src_phase/m_Charge_Density.F90
+++ b/src_phase/m_Charge_Density.F90
@@ -4392,10 +4392,11 @@ contains
                                                  __TIMER_IODO_STOP(1451)
        end if
     end if
-
+#ifdef FFTW3
     if(prv)then
       call m_CD_gen_chgq_from_prev_chgq(nfout)
     endif
+#endif
 
     if(ipri >= 3) write(nfout,*) ' !D Reading chgq_g finished'
     total_charge = 0.d0
@@ -8577,6 +8578,7 @@ contains
     chgq_l = chgq_l_prev*(univol_prev/univol)
   end subroutine m_CD_cp_chgq_prev_to_chgq
 
+#ifdef FFTW3
   subroutine m_CD_gen_chgq_from_prev_chgq(nfout)
     use m_Files, only : m_Files_nfcntn_bin_paw_exists,m_Files_open_nfcntn_bin_paw &
                       , m_Files_close_nfcntn_bin_paw, nfcntn_bin_paw
@@ -8793,6 +8795,7 @@ contains
       deallocate(afft); deallocate(afft_mpi1)
     end subroutine dealloc_rspace_charge_prev
   end subroutine m_CD_gen_chgq_from_prev_chgq
+#endif
     
   subroutine m_CD_dealloc
     use m_Files, only : nfout
diff --git a/src_phase/m_Control_Parameters.F90 b/src_phase/m_Control_Parameters.F90
index 9536b85..706ca45 100644
--- a/src_phase/m_Control_Parameters.F90
+++ b/src_phase/m_Control_Parameters.F90
@@ -11045,9 +11045,10 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen
   end function m_CtrlP_way_of_smearing
 
   integer function m_CtrlP_waymix_now(iteration_electronic,iteration_ionic &
-       &                             ,edeltb_per_atom)
+       &                             ,edeltb_per_atom,mixer_changed)
     integer, intent(in)      :: iteration_electronic, iteration_ionic
     real(kind=DP),intent(in) :: edeltb_per_atom
+    logical, intent(out), optional :: mixer_changed
     integer :: ips, ipe, imsd_t, it, ip, i
 
     if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then
@@ -11082,6 +11083,9 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen
 !       if(ip < 0 .or. ip > n_Charge_Mixing_way) stop ' error at m_CtrlP_waymix_now'
        if(ip < 0 .or. ip > n_Charge_Mixing_way) call phase_execution_error(INVALID_CHARGE_MIXING)
        m_CtrlP_waymix_now = charge_mixing(ip)%mixing_way
+       if(present(mixer_changed)) then 
+         mixer_changed = ip_charge_mixing /= ip
+       endif
        ip_charge_mixing = ip
     else
        m_CtrlP_waymix_now = waymix
diff --git a/src_phase/m_ES_IO.F90 b/src_phase/m_ES_IO.F90
index 54160e5..9f015ff 100644
--- a/src_phase/m_ES_IO.F90
+++ b/src_phase/m_ES_IO.F90
@@ -1723,9 +1723,9 @@ contains
              if(ipri>=2) write(nfout,*) ' !D     ie = ', ie
              if(mype == 0) read(nf,err=2,end=2) wf_l     ! MPI
              if(mype == 0 .and. map_ek(ie,ik) /= 0) then ! MPI
-                call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI
+                call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI
              else if(map_ek(ie,ik) == mype .and. map_ek(ie,ik) /= 0) then                  ! MPI
-                call mpi_recv(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr)     ! MPI
+                call mpi_recv(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,istatus,ierr)     ! MPI
              end if
              if(map_ek(ie,ik) == mype) then              ! MPI
                 do ri = 1, kimg
@@ -1786,9 +1786,9 @@ contains
                 end if
 
                 if(map_ek(ie,ik) == mype) then
-                   call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ike2),1,mpi_comm_group,ierr)
+                   call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ike2),1,mpi_comm_group,ierr)
                 else if(map_ek(ie,ike2) == mype) then
-                   call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)
+                   call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)
                 end if
                 if(map_ek(ie,ike2) == mype) then
                    do ri = 1, kimg
@@ -2030,9 +2030,9 @@ contains
                    wf_l(1:nel,ri) = zaj_l(1:nel,map_z(ibt),ik,ri)
                 end do
                 if(map_ek(ibt,ik) /= 0) &                             ! MPI
-                     &   call mpi_send(wf_l,nel*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
+                     &   call mpi_send(wf_l,nel*kimg,mpi_real4,0,1,mpi_comm_group,ierr) ! MPI
              else if(mype == 0 .and. map_ek(ibt,ik) /= 0) then
-                call mpi_recv(wf_l,nel*kimg,mpi_real,map_ek(ibt,ik),1,mpi_comm_group,istatus,ierr)!MPI
+                call mpi_recv(wf_l,nel*kimg,mpi_real4,map_ek(ibt,ik),1,mpi_comm_group,istatus,ierr)!MPI
              end if
              if(mype == 0) wf(:,:,ib-ib1+1) = wf_l(:,:)
           end do
@@ -2301,9 +2301,9 @@ contains
                    wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri)
                 end do
                 if(map_ek(ie,ik) /= 0) &                             ! MPI
-                     &   call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
+                     &   call mpi_send(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,ierr) ! MPI
              else if(mype == 0 .and. map_ek(ie,ik) /= 0) then
-                call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
+                call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
              end if
              if(mype == 0) write(nf)  wf_l                        ! MPI
            else if(precision_WFfile==DP) then
diff --git a/src_phase/m_Electronic_Structure.F90 b/src_phase/m_Electronic_Structure.F90
index 801c7bd..0c5257b 100644
--- a/src_phase/m_Electronic_Structure.F90
+++ b/src_phase/m_Electronic_Structure.F90
@@ -2889,7 +2889,7 @@ contains
     if(iprievdff >= 1) &
 !!$         & write(nfout,'(" >> (",i6,") <eko_old-eko_new>:(",3d13.5,")")') &
 !!$         &    iteration_electronic, evdff(1), evdff(2), evdff(3)
-         & write(nfout,'(" ENERGY EIGEN VALUE SUMM. FOR",i6," -TH ITER (K=",i6,":",i6 &
+         & write(nfout,'(" ENERGY EIGEN VALUE SUM. FOR",i6," -TH ITER (K=",i6,":",i6 &
          & ,") = ",F16.8, " <eko_old-eko_new>:(",3d13.5,")")') &
          &   iteration_electronic,nk_in_the_process, nk_in_the_process+kv3-1 &
          & , ekosum, evdff(1), evdff(2), evdff(3)
@@ -3877,6 +3877,7 @@ contains
     end if
   end subroutine map_zaj_to_fft_box
 
+#ifdef FFTW3
   subroutine m_ES_gen_zaj_from_prev_zaj(nfout)
     integer, intent(in) :: nfout
     integer :: ib,ik,ri
@@ -4099,6 +4100,7 @@ contains
     end subroutine map_zaj_to_fft_box_prev
 
   end subroutine m_ES_gen_zaj_from_prev_zaj
+#endif
 
   subroutine m_ES_alloc_zaj_l_prev(kg1)
     integer, intent(in) :: kg1
diff --git a/src_phase/m_Epsilon_ek.F90 b/src_phase/m_Epsilon_ek.F90
index 49e24fa..cd5e9dc 100644
--- a/src_phase/m_Epsilon_ek.F90
+++ b/src_phase/m_Epsilon_ek.F90
@@ -2113,7 +2113,9 @@ ppc_data(ntyp)
           deallocate(vk00xyz); deallocate(vk0xyz)
           deallocate(vk0_op);  deallocate(nopr_k);  deallocate(op_k)
        end if
-       if(trm2_mode == 0) deallocate(trm2)
+       if(trm2_mode == 0) then
+               if ( allocated(trm2)) deallocate(trm2)
+       end if
     end if
 
     deallocate(e);  deallocate(imeps);   deallocate(reps)
@@ -6511,6 +6513,10 @@ ppc_data(ntyp)
 !    allocate(n_unfilled(nspin)); n_unfilled = 0
 !    allocate(n_half_filled(nspin)); n_half_filled = 0
 !
+    if(allocated(n_filled))      deallocate(n_filled)
+    if(allocated(n_unfilled))    deallocate(n_unfilled)
+    if(allocated(n_half_filled)) deallocate(n_half_filled)
+
     allocate(n_filled(nspin_kt)) ; n_filled = 0
     allocate(n_unfilled(nspin_kt)); n_unfilled = 0
     allocate(n_half_filled(nspin_kt)); n_half_filled = 0
diff --git a/src_phase/m_Ionic_System.F90 b/src_phase/m_Ionic_System.F90
index 7cfe83c..b56f7f5 100644
--- a/src_phase/m_Ionic_System.F90
+++ b/src_phase/m_Ionic_System.F90
@@ -6066,7 +6066,9 @@ contains
              read(nfcntn,*) forcmx_constraint_quench
           end if
        end if
-       if(sw_optimize_lattice==ON)then
+       if(sw_optimize_lattice==ON .or. &
+          imdalg == PT_CONTROL    .or. &
+          imdalg == P_CONTROL) then
           call rewind_to_tag0(nfcntn,len(tag_lattice_vector),tag_lattice_vector &
                &,  EOF_reach, tag_is_found, str, len_str)
           if(tag_is_found)then
@@ -6088,12 +6090,16 @@ contains
             & ,mpi_double_precision,0,mpi_comm_group,ierr)
        call mpi_bcast(forcmx_constraint_quench,1 &
             & ,mpi_double_precision,0,mpi_comm_group,ierr)
-       if(sw_optimize_lattice==ON)then
+       if(sw_optimize_lattice==ON .or. &
+          imdalg == PT_CONTROL    .or. &
+          imdalg == P_CONTROL) then
           call mpi_bcast(altv,9,mpi_double_precision,0,mpi_comm_group,ierr)
        endif
     end if
 
-    if(sw_optimize_lattice==ON)then
+    if(sw_optimize_lattice==ON .or. &
+       imdalg == PT_CONTROL    .or. &
+       imdalg == P_CONTROL) then
        call m_CS_altv_2_rltv()
     endif
 
@@ -6945,7 +6951,8 @@ contains
                   & , cpd_l, cpd_old, ipcpd, iop_supercell)
           end if
 
-          if(iprimd >= 1) then
+!!$          if(iprimd >= 1) then
+          if(iprimd > 1) then
              write(nfout,'(" -- cpd_l, cpd_old (after fd_symmetrize) --")')
              do i  = 1, natm
                 write(nfout,'(i5,6f18.5)') i, cpd_l(i,1:3),cpd_old(i,1:3)
diff --git a/src_phase/m_UnitCell.f90 b/src_phase/m_UnitCell.f90
index 6c5abf5..1b0375a 100644
--- a/src_phase/m_UnitCell.f90
+++ b/src_phase/m_UnitCell.f90
@@ -887,6 +887,10 @@ subroutine m_UnitCell_init_md()
   integer :: i,j
   real(kind=DP), dimension(3) :: avec,bvec,cvec
   if(.not.first_call) return
+  allocate(p_l(natm,3));p_l = 0.d0
+  allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0
+  allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0
+  allocate(p_l_metric(natm,3));p_l_metric = 0.d0
   metric = matmul(transpose(altv),altv)
   call m_UnitCell_init_md_per_step()
   m_thermo = qmass(1)
@@ -897,14 +901,10 @@ subroutine m_UnitCell_init_md()
   target_temperature = tkb(1)/CONST_kB
 !  gfree = 3*(natm-1)
   gfree = 3*(natm)
-  allocate(p_l(natm,3));p_l = 0.d0
   do i=1,natm
      p_l(i,:) = amion(ityp(i)) * cpd_l(i,:) * s_thermo
   enddo
-  allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0
   call convert_coords(natm,p_l,altv,p_l_lattice)
-  allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0
-  allocate(p_l_metric(natm,3));p_l_metric = 0.d0
   call convert_coords(natm,p_l_lattice,metric_inv,p_l_metric)
   do i=1,3
      do j=1,3
@@ -975,9 +975,12 @@ function get_potential_baro() result(ret)
 end function get_potential_baro
 
 subroutine m_UnitCell_init_md_per_step()
+  real(kind=DP), external :: deter3
+  univol = sqrt(deter3(metric))
   call inver3n(3,metric,metric_inv)
   call inver3n(3,altv,altv_inv)
   call build_dudmetric()
+  H0 = get_H0()
 end subroutine m_UnitCell_init_md_per_step
 
 function tensor_rank2_equals(t1,t2) result(ret)
@@ -1047,6 +1050,28 @@ function m_UC_get_hamiltonian() result(res)
   res = res-H0
 end function m_UC_get_hamiltonian
 
+function get_H0() result (res)
+  real(kind=DP) :: res
+  real(kind=DP) :: k_atm,k_baro,k_thermo,pot_baro,pot_thermo
+  k_atm = get_kinetic_atom(natm,p_l_lattice,metric_inv)
+  k_baro = get_kinetic_baro(matmul(metric,p_baro))
+  pot_baro = get_potential_baro()
+  if(control_pressure == OFF) then
+    k_baro = 0.d0
+    pot_baro = 0.d0
+  endif
+  if(imdalg == PT_CONTROL .and. t_ctrl_method /= VELOCITY_SCALING)then
+     k_thermo = get_kinetic_thermo(p_thermo)
+     pot_thermo = gfree * target_temperature * CONST_kB * log(s_thermo)
+  else if (imdalg == P_CONTROL .or. (imdalg == PT_CONTROL .and. t_ctrl_method == VELOCITY_SCALING)) then
+     k_thermo = 0.d0
+     pot_thermo = 0.d0
+  endif
+!  res = s_thermo * (k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro)
+  res = k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro
+  return
+end function get_H0
+
 function m_UC_get_curr_pressure() result(res)
   real(kind=DP) :: curr_pressure, res
   integer :: i
@@ -1071,6 +1096,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn)
    logical             :: tag_is_found, EOF_reach
    integer,      parameter   :: len_str = 132
    character(len=len_str)       :: str
+
+   call m_UnitCell_init_md()
+
    if(mype==0)then
       call rewind_to_tag0(nfcntn,len(tag_pressure_control), &
             &  tag_pressure_control, & 
@@ -1119,7 +1147,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn)
          call altv_2_rltv(altv,rltv,univol,rvol)  ! in b_CS
          call change_of_coordinate_system(altv,pos,natm,natm,cps) !-(b_I.S.) pos -> cps
          call primitive2bravais(nfout,p2bmat,altv(:,1),altv(:,2),altv(:,3),a,b,c,ca,cb,cc,il) ! in b_CS
-         if(iteration_unit_cell>1) H0_has_been_set = .true.
+         if(iteration_unit_cell>1) then
+           H0_has_been_set = .true.
+         endif
       endif
    endif
 
diff --git a/src_phase_3d/ChargeDensity_Mixing.F90 b/src_phase_3d/ChargeDensity_Mixing.F90
index c8e1afd..a8624c9 100644
--- a/src_phase_3d/ChargeDensity_Mixing.F90
+++ b/src_phase_3d/ChargeDensity_Mixing.F90
@@ -83,7 +83,7 @@ subroutine ChargeDensity_Mixing
   use m_Control_Parameters,  only : sw_calc_ekin_density, ekin_density_type, &
        &                            sw_mix_charge_with_ekindens
   use m_CD_mixing,  only : m_CD_mix_broyden2_intg, m_CD_mix_pulay_intg, &
-       &                   m_CD_simple_mixing_intg
+       &                   m_CD_simple_mixing_intg, m_CD_force_dealloc
   use m_KE_mixing,  only : m_KE_simple_mixing, m_KE_mix_broyden2, m_KE_mix_pulay, &
        &                   m_KE_prepare_precon, &
        &                   m_KE_simple_mixing_00, m_KE_kerker_mixing, &
@@ -96,7 +96,7 @@ subroutine ChargeDensity_Mixing
   real(kind=DP) :: edeltb_per_atom
   integer,save  :: waymix_at_CDmix_previous = 0
   integer       :: waymix_at_CDmix
-  logical       :: ini = .false.
+  logical       :: ini = .false., mixer_changed
   real(kind=DP) :: rmxt_tot, rmxt_hard, rmxt_occmat
 #ifdef __TIMER_SUB__
     call timer_sta(1101)
@@ -107,7 +107,8 @@ subroutine ChargeDensity_Mixing
 
   edeltb_per_atom = m_TE_what_is_edeltb_now()/natm 
   waymix_at_CDmix = m_CtrlP_waymix_now(iteration_electronic, iteration_ionic &
-       &                             , edeltb_per_atom)
+       &                             , edeltb_per_atom,mixer_changed)
+  if(mixer_changed) call m_CD_force_dealloc()
   if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then
      if(waymix_at_CDmix_previous /= waymix_at_CDmix) call m_Iter_cmix_reset()
   end if
diff --git a/src_phase_3d/Initial_Electronic_Structure.F90 b/src_phase_3d/Initial_Electronic_Structure.F90
index ba221ef..d42fa1d 100644
--- a/src_phase_3d/Initial_Electronic_Structure.F90
+++ b/src_phase_3d/Initial_Electronic_Structure.F90
@@ -104,8 +104,7 @@ subroutine Initial_Electronic_Structure
   use m_Crystal_Structure, only  : sw_fix_total_spin, sw_magnetic_constraint,  univol
 ! <--
   use m_XC_Potential,      only  : vxc_l, exc
-  use m_Parallelization,   only  : m_Parallel_dealloc_mpi_elec, m_Parallel_init_mpi_elec &
-                       &         , ista_kngp,iend_kngp,nrank_e,mype
+  use m_Parallelization,   only  : m_Parallel_dealloc_mpi_elec, ista_kngp,iend_kngp,nrank_e,mype
   use m_Orbital_Population,only  : m_OP_rd_occ_mat, m_OP_mix_om, m_OP_occ_mat_init, m_OP_cp_ommix_to_omold
   use m_FiniteElectricField,only : m_FEF_Constract_of_ftq
   use m_Parallelization,   only    : m_Parallel_init_mpi_elec_3D, np_g1k_prev
@@ -266,7 +265,10 @@ subroutine Initial_Electronic_Structure
      call m_PP_gfqwei_3D(nfout)  ! -> modnrm, fqwei, nlmta1, nlmta2
      call m_CtrlP_set_init_status(.false.)
      call UpdateUeff()
-     if(sw_hybrid_functional == ON) call EXX()
+     if(sw_hybrid_functional == ON) then
+       call m_ES_EXX_init()
+       call EXX()
+     endif
      return
   endif
 
diff --git a/src_phase_3d/Initialization.F90 b/src_phase_3d/Initialization.F90
index d3f8417..dac25fa 100644
--- a/src_phase_3d/Initialization.F90
+++ b/src_phase_3d/Initialization.F90
@@ -174,7 +174,7 @@ contains
   subroutine aavers
     include 'version.h' ! svn_revision
     character(len=72) :: vers, system, codename
-    write(vers,'("phase/0 2020.01 Revision:",i5, " --- 3D_Parallel --")') svn_revision
+    write(vers,'("phase/0 2020.01.01 Revision:",i5, " --- 3D_Parallel --")') svn_revision
     codename = 'phaseUnif'
     system = ''
 
diff --git a/src_phase_3d/Makefile.Fugaku b/src_phase_3d/Makefile.Fugaku
new file mode 100644
index 0000000..7b406a9
--- /dev/null
+++ b/src_phase_3d/Makefile.Fugaku
@@ -0,0 +1,578 @@
+.SUFFIXES:
+.SUFFIXES: .o .F .f .F90 .f90 .c .mod
+
+# Platform    : GNU Linux (EM64T/AMD64)
+# Prog. model : MPI parallel
+# Compiler    : Intel Fortran compiler
+# BLAS/LAPACK : Intel Math Kernel Library (MKL)
+# FFT         : Intel MKL with FFTW3 interface
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+F90 = mpifrtpx
+CC  = fccpx
+CPP = 
+AR  = ar qv
+LINK = mpifrtpx -Kparallel
+F90FLAGS = -O3 -Kparallel,ocl,preex,array_private,auto,simd=2,openmp -c -V -Qa,d,i,p,t,x -Koptmsg=2 -C 0
+F77FLAGS = -Kparallel,ocl,preex,array_private,auto,simd=2,openmp -c -V -Qa,d,i,p,t,x -Koptmsg=2 -C 0
+CFLAGS = -Kfast,parallel,ocl,preex,array_private,auto,simd=2,openmp -DINTEL
+
+ESM = yes
+ifdef ESM
+CPPESM=-DENABLE_ESM_PACK
+LESM=-lesm
+else
+CPPESM=
+endif
+
+CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM}
+LFLAGS = 
+F90FLAGS_FIXED = -extend_source -Fl -fixed
+F90FLAGS_FREE = -extend_source -Fl
+
+FFTW3_DIR=/vol0004/apps/oss/spack-v0.16/opt/spack/linux-rhel8-a64fx/fj-4.3.1/fujitsu-fftw-master-2irjm2a56j7tahcmggbnde2uxf6gjdml
+#FFTW3_DIR=/vol0001/apps/oss/spack-latest/opt/spack/linux-rhel8-a64fx/fj-4.2.0/fftw-3.3.8-3ojdo6i4rd3asyyonmrgxthvprum2gpa/
+INCLUDE=-I${FFTW3_DIR}/include
+#LIBS = -L./ ${LESM} -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp
+#LIBS = -L./ ${LESM} -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp
+LIBS = -L./ ${LESM} -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP -lscalapack -Kopenmp
+#LIBS = -L./ ${LESM} libfftw3_omp.a libfftw3.a -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp
+###########################################################################
+###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>###
+###########################################################################
+
+ifdef ESM
+ESM_LIB = libesm.a
+else
+ESM_LIB =
+endif
+LAPACK = 
+FFTOBJECT =  mpifft.o
+OBJ_INPUTPARSE = input_parse.o
+
+PHASE_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o #z_tool_timer.o
+
+PHASE_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_ES_occup.o \
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_Charge_Density.o  m_CD_Mag_Moment.o m_Orbital_Population.o      m_Dipole.o  m_CD_mixing.o \
+m_PS_opencore.o \
+m_epc_potential.o        \
+m_Positron_Wave_Functions.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_Representation.o m_OP_rotation.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_XC_Potential_2D.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_OP_Moment.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_Force.o                m_Stress.o               m_ES_WF_by_submat.o \
+m_UnitCell.o \
+m_constraints.o \
+m_ELF.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Raman.o            m_Phonon.o \
+m_Wannier.o  m_Replica.o Renewal_of_ChgCtrlParam.o \
+m_LinearResponse_Control.o  \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_CLS_dipquad.o m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Qpt.o  \
+m_rttddft.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_Fcp.o m_Band_Unfolding.o
+
+PHASE_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o \
+
+PHASE_F_OTHERSUBS  = mdmain.o     constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+bottom_Subroutines.o              \
+spline.o \
+b_Crystal_Structure.o             b_Electronic_Structure.o \
+b_Words.o                         b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                         b_Kpoints.o \
+b_PlaneWaveBasisSet.o             b_Ionic_System.o \
+b_XC_Potential.o                  b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o\
+input_interface.o                 Initialization.o \
+WriteDownData_onto_Files.o        Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o \
+Renewal_of_WaveFunctions.o \
+Renewal_of_pWaveFunctions.o \
+Uramping.o \
+IterationNumbers_Setting.o        ChargeDensity_Construction.o \
+ChargeDensity_Mixing.o            Renewal_of_Potential.o \
+Renewal_of_Hubbard_Potential.o    Renewal_of_OccMat.o \
+Renewal_of_pPotential.o           Renewal_of_Hubbard_Parameters.o \
+Convergence_Check.o               Forces.o \
+Move_Ions.o                       Initial_MD_Condition.o \
+Stress.o                          Postprocessing.o \
+Finalization_of_mpi.o             miscellaneous.o\
+b_BerryPhase.o \
+heap_sort.o                       real_spherical_harmonics.o \
+Real_space_integ.o                crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+mpi_dummy.o                            WriteDownData_onto_Files_ek.o \
+GaussLeg.o lib_int_deri_add.o  \
+rttddft_main.o \
+Potential_Construction.o        Potential_Mixing.o             ThomasFermiWeiz.o \
+Epsilon_postscf.o vdW.o Epsilon_Paramset.o memsize.o scdft_routines.o \
+lbfgs.o
+
+# for vc_nl
+NLOBJ = vc_nl.o
+
+ifndef SX_DGEMM
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o \
+spg+tetra.o
+else
+PHASE_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+spg+tetra.o \
+dgemm__.o 
+endif
+
+EKCAL_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EKCAL_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_epc_potential.o \
+m_Positron_Wave_Functions.o \
+m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o      m_Dipole.o  m_CD_mixing.o \
+m_PS_opencore.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_Representation.o m_OP_rotation.o \
+m_ES_WF_by_MatDiagon.o   m_ES_dos.o               m_Hubbard.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_vdWDF.o \
+m_XC_Potential_2D.o \
+m_Ldos.o                 m_XC_Potential.o         \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_constraints.o \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o\
+m_UnitCell.o \
+m_ELF.o                  m_Wannier.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Raman.o               m_Phonon.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o  \
+m_Fcp.o m_CLS_dipquad.o m_Band_Unfolding.o
+
+
+EKCAL_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EKCAL_F_OTHERSUBS  = ekmain.o     constraint_main.o          Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+Uramping.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o vdW.o scdft_routines.o memsize.o lbfgs.o
+
+ifndef SX_DGEMM
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+gncpp_xc_gga_rad.o  \
+decfft_ent.o
+else
+EKCAL_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o 
+endif
+
+
+EPS_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+EPS_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_OP_rotation.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o m_CD_Mag_Moment.o  m_Orbital_Population.o     m_Dipole.o m_CD_mixing.o \
+m_XC_Potential_2D.o \
+m_XC_Potential.o \
+m_PS_opencore.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o          m_Stress.o      m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Representation.o       m_Raman.o       m_Phonon.o               \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Wannier.o \
+m_ValenceBand_Spectrum.o   m_CoreLevel_Spectrum.o   \
+m_CLS_dipquad.o m_ES_occup_EPS.o         m_Epsilon_ek.o \
+m_LinearResponse_Control.o  \
+m_LinearResponse_Qpt.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o \
+m_Fcp.o m_Band_Unfolding.o
+
+EPS_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o
+
+EPS_F_OTHERSUBS  = epsmain.o      constraint_main.o     Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o\
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+Initialization_Epsilon.o               Shift_Kpoint.o \
+Reset_Kpoint.o                         Preparation_for_Calc_Epsilon.o \
+Transition_moment_Epsilon.o            Calc_Epsilon.o \
+Nonlinear_Optics_Epsilon.o             WriteDownData_onto_Files_Epsilon.o \
+PseudoPotential_ek_Epsilon.o           Dealloc_Radr_and_Wos_Epsilon.o \
+ThomasFermiWeiz.o         Renewal_of_ChgCtrlParam.o  \
+mpi_dummy.o \
+Uramping.o \
+GaussLeg.o lib_int_deri_add.o vdW.o memsize.o scdft_routines.o lbfgs.o
+
+ifndef SX_DGEMM
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+EPS_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+TDLR_LOWER_MODULES = m_Const_Parameters.o   m_ErrorMessages.o m_Parallelization.o \
+m_IterationNumbers.o
+
+TDLR_UPPER_MODULES = \
+m_Control_Parameters.o   m_Files.o                m_Timing.o \
+m_Crystal_Structure.o    m_FFT.o                  m_Ionic_System.o \
+m_CS_SpaceGroup.o        m_CS_Magnetic.o \
+m_Orbital_QuantumNum.o  \
+m_Kpoints.o              m_PseudoPotential.o      m_PlaneWaveBasisSet.o \
+m_Realspace.o \
+m_SpinOrbit_Potential.o  m_SpinOrbit_FromFile.o  \
+m_ES_NonCollinear.o \
+m_NonLocal_Potential.o   m_Electronic_Structure.o \
+m_ES_nonlocal.o	 	 m_ES_ortho.o  m_ES_wf_extrpl.o m_ES_initialWF.o 	\
+m_FiniteElectricField.o \
+m_Positron_Wave_Functions.o \
+m_ES_ExactExchange.o \
+m_ES_occup.o             m_ES_WF_by_SDorCG.o      m_ES_WF_by_Davidson.o \
+m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \
+m_ES_LHXC.o              m_ES_Intgr_VlhxcQlm.o    m_ES_IO.o \
+m_Representation.o m_OP_rotation.o \
+m_ES_dos.o               m_Hubbard.o \
+m_epc_potential.o \
+m_vdWDF.o \
+m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \
+m_XC_Potential_2D.o \
+m_XC_Potential.o \
+m_PS_opencore.o \
+string.o m_db.o \
+m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o  \
+m_PAW_XC_Potential.o \
+m_SpinOrbit_RadInt.o \
+m_ES_Mag_Constraint.o  \
+m_Total_Energy.o \
+m_OP_Moment.o \
+m_Force.o                m_Stress.o m_ES_WF_by_submat.o      m_ES_WF_by_MatDiagon.o \
+m_UnitCell.o \
+m_KineticEnergy_Density.o  \
+m_KE_mixing.o \
+m_Ldos.o                 m_ELF.o \
+m_constraints.o \
+m_BerryPhase.o           m_BP_Properties.o \
+m_Raman.o          m_Phonon.o               \
+m_Wannier.o \
+m_Screening_FFT.o        m_Screening.o      m_External_Potential.o \
+m_Excitation.o \
+m_CLS_dipquad.o m_ES_occup_EPS.o        \
+m_LinearResponse_Control.o  \
+m_Band_Unfolding.o \
+m_LinearResponse_Qpt.o \
+m_LinearResponse_Tools.o     m_LinearResponse_Density.o  \
+m_LinearResponse_NonInt.o    m_LinearResponse_ALDA.o \
+m_LinearResponse_Kernel.o    m_LinearResponse_BS.o \
+m_LinearResponse_Spectrum.o \
+m_Potential_Mixing.o       m_ThomasFermiW_Potential.o 
+
+TDLR_F_SUBROUTINES = Preparation.o \
+InputData_Analysis.o           PseudoPotential_Construction.o \
+
+TDLR_F_OTHERSUBS  = tdlrmain.o     constraint_main.o    Preparation_for_mpi.o \
+Preparation_for_ESM.o \
+scf_routines.o \
+bottom_Subroutines.o                   \
+spline.o \
+b_Crystal_Structure.o                  b_Electronic_Structure.o \
+b_Words.o                              b_PseudoPotential.o \
+b_PseudoPotential_EXX.o \
+b_Fermi.o                              b_Kpoints.o \
+b_PlaneWaveBasisSet.o                  b_Ionic_System.o \
+b_XC_Potential.o                       b_PAW_XC_Potential.o \
+b_XC_OmegaPBE.o  \
+b_XC_metagga.o \
+b_Ldos_f77.o \
+input_interface.o                      Initialization.o \
+Ewald_and_Structure_Factor.o \
+Initial_Electronic_Structure.o         Renewal_of_WaveFunctions.o \
+IterationNumbers_Setting.o \
+ChargeDensity_Construction.o                 Renewal_of_Potential.o \
+ChargeDensity_Mixing.o \
+Renewal_of_Hubbard_Potential.o         Renewal_of_OccMat.o \
+Convergence_Check.o                    Forces.o \
+WriteDownData_onto_Files.o \
+Initial_MD_Condition.o                 WriteDownData_onto_Files_ek.o \
+Stress.o                               Postprocessing.o \
+Finalization_of_mpi.o \
+miscellaneous.o \
+b_BerryPhase.o \
+heap_sort.o                            real_spherical_harmonics.o \
+Real_space_integ.o                     crotylm.o \
+screening_correction.o \
+PseudoPotential_ek_Epsilon.o           \
+mpi_dummy.o \
+GaussLeg.o lib_int_deri_add.o \
+ThomasFermiWeiz.o       Renewal_of_ChgCtrlParam.o \
+b_LinearResponse_Kernel.o  b_LinearResponse_exc.o \
+Uramping.o \
+LinearResponse_Proc.o     LinearResponse_Spec.o vdW.o scdft_routines.o memsize.o lbfgs.o
+
+ifndef SX_DGEMM
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  
+else
+TDLR_OBJECTSF77 = \
+b_PseudoPotential_f77.o \
+b_Force_f77.o \
+rmmsubs.o                              spg+tetra.o \
+decfft_ent.o \
+gncpp_xc_gga_rad.o  \
+dgemm__.o
+endif
+
+
+PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE)
+
+all : phase epsmain
+
+ifdef ESM
+phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+else
+phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ)
+	$(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+endif
+
+ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ)
+	$(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ)
+	$(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ)
+	$(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@
+
+ifdef NO_MPI
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)"
+else
+libesm.a:
+	cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)"
+endif
+
+liblapack.a:
+	cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+libblas.a:
+	cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)"
+
+$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h)
+	$(CC) -c $(CFLAGS) $(@:.o=.c)
+
+.f.o:
+	$(F90) -c $(F77FLAGS) $*.f
+
+.f90.o:
+	$(F90) -c $(F90FLAGS) $*.f90
+
+.F.o:
+	$(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F
+
+.F90.o:
+	$(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90
+
+clean:
+	\rm -f *.o *.mod *.a *.lib *.L *.list phase epsmain
+	\cd LAPACK; make clean
+	\cd BLAS; make clean
+	\cd EsmPack; make clean
+
+install: phase epsmain 
+	\mv -f phase ../bin/phase.3d
+	\mv -f epsmain ../bin/epsmain.3d
diff --git a/src_phase_3d/Preparation.F90 b/src_phase_3d/Preparation.F90
index 10ef0a5..0b24c8c 100644
--- a/src_phase_3d/Preparation.F90
+++ b/src_phase_3d/Preparation.F90
@@ -384,7 +384,7 @@ subroutine Preparation(imode)
   call m_CS_alloc_op_tau_tl(nfout)
   if(symmetry_method == AUTOMATIC) then
      call m_CS_SG_auto_gnrt_sym_op(paramset,nfout) ! paramset == .false.
-     call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos
+     if(.not. m_Stress_in_correction()) call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos
   else
                                                      __TIMER_FJ_START(31)
      call m_CS_gnrt_symmetry_operations(paramset,nfout) ! paramset == .false.
diff --git a/src_phase_3d/m_CD_mixing.F90 b/src_phase_3d/m_CD_mixing.F90
index e8cb863..bddc7dd 100644
--- a/src_phase_3d/m_CD_mixing.F90
+++ b/src_phase_3d/m_CD_mixing.F90
@@ -3862,11 +3862,17 @@ contains
       integer :: i, nx
       if(hownew == ANEW) then
          m = iter
-         ncrspd(:) = (/(i,i=1,m)/)
+!         ncrspd(:) = (/(i,i=1,m)/)
+         do i=1,m
+           ncrspd(i) = i
+         enddo
       else ! hownew == RENEW
          if(iter <= n) then
             m = iter
-            ncrspd(:) = (/(i,i=1,m)/)
+!            ncrspd(:) = (/(i,i=1,m)/)
+            do i=1,m
+              ncrspd(i) = i
+            enddo
          else
             m = n
             nx = ncrspd(1)
@@ -5720,11 +5726,17 @@ contains
       integer :: i, nx
       if(hownew == ANEW) then
          m = iter
-         ncrspd(:) = (/(i,i=1,m)/)
+!         ncrspd(:) = (/(i,i=1,m)/)
+         do i=1,m
+           ncrspd(i) = i
+         enddo
       else ! hownew == RENEW
          if(iter <= n) then
             m = iter
-            ncrspd(:) = (/(i,i=1,m)/)
+!            ncrspd(:) = (/(i,i=1,m)/)
+            do i=1,m
+              ncrspd(i) = i
+            enddo
          else
             m = n
             nx = ncrspd(1)
diff --git a/src_phase_3d/m_Control_Parameters.F90 b/src_phase_3d/m_Control_Parameters.F90
index 4ac710f..352507a 100644
--- a/src_phase_3d/m_Control_Parameters.F90
+++ b/src_phase_3d/m_Control_Parameters.F90
@@ -11215,9 +11215,10 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen
   end function m_CtrlP_way_of_smearing
 
   integer function m_CtrlP_waymix_now(iteration_electronic,iteration_ionic &
-       &                             ,edeltb_per_atom)
+       &                             ,edeltb_per_atom,mixer_changed)
     integer, intent(in)      :: iteration_electronic, iteration_ionic
     real(kind=DP),intent(in) :: edeltb_per_atom
+    logical, intent(out), optional :: mixer_changed
     integer :: ips, ipe, imsd_t, it, ip, i
 
     if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then
@@ -11252,6 +11253,9 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen
 !       if(ip < 0 .or. ip > n_Charge_Mixing_way) stop ' error at m_CtrlP_waymix_now'
        if(ip < 0 .or. ip > n_Charge_Mixing_way) call phase_execution_error(INVALID_CHARGE_MIXING)
        m_CtrlP_waymix_now = charge_mixing(ip)%mixing_way
+       if(present(mixer_changed)) then 
+         mixer_changed = ip_charge_mixing /= ip
+       endif
        ip_charge_mixing = ip
     else
        m_CtrlP_waymix_now = waymix
diff --git a/src_phase_3d/m_ES_IO.F90 b/src_phase_3d/m_ES_IO.F90
index 4dc19e4..6e09912 100644
--- a/src_phase_3d/m_ES_IO.F90
+++ b/src_phase_3d/m_ES_IO.F90
@@ -55,6 +55,7 @@
 #endif
 
 !#define _DEBUG_ESIO_
+#define _USE_ALLREDUCE_IN_WD_WFS_
 
 !
 module m_ES_IO
@@ -78,7 +79,8 @@ module m_ES_IO
        &                           , mype,ierr,map_k, map_ek,ista_e,iend_e,istep_e,map_z, np_e &
        &                           , ista_k,iend_k,myrank_e,myrank_k,map_e,nrank_e &
        &                           , ista_kngp,iend_kngp, nrank_k  &
-       &                           , ista_g1k,iend_g1k, np_g1k , myrank_g, nrank_g, np_g1k_prev
+       &                           , ista_g1k,iend_g1k, np_g1k , myrank_g, nrank_g, np_g1k_prev &
+       &                           , nis_g1k,nie_g1k,map_rank_gek 
   use m_IterationNumbers,     only : nk_in_the_process, nk_converged, nkgroup &
        &                           , first_kpoint_in_this_job, iteration_ionic, iteration_electronic
   use m_FFT,                  only : fft_box_size_WF,nfft
@@ -1132,7 +1134,7 @@ contains
 #endif
                                                   __TIMER_IODO_STOP(1438)
                                                   __TIMER_IOCOMM_START_w_BARRIER(mpi_comm,1439)
-             call mpi_bcast(wf_l,kg1t*kimg,mpi_real,0,mpi_comm_group,ierr)
+             call mpi_bcast(wf_l,kg1t*kimg,mpi_real4,0,mpi_comm_group,ierr)
                                                   __TIMER_IOCOMM_STOP(1439)
              if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e) )then         ! MPI
                 if(pzaj) then
@@ -1281,7 +1283,7 @@ contains
 ! -----------------
            if(precision_WFfile==SP) then
              if(mype == 0) read(nfzaj, end = 9999, err = 9999) wf_l  ! MPI
-             call mpi_bcast(wf_l,kg1_prev*kimg,mpi_real,0,mpi_comm_group,ierr)
+             call mpi_bcast(wf_l,kg1_prev*kimg,mpi_real4,0,mpi_comm_group,ierr)
 
              if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e) )then         ! MPI
                 do ri = 1, kimg
@@ -1363,7 +1365,7 @@ contains
                                                   __TIMER_IODO_STOP(1467)
              endif
                                                   __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1468)
-             call mpi_allreduce(wf_mpi,wf_l,kg1*kimg,mpi_real,mpi_sum,mpi_comm_group,ierr)
+             call mpi_allreduce(wf_mpi,wf_l,kg1*kimg,mpi_real4,mpi_sum,mpi_comm_group,ierr)
                                                   __TIMER_IOCOMM_STOP(1468)
                                                   __TIMER_IODO_START(1469)
            else
@@ -1543,9 +1545,9 @@ contains
              if(ipri>=2) write(nfout,*) ' !D     ie = ', ie
              if(mype == 0) read(nf,err=2,end=2) wf_l     ! MPI
              if(mype == 0 .and. map_ek(ie,ik) /= 0) then ! MPI
-                call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI
+                call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI
              else if(map_ek(ie,ik) == mype .and. map_ek(ie,ik) /= 0) then                  ! MPI
-                call mpi_recv(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr)     ! MPI
+                call mpi_recv(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,istatus,ierr)     ! MPI
              end if
              if(map_ek(ie,ik) == mype) then              ! MPI
                 do ri = 1, kimg
@@ -1606,9 +1608,9 @@ contains
                 end if
 
                 if(map_ek(ie,ik) == mype) then
-                   call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ike2),1,mpi_comm_group,ierr)
+                   call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ike2),1,mpi_comm_group,ierr)
                 else if(map_ek(ie,ike2) == mype) then
-                   call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)
+                   call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)
                 end if
                 if(map_ek(ie,ike2) == mype) then
                    do ri = 1, kimg
@@ -1767,9 +1769,9 @@ contains
                    wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri)
                 end do
                 if(map_ek(ie,ik) /= 0) &                             ! MPI
-                     &   call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI
+                     &   call mpi_send(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,ierr) ! MPI
              else if(mype == 0 .and. map_ek(ie,ik) /= 0) then
-                call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
+                call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI
              end if
              if(mype == 0) write(nf)  wf_l                        ! MPI
            else if(precision_WFfile==DP) then
@@ -1998,7 +2000,8 @@ contains
 
     integer, intent(in) :: nfout,nfzaj
     logical, intent(in) :: F_ZAJ_partitioned
-    integer :: ik,ib,ri,j,i
+    real(kind=SP), allocatable, dimension(:,:) :: wf_buf
+    integer :: ik,ib,ri,j,i,ig
     integer :: id_sname = -1
                                                   __TIMER_SUB_START(1373)
     call tstatc0_begin('m_ESIO_wd_WFs ',id_sname)
@@ -2026,9 +2029,10 @@ contains
        allocate(wf_l(kg1,kimg))
        do ik = 1, kv3, af+1
                                                   __TIMER_IODO_START(1442)
+#ifdef _USE_ALLREDUCE_IN_WD_WFS_       
           do ib = 1, neg
              wf_l = 0.0d0
-             if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e) )then         ! MPI
+             if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e)) then         ! MPI
                 do ri = 1, kimg
                   do j = ista_g1k(ik),iend_g1k(ik)
                      wf_l(j,ri) = zaj_l(j-ista_g1k(ik)+1,map_z(ib),ik,ri)
@@ -2036,12 +2040,37 @@ contains
                 end do
              endif
                                                   __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1443)
-             call mpi_allreduce(MPI_IN_PLACE,wf_l,kg1*kimg,mpi_real,mpi_sum,mpi_comm_group,ierr)
+             call mpi_allreduce(MPI_IN_PLACE,wf_l,kg1*kimg,mpi_real4,mpi_sum,mpi_comm_group,ierr)
                                                   __TIMER_IOCOMM_STOP(1443)
                                                   __TIMER_IODO_START(1444)
              if(mype == 0) write(nfzaj)  wf_l                        ! MPI
                                                   __TIMER_IODO_STOP(1444)
           end do
+#else
+!! coded by T. Yamasaki, 2021.02.25 -->
+          allocate(wf_buf(maxval(np_g1k),kimg))
+          do ib = 1, neg
+             wf_l = 0.0d0
+             wf_buf = 0.d0
+             do j = 0, nrank_g-1
+                if(map_k(ik)==myrank_k .and. map_e(ib)==myrank_e .and. j==myrank_g) then
+                   do ig = ista_g1k(ik), iend_g1k(ik)
+                      do ri=1, kimg
+                         wf_buf(ig-ista_g1k(ik)+1,ri) = zaj_l(ig-ista_g1k(ik)+1,map_z(ib),ik,ri)
+                      end do
+                   end do
+                   call mpi_send(wf_buf,maxval(np_g1k)*kimg,mpi_real4,0,1,mpi_comm_group,ierr)
+                end if
+                if(mype==0) then
+                   call mpi_recv(wf_buf,maxval(np_g1k)*kimg,mpi_real4,map_rank_gek(j,map_e(ib),map_k(ik)),1,mpi_comm_group,istatus,ierr)
+                   wf_l(nis_g1k(j,ik):nie_g1k(j,ik),1:kimg) = wf_buf(1:nie_g1k(j,ik)-nis_g1k(j,ik)+1,1:kimg)
+                end if
+             end do
+             if(mype==0) write(nfzaj) wf_l
+          end do
+          deallocate(wf_buf)
+!! <--
+#endif
                                                   __TIMER_IODO_STOP(1442)
        end do
        deallocate(wf_l)
diff --git a/src_phase_3d/m_Electronic_Structure.F90 b/src_phase_3d/m_Electronic_Structure.F90
index f1fb467..6b100a8 100644
--- a/src_phase_3d/m_Electronic_Structure.F90
+++ b/src_phase_3d/m_Electronic_Structure.F90
@@ -1279,7 +1279,7 @@ contains
 ! ==============================================================================
           evdff(1) = evdff(1) + tmp*tmp
           evdff(2) = evdff(2) + dabs(tmp)
-          ekosum = ekosum + eko_l(map_z(ib),ik)
+          ekosum = ekosum + eko_l(ib, ik)
           if(dabs(tmp).gt.DELTAevdff) then
 ! === For epsmain by tkato 2013/11/14 ==========================================
              evdff(3) = evdff(3) + dsqrt(dabs(eko1_l(ib,ik)**2 - eko_l(ib,ik)**2))
@@ -1310,7 +1310,7 @@ contains
     if(iprievdff >= 1) &
 !!$         & write(nfout,'(" >> (",i6,") <eko_old-eko_new>:(",3d13.5,")")') &
 !!$         &    iteration_electronic, evdff(1), evdff(2), evdff(3)
-         & write(nfout,'(" ENERGY EIGEN VALUE SUMM. FOR",i6," -TH ITER (K=",i6,":",i6 &
+         & write(nfout,'(" ENERGY EIGEN VALUE SUM. FOR",i6," -TH ITER (K=",i6,":",i6 &
          & ,") = ",F16.8, " <eko_old-eko_new>:(",3d13.5,")")') &
          &   iteration_electronic,nk_in_the_process, nk_in_the_process+kv3-1 &
          & , ekosum, evdff(1), evdff(2), evdff(3)
@@ -1378,9 +1378,10 @@ contains
     end do
 
     if(npes > 1) then
-       call mpi_allreduce(evdff2,evdffr2,3*kv3 &
-            & ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr)  ! MPI
-       evdff2 = evdffr2
+       call mpi_allreduce(mpi_in_place,evdff2,3*kv3 &
+            & ,mpi_double_precision,mpi_sum,mpi_kg_world,ierr)  ! MPI
+       call mpi_allreduce(mpi_in_place,evdff2,3*kv3 &
+            & ,mpi_double_precision,mpi_sum,mpi_ge_world,ierr)  ! MPI
     end if
 
 ! ================================== modified by K. Tagami ============= 11.0
diff --git a/src_phase_3d/m_Epsilon_ek.F90 b/src_phase_3d/m_Epsilon_ek.F90
index 3c267c0..5d9772c 100644
--- a/src_phase_3d/m_Epsilon_ek.F90
+++ b/src_phase_3d/m_Epsilon_ek.F90
@@ -2101,7 +2101,9 @@ ppc_data(ntyp)
           deallocate(vk00xyz); deallocate(vk0xyz)
           deallocate(vk0_op);  deallocate(nopr_k);  deallocate(op_k)
        end if
-       if(trm2_mode == 0) deallocate(trm2)
+       if(trm2_mode == 0) then
+               if ( allocated(trm2)) deallocate(trm2)
+       end if
     end if
 
     deallocate(e);  deallocate(imeps);   deallocate(reps)
@@ -6514,6 +6516,10 @@ ppc_data(ntyp)
 !    allocate(n_unfilled(nspin)); n_unfilled = 0
 !    allocate(n_half_filled(nspin)); n_half_filled = 0
 !
+    if(allocated(n_filled))      deallocate(n_filled)
+    if(allocated(n_unfilled))    deallocate(n_unfilled)
+    if(allocated(n_half_filled)) deallocate(n_half_filled)
+
     allocate(n_filled(nspin_kt)) ; n_filled = 0
     allocate(n_unfilled(nspin_kt)); n_unfilled = 0
     allocate(n_half_filled(nspin_kt)); n_half_filled = 0
diff --git a/src_phase_3d/m_FFT.F90 b/src_phase_3d/m_FFT.F90
index fcdee77..4263d60 100644
--- a/src_phase_3d/m_FFT.F90
+++ b/src_phase_3d/m_FFT.F90
@@ -13352,6 +13352,16 @@ call stop_timer('FFT-I yfft')
     endif
 
     if (firstcall_direct_xyz_3d) then
+       if(planz1    /= 0) call dfftw_destroy_plan(planz1)
+       if(planz1_1d /= 0) call dfftw_destroy_plan(planz1_1d)
+       if(planz2    /= 0) call dfftw_destroy_plan(planz2)
+       if(planz2_1d /= 0) call dfftw_destroy_plan(planz2_1d)
+       if(plany1    /= 0) call dfftw_destroy_plan(plany1)
+       if(plany2    /= 0) call dfftw_destroy_plan(plany2)
+       if(planx1    /= 0) call dfftw_destroy_plan(planx1)
+       if(planx1_1d /= 0) call dfftw_destroy_plan(planx1_1d)
+       if(planx2    /= 0) call dfftw_destroy_plan(planx2)
+       if(planx2_1d /= 0) call dfftw_destroy_plan(planx2_1d)
        savesize = 0
 #ifdef __TIMER_DO__
   call timer_sta(118)
@@ -16217,6 +16227,16 @@ call stop_timer('FFT-I yfft')
     endif
 
     if (firstcall_inverse_xyz_3d) then
+       if(planx1    /= 0) call dfftw_destroy_plan(planx1)
+       if(planx1_1d /= 0) call dfftw_destroy_plan(planx1_1d)
+       if(planx2    /= 0) call dfftw_destroy_plan(planx2)
+       if(planx2_1d /= 0) call dfftw_destroy_plan(planx2_1d)
+       if(plany1    /= 0) call dfftw_destroy_plan(plany1)
+       if(plany2    /= 0) call dfftw_destroy_plan(plany2)
+       if(planz1    /= 0) call dfftw_destroy_plan(planz1)
+       if(planz1_1d /= 0) call dfftw_destroy_plan(planz1_1d)
+       if(planz2    /= 0) call dfftw_destroy_plan(planz2)
+       if(planz2_1d /= 0) call dfftw_destroy_plan(planz2_1d)
        savesize = 0
 #ifdef __TIMER_DO__
   call timer_sta(134)
diff --git a/src_phase_3d/m_FFT_type4_fftw3_rev.F90 b/src_phase_3d/m_FFT_type4_fftw3_rev.F90
index 8dd370d..ee21468 100644
--- a/src_phase_3d/m_FFT_type4_fftw3_rev.F90
+++ b/src_phase_3d/m_FFT_type4_fftw3_rev.F90
@@ -88,14 +88,14 @@ contains
   subroutine m_FFT_finalize()
     call dfftw_destroy_plan(plan_WF(1))    
     call dfftw_destroy_plan(plan_WF(2))    
-    call dfftw_destroy_plan(plan_CD(1))    
-    call dfftw_destroy_plan(plan_CD(2))    
+!    call dfftw_destroy_plan(plan_CD(1))    
+!    call dfftw_destroy_plan(plan_CD(2))    
     if(sw_positron/=OFF)then
        call dfftw_destroy_plan(plan_pWF(1))
        call dfftw_destroy_plan(plan_pWF(2))
     endif
 
-    call dfftw_cleanup()
+!    call dfftw_cleanup()
   end subroutine m_FFT_finalize
 
   subroutine m_FFT_setup(inversion_symmetry,paramset)
diff --git a/src_phase_3d/m_Ionic_System.F90 b/src_phase_3d/m_Ionic_System.F90
index 7131934..82c6c93 100644
--- a/src_phase_3d/m_Ionic_System.F90
+++ b/src_phase_3d/m_Ionic_System.F90
@@ -6061,7 +6061,9 @@ contains
              read(nfcntn,*) forcmx_constraint_quench
           end if
        end if
-       if(sw_optimize_lattice==ON)then
+       if(sw_optimize_lattice==ON .or. &
+          imdalg == PT_CONTROL    .or. &
+          imdalg == P_CONTROL) then
           call rewind_to_tag0(nfcntn,len(tag_lattice_vector),tag_lattice_vector &
                &,  EOF_reach, tag_is_found, str, len_str)
           if(tag_is_found)then
@@ -6083,12 +6085,16 @@ contains
             & ,mpi_double_precision,0,mpi_comm_group,ierr)
        call mpi_bcast(forcmx_constraint_quench,1 &
             & ,mpi_double_precision,0,mpi_comm_group,ierr)
-       if(sw_optimize_lattice==ON)then
+       if(sw_optimize_lattice==ON .or. &
+          imdalg == PT_CONTROL    .or. &
+          imdalg == P_CONTROL) then
           call mpi_bcast(altv,9,mpi_double_precision,0,mpi_comm_group,ierr)
        endif
     end if
 
-    if(sw_optimize_lattice==ON)then
+    if(sw_optimize_lattice==ON .or. &
+       imdalg == PT_CONTROL    .or. &
+       imdalg == P_CONTROL) then
        call m_CS_altv_2_rltv()
     endif
 
@@ -6940,7 +6946,8 @@ contains
                   & , cpd_l, cpd_old, ipcpd, iop_supercell)
           end if
 
-          if(iprimd >= 1) then
+!!$          if(iprimd >= 1) then
+          if(iprimd > 1) then
              write(nfout,'(" -- cpd_l, cpd_old (after fd_symmetrize) --")')
              do i  = 1, natm
                 write(nfout,'(i5,6f18.5)') i, cpd_l(i,1:3),cpd_old(i,1:3)
diff --git a/src_phase_3d/m_Parallelization.F90 b/src_phase_3d/m_Parallelization.F90
index b491566..ec8b721 100644
--- a/src_phase_3d/m_Parallelization.F90
+++ b/src_phase_3d/m_Parallelization.F90
@@ -160,6 +160,7 @@ module m_Parallelization
   integer, allocatable, dimension(:) :: map_k                     ! d(kv3)  
   integer, allocatable, dimension(:,:):: map_ek                   ! d(neg,kv3)
   integer                            :: myrank_k,ista_k,iend_k
+  integer, allocatable, dimension(:,:,:) :: map_rank_gek  ! d(nrank_g,nrank_e,nrank_k)
   integer, allocatable, dimension(:) :: nis_k,  nie_k,  nel_k
   integer, allocatable, dimension(:,:,:):: map_ekg                  ! d(neg,kg1,kv3)
   integer, allocatable, dimension(:) :: nis_eg, nie_eg, nel_eg, neg_gg, neg_gg_all ! d(nrank_g)
@@ -537,7 +538,9 @@ contains
     inquire(file='dirlist',exist=exist_dirlist)
     if(.not.exist_dirlist) then
 
-       if(firstcall) call mpi_comm_dup(mpi_comm_world,mpi_comm_group,ierr)
+       !if(firstcall) call mpi_comm_dup(mpi_comm_world,mpi_comm_group,ierr)
+      if(.not.firstcall) call mpi_comm_free(mpi_comm_group, ierr)
+      call mpi_comm_dup(mpi_comm_world,mpi_comm_group,ierr)
 
        !!write(*,*) 'mpi_comm_world=',mpi_comm_world
        !!write(*,*) 'mpi_comm_group=',mpi_comm_group
@@ -627,7 +630,9 @@ contains
        !!write(*,*) 'workdir=',trim(workdir)
        !!my_color = mype
        !!my_key = 0
-       if(firstcall) call mpi_comm_split(MPI_COMM_WORLD,my_color,my_key,mpi_comm_group,ierr)
+       !if(firstcall) call mpi_comm_split(MPI_COMM_WORLD,my_color,my_key,mpi_comm_group,ierr)
+       if(.not.firstcall) call mpi_comm_free(mpi_comm_group, ierr)
+       call mpi_comm_split(MPI_COMM_WORLD,my_color,my_key,mpi_comm_group,ierr)
        call mpi_comm_size(mpi_comm_group, npes, ierr)
        call mpi_comm_rank(mpi_comm_group, mype, ierr)
        !!write(*,*) 'mpi_comm_group=',mpi_comm_group
@@ -800,7 +805,9 @@ contains
              icolor = 1
              key = myrank_nbmx
           end if
-          if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_nbmx_world(j),ierr)
+!          if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_nbmx_world(j),ierr)
+          if(.not.firstcall) call mpi_comm_free(mpi_nbmx_world(j), ierr)
+          call mpi_comm_split(mpi_comm_group, icolor, key, mpi_nbmx_world(j),ierr)
           call mpi_comm_size(mpi_nbmx_world(j), newpes, ierr)
           call mpi_comm_rank(mpi_nbmx_world(j), newmype,ierr)
        end do
@@ -892,7 +899,9 @@ contains
                    icolor = 1
                    key = myrank_nbmx_k
                 end if
-                if(firstcall) call mpi_comm_split(mpi_k_world(myrank_k), icolor, key, mpi_nbmx_world_k(j),ierr)
+!                if(firstcall) call mpi_comm_split(mpi_k_world(myrank_k), icolor, key, mpi_nbmx_world_k(j),ierr)
+                if(.not.firstcall) call mpi_comm_free(mpi_nbmx_world_k(j), ierr)
+                call mpi_comm_split(mpi_k_world(myrank_k), icolor, key, mpi_nbmx_world_k(j),ierr)
                 call mpi_comm_size(mpi_nbmx_world_k(j), newpes, ierr)
                 call mpi_comm_rank(mpi_nbmx_world_k(j), newmype,ierr)
              end do
@@ -932,365 +941,22 @@ contains
   end subroutine set_block_range4allgather
 
 ! === necessary to make 3D_Parallel, too!!! by tkato ===========================
-!!BRANCH_P ORG_Parallel
-! ==============================================================================
-  subroutine m_Parallel_init_mpi_elec(nfout,ipri,printable,neg,kv3,nspin,kg1)
-    integer, intent(in) :: nfout,ipri,neg, kv3, nspin, kg1
-    logical, intent(in) :: printable
-    logical, save       :: firstcall=.true.
-
-#ifdef NEC_TUNE_SOFT
-    character*4 F_RSVTASK
-    integer :: tmp_a1,tmp_a2,tmp_b1,tmp_b2
-#elif NEC_TUNE_FFT
-    character*4 F_RSVTASK
-#endif
-
-    integer             :: i, j, ip, icolor, key, kv3_half
-
-!!$! =========================================== added by K. Tagami ======== 11.0
-!!$    integer ::           kv3_div
-!!$! ======================================================================= 11.0
-
-    integer             :: newpes, newmype
-    logical             :: set_mapping_func
-    integer, pointer, dimension(:) :: nproc2rank_k, nproc2rank_e
-
-    integer, parameter  :: nc = 24
-    integer, parameter  :: nc1 = 16
-!!$    integer, parameter  :: nc2 = 18
-    integer             :: i0, i1, sw_title
-    character*7         :: strmap
-
-    sw_title = 1
-    if(ipri >= 2 .and. printable) then
-       write(nfout,'(" !|| << m_Parallel_init_mpi_elec >>")')
-       write(nfout,'(" !|| neg, kv3 = ",i5, i8)') neg, kv3
-    end if
-    if(neg <= 0 ) stop " neg is not positive value "
-    if(kv3 <= 0 ) stop " kv3 is not positive value "
-
-!!!!!!!!!!!!!!!!! added by mizouchi@adv 2003.02.26 !!!!!!!!!!!!!!
-#ifdef IRIX64
-
-! ====================================== modified by K. Tagami =============== 11.0
-!!    if((kv3 == 1.and.nspin ==1).or.(kv3 == 2.and.nspin ==2)) then
-    if ( kv3/nspin == 1 ) then
-! ===========================================================================  11.0
-
-       nrank_e  = npes
-       nrank_k  = 1
-       if(ipri >= 1 .and. printable) then
-          write(nfout,'(" !| modified nrank_e and nrank_k due to 1 kpoint calculation  ")')
-          write(nfout,'(" !| modified nrank_e = ", i4)') nrank_e
-          write(nfout,'(" !| modified nrank_k = ", i4)') nrank_k
-       end if
-#endif
-!!!!!!!!!!!!!!!!! added by mizouchi@adv 2003.02.26 !!!!!!!!!!!!!!
-
-!    if(nrank_k > kv3) stop ' nrank_k > kv3 (m_Parallel_init_mpi_elec)'
-    if(nrank_k > kv3) call phase_execution_error(PARALLELIZATION_INVALID_NK)
-!!$    if(nrank_e > neg) call phase_execution_error(PARALLELIZATION_INVALID_NE) !! not here !!
-
-    allocate(map_e(neg))
-    allocate(map_z(neg))
-    allocate(map_k(kv3))
-    allocate(map_ek(neg,kv3))
-    if(.not.allocated(mpi_k_world)) allocate(mpi_k_world(0:nrank_k-1))
-    if(.not.allocated(mpi_e_world)) allocate(mpi_e_world(0:nrank_e-1))
-    allocate(nis_e(0:nrank_e-1)); nis_e = neg+1
-    allocate(nie_e(0:nrank_e-1)); nie_e = 0
-    allocate(nel_e(0:nrank_e-1)); nel_e = 0
-    allocate(idisp_e(0:nrank_e-1)); idisp_e = 0
-!!$#ifdef TRANSPOSE
-    allocate(nis_g1(0:nrank_g1-1)); nis_g1 = kg1 + 1
-    allocate(nie_g1(0:nrank_g1-1)); nie_g1 = 0
-    allocate(nel_g1(0:nrank_g1-1)); nel_g1 = 0
-!!$#endif
-    allocate(nis_k(0:nrank_k-1)); nis_k = 0
-    allocate(nie_k(0:nrank_k-1))
-    allocate(nel_k(0:nrank_k-1))
-
-! (( myrank_e, myrank_k ))
-    allocate(nproc2rank_e(0:npes-1))
-    allocate(nproc2rank_k(0:npes-1))
-    ip = 0
-    do i = 0, nrank_k-1
-       do j = 0, nrank_e-1
-          nproc2rank_k(ip) = i
-          nproc2rank_e(ip) = j
-          ip = ip + 1
-       end do
-    end do
-    myrank_k = nproc2rank_k(mype)
-    myrank_e = nproc2rank_e(mype)
-    deallocate(nproc2rank_k)
-    deallocate(nproc2rank_e)
-! (( map_e, nis_e, nie_e, nel_e, ista_e, iend_e, istep_e, np_e, mp_e  ))
-!!$#ifdef TRANSPOSE
-    set_mapping_func = .true.
-    call set_block_range(neg,nrank_e,nel_e,nis_e,nie_e, set_mapping_func, map_e)
-    do i = 0, nrank_e-1
-       idisp_e(i) = nis_e(i)-1
-    end do
-    if(ipri >= 2) call wd_e_range
-    istep_e = 1
-!!$#else
-!!$    do i = 1, neg
-!!$       map_e(i) = mod(i-1,nrank_e)
-!!$    end do
-!!$    do i = 1, neg
-!!$       ip = map_e(i)
-!!$       nel_e(ip) = nel_e(ip) + 1
-!!$       if(nis_e(ip) > i) nis_e(ip) = i
-!!$       if(nie_e(ip) < i) nie_e(ip) = i
-!!$    end do
-!!$    istep_e = nrank_e
-!!$
-!!$#endif
-    if(ipri >= 2 .and. printable) then
-       strmap = "   i   "
-       call wd_maparray(neg,map_e,strmap," map_e ",sw_title)
-    end if
-
-    ista_e = nis_e(myrank_e)
-    iend_e = nie_e(myrank_e)
-    np_e = nel_e(myrank_e)
-    mp_e = maxval(nel_e)
-    if(ipri==1 .and. printable) then
-       if(mp_e < 100000) then
-          write(nfout,'(" !|| ista_e, iend_e = ",i5,",",i5,", mp_e = ",i5,", neg = ",i12,", kv3 = ",i5 &
-               & ," << m_Parallel_init_mpi_elec >>")') ista_e,iend_e,mp_e,neg,kv3
-       else if(mp_e <100000000) then
-          write(nfout,'(" !|| ista_e, iend_e = ",i8,",",i8,", mp_e = ",i8,", neg = ",i13,", kv3 = ",i5 &
-               & ," << m_Parallel_init_mpi_elec >>")') ista_e,iend_e,mp_e,neg,kv3
-       else
-          write(nfout,'(" !|| ista_e, iend_e = ",i0,",",i0,", mp_e = ",i0,", neg = ",i0,", kv3 = ",i5 &
-               & ," << m_Parallel_init_mpi_elec >>")') ista_e,iend_e,mp_e,neg,kv3
-       end if
-    end if
-
-
-#ifdef NEC_TUNE_SOFT
-    call getenv('F_RSVTASK',F_RSVTASK)
-    read (F_RSVTASK,'(i4)') itask
-    allocate(ista_e_smp(itask))
-    allocate(iend_e_smp(itask))
-
-    tmp_a1 = (iend_e-ista_e+1)/istep_e
-    tmp_a2 = mod(iend_e-ista_e+1,istep_e)
-    if (tmp_a2 .ne. 0) tmp_a1=tmp_a1+1
-    tmp_b1 = tmp_a1/itask
-    tmp_b2 = mod(tmp_a1,itask)
-    if (tmp_b2 .ne. 0) tmp_b1=tmp_b1+1
-
-    do i=1,itask
-      ista_e_smp(i) = ista_e + tmp_b1 * istep_e * (i -1)
-      iend_e_smp(i) = min((ista_e + tmp_b1 * istep_e * i -1),iend_e)
-    end do
-#elif NEC_TUNE_FFT
-    call getenv('F_RSVTASK',F_RSVTASK)
-    read (F_RSVTASK,'(i4)') itask
-#endif
 
-    if(ipri >= 2 .and. printable) &
-         & write(nfout,'(" !|| -- ista_e, iend_e, istep_e, np_e, mp_e = ",5i6)') &
-         &  ista_e,iend_e,istep_e, np_e, mp_e
-
-! (( map_z ))
-!!$#ifdef TRANSPOSE
-    j = 0
-    do ip = 1, nrank_e
-       do i = 1, nel_e(ip-1)
-          j = j + 1
-          map_z(j) = i
-       end do
-    end do
-!!$#else
-!!$    ip = 1
-!!$    do i = 1, neg
-!!$       map_z(i) = ip
-!!$       if(mod(i,nrank_e) == 0) ip = ip + 1
-!!$    end do
-!!$#endif
-    if(ipri >= 2 .and. printable) then
-       strmap = "   i   "
-       call wd_maparray(neg,map_z,strmap," map_z ",sw_title)
-    end if
-! (( map_k, nis_k, nie_k, nel_k, ista_k, iend_k ))
-    if(kv3/nspin < nrank_k) then
-       if(ipri >= 1 .and. printable) then
-          write(nfout,'(" ******")')
-          write(nfout,'(" ** The nrank_k that you have specified is smaller than the number of k-points (kv3/nspin).")')
-          write(nfout,'(" ** kv3/nspin = ",i8," nrank_k = ",i8)') kv3/nspin, nrank_k
-          write(nfout,'(" ** Reduce the size of the rank for k-point division (nrank_k) &
-  &                     which is set in nk= option when you run this program, please")')
-          call flush(nfout)
-       end if
-       stop ' ** The nrank_k that is specified is smaller than the number of generated or read k-points (=kv3/nspin).'
-    end if
-    if(nspin == 1) then
-       set_mapping_func = .true.
-       call set_block_range(kv3,nrank_k,nel_k,nis_k,nie_k, set_mapping_func,map_k)
-       ista_k = nis_k(myrank_k)
-       iend_k = nie_k(myrank_k)
-    else if(nspin == 2) then
-       set_mapping_func = .false.
-       kv3_half = kv3/nspin
-       call set_block_range(kv3_half,nrank_k,nel_k,nis_k,nie_k, set_mapping_func)
-       nel_k = nel_k*nspin
-       nie_k = nie_k*nspin
-       nis_k = nis_k*nspin-1
-       j = 0
-       do i = 1, kv3
-          if(nie_k(j) < i) j = j + 1
-          map_k(i) = j
-       end do
-       ista_k = nis_k(myrank_k)
-       iend_k = nie_k(myrank_k)
-    end if
-
-    if(ipri >= 2 .and. printable) then
-       strmap = "   i   "
-       call wd_maparray(kv3,map_k,strmap," map_k ",sw_title)
-       write(nfout,'(" !|| -- myrank_k = ",i8)') myrank_k
-       write(nfout,'(" !|| -- ista_k, iend_k = ",2i8)') ista_k,iend_k
-    end if
-   if(ista_k > iend_k) stop ' !! illegal combination of ista_k, and iend_k'
- 
-! (( map_ek ))
-    do j = 1, kv3
-       do i = 1, neg
-          map_ek(i,j) = map_e(i) + map_k(j)*(nrank_e)
-       end do
-    end do
-
-    if(ipri >= 2 .and. printable) then
-       write(nfout,'(" !|| --- map_ek ---")')
-       sw_title = 0
-       do j = 1, kv3
-          write(strmap,'("ik=",i4)') j
-          call wd_maparray(neg,map_ek(1,j),strmap,"map_ek ",sw_title)
-       end do
-    end if
-
-!!$#ifdef TRANSPOSE
-! (( ista_g1, iend_g1 ))
-    set_mapping_func = .false.
-    if(ipri >= 2 .and. printable) write(nfout,'(" !|| kg1 = ",i10)') kg1
-    call set_block_range(kg1,nrank_e,nel_g1,nis_g1,nie_g1, set_mapping_func)
-    ista_g1 = nis_g1(myrank_e)
-    iend_g1 = nie_g1(myrank_e)
-    np_g1   = nel_g1(myrank_e)
-    mp_g1   = maxval(nel_g1) 
-    if(ipri >= 2 .and. printable) then
-       write(nfout,'(" !|| --- mis_g1,nie_g1,nel_g1 ---")')
-       write(nfout,'(" !|| ( rank_e  )",15i7)')(i,i=1,nrank_e)
-       write(nfout,'(" !|| ( nis_g1  )",15i7)')(nis_g1(i),i=0,nrank_e-1)
-       write(nfout,'(" !|| ( nie_g1  )",15i7)')(nie_g1(i),i=0,nrank_e-1)
-       write(nfout,'(" !|| ( nel_g1  )",15i7)')(nel_g1(i),i=0,nrank_e-1)
-       write(nfout,'(" !|| (myrank_e = ",i3," kg1,ista_g1,iend_g1,np_g1,mp_g1 = ",5i7)') &
-            & myrank_e,kg1,ista_g1,iend_g1,np_g1,mp_g1
-    end if
-!!$#endif
-
-! (( mpi_k_world ))
-    do j = 0, nrank_k-1
-       icolor = 0
-       key = 0
-       do i = 0, nrank_e-1
-          if(mype == i + nrank_e*j) then
-             icolor = 1
-             key = i
-          end if
-       end do
-       if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_k_world(j),ierr)
-       call mpi_comm_size(mpi_k_world(j), newpes,ierr)
-       call mpi_comm_rank(mpi_k_world(j), newmype,ierr)
-!!$       print '(" mype = ",i3," newmype = ", i3," newpes = ",i3," mpi_k_world = ", i3 &
-!!$            &, " icolor = ",i3," key = ",i3)' &
-!!$            & ,mype,newmype, newpes,mpi_k_world(j),icolor, key
-    end do
-
-    do j=0,nrank_e-1
-       icolor = myrank_e
-       key = mype
-       if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_e_world(j),ierr)
-       call mpi_comm_size(mpi_e_world(j), newpes,ierr)
-       call mpi_comm_rank(mpi_e_world(j), newmype,ierr)
-    enddo
-    icolor = myrank_e
-    key    = mype
-    call mpi_comm_split(mpi_comm_group, icolor, key, mpi_ge_world, ierr)
-    call mpi_comm_size(mpi_ge_world, newpes, ierr)
-    call mpi_comm_rank(mpi_ge_world, newmype, ierr)
-    firstcall = .false.
-  contains
-    subroutine wd_maparray(nelement,map_a,map_title0,map_title,sw_title)
-      integer, intent(in) ::                 nelement
-      integer, dimension(nelement), intent(in) :: map_a
-      character*7, intent(in) ::             map_title0,map_title
-      integer, intent(in) ::                 sw_title
-      integer :: nca, j, i0, i1, i, iloop
-
-      if(sw_title==1)  write(nfout,'(" !|| --- ",a7," ---")') map_title
-      if(nelement < 100) then
-         nca = nc      ! = 24
-      else if(nelement < 1000) then
-         nca = nc*3/4  ! = 18
-      else if(nelement < 10000) then
-         nca = nc*3/5  ! = 14
-      else if(nelement < 100000) then
-         nca = nc*3/6  ! = 12
-      else
-         nca = nc*3/9  ! = 8
-      end if
-      iloop = ceiling(dble(nelement)/nca)
-      do j = 1, iloop
-         i0 = (j-1)*nca + 1
-         i1 = i0 + nca -1
-         if(i1 > nelement) i1 = nelement
-         if(nelement < 100) then
-            write(nfout,'(" !|| (",a7,")",24i3)') map_title0,(i,i=i0,i1)
-            write(nfout,'(" !|| (",a7,")",24i3)') map_title, (map_a(i),i=i0,min(nelement,i1))
-         else if(nelement < 1000) then
-            write(nfout,'(" !|| (",a7,")",18i4)') map_title0,(i,i=i0,i1)
-            write(nfout,'(" !|| (",a7,")",18i4)') map_title, (map_a(i),i=i0,i1)
-         else if(nelement < 10000) then
-            write(nfout,'(" !|| (",a7,")",14i5)') map_title0,(i,i=i0,i1)
-            write(nfout,'(" !|| (",a7,")",14i5)') map_title, (map_a(i),i=i0,i1)
-         else if(nelement < 100000) then
-            write(nfout,'(" !|| (",a7,")",12i5)') map_title0,(i,i=i0,i1)
-            write(nfout,'(" !|| (",a7,")",12i5)') map_title, (map_a(i),i=i0,i1)
-         else
-            write(nfout,'(" !|| (",a7,")",8i9)') map_title0,(i,i=i0,i1)
-            write(nfout,'(" !|| (",a7,")",8i9)') map_title, (map_a(i),i=i0,i1)
-         end if
-      end do
-    end subroutine wd_maparray
-
-    subroutine wd_e_range
-      if(printable) then
-         write(nfout,'(" !|| nrank_e")')
-         write(nfout,'(" !||    i    : ",20i4)')(i,i=0,nrank_e-1)
-         write(nfout,'(" !|| nis_e   : ",20i4)')(nis_e(i),i=0,nrank_e-1)
-         write(nfout,'(" !|| nie_e   : ",20i4)')(nie_e(i),i=0,nrank_e-1)
-         write(nfout,'(" !|| nel_e   : ",20i4)')(nel_e(i),i=0,nrank_e-1)
-!!$         write(nfout,'(" !||    i : ",20i4)')(i,i=0,nrank_e-1)
-!!$         write(nfout,'(" !|| nis_e : ",20i4)')(nis_e(i),i=0,nrank_e-1)
-!!$         write(nfout,'(" !|| nie_e : ",20i4)')(nie_e(i),i=0,nrank_e-1)
-!!$         write(nfout,'(" !|| nel_e : ",20i4)')(nel_e(i),i=0,nrank_e-1)
-!!$#ifdef TRANSPOSE
-         write(nfout,'(" !|| idisp_e : ",20i4)')(idisp_e(i),i=0,nrank_e-1)
-!!$#endif
-      end if
-    end subroutine wd_e_range
-
-  end subroutine m_Parallel_init_mpi_elec
-! === necessary to make 3D_Parallel, too!!! by tkato ===========================
-!!BRANCH_P_END ORG_Parallel
 ! ==============================================================================
   subroutine m_Parallel_dealloc_mpi_fft_box
+    if(allocated(nis_fft_X_z)) deallocate(nis_fft_X_z)
+    if(allocated(nis_fft_X_y)) deallocate(nis_fft_X_y)
+    if(allocated(nis_fft_Z_x)) deallocate(nis_fft_Z_x)
+    if(allocated(nis_fft_Z_y)) deallocate(nis_fft_Z_y)
+    if(allocated(nis_fft_Y_x)) deallocate(nis_fft_Y_x)
+    if(allocated(nis_fft_Y_z)) deallocate(nis_fft_Y_z)
+    if(allocated(nie_fft_X_z)) deallocate(nie_fft_X_z)
+    if(allocated(nie_fft_X_y)) deallocate(nie_fft_X_y)
+    if(allocated(nie_fft_Z_x)) deallocate(nie_fft_Z_x)
+    if(allocated(nie_fft_Z_y)) deallocate(nie_fft_Z_y)
+    if(allocated(nie_fft_Y_x)) deallocate(nie_fft_Y_x)
+    if(allocated(nie_fft_Y_z)) deallocate(nie_fft_Y_z)
+
     if(allocated(map_fft_x)) deallocate(map_fft_x)
     if(allocated(map_fft_y)) deallocate(map_fft_y)
     if(allocated(map_fft_z)) deallocate(map_fft_z)
@@ -1475,6 +1141,8 @@ contains
     if(ipri >= iprilevel) write(nfout,'(" !|| myrank_ggacmp, myrank_cdfft",/ &
          & ," !|| = ",2i10," <<m_Parallel_init_mpi_cdfft>>")') &
          & myrank_ggacmp, myrank_cdfft
+    if(allocated(map_pe2ggacmp)) deallocate(map_pe2ggacmp)
+    if(allocated(map_pe2cdfft)) deallocate(map_pe2cdfft)
     allocate(map_pe2ggacmp(0:npes-1))
     allocate(map_pe2cdfft(0:npes-1))
     do i = 0, npes-1
@@ -1487,6 +1155,25 @@ contains
          & (map_pe2cdfft(i),i=0,npes-1)
   end subroutine m_Parallel_init_mpi_cdfft
 
+  subroutine m_Parallel_dealloc_mpi_gga()
+    if(allocated(map_ggacmp))  deallocate(map_ggacmp)
+    if(allocated(nis_fftp))    deallocate(nis_fftp)
+    if(allocated(nie_fftp))    deallocate(nie_fftp)
+    if(allocated(nel_fftp))    deallocate(nel_fftp)
+    if(allocated(idisp_fftp))  deallocate(idisp_fftp)
+    if(allocated(nis_fftph))   deallocate(nis_fftph)
+    if(allocated(nie_fftph))   deallocate(nie_fftph)
+    if(allocated(nel_fftph))   deallocate(nel_fftph)
+    if(allocated(idisp_fftph)) deallocate(idisp_fftph)
+    if(allocated(nis_sfftp))   deallocate(nis_sfftp)
+    if(allocated(nie_sfftp))   deallocate(nie_sfftp)
+    if(allocated(nel_sfftp))   deallocate(nel_sfftp)
+    if(allocated(idisp_sfftp)) deallocate(idisp_sfftp)
+    if(allocated(nis_sfftph))  deallocate(nis_sfftph)
+    if(allocated(nie_sfftph))  deallocate(nie_sfftph)
+    if(allocated(nel_sfftph))  deallocate(nel_sfftph)
+  end subroutine m_Parallel_dealloc_mpi_gga
+
   subroutine m_Parallel_init_mpi_gga(nfout,ipri,printable,nfftp,nfftps)
     integer, intent(in)  :: nfout,ipri,nfftp,nfftps
     logical, intent(in)  :: printable
@@ -1625,7 +1312,9 @@ contains
 !!$             key = i
 !!$          end if
 !!$       end do
-       if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_cdfft_world(j),ierr)
+!       if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_cdfft_world(j),ierr)
+       if(.not.firstcall) call mpi_comm_free(mpi_cdfft_world(j), ierr)
+       call mpi_comm_split(mpi_comm_group,icolor,key,mpi_cdfft_world(j),ierr)
        call mpi_comm_size(mpi_cdfft_world(j), newpes, ierr)
        call mpi_comm_rank(mpi_cdfft_world(j), newmype, ierr)
     end do
@@ -1643,7 +1332,9 @@ contains
 !!$             key = i
 !!$          end if
 !!$       end do
-       if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_ggacmp_cross_world(j),ierr)
+!       if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_ggacmp_cross_world(j),ierr)
+       if(.not.firstcall) call mpi_comm_free(mpi_ggacmp_cross_world(j), ierr)
+       call mpi_comm_split(mpi_comm_group,icolor,key,mpi_ggacmp_cross_world(j),ierr)
        call mpi_comm_size(mpi_ggacmp_cross_world(j),newpes,ierr)
        call mpi_comm_rank(mpi_ggacmp_cross_world(j),newmype,ierr)
        if(ipri >= 2) then
@@ -1658,6 +1349,13 @@ contains
     firstcall = .false.
   end subroutine m_Parallel_init_mpi_gga
     
+
+  subroutine m_Parallel_dealloc_mpi_mix()
+    if(allocated(is_kgpm))  deallocate(is_kgpm)
+    if(allocated(ie_kgpm))  deallocate(ie_kgpm)
+    if(allocated(nel_kgpm)) deallocate(nel_kgpm)
+  end subroutine m_Parallel_dealloc_mpi_mix
+
   subroutine m_Parallel_init_mpi_mix(nfout,ipri,printable,kgpm,comm_for_chg)
     integer, intent(in) :: nfout,ipri,kgpm
     logical, intent(in) :: printable
@@ -1914,6 +1612,9 @@ contains
     !!npes = nrank_g
     !!mype = myrank_g
 
+    if(allocated(is_atm_f)) deallocate(is_atm_f)
+    if(allocated(ie_atm_f)) deallocate(ie_atm_f)
+    if(allocated(nel_atm_f)) deallocate(nel_atm_f)
     allocate(is_atm_f(0:npes-1))
     allocate(ie_atm_f(0:npes-1))
     allocate(nel_atm_f(0:npes-1))
@@ -2011,6 +1712,9 @@ contains
                                                   __TIMER_SUB_START(1239)
     npes = nrank_g
     mype = myrank_g
+    if(allocated(is_atm2)) deallocate(is_atm2)
+    if(allocated(ie_atm2)) deallocate(ie_atm2)
+    if(allocated(nel_atm2)) deallocate(nel_atm2)
     allocate(is_atm2(0:npes-1))
     allocate(ie_atm2(0:npes-1))
     allocate(nel_atm2(0:npes-1))
@@ -2047,7 +1751,6 @@ contains
   end subroutine m_Parallel_init_mpi_atm2
 
 
-
   subroutine split_into_ggablock_cdfft(npes,nrank_ggacmp,npes_cdfft,nrest_cdfft)
     integer, intent(in)  :: npes
     integer, intent(out) :: nrank_ggacmp, npes_cdfft, nrest_cdfft
@@ -2126,10 +1829,8 @@ contains
 
     if(allocated(ista_g1k)) deallocate(ista_g1k)
     if(allocated(iend_g1k)) deallocate(iend_g1k)
-    if(.not.allocated(np_g1k_prev)) allocate(np_g1k_prev(size(np_g1k)))
-    np_g1k_prev = np_g1k
     if(allocated(np_g1k)) deallocate(np_g1k)
-!    if(allocated(np_g1k_prev)) deallocate(np_g1k_prev)
+    if(allocated(np_g1k_prev)) deallocate(np_g1k_prev)
     if(allocated(mp_g1k)) deallocate(mp_g1k)
     
     if(allocated(nis_fftp)) deallocate(nis_fftp)
@@ -2354,7 +2055,9 @@ contains
            ikey = i
          end if
        end do
-       if(firstcall) call mpi_comm_split( mpi_comm_world, icolor, ikey, new_comm_world(i), mpi_err )
+!       if(firstcall) call mpi_comm_split( mpi_comm_world, icolor, ikey, new_comm_world(i), mpi_err )
+       if(.not.firstcall) call mpi_comm_free(new_comm_world(i), mpi_err)
+       call mpi_comm_split( mpi_comm_world, icolor, ikey, new_comm_world(i), mpi_err )
      end do
  
      mype_conf = mypetmp/(npestmp/nrank_conf)
@@ -3934,7 +3637,7 @@ contains
     read (F_RSVTASK,'(i4)') itask
 #endif
 
-    if(ipri >= 1 .and. printable) &
+    if(ipri >= 2 .and. printable) &
          & write(nfout,'(" !|| m_P_init_mpi_elec_3D -- ista_e, iend_e, istep_e, np_e, mp_e = ",5i6)') &
          &  ista_e,iend_e,istep_e, np_e, mp_e
 
@@ -3993,9 +3696,46 @@ contains
  
    icolor = myrank_g
    key = myrank_e+myrank_k*nrank_e
-   if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_g_world,ierr)
+!   if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_g_world,ierr)
+   if(.not.firstcall) call mpi_comm_free(mpi_g_world, ierr)
+   call mpi_comm_split(mpi_comm_group, icolor, key, mpi_g_world,ierr)
    call mpi_comm_size(mpi_g_world, nrank_ke,  ierr)
    call mpi_comm_rank(mpi_g_world, myrank_ke, ierr)
+
+!! coded by T. Yamasaki, 2021.02.25 -->
+   if(allocated(map_rank_gek)) deallocate(map_rank_gek)
+   allocate(map_rank_gek(0:nrank_g-1,0:nrank_e-1,0:nrank_k-1)); map_rank_gek = 0
+   map_rank_gek(myrank_g,myrank_e,myrank_k) = mype
+   call mpi_allreduce(MPI_IN_PLACE,map_rank_gek,npes,mpi_integer,mpi_sum,mpi_comm_group,ierr)
+!!$
+!!$   ip = 0
+!!$   do k = 0, nrank_k-1
+!!$      do i = 0, nrank_g-1
+!!$         do j = 0, nrank_e-1
+!!$            map_rank_gek(i,j,k) = ip
+!!$            ip = ip + 1
+!!$         end do
+!!$      end do
+!!$   end do
+   if(ipri>=2 .and. printable) then
+      ip = 0
+      write(nfout,'(" !||  No.  rank_k,rank_e,rank_g,   map_rank_gek")')
+      do k = 0, nrank_k-1
+         do j = 0, nrank_e-1
+            do i = 0, nrank_g-1
+               write(nfout,'(" !|| ",4i6," ",i6)') ip,k,j,i, map_rank_gek(i,j,k)
+               ip = ip + 1
+            end do
+         end do
+      end do
+      write(nfout,'(" myrank_k, myrank_e, myrank_g, mype")')
+      write(nfout,'(4i8)') myrank_k,myrank_e,myrank_g,mype
+   end if
+   if(map_rank_gek(myrank_g,myrank_e,myrank_k) /= mype) then
+      write(nfout,'(" map_rank_gek is not right")')
+      stop ' map_rank_gek is not right'
+   end if
+!! <--
 #if 0
 ! (( map_ek ))
     do j = 1, kv3
@@ -4307,7 +4047,12 @@ contains
     integer :: ibuf_temp
     allocate(np(0:nrank_g-1)); np = 0.d0
     
-
+    if(allocated(nis_fs)) deallocate(nis_fs)
+    if(allocated(nie_fs)) deallocate(nie_fs)
+    if(allocated(nel_fs)) deallocate(nel_fs)
+    if(allocated(nis_fs_atm)) deallocate(nis_fs_atm)
+    if(allocated(nie_fs_atm)) deallocate(nie_fs_atm)
+    if(allocated(nel_fs_atm)) deallocate(nel_fs_atm)
     allocate(nis_fs(0:nrank_g-1))
     allocate(nie_fs(0:nrank_g-1))
     allocate(nel_fs(0:nrank_g-1))
@@ -4431,6 +4176,8 @@ contains
     end if
 
 ! for zaj_l_ball fs(ri)_l_ball
+  if(allocated(ball_buff)) deallocate(ball_buff)
+  if(allocated(ball_addr)) deallocate(ball_addr)
   allocate(ball_buff(0:nrank_e-1))
   allocate(ball_addr(0:nrank_e-1))
   ibuf_temp = np_e*np_fs
@@ -4785,6 +4532,7 @@ contains
       xyz_fft_x(2,3) = wk_x(6,myrank+1)
     endif
 
+    if(allocated(map_fft_x)) deallocate(map_fft_x)
     allocate(map_fft_x( nfft ), stat=ichkalloc)
     if(ichkalloc /= 0) then
        write(nfout,*)'could not allocate in m_Parallel_mpi_fft_box 2'
@@ -4825,6 +4573,7 @@ contains
           np_fft_x = (wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)* &
          &           (wk_x(4,myrank+1)-wk_x(3,myrank+1)+1)* &
          &           (wk_x(6,myrank+1)-wk_x(5,myrank+1)+1)
+          if(allocated(mp_fft_x)) deallocate(mp_fft_x)
           allocate(mp_fft_x(np_fft_x), stat=ichkalloc)
        end if
        if(ichkalloc /= 0) then
@@ -4851,6 +4600,7 @@ contains
 
     deallocate(wk_fft_box)
 
+    if(allocated(nel_fft_x)) deallocate(nel_fft_x)
     allocate(nel_fft_x( 0:nmrank-1 ), stat=ichkalloc)
     if(ichkalloc /= 0) then
        write(nfout,*)'could not allocate in m_Parallel_mpi_fft_box 4'
@@ -4898,14 +4648,15 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) then
-     call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr)
-     if (ierr /= 0) then
-        write(nfout,*)' m_Parallel_mpi_fft_box :  mpi_comm_split error'
-        call flush(nfout)
-        call mpi_abort(mpi_comm_world, 100003, ierr)
-     endif
+!    if(firstcall) then
+    if(.not. firstcall) call mpi_comm_free(mpi_fft_zy_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr)
+    if (ierr /= 0) then
+       write(nfout,*)' m_Parallel_mpi_fft_box :  mpi_comm_split error'
+       call flush(nfout)
+       call mpi_abort(mpi_comm_world, 100003, ierr)
     endif
+!    endif
     firstcall = .false.
                                                   __TIMER_SUB_STOP(1232)
   end subroutine m_Parallel_mpi_fft_box_3div
@@ -5073,6 +4824,9 @@ contains
       xyz_fft_z(2,3) = fft_box_size_WF(3,n)
     endif
 
+    if(allocated(map_fft_x)) deallocate(map_fft_x)
+    if(allocated(map_fft_z)) deallocate(map_fft_z)
+    if(allocated(map_fft_y)) deallocate(map_fft_y)
     allocate(map_fft_x( nfft ), stat=ichkalloc)
     allocate(map_fft_z( nfft ), stat=ichkalloc)
     allocate(map_fft_y( nfft ), stat=ichkalloc)
@@ -5138,14 +4892,17 @@ contains
     if ((dim1*dim2) > myrank) then
        if (wk_x(1,myrank+1) /= 0) then
           np_fft_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1)
+          if(allocated(mp_fft_x)) deallocate(mp_fft_x)
           allocate(mp_fft_x(np_fft_x), stat=ichkalloc)
        end if
        if (wk_z(1,myrank+1) /= 0) then
           np_fft_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3
+          if(allocated(mp_fft_z)) deallocate(mp_fft_z)
           allocate(mp_fft_z(np_fft_z), stat=ichkalloc)
        end if
        if (wk_y(1,myrank+1) /= 0) then
           np_fft_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1)
+          if(allocated(mp_fft_y)) deallocate(mp_fft_y)
           allocate(mp_fft_y(np_fft_y), stat=ichkalloc)
        end if
        if(ichkalloc /= 0) then
@@ -5240,6 +4997,9 @@ contains
 
     deallocate(wk_fft_box)
 
+    if(allocated(nel_fft_x)) deallocate(nel_fft_x)
+    if(allocated(nel_fft_z)) deallocate(nel_fft_z)
+    if(allocated(nel_fft_y)) deallocate(nel_fft_y)
     allocate(nel_fft_x( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fft_z( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fft_y( 0:nmrank-1 ), stat=ichkalloc)
@@ -5333,7 +5093,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xy_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xy_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fft_xy_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xy_world,ierr)
     call mpi_comm_size(mpi_fft_xy_world, nrank_fft_xy, ierr)
     call mpi_comm_rank(mpi_fft_xy_world, myrank_fft_xy, ierr)
      if (ierr /= 0) then
@@ -5359,7 +5121,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_yz_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_yz_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fft_yz_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_yz_world,ierr)
     call mpi_comm_size(mpi_fft_yz_world, nrank_fft_yz, ierr)
     call mpi_comm_rank(mpi_fft_yz_world, myrank_fft_yz, ierr)
      if (ierr /= 0) then
@@ -5578,6 +5342,9 @@ contains
       xyz_fft_y(2,3) = wk_y(2,myrank+1)
     endif
 
+    if(allocated(map_fft_x)) deallocate(map_fft_x)
+    if(allocated(map_fft_z)) deallocate(map_fft_z)
+    if(allocated(map_fft_y)) deallocate(map_fft_y)
     allocate(map_fft_x( nfft ), stat=ichkalloc)
     allocate(map_fft_z( nfft ), stat=ichkalloc)
     allocate(map_fft_y( nfft ), stat=ichkalloc)
@@ -5666,14 +5433,17 @@ contains
     if ((dim1*dim2) > myrank) then
        if (wk_x(1,myrank+1) /= 0) then
           np_fft_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1)
+          if(allocated(mp_fft_x)) deallocate(mp_fft_x)
           allocate(mp_fft_x(np_fft_x), stat=ichkalloc)
        end if
        if (wk_z(1,myrank+1) /= 0) then
           np_fft_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3
+          if(allocated(mp_fft_z)) deallocate(mp_fft_z)
           allocate(mp_fft_z(np_fft_z), stat=ichkalloc)
        end if
        if (wk_y(1,myrank+1) /= 0) then
           np_fft_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1)
+          if(allocated(mp_fft_y)) deallocate(mp_fft_y)
           allocate(mp_fft_y(np_fft_y), stat=ichkalloc)
        end if
        if(ichkalloc /= 0) then
@@ -5768,6 +5538,9 @@ contains
 
     deallocate(wk_fft_box)
 
+    if(allocated(nel_fft_x)) deallocate(nel_fft_x)
+    if(allocated(nel_fft_z)) deallocate(nel_fft_z)
+    if(allocated(nel_fft_y)) deallocate(nel_fft_y)
     allocate(nel_fft_x( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fft_z( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fft_y( 0:nmrank-1 ), stat=ichkalloc)
@@ -5859,7 +5632,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xz_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xz_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fft_xz_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xz_world,ierr)
     call mpi_comm_size(mpi_fft_xz_world, nrank_fft_xz, ierr)
     call mpi_comm_rank(mpi_fft_xz_world, myrank_fft_xz, ierr)
      if (ierr /= 0) then
@@ -5885,7 +5660,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fft_zy_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr)
     call mpi_comm_size(mpi_fft_zy_world, nrank_fft_zy, ierr)
     call mpi_comm_rank(mpi_fft_zy_world, myrank_fft_zy, ierr)
      if (ierr /= 0) then
@@ -6050,6 +5827,7 @@ contains
       xyz_fftcd_x(2,3) = wk_x(6,myrank+1)
     endif
 
+    if(allocated(map_fftcd_x)) deallocate(map_fftcd_x)
     allocate(map_fftcd_x( nfft ), stat=ichkalloc)
     if(ichkalloc /= 0) then
        write(nfout,*)'could not allocate in m_Parallel_mpi_fft_box 2'
@@ -6160,7 +5938,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fftcd_zy_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr)
      if (ierr /= 0) then
         write(nfout,*)' m_Parallel_mpi_fft_box_cd :  mpi_comm_split error'
         call flush(nfout)
@@ -6338,6 +6118,9 @@ contains
       xyz_fftcd_z(2,3) = fft_box_size_CD(3,n)
     endif
 
+    if(allocated(map_fftcd_x)) deallocate(map_fftcd_x)
+    if(allocated(map_fftcd_z)) deallocate(map_fftcd_z)
+    if(allocated(map_fftcd_y)) deallocate(map_fftcd_y)
     allocate(map_fftcd_x( nfft ), stat=ichkalloc)
     allocate(map_fftcd_z( nfft ), stat=ichkalloc)
     allocate(map_fftcd_y( nfft ), stat=ichkalloc)
@@ -6403,14 +6186,17 @@ contains
     if ((dim1*dim2) > myrank) then
        if (wk_x(1,myrank+1) /= 0) then
           np_fftcd_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1)
+          if(allocated(mp_fftcd_x)) deallocate(mp_fftcd_x)
           allocate(mp_fftcd_x(np_fftcd_x), stat=ichkalloc)
        end if
        if (wk_z(1,myrank+1) /= 0) then
           np_fftcd_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3
+          if(allocated(mp_fftcd_z)) deallocate(mp_fftcd_z)
           allocate(mp_fftcd_z(np_fftcd_z), stat=ichkalloc)
        end if
        if (wk_y(1,myrank+1) /= 0) then
           np_fftcd_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1)
+          if(allocated(mp_fftcd_y)) deallocate(mp_fftcd_y)
           allocate(mp_fftcd_y(np_fftcd_y), stat=ichkalloc)
        end if
        if(ichkalloc /= 0) then
@@ -6517,6 +6303,9 @@ contains
 
     deallocate(wk_fft_box)
 
+    if(allocated(nel_fftcd_x)) deallocate(nel_fftcd_x)
+    if(allocated(nel_fftcd_z)) deallocate(nel_fftcd_z)
+    if(allocated(nel_fftcd_y)) deallocate(nel_fftcd_y)
     allocate(nel_fftcd_x( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fftcd_z( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fftcd_y( 0:nmrank-1 ), stat=ichkalloc)
@@ -6607,7 +6396,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xy_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xy_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fftcd_xy_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xy_world,ierr)
     call mpi_comm_size(mpi_fftcd_xy_world, nrank_fftcd_xy, ierr)
     call mpi_comm_rank(mpi_fftcd_xy_world, myrank_fftcd_xy, ierr)
      if (ierr /= 0) then
@@ -6633,7 +6424,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_yz_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_yz_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fftcd_yz_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_yz_world,ierr)
     call mpi_comm_size(mpi_fftcd_yz_world, nrank_fftcd_yz, ierr)
     call mpi_comm_rank(mpi_fftcd_yz_world, myrank_fftcd_yz, ierr)
      if (ierr /= 0) then
@@ -6854,6 +6647,9 @@ contains
       xyz_fftcd_y(2,3) = wk_y(2,myrank+1)
     endif
 
+    if(allocated(map_fftcd_x)) deallocate(map_fftcd_x)
+    if(allocated(map_fftcd_z)) deallocate(map_fftcd_z)
+    if(allocated(map_fftcd_y)) deallocate(map_fftcd_y)
     allocate(map_fftcd_x( nfft ), stat=ichkalloc)
     allocate(map_fftcd_z( nfft ), stat=ichkalloc)
     allocate(map_fftcd_y( nfft ), stat=ichkalloc)
@@ -6941,14 +6737,17 @@ contains
     if ((dim1*dim2) > myrank) then
        if (wk_x(1,myrank+1) /= 0) then
           np_fftcd_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1)
+          if(allocated(mp_fftcd_x)) deallocate(mp_fftcd_x)
           allocate(mp_fftcd_x(np_fftcd_x), stat=ichkalloc)
        end if
        if (wk_z(1,myrank+1) /= 0) then
           np_fftcd_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3
+          if(allocated(mp_fftcd_z)) deallocate(mp_fftcd_z)
           allocate(mp_fftcd_z(np_fftcd_z), stat=ichkalloc)
        end if
        if (wk_y(1,myrank+1) /= 0) then
           np_fftcd_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1)
+          if(allocated(mp_fftcd_y)) deallocate(mp_fftcd_y)
           allocate(mp_fftcd_y(np_fftcd_y), stat=ichkalloc)
        end if
        if(ichkalloc /= 0) then
@@ -7043,6 +6842,9 @@ contains
 
     deallocate(wk_fft_box)
 
+    if(allocated(nel_fftcd_x)) deallocate(nel_fftcd_x)
+    if(allocated(nel_fftcd_z)) deallocate(nel_fftcd_z)
+    if(allocated(nel_fftcd_y)) deallocate(nel_fftcd_y)
     allocate(nel_fftcd_x( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fftcd_z( 0:nmrank-1 ), stat=ichkalloc)
     allocate(nel_fftcd_y( 0:nmrank-1 ), stat=ichkalloc)
@@ -7095,7 +6897,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xz_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xz_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fftcd_xz_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xz_world,ierr)
     call mpi_comm_size(mpi_fftcd_xz_world, nrank_fftcd_xz, ierr)
     call mpi_comm_rank(mpi_fftcd_xz_world, myrank_fftcd_xz, ierr)
      if (ierr /= 0) then
@@ -7121,7 +6925,9 @@ contains
        color = MPI_UNDEFINED
     end if
     key = myrank_g
-    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr)
+!    if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr)
+    if(.not.firstcall) call mpi_comm_free(mpi_fftcd_zy_world, ierr)
+    call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr)
     call mpi_comm_size(mpi_fftcd_zy_world, nrank_fftcd_zy, ierr)
     call mpi_comm_rank(mpi_fftcd_zy_world, myrank_fftcd_zy, ierr)
      if (ierr /= 0) then
@@ -8225,6 +8031,14 @@ contains
   end subroutine m_Parallel_fft_onto_chgq_3D
 !===============================================================================
 
+  subroutine m_Parallel_fft_onto_chgq_dealloc()
+    deallocate(fft_chgq_scnt)
+    deallocate(fft_chgq_rcnt)
+    deallocate(fft_chgq_dist)
+    deallocate(fft_chgq_index)
+    deallocate(fft_chgq_recv)
+  end subroutine m_Parallel_fft_onto_chgq_dealloc
+
 #ifdef FFT_3D_DIVISION_CD
 !===============================================================================
   subroutine m_Parallel_chgq_onto_fftcd_3D(nfout,kgp,fft_box_size_CD_3D,igfp_l)
@@ -8652,6 +8466,13 @@ contains
 !===============================================================================
 #endif
 !ifdef FFT_3D_DIVISION_CD
+  subroutine m_Parallel_chgq_onto_fftcd_dealloc()
+      deallocate(chgq_fftcd_scnt)
+      deallocate(chgq_fftcd_rcnt)
+      deallocate(chgq_fftcd_index)
+      deallocate(chgq_fftcd_dist)
+      deallocate(chgq_fftcd_recv)
+  end subroutine m_Parallel_chgq_onto_fftcd_dealloc
 
 !===============================================================================
   subroutine m_Parallel_fftcd_onto_chgq_3D(nfout,kgp,fft_box_size_CD_3D,igfp_l)
@@ -8848,6 +8669,14 @@ contains
                                                   __TIMER_SUB_STOP(1247)
   end subroutine m_Parallel_fftcd_onto_chgq_3D
 
+  subroutine m_Parallel_fftcd_onto_chgq_dealloc()
+    deallocate(fftcd_chgq_scnt)
+    deallocate(fftcd_chgq_rcnt)
+    deallocate(fftcd_chgq_dist)
+    deallocate(fftcd_chgq_index)
+    deallocate(fftcd_chgq_recv)
+  end subroutine m_Parallel_fftcd_onto_chgq_dealloc
+
 !===============================================================================
 
   subroutine m_Parallel_init_mpi_G_iba2_3D(nfout,ipri,printable,kv3,iba2)
@@ -9053,10 +8882,14 @@ contains
     myrank_natm = mod(mype,nrank_natm)
     myrank_nrc  = mype/nrank_natm
 
-    if(firstcall)then
+    if(.not.firstcall) then
+      call mpi_comm_free(mpi_natm_world, ierr)
+      call mpi_comm_free(mpi_nrc_world,  ierr)
+    endif
+!    if(firstcall)then
     call mpi_comm_split(MPI_COMM_WORLD,myrank_natm,myrank_nrc, mpi_natm_world,ierr)
     call mpi_comm_split(MPI_COMM_WORLD,myrank_nrc, myrank_natm,mpi_nrc_world, ierr)
-    endif
+!    endif
 
     allocate(is_natm (0:nrank_natm-1))
     allocate(ie_natm (0:nrank_natm-1))
diff --git a/src_phase_3d/m_UnitCell.f90 b/src_phase_3d/m_UnitCell.f90
index e67f8a6..1b0375a 100644
--- a/src_phase_3d/m_UnitCell.f90
+++ b/src_phase_3d/m_UnitCell.f90
@@ -887,6 +887,10 @@ subroutine m_UnitCell_init_md()
   integer :: i,j
   real(kind=DP), dimension(3) :: avec,bvec,cvec
   if(.not.first_call) return
+  allocate(p_l(natm,3));p_l = 0.d0
+  allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0
+  allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0
+  allocate(p_l_metric(natm,3));p_l_metric = 0.d0
   metric = matmul(transpose(altv),altv)
   call m_UnitCell_init_md_per_step()
   m_thermo = qmass(1)
@@ -897,14 +901,10 @@ subroutine m_UnitCell_init_md()
   target_temperature = tkb(1)/CONST_kB
 !  gfree = 3*(natm-1)
   gfree = 3*(natm)
-  allocate(p_l(natm,3));p_l = 0.d0
   do i=1,natm
      p_l(i,:) = amion(ityp(i)) * cpd_l(i,:) * s_thermo
   enddo
-  allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0
   call convert_coords(natm,p_l,altv,p_l_lattice)
-  allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0
-  allocate(p_l_metric(natm,3));p_l_metric = 0.d0
   call convert_coords(natm,p_l_lattice,metric_inv,p_l_metric)
   do i=1,3
      do j=1,3
@@ -975,9 +975,12 @@ function get_potential_baro() result(ret)
 end function get_potential_baro
 
 subroutine m_UnitCell_init_md_per_step()
+  real(kind=DP), external :: deter3
+  univol = sqrt(deter3(metric))
   call inver3n(3,metric,metric_inv)
   call inver3n(3,altv,altv_inv)
   call build_dudmetric()
+  H0 = get_H0()
 end subroutine m_UnitCell_init_md_per_step
 
 function tensor_rank2_equals(t1,t2) result(ret)
@@ -1047,6 +1050,28 @@ function m_UC_get_hamiltonian() result(res)
   res = res-H0
 end function m_UC_get_hamiltonian
 
+function get_H0() result (res)
+  real(kind=DP) :: res
+  real(kind=DP) :: k_atm,k_baro,k_thermo,pot_baro,pot_thermo
+  k_atm = get_kinetic_atom(natm,p_l_lattice,metric_inv)
+  k_baro = get_kinetic_baro(matmul(metric,p_baro))
+  pot_baro = get_potential_baro()
+  if(control_pressure == OFF) then
+    k_baro = 0.d0
+    pot_baro = 0.d0
+  endif
+  if(imdalg == PT_CONTROL .and. t_ctrl_method /= VELOCITY_SCALING)then
+     k_thermo = get_kinetic_thermo(p_thermo)
+     pot_thermo = gfree * target_temperature * CONST_kB * log(s_thermo)
+  else if (imdalg == P_CONTROL .or. (imdalg == PT_CONTROL .and. t_ctrl_method == VELOCITY_SCALING)) then
+     k_thermo = 0.d0
+     pot_thermo = 0.d0
+  endif
+!  res = s_thermo * (k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro)
+  res = k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro
+  return
+end function get_H0
+
 function m_UC_get_curr_pressure() result(res)
   real(kind=DP) :: curr_pressure, res
   integer :: i
@@ -1071,6 +1096,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn)
    logical             :: tag_is_found, EOF_reach
    integer,      parameter   :: len_str = 132
    character(len=len_str)       :: str
+
+   call m_UnitCell_init_md()
+
    if(mype==0)then
       call rewind_to_tag0(nfcntn,len(tag_pressure_control), &
             &  tag_pressure_control, & 
@@ -1119,7 +1147,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn)
          call altv_2_rltv(altv,rltv,univol,rvol)  ! in b_CS
          call change_of_coordinate_system(altv,pos,natm,natm,cps) !-(b_I.S.) pos -> cps
          call primitive2bravais(nfout,p2bmat,altv(:,1),altv(:,2),altv(:,3),a,b,c,ca,cb,cc,il) ! in b_CS
-         if(iteration_unit_cell>1) H0_has_been_set = .true.
+         if(iteration_unit_cell>1) then
+           H0_has_been_set = .true.
+         endif
       endif
    endif
 
@@ -1411,8 +1441,8 @@ logical function unitcell_can_change()
 !     return
 !  endif
   if(sw_stress_correction==ON)then
-     unitcell_can_change = .true.
-     return
+    unitcell_can_change = .true.
+    return
   endif
   unitcell_can_change = sw_optimize_lattice==ON
 end function unitcell_can_change
diff --git a/src_phase_3d/mdmain.F90 b/src_phase_3d/mdmain.F90
index 8e4c537..4f8beda 100644
--- a/src_phase_3d/mdmain.F90
+++ b/src_phase_3d/mdmain.F90
@@ -1184,7 +1184,7 @@ end program PHASE
      call m_Ldos_dealloc()
      call m_ES_wf_extrpl_dealloc()
 #ifdef FFTW3
-!     call m_FFT_finalize()
+     call m_FFT_finalize()
 #endif
      call m_CtrlP_set_init_status(.true.)
    end subroutine Array_Deallocate