phase0_2020.01.01.patch
phase0_2020.01.01.patch — differences between files, 129 KB (132599 bytes)
ファイルコンテンツ
diff --git a/src_phase/ChargeDensity_Mixing.F90 b/src_phase/ChargeDensity_Mixing.F90 index 4367fec..83ec997 100644 --- a/src_phase/ChargeDensity_Mixing.F90 +++ b/src_phase/ChargeDensity_Mixing.F90 @@ -108,7 +108,7 @@ subroutine ChargeDensity_Mixing use m_Control_Parameters, only : sw_calc_ekin_density, ekin_density_type, & & sw_mix_charge_with_ekindens use m_CD_mixing, only : m_CD_mix_broyden2_intg, m_CD_mix_pulay_intg, & - & m_CD_simple_mixing_intg + & m_CD_simple_mixing_intg, m_CD_force_dealloc use m_KE_mixing, only : m_KE_simple_mixing, m_KE_mix_broyden2, m_KE_mix_pulay, & & m_KE_prepare_precon, & & m_KE_simple_mixing_00, m_KE_kerker_mixing, & @@ -121,7 +121,7 @@ subroutine ChargeDensity_Mixing real(kind=DP) :: edeltb_per_atom integer,save :: waymix_at_CDmix_previous = 0 integer :: waymix_at_CDmix - logical :: ini = .false. + logical :: ini = .false., mixer_changed real(kind=DP) :: rmxt_tot, rmxt_hard, rmxt_occmat #ifdef __TIMER_SUB__ call timer_sta(1101) @@ -134,7 +134,8 @@ subroutine ChargeDensity_Mixing edeltb_per_atom = m_TE_what_is_edeltb_now()/natm waymix_at_CDmix = m_CtrlP_waymix_now(iteration_electronic, iteration_ionic & - & , edeltb_per_atom) + & , edeltb_per_atom,mixer_changed) + if(mixer_changed) call m_CD_force_dealloc() if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then if(waymix_at_CDmix_previous /= waymix_at_CDmix) call m_Iter_cmix_reset() end if diff --git a/src_phase/Initial_Electronic_Structure.F90 b/src_phase/Initial_Electronic_Structure.F90 index dfb0211..6a1dd25 100644 --- a/src_phase/Initial_Electronic_Structure.F90 +++ b/src_phase/Initial_Electronic_Structure.F90 @@ -85,8 +85,12 @@ subroutine Initial_Electronic_Structure & , m_ES_alloc_eko_ek, m_ES_alloc_eko1 & & , m_ES_cpeko & & , m_ES_alloc_Dhub,vlhxc_l & +#ifdef FFTW3 & , m_ES_cp_zaj_prev_to_zaj & & , m_ES_gen_zaj_from_prev_zaj +#else + & , m_ES_cp_zaj_prev_to_zaj +#endif use m_ES_ortho, only : m_ES_modified_gram_schmidt use m_ES_wf_extrpl, only : m_ES_wf_extrpl_alloc use m_ES_nonlocal, only : m_ES_betar_dot_WFs @@ -106,8 +110,12 @@ subroutine Initial_Electronic_Structure & , m_CD_rd_chgq & & , m_CD_initial_CD_by_file_rspace & & , m_CD_adjust_spindensity & +#ifdef FFTW3 & , m_CD_cp_chgq_prev_to_chgq & & , m_CD_gen_chgq_from_prev_chgq +#else + & , m_CD_cp_chgq_prev_to_chgq +#endif use m_Crystal_Structure, only : sw_fix_total_spin, sw_magnetic_constraint, univol ! <-- use m_XC_Potential, only : vxc_l, exc @@ -288,7 +296,10 @@ subroutine Initial_Electronic_Structure call m_PP_gfqwei(nfout) ! -> modnrm, fqwei, nlmta1, nlmta2 call m_CtrlP_set_init_status(.false.) call UpdateUeff() - if(sw_hybrid_functional == ON) call EXX() + if(sw_hybrid_functional == ON) then + call m_ES_EXX_init() + call EXX() + endif return endif @@ -618,7 +629,11 @@ subroutine Initial_Electronic_Structure call m_ES_cp_zaj_prev_to_zaj() else if((iteration_unit_cell > 1 .or. iteration_stress_correction>1) .and. & & kg1_prev .ne. kg1 .and. sw_interpolate_wfs == ON) then +#ifdef FFTW3 call m_ES_gen_zaj_from_prev_zaj(nfout) +#else + call m_ESIW_by_randomnumbers(nfout,kv3,1,neg) ! (rndzaj) -> zaj_l +#endif else if(intzaj == by_random_numbers) then call m_ESIW_by_randomnumbers(nfout,kv3,1,neg) ! (rndzaj) -> zaj_l else if(intzaj == by_pseudo_atomic_orbitals) then @@ -706,10 +721,12 @@ subroutine Initial_Electronic_Structure call m_ES_alloc_zaj_l_prev(kg1_prev) endif call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned,read_pwbs) +#ifdef FFTW3 if(read_pwbs)then call m_ES_gen_zaj_from_prev_zaj(nfout) call modified_gramschmidt() endif +#endif if(neg_previous < neg) then call m_ESIW_by_randomnumbers(nfout,kv3,neg_previous+1,neg) end if @@ -979,10 +996,12 @@ contains call m_ES_alloc_zaj_l_prev(kg1_prev) endif call m_ESIO_rd_WFs(nfout,nfzaj,F_ZAJ_in_partitioned,read_pwbs) +#ifdef FFTW3 if(read_pwbs)then call m_ES_gen_zaj_from_prev_zaj(nfout) call modified_gramschmidt() endif +#endif endif else if ( condition == -4 ) then ! coordinate-continuation diff --git a/src_phase/Initialization.F90 b/src_phase/Initialization.F90 index 91617c1..2d40114 100644 --- a/src_phase/Initialization.F90 +++ b/src_phase/Initialization.F90 @@ -174,7 +174,7 @@ contains subroutine aavers include 'version.h' ! svn_revision character(len=72) :: vers, system, codename - write(vers,'("phase/0 2020.01 Revision:",i5," -- ORG_Parallel --")') svn_revision + write(vers,'("phase/0 2020.01.01 Revision:",i5," -- ORG_Parallel --")') svn_revision codename = 'phaseUnif' system = '' diff --git a/src_phase/Makefile.M1Mac b/src_phase/Makefile.M1Mac new file mode 100644 index 0000000..be731d9 --- /dev/null +++ b/src_phase/Makefile.M1Mac @@ -0,0 +1,595 @@ +.SUFFIXES: +.SUFFIXES: .o .F .f .F90 .f90 .c .mod + +# Platform : GNU Linux (EM64T/AMD64) +# Prog. model : MPI parallel +# Compiler : GNU compiler collection (gfortran) +# BLAS/LAPACK : Netlib BLAS/LAPACK +# FFT : FFTW3 library +########################################################################### +###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>### +########################################################################### +F90 = mpif90 +CC = gcc +CPP = +AR = ar -vq +LINK = mpif90 +F90FLAGS = -O2 -ffree-form -fallow-argument-mismatch +F77FLAGS = -O2 -ffixed-form -fallow-argument-mismatch +CFLAGS = -DINTEL -DDARWIN + +ESM = yes +ifdef ESM +CPPESM=-DENABLE_ESM_PACK +LESM=-lesm +else +CPPESM= +endif + +CPPFLAGS = -DLinux -D_GNU_FORTRAN_ -DFFTW3 -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_POSITRON_ -D_FAST_WAY_ -D_USE_DATE_AND_TIME_ -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM} +LFLAGS = +F90FLAGS_FIXED = -ffixed-form +F90FLAGS_FREE = -ffree-form +MKLHOME=./ +INCLUDE=-I/opt/homebrew/opt/fftw/include +LIBS = -L./ ${LESM} -L./ -llapack -lblas -L/opt/homebrew/opt/fftw/lib -lfftw3 +########################################################################### +###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>### +########################################################################### + +ifdef ESM +ESM_LIB = libesm.a +else +ESM_LIB = +endif +LAPACK = liblapack.a libblas.a +FFTOBJECT = +OBJ_INPUTPARSE = input_parse.o + +PHASE_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +PHASE_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_ES_occup.o \ +m_ES_RSB.o \ +m_FiniteElectricField.o \ +m_ES_ExactExchange.o \ +m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_PS_opencore.o \ +m_epc_potential.o \ +m_Positron_Wave_Functions.o \ +m_ES_WF_mixing.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_Representation.o m_OP_rotation.o \ +m_ES_WF_by_MatDiagon.o m_ES_dos.o m_Hubbard.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_vdWDF.o \ +m_Ldos.o m_XC_Potential.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_OP_Moment.o \ +m_ES_Mag_Constraint.o \ +m_Total_Energy.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o \ +m_UnitCell.o \ +m_constraints.o \ +m_ELF.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Raman.o m_Phonon.o \ +m_Wannier.o m_Wannier90.o m_Replica.o Renewal_of_ChgCtrlParam.o \ +m_LinearResponse_Control.o \ +m_ValenceBand_Spectrum.o m_CoreLevel_Spectrum.o \ +m_Excitation.o \ +m_CLS_dipquad.o m_ES_occup_EPS.o m_Epsilon_ek.o \ +m_LinearResponse_Qpt.o \ +m_rttddft.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \ +m_Fcp.o m_Band_Unfolding.o + +PHASE_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o \ + +PHASE_F_OTHERSUBS = mdmain0.o constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +UnitaryTransform_WF.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o\ +input_interface.o Initialization.o \ +WriteDownData_onto_Files.o Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o \ +Renewal_of_WaveFunctions.o \ +Renewal_of_pWaveFunctions.o \ +Uramping.o \ +IterationNumbers_Setting.o ChargeDensity_Construction.o \ +ChargeDensity_Mixing.o Renewal_of_Potential.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Renewal_of_pPotential.o Renewal_of_Hubbard_Parameters.o \ +Convergence_Check.o Forces.o \ +Move_Ions.o Initial_MD_Condition.o \ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o miscellaneous.o\ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +Initialization_Epsilon.o Shift_Kpoint.o \ +Reset_Kpoint.o Preparation_for_Calc_Epsilon.o \ +Transition_moment_Epsilon.o Calc_Epsilon.o \ +Nonlinear_Optics_Epsilon.o WriteDownData_onto_Files_Epsilon.o \ +PseudoPotential_ek_Epsilon.o Dealloc_Radr_and_Wos_Epsilon.o \ +mpi_dummy.o WriteDownData_onto_Files_ek.o \ +GaussLeg.o lib_int_deri_add.o \ +rttddft_main.o \ +Potential_Construction.o Potential_Mixing.o ThomasFermiWeiz.o \ +Epsilon_postscf.o vdW.o Epsilon_Paramset.o memsize.o scdft_routines.o \ +lbfgs.o + +# for vc_nl +NLOBJ = vc_nl.o + +ifndef SX_DGEMM +PHASE_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o \ +gncpp_xc_gga_rad.o \ +decfft_ent.o \ +spg+tetra.o +else +PHASE_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +spg+tetra.o \ +dgemm__.o +endif + +EKCAL_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +EKCAL_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_FiniteElectricField.o \ +m_ES_ExactExchange.o \ +m_ES_occup.o m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_RSB.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_epc_potential.o \ +m_Positron_Wave_Functions.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_PS_opencore.o \ +m_ES_WF_mixing.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_Representation.o m_OP_rotation.o \ +m_ES_WF_by_MatDiagon.o m_ES_dos.o m_Hubbard.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_vdWDF.o \ +m_Ldos.o m_XC_Potential.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_ES_Mag_Constraint.o \ +m_constraints.o \ +m_Total_Energy.o \ +m_OP_Moment.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o\ +m_UnitCell.o \ +m_ELF.o m_Wannier.o \ +m_Wannier90.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Raman.o m_Phonon.o \ +m_LinearResponse_Control.o \ +m_ValenceBand_Spectrum.o m_CoreLevel_Spectrum.o \ +m_Excitation.o \ +m_LinearResponse_Qpt.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \ +m_Fcp.o m_CLS_dipquad.o m_Band_Unfolding.o + + +EKCAL_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o + +EKCAL_F_OTHERSUBS = ekmain.o constraint_main.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +UnitaryTransform_WF.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o \ +input_interface.o Initialization.o \ +Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o Renewal_of_WaveFunctions.o \ +IterationNumbers_Setting.o \ +ChargeDensity_Construction.o Renewal_of_Potential.o \ +ChargeDensity_Mixing.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Convergence_Check.o Forces.o \ +WriteDownData_onto_Files.o \ +Initial_MD_Condition.o WriteDownData_onto_Files_ek.o\ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o \ +miscellaneous.o \ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +mpi_dummy.o \ +GaussLeg.o lib_int_deri_add.o \ +Uramping.o \ +ThomasFermiWeiz.o Renewal_of_ChgCtrlParam.o vdW.o scdft_routines.o memsize.o lbfgs.o + +ifndef SX_DGEMM +EKCAL_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +gncpp_xc_gga_rad.o \ +decfft_ent.o +else +EKCAL_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +dgemm__.o +endif + + +EPS_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +EPS_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_FiniteElectricField.o \ +m_Positron_Wave_Functions.o \ +m_ES_ExactExchange.o \ +m_ES_occup.o m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_RSB.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_OP_rotation.o \ +m_ES_dos.o m_Hubbard.o \ +m_epc_potential.o \ +m_vdWDF.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_XC_Potential.o \ +m_PS_opencore.o \ +m_ES_WF_mixing.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_ES_Mag_Constraint.o \ +m_Total_Energy.o \ +m_OP_Moment.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o m_ES_WF_by_MatDiagon.o \ +m_UnitCell.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_Ldos.o m_ELF.o \ +m_constraints.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Representation.o m_Raman.o m_Phonon.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_Wannier.o \ +m_Wannier90.o \ +m_ValenceBand_Spectrum.o m_CoreLevel_Spectrum.o \ +m_Excitation.o \ +m_CLS_dipquad.o m_ES_occup_EPS.o m_Epsilon_ek.o \ +m_LinearResponse_Control.o \ +m_LinearResponse_Qpt.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \ +m_Fcp.o m_Band_Unfolding.o + +EPS_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o + +EPS_F_OTHERSUBS = epsmain.o constraint_main.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +UnitaryTransform_WF.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o \ +input_interface.o Initialization.o \ +Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o Renewal_of_WaveFunctions.o \ +IterationNumbers_Setting.o \ +ChargeDensity_Construction.o Renewal_of_Potential.o \ +ChargeDensity_Mixing.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Convergence_Check.o Forces.o \ +WriteDownData_onto_Files.o \ +Initial_MD_Condition.o WriteDownData_onto_Files_ek.o\ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o \ +miscellaneous.o \ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +Initialization_Epsilon.o Shift_Kpoint.o \ +Reset_Kpoint.o Preparation_for_Calc_Epsilon.o \ +Transition_moment_Epsilon.o Calc_Epsilon.o \ +Nonlinear_Optics_Epsilon.o WriteDownData_onto_Files_Epsilon.o \ +PseudoPotential_ek_Epsilon.o Dealloc_Radr_and_Wos_Epsilon.o \ +ThomasFermiWeiz.o Renewal_of_ChgCtrlParam.o \ +mpi_dummy.o \ +Uramping.o \ +GaussLeg.o lib_int_deri_add.o vdW.o memsize.o scdft_routines.o lbfgs.o + +ifndef SX_DGEMM +EPS_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o +else +EPS_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +dgemm__.o +endif + + +TDLR_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +TDLR_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_FiniteElectricField.o \ +m_Positron_Wave_Functions.o \ +m_ES_ExactExchange.o \ +m_ES_occup.o m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_RSB.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_Representation.o m_OP_rotation.o \ +m_ES_dos.o m_Hubbard.o \ +m_epc_potential.o \ +m_vdWDF.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_XC_Potential.o \ +m_PS_opencore.o \ +m_ES_WF_mixing.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_ES_Mag_Constraint.o \ +m_Total_Energy.o \ +m_OP_Moment.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o m_ES_WF_by_MatDiagon.o \ +m_UnitCell.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_Ldos.o m_ELF.o \ +m_constraints.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Raman.o m_Phonon.o \ +m_Wannier.o \ +m_Wannier90.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_Excitation.o \ +m_CLS_dipquad.o m_ES_occup_EPS.o \ +m_LinearResponse_Control.o \ +m_ValenceBand_Spectrum.o m_CoreLevel_Spectrum.o \ +m_Band_Unfolding.o \ +m_LinearResponse_Qpt.o \ +m_LinearResponse_Tools.o m_LinearResponse_Density.o \ +m_LinearResponse_NonInt.o m_LinearResponse_ALDA.o \ +m_LinearResponse_Kernel.o m_LinearResponse_BS.o \ +m_LinearResponse_Spectrum.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_SpinOrbit_ForceTheorem.o m_Crystal_Field.o \ + +TDLR_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o \ + +TDLR_F_OTHERSUBS = tdlrmain.o constraint_main.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +UnitaryTransform_WF.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o \ +input_interface.o Initialization.o \ +Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o Renewal_of_WaveFunctions.o \ +IterationNumbers_Setting.o \ +ChargeDensity_Construction.o Renewal_of_Potential.o \ +ChargeDensity_Mixing.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Convergence_Check.o Forces.o \ +WriteDownData_onto_Files.o \ +Initial_MD_Condition.o WriteDownData_onto_Files_ek.o \ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o \ +miscellaneous.o \ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +PseudoPotential_ek_Epsilon.o \ +mpi_dummy.o \ +GaussLeg.o lib_int_deri_add.o \ +ThomasFermiWeiz.o Renewal_of_ChgCtrlParam.o \ +b_LinearResponse_Kernel.o b_LinearResponse_exc.o \ +Uramping.o \ +LinearResponse_Proc.o LinearResponse_Spec.o vdW.o scdft_routines.o memsize.o lbfgs.o + +ifndef SX_DGEMM +TDLR_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o +else +TDLR_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +dgemm__.o +endif + + +PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE) + +EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE) + +EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE) + +TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE) + +all : phase ekcal epsmain tdlrmain + +ifdef ESM +phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ) + $(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ +else +phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ) + $(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ +endif + +ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ) + $(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ + +epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ) + $(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ + +tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ) + $(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ + +ifdef NO_MPI +libesm.a: + cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)" +else +libesm.a: + cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90) -fallow-argument-mismatch" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)" +endif + +liblapack.a: + cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)" + +libblas.a: + cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)" + +$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h) + $(CC) -c $(CFLAGS) $(@:.o=.c) + +.f.o: + $(F90) -c $(F77FLAGS) $*.f + +.f90.o: + $(F90) -c $(F90FLAGS) $*.f90 + +.F.o: + $(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F + +.F90.o: + $(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90 + +clean: + \rm -f *.o *.mod *.a *.lib *.L *.list phase ekcal epsmain tdlrmain + \cd LAPACK; make clean + \cd BLAS; make clean + \cd EsmPack; make clean + +install: phase ekcal epsmain tdlrmain + \mv -f phase ../bin/ + \mv -f ekcal ../bin/ + \mv -f epsmain ../bin/ + \mv -f tdlrmain ../bin/ diff --git a/src_phase/Preparation.F90 b/src_phase/Preparation.F90 index ac293bd..54910f4 100644 --- a/src_phase/Preparation.F90 +++ b/src_phase/Preparation.F90 @@ -391,7 +391,7 @@ subroutine Preparation(imode) call m_CS_alloc_op_tau_tl(nfout) if(symmetry_method == AUTOMATIC) then call m_CS_SG_auto_gnrt_sym_op(paramset,nfout) ! paramset == .false. - call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos + if(.not. m_Stress_in_correction()) call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos else __TIMER_FJ_START(31) call m_CS_gnrt_symmetry_operations(paramset,nfout) ! paramset == .false. diff --git a/src_phase/input_parse.h b/src_phase/input_parse.h index 3ccdeeb..a7c921c 100644 --- a/src_phase/input_parse.h +++ b/src_phase/input_parse.h @@ -26,6 +26,7 @@ last modification: 2002.12.26 by K. Mae #include <malloc.h> #else #include <malloc/malloc.h> +#include <ctype.h> #endif /*#include <math.h>*/ #include <string.h> diff --git a/src_phase/m_CD_mixing.F90 b/src_phase/m_CD_mixing.F90 index 3e120b3..7eaba99 100644 --- a/src_phase/m_CD_mixing.F90 +++ b/src_phase/m_CD_mixing.F90 @@ -4009,11 +4009,17 @@ contains integer :: i, nx if(hownew == ANEW) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else ! hownew == RENEW if(iter <= n) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else m = n nx = ncrspd(1) @@ -5870,11 +5876,17 @@ contains integer :: i, nx if(hownew == ANEW) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else ! hownew == RENEW if(iter <= n) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else m = n nx = ncrspd(1) diff --git a/src_phase/m_Charge_Density.F90 b/src_phase/m_Charge_Density.F90 index 368d389..7140858 100644 --- a/src_phase/m_Charge_Density.F90 +++ b/src_phase/m_Charge_Density.F90 @@ -4392,10 +4392,11 @@ contains __TIMER_IODO_STOP(1451) end if end if - +#ifdef FFTW3 if(prv)then call m_CD_gen_chgq_from_prev_chgq(nfout) endif +#endif if(ipri >= 3) write(nfout,*) ' !D Reading chgq_g finished' total_charge = 0.d0 @@ -8577,6 +8578,7 @@ contains chgq_l = chgq_l_prev*(univol_prev/univol) end subroutine m_CD_cp_chgq_prev_to_chgq +#ifdef FFTW3 subroutine m_CD_gen_chgq_from_prev_chgq(nfout) use m_Files, only : m_Files_nfcntn_bin_paw_exists,m_Files_open_nfcntn_bin_paw & , m_Files_close_nfcntn_bin_paw, nfcntn_bin_paw @@ -8793,6 +8795,7 @@ contains deallocate(afft); deallocate(afft_mpi1) end subroutine dealloc_rspace_charge_prev end subroutine m_CD_gen_chgq_from_prev_chgq +#endif subroutine m_CD_dealloc use m_Files, only : nfout diff --git a/src_phase/m_Control_Parameters.F90 b/src_phase/m_Control_Parameters.F90 index 9536b85..706ca45 100644 --- a/src_phase/m_Control_Parameters.F90 +++ b/src_phase/m_Control_Parameters.F90 @@ -11045,9 +11045,10 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen end function m_CtrlP_way_of_smearing integer function m_CtrlP_waymix_now(iteration_electronic,iteration_ionic & - & ,edeltb_per_atom) + & ,edeltb_per_atom,mixer_changed) integer, intent(in) :: iteration_electronic, iteration_ionic real(kind=DP),intent(in) :: edeltb_per_atom + logical, intent(out), optional :: mixer_changed integer :: ips, ipe, imsd_t, it, ip, i if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then @@ -11082,6 +11083,9 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen ! if(ip < 0 .or. ip > n_Charge_Mixing_way) stop ' error at m_CtrlP_waymix_now' if(ip < 0 .or. ip > n_Charge_Mixing_way) call phase_execution_error(INVALID_CHARGE_MIXING) m_CtrlP_waymix_now = charge_mixing(ip)%mixing_way + if(present(mixer_changed)) then + mixer_changed = ip_charge_mixing /= ip + endif ip_charge_mixing = ip else m_CtrlP_waymix_now = waymix diff --git a/src_phase/m_ES_IO.F90 b/src_phase/m_ES_IO.F90 index 54160e5..9f015ff 100644 --- a/src_phase/m_ES_IO.F90 +++ b/src_phase/m_ES_IO.F90 @@ -1723,9 +1723,9 @@ contains if(ipri>=2) write(nfout,*) ' !D ie = ', ie if(mype == 0) read(nf,err=2,end=2) wf_l ! MPI if(mype == 0 .and. map_ek(ie,ik) /= 0) then ! MPI - call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI + call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI else if(map_ek(ie,ik) == mype .and. map_ek(ie,ik) /= 0) then ! MPI - call mpi_recv(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr) ! MPI + call mpi_recv(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,istatus,ierr) ! MPI end if if(map_ek(ie,ik) == mype) then ! MPI do ri = 1, kimg @@ -1786,9 +1786,9 @@ contains end if if(map_ek(ie,ik) == mype) then - call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ike2),1,mpi_comm_group,ierr) + call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ike2),1,mpi_comm_group,ierr) else if(map_ek(ie,ike2) == mype) then - call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr) + call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr) end if if(map_ek(ie,ike2) == mype) then do ri = 1, kimg @@ -2030,9 +2030,9 @@ contains wf_l(1:nel,ri) = zaj_l(1:nel,map_z(ibt),ik,ri) end do if(map_ek(ibt,ik) /= 0) & ! MPI - & call mpi_send(wf_l,nel*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI + & call mpi_send(wf_l,nel*kimg,mpi_real4,0,1,mpi_comm_group,ierr) ! MPI else if(mype == 0 .and. map_ek(ibt,ik) /= 0) then - call mpi_recv(wf_l,nel*kimg,mpi_real,map_ek(ibt,ik),1,mpi_comm_group,istatus,ierr)!MPI + call mpi_recv(wf_l,nel*kimg,mpi_real4,map_ek(ibt,ik),1,mpi_comm_group,istatus,ierr)!MPI end if if(mype == 0) wf(:,:,ib-ib1+1) = wf_l(:,:) end do @@ -2301,9 +2301,9 @@ contains wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri) end do if(map_ek(ie,ik) /= 0) & ! MPI - & call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI + & call mpi_send(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,ierr) ! MPI else if(mype == 0 .and. map_ek(ie,ik) /= 0) then - call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI + call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI end if if(mype == 0) write(nf) wf_l ! MPI else if(precision_WFfile==DP) then diff --git a/src_phase/m_Electronic_Structure.F90 b/src_phase/m_Electronic_Structure.F90 index 801c7bd..0c5257b 100644 --- a/src_phase/m_Electronic_Structure.F90 +++ b/src_phase/m_Electronic_Structure.F90 @@ -2889,7 +2889,7 @@ contains if(iprievdff >= 1) & !!$ & write(nfout,'(" >> (",i6,") <eko_old-eko_new>:(",3d13.5,")")') & !!$ & iteration_electronic, evdff(1), evdff(2), evdff(3) - & write(nfout,'(" ENERGY EIGEN VALUE SUMM. FOR",i6," -TH ITER (K=",i6,":",i6 & + & write(nfout,'(" ENERGY EIGEN VALUE SUM. FOR",i6," -TH ITER (K=",i6,":",i6 & & ,") = ",F16.8, " <eko_old-eko_new>:(",3d13.5,")")') & & iteration_electronic,nk_in_the_process, nk_in_the_process+kv3-1 & & , ekosum, evdff(1), evdff(2), evdff(3) @@ -3877,6 +3877,7 @@ contains end if end subroutine map_zaj_to_fft_box +#ifdef FFTW3 subroutine m_ES_gen_zaj_from_prev_zaj(nfout) integer, intent(in) :: nfout integer :: ib,ik,ri @@ -4099,6 +4100,7 @@ contains end subroutine map_zaj_to_fft_box_prev end subroutine m_ES_gen_zaj_from_prev_zaj +#endif subroutine m_ES_alloc_zaj_l_prev(kg1) integer, intent(in) :: kg1 diff --git a/src_phase/m_Epsilon_ek.F90 b/src_phase/m_Epsilon_ek.F90 index 49e24fa..cd5e9dc 100644 --- a/src_phase/m_Epsilon_ek.F90 +++ b/src_phase/m_Epsilon_ek.F90 @@ -2113,7 +2113,9 @@ ppc_data(ntyp) deallocate(vk00xyz); deallocate(vk0xyz) deallocate(vk0_op); deallocate(nopr_k); deallocate(op_k) end if - if(trm2_mode == 0) deallocate(trm2) + if(trm2_mode == 0) then + if ( allocated(trm2)) deallocate(trm2) + end if end if deallocate(e); deallocate(imeps); deallocate(reps) @@ -6511,6 +6513,10 @@ ppc_data(ntyp) ! allocate(n_unfilled(nspin)); n_unfilled = 0 ! allocate(n_half_filled(nspin)); n_half_filled = 0 ! + if(allocated(n_filled)) deallocate(n_filled) + if(allocated(n_unfilled)) deallocate(n_unfilled) + if(allocated(n_half_filled)) deallocate(n_half_filled) + allocate(n_filled(nspin_kt)) ; n_filled = 0 allocate(n_unfilled(nspin_kt)); n_unfilled = 0 allocate(n_half_filled(nspin_kt)); n_half_filled = 0 diff --git a/src_phase/m_Ionic_System.F90 b/src_phase/m_Ionic_System.F90 index 7cfe83c..b56f7f5 100644 --- a/src_phase/m_Ionic_System.F90 +++ b/src_phase/m_Ionic_System.F90 @@ -6066,7 +6066,9 @@ contains read(nfcntn,*) forcmx_constraint_quench end if end if - if(sw_optimize_lattice==ON)then + if(sw_optimize_lattice==ON .or. & + imdalg == PT_CONTROL .or. & + imdalg == P_CONTROL) then call rewind_to_tag0(nfcntn,len(tag_lattice_vector),tag_lattice_vector & &, EOF_reach, tag_is_found, str, len_str) if(tag_is_found)then @@ -6088,12 +6090,16 @@ contains & ,mpi_double_precision,0,mpi_comm_group,ierr) call mpi_bcast(forcmx_constraint_quench,1 & & ,mpi_double_precision,0,mpi_comm_group,ierr) - if(sw_optimize_lattice==ON)then + if(sw_optimize_lattice==ON .or. & + imdalg == PT_CONTROL .or. & + imdalg == P_CONTROL) then call mpi_bcast(altv,9,mpi_double_precision,0,mpi_comm_group,ierr) endif end if - if(sw_optimize_lattice==ON)then + if(sw_optimize_lattice==ON .or. & + imdalg == PT_CONTROL .or. & + imdalg == P_CONTROL) then call m_CS_altv_2_rltv() endif @@ -6945,7 +6951,8 @@ contains & , cpd_l, cpd_old, ipcpd, iop_supercell) end if - if(iprimd >= 1) then +!!$ if(iprimd >= 1) then + if(iprimd > 1) then write(nfout,'(" -- cpd_l, cpd_old (after fd_symmetrize) --")') do i = 1, natm write(nfout,'(i5,6f18.5)') i, cpd_l(i,1:3),cpd_old(i,1:3) diff --git a/src_phase/m_UnitCell.f90 b/src_phase/m_UnitCell.f90 index 6c5abf5..1b0375a 100644 --- a/src_phase/m_UnitCell.f90 +++ b/src_phase/m_UnitCell.f90 @@ -887,6 +887,10 @@ subroutine m_UnitCell_init_md() integer :: i,j real(kind=DP), dimension(3) :: avec,bvec,cvec if(.not.first_call) return + allocate(p_l(natm,3));p_l = 0.d0 + allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0 + allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0 + allocate(p_l_metric(natm,3));p_l_metric = 0.d0 metric = matmul(transpose(altv),altv) call m_UnitCell_init_md_per_step() m_thermo = qmass(1) @@ -897,14 +901,10 @@ subroutine m_UnitCell_init_md() target_temperature = tkb(1)/CONST_kB ! gfree = 3*(natm-1) gfree = 3*(natm) - allocate(p_l(natm,3));p_l = 0.d0 do i=1,natm p_l(i,:) = amion(ityp(i)) * cpd_l(i,:) * s_thermo enddo - allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0 call convert_coords(natm,p_l,altv,p_l_lattice) - allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0 - allocate(p_l_metric(natm,3));p_l_metric = 0.d0 call convert_coords(natm,p_l_lattice,metric_inv,p_l_metric) do i=1,3 do j=1,3 @@ -975,9 +975,12 @@ function get_potential_baro() result(ret) end function get_potential_baro subroutine m_UnitCell_init_md_per_step() + real(kind=DP), external :: deter3 + univol = sqrt(deter3(metric)) call inver3n(3,metric,metric_inv) call inver3n(3,altv,altv_inv) call build_dudmetric() + H0 = get_H0() end subroutine m_UnitCell_init_md_per_step function tensor_rank2_equals(t1,t2) result(ret) @@ -1047,6 +1050,28 @@ function m_UC_get_hamiltonian() result(res) res = res-H0 end function m_UC_get_hamiltonian +function get_H0() result (res) + real(kind=DP) :: res + real(kind=DP) :: k_atm,k_baro,k_thermo,pot_baro,pot_thermo + k_atm = get_kinetic_atom(natm,p_l_lattice,metric_inv) + k_baro = get_kinetic_baro(matmul(metric,p_baro)) + pot_baro = get_potential_baro() + if(control_pressure == OFF) then + k_baro = 0.d0 + pot_baro = 0.d0 + endif + if(imdalg == PT_CONTROL .and. t_ctrl_method /= VELOCITY_SCALING)then + k_thermo = get_kinetic_thermo(p_thermo) + pot_thermo = gfree * target_temperature * CONST_kB * log(s_thermo) + else if (imdalg == P_CONTROL .or. (imdalg == PT_CONTROL .and. t_ctrl_method == VELOCITY_SCALING)) then + k_thermo = 0.d0 + pot_thermo = 0.d0 + endif +! res = s_thermo * (k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro) + res = k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro + return +end function get_H0 + function m_UC_get_curr_pressure() result(res) real(kind=DP) :: curr_pressure, res integer :: i @@ -1071,6 +1096,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn) logical :: tag_is_found, EOF_reach integer, parameter :: len_str = 132 character(len=len_str) :: str + + call m_UnitCell_init_md() + if(mype==0)then call rewind_to_tag0(nfcntn,len(tag_pressure_control), & & tag_pressure_control, & @@ -1119,7 +1147,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn) call altv_2_rltv(altv,rltv,univol,rvol) ! in b_CS call change_of_coordinate_system(altv,pos,natm,natm,cps) !-(b_I.S.) pos -> cps call primitive2bravais(nfout,p2bmat,altv(:,1),altv(:,2),altv(:,3),a,b,c,ca,cb,cc,il) ! in b_CS - if(iteration_unit_cell>1) H0_has_been_set = .true. + if(iteration_unit_cell>1) then + H0_has_been_set = .true. + endif endif endif diff --git a/src_phase_3d/ChargeDensity_Mixing.F90 b/src_phase_3d/ChargeDensity_Mixing.F90 index c8e1afd..a8624c9 100644 --- a/src_phase_3d/ChargeDensity_Mixing.F90 +++ b/src_phase_3d/ChargeDensity_Mixing.F90 @@ -83,7 +83,7 @@ subroutine ChargeDensity_Mixing use m_Control_Parameters, only : sw_calc_ekin_density, ekin_density_type, & & sw_mix_charge_with_ekindens use m_CD_mixing, only : m_CD_mix_broyden2_intg, m_CD_mix_pulay_intg, & - & m_CD_simple_mixing_intg + & m_CD_simple_mixing_intg, m_CD_force_dealloc use m_KE_mixing, only : m_KE_simple_mixing, m_KE_mix_broyden2, m_KE_mix_pulay, & & m_KE_prepare_precon, & & m_KE_simple_mixing_00, m_KE_kerker_mixing, & @@ -96,7 +96,7 @@ subroutine ChargeDensity_Mixing real(kind=DP) :: edeltb_per_atom integer,save :: waymix_at_CDmix_previous = 0 integer :: waymix_at_CDmix - logical :: ini = .false. + logical :: ini = .false., mixer_changed real(kind=DP) :: rmxt_tot, rmxt_hard, rmxt_occmat #ifdef __TIMER_SUB__ call timer_sta(1101) @@ -107,7 +107,8 @@ subroutine ChargeDensity_Mixing edeltb_per_atom = m_TE_what_is_edeltb_now()/natm waymix_at_CDmix = m_CtrlP_waymix_now(iteration_electronic, iteration_ionic & - & , edeltb_per_atom) + & , edeltb_per_atom,mixer_changed) + if(mixer_changed) call m_CD_force_dealloc() if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then if(waymix_at_CDmix_previous /= waymix_at_CDmix) call m_Iter_cmix_reset() end if diff --git a/src_phase_3d/Initial_Electronic_Structure.F90 b/src_phase_3d/Initial_Electronic_Structure.F90 index ba221ef..d42fa1d 100644 --- a/src_phase_3d/Initial_Electronic_Structure.F90 +++ b/src_phase_3d/Initial_Electronic_Structure.F90 @@ -104,8 +104,7 @@ subroutine Initial_Electronic_Structure use m_Crystal_Structure, only : sw_fix_total_spin, sw_magnetic_constraint, univol ! <-- use m_XC_Potential, only : vxc_l, exc - use m_Parallelization, only : m_Parallel_dealloc_mpi_elec, m_Parallel_init_mpi_elec & - & , ista_kngp,iend_kngp,nrank_e,mype + use m_Parallelization, only : m_Parallel_dealloc_mpi_elec, ista_kngp,iend_kngp,nrank_e,mype use m_Orbital_Population,only : m_OP_rd_occ_mat, m_OP_mix_om, m_OP_occ_mat_init, m_OP_cp_ommix_to_omold use m_FiniteElectricField,only : m_FEF_Constract_of_ftq use m_Parallelization, only : m_Parallel_init_mpi_elec_3D, np_g1k_prev @@ -266,7 +265,10 @@ subroutine Initial_Electronic_Structure call m_PP_gfqwei_3D(nfout) ! -> modnrm, fqwei, nlmta1, nlmta2 call m_CtrlP_set_init_status(.false.) call UpdateUeff() - if(sw_hybrid_functional == ON) call EXX() + if(sw_hybrid_functional == ON) then + call m_ES_EXX_init() + call EXX() + endif return endif diff --git a/src_phase_3d/Initialization.F90 b/src_phase_3d/Initialization.F90 index d3f8417..dac25fa 100644 --- a/src_phase_3d/Initialization.F90 +++ b/src_phase_3d/Initialization.F90 @@ -174,7 +174,7 @@ contains subroutine aavers include 'version.h' ! svn_revision character(len=72) :: vers, system, codename - write(vers,'("phase/0 2020.01 Revision:",i5, " --- 3D_Parallel --")') svn_revision + write(vers,'("phase/0 2020.01.01 Revision:",i5, " --- 3D_Parallel --")') svn_revision codename = 'phaseUnif' system = '' diff --git a/src_phase_3d/Makefile.Fugaku b/src_phase_3d/Makefile.Fugaku new file mode 100644 index 0000000..7b406a9 --- /dev/null +++ b/src_phase_3d/Makefile.Fugaku @@ -0,0 +1,578 @@ +.SUFFIXES: +.SUFFIXES: .o .F .f .F90 .f90 .c .mod + +# Platform : GNU Linux (EM64T/AMD64) +# Prog. model : MPI parallel +# Compiler : Intel Fortran compiler +# BLAS/LAPACK : Intel Math Kernel Library (MKL) +# FFT : Intel MKL with FFTW3 interface +########################################################################### +###<< PLEASE CHANGE THE VARIABLES BELOW ACCORDING TO YOUR ENVIRONMENT >>### +########################################################################### +F90 = mpifrtpx +CC = fccpx +CPP = +AR = ar qv +LINK = mpifrtpx -Kparallel +F90FLAGS = -O3 -Kparallel,ocl,preex,array_private,auto,simd=2,openmp -c -V -Qa,d,i,p,t,x -Koptmsg=2 -C 0 +F77FLAGS = -Kparallel,ocl,preex,array_private,auto,simd=2,openmp -c -V -Qa,d,i,p,t,x -Koptmsg=2 -C 0 +CFLAGS = -Kfast,parallel,ocl,preex,array_private,auto,simd=2,openmp -DINTEL + +ESM = yes +ifdef ESM +CPPESM=-DENABLE_ESM_PACK +LESM=-lesm +else +CPPESM= +endif + +CPPFLAGS = -DLinux -DFFTW3 -D_MPIFFT_ -D_USE_DATE_AND_TIME_ -D_POT_SMOOTHING_ -DTRANSPOSE -DGGA_ATOMIC_WITH_NEW_GNCPP -DREMOVE_PC_FROM_FORCE -D_USE_LAPACK_ -D_USE_SCALAPACK_ -D_POSITRON_ -D_FAST_WAY_ -DUSE_NONBLK_COMM -D_HEAP_SORT_ -DFFT_ALLTOALL -DPOST3D -DUSE_NONBLK_COMM -DRMM_NONLOCAL_NEW ${CPPESM} +LFLAGS = +F90FLAGS_FIXED = -extend_source -Fl -fixed +F90FLAGS_FREE = -extend_source -Fl + +FFTW3_DIR=/vol0004/apps/oss/spack-v0.16/opt/spack/linux-rhel8-a64fx/fj-4.3.1/fujitsu-fftw-master-2irjm2a56j7tahcmggbnde2uxf6gjdml +#FFTW3_DIR=/vol0001/apps/oss/spack-latest/opt/spack/linux-rhel8-a64fx/fj-4.2.0/fftw-3.3.8-3ojdo6i4rd3asyyonmrgxthvprum2gpa/ +INCLUDE=-I${FFTW3_DIR}/include +#LIBS = -L./ ${LESM} -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp +#LIBS = -L./ ${LESM} -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp +LIBS = -L./ ${LESM} -lfftw3 -lfftw3_omp -lfftw3_mpi -SSL2BLAMP -lscalapack -Kopenmp +#LIBS = -L./ ${LESM} libfftw3_omp.a libfftw3.a -SSL2BLAMP $(KLINK) -SCALAPACK -Kopenmp +########################################################################### +###<< PLEASE CHANGE THE VARIABLES ABOVE ACCORDING TO YOUR ENVIRONMENT >>### +########################################################################### + +ifdef ESM +ESM_LIB = libesm.a +else +ESM_LIB = +endif +LAPACK = +FFTOBJECT = mpifft.o +OBJ_INPUTPARSE = input_parse.o + +PHASE_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o #z_tool_timer.o + +PHASE_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_ES_occup.o \ +m_FiniteElectricField.o \ +m_ES_ExactExchange.o \ +m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_PS_opencore.o \ +m_epc_potential.o \ +m_Positron_Wave_Functions.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_Representation.o m_OP_rotation.o \ +m_ES_WF_by_MatDiagon.o m_ES_dos.o m_Hubbard.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_vdWDF.o \ +m_XC_Potential_2D.o \ +m_Ldos.o m_XC_Potential.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_OP_Moment.o \ +m_ES_Mag_Constraint.o \ +m_Total_Energy.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o \ +m_UnitCell.o \ +m_constraints.o \ +m_ELF.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Raman.o m_Phonon.o \ +m_Wannier.o m_Replica.o Renewal_of_ChgCtrlParam.o \ +m_LinearResponse_Control.o \ +m_ValenceBand_Spectrum.o m_CoreLevel_Spectrum.o \ +m_CLS_dipquad.o m_ES_occup_EPS.o m_Epsilon_ek.o \ +m_LinearResponse_Qpt.o \ +m_rttddft.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_Fcp.o m_Band_Unfolding.o + +PHASE_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o \ + +PHASE_F_OTHERSUBS = mdmain.o constraint_main.o meta_dynamics.o NEB.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o\ +input_interface.o Initialization.o \ +WriteDownData_onto_Files.o Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o \ +Renewal_of_WaveFunctions.o \ +Renewal_of_pWaveFunctions.o \ +Uramping.o \ +IterationNumbers_Setting.o ChargeDensity_Construction.o \ +ChargeDensity_Mixing.o Renewal_of_Potential.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Renewal_of_pPotential.o Renewal_of_Hubbard_Parameters.o \ +Convergence_Check.o Forces.o \ +Move_Ions.o Initial_MD_Condition.o \ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o miscellaneous.o\ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +Initialization_Epsilon.o Shift_Kpoint.o \ +Reset_Kpoint.o Preparation_for_Calc_Epsilon.o \ +Transition_moment_Epsilon.o Calc_Epsilon.o \ +Nonlinear_Optics_Epsilon.o WriteDownData_onto_Files_Epsilon.o \ +PseudoPotential_ek_Epsilon.o Dealloc_Radr_and_Wos_Epsilon.o \ +mpi_dummy.o WriteDownData_onto_Files_ek.o \ +GaussLeg.o lib_int_deri_add.o \ +rttddft_main.o \ +Potential_Construction.o Potential_Mixing.o ThomasFermiWeiz.o \ +Epsilon_postscf.o vdW.o Epsilon_Paramset.o memsize.o scdft_routines.o \ +lbfgs.o + +# for vc_nl +NLOBJ = vc_nl.o + +ifndef SX_DGEMM +PHASE_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o \ +gncpp_xc_gga_rad.o \ +decfft_ent.o \ +spg+tetra.o +else +PHASE_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +spg+tetra.o \ +dgemm__.o +endif + +EKCAL_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +EKCAL_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_FiniteElectricField.o \ +m_ES_ExactExchange.o \ +m_ES_occup.o m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_epc_potential.o \ +m_Positron_Wave_Functions.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_PS_opencore.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_Representation.o m_OP_rotation.o \ +m_ES_WF_by_MatDiagon.o m_ES_dos.o m_Hubbard.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_vdWDF.o \ +m_XC_Potential_2D.o \ +m_Ldos.o m_XC_Potential.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_ES_Mag_Constraint.o \ +m_constraints.o \ +m_Total_Energy.o \ +m_OP_Moment.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o\ +m_UnitCell.o \ +m_ELF.o m_Wannier.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Raman.o m_Phonon.o \ +m_LinearResponse_Control.o \ +m_LinearResponse_Qpt.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_Fcp.o m_CLS_dipquad.o m_Band_Unfolding.o + + +EKCAL_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o + +EKCAL_F_OTHERSUBS = ekmain.o constraint_main.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o \ +input_interface.o Initialization.o \ +Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o Renewal_of_WaveFunctions.o \ +IterationNumbers_Setting.o \ +ChargeDensity_Construction.o Renewal_of_Potential.o \ +ChargeDensity_Mixing.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Convergence_Check.o Forces.o \ +WriteDownData_onto_Files.o \ +Initial_MD_Condition.o WriteDownData_onto_Files_ek.o\ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o \ +miscellaneous.o \ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +mpi_dummy.o \ +GaussLeg.o lib_int_deri_add.o \ +Uramping.o \ +ThomasFermiWeiz.o Renewal_of_ChgCtrlParam.o vdW.o scdft_routines.o memsize.o lbfgs.o + +ifndef SX_DGEMM +EKCAL_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +gncpp_xc_gga_rad.o \ +decfft_ent.o +else +EKCAL_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +dgemm__.o +endif + + +EPS_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +EPS_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_FiniteElectricField.o \ +m_Positron_Wave_Functions.o \ +m_ES_ExactExchange.o \ +m_ES_occup.o m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_OP_rotation.o \ +m_ES_dos.o m_Hubbard.o \ +m_epc_potential.o \ +m_vdWDF.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_XC_Potential_2D.o \ +m_XC_Potential.o \ +m_PS_opencore.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_ES_Mag_Constraint.o \ +m_Total_Energy.o \ +m_OP_Moment.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o m_ES_WF_by_MatDiagon.o \ +m_UnitCell.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_Ldos.o m_ELF.o \ +m_constraints.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Representation.o m_Raman.o m_Phonon.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_Wannier.o \ +m_ValenceBand_Spectrum.o m_CoreLevel_Spectrum.o \ +m_CLS_dipquad.o m_ES_occup_EPS.o m_Epsilon_ek.o \ +m_LinearResponse_Control.o \ +m_LinearResponse_Qpt.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o \ +m_Fcp.o m_Band_Unfolding.o + +EPS_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o + +EPS_F_OTHERSUBS = epsmain.o constraint_main.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o \ +input_interface.o Initialization.o \ +Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o Renewal_of_WaveFunctions.o \ +IterationNumbers_Setting.o \ +ChargeDensity_Construction.o Renewal_of_Potential.o \ +ChargeDensity_Mixing.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Convergence_Check.o Forces.o \ +WriteDownData_onto_Files.o \ +Initial_MD_Condition.o WriteDownData_onto_Files_ek.o\ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o \ +miscellaneous.o \ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +Initialization_Epsilon.o Shift_Kpoint.o \ +Reset_Kpoint.o Preparation_for_Calc_Epsilon.o \ +Transition_moment_Epsilon.o Calc_Epsilon.o \ +Nonlinear_Optics_Epsilon.o WriteDownData_onto_Files_Epsilon.o \ +PseudoPotential_ek_Epsilon.o Dealloc_Radr_and_Wos_Epsilon.o \ +ThomasFermiWeiz.o Renewal_of_ChgCtrlParam.o \ +mpi_dummy.o \ +Uramping.o \ +GaussLeg.o lib_int_deri_add.o vdW.o memsize.o scdft_routines.o lbfgs.o + +ifndef SX_DGEMM +EPS_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o +else +EPS_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +dgemm__.o +endif + + +TDLR_LOWER_MODULES = m_Const_Parameters.o m_ErrorMessages.o m_Parallelization.o \ +m_IterationNumbers.o + +TDLR_UPPER_MODULES = \ +m_Control_Parameters.o m_Files.o m_Timing.o \ +m_Crystal_Structure.o m_FFT.o m_Ionic_System.o \ +m_CS_SpaceGroup.o m_CS_Magnetic.o \ +m_Orbital_QuantumNum.o \ +m_Kpoints.o m_PseudoPotential.o m_PlaneWaveBasisSet.o \ +m_Realspace.o \ +m_SpinOrbit_Potential.o m_SpinOrbit_FromFile.o \ +m_ES_NonCollinear.o \ +m_NonLocal_Potential.o m_Electronic_Structure.o \ +m_ES_nonlocal.o m_ES_ortho.o m_ES_wf_extrpl.o m_ES_initialWF.o \ +m_FiniteElectricField.o \ +m_Positron_Wave_Functions.o \ +m_ES_ExactExchange.o \ +m_ES_occup.o m_ES_WF_by_SDorCG.o m_ES_WF_by_Davidson.o \ +m_ES_WF_by_ModifiedDavidson.o m_ES_WF_by_RMM.o \ +m_ES_LHXC.o m_ES_Intgr_VlhxcQlm.o m_ES_IO.o \ +m_Representation.o m_OP_rotation.o \ +m_ES_dos.o m_Hubbard.o \ +m_epc_potential.o \ +m_vdWDF.o \ +m_Charge_Density.o m_CD_Mag_Moment.o m_Orbital_Population.o m_Dipole.o m_CD_mixing.o \ +m_XC_Potential_2D.o \ +m_XC_Potential.o \ +m_PS_opencore.o \ +string.o m_db.o \ +m_PAW_Tecplot.o m_PAW_ChargeDensity.o m_PAW_Hartree.o \ +m_PAW_XC_Potential.o \ +m_SpinOrbit_RadInt.o \ +m_ES_Mag_Constraint.o \ +m_Total_Energy.o \ +m_OP_Moment.o \ +m_Force.o m_Stress.o m_ES_WF_by_submat.o m_ES_WF_by_MatDiagon.o \ +m_UnitCell.o \ +m_KineticEnergy_Density.o \ +m_KE_mixing.o \ +m_Ldos.o m_ELF.o \ +m_constraints.o \ +m_BerryPhase.o m_BP_Properties.o \ +m_Raman.o m_Phonon.o \ +m_Wannier.o \ +m_Screening_FFT.o m_Screening.o m_External_Potential.o \ +m_Excitation.o \ +m_CLS_dipquad.o m_ES_occup_EPS.o \ +m_LinearResponse_Control.o \ +m_Band_Unfolding.o \ +m_LinearResponse_Qpt.o \ +m_LinearResponse_Tools.o m_LinearResponse_Density.o \ +m_LinearResponse_NonInt.o m_LinearResponse_ALDA.o \ +m_LinearResponse_Kernel.o m_LinearResponse_BS.o \ +m_LinearResponse_Spectrum.o \ +m_Potential_Mixing.o m_ThomasFermiW_Potential.o + +TDLR_F_SUBROUTINES = Preparation.o \ +InputData_Analysis.o PseudoPotential_Construction.o \ + +TDLR_F_OTHERSUBS = tdlrmain.o constraint_main.o Preparation_for_mpi.o \ +Preparation_for_ESM.o \ +scf_routines.o \ +bottom_Subroutines.o \ +spline.o \ +b_Crystal_Structure.o b_Electronic_Structure.o \ +b_Words.o b_PseudoPotential.o \ +b_PseudoPotential_EXX.o \ +b_Fermi.o b_Kpoints.o \ +b_PlaneWaveBasisSet.o b_Ionic_System.o \ +b_XC_Potential.o b_PAW_XC_Potential.o \ +b_XC_OmegaPBE.o \ +b_XC_metagga.o \ +b_Ldos_f77.o \ +input_interface.o Initialization.o \ +Ewald_and_Structure_Factor.o \ +Initial_Electronic_Structure.o Renewal_of_WaveFunctions.o \ +IterationNumbers_Setting.o \ +ChargeDensity_Construction.o Renewal_of_Potential.o \ +ChargeDensity_Mixing.o \ +Renewal_of_Hubbard_Potential.o Renewal_of_OccMat.o \ +Convergence_Check.o Forces.o \ +WriteDownData_onto_Files.o \ +Initial_MD_Condition.o WriteDownData_onto_Files_ek.o \ +Stress.o Postprocessing.o \ +Finalization_of_mpi.o \ +miscellaneous.o \ +b_BerryPhase.o \ +heap_sort.o real_spherical_harmonics.o \ +Real_space_integ.o crotylm.o \ +screening_correction.o \ +PseudoPotential_ek_Epsilon.o \ +mpi_dummy.o \ +GaussLeg.o lib_int_deri_add.o \ +ThomasFermiWeiz.o Renewal_of_ChgCtrlParam.o \ +b_LinearResponse_Kernel.o b_LinearResponse_exc.o \ +Uramping.o \ +LinearResponse_Proc.o LinearResponse_Spec.o vdW.o scdft_routines.o memsize.o lbfgs.o + +ifndef SX_DGEMM +TDLR_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o +else +TDLR_OBJECTSF77 = \ +b_PseudoPotential_f77.o \ +b_Force_f77.o \ +rmmsubs.o spg+tetra.o \ +decfft_ent.o \ +gncpp_xc_gga_rad.o \ +dgemm__.o +endif + + +PHASE_OBJECTS = $(FFTOBJECT) $(PHASE_LOWER_MODULES) $(PHASE_UPPER_MODULES) $(PHASE_F_SUBROUTINES) $(PHASE_F_OTHERSUBS) $(PHASE_OBJECTSF77) $(OBJ_INPUTPARSE) + +EKCAL_OBJECTS = $(FFTOBJECT) $(EKCAL_LOWER_MODULES) $(EKCAL_UPPER_MODULES) $(EKCAL_F_SUBROUTINES) $(EKCAL_F_OTHERSUBS) $(EKCAL_OBJECTSF77) $(OBJ_INPUTPARSE) + +EPS_OBJECTS = $(FFTOBJECT) $(EPS_LOWER_MODULES) $(EPS_UPPER_MODULES) $(EPS_F_SUBROUTINES) $(EPS_F_OTHERSUBS) $(EPS_OBJECTSF77) $(OBJ_INPUTPARSE) + +TDLR_OBJECTS = $(FFTOBJECT) $(TDLR_LOWER_MODULES) $(TDLR_UPPER_MODULES) $(TDLR_F_SUBROUTINES) $(TDLR_F_OTHERSUBS) $(TDLR_OBJECTSF77) $(OBJ_INPUTPARSE) + +all : phase epsmain + +ifdef ESM +phase : $(ESM_LIB) $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ) + $(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ +else +phase : $(LAPACK) $(PHASE_OBJECTS) $(NLOBJ) + $(LINK) $(PHASE_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ +endif + +ekcal : $(LAPACK) $(EKCAL_OBJECTS) $(NLOBJ) + $(LINK) $(EKCAL_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ + +epsmain : $(LAPACK) $(EPS_OBJECTS) $(NLOBJ) + $(LINK) $(EPS_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ + +tdlrmain : $(LAPACK) $(TDLR_OBJECTS) $(NLOBJ) + $(LINK) $(TDLR_OBJECTS) $(NLOBJ) $(LFLAGS) $(LIBS) -o $@ + +ifdef NO_MPI +libesm.a: + cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="" AR="$(AR)" +else +libesm.a: + cd EsmPack; make INCLUDE="$(INCLUDE)" FORTRAN="$(F90)" LIBFLAG="$(LIBS)" MPIFLAG="-D__MPI__" AR="$(AR)" +endif + +liblapack.a: + cd LAPACK; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)" + +libblas.a: + cd BLAS; make F77="$(F90)" F77FLAGS="$(F77FLAGS)" AR="$(AR)" + +$(OBJ_INPUTPARSE):$(@:.o=.c) $(@:.o=.h) + $(CC) -c $(CFLAGS) $(@:.o=.c) + +.f.o: + $(F90) -c $(F77FLAGS) $*.f + +.f90.o: + $(F90) -c $(F90FLAGS) $*.f90 + +.F.o: + $(F90) -c $(F77FLAGS) $(CPPFLAGS) $*.F + +.F90.o: + $(F90) -c $(F90FLAGS) $(CPPFLAGS) $*.F90 + +clean: + \rm -f *.o *.mod *.a *.lib *.L *.list phase epsmain + \cd LAPACK; make clean + \cd BLAS; make clean + \cd EsmPack; make clean + +install: phase epsmain + \mv -f phase ../bin/phase.3d + \mv -f epsmain ../bin/epsmain.3d diff --git a/src_phase_3d/Preparation.F90 b/src_phase_3d/Preparation.F90 index 10ef0a5..0b24c8c 100644 --- a/src_phase_3d/Preparation.F90 +++ b/src_phase_3d/Preparation.F90 @@ -384,7 +384,7 @@ subroutine Preparation(imode) call m_CS_alloc_op_tau_tl(nfout) if(symmetry_method == AUTOMATIC) then call m_CS_SG_auto_gnrt_sym_op(paramset,nfout) ! paramset == .false. - call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos + if(.not. m_Stress_in_correction()) call m_IS_symmetrize_atom_pos(nfout) ! -> cps,pos else __TIMER_FJ_START(31) call m_CS_gnrt_symmetry_operations(paramset,nfout) ! paramset == .false. diff --git a/src_phase_3d/m_CD_mixing.F90 b/src_phase_3d/m_CD_mixing.F90 index e8cb863..bddc7dd 100644 --- a/src_phase_3d/m_CD_mixing.F90 +++ b/src_phase_3d/m_CD_mixing.F90 @@ -3862,11 +3862,17 @@ contains integer :: i, nx if(hownew == ANEW) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else ! hownew == RENEW if(iter <= n) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else m = n nx = ncrspd(1) @@ -5720,11 +5726,17 @@ contains integer :: i, nx if(hownew == ANEW) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else ! hownew == RENEW if(iter <= n) then m = iter - ncrspd(:) = (/(i,i=1,m)/) +! ncrspd(:) = (/(i,i=1,m)/) + do i=1,m + ncrspd(i) = i + enddo else m = n nx = ncrspd(1) diff --git a/src_phase_3d/m_Control_Parameters.F90 b/src_phase_3d/m_Control_Parameters.F90 index 4ac710f..352507a 100644 --- a/src_phase_3d/m_Control_Parameters.F90 +++ b/src_phase_3d/m_Control_Parameters.F90 @@ -11215,9 +11215,10 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen end function m_CtrlP_way_of_smearing integer function m_CtrlP_waymix_now(iteration_electronic,iteration_ionic & - & ,edeltb_per_atom) + & ,edeltb_per_atom,mixer_changed) integer, intent(in) :: iteration_electronic, iteration_ionic real(kind=DP),intent(in) :: edeltb_per_atom + logical, intent(out), optional :: mixer_changed integer :: ips, ipe, imsd_t, it, ip, i if(tag_solver_of_WF_is_found .and. tag_charge_mixing_is_found) then @@ -11252,6 +11253,9 @@ write(nfout,'(" !** sw_screening_correction = ",i5," alpha = ",f8.3)') sw_screen ! if(ip < 0 .or. ip > n_Charge_Mixing_way) stop ' error at m_CtrlP_waymix_now' if(ip < 0 .or. ip > n_Charge_Mixing_way) call phase_execution_error(INVALID_CHARGE_MIXING) m_CtrlP_waymix_now = charge_mixing(ip)%mixing_way + if(present(mixer_changed)) then + mixer_changed = ip_charge_mixing /= ip + endif ip_charge_mixing = ip else m_CtrlP_waymix_now = waymix diff --git a/src_phase_3d/m_ES_IO.F90 b/src_phase_3d/m_ES_IO.F90 index 4dc19e4..6e09912 100644 --- a/src_phase_3d/m_ES_IO.F90 +++ b/src_phase_3d/m_ES_IO.F90 @@ -55,6 +55,7 @@ #endif !#define _DEBUG_ESIO_ +#define _USE_ALLREDUCE_IN_WD_WFS_ ! module m_ES_IO @@ -78,7 +79,8 @@ module m_ES_IO & , mype,ierr,map_k, map_ek,ista_e,iend_e,istep_e,map_z, np_e & & , ista_k,iend_k,myrank_e,myrank_k,map_e,nrank_e & & , ista_kngp,iend_kngp, nrank_k & - & , ista_g1k,iend_g1k, np_g1k , myrank_g, nrank_g, np_g1k_prev + & , ista_g1k,iend_g1k, np_g1k , myrank_g, nrank_g, np_g1k_prev & + & , nis_g1k,nie_g1k,map_rank_gek use m_IterationNumbers, only : nk_in_the_process, nk_converged, nkgroup & & , first_kpoint_in_this_job, iteration_ionic, iteration_electronic use m_FFT, only : fft_box_size_WF,nfft @@ -1132,7 +1134,7 @@ contains #endif __TIMER_IODO_STOP(1438) __TIMER_IOCOMM_START_w_BARRIER(mpi_comm,1439) - call mpi_bcast(wf_l,kg1t*kimg,mpi_real,0,mpi_comm_group,ierr) + call mpi_bcast(wf_l,kg1t*kimg,mpi_real4,0,mpi_comm_group,ierr) __TIMER_IOCOMM_STOP(1439) if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e) )then ! MPI if(pzaj) then @@ -1281,7 +1283,7 @@ contains ! ----------------- if(precision_WFfile==SP) then if(mype == 0) read(nfzaj, end = 9999, err = 9999) wf_l ! MPI - call mpi_bcast(wf_l,kg1_prev*kimg,mpi_real,0,mpi_comm_group,ierr) + call mpi_bcast(wf_l,kg1_prev*kimg,mpi_real4,0,mpi_comm_group,ierr) if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e) )then ! MPI do ri = 1, kimg @@ -1363,7 +1365,7 @@ contains __TIMER_IODO_STOP(1467) endif __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1468) - call mpi_allreduce(wf_mpi,wf_l,kg1*kimg,mpi_real,mpi_sum,mpi_comm_group,ierr) + call mpi_allreduce(wf_mpi,wf_l,kg1*kimg,mpi_real4,mpi_sum,mpi_comm_group,ierr) __TIMER_IOCOMM_STOP(1468) __TIMER_IODO_START(1469) else @@ -1543,9 +1545,9 @@ contains if(ipri>=2) write(nfout,*) ' !D ie = ', ie if(mype == 0) read(nf,err=2,end=2) wf_l ! MPI if(mype == 0 .and. map_ek(ie,ik) /= 0) then ! MPI - call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI + call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,ierr) ! MPI else if(map_ek(ie,ik) == mype .and. map_ek(ie,ik) /= 0) then ! MPI - call mpi_recv(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,istatus,ierr) ! MPI + call mpi_recv(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,istatus,ierr) ! MPI end if if(map_ek(ie,ik) == mype) then ! MPI do ri = 1, kimg @@ -1606,9 +1608,9 @@ contains end if if(map_ek(ie,ik) == mype) then - call mpi_send(wf_l,kg1*kimg,mpi_real,map_ek(ie,ike2),1,mpi_comm_group,ierr) + call mpi_send(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ike2),1,mpi_comm_group,ierr) else if(map_ek(ie,ike2) == mype) then - call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr) + call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr) end if if(map_ek(ie,ike2) == mype) then do ri = 1, kimg @@ -1767,9 +1769,9 @@ contains wf_l(1:kg1,ri) = zaj_l(1:kg1,map_z(ie),ik,ri) end do if(map_ek(ie,ik) /= 0) & ! MPI - & call mpi_send(wf_l,kg1*kimg,mpi_real,0,1,mpi_comm_group,ierr) ! MPI + & call mpi_send(wf_l,kg1*kimg,mpi_real4,0,1,mpi_comm_group,ierr) ! MPI else if(mype == 0 .and. map_ek(ie,ik) /= 0) then - call mpi_recv(wf_l,kg1*kimg,mpi_real,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI + call mpi_recv(wf_l,kg1*kimg,mpi_real4,map_ek(ie,ik),1,mpi_comm_group,istatus,ierr)!MPI end if if(mype == 0) write(nf) wf_l ! MPI else if(precision_WFfile==DP) then @@ -1998,7 +2000,8 @@ contains integer, intent(in) :: nfout,nfzaj logical, intent(in) :: F_ZAJ_partitioned - integer :: ik,ib,ri,j,i + real(kind=SP), allocatable, dimension(:,:) :: wf_buf + integer :: ik,ib,ri,j,i,ig integer :: id_sname = -1 __TIMER_SUB_START(1373) call tstatc0_begin('m_ESIO_wd_WFs ',id_sname) @@ -2026,9 +2029,10 @@ contains allocate(wf_l(kg1,kimg)) do ik = 1, kv3, af+1 __TIMER_IODO_START(1442) +#ifdef _USE_ALLREDUCE_IN_WD_WFS_ do ib = 1, neg wf_l = 0.0d0 - if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e) )then ! MPI + if((map_k(ik) == myrank_k) .and. (map_e(ib) == myrank_e)) then ! MPI do ri = 1, kimg do j = ista_g1k(ik),iend_g1k(ik) wf_l(j,ri) = zaj_l(j-ista_g1k(ik)+1,map_z(ib),ik,ri) @@ -2036,12 +2040,37 @@ contains end do endif __TIMER_IOCOMM_START_w_BARRIER(mpi_comm_group,1443) - call mpi_allreduce(MPI_IN_PLACE,wf_l,kg1*kimg,mpi_real,mpi_sum,mpi_comm_group,ierr) + call mpi_allreduce(MPI_IN_PLACE,wf_l,kg1*kimg,mpi_real4,mpi_sum,mpi_comm_group,ierr) __TIMER_IOCOMM_STOP(1443) __TIMER_IODO_START(1444) if(mype == 0) write(nfzaj) wf_l ! MPI __TIMER_IODO_STOP(1444) end do +#else +!! coded by T. Yamasaki, 2021.02.25 --> + allocate(wf_buf(maxval(np_g1k),kimg)) + do ib = 1, neg + wf_l = 0.0d0 + wf_buf = 0.d0 + do j = 0, nrank_g-1 + if(map_k(ik)==myrank_k .and. map_e(ib)==myrank_e .and. j==myrank_g) then + do ig = ista_g1k(ik), iend_g1k(ik) + do ri=1, kimg + wf_buf(ig-ista_g1k(ik)+1,ri) = zaj_l(ig-ista_g1k(ik)+1,map_z(ib),ik,ri) + end do + end do + call mpi_send(wf_buf,maxval(np_g1k)*kimg,mpi_real4,0,1,mpi_comm_group,ierr) + end if + if(mype==0) then + call mpi_recv(wf_buf,maxval(np_g1k)*kimg,mpi_real4,map_rank_gek(j,map_e(ib),map_k(ik)),1,mpi_comm_group,istatus,ierr) + wf_l(nis_g1k(j,ik):nie_g1k(j,ik),1:kimg) = wf_buf(1:nie_g1k(j,ik)-nis_g1k(j,ik)+1,1:kimg) + end if + end do + if(mype==0) write(nfzaj) wf_l + end do + deallocate(wf_buf) +!! <-- +#endif __TIMER_IODO_STOP(1442) end do deallocate(wf_l) diff --git a/src_phase_3d/m_Electronic_Structure.F90 b/src_phase_3d/m_Electronic_Structure.F90 index f1fb467..6b100a8 100644 --- a/src_phase_3d/m_Electronic_Structure.F90 +++ b/src_phase_3d/m_Electronic_Structure.F90 @@ -1279,7 +1279,7 @@ contains ! ============================================================================== evdff(1) = evdff(1) + tmp*tmp evdff(2) = evdff(2) + dabs(tmp) - ekosum = ekosum + eko_l(map_z(ib),ik) + ekosum = ekosum + eko_l(ib, ik) if(dabs(tmp).gt.DELTAevdff) then ! === For epsmain by tkato 2013/11/14 ========================================== evdff(3) = evdff(3) + dsqrt(dabs(eko1_l(ib,ik)**2 - eko_l(ib,ik)**2)) @@ -1310,7 +1310,7 @@ contains if(iprievdff >= 1) & !!$ & write(nfout,'(" >> (",i6,") <eko_old-eko_new>:(",3d13.5,")")') & !!$ & iteration_electronic, evdff(1), evdff(2), evdff(3) - & write(nfout,'(" ENERGY EIGEN VALUE SUMM. FOR",i6," -TH ITER (K=",i6,":",i6 & + & write(nfout,'(" ENERGY EIGEN VALUE SUM. FOR",i6," -TH ITER (K=",i6,":",i6 & & ,") = ",F16.8, " <eko_old-eko_new>:(",3d13.5,")")') & & iteration_electronic,nk_in_the_process, nk_in_the_process+kv3-1 & & , ekosum, evdff(1), evdff(2), evdff(3) @@ -1378,9 +1378,10 @@ contains end do if(npes > 1) then - call mpi_allreduce(evdff2,evdffr2,3*kv3 & - & ,mpi_double_precision,mpi_sum,mpi_comm_group,ierr) ! MPI - evdff2 = evdffr2 + call mpi_allreduce(mpi_in_place,evdff2,3*kv3 & + & ,mpi_double_precision,mpi_sum,mpi_kg_world,ierr) ! MPI + call mpi_allreduce(mpi_in_place,evdff2,3*kv3 & + & ,mpi_double_precision,mpi_sum,mpi_ge_world,ierr) ! MPI end if ! ================================== modified by K. Tagami ============= 11.0 diff --git a/src_phase_3d/m_Epsilon_ek.F90 b/src_phase_3d/m_Epsilon_ek.F90 index 3c267c0..5d9772c 100644 --- a/src_phase_3d/m_Epsilon_ek.F90 +++ b/src_phase_3d/m_Epsilon_ek.F90 @@ -2101,7 +2101,9 @@ ppc_data(ntyp) deallocate(vk00xyz); deallocate(vk0xyz) deallocate(vk0_op); deallocate(nopr_k); deallocate(op_k) end if - if(trm2_mode == 0) deallocate(trm2) + if(trm2_mode == 0) then + if ( allocated(trm2)) deallocate(trm2) + end if end if deallocate(e); deallocate(imeps); deallocate(reps) @@ -6514,6 +6516,10 @@ ppc_data(ntyp) ! allocate(n_unfilled(nspin)); n_unfilled = 0 ! allocate(n_half_filled(nspin)); n_half_filled = 0 ! + if(allocated(n_filled)) deallocate(n_filled) + if(allocated(n_unfilled)) deallocate(n_unfilled) + if(allocated(n_half_filled)) deallocate(n_half_filled) + allocate(n_filled(nspin_kt)) ; n_filled = 0 allocate(n_unfilled(nspin_kt)); n_unfilled = 0 allocate(n_half_filled(nspin_kt)); n_half_filled = 0 diff --git a/src_phase_3d/m_FFT.F90 b/src_phase_3d/m_FFT.F90 index fcdee77..4263d60 100644 --- a/src_phase_3d/m_FFT.F90 +++ b/src_phase_3d/m_FFT.F90 @@ -13352,6 +13352,16 @@ call stop_timer('FFT-I yfft') endif if (firstcall_direct_xyz_3d) then + if(planz1 /= 0) call dfftw_destroy_plan(planz1) + if(planz1_1d /= 0) call dfftw_destroy_plan(planz1_1d) + if(planz2 /= 0) call dfftw_destroy_plan(planz2) + if(planz2_1d /= 0) call dfftw_destroy_plan(planz2_1d) + if(plany1 /= 0) call dfftw_destroy_plan(plany1) + if(plany2 /= 0) call dfftw_destroy_plan(plany2) + if(planx1 /= 0) call dfftw_destroy_plan(planx1) + if(planx1_1d /= 0) call dfftw_destroy_plan(planx1_1d) + if(planx2 /= 0) call dfftw_destroy_plan(planx2) + if(planx2_1d /= 0) call dfftw_destroy_plan(planx2_1d) savesize = 0 #ifdef __TIMER_DO__ call timer_sta(118) @@ -16217,6 +16227,16 @@ call stop_timer('FFT-I yfft') endif if (firstcall_inverse_xyz_3d) then + if(planx1 /= 0) call dfftw_destroy_plan(planx1) + if(planx1_1d /= 0) call dfftw_destroy_plan(planx1_1d) + if(planx2 /= 0) call dfftw_destroy_plan(planx2) + if(planx2_1d /= 0) call dfftw_destroy_plan(planx2_1d) + if(plany1 /= 0) call dfftw_destroy_plan(plany1) + if(plany2 /= 0) call dfftw_destroy_plan(plany2) + if(planz1 /= 0) call dfftw_destroy_plan(planz1) + if(planz1_1d /= 0) call dfftw_destroy_plan(planz1_1d) + if(planz2 /= 0) call dfftw_destroy_plan(planz2) + if(planz2_1d /= 0) call dfftw_destroy_plan(planz2_1d) savesize = 0 #ifdef __TIMER_DO__ call timer_sta(134) diff --git a/src_phase_3d/m_FFT_type4_fftw3_rev.F90 b/src_phase_3d/m_FFT_type4_fftw3_rev.F90 index 8dd370d..ee21468 100644 --- a/src_phase_3d/m_FFT_type4_fftw3_rev.F90 +++ b/src_phase_3d/m_FFT_type4_fftw3_rev.F90 @@ -88,14 +88,14 @@ contains subroutine m_FFT_finalize() call dfftw_destroy_plan(plan_WF(1)) call dfftw_destroy_plan(plan_WF(2)) - call dfftw_destroy_plan(plan_CD(1)) - call dfftw_destroy_plan(plan_CD(2)) +! call dfftw_destroy_plan(plan_CD(1)) +! call dfftw_destroy_plan(plan_CD(2)) if(sw_positron/=OFF)then call dfftw_destroy_plan(plan_pWF(1)) call dfftw_destroy_plan(plan_pWF(2)) endif - call dfftw_cleanup() +! call dfftw_cleanup() end subroutine m_FFT_finalize subroutine m_FFT_setup(inversion_symmetry,paramset) diff --git a/src_phase_3d/m_Ionic_System.F90 b/src_phase_3d/m_Ionic_System.F90 index 7131934..82c6c93 100644 --- a/src_phase_3d/m_Ionic_System.F90 +++ b/src_phase_3d/m_Ionic_System.F90 @@ -6061,7 +6061,9 @@ contains read(nfcntn,*) forcmx_constraint_quench end if end if - if(sw_optimize_lattice==ON)then + if(sw_optimize_lattice==ON .or. & + imdalg == PT_CONTROL .or. & + imdalg == P_CONTROL) then call rewind_to_tag0(nfcntn,len(tag_lattice_vector),tag_lattice_vector & &, EOF_reach, tag_is_found, str, len_str) if(tag_is_found)then @@ -6083,12 +6085,16 @@ contains & ,mpi_double_precision,0,mpi_comm_group,ierr) call mpi_bcast(forcmx_constraint_quench,1 & & ,mpi_double_precision,0,mpi_comm_group,ierr) - if(sw_optimize_lattice==ON)then + if(sw_optimize_lattice==ON .or. & + imdalg == PT_CONTROL .or. & + imdalg == P_CONTROL) then call mpi_bcast(altv,9,mpi_double_precision,0,mpi_comm_group,ierr) endif end if - if(sw_optimize_lattice==ON)then + if(sw_optimize_lattice==ON .or. & + imdalg == PT_CONTROL .or. & + imdalg == P_CONTROL) then call m_CS_altv_2_rltv() endif @@ -6940,7 +6946,8 @@ contains & , cpd_l, cpd_old, ipcpd, iop_supercell) end if - if(iprimd >= 1) then +!!$ if(iprimd >= 1) then + if(iprimd > 1) then write(nfout,'(" -- cpd_l, cpd_old (after fd_symmetrize) --")') do i = 1, natm write(nfout,'(i5,6f18.5)') i, cpd_l(i,1:3),cpd_old(i,1:3) diff --git a/src_phase_3d/m_Parallelization.F90 b/src_phase_3d/m_Parallelization.F90 index b491566..ec8b721 100644 --- a/src_phase_3d/m_Parallelization.F90 +++ b/src_phase_3d/m_Parallelization.F90 @@ -160,6 +160,7 @@ module m_Parallelization integer, allocatable, dimension(:) :: map_k ! d(kv3) integer, allocatable, dimension(:,:):: map_ek ! d(neg,kv3) integer :: myrank_k,ista_k,iend_k + integer, allocatable, dimension(:,:,:) :: map_rank_gek ! d(nrank_g,nrank_e,nrank_k) integer, allocatable, dimension(:) :: nis_k, nie_k, nel_k integer, allocatable, dimension(:,:,:):: map_ekg ! d(neg,kg1,kv3) integer, allocatable, dimension(:) :: nis_eg, nie_eg, nel_eg, neg_gg, neg_gg_all ! d(nrank_g) @@ -537,7 +538,9 @@ contains inquire(file='dirlist',exist=exist_dirlist) if(.not.exist_dirlist) then - if(firstcall) call mpi_comm_dup(mpi_comm_world,mpi_comm_group,ierr) + !if(firstcall) call mpi_comm_dup(mpi_comm_world,mpi_comm_group,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_comm_group, ierr) + call mpi_comm_dup(mpi_comm_world,mpi_comm_group,ierr) !!write(*,*) 'mpi_comm_world=',mpi_comm_world !!write(*,*) 'mpi_comm_group=',mpi_comm_group @@ -627,7 +630,9 @@ contains !!write(*,*) 'workdir=',trim(workdir) !!my_color = mype !!my_key = 0 - if(firstcall) call mpi_comm_split(MPI_COMM_WORLD,my_color,my_key,mpi_comm_group,ierr) + !if(firstcall) call mpi_comm_split(MPI_COMM_WORLD,my_color,my_key,mpi_comm_group,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_comm_group, ierr) + call mpi_comm_split(MPI_COMM_WORLD,my_color,my_key,mpi_comm_group,ierr) call mpi_comm_size(mpi_comm_group, npes, ierr) call mpi_comm_rank(mpi_comm_group, mype, ierr) !!write(*,*) 'mpi_comm_group=',mpi_comm_group @@ -800,7 +805,9 @@ contains icolor = 1 key = myrank_nbmx end if - if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_nbmx_world(j),ierr) +! if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_nbmx_world(j),ierr) + if(.not.firstcall) call mpi_comm_free(mpi_nbmx_world(j), ierr) + call mpi_comm_split(mpi_comm_group, icolor, key, mpi_nbmx_world(j),ierr) call mpi_comm_size(mpi_nbmx_world(j), newpes, ierr) call mpi_comm_rank(mpi_nbmx_world(j), newmype,ierr) end do @@ -892,7 +899,9 @@ contains icolor = 1 key = myrank_nbmx_k end if - if(firstcall) call mpi_comm_split(mpi_k_world(myrank_k), icolor, key, mpi_nbmx_world_k(j),ierr) +! if(firstcall) call mpi_comm_split(mpi_k_world(myrank_k), icolor, key, mpi_nbmx_world_k(j),ierr) + if(.not.firstcall) call mpi_comm_free(mpi_nbmx_world_k(j), ierr) + call mpi_comm_split(mpi_k_world(myrank_k), icolor, key, mpi_nbmx_world_k(j),ierr) call mpi_comm_size(mpi_nbmx_world_k(j), newpes, ierr) call mpi_comm_rank(mpi_nbmx_world_k(j), newmype,ierr) end do @@ -932,365 +941,22 @@ contains end subroutine set_block_range4allgather ! === necessary to make 3D_Parallel, too!!! by tkato =========================== -!!BRANCH_P ORG_Parallel -! ============================================================================== - subroutine m_Parallel_init_mpi_elec(nfout,ipri,printable,neg,kv3,nspin,kg1) - integer, intent(in) :: nfout,ipri,neg, kv3, nspin, kg1 - logical, intent(in) :: printable - logical, save :: firstcall=.true. - -#ifdef NEC_TUNE_SOFT - character*4 F_RSVTASK - integer :: tmp_a1,tmp_a2,tmp_b1,tmp_b2 -#elif NEC_TUNE_FFT - character*4 F_RSVTASK -#endif - - integer :: i, j, ip, icolor, key, kv3_half - -!!$! =========================================== added by K. Tagami ======== 11.0 -!!$ integer :: kv3_div -!!$! ======================================================================= 11.0 - - integer :: newpes, newmype - logical :: set_mapping_func - integer, pointer, dimension(:) :: nproc2rank_k, nproc2rank_e - - integer, parameter :: nc = 24 - integer, parameter :: nc1 = 16 -!!$ integer, parameter :: nc2 = 18 - integer :: i0, i1, sw_title - character*7 :: strmap - - sw_title = 1 - if(ipri >= 2 .and. printable) then - write(nfout,'(" !|| << m_Parallel_init_mpi_elec >>")') - write(nfout,'(" !|| neg, kv3 = ",i5, i8)') neg, kv3 - end if - if(neg <= 0 ) stop " neg is not positive value " - if(kv3 <= 0 ) stop " kv3 is not positive value " - -!!!!!!!!!!!!!!!!! added by mizouchi@adv 2003.02.26 !!!!!!!!!!!!!! -#ifdef IRIX64 - -! ====================================== modified by K. Tagami =============== 11.0 -!! if((kv3 == 1.and.nspin ==1).or.(kv3 == 2.and.nspin ==2)) then - if ( kv3/nspin == 1 ) then -! =========================================================================== 11.0 - - nrank_e = npes - nrank_k = 1 - if(ipri >= 1 .and. printable) then - write(nfout,'(" !| modified nrank_e and nrank_k due to 1 kpoint calculation ")') - write(nfout,'(" !| modified nrank_e = ", i4)') nrank_e - write(nfout,'(" !| modified nrank_k = ", i4)') nrank_k - end if -#endif -!!!!!!!!!!!!!!!!! added by mizouchi@adv 2003.02.26 !!!!!!!!!!!!!! - -! if(nrank_k > kv3) stop ' nrank_k > kv3 (m_Parallel_init_mpi_elec)' - if(nrank_k > kv3) call phase_execution_error(PARALLELIZATION_INVALID_NK) -!!$ if(nrank_e > neg) call phase_execution_error(PARALLELIZATION_INVALID_NE) !! not here !! - - allocate(map_e(neg)) - allocate(map_z(neg)) - allocate(map_k(kv3)) - allocate(map_ek(neg,kv3)) - if(.not.allocated(mpi_k_world)) allocate(mpi_k_world(0:nrank_k-1)) - if(.not.allocated(mpi_e_world)) allocate(mpi_e_world(0:nrank_e-1)) - allocate(nis_e(0:nrank_e-1)); nis_e = neg+1 - allocate(nie_e(0:nrank_e-1)); nie_e = 0 - allocate(nel_e(0:nrank_e-1)); nel_e = 0 - allocate(idisp_e(0:nrank_e-1)); idisp_e = 0 -!!$#ifdef TRANSPOSE - allocate(nis_g1(0:nrank_g1-1)); nis_g1 = kg1 + 1 - allocate(nie_g1(0:nrank_g1-1)); nie_g1 = 0 - allocate(nel_g1(0:nrank_g1-1)); nel_g1 = 0 -!!$#endif - allocate(nis_k(0:nrank_k-1)); nis_k = 0 - allocate(nie_k(0:nrank_k-1)) - allocate(nel_k(0:nrank_k-1)) - -! (( myrank_e, myrank_k )) - allocate(nproc2rank_e(0:npes-1)) - allocate(nproc2rank_k(0:npes-1)) - ip = 0 - do i = 0, nrank_k-1 - do j = 0, nrank_e-1 - nproc2rank_k(ip) = i - nproc2rank_e(ip) = j - ip = ip + 1 - end do - end do - myrank_k = nproc2rank_k(mype) - myrank_e = nproc2rank_e(mype) - deallocate(nproc2rank_k) - deallocate(nproc2rank_e) -! (( map_e, nis_e, nie_e, nel_e, ista_e, iend_e, istep_e, np_e, mp_e )) -!!$#ifdef TRANSPOSE - set_mapping_func = .true. - call set_block_range(neg,nrank_e,nel_e,nis_e,nie_e, set_mapping_func, map_e) - do i = 0, nrank_e-1 - idisp_e(i) = nis_e(i)-1 - end do - if(ipri >= 2) call wd_e_range - istep_e = 1 -!!$#else -!!$ do i = 1, neg -!!$ map_e(i) = mod(i-1,nrank_e) -!!$ end do -!!$ do i = 1, neg -!!$ ip = map_e(i) -!!$ nel_e(ip) = nel_e(ip) + 1 -!!$ if(nis_e(ip) > i) nis_e(ip) = i -!!$ if(nie_e(ip) < i) nie_e(ip) = i -!!$ end do -!!$ istep_e = nrank_e -!!$ -!!$#endif - if(ipri >= 2 .and. printable) then - strmap = " i " - call wd_maparray(neg,map_e,strmap," map_e ",sw_title) - end if - - ista_e = nis_e(myrank_e) - iend_e = nie_e(myrank_e) - np_e = nel_e(myrank_e) - mp_e = maxval(nel_e) - if(ipri==1 .and. printable) then - if(mp_e < 100000) then - write(nfout,'(" !|| ista_e, iend_e = ",i5,",",i5,", mp_e = ",i5,", neg = ",i12,", kv3 = ",i5 & - & ," << m_Parallel_init_mpi_elec >>")') ista_e,iend_e,mp_e,neg,kv3 - else if(mp_e <100000000) then - write(nfout,'(" !|| ista_e, iend_e = ",i8,",",i8,", mp_e = ",i8,", neg = ",i13,", kv3 = ",i5 & - & ," << m_Parallel_init_mpi_elec >>")') ista_e,iend_e,mp_e,neg,kv3 - else - write(nfout,'(" !|| ista_e, iend_e = ",i0,",",i0,", mp_e = ",i0,", neg = ",i0,", kv3 = ",i5 & - & ," << m_Parallel_init_mpi_elec >>")') ista_e,iend_e,mp_e,neg,kv3 - end if - end if - - -#ifdef NEC_TUNE_SOFT - call getenv('F_RSVTASK',F_RSVTASK) - read (F_RSVTASK,'(i4)') itask - allocate(ista_e_smp(itask)) - allocate(iend_e_smp(itask)) - - tmp_a1 = (iend_e-ista_e+1)/istep_e - tmp_a2 = mod(iend_e-ista_e+1,istep_e) - if (tmp_a2 .ne. 0) tmp_a1=tmp_a1+1 - tmp_b1 = tmp_a1/itask - tmp_b2 = mod(tmp_a1,itask) - if (tmp_b2 .ne. 0) tmp_b1=tmp_b1+1 - - do i=1,itask - ista_e_smp(i) = ista_e + tmp_b1 * istep_e * (i -1) - iend_e_smp(i) = min((ista_e + tmp_b1 * istep_e * i -1),iend_e) - end do -#elif NEC_TUNE_FFT - call getenv('F_RSVTASK',F_RSVTASK) - read (F_RSVTASK,'(i4)') itask -#endif - if(ipri >= 2 .and. printable) & - & write(nfout,'(" !|| -- ista_e, iend_e, istep_e, np_e, mp_e = ",5i6)') & - & ista_e,iend_e,istep_e, np_e, mp_e - -! (( map_z )) -!!$#ifdef TRANSPOSE - j = 0 - do ip = 1, nrank_e - do i = 1, nel_e(ip-1) - j = j + 1 - map_z(j) = i - end do - end do -!!$#else -!!$ ip = 1 -!!$ do i = 1, neg -!!$ map_z(i) = ip -!!$ if(mod(i,nrank_e) == 0) ip = ip + 1 -!!$ end do -!!$#endif - if(ipri >= 2 .and. printable) then - strmap = " i " - call wd_maparray(neg,map_z,strmap," map_z ",sw_title) - end if -! (( map_k, nis_k, nie_k, nel_k, ista_k, iend_k )) - if(kv3/nspin < nrank_k) then - if(ipri >= 1 .and. printable) then - write(nfout,'(" ******")') - write(nfout,'(" ** The nrank_k that you have specified is smaller than the number of k-points (kv3/nspin).")') - write(nfout,'(" ** kv3/nspin = ",i8," nrank_k = ",i8)') kv3/nspin, nrank_k - write(nfout,'(" ** Reduce the size of the rank for k-point division (nrank_k) & - & which is set in nk= option when you run this program, please")') - call flush(nfout) - end if - stop ' ** The nrank_k that is specified is smaller than the number of generated or read k-points (=kv3/nspin).' - end if - if(nspin == 1) then - set_mapping_func = .true. - call set_block_range(kv3,nrank_k,nel_k,nis_k,nie_k, set_mapping_func,map_k) - ista_k = nis_k(myrank_k) - iend_k = nie_k(myrank_k) - else if(nspin == 2) then - set_mapping_func = .false. - kv3_half = kv3/nspin - call set_block_range(kv3_half,nrank_k,nel_k,nis_k,nie_k, set_mapping_func) - nel_k = nel_k*nspin - nie_k = nie_k*nspin - nis_k = nis_k*nspin-1 - j = 0 - do i = 1, kv3 - if(nie_k(j) < i) j = j + 1 - map_k(i) = j - end do - ista_k = nis_k(myrank_k) - iend_k = nie_k(myrank_k) - end if - - if(ipri >= 2 .and. printable) then - strmap = " i " - call wd_maparray(kv3,map_k,strmap," map_k ",sw_title) - write(nfout,'(" !|| -- myrank_k = ",i8)') myrank_k - write(nfout,'(" !|| -- ista_k, iend_k = ",2i8)') ista_k,iend_k - end if - if(ista_k > iend_k) stop ' !! illegal combination of ista_k, and iend_k' - -! (( map_ek )) - do j = 1, kv3 - do i = 1, neg - map_ek(i,j) = map_e(i) + map_k(j)*(nrank_e) - end do - end do - - if(ipri >= 2 .and. printable) then - write(nfout,'(" !|| --- map_ek ---")') - sw_title = 0 - do j = 1, kv3 - write(strmap,'("ik=",i4)') j - call wd_maparray(neg,map_ek(1,j),strmap,"map_ek ",sw_title) - end do - end if - -!!$#ifdef TRANSPOSE -! (( ista_g1, iend_g1 )) - set_mapping_func = .false. - if(ipri >= 2 .and. printable) write(nfout,'(" !|| kg1 = ",i10)') kg1 - call set_block_range(kg1,nrank_e,nel_g1,nis_g1,nie_g1, set_mapping_func) - ista_g1 = nis_g1(myrank_e) - iend_g1 = nie_g1(myrank_e) - np_g1 = nel_g1(myrank_e) - mp_g1 = maxval(nel_g1) - if(ipri >= 2 .and. printable) then - write(nfout,'(" !|| --- mis_g1,nie_g1,nel_g1 ---")') - write(nfout,'(" !|| ( rank_e )",15i7)')(i,i=1,nrank_e) - write(nfout,'(" !|| ( nis_g1 )",15i7)')(nis_g1(i),i=0,nrank_e-1) - write(nfout,'(" !|| ( nie_g1 )",15i7)')(nie_g1(i),i=0,nrank_e-1) - write(nfout,'(" !|| ( nel_g1 )",15i7)')(nel_g1(i),i=0,nrank_e-1) - write(nfout,'(" !|| (myrank_e = ",i3," kg1,ista_g1,iend_g1,np_g1,mp_g1 = ",5i7)') & - & myrank_e,kg1,ista_g1,iend_g1,np_g1,mp_g1 - end if -!!$#endif - -! (( mpi_k_world )) - do j = 0, nrank_k-1 - icolor = 0 - key = 0 - do i = 0, nrank_e-1 - if(mype == i + nrank_e*j) then - icolor = 1 - key = i - end if - end do - if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_k_world(j),ierr) - call mpi_comm_size(mpi_k_world(j), newpes,ierr) - call mpi_comm_rank(mpi_k_world(j), newmype,ierr) -!!$ print '(" mype = ",i3," newmype = ", i3," newpes = ",i3," mpi_k_world = ", i3 & -!!$ &, " icolor = ",i3," key = ",i3)' & -!!$ & ,mype,newmype, newpes,mpi_k_world(j),icolor, key - end do - - do j=0,nrank_e-1 - icolor = myrank_e - key = mype - if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_e_world(j),ierr) - call mpi_comm_size(mpi_e_world(j), newpes,ierr) - call mpi_comm_rank(mpi_e_world(j), newmype,ierr) - enddo - icolor = myrank_e - key = mype - call mpi_comm_split(mpi_comm_group, icolor, key, mpi_ge_world, ierr) - call mpi_comm_size(mpi_ge_world, newpes, ierr) - call mpi_comm_rank(mpi_ge_world, newmype, ierr) - firstcall = .false. - contains - subroutine wd_maparray(nelement,map_a,map_title0,map_title,sw_title) - integer, intent(in) :: nelement - integer, dimension(nelement), intent(in) :: map_a - character*7, intent(in) :: map_title0,map_title - integer, intent(in) :: sw_title - integer :: nca, j, i0, i1, i, iloop - - if(sw_title==1) write(nfout,'(" !|| --- ",a7," ---")') map_title - if(nelement < 100) then - nca = nc ! = 24 - else if(nelement < 1000) then - nca = nc*3/4 ! = 18 - else if(nelement < 10000) then - nca = nc*3/5 ! = 14 - else if(nelement < 100000) then - nca = nc*3/6 ! = 12 - else - nca = nc*3/9 ! = 8 - end if - iloop = ceiling(dble(nelement)/nca) - do j = 1, iloop - i0 = (j-1)*nca + 1 - i1 = i0 + nca -1 - if(i1 > nelement) i1 = nelement - if(nelement < 100) then - write(nfout,'(" !|| (",a7,")",24i3)') map_title0,(i,i=i0,i1) - write(nfout,'(" !|| (",a7,")",24i3)') map_title, (map_a(i),i=i0,min(nelement,i1)) - else if(nelement < 1000) then - write(nfout,'(" !|| (",a7,")",18i4)') map_title0,(i,i=i0,i1) - write(nfout,'(" !|| (",a7,")",18i4)') map_title, (map_a(i),i=i0,i1) - else if(nelement < 10000) then - write(nfout,'(" !|| (",a7,")",14i5)') map_title0,(i,i=i0,i1) - write(nfout,'(" !|| (",a7,")",14i5)') map_title, (map_a(i),i=i0,i1) - else if(nelement < 100000) then - write(nfout,'(" !|| (",a7,")",12i5)') map_title0,(i,i=i0,i1) - write(nfout,'(" !|| (",a7,")",12i5)') map_title, (map_a(i),i=i0,i1) - else - write(nfout,'(" !|| (",a7,")",8i9)') map_title0,(i,i=i0,i1) - write(nfout,'(" !|| (",a7,")",8i9)') map_title, (map_a(i),i=i0,i1) - end if - end do - end subroutine wd_maparray - - subroutine wd_e_range - if(printable) then - write(nfout,'(" !|| nrank_e")') - write(nfout,'(" !|| i : ",20i4)')(i,i=0,nrank_e-1) - write(nfout,'(" !|| nis_e : ",20i4)')(nis_e(i),i=0,nrank_e-1) - write(nfout,'(" !|| nie_e : ",20i4)')(nie_e(i),i=0,nrank_e-1) - write(nfout,'(" !|| nel_e : ",20i4)')(nel_e(i),i=0,nrank_e-1) -!!$ write(nfout,'(" !|| i : ",20i4)')(i,i=0,nrank_e-1) -!!$ write(nfout,'(" !|| nis_e : ",20i4)')(nis_e(i),i=0,nrank_e-1) -!!$ write(nfout,'(" !|| nie_e : ",20i4)')(nie_e(i),i=0,nrank_e-1) -!!$ write(nfout,'(" !|| nel_e : ",20i4)')(nel_e(i),i=0,nrank_e-1) -!!$#ifdef TRANSPOSE - write(nfout,'(" !|| idisp_e : ",20i4)')(idisp_e(i),i=0,nrank_e-1) -!!$#endif - end if - end subroutine wd_e_range - - end subroutine m_Parallel_init_mpi_elec -! === necessary to make 3D_Parallel, too!!! by tkato =========================== -!!BRANCH_P_END ORG_Parallel ! ============================================================================== subroutine m_Parallel_dealloc_mpi_fft_box + if(allocated(nis_fft_X_z)) deallocate(nis_fft_X_z) + if(allocated(nis_fft_X_y)) deallocate(nis_fft_X_y) + if(allocated(nis_fft_Z_x)) deallocate(nis_fft_Z_x) + if(allocated(nis_fft_Z_y)) deallocate(nis_fft_Z_y) + if(allocated(nis_fft_Y_x)) deallocate(nis_fft_Y_x) + if(allocated(nis_fft_Y_z)) deallocate(nis_fft_Y_z) + if(allocated(nie_fft_X_z)) deallocate(nie_fft_X_z) + if(allocated(nie_fft_X_y)) deallocate(nie_fft_X_y) + if(allocated(nie_fft_Z_x)) deallocate(nie_fft_Z_x) + if(allocated(nie_fft_Z_y)) deallocate(nie_fft_Z_y) + if(allocated(nie_fft_Y_x)) deallocate(nie_fft_Y_x) + if(allocated(nie_fft_Y_z)) deallocate(nie_fft_Y_z) + if(allocated(map_fft_x)) deallocate(map_fft_x) if(allocated(map_fft_y)) deallocate(map_fft_y) if(allocated(map_fft_z)) deallocate(map_fft_z) @@ -1475,6 +1141,8 @@ contains if(ipri >= iprilevel) write(nfout,'(" !|| myrank_ggacmp, myrank_cdfft",/ & & ," !|| = ",2i10," <<m_Parallel_init_mpi_cdfft>>")') & & myrank_ggacmp, myrank_cdfft + if(allocated(map_pe2ggacmp)) deallocate(map_pe2ggacmp) + if(allocated(map_pe2cdfft)) deallocate(map_pe2cdfft) allocate(map_pe2ggacmp(0:npes-1)) allocate(map_pe2cdfft(0:npes-1)) do i = 0, npes-1 @@ -1487,6 +1155,25 @@ contains & (map_pe2cdfft(i),i=0,npes-1) end subroutine m_Parallel_init_mpi_cdfft + subroutine m_Parallel_dealloc_mpi_gga() + if(allocated(map_ggacmp)) deallocate(map_ggacmp) + if(allocated(nis_fftp)) deallocate(nis_fftp) + if(allocated(nie_fftp)) deallocate(nie_fftp) + if(allocated(nel_fftp)) deallocate(nel_fftp) + if(allocated(idisp_fftp)) deallocate(idisp_fftp) + if(allocated(nis_fftph)) deallocate(nis_fftph) + if(allocated(nie_fftph)) deallocate(nie_fftph) + if(allocated(nel_fftph)) deallocate(nel_fftph) + if(allocated(idisp_fftph)) deallocate(idisp_fftph) + if(allocated(nis_sfftp)) deallocate(nis_sfftp) + if(allocated(nie_sfftp)) deallocate(nie_sfftp) + if(allocated(nel_sfftp)) deallocate(nel_sfftp) + if(allocated(idisp_sfftp)) deallocate(idisp_sfftp) + if(allocated(nis_sfftph)) deallocate(nis_sfftph) + if(allocated(nie_sfftph)) deallocate(nie_sfftph) + if(allocated(nel_sfftph)) deallocate(nel_sfftph) + end subroutine m_Parallel_dealloc_mpi_gga + subroutine m_Parallel_init_mpi_gga(nfout,ipri,printable,nfftp,nfftps) integer, intent(in) :: nfout,ipri,nfftp,nfftps logical, intent(in) :: printable @@ -1625,7 +1312,9 @@ contains !!$ key = i !!$ end if !!$ end do - if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_cdfft_world(j),ierr) +! if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_cdfft_world(j),ierr) + if(.not.firstcall) call mpi_comm_free(mpi_cdfft_world(j), ierr) + call mpi_comm_split(mpi_comm_group,icolor,key,mpi_cdfft_world(j),ierr) call mpi_comm_size(mpi_cdfft_world(j), newpes, ierr) call mpi_comm_rank(mpi_cdfft_world(j), newmype, ierr) end do @@ -1643,7 +1332,9 @@ contains !!$ key = i !!$ end if !!$ end do - if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_ggacmp_cross_world(j),ierr) +! if(firstcall) call mpi_comm_split(mpi_comm_group,icolor,key,mpi_ggacmp_cross_world(j),ierr) + if(.not.firstcall) call mpi_comm_free(mpi_ggacmp_cross_world(j), ierr) + call mpi_comm_split(mpi_comm_group,icolor,key,mpi_ggacmp_cross_world(j),ierr) call mpi_comm_size(mpi_ggacmp_cross_world(j),newpes,ierr) call mpi_comm_rank(mpi_ggacmp_cross_world(j),newmype,ierr) if(ipri >= 2) then @@ -1658,6 +1349,13 @@ contains firstcall = .false. end subroutine m_Parallel_init_mpi_gga + + subroutine m_Parallel_dealloc_mpi_mix() + if(allocated(is_kgpm)) deallocate(is_kgpm) + if(allocated(ie_kgpm)) deallocate(ie_kgpm) + if(allocated(nel_kgpm)) deallocate(nel_kgpm) + end subroutine m_Parallel_dealloc_mpi_mix + subroutine m_Parallel_init_mpi_mix(nfout,ipri,printable,kgpm,comm_for_chg) integer, intent(in) :: nfout,ipri,kgpm logical, intent(in) :: printable @@ -1914,6 +1612,9 @@ contains !!npes = nrank_g !!mype = myrank_g + if(allocated(is_atm_f)) deallocate(is_atm_f) + if(allocated(ie_atm_f)) deallocate(ie_atm_f) + if(allocated(nel_atm_f)) deallocate(nel_atm_f) allocate(is_atm_f(0:npes-1)) allocate(ie_atm_f(0:npes-1)) allocate(nel_atm_f(0:npes-1)) @@ -2011,6 +1712,9 @@ contains __TIMER_SUB_START(1239) npes = nrank_g mype = myrank_g + if(allocated(is_atm2)) deallocate(is_atm2) + if(allocated(ie_atm2)) deallocate(ie_atm2) + if(allocated(nel_atm2)) deallocate(nel_atm2) allocate(is_atm2(0:npes-1)) allocate(ie_atm2(0:npes-1)) allocate(nel_atm2(0:npes-1)) @@ -2047,7 +1751,6 @@ contains end subroutine m_Parallel_init_mpi_atm2 - subroutine split_into_ggablock_cdfft(npes,nrank_ggacmp,npes_cdfft,nrest_cdfft) integer, intent(in) :: npes integer, intent(out) :: nrank_ggacmp, npes_cdfft, nrest_cdfft @@ -2126,10 +1829,8 @@ contains if(allocated(ista_g1k)) deallocate(ista_g1k) if(allocated(iend_g1k)) deallocate(iend_g1k) - if(.not.allocated(np_g1k_prev)) allocate(np_g1k_prev(size(np_g1k))) - np_g1k_prev = np_g1k if(allocated(np_g1k)) deallocate(np_g1k) -! if(allocated(np_g1k_prev)) deallocate(np_g1k_prev) + if(allocated(np_g1k_prev)) deallocate(np_g1k_prev) if(allocated(mp_g1k)) deallocate(mp_g1k) if(allocated(nis_fftp)) deallocate(nis_fftp) @@ -2354,7 +2055,9 @@ contains ikey = i end if end do - if(firstcall) call mpi_comm_split( mpi_comm_world, icolor, ikey, new_comm_world(i), mpi_err ) +! if(firstcall) call mpi_comm_split( mpi_comm_world, icolor, ikey, new_comm_world(i), mpi_err ) + if(.not.firstcall) call mpi_comm_free(new_comm_world(i), mpi_err) + call mpi_comm_split( mpi_comm_world, icolor, ikey, new_comm_world(i), mpi_err ) end do mype_conf = mypetmp/(npestmp/nrank_conf) @@ -3934,7 +3637,7 @@ contains read (F_RSVTASK,'(i4)') itask #endif - if(ipri >= 1 .and. printable) & + if(ipri >= 2 .and. printable) & & write(nfout,'(" !|| m_P_init_mpi_elec_3D -- ista_e, iend_e, istep_e, np_e, mp_e = ",5i6)') & & ista_e,iend_e,istep_e, np_e, mp_e @@ -3993,9 +3696,46 @@ contains icolor = myrank_g key = myrank_e+myrank_k*nrank_e - if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_g_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_comm_group, icolor, key, mpi_g_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_g_world, ierr) + call mpi_comm_split(mpi_comm_group, icolor, key, mpi_g_world,ierr) call mpi_comm_size(mpi_g_world, nrank_ke, ierr) call mpi_comm_rank(mpi_g_world, myrank_ke, ierr) + +!! coded by T. Yamasaki, 2021.02.25 --> + if(allocated(map_rank_gek)) deallocate(map_rank_gek) + allocate(map_rank_gek(0:nrank_g-1,0:nrank_e-1,0:nrank_k-1)); map_rank_gek = 0 + map_rank_gek(myrank_g,myrank_e,myrank_k) = mype + call mpi_allreduce(MPI_IN_PLACE,map_rank_gek,npes,mpi_integer,mpi_sum,mpi_comm_group,ierr) +!!$ +!!$ ip = 0 +!!$ do k = 0, nrank_k-1 +!!$ do i = 0, nrank_g-1 +!!$ do j = 0, nrank_e-1 +!!$ map_rank_gek(i,j,k) = ip +!!$ ip = ip + 1 +!!$ end do +!!$ end do +!!$ end do + if(ipri>=2 .and. printable) then + ip = 0 + write(nfout,'(" !|| No. rank_k,rank_e,rank_g, map_rank_gek")') + do k = 0, nrank_k-1 + do j = 0, nrank_e-1 + do i = 0, nrank_g-1 + write(nfout,'(" !|| ",4i6," ",i6)') ip,k,j,i, map_rank_gek(i,j,k) + ip = ip + 1 + end do + end do + end do + write(nfout,'(" myrank_k, myrank_e, myrank_g, mype")') + write(nfout,'(4i8)') myrank_k,myrank_e,myrank_g,mype + end if + if(map_rank_gek(myrank_g,myrank_e,myrank_k) /= mype) then + write(nfout,'(" map_rank_gek is not right")') + stop ' map_rank_gek is not right' + end if +!! <-- #if 0 ! (( map_ek )) do j = 1, kv3 @@ -4307,7 +4047,12 @@ contains integer :: ibuf_temp allocate(np(0:nrank_g-1)); np = 0.d0 - + if(allocated(nis_fs)) deallocate(nis_fs) + if(allocated(nie_fs)) deallocate(nie_fs) + if(allocated(nel_fs)) deallocate(nel_fs) + if(allocated(nis_fs_atm)) deallocate(nis_fs_atm) + if(allocated(nie_fs_atm)) deallocate(nie_fs_atm) + if(allocated(nel_fs_atm)) deallocate(nel_fs_atm) allocate(nis_fs(0:nrank_g-1)) allocate(nie_fs(0:nrank_g-1)) allocate(nel_fs(0:nrank_g-1)) @@ -4431,6 +4176,8 @@ contains end if ! for zaj_l_ball fs(ri)_l_ball + if(allocated(ball_buff)) deallocate(ball_buff) + if(allocated(ball_addr)) deallocate(ball_addr) allocate(ball_buff(0:nrank_e-1)) allocate(ball_addr(0:nrank_e-1)) ibuf_temp = np_e*np_fs @@ -4785,6 +4532,7 @@ contains xyz_fft_x(2,3) = wk_x(6,myrank+1) endif + if(allocated(map_fft_x)) deallocate(map_fft_x) allocate(map_fft_x( nfft ), stat=ichkalloc) if(ichkalloc /= 0) then write(nfout,*)'could not allocate in m_Parallel_mpi_fft_box 2' @@ -4825,6 +4573,7 @@ contains np_fft_x = (wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)* & & (wk_x(4,myrank+1)-wk_x(3,myrank+1)+1)* & & (wk_x(6,myrank+1)-wk_x(5,myrank+1)+1) + if(allocated(mp_fft_x)) deallocate(mp_fft_x) allocate(mp_fft_x(np_fft_x), stat=ichkalloc) end if if(ichkalloc /= 0) then @@ -4851,6 +4600,7 @@ contains deallocate(wk_fft_box) + if(allocated(nel_fft_x)) deallocate(nel_fft_x) allocate(nel_fft_x( 0:nmrank-1 ), stat=ichkalloc) if(ichkalloc /= 0) then write(nfout,*)'could not allocate in m_Parallel_mpi_fft_box 4' @@ -4898,14 +4648,15 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) then - call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr) - if (ierr /= 0) then - write(nfout,*)' m_Parallel_mpi_fft_box : mpi_comm_split error' - call flush(nfout) - call mpi_abort(mpi_comm_world, 100003, ierr) - endif +! if(firstcall) then + if(.not. firstcall) call mpi_comm_free(mpi_fft_zy_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr) + if (ierr /= 0) then + write(nfout,*)' m_Parallel_mpi_fft_box : mpi_comm_split error' + call flush(nfout) + call mpi_abort(mpi_comm_world, 100003, ierr) endif +! endif firstcall = .false. __TIMER_SUB_STOP(1232) end subroutine m_Parallel_mpi_fft_box_3div @@ -5073,6 +4824,9 @@ contains xyz_fft_z(2,3) = fft_box_size_WF(3,n) endif + if(allocated(map_fft_x)) deallocate(map_fft_x) + if(allocated(map_fft_z)) deallocate(map_fft_z) + if(allocated(map_fft_y)) deallocate(map_fft_y) allocate(map_fft_x( nfft ), stat=ichkalloc) allocate(map_fft_z( nfft ), stat=ichkalloc) allocate(map_fft_y( nfft ), stat=ichkalloc) @@ -5138,14 +4892,17 @@ contains if ((dim1*dim2) > myrank) then if (wk_x(1,myrank+1) /= 0) then np_fft_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1) + if(allocated(mp_fft_x)) deallocate(mp_fft_x) allocate(mp_fft_x(np_fft_x), stat=ichkalloc) end if if (wk_z(1,myrank+1) /= 0) then np_fft_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3 + if(allocated(mp_fft_z)) deallocate(mp_fft_z) allocate(mp_fft_z(np_fft_z), stat=ichkalloc) end if if (wk_y(1,myrank+1) /= 0) then np_fft_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1) + if(allocated(mp_fft_y)) deallocate(mp_fft_y) allocate(mp_fft_y(np_fft_y), stat=ichkalloc) end if if(ichkalloc /= 0) then @@ -5240,6 +4997,9 @@ contains deallocate(wk_fft_box) + if(allocated(nel_fft_x)) deallocate(nel_fft_x) + if(allocated(nel_fft_z)) deallocate(nel_fft_z) + if(allocated(nel_fft_y)) deallocate(nel_fft_y) allocate(nel_fft_x( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fft_z( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fft_y( 0:nmrank-1 ), stat=ichkalloc) @@ -5333,7 +5093,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xy_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xy_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fft_xy_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xy_world,ierr) call mpi_comm_size(mpi_fft_xy_world, nrank_fft_xy, ierr) call mpi_comm_rank(mpi_fft_xy_world, myrank_fft_xy, ierr) if (ierr /= 0) then @@ -5359,7 +5121,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_yz_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_yz_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fft_yz_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_yz_world,ierr) call mpi_comm_size(mpi_fft_yz_world, nrank_fft_yz, ierr) call mpi_comm_rank(mpi_fft_yz_world, myrank_fft_yz, ierr) if (ierr /= 0) then @@ -5578,6 +5342,9 @@ contains xyz_fft_y(2,3) = wk_y(2,myrank+1) endif + if(allocated(map_fft_x)) deallocate(map_fft_x) + if(allocated(map_fft_z)) deallocate(map_fft_z) + if(allocated(map_fft_y)) deallocate(map_fft_y) allocate(map_fft_x( nfft ), stat=ichkalloc) allocate(map_fft_z( nfft ), stat=ichkalloc) allocate(map_fft_y( nfft ), stat=ichkalloc) @@ -5666,14 +5433,17 @@ contains if ((dim1*dim2) > myrank) then if (wk_x(1,myrank+1) /= 0) then np_fft_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1) + if(allocated(mp_fft_x)) deallocate(mp_fft_x) allocate(mp_fft_x(np_fft_x), stat=ichkalloc) end if if (wk_z(1,myrank+1) /= 0) then np_fft_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3 + if(allocated(mp_fft_z)) deallocate(mp_fft_z) allocate(mp_fft_z(np_fft_z), stat=ichkalloc) end if if (wk_y(1,myrank+1) /= 0) then np_fft_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1) + if(allocated(mp_fft_y)) deallocate(mp_fft_y) allocate(mp_fft_y(np_fft_y), stat=ichkalloc) end if if(ichkalloc /= 0) then @@ -5768,6 +5538,9 @@ contains deallocate(wk_fft_box) + if(allocated(nel_fft_x)) deallocate(nel_fft_x) + if(allocated(nel_fft_z)) deallocate(nel_fft_z) + if(allocated(nel_fft_y)) deallocate(nel_fft_y) allocate(nel_fft_x( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fft_z( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fft_y( 0:nmrank-1 ), stat=ichkalloc) @@ -5859,7 +5632,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xz_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xz_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fft_xz_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_xz_world,ierr) call mpi_comm_size(mpi_fft_xz_world, nrank_fft_xz, ierr) call mpi_comm_rank(mpi_fft_xz_world, myrank_fft_xz, ierr) if (ierr /= 0) then @@ -5885,7 +5660,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fft_zy_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fft_zy_world,ierr) call mpi_comm_size(mpi_fft_zy_world, nrank_fft_zy, ierr) call mpi_comm_rank(mpi_fft_zy_world, myrank_fft_zy, ierr) if (ierr /= 0) then @@ -6050,6 +5827,7 @@ contains xyz_fftcd_x(2,3) = wk_x(6,myrank+1) endif + if(allocated(map_fftcd_x)) deallocate(map_fftcd_x) allocate(map_fftcd_x( nfft ), stat=ichkalloc) if(ichkalloc /= 0) then write(nfout,*)'could not allocate in m_Parallel_mpi_fft_box 2' @@ -6160,7 +5938,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fftcd_zy_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr) if (ierr /= 0) then write(nfout,*)' m_Parallel_mpi_fft_box_cd : mpi_comm_split error' call flush(nfout) @@ -6338,6 +6118,9 @@ contains xyz_fftcd_z(2,3) = fft_box_size_CD(3,n) endif + if(allocated(map_fftcd_x)) deallocate(map_fftcd_x) + if(allocated(map_fftcd_z)) deallocate(map_fftcd_z) + if(allocated(map_fftcd_y)) deallocate(map_fftcd_y) allocate(map_fftcd_x( nfft ), stat=ichkalloc) allocate(map_fftcd_z( nfft ), stat=ichkalloc) allocate(map_fftcd_y( nfft ), stat=ichkalloc) @@ -6403,14 +6186,17 @@ contains if ((dim1*dim2) > myrank) then if (wk_x(1,myrank+1) /= 0) then np_fftcd_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1) + if(allocated(mp_fftcd_x)) deallocate(mp_fftcd_x) allocate(mp_fftcd_x(np_fftcd_x), stat=ichkalloc) end if if (wk_z(1,myrank+1) /= 0) then np_fftcd_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3 + if(allocated(mp_fftcd_z)) deallocate(mp_fftcd_z) allocate(mp_fftcd_z(np_fftcd_z), stat=ichkalloc) end if if (wk_y(1,myrank+1) /= 0) then np_fftcd_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1) + if(allocated(mp_fftcd_y)) deallocate(mp_fftcd_y) allocate(mp_fftcd_y(np_fftcd_y), stat=ichkalloc) end if if(ichkalloc /= 0) then @@ -6517,6 +6303,9 @@ contains deallocate(wk_fft_box) + if(allocated(nel_fftcd_x)) deallocate(nel_fftcd_x) + if(allocated(nel_fftcd_z)) deallocate(nel_fftcd_z) + if(allocated(nel_fftcd_y)) deallocate(nel_fftcd_y) allocate(nel_fftcd_x( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fftcd_z( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fftcd_y( 0:nmrank-1 ), stat=ichkalloc) @@ -6607,7 +6396,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xy_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xy_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fftcd_xy_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xy_world,ierr) call mpi_comm_size(mpi_fftcd_xy_world, nrank_fftcd_xy, ierr) call mpi_comm_rank(mpi_fftcd_xy_world, myrank_fftcd_xy, ierr) if (ierr /= 0) then @@ -6633,7 +6424,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_yz_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_yz_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fftcd_yz_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_yz_world,ierr) call mpi_comm_size(mpi_fftcd_yz_world, nrank_fftcd_yz, ierr) call mpi_comm_rank(mpi_fftcd_yz_world, myrank_fftcd_yz, ierr) if (ierr /= 0) then @@ -6854,6 +6647,9 @@ contains xyz_fftcd_y(2,3) = wk_y(2,myrank+1) endif + if(allocated(map_fftcd_x)) deallocate(map_fftcd_x) + if(allocated(map_fftcd_z)) deallocate(map_fftcd_z) + if(allocated(map_fftcd_y)) deallocate(map_fftcd_y) allocate(map_fftcd_x( nfft ), stat=ichkalloc) allocate(map_fftcd_z( nfft ), stat=ichkalloc) allocate(map_fftcd_y( nfft ), stat=ichkalloc) @@ -6941,14 +6737,17 @@ contains if ((dim1*dim2) > myrank) then if (wk_x(1,myrank+1) /= 0) then np_fftcd_x = id1*(wk_x(2,myrank+1)-wk_x(1,myrank+1)+1)*(wk_x(4,myrank+1)-wk_x(3,myrank+1)+1) + if(allocated(mp_fftcd_x)) deallocate(mp_fftcd_x) allocate(mp_fftcd_x(np_fftcd_x), stat=ichkalloc) end if if (wk_z(1,myrank+1) /= 0) then np_fftcd_z = (wk_z(4,myrank+1)-wk_z(3,myrank+1)+1)*(wk_z(2,myrank+1)-wk_z(1,myrank+1)+1)*id3 + if(allocated(mp_fftcd_z)) deallocate(mp_fftcd_z) allocate(mp_fftcd_z(np_fftcd_z), stat=ichkalloc) end if if (wk_y(1,myrank+1) /= 0) then np_fftcd_y = (wk_y(4,myrank+1)-wk_y(3,myrank+1)+1)*id2*(wk_y(2,myrank+1)-wk_y(1,myrank+1)+1) + if(allocated(mp_fftcd_y)) deallocate(mp_fftcd_y) allocate(mp_fftcd_y(np_fftcd_y), stat=ichkalloc) end if if(ichkalloc /= 0) then @@ -7043,6 +6842,9 @@ contains deallocate(wk_fft_box) + if(allocated(nel_fftcd_x)) deallocate(nel_fftcd_x) + if(allocated(nel_fftcd_z)) deallocate(nel_fftcd_z) + if(allocated(nel_fftcd_y)) deallocate(nel_fftcd_y) allocate(nel_fftcd_x( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fftcd_z( 0:nmrank-1 ), stat=ichkalloc) allocate(nel_fftcd_y( 0:nmrank-1 ), stat=ichkalloc) @@ -7095,7 +6897,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xz_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xz_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fftcd_xz_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_xz_world,ierr) call mpi_comm_size(mpi_fftcd_xz_world, nrank_fftcd_xz, ierr) call mpi_comm_rank(mpi_fftcd_xz_world, myrank_fftcd_xz, ierr) if (ierr /= 0) then @@ -7121,7 +6925,9 @@ contains color = MPI_UNDEFINED end if key = myrank_g - if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr) +! if(firstcall) call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr) + if(.not.firstcall) call mpi_comm_free(mpi_fftcd_zy_world, ierr) + call mpi_comm_split(mpi_ke_world,color,key,mpi_fftcd_zy_world,ierr) call mpi_comm_size(mpi_fftcd_zy_world, nrank_fftcd_zy, ierr) call mpi_comm_rank(mpi_fftcd_zy_world, myrank_fftcd_zy, ierr) if (ierr /= 0) then @@ -8225,6 +8031,14 @@ contains end subroutine m_Parallel_fft_onto_chgq_3D !=============================================================================== + subroutine m_Parallel_fft_onto_chgq_dealloc() + deallocate(fft_chgq_scnt) + deallocate(fft_chgq_rcnt) + deallocate(fft_chgq_dist) + deallocate(fft_chgq_index) + deallocate(fft_chgq_recv) + end subroutine m_Parallel_fft_onto_chgq_dealloc + #ifdef FFT_3D_DIVISION_CD !=============================================================================== subroutine m_Parallel_chgq_onto_fftcd_3D(nfout,kgp,fft_box_size_CD_3D,igfp_l) @@ -8652,6 +8466,13 @@ contains !=============================================================================== #endif !ifdef FFT_3D_DIVISION_CD + subroutine m_Parallel_chgq_onto_fftcd_dealloc() + deallocate(chgq_fftcd_scnt) + deallocate(chgq_fftcd_rcnt) + deallocate(chgq_fftcd_index) + deallocate(chgq_fftcd_dist) + deallocate(chgq_fftcd_recv) + end subroutine m_Parallel_chgq_onto_fftcd_dealloc !=============================================================================== subroutine m_Parallel_fftcd_onto_chgq_3D(nfout,kgp,fft_box_size_CD_3D,igfp_l) @@ -8848,6 +8669,14 @@ contains __TIMER_SUB_STOP(1247) end subroutine m_Parallel_fftcd_onto_chgq_3D + subroutine m_Parallel_fftcd_onto_chgq_dealloc() + deallocate(fftcd_chgq_scnt) + deallocate(fftcd_chgq_rcnt) + deallocate(fftcd_chgq_dist) + deallocate(fftcd_chgq_index) + deallocate(fftcd_chgq_recv) + end subroutine m_Parallel_fftcd_onto_chgq_dealloc + !=============================================================================== subroutine m_Parallel_init_mpi_G_iba2_3D(nfout,ipri,printable,kv3,iba2) @@ -9053,10 +8882,14 @@ contains myrank_natm = mod(mype,nrank_natm) myrank_nrc = mype/nrank_natm - if(firstcall)then + if(.not.firstcall) then + call mpi_comm_free(mpi_natm_world, ierr) + call mpi_comm_free(mpi_nrc_world, ierr) + endif +! if(firstcall)then call mpi_comm_split(MPI_COMM_WORLD,myrank_natm,myrank_nrc, mpi_natm_world,ierr) call mpi_comm_split(MPI_COMM_WORLD,myrank_nrc, myrank_natm,mpi_nrc_world, ierr) - endif +! endif allocate(is_natm (0:nrank_natm-1)) allocate(ie_natm (0:nrank_natm-1)) diff --git a/src_phase_3d/m_UnitCell.f90 b/src_phase_3d/m_UnitCell.f90 index e67f8a6..1b0375a 100644 --- a/src_phase_3d/m_UnitCell.f90 +++ b/src_phase_3d/m_UnitCell.f90 @@ -887,6 +887,10 @@ subroutine m_UnitCell_init_md() integer :: i,j real(kind=DP), dimension(3) :: avec,bvec,cvec if(.not.first_call) return + allocate(p_l(natm,3));p_l = 0.d0 + allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0 + allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0 + allocate(p_l_metric(natm,3));p_l_metric = 0.d0 metric = matmul(transpose(altv),altv) call m_UnitCell_init_md_per_step() m_thermo = qmass(1) @@ -897,14 +901,10 @@ subroutine m_UnitCell_init_md() target_temperature = tkb(1)/CONST_kB ! gfree = 3*(natm-1) gfree = 3*(natm) - allocate(p_l(natm,3));p_l = 0.d0 do i=1,natm p_l(i,:) = amion(ityp(i)) * cpd_l(i,:) * s_thermo enddo - allocate(p_l_lattice(natm,3));p_l_lattice = 0.d0 call convert_coords(natm,p_l,altv,p_l_lattice) - allocate(forc_l_lattice(natm,3));forc_l_lattice = 0.d0 - allocate(p_l_metric(natm,3));p_l_metric = 0.d0 call convert_coords(natm,p_l_lattice,metric_inv,p_l_metric) do i=1,3 do j=1,3 @@ -975,9 +975,12 @@ function get_potential_baro() result(ret) end function get_potential_baro subroutine m_UnitCell_init_md_per_step() + real(kind=DP), external :: deter3 + univol = sqrt(deter3(metric)) call inver3n(3,metric,metric_inv) call inver3n(3,altv,altv_inv) call build_dudmetric() + H0 = get_H0() end subroutine m_UnitCell_init_md_per_step function tensor_rank2_equals(t1,t2) result(ret) @@ -1047,6 +1050,28 @@ function m_UC_get_hamiltonian() result(res) res = res-H0 end function m_UC_get_hamiltonian +function get_H0() result (res) + real(kind=DP) :: res + real(kind=DP) :: k_atm,k_baro,k_thermo,pot_baro,pot_thermo + k_atm = get_kinetic_atom(natm,p_l_lattice,metric_inv) + k_baro = get_kinetic_baro(matmul(metric,p_baro)) + pot_baro = get_potential_baro() + if(control_pressure == OFF) then + k_baro = 0.d0 + pot_baro = 0.d0 + endif + if(imdalg == PT_CONTROL .and. t_ctrl_method /= VELOCITY_SCALING)then + k_thermo = get_kinetic_thermo(p_thermo) + pot_thermo = gfree * target_temperature * CONST_kB * log(s_thermo) + else if (imdalg == P_CONTROL .or. (imdalg == PT_CONTROL .and. t_ctrl_method == VELOCITY_SCALING)) then + k_thermo = 0.d0 + pot_thermo = 0.d0 + endif +! res = s_thermo * (k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro) + res = k_atm + etotal + k_thermo + pot_thermo + k_baro + pot_baro + return +end function get_H0 + function m_UC_get_curr_pressure() result(res) real(kind=DP) :: curr_pressure, res integer :: i @@ -1071,6 +1096,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn) logical :: tag_is_found, EOF_reach integer, parameter :: len_str = 132 character(len=len_str) :: str + + call m_UnitCell_init_md() + if(mype==0)then call rewind_to_tag0(nfcntn,len(tag_pressure_control), & & tag_pressure_control, & @@ -1119,7 +1147,9 @@ subroutine m_UC_rd_md_cntn_data(nfcntn) call altv_2_rltv(altv,rltv,univol,rvol) ! in b_CS call change_of_coordinate_system(altv,pos,natm,natm,cps) !-(b_I.S.) pos -> cps call primitive2bravais(nfout,p2bmat,altv(:,1),altv(:,2),altv(:,3),a,b,c,ca,cb,cc,il) ! in b_CS - if(iteration_unit_cell>1) H0_has_been_set = .true. + if(iteration_unit_cell>1) then + H0_has_been_set = .true. + endif endif endif @@ -1411,8 +1441,8 @@ logical function unitcell_can_change() ! return ! endif if(sw_stress_correction==ON)then - unitcell_can_change = .true. - return + unitcell_can_change = .true. + return endif unitcell_can_change = sw_optimize_lattice==ON end function unitcell_can_change diff --git a/src_phase_3d/mdmain.F90 b/src_phase_3d/mdmain.F90 index 8e4c537..4f8beda 100644 --- a/src_phase_3d/mdmain.F90 +++ b/src_phase_3d/mdmain.F90 @@ -1184,7 +1184,7 @@ end program PHASE call m_Ldos_dealloc() call m_ES_wf_extrpl_dealloc() #ifdef FFTW3 -! call m_FFT_finalize() + call m_FFT_finalize() #endif call m_CtrlP_set_init_status(.true.) end subroutine Array_Deallocate