diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu index 5f2ccf08b..f0145a9ae 100644 --- a/src/main_gpumd/run.cu +++ b/src/main_gpumd/run.cu @@ -483,6 +483,10 @@ void Run::parse_one_keyword(std::vector& tokens) add_efield.parse(param, num_param, group); } else if (strcmp(param[0], "mc") == 0) { mc.parse_mc(param, num_param, group, atom); + } else if (strcmp(param[0], "mc_minimize") == 0) { + MC_Minimize mc_minimize; + mc_minimize.parse_mc_minimize(param, num_param, group, atom, box, force); + mc_minimize.compute(force, atom, box, group); } else if (strcmp(param[0], "dftd3") == 0) { // nothing here; will be handled elsewhere } else if (strcmp(param[0], "compute_lsqt") == 0) { diff --git a/src/main_gpumd/run.cuh b/src/main_gpumd/run.cuh index 146efcaf0..e7558b993 100644 --- a/src/main_gpumd/run.cuh +++ b/src/main_gpumd/run.cuh @@ -26,6 +26,7 @@ class Measure; #include "force/force.cuh" #include "integrate/integrate.cuh" #include "mc/mc.cuh" +#include "mc_minimize/mc_minimize.cuh" #include "measure/measure.cuh" #include "model/atom.cuh" #include "model/box.cuh" diff --git a/src/makefile b/src/makefile index 30164800c..95fcccda2 100644 --- a/src/makefile +++ b/src/makefile @@ -37,6 +37,7 @@ SOURCES_GPUMD = \ $(wildcard phonon/*.cu) \ $(wildcard integrate/*.cu) \ $(wildcard mc/*.cu) \ + $(wildcard mc_minimize/*.cu) \ $(wildcard force/*.cu) \ $(wildcard measure/*.cu) \ $(wildcard model/*.cu) \ @@ -66,6 +67,7 @@ HEADERS = \ $(wildcard main_gpumd/*.cuh) \ $(wildcard integrate/*.cuh) \ $(wildcard mc/*.cuh) \ + $(wildcard mc_minimize/*.cuh) \ $(wildcard minimize/*.cuh) \ $(wildcard force/*.cuh) \ $(wildcard measure/*.cuh) \ @@ -98,6 +100,8 @@ integrate/%.obj: integrate/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ mc/%.obj: mc/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ +mc_minimize/%.obj: mc_minimize/%.cu $(HEADERS) + $(CC) $(CFLAGS) $(INC) -c $< -o $@ minimize/%.obj: minimize/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ force/%.obj: force/%.cu $(HEADERS) @@ -119,6 +123,8 @@ integrate/%.o: integrate/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ mc/%.o: mc/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ +mc_minimize/%.o: mc_minimize/%.cu $(HEADERS) + $(CC) $(CFLAGS) $(INC) -c $< -o $@ minimize/%.o: minimize/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ force/%.o: force/%.cu $(HEADERS) diff --git a/src/makefile.hip b/src/makefile.hip index 7fcfc6d37..2806e54ad 100644 --- a/src/makefile.hip +++ b/src/makefile.hip @@ -29,6 +29,7 @@ SOURCES_GPUMD = \ $(wildcard phonon/*.cu) \ $(wildcard integrate/*.cu) \ $(wildcard mc/*.cu) \ + $(wildcard mc_minimize/*.cu) \ $(wildcard force/*.cu) \ $(wildcard measure/*.cu) \ $(wildcard model/*.cu) \ @@ -53,6 +54,7 @@ HEADERS = \ $(wildcard main_gpumd/*.cuh) \ $(wildcard integrate/*.cuh) \ $(wildcard mc/*.cuh) \ + $(wildcard mc_minimize/*.cuh) \ $(wildcard minimize/*.cuh) \ $(wildcard force/*.cuh) \ $(wildcard measure/*.cuh) \ @@ -84,6 +86,8 @@ integrate/%.o: integrate/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ mc/%.o: mc/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ +mc_minimize/%.o: mc_minimize/%.cu $(HEADERS) + $(CC) $(CFLAGS) $(INC) -c $< -o $@ minimize/%.o: minimize/%.cu $(HEADERS) $(CC) $(CFLAGS) $(INC) -c $< -o $@ force/%.o: force/%.cu $(HEADERS) diff --git a/src/mc_minimize/mc_minimize.cu b/src/mc_minimize/mc_minimize.cu new file mode 100644 index 000000000..9e135c115 --- /dev/null +++ b/src/mc_minimize/mc_minimize.cu @@ -0,0 +1,187 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#include "mc_minimize.cuh" +#include "utilities/read_file.cuh" +#include "mc_minimizer_local.cuh" +#include "mc_minimizer_global.cuh" +#include "mc_minimizer_test.cuh" + +void MC_Minimize::parse_group( + const char** param, int num_param, std::vector& groups, int num_param_before_group) +{ + if (strcmp(param[num_param_before_group], "group") != 0) { + PRINT_INPUT_ERROR("invalid option for mc.\n"); + } + if (!is_valid_int(param[num_param_before_group + 1], &grouping_method)) { + PRINT_INPUT_ERROR("grouping method of MC_Minimize should be an integer.\n"); + } + if (grouping_method < 0) { + PRINT_INPUT_ERROR("grouping method of MC_Minimize should >= 0.\n"); + } + if (grouping_method >= groups.size()) { + PRINT_INPUT_ERROR("Grouping method should < number of grouping methods."); + } + if (!is_valid_int(param[num_param_before_group + 2], &group_id)) { + PRINT_INPUT_ERROR("group ID of MC_Minimize should be an integer.\n"); + } + if (group_id < 0) { + PRINT_INPUT_ERROR("group ID of MC_Minimize should >= 0.\n"); + } + if (group_id >= groups[grouping_method].number) { + PRINT_INPUT_ERROR("Group ID should < number of groups."); + } +} + + +void MC_Minimize::compute( + Force& force, + Atom& atom, + Box& box, + std::vector& group) +{ + mc_minimizer->compute( + num_trials_mc, + force, + atom, + box, + group, + grouping_method, + group_id); +} + +void MC_Minimize::parse_mc_minimize(const char** param, int num_param, std::vector& group, Atom& atom, Box& box, Force& force) +{ + if (num_param < 6) { + PRINT_INPUT_ERROR("mc_minimize should have at least 5 parameters.\n"); + } + + //0 for local, 1 for global, 2 for test + int mc_minimizer_type = 0; + if (strcmp(param[1], "local") == 0) { + printf("Perform simple MC with local relaxation:\n"); + mc_minimizer_type = 0; + } else if (strcmp(param[1], "global") == 0) { + printf("Perform simple MC with global relaxation:\n"); + mc_minimizer_type = 1; + } else if (strcmp(param[1], "test") == 0) { + printf("Perform simple MC test:\n"); + mc_minimizer_type = 2; + } else { + PRINT_INPUT_ERROR("invalid MC Minimizer type for MC.\n"); + } + if (mc_minimizer_type == 0) { + if (num_param < 7) { + PRINT_INPUT_ERROR("reading error for local relaxation, missing parameter\n"); + } + } + if (mc_minimizer_type == 1) { + if (num_param < 6) { + PRINT_INPUT_ERROR("reading error for global relaxation, missing parameter\n"); + } + } + if (mc_minimizer_type == 2) { + if (num_param < 7) { + PRINT_INPUT_ERROR("reading error for test, missing parameter\n"); + } + } + + //check if num_trials_mc reasonable + if (!is_valid_int(param[2], &num_trials_mc)) { + PRINT_INPUT_ERROR("number of MC trials for MC Minimize should be an integer.\n"); + } + if (num_trials_mc <= 0) { + PRINT_INPUT_ERROR("number of MC trials for MC Minimize should be positive.\n"); + } + + //check if temperature reasonable + if (!is_valid_real(param[3], &temperature)) { + PRINT_INPUT_ERROR("temperature for MC Minimize should be a number.\n"); + } + if (temperature <= 0) { + PRINT_INPUT_ERROR("temperature for MC Minimize should be positive.\n"); + } + + //check if force_tolerance reasonable + if (!is_valid_real(param[4], &force_tolerance)) { + PRINT_INPUT_ERROR("force tolerance for MC Minimize should be a number.\n"); + } + if (force_tolerance <= 0) { + PRINT_INPUT_ERROR("force tolerance for MC Minimize should be positive.\n"); + } + + //check if max nmber of relax reasonable + if (!is_valid_int(param[5], &max_relax_steps)) { + PRINT_INPUT_ERROR("max relaxation steps for MC Minimize should be an integer.\n"); + } + if (max_relax_steps <= 0) { + PRINT_INPUT_ERROR("max relaxation steps for MC Minimize should be positive.\n"); + } + + //check if scale factor reasonable + if (mc_minimizer_type == 0) + { + if (!is_valid_real(param[6], &scale_factor)) { + PRINT_INPUT_ERROR("scale factor for MC Minimize should be a number.\n"); + } + if (scale_factor <= 0) { + PRINT_INPUT_ERROR("scale factor for MC Minimize should be positive.\n"); + } + } + if (mc_minimizer_type == 2) + { + if (!is_valid_real(param[6], &scale_factor)) { + PRINT_INPUT_ERROR("scale factor for MC Minimize should be a number.\n"); + } + if (scale_factor <= 0) { + PRINT_INPUT_ERROR("scale factor for MC Minimize should be positive.\n"); + } + } + + + + int num_param_before_group; + if (mc_minimizer_type == 0) + { + num_param_before_group = 7; + } + if (mc_minimizer_type == 1) + { + num_param_before_group = 6; + } + if (mc_minimizer_type == 2) + { + num_param_before_group = 7; + } + + if (num_param > num_param_before_group) + { + parse_group(param, num_param, group, num_param_before_group); + printf(" only for atoms in group %d of grouping method %d.\n", group_id, grouping_method); + } + + if (mc_minimizer_type == 0) + { + mc_minimizer.reset(new MC_Minimizer_Local(param, num_param, scale_factor, temperature, force_tolerance, max_relax_steps)); + } + if (mc_minimizer_type == 1) + { + mc_minimizer.reset(new MC_Minimizer_Global(param, num_param, temperature, force_tolerance, max_relax_steps)); + } + if (mc_minimizer_type == 2) + { + mc_minimizer.reset(new MC_Minimizer_Test(param, num_param, scale_factor, temperature, force_tolerance, max_relax_steps)); + } +} \ No newline at end of file diff --git a/src/mc_minimize/mc_minimize.cuh b/src/mc_minimize/mc_minimize.cuh new file mode 100644 index 000000000..bd91c86ce --- /dev/null +++ b/src/mc_minimize/mc_minimize.cuh @@ -0,0 +1,52 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ +#pragma once + +#include "mc_minimizer.cuh" +#include "force/force.cuh" +#include "model/atom.cuh" +#include "model/box.cuh" +#include "model/group.cuh" +#include +#include + +class MC_Minimize +{ +private: + + void parse_group(const char** param, int num_param, std::vector& groups, int num_param_before_group); + int grouping_method = -1; + int group_id = -1; + + //parameters + int num_trials_mc = 0; + double temperature = 0; + double force_tolerance = 0; + int max_relax_steps = 0; + double scale_factor = 0; + + +public: + void parse_mc_minimize(const char** param, int num_param, std::vector& group, Atom& atom, Box& box, Force& force); + + void compute( + Force& force, + Atom& atom, + Box& box, + std::vector& group); + + std::unique_ptr mc_minimizer; +}; + diff --git a/src/mc_minimize/mc_minimizer.cu b/src/mc_minimize/mc_minimizer.cu new file mode 100644 index 000000000..467a6a5af --- /dev/null +++ b/src/mc_minimize/mc_minimizer.cu @@ -0,0 +1,50 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#include "mc_minimizer.cuh" + +MC_Minimizer::MC_Minimizer(const char** param, int num_param) +{ + mc_output.open("mc_minimize.out", std::ios::app); + mc_output << "# "; + for (int n = 0; n < num_param; ++n) { + mc_output << param[n] << " "; + } + mc_output << "\n"; + mc_output << std::defaultfloat << std::setprecision(12); + if (strcmp(param[1], "local") == 0) { + mc_output << "Step" << "\t" << "Maximum displacement" << "\t" << "Average displacement" + << "\t" << "Energy difference" << "\t" << "Accept ratio" << std::endl; + } else if (strcmp(param[1], "global") == 0) { + mc_output << "Step" << "\t" << "Energy before" << "\t" << "Energy after" << "\t" << "Accept ratio" << std::endl; + } else if (strcmp(param[1], "test") == 0) { + mc_output << "Step" << "\t" << "Maximum displacement" << "\t" << "Average displacement" + << "\t" << "Local energy difference" << "\t" << "Global energy difference" << "\t" << "Accept ratio" + << "\t" << "Local probability" << "\t" << "Global probability" << "\t" << "Accelerate ratio" << std::endl; + } + + + +#ifdef DEBUG + rng = std::mt19937(13579); +#else + rng = std::mt19937(std::chrono::system_clock::now().time_since_epoch().count()); +#endif +} + +MC_Minimizer::~MC_Minimizer() +{ + //default destructor +} \ No newline at end of file diff --git a/src/mc_minimize/mc_minimizer.cuh b/src/mc_minimize/mc_minimizer.cuh new file mode 100644 index 000000000..a36e18e81 --- /dev/null +++ b/src/mc_minimize/mc_minimizer.cuh @@ -0,0 +1,48 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#pragma once +#include "model/atom.cuh" +#include "model/box.cuh" +#include "force/force.cuh" +#include +#include "minimize/minimizer_fire.cuh" +#include +#include +#include +#include + +class MC_Minimizer +{ +private: + +public: + MC_Minimizer(const char** param, int num_param); + virtual ~MC_Minimizer(); + + virtual void compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id) = 0; + +protected: + std::mt19937 rng; + std::ofstream mc_output; +}; + diff --git a/src/mc_minimize/mc_minimizer_global.cu b/src/mc_minimize/mc_minimizer_global.cu new file mode 100644 index 000000000..3af43486d --- /dev/null +++ b/src/mc_minimize/mc_minimizer_global.cu @@ -0,0 +1,201 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#include "mc_minimizer_global.cuh" + +MC_Minimizer_Global::MC_Minimizer_Global( + const char** param, int num_param, + double temperature_input, + double force_tolerance_input, + int max_relax_steps_input) + : MC_Minimizer(param, num_param) +{ + temperature = temperature_input; + force_tolerance = force_tolerance_input; + max_relax_steps = max_relax_steps_input; +} + +MC_Minimizer_Global::~MC_Minimizer_Global() +{ + //default destructor +} + + +// a kernel with a single thread <<<1, 1>>> +static __global__ void exchange( + const int i, + const int j, + const int type_i, + const int type_j, + int* g_type, + double* g_mass, + double* g_vx, + double* g_vy, + double* g_vz) +{ + g_type[i] = type_j; + g_type[j] = type_i; + + double mass_i = g_mass[i]; + g_mass[i] = g_mass[j]; + g_mass[j] = mass_i; + + double vx_i = g_vx[i]; + g_vx[i] = g_vx[j]; + g_vx[j] = vx_i; + + double vy_i = g_vy[i]; + g_vy[i] = g_vy[j]; + g_vy[j] = vy_i; + + double vz_i = g_vz[i]; + g_vz[i] = g_vz[j]; + g_vz[j] = vz_i; +} + +void MC_Minimizer_Global::compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id) +{ + //get the swap index + int group_size = + grouping_method >= 0 ? group[grouping_method].cpu_size[group_id] : atom.number_of_atoms; + std::uniform_int_distribution r1(0, group_size - 1); + + int num_accepted = 0; + int N = atom.number_of_atoms; + for (int step = 1; step <= trials; ++step) { + + int i = grouping_method >= 0 + ? group[grouping_method] + .cpu_contents[group[grouping_method].cpu_size_sum[group_id] + r1(rng)] + : r1(rng); + int type_i = atom.cpu_type[i]; + int j = 0, type_j = type_i; + while (type_i == type_j) { + j = grouping_method >= 0 + ? group[grouping_method] + .cpu_contents[group[grouping_method].cpu_size_sum[group_id] + r1(rng)] + : r1(rng); + type_j = atom.cpu_type[j]; + } + //initialize + if (step == 1) + { + energy_last_step = 0; + force.compute( + box, + atom.position_per_atom, + atom.type, + group, + atom.potential_per_atom, + atom.force_per_atom, + atom.virial_per_atom); + std::vector pe_before_cpu(N); + atom.potential_per_atom.copy_to_host(pe_before_cpu.data(), N); + for (int n = 0; n < N; ++n) { + energy_last_step += pe_before_cpu[n]; + } + } + + //construct a copy + Atom atom_copy; + atom_copy.number_of_atoms = N; + atom_copy.mass.resize(N); + atom_copy.type.resize(N); + atom_copy.potential_per_atom.resize(N, 0); + atom_copy.force_per_atom.resize(3 * N, 0); + atom_copy.velocity_per_atom.resize(3 * N, 0); + atom_copy.position_per_atom.resize(3 * N); + atom_copy.virial_per_atom.resize(9 * N, 0); + + atom_copy.mass.copy_from_device(atom.mass.data(), N); + atom_copy.type.copy_from_device(atom.type.data(), N); + atom_copy.position_per_atom.copy_from_device(atom.position_per_atom.data(), 3 * N); + + //calculate the energy after swap + exchange<<<1, 1>>>( + i, + j, + type_i, + type_j, + atom_copy.type.data(), + atom_copy.mass.data(), + atom_copy.velocity_per_atom.data(), + atom_copy.velocity_per_atom.data() + N, + atom_copy.velocity_per_atom.data() + N * 2); + + Minimizer_FIRE minimizer(N, max_relax_steps, force_tolerance); + minimizer.compute( + force, + box, + atom_copy.position_per_atom, + atom_copy.type, + group, + atom_copy.potential_per_atom, + atom_copy.force_per_atom, + atom_copy.virial_per_atom); + double pe_after_total = 0; + std::vector pe_after_cpu(N); + atom_copy.potential_per_atom.copy_to_host(pe_after_cpu.data(), N); + for (int n = 0; n < N; ++n) { + pe_after_total += pe_after_cpu[n]; + } + + double energy_difference = pe_after_total - energy_last_step; + std::uniform_real_distribution r2(0, 1); + float random_number = r2(rng); + double probability = exp(-energy_difference / (K_B * temperature)); + printf("%.8f\n", probability); + + if (random_number < probability) { + ++num_accepted; + + atom.cpu_type[i] = type_j; + atom.cpu_type[j] = type_i; + + auto atom_symbol_i = atom.cpu_atom_symbol[i]; + atom.cpu_atom_symbol[i] = atom.cpu_atom_symbol[j]; + atom.cpu_atom_symbol[j] = atom_symbol_i; + + double mass_i = atom.cpu_mass[i]; + atom.cpu_mass[i] = atom.cpu_mass[j]; + atom.cpu_mass[j] = mass_i; + + atom.position_per_atom.copy_from_device(atom_copy.position_per_atom.data(), 3 * N); + atom.potential_per_atom.copy_from_device(atom_copy.potential_per_atom.data(), N); + + exchange<<<1, 1>>>( + i, + j, + type_i, + type_j, + atom.type.data(), + atom.mass.data(), + atom.velocity_per_atom.data(), + atom.velocity_per_atom.data() + N, + atom.velocity_per_atom.data() + N * 2); + + mc_output << step << "\t" << energy_last_step << "\t" << pe_after_total << "\t" << num_accepted / double(step) << std::endl; + + energy_last_step = pe_after_total; + } + } +} \ No newline at end of file diff --git a/src/mc_minimize/mc_minimizer_global.cuh b/src/mc_minimize/mc_minimizer_global.cuh new file mode 100644 index 000000000..8c419bd56 --- /dev/null +++ b/src/mc_minimize/mc_minimizer_global.cuh @@ -0,0 +1,43 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#pragma once + +#include "mc_minimizer.cuh" + +class MC_Minimizer_Global : public MC_Minimizer +{ +private: + double temperature; + double force_tolerance; + double max_relax_steps; + double energy_last_step = 0; +public: + MC_Minimizer_Global(const char** param, int num_param, + double temperature_input, + double force_tolerance_input, + int max_relax_steps_input); + ~MC_Minimizer_Global(); + + virtual void compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id); +}; + diff --git a/src/mc_minimize/mc_minimizer_local.cu b/src/mc_minimize/mc_minimizer_local.cu new file mode 100644 index 000000000..8b3837148 --- /dev/null +++ b/src/mc_minimize/mc_minimizer_local.cu @@ -0,0 +1,711 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#include "mc_minimizer_local.cuh" + +MC_Minimizer_Local::MC_Minimizer_Local( + const char** param, int num_param, + double scale_factor_input, + double temperature_input, + double force_tolerance_input, + int max_relax_steps_input) + : MC_Minimizer(param, num_param) +{ + scale_factor = scale_factor_input; + temperature = temperature_input; + force_tolerance = force_tolerance_input; + max_relax_steps = max_relax_steps_input; +} + +MC_Minimizer_Local::~MC_Minimizer_Local() +{ + //default destructor +} + +namespace +{ +/* +this function can find out the atoms whose distance from two atom centers is within rc_radial +*/ +static __global__ void get_neighbors_of_i_and_j( + const int N, + const Box box, + const int i, + const int j, + const float rc_radial_square, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + int* g_NN_ij, + int* g_NL_ij) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < N) { + double x0 = g_x[n]; + double y0 = g_y[n]; + double z0 = g_z[n]; + double x0i = g_x[i] - x0; + double y0i = g_y[i] - y0; + double z0i = g_z[i] - z0; + double x0j = g_x[j] - x0; + double y0j = g_y[j] - y0; + double z0j = g_z[j] - z0; + + apply_mic(box, x0i, y0i, z0i); + float distance_square_i = float(x0i * x0i + y0i * y0i + z0i * z0i); + apply_mic(box, x0j, y0j, z0j); + float distance_square_j = float(x0j * x0j + y0j * y0j + z0j * z0j); + + if (distance_square_i < rc_radial_square) { + g_NL_ij[atomicAdd(g_NN_ij, 1)] = n; + } else { + if (distance_square_j < rc_radial_square) { + g_NL_ij[atomicAdd(g_NN_ij, 1)] = n; + } + } + } +} + +// a kernel with a single thread <<<1, 1>>> +static __global__ void exchange( + const int i, + const int j, + const int type_i, + const int type_j, + int* g_type, + double* g_mass, + double* g_vx, + double* g_vy, + double* g_vz) +{ + g_type[i] = type_j; + g_type[j] = type_i; + + double mass_i = g_mass[i]; + g_mass[i] = g_mass[j]; + g_mass[j] = mass_i; + + double vx_i = g_vx[i]; + g_vx[i] = g_vx[j]; + g_vx[j] = vx_i; + + double vy_i = g_vy[i]; + g_vy[i] = g_vy[j]; + g_vy[j] = vy_i; + + double vz_i = g_vz[i]; + g_vz[i] = g_vz[j]; + g_vz[j] = vz_i; +} + +// find the local index of ij +static __global__ void find_local_ij( + const int i, + const int j, + const int local_N, + int* local_index, + int* local_i, + int* local_j) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < local_N) + { + if (local_index[n] == i) + { + *local_i = n; + } + if (local_index[n] == j) + { + *local_j = n; + } + } +} + +/* +this function is used for label the atoms within the rc_radial, which will return an array of [0, 1, 0, 0......]. +1 stands for inside, and 0 stands for outside. +*/ +static __global__ void get_sphere_atoms( + const int N, + const int local_N, + const int* local_index, + const Box box, + const int i, + const int j, + const float rc_radial_square, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + double* local_sphere_flags) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < local_N) { + int index = local_index[n]; + double x0 = g_x[index]; + double y0 = g_y[index]; + double z0 = g_z[index]; + double x0i = g_x[i] - x0; + double y0i = g_y[i] - y0; + double z0i = g_z[i] - z0; + double x0j = g_x[j] - x0; + double y0j = g_y[j] - y0; + double z0j = g_z[j] - z0; + + apply_mic(box, x0i, y0i, z0i); + float distance_square_i = float(x0i * x0i + y0i * y0i + z0i * z0i); + apply_mic(box, x0j, y0j, z0j); + float distance_square_j = float(x0j * x0j + y0j * y0j + z0j * z0j); + + if (distance_square_i < rc_radial_square) { + local_sphere_flags[n] = 1; + local_sphere_flags[n + local_N] = 1; + local_sphere_flags[n + 2 * local_N] = 1; + } else { + if (distance_square_j < rc_radial_square) { + local_sphere_flags[n] = 1; + local_sphere_flags[n + local_N] = 1; + local_sphere_flags[n + 2 * local_N] = 1; + } + } + } +} + +/* +This function is used to mark atoms whose distance from two atom centers is between the range of rcmin and rcmax +*/ +static __global__ void get_outer_atoms( + const int N, + const int local_N, + const int* local_index, + const Box box, + const int i, + const int j, + const float rcmin_radial_square, + const float rcmax_radial_square, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + double* outer_atoms_flags) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < local_N) { + int index = local_index[n]; + double x0 = g_x[index]; + double y0 = g_y[index]; + double z0 = g_z[index]; + double x0i = g_x[i] - x0; + double y0i = g_y[i] - y0; + double z0i = g_z[i] - z0; + double x0j = g_x[j] - x0; + double y0j = g_y[j] - y0; + double z0j = g_z[j] - z0; + + apply_mic(box, x0i, y0i, z0i); + float distance_square_i = float(x0i * x0i + y0i * y0i + z0i * z0i); + apply_mic(box, x0j, y0j, z0j); + float distance_square_j = float(x0j * x0j + y0j * y0j + z0j * z0j); + + if (distance_square_i < rcmax_radial_square && distance_square_i > rcmin_radial_square && distance_square_j > rcmin_radial_square) { + outer_atoms_flags[n] += 1; + } else { + if (distance_square_j < rcmax_radial_square && distance_square_j > rcmin_radial_square && distance_square_i > rcmin_radial_square) { + outer_atoms_flags[n] += 1; + } + } + } +} + +//copy from a to b, for position, dimension is 3, for mass, dimension is 1, and so on +template +static __global__ void copy +( + T* a, + T* b, + int* local_index, + int global_N, + int local_N, + int dimension) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + int index = local_index[n]; + for (int i = 0; i < dimension; i++) + { + b[n + i * local_N] = a[index + i * global_N]; + } + } +} + + +//copy from b to a +template +static __global__ void copy_back +( + T* a, + T* b, + int* local_index, + int global_N, + int local_N, + int dimension) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + int index = local_index[n]; + for (int i = 0; i < dimension; i++) + { + a[index + i * global_N] = b[n + i * local_N]; + } + } +} + +//sum up +static __global__ void gpu_sum(const int size, double* a, double* result) +{ + int number_of_patches = (size - 1) / 1024 + 1; + int tid = threadIdx.x; + int n, patch; + __shared__ double data[1024]; + data[tid] = 0.0; + for (patch = 0; patch < number_of_patches; ++patch) { + n = tid + patch * 1024; + if (n < size) + data[tid] += a[n]; + } + __syncthreads(); + for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { + if (tid < offset) { + data[tid] += data[tid + offset]; + } + __syncthreads(); + } + if (tid == 0) + *result = data[0]; +} + +/* +calculate the displacement for the labelled atomsd +*/ +static __global__ void get_displacement( + const Box box, + double* global_position, + double* local_position, + int global_N, + int local_N, + int* local_index, + double* outer_atoms_flags, + double* displacement) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + int index = local_index[n]; + double displace = 0; + double r12[3]; + for (int i = 0; i < 3; i++) + { + r12[i] = (global_position[index + i * global_N] - local_position[n + i * local_N]); + } + apply_mic(box, r12[0], r12[1], r12[2]); + displace = r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]; + displacement[n] = sqrt(displace) * outer_atoms_flags[n]; + } +} + +//remove the energy of the background atoms +static __global__ void modify_energy( + const int local_N, + double* pe, + double* local_sphere_flags) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + pe[n] *= local_sphere_flags[n]; + } +} + +//calculate the max value +__global__ void gpu_calculate_max( + const int size, + const int number_of_rounds, + const double* array, + double* max) +{ + const int tid = threadIdx.x; + + __shared__ double s_max[1024]; // Shared memory for max values + s_max[tid] = 0; + + double max_value = 0; // Initialize local max + + for (int round = 0; round < number_of_rounds; ++round) { + const int n = tid + round * 1024; + if (n < size) { + const double f = array[n]; + if (f > max_value) + max_value = f; // Update local max + } + } + + s_max[tid] = max_value; // Write local max to shared memory + __syncthreads(); + + for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { + if (tid < offset) { + if (s_max[tid + offset] > s_max[tid]) { + s_max[tid] = s_max[tid + offset]; + } + } + __syncthreads(); + } + + if (tid == 0) { + max[0] = s_max[0]; // Block's final max written to global memory + } +} + +double get_outer_average_displacement( + const Box box, + double* global_position, + double* local_position, + int global_N, + int local_N, + int* local_index, + double* outer_atoms_flags, + double* max) +{ + GPU_Vector displacement(local_N); + get_displacement<<<(local_N - 1) / 64 + 1, 64>>>( + box, + global_position, + local_position, + global_N, + local_N, + local_index, + outer_atoms_flags, + displacement.data()); + CHECK(gpuDeviceSynchronize()); + + //calculate the average displacement + const int number_of_rounds = (local_N - 1) / 1024 + 1; + gpu_calculate_max<<<1, 1024>>>(local_N, number_of_rounds, displacement.data(), max); + + GPU_Vector result(1); + gpu_sum<<<1, 1024>>>(local_N, displacement.data(), result.data()); + GPU_Vector outer_number(1); + gpu_sum<<<1, 1024>>>(local_N, outer_atoms_flags, outer_number.data()); + CHECK(gpuDeviceSynchronize()); + double number; + outer_number.copy_to_host(&number, 1); + double ans; + result.copy_to_host(&ans, 1); + ans /= number; + return ans; +} + +//copy the local structure to previous system +void build_all_atoms( + Atom& atom, + Atom& local_atoms, + int local_N, + int* local_index) +{ + copy_back<<<(local_N - 1) / 64 + 1, 64>>>( + atom.position_per_atom.data(), + local_atoms.position_per_atom.data(), + local_index, + atom.number_of_atoms, + local_N, + 3); + CHECK(gpuDeviceSynchronize()); +} + +//construct a local structure +void build_local_atoms( + Atom& atom, + Atom& local_atoms, + int local_N, + int* local_index) +{ + copy<<<(local_N - 1) / 64 + 1, 64>>>( + atom.mass.data(), + local_atoms.mass.data(), + local_index, + atom.number_of_atoms, + local_N, + 1); + copy<<<(local_N - 1) / 64 + 1, 64>>>( + atom.type.data(), + local_atoms.type.data(), + local_index, + atom.number_of_atoms, + local_N, + 1); + copy<<<(local_N - 1) / 64 + 1, 64>>>( + atom.position_per_atom.data(), + local_atoms.position_per_atom.data(), + local_index, + atom.number_of_atoms, + local_N, + 3); + CHECK(gpuDeviceSynchronize()); +} +} + +//implement the local simple MC +void MC_Minimizer_Local::compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id) +{ + int group_size = + grouping_method >= 0 ? group[grouping_method].cpu_size[group_id] : atom.number_of_atoms; + std::uniform_int_distribution r1(0, group_size - 1); + + int num_accepted = 0; + float rc_radius = force.potentials[0]->rc; + for (int step = 1; step <= trials; ++step) { + + int i = grouping_method >= 0 + ? group[grouping_method] + .cpu_contents[group[grouping_method].cpu_size_sum[group_id] + r1(rng)] + : r1(rng); + int type_i = atom.cpu_type[i]; + int j = 0, type_j = type_i; + while (type_i == type_j) { + j = grouping_method >= 0 + ? group[grouping_method] + .cpu_contents[group[grouping_method].cpu_size_sum[group_id] + r1(rng)] + : r1(rng); + type_j = atom.cpu_type[j]; + } + + Atom local_atoms; + //find local atoms + GPU_Vector local_i; //the local index of i atom + GPU_Vector local_j; //the local index of j atom + GPU_Vector local_N; //the length of local_index + GPU_Vector local_index; // an array contains the index of the local atoms + GPU_Vector local_sphere_flags; // an array labels the atoms inside shell + local_N.resize(1); + local_i.resize(1); + local_j.resize(1); + local_index.resize(atom.number_of_atoms); + CHECK(gpuMemset(local_N.data(), 0, sizeof(int))); + CHECK(gpuMemset(local_i.data(), 0, sizeof(int))); + CHECK(gpuMemset(local_j.data(), 0, sizeof(int))); + get_neighbors_of_i_and_j<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + box, + i, + j, + rc_radius * rc_radius * (scale_factor + 1.2) * (scale_factor + 1.2), + atom.position_per_atom.data(), + atom.position_per_atom.data() + atom.number_of_atoms, + atom.position_per_atom.data() + atom.number_of_atoms * 2, + local_N.data(), + local_index.data()); + GPU_CHECK_KERNEL + + int local_N_cpu; + local_N.copy_to_host(&local_N_cpu); + local_sphere_flags.resize(local_N_cpu * 3); + local_sphere_flags.fill(0); + + find_local_ij<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + i, + j, + local_N_cpu, + local_index.data(), + local_i.data(), + local_j.data()); + int local_i_cpu; + int local_j_cpu; + local_i.copy_to_host(&local_i_cpu); + local_j.copy_to_host(&local_j_cpu); + + //get sphere atoms + get_sphere_atoms<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + local_N_cpu, + local_index.data(), + box, + i, + j, + rc_radius * rc_radius * scale_factor * scale_factor, + atom.position_per_atom.data(), + atom.position_per_atom.data() + atom.number_of_atoms, + atom.position_per_atom.data() + atom.number_of_atoms * 2, + local_sphere_flags.data()); + + //build the local_atoms + local_atoms.number_of_atoms = local_N_cpu; + local_atoms.position_per_atom.resize(3 * local_N_cpu); + local_atoms.force_per_atom.resize(3 * local_N_cpu); + local_atoms.type.resize(local_N_cpu); + local_atoms.velocity_per_atom.resize(3 * local_N_cpu); + local_atoms.mass.resize(local_N_cpu); + local_atoms.potential_per_atom.resize(local_N_cpu); + local_atoms.virial_per_atom.resize(9 * local_N_cpu); + + //initialize + local_atoms.force_per_atom.fill(0); + local_atoms.potential_per_atom.fill(0); + local_atoms.velocity_per_atom.fill(0); + local_atoms.virial_per_atom.fill(0); + + build_local_atoms( + atom, + local_atoms, + local_N_cpu, + local_index.data()); + + //calculate the origin energy, WARNING: here the group is not correct but now this variable will not be used in compute() + //need to modify here + force.potentials[0]->N2 = local_N_cpu; + force.compute(box, + local_atoms.position_per_atom, + local_atoms.type, + group, + local_atoms.potential_per_atom, + local_atoms.force_per_atom, + local_atoms.virial_per_atom); + modify_energy<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + local_N_cpu, + local_atoms.potential_per_atom.data(), + local_sphere_flags.data()); + + std::vector pe_before_cpu(local_N_cpu); + local_atoms.potential_per_atom.copy_to_host(pe_before_cpu.data(), local_N_cpu); + + exchange<<<1, 1>>>( + local_i_cpu, + local_j_cpu, + type_i, + type_j, + local_atoms.type.data(), + local_atoms.mass.data(), + local_atoms.velocity_per_atom.data(), + local_atoms.velocity_per_atom.data() + local_atoms.number_of_atoms, + local_atoms.velocity_per_atom.data() + local_atoms.number_of_atoms * 2); + + Minimizer_FIRE Minimizer( + local_N_cpu, + max_relax_steps, + force_tolerance); + + Minimizer.compute_label_atoms( + force, + box, + local_atoms.position_per_atom, + local_atoms.type, + group, + local_sphere_flags, + local_atoms.potential_per_atom, + local_atoms.force_per_atom, + local_atoms.virial_per_atom); + + std::vector pe_after_cpu(local_N_cpu); + local_atoms.potential_per_atom.copy_to_host(pe_after_cpu.data(), local_N_cpu); + double pe_before_total = 0; + double pe_after_total = 0; + for (int n = 0; n < local_N_cpu; ++n) { + pe_before_total += pe_before_cpu[n]; + } + for (int n = 0; n < local_N_cpu; ++n) { + pe_after_total += pe_after_cpu[n]; + } + //printf(" energy before swapping = %g.10 eV.\n", pe_before_total); + //printf(" energy after swapping = %g.10 eV.\n", pe_after_total); + double energy_difference = pe_after_total - pe_before_total; + std::uniform_real_distribution r2(0, 1); + float random_number = r2(rng); + double probability = exp(-energy_difference / (K_B * temperature)); + + if (random_number < probability) { + ++num_accepted; + + atom.cpu_type[i] = type_j; + atom.cpu_type[j] = type_i; + + auto atom_symbol_i = atom.cpu_atom_symbol[i]; + atom.cpu_atom_symbol[i] = atom.cpu_atom_symbol[j]; + atom.cpu_atom_symbol[j] = atom_symbol_i; + + double mass_i = atom.cpu_mass[i]; + atom.cpu_mass[i] = atom.cpu_mass[j]; + atom.cpu_mass[j] = mass_i; + + //get the output data + GPU_Vector outer_atoms_flags(local_N_cpu); + outer_atoms_flags.fill(0); + get_outer_atoms<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + local_N_cpu, + local_index.data(), + box, + i, + j, + rc_radius * rc_radius * (scale_factor - 1) * (scale_factor - 1), + rc_radius * rc_radius * scale_factor * scale_factor, + atom.position_per_atom.data(), + atom.position_per_atom.data() + atom.number_of_atoms, + atom.position_per_atom.data() + atom.number_of_atoms * 2, + outer_atoms_flags.data()); + CHECK(gpuDeviceSynchronize()); + + GPU_Vector max_displacement(1); + double average_displacement = get_outer_average_displacement( + box, + atom.position_per_atom.data(), + local_atoms.position_per_atom.data(), + atom.number_of_atoms, + local_atoms.number_of_atoms, + local_index.data(), + outer_atoms_flags.data(), + max_displacement.data()); + double max_value; + max_displacement.copy_to_host(&max_value); + + mc_output<< step << "\t" << max_value << "\t" << average_displacement + << "\t" << energy_difference << "\t" << num_accepted / double(step) << std::endl; + + //copy the relaxed local structure to the global structure + build_all_atoms( + atom, + local_atoms, + local_N_cpu, + local_index.data()); + exchange<<<1, 1>>>( + i, + j, + type_i, + type_j, + atom.type.data(), + atom.mass.data(), + atom.velocity_per_atom.data(), + atom.velocity_per_atom.data() + atom.number_of_atoms, + atom.velocity_per_atom.data() + atom.number_of_atoms * 2); + } + //need modification + force.potentials[0]->N2 = atom.number_of_atoms; + } +} \ No newline at end of file diff --git a/src/mc_minimize/mc_minimizer_local.cuh b/src/mc_minimize/mc_minimizer_local.cuh new file mode 100644 index 000000000..be14daf9b --- /dev/null +++ b/src/mc_minimize/mc_minimizer_local.cuh @@ -0,0 +1,44 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#pragma once + +#include "mc_minimizer.cuh" + +class MC_Minimizer_Local : public MC_Minimizer +{ +private: + double scale_factor; + double temperature; + double force_tolerance; + int max_relax_steps; +public: + MC_Minimizer_Local(const char** param, int num_param, + double scale_factor_input, + double temperature_input, + double force_tolerance_input, + int max_relax_steps_input); + ~MC_Minimizer_Local(); + + virtual void compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id); +}; + diff --git a/src/mc_minimize/mc_minimizer_test.cu b/src/mc_minimize/mc_minimizer_test.cu new file mode 100644 index 000000000..af6ac62d7 --- /dev/null +++ b/src/mc_minimize/mc_minimizer_test.cu @@ -0,0 +1,807 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#include "mc_minimizer_test.cuh" + +MC_Minimizer_Test::MC_Minimizer_Test( + const char** param, int num_param, + double scale_factor_input, + double temperature_input, + double force_tolerance_input, + int max_relax_steps_input) + : MC_Minimizer(param, num_param) +{ + scale_factor = scale_factor_input; + temperature = temperature_input; + force_tolerance = force_tolerance_input; + max_relax_steps = max_relax_steps_input; +} + +MC_Minimizer_Test::~MC_Minimizer_Test() +{ + //default destructor +} + +namespace +{ +/* +this function can find out the atoms whose distance from two atom centers is within rc_radial +*/ +static __global__ void get_neighbors_of_i_and_j( + const int N, + const Box box, + const int i, + const int j, + const float rc_radial_square, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + int* g_NN_ij, + int* g_NL_ij) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < N) { + double x0 = g_x[n]; + double y0 = g_y[n]; + double z0 = g_z[n]; + double x0i = g_x[i] - x0; + double y0i = g_y[i] - y0; + double z0i = g_z[i] - z0; + double x0j = g_x[j] - x0; + double y0j = g_y[j] - y0; + double z0j = g_z[j] - z0; + + apply_mic(box, x0i, y0i, z0i); + float distance_square_i = float(x0i * x0i + y0i * y0i + z0i * z0i); + apply_mic(box, x0j, y0j, z0j); + float distance_square_j = float(x0j * x0j + y0j * y0j + z0j * z0j); + + if (distance_square_i < rc_radial_square) { + g_NL_ij[atomicAdd(g_NN_ij, 1)] = n; + } else { + if (distance_square_j < rc_radial_square) { + g_NL_ij[atomicAdd(g_NN_ij, 1)] = n; + } + } + } +} + +// a kernel with a single thread <<<1, 1>>> +static __global__ void exchange( + const int i, + const int j, + const int type_i, + const int type_j, + int* g_type, + double* g_mass, + double* g_vx, + double* g_vy, + double* g_vz) +{ + g_type[i] = type_j; + g_type[j] = type_i; + + double mass_i = g_mass[i]; + g_mass[i] = g_mass[j]; + g_mass[j] = mass_i; + + double vx_i = g_vx[i]; + g_vx[i] = g_vx[j]; + g_vx[j] = vx_i; + + double vy_i = g_vy[i]; + g_vy[i] = g_vy[j]; + g_vy[j] = vy_i; + + double vz_i = g_vz[i]; + g_vz[i] = g_vz[j]; + g_vz[j] = vz_i; +} + +// find the local index of ij +static __global__ void find_local_ij( + const int i, + const int j, + const int local_N, + int* local_index, + int* local_i, + int* local_j) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < local_N) + { + if (local_index[n] == i) + { + *local_i = n; + } + if (local_index[n] == j) + { + *local_j = n; + } + } +} + +/* +this function is used for label the atoms within the rc_radial, which will return an array of [0, 1, 0, 0......]. +1 stands for inside, and 0 stands for outside. +*/ +static __global__ void get_sphere_atoms( + const int N, + const int local_N, + const int* local_index, + const Box box, + const int i, + const int j, + const float rc_radial_square, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + double* local_sphere_flags) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < local_N) { + int index = local_index[n]; + double x0 = g_x[index]; + double y0 = g_y[index]; + double z0 = g_z[index]; + double x0i = g_x[i] - x0; + double y0i = g_y[i] - y0; + double z0i = g_z[i] - z0; + double x0j = g_x[j] - x0; + double y0j = g_y[j] - y0; + double z0j = g_z[j] - z0; + + apply_mic(box, x0i, y0i, z0i); + float distance_square_i = float(x0i * x0i + y0i * y0i + z0i * z0i); + apply_mic(box, x0j, y0j, z0j); + float distance_square_j = float(x0j * x0j + y0j * y0j + z0j * z0j); + + if (distance_square_i < rc_radial_square) { + local_sphere_flags[n] = 1; + local_sphere_flags[n + local_N] = 1; + local_sphere_flags[n + 2 * local_N] = 1; + } else { + if (distance_square_j < rc_radial_square) { + local_sphere_flags[n] = 1; + local_sphere_flags[n + local_N] = 1; + local_sphere_flags[n + 2 * local_N] = 1; + } + } + } +} + +/* +This function is used to mark atoms whose distance from two atom centers is between the range of rcmin and rcmax +*/ +static __global__ void get_outer_atoms( + const int N, + const int local_N, + const int* local_index, + const Box box, + const int i, + const int j, + const float rcmin_radial_square, + const float rcmax_radial_square, + const double* __restrict__ g_x, + const double* __restrict__ g_y, + const double* __restrict__ g_z, + double* outer_atoms_flags) +{ + int n = blockIdx.x * blockDim.x + threadIdx.x; + if (n < local_N) { + int index = local_index[n]; + double x0 = g_x[index]; + double y0 = g_y[index]; + double z0 = g_z[index]; + double x0i = g_x[i] - x0; + double y0i = g_y[i] - y0; + double z0i = g_z[i] - z0; + double x0j = g_x[j] - x0; + double y0j = g_y[j] - y0; + double z0j = g_z[j] - z0; + + apply_mic(box, x0i, y0i, z0i); + float distance_square_i = float(x0i * x0i + y0i * y0i + z0i * z0i); + apply_mic(box, x0j, y0j, z0j); + float distance_square_j = float(x0j * x0j + y0j * y0j + z0j * z0j); + + if (distance_square_i < rcmax_radial_square && distance_square_i > rcmin_radial_square && distance_square_j > rcmin_radial_square) { + outer_atoms_flags[n] += 1; + } else { + if (distance_square_j < rcmax_radial_square && distance_square_j > rcmin_radial_square && distance_square_i > rcmin_radial_square) { + outer_atoms_flags[n] += 1; + } + } + } +} + +//copy from a to b, for position, dimension is 3, for mass, dimension is 1, and so on +template +static __global__ void copy +( + T* a, + T* b, + int* local_index, + int global_N, + int local_N, + int dimension) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + int index = local_index[n]; + for (int i = 0; i < dimension; i++) + { + b[n + i * local_N] = a[index + i * global_N]; + } + } +} + + +//copy from b to a +template +static __global__ void copy_back +( + T* a, + T* b, + int* local_index, + int global_N, + int local_N, + int dimension) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + int index = local_index[n]; + for (int i = 0; i < dimension; i++) + { + a[index + i * global_N] = b[n + i * local_N]; + } + } +} + +//sum up +static __global__ void gpu_sum(const int size, double* a, double* result) +{ + int number_of_patches = (size - 1) / 1024 + 1; + int tid = threadIdx.x; + int n, patch; + __shared__ double data[1024]; + data[tid] = 0.0; + for (patch = 0; patch < number_of_patches; ++patch) { + n = tid + patch * 1024; + if (n < size) + data[tid] += a[n]; + } + __syncthreads(); + for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { + if (tid < offset) { + data[tid] += data[tid + offset]; + } + __syncthreads(); + } + if (tid == 0) + *result = data[0]; +} + +/* +calculate the displacement for the labelled atomsd +*/ +static __global__ void get_displacement( + const Box box, + double* global_position, + double* local_position, + int global_N, + int local_N, + int* local_index, + double* outer_atoms_flags, + double* displacement) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + int index = local_index[n]; + double displace = 0; + double r12[3]; + for (int i = 0; i < 3; i++) + { + r12[i] = (global_position[index + i * global_N] - local_position[n + i * local_N]); + } + apply_mic(box, r12[0], r12[1], r12[2]); + displace = r12[0] * r12[0] + r12[1] * r12[1] + r12[2] * r12[2]; + displacement[n] = sqrt(displace) * outer_atoms_flags[n]; + } +} + +//remove the energy of the background atoms +static __global__ void modify_energy( + const int local_N, + double* pe, + double* local_sphere_flags) +{ + int n = blockDim.x * blockIdx.x + threadIdx.x; + if (n < local_N) + { + pe[n] *= local_sphere_flags[n]; + } +} + +//calculate the max value +__global__ void gpu_calculate_max( + const int size, + const int number_of_rounds, + const double* array, + double* max) +{ + const int tid = threadIdx.x; + + __shared__ double s_max[1024]; // Shared memory for max values + s_max[tid] = 0; + + double max_value = 0; // Initialize local max + + for (int round = 0; round < number_of_rounds; ++round) { + const int n = tid + round * 1024; + if (n < size) { + const double f = array[n]; + if (f > max_value) + max_value = f; // Update local max + } + } + + s_max[tid] = max_value; // Write local max to shared memory + __syncthreads(); + + for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { + if (tid < offset) { + if (s_max[tid + offset] > s_max[tid]) { + s_max[tid] = s_max[tid + offset]; + } + } + __syncthreads(); + } + + if (tid == 0) { + max[0] = s_max[0]; // Block's final max written to global memory + } +} + +double get_outer_average_displacement( + const Box box, + double* global_position, + double* local_position, + int global_N, + int local_N, + int* local_index, + double* outer_atoms_flags, + double* max) +{ + GPU_Vector displacement(local_N); + get_displacement<<<(local_N - 1) / 64 + 1, 64>>>( + box, + global_position, + local_position, + global_N, + local_N, + local_index, + outer_atoms_flags, + displacement.data()); + CHECK(gpuDeviceSynchronize()); + + //calculate the average displacement + const int number_of_rounds = (local_N - 1) / 1024 + 1; + gpu_calculate_max<<<1, 1024>>>(local_N, number_of_rounds, displacement.data(), max); + + GPU_Vector result(1); + gpu_sum<<<1, 1024>>>(local_N, displacement.data(), result.data()); + GPU_Vector outer_number(1); + gpu_sum<<<1, 1024>>>(local_N, outer_atoms_flags, outer_number.data()); + CHECK(gpuDeviceSynchronize()); + double number; + outer_number.copy_to_host(&number, 1); + double ans; + result.copy_to_host(&ans, 1); + ans /= number; + return ans; +} + +//copy the local structure to previous system +void build_all_atoms( + Atom& atom, + Atom& local_atoms, + int local_N, + int* local_index) +{ + copy_back<<<(local_N - 1) / 64 + 1, 64>>>( + atom.position_per_atom.data(), + local_atoms.position_per_atom.data(), + local_index, + atom.number_of_atoms, + local_N, + 3); + CHECK(gpuDeviceSynchronize()); +} + +//construct a local structure +void build_local_atoms( + Atom& atom, + Atom& local_atoms, + int local_N, + int* local_index) +{ + copy<<<(local_N - 1) / 64 + 1, 64>>>( + atom.mass.data(), + local_atoms.mass.data(), + local_index, + atom.number_of_atoms, + local_N, + 1); + copy<<<(local_N - 1) / 64 + 1, 64>>>( + atom.type.data(), + local_atoms.type.data(), + local_index, + atom.number_of_atoms, + local_N, + 1); + copy<<<(local_N - 1) / 64 + 1, 64>>>( + atom.position_per_atom.data(), + local_atoms.position_per_atom.data(), + local_index, + atom.number_of_atoms, + local_N, + 3); + CHECK(gpuDeviceSynchronize()); +} +} + +//implement the test +void MC_Minimizer_Test::compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id) +{ + int group_size = + grouping_method >= 0 ? group[grouping_method].cpu_size[group_id] : atom.number_of_atoms; + std::uniform_int_distribution r1(0, group_size - 1); + + int num_accepted = 0; + float rc_radius = force.potentials[0]->rc; + + //Construct an atom object and perform independent global relaxation calculations to compare with local relaxation. The same atoms are exchanged between global and local relaxation in each iteration. + Atom atom_global; + int N = atom.number_of_atoms; + atom_global.number_of_atoms = N; + atom_global.mass.resize(N); + atom_global.type.resize(N); + atom_global.potential_per_atom.resize(N, 0); + atom_global.force_per_atom.resize(3 * N, 0); + atom_global.velocity_per_atom.resize(3 * N, 0); + atom_global.position_per_atom.resize(3 * N); + atom_global.virial_per_atom.resize(9 * N, 0); + + atom_global.mass.copy_from_device(atom.mass.data(), N); + atom_global.type.copy_from_device(atom.type.data(), N); + atom_global.position_per_atom.copy_from_device(atom.position_per_atom.data(), 3 * N); + + for (int step = 1; step <= trials; ++step) { + + int i = grouping_method >= 0 + ? group[grouping_method] + .cpu_contents[group[grouping_method].cpu_size_sum[group_id] + r1(rng)] + : r1(rng); + int type_i = atom.cpu_type[i]; + int j = 0, type_j = type_i; + while (type_i == type_j) { + j = grouping_method >= 0 + ? group[grouping_method] + .cpu_contents[group[grouping_method].cpu_size_sum[group_id] + r1(rng)] + : r1(rng); + type_j = atom.cpu_type[j]; + } + + //measure the running time + auto start_time_global = std::chrono::high_resolution_clock::now(); + //calculate the global relaxation energy after swap + force.potentials[0]->N2 = atom.number_of_atoms; + Atom atom_global_copy; + atom_global_copy.number_of_atoms = N; + atom_global_copy.mass.resize(N); + atom_global_copy.type.resize(N); + atom_global_copy.potential_per_atom.resize(N, 0); + atom_global_copy.force_per_atom.resize(3 * N, 0); + atom_global_copy.velocity_per_atom.resize(3 * N, 0); + atom_global_copy.position_per_atom.resize(3 * N); + atom_global_copy.virial_per_atom.resize(9 * N, 0); + + atom_global_copy.mass.copy_from_device(atom_global.mass.data(), N); + atom_global_copy.type.copy_from_device(atom_global.type.data(), N); + atom_global_copy.position_per_atom.copy_from_device(atom_global.position_per_atom.data(), 3 * N); + //calculate the global energy before swap + force.compute( + box, + atom_global.position_per_atom, + atom_global.type, + group, + atom_global.potential_per_atom, + atom_global.force_per_atom, + atom_global.virial_per_atom); + double pe_before_total_global = 0; + std::vector pe_before_cpu_global(N); + atom_global.potential_per_atom.copy_to_host(pe_before_cpu_global.data(), N); + //calculate the energy after swap + exchange<<<1, 1>>>( + i, + j, + type_i, + type_j, + atom_global_copy.type.data(), + atom_global_copy.mass.data(), + atom_global_copy.velocity_per_atom.data(), + atom_global_copy.velocity_per_atom.data() + N, + atom_global_copy.velocity_per_atom.data() + N * 2); + + Minimizer_FIRE minimizer_compare_global(N, max_relax_steps, force_tolerance); + minimizer_compare_global.compute( + force, + box, + atom_global_copy.position_per_atom, + atom_global_copy.type, + group, + atom_global_copy.potential_per_atom, + atom_global_copy.force_per_atom, + atom_global_copy.virial_per_atom); + double pe_after_total_global = 0; + std::vector pe_after_cpu_global(N); + atom_global_copy.potential_per_atom.copy_to_host(pe_after_cpu_global.data(), N); + for (int n = 0; n < N; ++n) { + pe_after_total_global += pe_after_cpu_global[n]; + pe_before_total_global += pe_before_cpu_global[n]; + } + auto end_time_global = std::chrono::high_resolution_clock::now(); + auto duration_global = std::chrono::duration_cast(end_time_global - start_time_global).count(); + //measure the local running time + auto start_time_local = std::chrono::high_resolution_clock::now(); + Atom local_atoms; + //find local atoms + GPU_Vector local_i; //the local index of i atom + GPU_Vector local_j; //the local index of j atom + GPU_Vector local_N; //the length of local_index + GPU_Vector local_index; // an array contains the index of the local atoms + GPU_Vector local_sphere_flags; // an array labels the atoms inside shell + local_N.resize(1); + local_i.resize(1); + local_j.resize(1); + local_index.resize(atom.number_of_atoms); + CHECK(gpuMemset(local_N.data(), 0, sizeof(int))); + CHECK(gpuMemset(local_i.data(), 0, sizeof(int))); + CHECK(gpuMemset(local_j.data(), 0, sizeof(int))); + get_neighbors_of_i_and_j<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + box, + i, + j, + rc_radius * rc_radius * (scale_factor + 1.2) * (scale_factor + 1.2), + atom.position_per_atom.data(), + atom.position_per_atom.data() + atom.number_of_atoms, + atom.position_per_atom.data() + atom.number_of_atoms * 2, + local_N.data(), + local_index.data()); + GPU_CHECK_KERNEL + + int local_N_cpu; + local_N.copy_to_host(&local_N_cpu); + local_sphere_flags.resize(local_N_cpu * 3); + local_sphere_flags.fill(0); + + find_local_ij<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + i, + j, + local_N_cpu, + local_index.data(), + local_i.data(), + local_j.data()); + int local_i_cpu; + int local_j_cpu; + local_i.copy_to_host(&local_i_cpu); + local_j.copy_to_host(&local_j_cpu); + + //get sphere atoms + get_sphere_atoms<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + local_N_cpu, + local_index.data(), + box, + i, + j, + rc_radius * rc_radius * scale_factor * scale_factor, + atom.position_per_atom.data(), + atom.position_per_atom.data() + atom.number_of_atoms, + atom.position_per_atom.data() + atom.number_of_atoms * 2, + local_sphere_flags.data()); + + //build the local_atoms + local_atoms.number_of_atoms = local_N_cpu; + local_atoms.position_per_atom.resize(3 * local_N_cpu); + local_atoms.force_per_atom.resize(3 * local_N_cpu); + local_atoms.type.resize(local_N_cpu); + local_atoms.velocity_per_atom.resize(3 * local_N_cpu); + local_atoms.mass.resize(local_N_cpu); + local_atoms.potential_per_atom.resize(local_N_cpu); + local_atoms.virial_per_atom.resize(9 * local_N_cpu); + + //initialize + local_atoms.force_per_atom.fill(0); + local_atoms.potential_per_atom.fill(0); + local_atoms.velocity_per_atom.fill(0); + local_atoms.virial_per_atom.fill(0); + + build_local_atoms( + atom, + local_atoms, + local_N_cpu, + local_index.data()); + + //calculate the origin energy, WARNING: here the group is not correct but now this variable will not be used in compute() + //need to modify here + force.potentials[0]->N2 = local_N_cpu; + force.compute(box, + local_atoms.position_per_atom, + local_atoms.type, + group, + local_atoms.potential_per_atom, + local_atoms.force_per_atom, + local_atoms.virial_per_atom); + modify_energy<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + local_N_cpu, + local_atoms.potential_per_atom.data(), + local_sphere_flags.data()); + + std::vector pe_before_cpu(local_N_cpu); + local_atoms.potential_per_atom.copy_to_host(pe_before_cpu.data(), local_N_cpu); + + exchange<<<1, 1>>>( + local_i_cpu, + local_j_cpu, + type_i, + type_j, + local_atoms.type.data(), + local_atoms.mass.data(), + local_atoms.velocity_per_atom.data(), + local_atoms.velocity_per_atom.data() + local_atoms.number_of_atoms, + local_atoms.velocity_per_atom.data() + local_atoms.number_of_atoms * 2); + + Minimizer_FIRE Minimizer( + local_N_cpu, + max_relax_steps, + force_tolerance); + + Minimizer.compute_label_atoms( + force, + box, + local_atoms.position_per_atom, + local_atoms.type, + group, + local_sphere_flags, + local_atoms.potential_per_atom, + local_atoms.force_per_atom, + local_atoms.virial_per_atom); + + std::vector pe_after_cpu(local_N_cpu); + local_atoms.potential_per_atom.copy_to_host(pe_after_cpu.data(), local_N_cpu); + double pe_before_total = 0; + double pe_after_total = 0; + for (int n = 0; n < local_N_cpu; ++n) { + pe_before_total += pe_before_cpu[n]; + } + for (int n = 0; n < local_N_cpu; ++n) { + pe_after_total += pe_after_cpu[n]; + } + auto end_time_local = std::chrono::high_resolution_clock::now(); + auto duration_local = std::chrono::duration_cast(end_time_local - start_time_local).count(); + + //determine if change or not + //printf(" energy before swapping = %g.10 eV.\n", pe_before_total); + //printf(" energy after swapping = %g.10 eV.\n", pe_after_total); + double energy_difference = pe_after_total - pe_before_total; + std::uniform_real_distribution r2(0, 1); + float random_number = r2(rng); + double probability = exp(-energy_difference / (K_B * temperature)); + + //get the output data + GPU_Vector outer_atoms_flags(local_N_cpu); + outer_atoms_flags.fill(0); + get_outer_atoms<<<(local_N_cpu - 1) / 64 + 1, 64>>>( + atom.number_of_atoms, + local_N_cpu, + local_index.data(), + box, + i, + j, + rc_radius * rc_radius * (scale_factor - 1) * (scale_factor - 1), + rc_radius * rc_radius * scale_factor * scale_factor, + atom.position_per_atom.data(), + atom.position_per_atom.data() + atom.number_of_atoms, + atom.position_per_atom.data() + atom.number_of_atoms * 2, + outer_atoms_flags.data()); + CHECK(gpuDeviceSynchronize()); + + GPU_Vector max_displacement(1); + double average_displacement = get_outer_average_displacement( + box, + atom.position_per_atom.data(), + local_atoms.position_per_atom.data(), + atom.number_of_atoms, + local_atoms.number_of_atoms, + local_index.data(), + outer_atoms_flags.data(), + max_displacement.data()); + double max_value; + max_displacement.copy_to_host(&max_value); + double energy_difference_global = pe_after_total_global - pe_before_total_global; + double probability_global = exp(-energy_difference_global / (K_B * temperature)); + double accelerate_ratio = (double)(duration_global - duration_local) / duration_global; + mc_output<< step << "\t" << max_value << "\t" << average_displacement + << "\t" << energy_difference << "\t" << energy_difference_global << "\t" << num_accepted / double(step) + << "\t" << probability << "\t" << probability_global << "\t" << accelerate_ratio << std::endl; + + if (random_number < probability) { + ++num_accepted; + + atom.cpu_type[i] = type_j; + atom.cpu_type[j] = type_i; + + auto atom_symbol_i = atom.cpu_atom_symbol[i]; + atom.cpu_atom_symbol[i] = atom.cpu_atom_symbol[j]; + atom.cpu_atom_symbol[j] = atom_symbol_i; + + double mass_i = atom.cpu_mass[i]; + atom.cpu_mass[i] = atom.cpu_mass[j]; + atom.cpu_mass[j] = mass_i; + + atom_global.position_per_atom.copy_from_device(atom_global_copy.position_per_atom.data(), 3*N); + exchange<<<1, 1>>>( + i, + j, + type_i, + type_j, + atom_global.type.data(), + atom_global.mass.data(), + atom_global.velocity_per_atom.data(), + atom_global.velocity_per_atom.data() + atom_global.number_of_atoms, + atom_global.velocity_per_atom.data() + atom_global.number_of_atoms * 2); + + //copy the relaxed local structure to the global structure + build_all_atoms( + atom, + local_atoms, + local_N_cpu, + local_index.data()); + exchange<<<1, 1>>>( + i, + j, + type_i, + type_j, + atom.type.data(), + atom.mass.data(), + atom.velocity_per_atom.data(), + atom.velocity_per_atom.data() + atom.number_of_atoms, + atom.velocity_per_atom.data() + atom.number_of_atoms * 2); + } + } +} \ No newline at end of file diff --git a/src/mc_minimize/mc_minimizer_test.cuh b/src/mc_minimize/mc_minimizer_test.cuh new file mode 100644 index 000000000..5b1bd60d1 --- /dev/null +++ b/src/mc_minimize/mc_minimizer_test.cuh @@ -0,0 +1,44 @@ +/* + Copyright 2017 Zheyong Fan and GPUMD development team + This file is part of GPUMD. + GPUMD is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + GPUMD is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with GPUMD. If not, see . +*/ + +#pragma once + +#include "mc_minimizer.cuh" + +class MC_Minimizer_Test : public MC_Minimizer +{ +private: + double scale_factor; + double temperature; + double force_tolerance; + int max_relax_steps; +public: + MC_Minimizer_Test(const char** param, int num_param, + double scale_factor_input, + double temperature_input, + double force_tolerance_input, + int max_relax_steps_input); + ~MC_Minimizer_Test(); + + virtual void compute( + int trials, + Force& force, + Atom& atom, + Box& box, + std::vector& group, + int grouping_method, + int group_id); +}; + diff --git a/src/minimize/minimizer_fire.cu b/src/minimize/minimizer_fire.cu index 9784030c9..e6c25ce03 100644 --- a/src/minimize/minimizer_fire.cu +++ b/src/minimize/minimizer_fire.cu @@ -179,4 +179,88 @@ void Minimizer_FIRE::compute( } printf("Energy minimization finished.\n"); -} \ No newline at end of file +} + +void Minimizer_FIRE::compute_label_atoms( + Force& force, + Box& box, + GPU_Vector& position_per_atom, + GPU_Vector& type, + std::vector& group, + GPU_Vector& local_flags, + GPU_Vector& potential_per_atom, + GPU_Vector& force_per_atom, + GPU_Vector& virial_per_atom) +{ + double next_dt; + const int size = number_of_atoms_ * 3; + //int base = (number_of_steps_ >= 10) ? (number_of_steps_ / 10) : 1; + // create a velocity vector in GPU + GPU_Vector v(size, 0); + GPU_Vector temp1(size); + GPU_Vector temp2(size); + + //printf("\nEnergy minimization started.\n"); + + for (int step = 0; step < number_of_steps_; ++step) { + force.compute( + box, position_per_atom, type, group, potential_per_atom, force_per_atom, virial_per_atom); + pairwise_product(force_per_atom, local_flags, force_per_atom); + pairwise_product(potential_per_atom, local_flags, potential_per_atom); + calculate_force_square_max(force_per_atom); + const double force_max = sqrt(cpu_force_square_max_[0]); + calculate_total_potential(potential_per_atom); + + /*if (step % base == 0 || force_max < force_tolerance_) { + printf( + " step %d: total_potential = %.10f eV, f_max = %.10f eV/A.\n", + step, + cpu_total_potential_[0], + force_max); + */ + if (force_max < force_tolerance_) + break; + + P = dot(v, force_per_atom); + + if (P > 0) { + if (N_neg > N_min) { + next_dt = dt * f_inc; + if (next_dt < dt_max) + dt = next_dt; + alpha *= f_alpha; + } + N_neg++; + } else { + next_dt = dt * f_dec; + if (next_dt > dt_min) + dt = next_dt; + alpha = alpha_start; + // move position back + scalar_multiply(-0.5 * dt, v, temp1); + pairwise_product(temp1, local_flags, temp1); + vector_sum(position_per_atom, temp1, position_per_atom); + v.fill(0); + N_neg = 0; + } + + // md step + // implicit Euler integration + double F_modulus = sqrt(dot(force_per_atom, force_per_atom)); + double v_modulus = sqrt(dot(v, v)); + // dv = F/m*dt + scalar_multiply(dt / m, force_per_atom, temp2); + vector_sum(v, temp2, v); + pairwise_product(v, local_flags, v); + scalar_multiply(1 - alpha, v, temp1); + scalar_multiply(alpha * v_modulus / F_modulus, force_per_atom, temp2); + vector_sum(temp1, temp2, v); + pairwise_product(v, local_flags, v); + // dx = v*dt + scalar_multiply(dt, v, temp1); + pairwise_product(temp1, local_flags, temp1); + vector_sum(position_per_atom, temp1, position_per_atom); + } + + //printf("Energy minimization finished.\n"); +} diff --git a/src/minimize/minimizer_fire.cuh b/src/minimize/minimizer_fire.cuh index a01cd8f90..79faeb2b9 100644 --- a/src/minimize/minimizer_fire.cuh +++ b/src/minimize/minimizer_fire.cuh @@ -50,4 +50,21 @@ public: GPU_Vector& potential_per_atom, GPU_Vector& force_per_atom, GPU_Vector& virial_per_atom); + + +/* +the additional array, local_flags is needed. The length of the array should be same with atom total number. +With 1 means need to update and 0 means keep static +This is currently dedicated to the mc_minimize module +*/ + void compute_label_atoms( + Force& force, + Box& box, + GPU_Vector& position_per_atom, + GPU_Vector& type, + std::vector& group, + GPU_Vector& local_flags, + GPU_Vector& potential_per_atom, + GPU_Vector& force_per_atom, + GPU_Vector& virial_per_atom); }; \ No newline at end of file