Skip to content

Commit

Permalink
Improvements to the simulated annealing algorithm (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
matt035343 authored Jul 18, 2023
1 parent 27c8fa2 commit eb5641e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 29 deletions.
1 change: 0 additions & 1 deletion anti_clustering/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def run(
num_groups: int,
destination_column: str,
) -> pd.DataFrame:
# pylint: disable = R0913
"""
Run anti clustering algorithm on dataset.
:param df: The dataset to run anti-clustering on.
Expand Down
70 changes: 43 additions & 27 deletions anti_clustering/simulated_annealing_heuristic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A simulated annealing approach to solving the anti-clustering problem.
An approach to solving the anti-clustering problem using simulated annealing with restarts.
"""

import math
Expand All @@ -23,7 +23,7 @@

class SimulatedAnnealingHeuristicAntiClustering(ClusterSwapHeuristic):
"""
A simulated annealing approach to solving the anti-clustering problem.
An approach to solving the anti-clustering problem using simulated annealing with restarts.
"""

def __init__(
Expand All @@ -32,13 +32,15 @@ def __init__(
random_seed: int = None,
alpha: float = 0.9,
iterations: int = 2000,
starting_temperature: float = 10,
starting_temperature: float = 100,
restarts: int = 9,
):
# pylint: disable = R0913
super().__init__(verbose=verbose, random_seed=random_seed)
self.alpha = alpha
self.iterations = iterations
self.starting_temperature = starting_temperature
self.restarts = restarts

def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]:
# Start with random cluster assignment
Expand All @@ -47,34 +49,48 @@ def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.ND
if self.verbose:
print("Solving")

temperature = self.starting_temperature
# Initial objective value
objective = self._calculate_objective(cluster_assignment, distance_matrix)
for iteration in range(self.iterations):
if self.verbose and iteration % 5 == 0:
print(f"Iteration {iteration + 1} of {self.iterations}")
candidate_solutions = []

# Select random element
i = self.rnd.randint(0, len(distance_matrix) - 1)
# Get possible swaps
possible_exchanges = self._get_exchanges(cluster_assignment, i)
if len(possible_exchanges) == 0:
continue
# Select random possible swap.
j = possible_exchanges[self.rnd.randint(0, len(possible_exchanges) - 1)]
for restart in range(self.restarts):
temperature = self.starting_temperature
# Initial objective value
objective = self._calculate_objective(cluster_assignment, distance_matrix)
for iteration in range(self.iterations):
if self.verbose and iteration % 5 == 0:
print(f"Iteration {iteration + 1} of {self.iterations}")

new_cluster_assignment = self._swap(cluster_assignment, i, j)
new_objective = self._calculate_objective(new_cluster_assignment, distance_matrix)
# Select random element
i = self.rnd.randint(0, len(distance_matrix) - 1)
# Get possible swaps
possible_exchanges = self._get_exchanges(cluster_assignment, i)
if len(possible_exchanges) == 0:
continue
# Select random possible swap.
j = possible_exchanges[self.rnd.randint(0, len(possible_exchanges) - 1)]

# Select solution as current if accepted
if self._accept(new_objective - objective, temperature):
objective = new_objective
cluster_assignment = new_cluster_assignment
new_cluster_assignment = self._swap(cluster_assignment, i, j)
new_objective = self._calculate_objective(new_cluster_assignment, distance_matrix)

# Cool down temperature
temperature = temperature * self.alpha
# Select solution as current if accepted
if self._accept(new_objective - objective, temperature):
objective = new_objective
cluster_assignment = new_cluster_assignment

return cluster_assignment
# Cool down temperature
temperature = temperature * self.alpha

candidate_solutions.append((objective, cluster_assignment))

if self.verbose:
print(f"Restart {restart + 1} of {self.restarts}")

# Cold restart, select random cluster assignment
cluster_assignment = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))

# Select best solution, maximizing objective
_, best_cluster_assignment = max(candidate_solutions, key=lambda x: x[0])

return best_cluster_assignment

def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_matrix: npt.NDArray[float]) -> float:
"""
Expand All @@ -87,7 +103,7 @@ def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_m

def _accept(self, delta: float, temperature: float) -> bool:
"""
Simulated annealing acceptance function. Notice d/t is negated because this is a maximisation problem.
Simulated annealing acceptance function. Notice d/t is used instead of -d/t because we are maximizing.
:param delta: Difference in objective
:param temperature: Current temperature
:return: Whether the solution is accepted or not.
Expand Down
2 changes: 1 addition & 1 deletion examples/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

methods: List[AntiClustering] = [
ExchangeHeuristicAntiClustering(),
SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=10000, starting_temperature=10000),
SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=15),
NaiveRandomHeuristicAntiClustering(),
ExactClusterEditingAntiClustering(),
]
Expand Down

0 comments on commit eb5641e

Please sign in to comment.