Skip to content

Commit

Permalink
Adding Tabu search as new algorithm (#29)
Browse files Browse the repository at this point in the history
  • Loading branch information
matt035343 authored Jun 13, 2024
1 parent 8c770a9 commit b56fb13
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/prepare_release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ jobs:
- uses: SneaksAndData/github-actions/[email protected]
with:
major_v: 0
minor_v: 3
minor_v: 4
1 change: 1 addition & 0 deletions anti_clustering/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@
from anti_clustering.naive_random_heuristic import NaiveRandomHeuristicAntiClustering
from anti_clustering.exact_cluster_editing import ExactClusterEditingAntiClustering
from anti_clustering.exchange_heuristic import ExchangeHeuristicAntiClustering
from anti_clustering.tabu_search_heuristic import TabuSearchHeuristicAntiClustering
from anti_clustering._base import AntiClustering
4 changes: 2 additions & 2 deletions anti_clustering/_cluster_swap_heuristic.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ def _get_random_clusters(self, num_groups: int, num_elements: int) -> npt.NDArra
def _calculate_objective(self, cluster_assignment: npt.NDArray[bool], distance_matrix: npt.NDArray[float]) -> float:
"""
Calculate objective value
:param cluster_assignment: Cluster assignment matrix
:param distance_matrix: Cost matrix
:param cluster_assignment: Cluster assignment
:param distance_matrix: Distance matrix
:return: Objective value
"""
return np.multiply(cluster_assignment, distance_matrix).sum()
99 changes: 99 additions & 0 deletions anti_clustering/tabu_search_heuristic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright 2022 ECCO Sneaks & Data
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A tabu search with restarts approach to solving the anti-clustering problem.
"""

from collections import deque
from typing import Optional

import numpy.typing as npt

from anti_clustering._cluster_swap_heuristic import ClusterSwapHeuristic


class TabuSearchHeuristicAntiClustering(ClusterSwapHeuristic):
    """
    A tabu search with restarts approach to solving the anti-clustering problem.
    In this version, specific transformations are put in the tabu list, not solutions.
    """

    def __init__(
        self,
        verbose: bool = False,
        random_seed: Optional[int] = None,
        tabu_tenure: int = 10,
        iterations: int = 2000,
        restarts: int = 9,
    ):
        """
        :param verbose: Whether to print progress while solving.
        :param random_seed: Seed for the random generator; None means nondeterministic runs.
        :param tabu_tenure: Maximum number of recent swaps kept on the tabu list.
        :param iterations: Number of search iterations per restart.
        :param restarts: Number of cold restarts from a fresh random assignment.
        """
        # pylint: disable = R0913
        super().__init__(verbose=verbose, random_seed=random_seed)
        self.tabu_tenure = tabu_tenure
        self.iterations = iterations
        self.restarts = restarts

    def _solve(self, distance_matrix: npt.NDArray[float], num_groups: int) -> npt.NDArray[bool]:
        """
        Run tabu search with restarts and return the best assignment found.
        :param distance_matrix: Pairwise distance matrix of the elements.
        :param num_groups: Number of anti-clusters to partition the elements into.
        :return: Boolean cluster co-membership matrix with the highest objective
            value found across all restarts.
        """
        # Start with random cluster assignment
        cluster_assignment = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))

        if self.verbose:
            print("Solving")

        # (objective, assignment) pair collected at the end of each restart.
        candidate_solutions = []

        for restart in range(self.restarts):
            # Recently accepted swaps; maxlen makes the deque evict the oldest
            # entry automatically once the tenure is exceeded.
            tabu_swaps = deque(maxlen=self.tabu_tenure)
            # Initial objective value
            objective = self._calculate_objective(cluster_assignment, distance_matrix)
            for iteration in range(self.iterations):
                if self.verbose and iteration % 5 == 0:
                    print(f"Iteration {iteration + 1} of {self.iterations}")

                # Select random element
                i = self.rnd.randint(0, len(distance_matrix) - 1)

                # Admissible swap partners: exclude swaps currently on the tabu
                # list (in either orientation).
                possible_exchanges = [
                    j
                    for j in self._get_exchanges(cluster_assignment, i)
                    if (i, j) not in tabu_swaps and (j, i) not in tabu_swaps
                ]

                if len(possible_exchanges) == 0:
                    continue

                # Select a random admissible swap and evaluate it.
                j = possible_exchanges[self.rnd.randint(0, len(possible_exchanges) - 1)]
                new_cluster_assignment = self._swap(cluster_assignment, i, j)
                new_objective = self._calculate_objective(new_cluster_assignment, distance_matrix)

                # Accept the move only if it improves the objective, then mark
                # the swap tabu so it is not immediately undone.
                if new_objective > objective:
                    cluster_assignment = new_cluster_assignment
                    objective = new_objective
                    tabu_swaps.append((i, j))

            candidate_solutions.append((objective, cluster_assignment))

            if self.verbose:
                print(f"Restart {restart + 1} of {self.restarts}")

            # Cold restart: begin again from a fresh random assignment.
            cluster_assignment = self._get_random_clusters(num_groups=num_groups, num_elements=len(distance_matrix))

        # Select best solution, maximizing objective
        _, best_cluster_assignment = max(candidate_solutions, key=lambda x: x[0])

        return best_cluster_assignment
28 changes: 21 additions & 7 deletions examples/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
ExchangeHeuristicAntiClustering,
SimulatedAnnealingHeuristicAntiClustering,
NaiveRandomHeuristicAntiClustering,
TabuSearchHeuristicAntiClustering,
ExactClusterEditingAntiClustering,
AntiClustering,
)
Expand All @@ -36,15 +37,18 @@
iris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)

methods: List[AntiClustering] = [
TabuSearchHeuristicAntiClustering(iterations=5000, restarts=10, tabu_tenure=50),
ExchangeHeuristicAntiClustering(restarts=20),
SimulatedAnnealingHeuristicAntiClustering(alpha=0.95, iterations=5000, starting_temperature=1000, restarts=20),
NaiveRandomHeuristicAntiClustering(),
ExactClusterEditingAntiClustering(),
# ExactClusterEditingAntiClustering(), # This method is extremely slow for large datasets
]

for method in methods:
for k in range(2, 4):
print(f"Method: {method.__class__.__name__}, clusters: {k}")
for k in range(2, 4):
print(f"------------- Number of clusters: {k} -------------")
summary = []
for method in methods:
print(f"Running method: {method.__class__.__name__}")

start_time = time.time()
df = method.run(
Expand All @@ -63,6 +67,16 @@
# Mean of differences
mean_df = difference_df.reset_index(level=[1]).groupby(["level_1"]).mean()

print(f"∆M: {mean_df.loc['mean'][0]}")
print(f"∆SD: {mean_df.loc['std'][0]}")
print(f"Running time: {time_taken}s")
summary.append(
pd.DataFrame(
{
"Method": [method.__class__.__name__],
"Clusters": [k],
"∆M": [round(mean_df.loc["mean"][0], 4)],
"∆SD": [round(mean_df.loc["std"][0], 4)],
"Time (s)": [time_taken],
}
)
)
print("Summary (lower ∆M and ∆SD is better):")
print(pd.concat(summary).to_string())

0 comments on commit b56fb13

Please sign in to comment.