Skip to content

Commit

Permalink
Refactored parallel benchmarking code.
Browse files Browse the repository at this point in the history
  • Loading branch information
rphes committed Jul 24, 2018
1 parent 09a128a commit 710d770
Showing 1 changed file with 31 additions and 42 deletions.
73 changes: 31 additions & 42 deletions examples/benchmark_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,64 +18,53 @@
# NOTE(review): the comment that describes M lies above the visible part of the
# file; in the lines below M is used as the number of columns of `data`.
M = 10
# no. of numerical dimensions
# NOTE(review): _kprototypes() passes column indices M - MN .. M - 1 as
# `categorical`, so MN also equals the number of columns treated as
# categorical there -- confirm which meaning is intended.
MN = 5
# max no. of cores to run on
# (run() receives this as `stop` and also forwards it as n_init for every fit)
C = 4

# Shared benchmark input: random integers from np.random.randint(1, 1000, ...)
# with max(N_kproto, N_kmodes) rows and M columns; each benchmark slices the
# number of rows it needs from the top.
data = np.random.randint(1, 1000, (max(N_kproto, N_kmodes), M))


# NOTE(review): this span appears to interleave two revisions of the file (a
# `kprototypes()` that times two hard-coded fits, and a parameterised
# `_kprototypes()`), and the original indentation is lost -- confirm against
# the real diff before changing any code here.
def kprototypes():
# Draw a seed, so both jobs converge in an equal amount of iterations
# (the seed is a random integer below the int32 maximum)
seed = np.random.randint(np.iinfo(np.int32).max)

# First timing: n_init=4 with no n_jobs argument given.
start = time.time()
KPrototypes(n_clusters=K, init='Huang', n_init=4, verbose=2,
# _kprototypes(k, n_init, n_jobs, seed): fit KPrototypes on the first N_kproto
# rows of `data`, passing column indices M - MN .. M - 1 as `categorical`.
# The fitted estimator is not returned.
def _kprototypes(k, n_init, n_jobs, seed):
KPrototypes(n_clusters=k, init='Huang', n_init=n_init, n_jobs=n_jobs,
random_state=seed) \
.fit(data[:N_kproto, :], categorical=list(range(M - MN, M)))
# Elapsed time of the first fit, as a difference of time.time() readings.
single = time.time() - start
print('Finished 4 runs on 1 thread in {:.2f} seconds'.format(single))

# Second timing: same fit with n_jobs=4; NumPy's global RNG is re-seeded with
# the same seed first.
np.random.seed(seed)
start = time.time()
KPrototypes(n_clusters=K, init='Huang', n_init=4, n_jobs=4, verbose=2,
random_state=seed) \
.fit(data[:N_kproto, :], categorical=list(range(M - MN, M)))
multi = time.time() - start
print('Finished 4 runs on 4 threads in {:.2f} seconds'.format(multi))

# Returns the two elapsed times as a (single, multi) tuple.
return single, multi
def _kmodes(k, n_init, n_jobs, seed):
    """Fit a KModes estimator on the first ``N_kmodes`` rows of ``data``.

    The estimator is built with ``init='Huang'`` and the given number of
    clusters (``k``), initialisation tries (``n_init``), parallel jobs
    (``n_jobs``) and ``random_state`` (``seed``). The fitted estimator is
    discarded; the function exists only so the fit can be timed.
    """
    model = KModes(
        n_clusters=k,
        init='Huang',
        n_init=n_init,
        n_jobs=n_jobs,
        random_state=seed,
    )
    model.fit(data[:N_kmodes, :])


# NOTE(review): this span appears to interleave two revisions of the file (a
# `kmodes()` that times two hard-coded fits, and a generic `run()` driver),
# and the original indentation is lost -- confirm against the real diff before
# changing any code here.
def kmodes():
# run(task, stop): call `task(K, stop, n_jobs, seed)` once for every n_jobs in
# 1..stop, timing each call. `stop` is therefore both the largest n_jobs and
# the n_init handed to the task. Prints the elapsed time per call and, for
# n_jobs > 1, the speed-up relative to the n_jobs == 1 call. Returns nothing.
def run(task, stop):
# Draw a seed, so both jobs converge in an equal amount of iterations
seed = np.random.randint(np.iinfo(np.int32).max)
# Placeholder; overwritten with the measured runtime when n_jobs == 1.
baseline = 0

# First timing of the hard-coded variant: n_init=4, no n_jobs argument.
start = time.time()
KModes(n_clusters=K, init='Huang', n_init=4, verbose=2,
random_state=seed).fit(data[:N_kmodes, :])
single = time.time() - start
print('Finished 4 runs on 1 thread in {:.2f} seconds'.format(single))

# Second timing of the hard-coded variant: same fit with n_jobs=4.
start = time.time()
KModes(n_clusters=K, init='Huang', n_init=4, n_jobs=4, verbose=2,
random_state=seed).fit(data[:N_kmodes, :])
multi = time.time() - start
print('Finished 4 runs on 4 threads in {:.2f} seconds'.format(multi))
# Generic variant: one timed call of `task` per job count, all with one seed.
for n_jobs in range(1, stop + 1):
print('Starting runs on {} core(s)'.format(n_jobs))
t_start = time.time()
task(K, stop, n_jobs, seed)
runtime = time.time() - t_start

return single, multi
# The n_jobs == 1 runtime is the reference; later iterations report
# baseline / runtime as the speed-up factor.
if n_jobs == 1:
baseline = runtime
print('Finished {} runs on 1 core in {:.2f} seconds'.format(stop, runtime))
else:
print('Finished {} runs on {} cores in {:.2f} seconds, a {:.1f}x '
'speed-up'.format(stop, n_jobs, runtime, baseline / runtime))


# Script entry point.
# NOTE(review): the statements below appear to interleave two revisions of the
# driver (one calling kprototypes()/kmodes() and printing a summary, one
# calling run() per algorithm), with indentation lost -- confirm against the
# real diff before changing any code here.
if __name__ == '__main__':
print('Starting K-Prototypes on 1 and on 4 threads for {} clusters with {}'
' points of {} features'.format(K, N_kproto, M))
res_kproto = kprototypes()
print('Starting K-Modes on 1 and on 4 threads for {} clusters with {}'
' points of {} features'.format(K, N_kmodes, M))
res_kmodes = kmodes()
print()
# Summary lines: each result is indexed as [0] and [1] and the printed
# speed-up is [0] / [1].
# NOTE(review): 'K-Protoypes' in the message below looks like a typo for
# 'K-Prototypes'; left untouched because it is runtime output.
print('K-Protoypes took {:.2f} s for 1 thread and {:.2f} s for 4 threads:'
' a {:.1f}x speed-up'.format(res_kproto[0], res_kproto[1],
res_kproto[0] / res_kproto[1]))
print('K-Modes took {:.2f} s for 1 thread and {:.2f} s for 4 threads:'
' a {:.1f}x speed-up'.format(res_kmodes[0], res_kmodes[1],
res_kmodes[0] / res_kmodes[1]))
# run()-based driver: C is printed both as the core count and as the number
# of initialization tries, matching how run() forwards `stop` as n_init.
print('Running K-Prototypes on 1 to {} cores for {} initialization tries '
'of {} clusters with {}' ' points of {} features'.format(
C, C, K, N_kproto, M))
run(_kprototypes, C)

print('\nRunning K-Modes on 1 to {} cores for {} initialization tries '
'of {} clusters with {}' ' points of {} features'.format(
C, C, K, N_kmodes, M))
run(_kmodes, C)



0 comments on commit 710d770

Please sign in to comment.