Skip to content

Commit

Permalink
fixed setup.py to not include full-path of .c files
Browse files Browse the repository at this point in the history
  • Loading branch information
joshloyal committed Apr 10, 2020
1 parent 86eb2da commit 38f392f
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 14 deletions.
78 changes: 78 additions & 0 deletions dynetlsm/datasets/samples_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

__all__ = ['network_from_dynamic_latent_space',
'simple_splitting_dynamic_network',
'synthetic_static_community_dynamic_network',
'synthetic_dynamic_network']


Expand Down Expand Up @@ -205,6 +206,83 @@ def simple_splitting_dynamic_network(n_nodes=120, n_time_steps=9,
return Y, z


def synthetic_static_community_dynamic_network(
        n_nodes=100, n_time_steps=5, n_groups=6,
        intercept=1.0, lmbda=0.8, sticky_const=20.,
        sigma_shape=6, sigma_scale=20,
        random_state=42):
    """Simulate a dynamic network whose groups have fixed 2-D centers.

    Nodes are assigned to one of ``n_groups`` groups at ``t = 0`` and then
    move between groups according to a "sticky" transition matrix. At every
    time step each node's latent position is drawn around its group's
    (fixed) center, and the resulting positions are handed to
    ``network_from_dynamic_latent_space`` to sample the networks.

    Parameters
    ----------
    n_nodes : int, default=100
        Number of nodes in each network.
    n_time_steps : int, default=5
        Number of time steps to simulate.
    n_groups : int, default=6
        Number of groups. At most six, because only six group centers are
        hard-coded.
    intercept : float, default=1.0
        Forwarded to ``network_from_dynamic_latent_space`` and returned
        unchanged as the last element of the result.
    lmbda : float, default=0.8
        Weight on the group center when drawing positions for ``t >= 1``;
        the node's previous position receives weight ``1 - lmbda``.
    sticky_const : float, default=20.
        The unnormalized self-transition weight of a group is
        ``sticky_const`` times its largest off-diagonal weight.
    sigma_shape, sigma_scale : float, default=6 and 20
        Shape and scale of the gamma distribution whose draws are inverted
        and square-rooted to obtain each group's standard deviation.
    random_state : int, RandomState instance or None, default=42
        Passed to ``check_random_state``.

    Returns
    -------
    Y : first return value of ``network_from_dynamic_latent_space``
        (presumably the sampled adjacency matrices -- confirm against
        that function).
    X : ndarray of shape (n_time_steps, n_nodes, 2)
        Latent positions.
    z : ndarray of shape (n_time_steps, n_nodes)
        Integer group labels.
    intercept : float
        The ``intercept`` argument.

    Raises
    ------
    ValueError
        If ``n_groups`` is greater than six.
    """
    rng = check_random_state(random_state)

    # group locations
    mus = np.array([[-3, 0],
                    [3, 0],
                    [-1.5, 0],
                    [1.5, 0],
                    [0, 2.0],
                    [0, -2.0]])

    if n_groups > 6:
        raise ValueError("Only a maximum of six groups allowed for now.")

    # group spread: standard deviation is the square root of an inverse
    # gamma draw
    sigmas = np.sqrt(1. / rng.gamma(shape=sigma_shape, scale=sigma_scale,
                                    size=n_groups))

    # sample initial distribution
    w0 = rng.dirichlet(np.repeat(10, n_groups))  # E[p] = 1 / n_groups

    # set-up transition distribution: weights are inverse distances between
    # group centers. The diagonal divides by zero, which is silenced here
    # and overwritten below.
    with np.errstate(divide='ignore'):
        wt = 1. / pairwise_distances(mus)

    # only keep the groups that are actually used
    wt = wt[:n_groups][:, :n_groups]
    diag_indices = np.diag_indices_from(wt)
    # zero the (infinite) diagonal first so the row-wise max below only sees
    # off-diagonal weights
    wt[diag_indices] = 0
    wt[diag_indices] = sticky_const * np.max(wt, axis=1)
    wt /= wt.sum(axis=1).reshape(-1, 1)

    # run data generating process
    X, z = [], []

    # t = 0
    z0 = rng.choice(np.arange(n_groups), p=w0, size=n_nodes)
    X0 = np.zeros((n_nodes, 2), dtype=np.float64)
    for group_id in range(n_groups):
        group_mask = z0 == group_id
        group_count = np.sum(group_mask)
        X0[group_mask, :] = (sigmas[group_id] * rng.randn(group_count, 2) +
                             mus[group_id])
    X.append(X0)
    z.append(z0)

    for t in range(1, n_time_steps):
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` yields
        # the same dtype.
        zt = np.zeros(n_nodes, dtype=int)
        for group_id in range(n_groups):
            group_mask = z[t - 1] == group_id
            zt[group_mask] = rng.choice(np.arange(n_groups),
                                        p=wt[group_id, :],
                                        size=np.sum(group_mask))

        Xt = np.zeros((n_nodes, 2), dtype=np.float64)
        for group_id in range(n_groups):
            group_mask = zt == group_id
            group_count = np.sum(group_mask)
            # mean is a blend of the group center and the previous positions
            # of the nodes currently assigned to this group
            Xt[group_mask, :] = (
                sigmas[group_id] * rng.randn(group_count, 2) + (
                    lmbda * mus[group_id] +
                    (1 - lmbda) * X[t - 1][group_mask, :])
            )

        X.append(Xt)
        z.append(zt)

    X = np.stack(X, axis=0)
    z = np.vstack(z)

    Y, _ = network_from_dynamic_latent_space(X, intercept=intercept,
                                             random_state=rng)

    return Y, X, z, intercept


def synthetic_dynamic_network(n_nodes=120, n_time_steps=9,
intercept=1.0, lmbda=0.8, sticky_const=20.,
sigma_shape=6, sigma_scale=20, is_directed=False,
Expand Down
10 changes: 6 additions & 4 deletions examples/GoT.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
"""
Runs the analysis of the GoT character interactions network found in the
paper 'A Bayesian nonparametric latent space approach to modeling evolving
communities in dynamic networks' by Joshua Loyal and Yuguo Chen
"""

from dynetlsm import DynamicNetworkHDPLPCM
from dynetlsm.datasets import load_got
from dynetlsm.plots import (
Expand All @@ -7,10 +13,6 @@
)


# Runs the analysis of the GoT character interactions network found in the
# paper "A Bayesian nonparametric latent space approach to modeling evolving
# "communities in dynamic networks" by Joshua Loyal and Yuguo Chen

# Load GoT character interaction networks
Y, names = load_got(seasons=[1,2,3,4], weight_min=10)

Expand Down
162 changes: 162 additions & 0 deletions examples/homogeneous_simulation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import glob
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.utils import check_random_state
from sklearn.metrics import adjusted_rand_score

from dynetlsm import DynamicNetworkHDPLPCM
from dynetlsm.datasets import synthetic_static_community_dynamic_network
from dynetlsm.model_selection.approx_bic import calculate_cluster_counts
from dynetlsm.metrics import variation_of_information
from dynetlsm.network_statistics import density, modularity


def counts_per_time_step(z):
    """Count the distinct labels present at each time step.

    Parameters
    ----------
    z : ndarray
        Label array indexed by time step along its first axis.

    Returns
    -------
    group_counts : ndarray of shape (z.shape[0],)
        Integer array whose entry ``t`` is the number of unique values in
        ``z[t]``.
    """
    n_time_steps = z.shape[0]
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` yields the
    # same dtype.
    group_counts = np.zeros(n_time_steps, dtype=int)
    for t in range(n_time_steps):
        group_counts[t] = np.unique(z[t]).shape[0]

    return group_counts

def posterior_per_time_step(model):
    """Tabulate normalized posterior group counts for every time step.

    Reads ``Y_fit_``, ``n_components``, ``posterior_group_counts_`` and
    ``posterior_group_ids_`` from ``model``.

    Returns
    -------
    ndarray of shape (n_time_steps, model.n_components + 1)
        Row ``t`` holds ``posterior_group_counts_[t]`` divided by its sum,
        placed at the column positions given by ``posterior_group_ids_[t]``;
        all other entries are zero.
    """
    n_steps = model.Y_fit_.shape[0]
    table = np.zeros((n_steps, model.n_components + 1))

    for step in range(n_steps):
        counts = model.posterior_group_counts_[step]
        columns = model.posterior_group_ids_[step]
        table[step, columns] = counts / counts.sum()

    return table


def benchmark_single(n_iter=10000, burn=5000, tune=1000,
                     outfile_name='benchmark',
                     random_state=None):
    """Run one simulation replicate and write its summary to a CSV file.

    A network is simulated with
    ``synthetic_static_community_dynamic_network``, a
    ``DynamicNetworkHDPLPCM`` is fit to it, and a table containing the
    per-time-step posterior group-count probabilities plus fit metrics is
    saved to ``outfile_name``.

    Parameters
    ----------
    n_iter, burn, tune : int
        Forwarded to ``DynamicNetworkHDPLPCM``.
    outfile_name : str, default='benchmark'
        Destination passed to ``DataFrame.to_csv``.
    random_state : int, RandomState instance or None
        Passed to ``check_random_state``; the resulting generator is shared
        by the simulator and the model.
    """
    rng = check_random_state(random_state)

    # simulate the networks together with their true positions and labels
    Y, X, z, intercept = synthetic_static_community_dynamic_network(
        n_time_steps=6, n_nodes=120, random_state=rng)

    # fit the HDP-LPCM
    sampler_options = dict(n_iter=n_iter,
                           burn=burn,
                           tune=tune,
                           tune_interval=1000,
                           is_directed=False,
                           selection_type='vi',
                           n_components=10,
                           random_state=rng)
    model = DynamicNetworkHDPLPCM(**sampler_options).fit(Y)

    def time_average(metric):
        # mean of ``metric(truth, estimate)`` over the time steps
        total = 0.
        for t in range(Y.shape[0]):
            total += metric(z[t], model.z_[t])
        return total / Y.shape[0]

    # MAP estimate: number of clusters at each time point
    map_counts = counts_per_time_step(model.z_)

    # one row per time step holding the posterior group-count probabilities
    results = pd.DataFrame(posterior_per_time_step(model))
    results['map_counts'] = map_counts

    # goodness-of-fit metrics for the MAP estimate
    results['insample_auc'] = model.auc_
    results['vi'] = variation_of_information(z.ravel(), model.z_.ravel())
    results['vi_avg'] = time_average(variation_of_information)
    results['rand_index'] = adjusted_rand_score(z.ravel(), model.z_.ravel())
    results['rand_avg'] = time_average(adjusted_rand_score)

    # descriptive statistics of the simulated networks
    results['modularity'] = modularity(Y, z)
    results['density'] = density(Y)

    results.to_csv(outfile_name, index=False)


# NOTE: This is meant to be run in parallel on a computer cluster!
n_reps = 50
out_dir = 'results'

# create a directory to store the results (no-op when it already exists)
os.makedirs(out_dir, exist_ok=True)

for i in range(n_reps):
    benchmark_single(n_iter=35000, burn=10000, tune=5000, random_state=i,
                     outfile_name=os.path.join(
                         out_dir, 'benchmark_{}.csv'.format(i)))

# calculate median metric values
n_time_steps = 6
n_groups = 10

# List the result files once, in a deterministic order, so that every
# summary below is computed from the same set of files.
result_files = sorted(glob.glob(os.path.join(out_dir, '*')))

stat_names = ['insample_auc', 'vi_avg', 'rand_avg']
data = np.zeros((len(result_files), len(stat_names)))
for i, file_name in enumerate(result_files):
    df = pd.read_csv(file_name)
    # these metrics are written as a constant column, so the first row
    # suffices
    data[i] = df.loc[0, stat_names].values

data = pd.DataFrame(data, columns=stat_names)
print('Median Metrics:')
print(data.median(axis=0))
print('Metrics SD:')
print(data.std(axis=0))

# plot posterior boxplots
data = {'probas': [], 'cluster_number': [], 't': []}
for file_name in result_files:
    df = pd.read_csv(file_name)
    for t in range(n_time_steps):
        for i in range(1, n_groups):
            data['probas'].append(df.iloc[t, i])
            data['cluster_number'].append(i)
            data['t'].append(t + 1)

data = pd.DataFrame(data)

plt.rc('font', family='sans-serif', size=16)
g = sns.catplot(x='cluster_number', y='probas', col='t',
                col_wrap=3, kind='box', data=data)

for ax in g.axes:
    ax.set_ylabel('posterior probability')
    ax.set_xlabel('# of groups')

g.fig.tight_layout()

plt.savefig('cluster_posterior.png', dpi=300)

# clear figure
plt.clf()

# plot selected number of groups for each simulation
# (``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent)
data = np.zeros((n_time_steps, n_groups), dtype=int)
for file_name in result_files:
    df = pd.read_csv(file_name)
    for t in range(n_time_steps):
        # column ``n_groups + 1`` is 'map_counts': it follows the
        # ``n_components + 1`` probability columns written by
        # ``benchmark_single``
        data[t, df.iloc[t, n_groups + 1] - 1] += 1

data = pd.DataFrame(data, columns=range(1, n_groups + 1),
                    index=range(1, n_time_steps + 1))
mask = data.values == 0

g = sns.heatmap(data, annot=True, cmap="Blues", cbar=False, mask=mask)
g.set_xlabel('# of groups')
g.set_ylabel('t')
plt.savefig('num_clusters.png', dpi=300)
10 changes: 6 additions & 4 deletions examples/military_alliances.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
"""
Runs the analysis of the military alliances network found in the
paper 'A Bayesian nonparametric latent space approach to modeling evolving
communities in dynamic networks' by Joshua Loyal and Yuguo Chen
"""

from dynetlsm import DynamicNetworkHDPLPCM
from dynetlsm.datasets import load_alliances
from dynetlsm.plots import (
Expand All @@ -7,10 +13,6 @@
)


# Runs the analysis of the military alliances network found in the
# paper "A Bayesian nonparametric latent space approach to modeling evolving
# "communities in dynamic networks" by Joshua Loyal and Yuguo Chen

# Load military alliances networks
Y, names = load_alliances()

Expand Down
10 changes: 6 additions & 4 deletions examples/sampson_monks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
"""
Runs the analysis of Sampson's monastery network found in the
paper 'A Bayesian nonparametric latent space approach to modeling evolving
communities in dynamic networks' by Joshua Loyal and Yuguo Chen
"""

from dynetlsm import DynamicNetworkHDPLPCM
from dynetlsm.datasets import load_monks
from dynetlsm.plots import (
Expand All @@ -8,10 +14,6 @@
)


# Runs the analysis of the Sampson's monastery network found in the
# paper "A Bayesian nonparametric latent space approach to modeling evolving
# "communities in dynamic networks" by Joshua Loyal and Yuguo Chen

# Load Sampson's monastery network
Y, labels, names = load_monks(dynamic=True, is_directed=False)

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def get_sources():
for name in os.listdir(src_path):
path = os.path.join(src_path, name)
if os.path.isfile(path) and path.endswith(".c"):
files.append(path)
files.append(os.path.relpath(path))

return files

Expand Down Expand Up @@ -119,7 +119,7 @@ def make_extension(ext_name, macros=[]):
include_dirs = [get_include] + include_dirs
return Extension(
mod_name,
sources=[ext_path] + get_sources(),
sources=[os.path.relpath(ext_path)] + get_sources(),
include_dirs=include_dirs,
extra_compile_args=["-O3", "-Wall", "-fPIC"],
define_macros=macros)
Expand Down

0 comments on commit 38f392f

Please sign in to comment.