fixed setup.py to not include full-path of .c files
Commit: fixed setup.py to not include full-path of .c files
Showing 6 changed files with 260 additions and 14 deletions.
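The setup.py diff itself is not reproduced on this page; only one of the changed files is shown below. As a rough, hypothetical sketch of the kind of fix the commit message describes (not the actual change), the generated .c sources can be listed with paths relative to the project root instead of absolute paths, so machine-specific locations never end up in the build metadata:

# Hypothetical sketch only -- the real setup.py diff is not shown in this commit view.
# Idea: collect generated .c files as paths relative to setup.py rather than absolute paths.
import os
from setuptools import setup, Extension

here = os.path.abspath(os.path.dirname(__file__))


def relative_c_sources(package_dir):
    """Collect .c files under ``package_dir`` as paths relative to setup.py."""
    sources = []
    for root, _, files in os.walk(os.path.join(here, package_dir)):
        for name in files:
            if name.endswith('.c'):
                # e.g. 'dynetlsm/_module.c' instead of '/home/user/project/dynetlsm/_module.c'
                sources.append(os.path.relpath(os.path.join(root, name), here))
    return sources


# one extension per generated .c file; module names here are purely illustrative
extensions = [
    Extension(name=os.path.splitext(rel)[0].replace(os.sep, '.'),
              sources=[rel])
    for rel in relative_c_sources('dynetlsm')
]

setup(ext_modules=extensions)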
@@ -0,0 +1,162 @@
import glob
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.utils import check_random_state
from sklearn.metrics import adjusted_rand_score

from dynetlsm import DynamicNetworkHDPLPCM
from dynetlsm.datasets import synthetic_static_community_dynamic_network
from dynetlsm.model_selection.approx_bic import calculate_cluster_counts
from dynetlsm.metrics import variation_of_information
from dynetlsm.network_statistics import density, modularity


def counts_per_time_step(z):
    # number of distinct groups occupied at each time step
    n_time_steps = z.shape[0]
    group_counts = np.zeros(n_time_steps, dtype=int)
    for t in range(n_time_steps):
        group_counts[t] = np.unique(z[t]).shape[0]

    return group_counts


def posterior_per_time_step(model):
    # posterior probability of each group count at each time step
    n_time_steps = model.Y_fit_.shape[0]
    probas = np.zeros((n_time_steps, model.n_components + 1))
    for t in range(n_time_steps):
        freq = model.posterior_group_counts_[t]
        index = model.posterior_group_ids_[t]
        probas[t, index] = freq / freq.sum()

    return probas


def benchmark_single(n_iter=10000, burn=5000, tune=1000,
                     outfile_name='benchmark',
                     random_state=None):
    """Run one replication: simulate a network, fit the HDP-LPCM,
    and write the evaluation metrics to ``outfile_name``."""
    random_state = check_random_state(random_state)

    # generate simulated networks
    Y, X, z, intercept = synthetic_static_community_dynamic_network(
        n_time_steps=6, n_nodes=120, random_state=random_state)

    # fit HDP-LPCM
    model = DynamicNetworkHDPLPCM(n_iter=n_iter,
                                  burn=burn,
                                  tune=tune,
                                  tune_interval=1000,
                                  is_directed=False,
                                  selection_type='vi',
                                  n_components=10,
                                  random_state=random_state).fit(Y)

    # MAP: number of clusters per time point
    map_counts = counts_per_time_step(model.z_)

    # posterior group-count probabilities
    probas = posterior_per_time_step(model)

    # create dataframe of results
    results = pd.DataFrame(probas)
    results['map_counts'] = map_counts

    # goodness-of-fit metrics for the MAP estimate
    results['insample_auc'] = model.auc_
    results['vi'] = variation_of_information(z.ravel(), model.z_.ravel())

    # time-averaged variation of information
    vi = 0.
    for t in range(Y.shape[0]):
        vi += variation_of_information(z[t], model.z_[t])
    results['vi_avg'] = vi / Y.shape[0]

    results['rand_index'] = adjusted_rand_score(z.ravel(), model.z_.ravel())

    # time-averaged adjusted Rand index
    adj_rand = 0.
    for t in range(Y.shape[0]):
        adj_rand += adjusted_rand_score(z[t], model.z_[t])
    results['rand_avg'] = adj_rand / Y.shape[0]

    # summary statistics of the simulated networks
    results['modularity'] = modularity(Y, z)
    results['density'] = density(Y)

    results.to_csv(outfile_name, index=False)


# NOTE: This is meant to be run in parallel on a computer cluster!
n_reps = 50
out_dir = 'results'

# create a directory to store the results
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

for i in range(n_reps):
    benchmark_single(n_iter=35000, burn=10000, tune=5000, random_state=i,
                     outfile_name=os.path.join(
                         out_dir, 'benchmark_{}.csv'.format(i)))

# calculate median metric values
n_time_steps = 6
n_groups = 10

n_files = len(glob.glob('results/*'))
stat_names = ['insample_auc', 'vi_avg', 'rand_avg']
data = np.zeros((n_files, len(stat_names)))
for i, file_name in enumerate(glob.glob('results/*')):
    df = pd.read_csv(file_name)
    data[i] = df.loc[0, stat_names].values

data = pd.DataFrame(data, columns=stat_names)
print('Median Metrics:')
print(data.median(axis=0))
print('Metrics SD:')
print(data.std(axis=0))

# plot posterior boxplots
data = {'probas': [], 'cluster_number': [], 't': []}
for file_name in glob.glob('results/*'):
    df = pd.read_csv(file_name)
    for t in range(n_time_steps):
        for i in range(1, n_groups):
            data['probas'].append(df.iloc[t, i])
            data['cluster_number'].append(i)
            data['t'].append(t + 1)

data = pd.DataFrame(data)

plt.rc('font', family='sans-serif', size=16)
g = sns.catplot(x='cluster_number', y='probas', col='t',
                col_wrap=3, kind='box', data=data)

for ax in g.axes:
    ax.set_ylabel('posterior probability')
    ax.set_xlabel('# of groups')

g.fig.tight_layout()

plt.savefig('cluster_posterior.png', dpi=300)

# clear figure
plt.clf()

# plot selected number of groups for each simulation
data = np.zeros((n_time_steps, n_groups), dtype=int)
for sim_id, file_name in enumerate(glob.glob('results/*')):
    df = pd.read_csv(file_name)
    for t in range(n_time_steps):
        data[t, df.iloc[t, n_groups + 1] - 1] += 1

data = pd.DataFrame(data, columns=range(1, n_groups + 1),
                    index=range(1, n_time_steps + 1))
mask = data.values == 0

g = sns.heatmap(data, annot=True, cmap="Blues", cbar=False, mask=mask)
g.set_xlabel('# of groups')
g.set_ylabel('t')
plt.savefig('num_clusters.png', dpi=300)
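The script notes that the 50 replications are meant to be submitted in parallel on a computer cluster, one job per replication. As a hedged alternative for a single multi-core machine (not part of the original script), the same loop could be dispatched with the standard library's ProcessPoolExecutor, assuming benchmark_single and out_dir are defined as above:

# Sketch only: local multi-process alternative to one cluster job per replication.
# Assumes benchmark_single and out_dir are defined as in the script above.
import os
from concurrent.futures import ProcessPoolExecutor


def run_one(i):
    # each replication keeps its own seed and output file, as in the serial loop
    benchmark_single(n_iter=35000, burn=10000, tune=5000, random_state=i,
                     outfile_name=os.path.join(out_dir,
                                               'benchmark_{}.csv'.format(i)))


if __name__ == '__main__':
    # max_workers chosen purely for illustration; set it to the cores available
    with ProcessPoolExecutor(max_workers=4) as pool:
        list(pool.map(run_one, range(50)))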