-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadap_ks_evid_per.py
111 lines (103 loc) · 4.88 KB
/
adap_ks_evid_per.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 5 17:32:02 2018
@author: jorge
"""
# Adaptive kernel smoothing (similar to Park et al., 2013)
# added bias control on boundaries by reflection.
# parameter axis should be normalized to (0,1).
# coords -- are the sample coordinates (e.g obtained by MDS).
# dim -- is the dimension of samples (or the number of features).
# K -- is the number of clusters to use in the adaptive variance (e.g.
# obtained by the silhouette score).
# obs -- is a vector of test points where the density will be evaluated.
# density -- is the evaluated density, the same length as obs.
from sklearn.cluster import KMeans
import numpy as np
from scipy.stats import multivariate_normal
from var_per import var_per
def adap_ks_evid_per(coords, dim, K, obs):
n = len(coords)
sincp = np.sin(coords[:,-1])
coscp = np.cos(coords[:,-1])
comcp = np.column_stack([coscp, sincp])
fac = np.average([np.std(sincp),np.std(coscp)])/np.max(np.std(coords[:,:-1], axis=0))
coordsclus = np.column_stack([fac*coords[:,:-1],comcp])
mykmeans = KMeans(n_clusters=K, n_init=20, random_state=4855).fit(coordsclus)
labeled = np.zeros([np.size(coords, 0), dim+1])
labeled[:,:-1] = coords
labeled[:,-1] = mykmeans.labels_
ninK = [np.count_nonzero(labeled[:,-1]==i) for i in range(K)]
cluscen = mykmeans.cluster_centers_
ind = np.argsort(labeled[:,-1])
labeled = labeled[ind,:]
coordsclus = coordsclus[ind,:]
loc = np.zeros([K+1, 1], dtype=int)
sig2 = np.zeros([K, dim])
k = 0
loc[0,0] = 0
for i in range(n):
if int(labeled[i,-1]) != k:
loc[k+1,0] = i
if ninK[k] > 1:
sig2[k,:-1] = np.var(labeled[loc[k,0]:loc[k+1,0],:-2], axis=0, ddof=1)
sig2[k,-1] = var_per(labeled[loc[k,0]:loc[k+1,0],-2])
else:
disclus = np.linalg.norm(coordsclus[loc[k,0],:]*np.ones(cluscen.shape)-cluscen, axis=1)
#print('coordsclus = ', coordsclus[loc[k,0],:])
#print('disclus = ', disclus)
indf = np.argsort(disclus)[1]
#print('indf =', indf)
sig2[k,:-1] = np.var(np.row_stack((labeled[loc[k,0],:-2],cluscen[indf,:-2])), axis=0, ddof=1)
cpcluscen = np.arctan2(cluscen[indf,-2],cluscen[indf,-1])
#print('cluscen =', cluscen)
if cpcluscen >= 0:
cpcluscen = cpcluscen
else:
cpcluscen = 2*np.pi+cpcluscen
sig2[k,-1] = var_per(np.row_stack((labeled[loc[k,0],-2],cpcluscen)))
k = k + 1
if k == K-1:
loc[K,0] = n
if ninK[K-1] > 1:
sig2[K-1,:-1] = np.var(labeled[loc[K-1,0]:loc[K,0],:-2], axis=0, ddof=1)
sig2[K-1,-1] = var_per(labeled[loc[K-1,0]:loc[K,0],-2])
else:
disclus = np.linalg.norm(coordsclus[loc[K-1,0],:]*np.ones(cluscen.shape)-cluscen, axis=1)
indf = np.argsort(disclus)[1]
sig2[K-1,:-1] = np.var(np.row_stack((labeled[loc[K-1,0],:-2],cluscen[indf,:-2])), axis=0, ddof=1)
cpcluscen = np.arctan2(cluscen[indf,-2],cluscen[indf,-1])
if cpcluscen >= 0:
cpcluscen = cpcluscen
else:
cpcluscen = 2*np.pi+cpcluscen
sig2[K-1,-1] = var_per(np.row_stack((labeled[loc[K-1,0],-2],cpcluscen)))
break
#print('sig2 = ', sig2)
# silv = (4/(dim+2))**(1/(dim+4))*n**(-1/(dim+4)) # Silverman's rule of thumb
density = np.zeros(np.size(obs,0))
evid = 0.0
fmom = 0.0
# compute evidence
k = 0
for i in range(n):
if i == loc[k+1,0]:
k = k + 1
bw = (4/(dim+2))**(1/(dim+4))*ninK[k]**(-1/(dim+4))*np.sqrt(sig2[k,:]) # Silverman's rule of thumb
evid = evid + multivariate_normal.pdf(obs[0,:-1], labeled[i,:-2], bw[:-1]**2)
fmom = fmom + (np.exp(np.complex(0,labeled[i,-2])))*multivariate_normal.pdf(obs[0,:-1], labeled[i,:-2], bw[:-1]**2)
# compute density in observations
for j in range(len(obs)):
k = 0
for i in range(n):
if i == loc[k+1,0]:
k = k + 1
bw = (4/(dim+2))**(1/(dim+4))*ninK[k]**(-1/(dim+4))*np.sqrt(sig2[k,:])
inside = multivariate_normal.pdf(obs[j], labeled[i,:-1], bw**2)
lbound = multivariate_normal.pdf(obs[j], np.concatenate((labeled[i,:-2],labeled[i,-2:-1]+2*np.pi)), bw**2)
rbound = multivariate_normal.pdf(obs[j], np.concatenate((labeled[i,:-2],labeled[i,-2:-1]-2*np.pi)), bw**2)
density[j] = density[j] + inside + lbound + rbound
meanang = np.arctan2(np.imag(fmom/evid),np.real(fmom/evid))
meanang = (meanang + 2 * np.pi) % (2 * np.pi)
return density/n, evid/n, mykmeans.labels_, meanang