Skip to content

Commit fa18d21

Browse files
committed
quick refac
1 parent cf8810b commit fa18d21

File tree

4 files changed

+38
-53
lines changed

4 files changed

+38
-53
lines changed

pecok/admm.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,46 +4,46 @@
44
# License: MIT
55

66
import numpy as np
7-
from scipy import linalg as la
7+
from scipy import linalg
88

99

10-
def operator_lstarllstarinv_sym(u, v):
10+
def _operator_lstarllstarinv_sym(u, v):
1111
"""Operator \widetildetilde{L}^*_{sym} on (u,v) in R^{p+1} -> R^{p*p}"""
1212
temp = u.repeat(u.size).reshape((u.size, u.size))
1313
return (temp + temp.T)/2 + np.diag(np.repeat(v, u.size))
1414

1515

16-
def proj_lin_Hsymmetric(Y, n_struct):
16+
def _proj_lin_Hsymmetric(Y, n_struct):
1717
"""Projection onto \Pi_{\mathcal{A}sym}(Y)"""
1818
n_samples,_ = Y.shape
1919
x = np.sum(Y, 1) - 1
2020
y = np.trace(Y) - n_struct
2121
invx = (x-(np.sum(x)+y)/(2*n_samples))/(n_samples-1)
2222
invy = (y-(np.sum(x)+y)/(2*n_samples))/(n_samples-1)
23-
Y = Y - operator_lstarllstarinv_sym(invx, invy)
23+
Y = Y - _operator_lstarllstarinv_sym(invx, invy)
2424
return Y
2525

2626

27-
def proj_positive(x, thresh=0):
27+
def _proj_positive(x, thresh=0):
2828
"""Project onto component-positive matrix"""
2929
x[x < thresh] = 0
3030
return x
3131

3232

33-
def proj_Snp_imp(Y):
33+
def _proj_Snp_imp(Y):
3434
"""Improved projection onto semi-definite positive matrix"""
3535
n_samples,_ = Y.shape
36-
eig_vals = la.eigh(Y, eigvals_only=True)
36+
eig_vals = linalg.eigh(Y, eigvals_only=True)
3737
n_val_neg = np.sum(eig_vals<0)
3838
if n_val_neg == 0:
3939
return Y
4040
if n_val_neg == n_samples:
4141
return np.zeros((n_samples,n_samples))
4242
if n_val_neg < n_samples-n_val_neg:
43-
eig_vals, v = la.eigh(-Y, eigvals=(n_samples - n_val_neg, n_samples - 1))
43+
eig_vals, v = linalg.eigh(-Y, eigvals=(n_samples - n_val_neg, n_samples - 1))
4444
Y = Y + v.dot(np.diag(eig_vals)).dot(v.T)
4545
else:
46-
eig_vals, v = la.eigh(Y, eigvals=(n_val_neg, n_samples - 1))
46+
eig_vals, v = linalg.eigh(Y, eigvals=(n_val_neg, n_samples - 1))
4747
Y = v.dot(np.diag(eig_vals)).dot(v.T)
4848
return Y
4949

@@ -71,9 +71,9 @@ def pecok_admm(relational_data, n_clusters, n_iter_max=-1, rho=5, mat_init=None,
7171
n_iter = n_iter + 1
7272

7373
oldXbar = Xbar
74-
X = proj_lin_Hsymmetric(Xbar - U + relational_data / rho, n_clusters)
75-
Y = proj_positive(Xbar - V)
76-
Z = proj_Snp_imp(Xbar - W)
74+
X = _proj_lin_Hsymmetric(Xbar - U + relational_data / rho, n_clusters)
75+
Y = _proj_positive(Xbar - V)
76+
Z = _proj_Snp_imp(Xbar - W)
7777
Xbar = (X + Y + Z)/3
7878

7979
U = U + X - Xbar
@@ -82,17 +82,17 @@ def pecok_admm(relational_data, n_clusters, n_iter_max=-1, rho=5, mat_init=None,
8282

8383
res_dual = rho * np.linalg.norm(Xbar-oldXbar)
8484
res_primal = np.linalg.norm((X-Xbar, Z-Xbar, Y-Xbar))
85-
if not (is_primal_high(eps_residual, res_primal, X, Y, Z) or is_dual_high(eps_residual, res_dual, Y, Z)):
85+
if not (_is_primal_high(eps_residual, res_primal, X, Y, Z) or _is_dual_high(eps_residual, res_dual, Y, Z)):
8686
break
8787
if verbose:
8888
print("ADMM ends -- n_iter=%i, rho=%2.2f" % (n_iter, rho))
8989
print(" -- res_primal=%.3e, res_dual=%.3e" % (res_primal, res_dual))
9090
return Z
9191

9292

93-
def is_primal_high(eps_residual, res_primal, X, Y, Z):
93+
def _is_primal_high(eps_residual, res_primal, X, Y, Z):
9494
return res_primal > eps_residual * np.max((np.linalg.norm(X), np.linalg.norm(Y), np.linalg.norm(Z)))
9595

9696

97-
def is_dual_high(eps_residual, res_dual, Y, Z):
97+
def _is_dual_high(eps_residual, res_dual, Y, Z):
9898
return res_dual > eps_residual * (np.sqrt(Y.shape[0]) + np.linalg.norm(Y) + np.linalg.norm(Z))

pecok/clustering.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""PECOK clustering"""
22

3-
# author: Martin Royer <martin.royer@m4x.org>
3+
# author: Martin Royer <martin.royer@math.u-psud.fr>
44
# License: MIT
55

66
import numpy as np
@@ -21,14 +21,14 @@ def _corrected_relational(obs, corr):
2121
return (obs.dot(obs.T) - gamma_hat(obs, corr=corr)) / obs.shape[1]
2222

2323

24-
def kmeanz(X, n_clusters, corr):
24+
def _kmeanz(X, n_clusters, corr):
2525
gram_corrected = _corrected_relational(X, corr=corr)
2626
U, s, _ = lin_svd(gram_corrected, compute_uv=True)
2727
approx = U.dot(np.diag(np.sqrt(s)))
2828
return approx, AgglomerativeClustering(linkage='ward', n_clusters=n_clusters).fit(approx)
2929

3030

31-
def pecok_clustering(obs, n_clusters, corr=4, **kwargs):
31+
def _pecok_clustering(obs, n_clusters, corr=4, **kwargs):
3232
gram_corrected = _corrected_relational(obs, corr=corr)
3333
U, _, V = spa_svd(gram_corrected, k=n_clusters)
3434
Bhat = pecok_admm(gram_corrected, n_clusters=n_clusters, mat_init=U.dot(V), **kwargs)
@@ -37,7 +37,7 @@ def pecok_clustering(obs, n_clusters, corr=4, **kwargs):
3737

3838
class Pecok(BaseEstimator, ClusterMixin, TransformerMixin):
3939
"""PeCoK clustering
40-
Read more in [my thesis]
40+
Read more in [my thesis: http://www.theses.fr/2018SACLS442]
4141
Parameters
4242
----------
4343
n_clusters : int, optional, default: 8
@@ -116,7 +116,7 @@ def fit(self, X, y=None, sample_weight=None):
116116
# random_state = check_random_state(self.random_state)
117117

118118
hc_ = \
119-
pecok_clustering(
119+
_pecok_clustering(
120120
X, n_clusters=self.n_clusters, corr=self.corr, verbose=self.verbose)
121121
# init=self.init, n_init=self.n_init,
122122
# max_iter=self.max_iter, verbose=self.verbose,
@@ -152,7 +152,7 @@ def fit(self, X, y=None, sample_weight=None):
152152

153153
class KMeanz(BaseEstimator, ClusterMixin, TransformerMixin):
154154
"""K-MeanZ clustering
155-
Read more in [my thesis]
155+
Read more in [my thesis: http://www.theses.fr/2018SACLS442]
156156
Parameters
157157
----------
158158
n_clusters : int, optional, default: 8
@@ -231,7 +231,7 @@ def fit(self, X, y=None, sample_weight=None):
231231
# random_state = check_random_state(self.random_state)
232232

233233
self.corrected_points_, hc_ = \
234-
kmeanz(
234+
_kmeanz(
235235
X, n_clusters=self.n_clusters, corr=self.corr)
236236
# init=self.init, n_init=self.n_init,
237237
# max_iter=self.max_iter, verbose=self.verbose,

pecok/gamma.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010

11-
def gamma_hat2(X):
11+
def _gamma_hat2(X):
1212
n_samples,_ = X.shape
1313
X2 = X / (np.linalg.norm(X, axis=1, keepdims=True)+1e-8)
1414
XaX2 = X.dot(X2.T)
@@ -21,7 +21,7 @@ def gamma_hat2(X):
2121
return gamma
2222

2323

24-
def gamma_hat2_robust(X):
24+
def _gamma_hat2_robust(X):
2525
n_samples,_ = X.shape
2626
X2 = X / (np.linalg.norm(X, axis=1, keepdims=True)+1e-8)
2727
XaX2 = X.dot(X2.T)
@@ -34,7 +34,7 @@ def gamma_hat2_robust(X):
3434
return gamma
3535

3636

37-
def gamma_hat3(X):
37+
def _gamma_hat3(X):
3838
"""Gamma_hat3 estimator from PECOK supplement, in O(n_samples^3 * n_features)
3939
4040
Parameters
@@ -53,7 +53,7 @@ def gamma_hat3(X):
5353
return gamma
5454

5555

56-
def gamma_hat4(X):
56+
def _gamma_hat4(X):
5757
"""Gamma_hat4 estimator from PECOK, in O(n_samples^4 * n_features)
5858
5959
Parameters
@@ -81,21 +81,21 @@ def gamma_hat4(X):
8181
return np.asarray(gamma)
8282

8383

84-
def no_correction(X):
84+
def _no_correction(X):
8585
return np.zeros(X.shape[0])
8686

8787

88-
def cross_diag(X):
88+
def _cross_diag(X):
8989
return np.diag(X.dot(X.T))
9090

9191

9292
def gamma_hat(X, corr):
9393
ghat = {
94-
0: no_correction,
95-
1: gamma_hat2_robust,
96-
2: gamma_hat2,
97-
3: gamma_hat3,
98-
4: gamma_hat4,
99-
8: cross_diag,
100-
}.get(corr, no_correction)(X)
94+
0: _no_correction,
95+
1: _gamma_hat2_robust,
96+
2: _gamma_hat2,
97+
3: _gamma_hat3,
98+
4: _gamma_hat4,
99+
8: _cross_diag,
100+
}.get(corr, _no_correction)(X)
101101
return np.diag(ghat)

tests/test.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@
66
from functools import wraps
77
import timeit
88

9-
# import pyper
109
import numpy as np
1110
from pecok import KMeanz, Pecok
1211
from sklearn.cluster import KMeans
1312

1413

15-
def timethis(f):
14+
def _timethis(f):
1615
@wraps(f)
1716
def wrap(*args, **kw):
1817
ts = timeit.default_timer()
@@ -22,20 +21,6 @@ def wrap(*args, **kw):
2221
return wrap
2322

2423

25-
def hdclassif(obs, n_struct):
26-
n,_ = obs.shape
27-
myR = pyper.R()
28-
myR.run('library(HDclassif)')
29-
myR.assign('obs', obs)
30-
myR.assign('n_struct', n_struct)
31-
try:
32-
myR.run('res <- hddc(obs, K=n_struct, model=c(1,2,7,9))')
33-
result = np.array(myR['res$class'])
34-
except:
35-
print("fail hdclassif")
36-
result = np.zeros(n)
37-
return result
38-
3924
seed = 432
4025
np.random.seed(seed)
4126
print("seed is %i" % seed)
@@ -62,7 +47,7 @@ def hdclassif(obs, n_struct):
6247

6348
print("truth:".ljust(15), truth)
6449
for method, method_name in methods:
65-
job_result, job_time = timethis(method)(mat_data.T, 2)
50+
job_result, job_time = _timethis(method)(mat_data.T, 2)
6651
print(method_name.ljust(15), job_result)
6752
print("job_time: %.2f (s)".ljust(15) % job_time)
6853

@@ -79,7 +64,7 @@ def hdclassif(obs, n_struct):
7964

8065
print("truth:".ljust(15), truth)
8166
for method, method_name in methods:
82-
job_result, job_time = timethis(method)(X, 2)
67+
job_result, job_time = _timethis(method)(X, 2)
8368
print(method_name.ljust(15), job_result)
8469
print("job_time: %.2f (s)".ljust(15) % job_time)
8570
# print("pecok:".ljust(15), pecok_clustering(X, n_struct=2, rho=100, n_iter_max=3000, verbose=True).labels_)

0 commit comments

Comments
 (0)