From edf9a137a8173618ab43bf7b164516c47b1ae88d Mon Sep 17 00:00:00 2001 From: Artem Golubin Date: Mon, 16 Jun 2025 01:11:28 +0400 Subject: [PATCH 1/3] Update github actions --- .github/workflows/python-app.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index b3ff0e27..a5a8d93e 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -11,11 +11,11 @@ jobs: timeout-minutes: 5 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - name: Set up Python 3.12 + uses: actions/setup-python@v4 with: - python-version: 3.8 + python-version: 3.12 - name: Install dependencies run: | python -m pip install --upgrade pip From 4e93532b6f6907de9199b2b649ae6557688d5eb6 Mon Sep 17 00:00:00 2001 From: Artem Golubin Date: Mon, 16 Jun 2025 01:15:27 +0400 Subject: [PATCH 2/3] Format using ruff --- mla/datasets/base.py | 6 ++- mla/ensemble/base.py | 11 +++- mla/ensemble/gbm.py | 14 +++++- mla/ensemble/random_forest.py | 29 +++++++++-- mla/ensemble/tree.py | 61 +++++++++++++++++++---- mla/fm.py | 18 +++++-- mla/gaussian_mixture.py | 16 ++++-- mla/kmeans.py | 18 +++++-- mla/knn.py | 5 +- mla/linear_models.py | 4 +- mla/metrics/metrics.py | 4 +- mla/naive_bayes.py | 2 +- mla/neuralnet/constraints.py | 4 +- mla/neuralnet/layers/convnet.py | 51 ++++++++++++++----- mla/neuralnet/layers/normalization.py | 8 +-- mla/neuralnet/layers/recurrent/lstm.py | 41 ++++++++++++--- mla/neuralnet/layers/recurrent/rnn.py | 15 +++++- mla/neuralnet/loss.py | 1 + mla/neuralnet/nnet.py | 10 +++- mla/neuralnet/optimizers.py | 47 ++++++++++++----- mla/neuralnet/parameters.py | 9 +++- mla/neuralnet/regularizers.py | 4 +- mla/neuralnet/tests/test_activations.py | 4 +- mla/neuralnet/tests/test_optimizers.py | 11 +++- mla/pca.py | 8 +-- mla/rbm.py | 27 +++++++--- mla/rl/dqn.py | 20 ++++++-- mla/svm/svm.py | 12 +++-- mla/tests/test_classification_accuracy.py | 16 ++++-- mla/tests/test_reduction.py | 11 +++- mla/tests/test_regression_accuracy.py | 12 ++++- mla/tsne.py | 8 ++- 32 files changed, 397 insertions(+), 110 deletions(-) diff --git a/mla/datasets/base.py b/mla/datasets/base.py index 9aa30c77..efefbcd9 100644 --- a/mla/datasets/base.py +++ b/mla/datasets/base.py @@ -39,7 +39,9 @@ def load(dataset="training", digits=np.arange(10)): images = zeros((N, rows, cols), dtype=uint8) labels = zeros((N, 1), dtype=int8) for i in range(len(ind)): - images[i] = array(img[ind[i] * rows * cols: (ind[i] + 1) * rows * cols]).reshape((rows, cols)) + images[i] = array( + img[ind[i] * rows * cols : (ind[i] + 1) * rows * cols] + ).reshape((rows, cols)) labels[i] = lbl[ind[i]] return images, labels @@ -64,7 +66,7 @@ def load_nietzsche(): sentences = [] next_chars = [] for i in range(0, len(text) - maxlen, step): - sentences.append(text[i: i + maxlen]) + sentences.append(text[i : i + maxlen]) next_chars.append(text[i + maxlen]) X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) diff --git a/mla/ensemble/base.py b/mla/ensemble/base.py index 2ba41b2e..c1a97fb4 100644 --- a/mla/ensemble/base.py +++ b/mla/ensemble/base.py @@ -14,13 +14,20 @@ def f_entropy(p): def information_gain(y, splits): - splits_entropy = sum([f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits]) + splits_entropy = sum( + [f_entropy(split) * (float(split.shape[0]) / y.shape[0]) for split in splits] + ) return f_entropy(y) - splits_entropy def mse_criterion(y, splits): y_mean = np.mean(y) - return -sum([np.sum((split - y_mean) ** 2) * (float(split.shape[0]) / y.shape[0]) for split in splits]) + return -sum( + [ + np.sum((split - y_mean) ** 2) * (float(split.shape[0]) / y.shape[0]) + for split in splits + ] + ) def xgb_criterion(y, left, right, loss): diff --git a/mla/ensemble/gbm.py b/mla/ensemble/gbm.py index 7a956616..58fbff3b 100644 --- a/mla/ensemble/gbm.py +++ b/mla/ensemble/gbm.py @@ -1,5 +1,6 @@ # coding:utf-8 import numpy as np + # logistic function from scipy.special import expit @@ -32,7 +33,9 @@ def hess(self, actual, predicted): def approximate(self, actual, predicted): """Approximate leaf value.""" - return self.grad(actual, predicted).sum() / (self.hess(actual, predicted).sum() + self.regularization) + return self.grad(actual, predicted).sum() / ( + self.hess(actual, predicted).sum() + self.regularization + ) def transform(self, pred): """Transform predictions values.""" @@ -73,7 +76,14 @@ def transform(self, output): class GradientBoosting(BaseEstimator): """Gradient boosting trees with Taylor's expansion approximation (as in xgboost).""" - def __init__(self, n_estimators, learning_rate=0.1, max_features=10, max_depth=2, min_samples_split=10): + def __init__( + self, + n_estimators, + learning_rate=0.1, + max_features=10, + max_depth=2, + min_samples_split=10, + ): self.min_samples_split = min_samples_split self.learning_rate = learning_rate self.max_depth = max_depth diff --git a/mla/ensemble/random_forest.py b/mla/ensemble/random_forest.py index f4fc5491..57eddf31 100644 --- a/mla/ensemble/random_forest.py +++ b/mla/ensemble/random_forest.py @@ -7,7 +7,14 @@ class RandomForest(BaseEstimator): - def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion=None): + def __init__( + self, + n_estimators=10, + max_features=None, + min_samples_split=10, + max_depth=None, + criterion=None, + ): """Base class for RandomForest. Parameters @@ -44,7 +51,7 @@ def _train(self): self.y, max_features=self.max_features, min_samples_split=self.min_samples_split, - max_depth=self.max_depth + max_depth=self.max_depth, ) def _predict(self, X=None): @@ -52,7 +59,14 @@ def _predict(self, X=None): class RandomForestClassifier(RandomForest): - def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="entropy"): + def __init__( + self, + n_estimators=10, + max_features=None, + min_samples_split=10, + max_depth=None, + criterion="entropy", + ): super(RandomForestClassifier, self).__init__( n_estimators=n_estimators, max_features=max_features, @@ -85,7 +99,14 @@ def _predict(self, X=None): class RandomForestRegressor(RandomForest): - def __init__(self, n_estimators=10, max_features=None, min_samples_split=10, max_depth=None, criterion="mse"): + def __init__( + self, + n_estimators=10, + max_features=None, + min_samples_split=10, + max_depth=None, + criterion="mse", + ): super(RandomForestRegressor, self).__init__( n_estimators=n_estimators, max_features=max_features, diff --git a/mla/ensemble/tree.py b/mla/ensemble/tree.py index 0b4e9769..3e6ae6f7 100644 --- a/mla/ensemble/tree.py +++ b/mla/ensemble/tree.py @@ -58,14 +58,24 @@ def _find_best_split(self, X, target, n_features): gain = self.criterion(target["y"], splits) else: # Gradient boosting - left, right = split_dataset(X, target, column, value, return_X=False) + left, right = split_dataset( + X, target, column, value, return_X=False + ) gain = xgb_criterion(target, left, right, self.loss) if (max_gain is None) or (gain > max_gain): max_col, max_val, max_gain = column, value, gain return max_col, max_val, max_gain - def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01): + def _train( + self, + X, + target, + max_features=None, + min_samples_split=10, + max_depth=None, + minimum_gain=0.01, + ): try: # Exit from recursion using assert syntax assert X.shape[0] > min_samples_split @@ -86,22 +96,43 @@ def _train(self, X, target, max_features=None, min_samples_split=10, max_depth=N self.impurity = gain # Split dataset - left_X, right_X, left_target, right_target = split_dataset(X, target, column, value) + left_X, right_X, left_target, right_target = split_dataset( + X, target, column, value + ) # Grow left and right child self.left_child = Tree(self.regression, self.criterion, self.n_classes) self.left_child._train( - left_X, left_target, max_features, min_samples_split, max_depth - 1, minimum_gain + left_X, + left_target, + max_features, + min_samples_split, + max_depth - 1, + minimum_gain, ) self.right_child = Tree(self.regression, self.criterion, self.n_classes) self.right_child._train( - right_X, right_target, max_features, min_samples_split, max_depth - 1, minimum_gain + right_X, + right_target, + max_features, + min_samples_split, + max_depth - 1, + minimum_gain, ) except AssertionError: self._calculate_leaf_value(target) - def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, minimum_gain=0.01, loss=None): + def train( + self, + X, + target, + max_features=None, + min_samples_split=10, + max_depth=None, + minimum_gain=0.01, + loss=None, + ): """Build a decision tree from training set. Parameters @@ -131,11 +162,16 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No self.loss = loss if not self.regression: - self.n_classes = len(np.unique(target['y'])) - - self._train(X, target, max_features=max_features, min_samples_split=min_samples_split, - max_depth=max_depth, minimum_gain=minimum_gain) + self.n_classes = len(np.unique(target["y"])) + self._train( + X, + target, + max_features=max_features, + min_samples_split=min_samples_split, + max_depth=max_depth, + minimum_gain=minimum_gain, + ) def _calculate_leaf_value(self, targets): """Find optimal value for leaf.""" @@ -149,7 +185,10 @@ def _calculate_leaf_value(self, targets): self.outcome = np.mean(targets["y"]) else: # Probability for classification task - self.outcome = np.bincount(targets["y"], minlength=self.n_classes) / targets["y"].shape[0] + self.outcome = ( + np.bincount(targets["y"], minlength=self.n_classes) + / targets["y"].shape[0] + ) def predict_row(self, row): """Predict single row.""" diff --git a/mla/fm.py b/mla/fm.py index 85964a99..6e1c1423 100644 --- a/mla/fm.py +++ b/mla/fm.py @@ -6,6 +6,7 @@ from mla.base import BaseEstimator from mla.metrics import mean_squared_error, binary_crossentropy + np.random.seed(9999) """ @@ -16,7 +17,14 @@ class BaseFM(BaseEstimator): def __init__( - self, n_components=10, max_iter=100, init_stdev=0.1, learning_rate=0.01, reg_v=0.1, reg_w=0.5, reg_w0=0.0 + self, + n_components=10, + max_iter=100, + init_stdev=0.1, + learning_rate=0.01, + reg_v=0.1, + reg_w=0.5, + reg_w0=0.0, ): """Simplified factorization machines implementation using SGD optimizer.""" self.reg_w0 = reg_w0 @@ -36,7 +44,9 @@ def fit(self, X, y=None): # Feature weights self.w = np.zeros(self.n_features) # Factor weights - self.v = np.random.normal(scale=self.init_stdev, size=(self.n_features, self.n_components)) + self.v = np.random.normal( + scale=self.init_stdev, size=(self.n_features, self.n_components) + ) self._train() def _train(self): @@ -56,7 +66,9 @@ def _factor_step(self, loss): def _predict(self, X=None): linear_output = np.dot(X, self.w) - factors_output = np.sum(np.dot(X, self.v) ** 2 - np.dot(X ** 2, self.v ** 2), axis=1) / 2.0 + factors_output = ( + np.sum(np.dot(X, self.v) ** 2 - np.dot(X**2, self.v**2), axis=1) / 2.0 + ) return self.wo + linear_output + factors_output diff --git a/mla/gaussian_mixture.py b/mla/gaussian_mixture.py index d2f1b9b2..8ab82fb0 100644 --- a/mla/gaussian_mixture.py +++ b/mla/gaussian_mixture.py @@ -68,7 +68,9 @@ def _initialize(self): """ self.weights = np.ones(self.K) if self.init == "random": - self.means = [self.X[x] for x in random.sample(range(self.n_samples), self.K)] + self.means = [ + self.X[x] for x in random.sample(range(self.n_samples), self.K) + ] self.covs = [np.cov(self.X.T) for _ in range(self.K)] elif self.init == "kmeans": @@ -106,7 +108,9 @@ def _M_step(self): def _is_converged(self): """Check if the difference of the latest two likelihood is less than the tolerance.""" - if (len(self.likelihood) > 1) and (self.likelihood[-1] - self.likelihood[-2] <= self.tolerance): + if (len(self.likelihood) > 1) and ( + self.likelihood[-1] - self.likelihood[-2] <= self.tolerance + ): return True return False @@ -123,7 +127,9 @@ def _get_likelihood(self, data): n_data = data.shape[0] likelihoods = np.zeros([n_data, self.K]) for c in range(self.K): - likelihoods[:, c] = multivariate_normal.pdf(data, self.means[c], self.covs[c]) + likelihoods[:, c] = multivariate_normal.pdf( + data, self.means[c], self.covs[c] + ) return likelihoods def _get_weighted_likelihood(self, likelihood): @@ -151,7 +157,9 @@ def plot(self, data=None, ax=None, holdon=False): margin = 0.2 xmax, ymax = self.X.max(axis=0) + margin xmin, ymin = self.X.min(axis=0) - margin - axis_X, axis_Y = np.meshgrid(np.arange(xmin, xmax, delta), np.arange(ymin, ymax, delta)) + axis_X, axis_Y = np.meshgrid( + np.arange(xmin, xmax, delta), np.arange(ymin, ymax, delta) + ) def grid_gaussian_pdf(mean, cov): grid_array = np.array(list(zip(axis_X.flatten(), axis_Y.flatten()))) diff --git a/mla/kmeans.py b/mla/kmeans.py index 261de8e1..fb3bc513 100644 --- a/mla/kmeans.py +++ b/mla/kmeans.py @@ -53,7 +53,9 @@ def _initialize_centroids(self, init): """Set the initial centroids.""" if init == "random": - self.centroids = [self.X[x] for x in random.sample(range(self.n_samples), self.K)] + self.centroids = [ + self.X[x] for x in random.sample(range(self.n_samples), self.K) + ] elif init == "++": self.centroids = [random.choice(self.X)] while len(self.centroids) < self.K: @@ -88,7 +90,6 @@ def _get_predictions(self): return predictions def _assign(self, centroids): - for row in range(self.n_samples): for i, cluster in enumerate(self.clusters): if row in cluster: @@ -115,11 +116,13 @@ def _get_centroid(self, cluster): def _dist_from_centers(self): """Calculate distance from centers.""" - return np.array([min([euclidean_distance(x, c) for c in self.centroids]) for x in self.X]) + return np.array( + [min([euclidean_distance(x, c) for c in self.centroids]) for x in self.X] + ) def _choose_next_center(self): distances = self._dist_from_centers() - squared_distances = distances ** 2 + squared_distances = distances**2 probs = squared_distances / squared_distances.sum() ind = np.random.choice(self.X.shape[0], 1, p=probs)[0] return self.X[ind] @@ -141,7 +144,12 @@ def plot(self, ax=None, holdon=False): for i, index in enumerate(self.clusters): point = np.array(data[index]).T - ax.scatter(*point, c=[palette[i], ]) + ax.scatter( + *point, + c=[ + palette[i], + ], + ) for point in self.centroids: ax.scatter(*point, marker="x", linewidths=10) diff --git a/mla/knn.py b/mla/knn.py index 30bdd339..f24c1d79 100644 --- a/mla/knn.py +++ b/mla/knn.py @@ -40,7 +40,10 @@ def _predict_x(self, x): distances = (self.distance_func(x, example) for example in self.X) # Sort all examples by their distance to x and keep their target value. - neighbors = sorted(((dist, target) for (dist, target) in zip(distances, self.y)), key=lambda x: x[0]) + neighbors = sorted( + ((dist, target) for (dist, target) in zip(distances, self.y)), + key=lambda x: x[0], + ) # Get targets of the k-nn and aggregate them (most common one or # average). diff --git a/mla/linear_models.py b/mla/linear_models.py index d7d4e9c9..a7d351dc 100644 --- a/mla/linear_models.py +++ b/mla/linear_models.py @@ -12,7 +12,9 @@ class BasicRegression(BaseEstimator): - def __init__(self, lr=0.001, penalty="None", C=0.01, tolerance=0.0001, max_iters=1000): + def __init__( + self, lr=0.001, penalty="None", C=0.01, tolerance=0.0001, max_iters=1000 + ): """Basic class for implementing continuous regression estimators which are trained with gradient descent optimization on their particular loss function. diff --git a/mla/metrics/metrics.py b/mla/metrics/metrics.py index 9fb20ded..3cffcee3 100644 --- a/mla/metrics/metrics.py +++ b/mla/metrics/metrics.py @@ -71,7 +71,9 @@ def hinge(actual, predicted): def binary_crossentropy(actual, predicted): predicted = np.clip(predicted, EPS, 1 - EPS) - return np.mean(-np.sum(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted))) + return np.mean( + -np.sum(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted)) + ) # aliases diff --git a/mla/naive_bayes.py b/mla/naive_bayes.py index 4b7f4cd2..16ba89e9 100644 --- a/mla/naive_bayes.py +++ b/mla/naive_bayes.py @@ -56,6 +56,6 @@ def _pdf(self, n_class, x): mean = self._mean[n_class] var = self._var[n_class] - numerator = np.exp(-(x - mean) ** 2 / (2 * var)) + numerator = np.exp(-((x - mean) ** 2) / (2 * var)) denominator = np.sqrt(2 * np.pi * var) return numerator / denominator diff --git a/mla/neuralnet/constraints.py b/mla/neuralnet/constraints.py index ccc1e4a2..d33e410e 100644 --- a/mla/neuralnet/constraints.py +++ b/mla/neuralnet/constraints.py @@ -15,7 +15,7 @@ def __init__(self, m=2, axis=0): self.m = m def clip(self, p): - norms = np.sqrt(np.sum(p ** 2, axis=self.axis)) + norms = np.sqrt(np.sum(p**2, axis=self.axis)) desired = np.clip(norms, 0, self.m) p = p * (desired / (EPSILON + norms)) return p @@ -37,4 +37,4 @@ def __init__(self, axis=0): self.axis = axis def clip(self, p): - return p / (EPSILON + np.sqrt(np.sum(p ** 2, axis=self.axis))) + return p / (EPSILON + np.sqrt(np.sum(p**2, axis=self.axis))) diff --git a/mla/neuralnet/layers/convnet.py b/mla/neuralnet/layers/convnet.py index 485706c1..40ecef1f 100644 --- a/mla/neuralnet/layers/convnet.py +++ b/mla/neuralnet/layers/convnet.py @@ -6,7 +6,14 @@ class Convolution(Layer, ParamMixin): - def __init__(self, n_filters=8, filter_shape=(3, 3), padding=(0, 0), stride=(1, 1), parameters=None): + def __init__( + self, + n_filters=8, + filter_shape=(3, 3), + padding=(0, 0), + stride=(1, 1), + parameters=None, + ): """A 2D convolutional layer. Input shape: (n_images, n_channels, height, width) @@ -57,10 +64,14 @@ def backward_pass(self, delta): self._params.update_grad("W", d_W) d_c = np.dot(delta, self.col_W.T) - return column_to_image(d_c, self.last_input.shape, self.filter_shape, self.stride, self.padding) + return column_to_image( + d_c, self.last_input.shape, self.filter_shape, self.stride, self.padding + ) def shape(self, x_shape): - height, width = convoltuion_shape(self.height, self.width, self.filter_shape, self.stride, self.padding) + height, width = convoltuion_shape( + self.height, self.width, self.filter_shape, self.stride, self.padding + ) return x_shape[0], self.n_filters, height, width @@ -91,7 +102,9 @@ def forward_pass(self, X): arg_max = np.argmax(col, axis=1) out = np.max(col, axis=1) self.arg_max = arg_max - return out.reshape(n_images, out_height, out_width, n_channels).transpose(0, 3, 1, 2) + return out.reshape(n_images, out_height, out_width, n_channels).transpose( + 0, 3, 1, 2 + ) def backward_pass(self, delta): delta = delta.transpose(0, 2, 3, 1) @@ -102,10 +115,14 @@ def backward_pass(self, delta): y_max = y_max.reshape(delta.shape + (pool_size,)) dcol = y_max.reshape(y_max.shape[0] * y_max.shape[1] * y_max.shape[2], -1) - return column_to_image(dcol, self.last_input.shape, self.pool_shape, self.stride, self.padding) + return column_to_image( + dcol, self.last_input.shape, self.pool_shape, self.stride, self.padding + ) def shape(self, x_shape): - h, w = convoltuion_shape(x_shape[2], x_shape[3], self.pool_shape, self.stride, self.padding) + h, w = convoltuion_shape( + x_shape[2], x_shape[3], self.pool_shape, self.stride, self.padding + ) return x_shape[0], x_shape[1], h, w @@ -137,7 +154,9 @@ def image_to_column(images, filter_shape, stride, padding): """ n_images, n_channels, height, width = images.shape f_height, f_width = filter_shape - out_height, out_width = convoltuion_shape(height, width, (f_height, f_width), stride, padding) + out_height, out_width = convoltuion_shape( + height, width, (f_height, f_width), stride, padding + ) images = np.pad(images, ((0, 0), (0, 0), padding, padding), mode="constant") col = np.zeros((n_images, n_channels, f_height, f_width, out_height, out_width)) @@ -145,7 +164,9 @@ def image_to_column(images, filter_shape, stride, padding): y_bound = y + stride[0] * out_height for x in range(f_width): x_bound = x + stride[1] * out_width - col[:, :, y, x, :, :] = images[:, :, y: y_bound: stride[0], x: x_bound: stride[1]] + col[:, :, y, x, :, :] = images[ + :, :, y : y_bound : stride[0], x : x_bound : stride[1] + ] col = col.transpose(0, 4, 5, 1, 2, 3).reshape(n_images * out_height * out_width, -1) return col @@ -165,10 +186,12 @@ def column_to_image(columns, images_shape, filter_shape, stride, padding): n_images, n_channels, height, width = images_shape f_height, f_width = filter_shape - out_height, out_width = convoltuion_shape(height, width, (f_height, f_width), stride, padding) - columns = columns.reshape(n_images, out_height, out_width, n_channels, f_height, f_width).transpose( - 0, 3, 4, 5, 1, 2 + out_height, out_width = convoltuion_shape( + height, width, (f_height, f_width), stride, padding ) + columns = columns.reshape( + n_images, out_height, out_width, n_channels, f_height, f_width + ).transpose(0, 3, 4, 5, 1, 2) img_h = height + 2 * padding[0] + stride[0] - 1 img_w = width + 2 * padding[1] + stride[1] - 1 @@ -177,9 +200,11 @@ def column_to_image(columns, images_shape, filter_shape, stride, padding): y_bound = y + stride[0] * out_height for x in range(f_width): x_bound = x + stride[1] * out_width - img[:, :, y: y_bound: stride[0], x: x_bound: stride[1]] += columns[:, :, y, x, :, :] + img[:, :, y : y_bound : stride[0], x : x_bound : stride[1]] += columns[ + :, :, y, x, :, : + ] - return img[:, :, padding[0]: height + padding[0], padding[1]: width + padding[1]] + return img[:, :, padding[0] : height + padding[0], padding[1] : width + padding[1]] def convoltuion_shape(img_height, img_width, filter_shape, stride, padding): diff --git a/mla/neuralnet/layers/normalization.py b/mla/neuralnet/layers/normalization.py index 4f601a81..4ed77054 100644 --- a/mla/neuralnet/layers/normalization.py +++ b/mla/neuralnet/layers/normalization.py @@ -47,7 +47,7 @@ def _forward_pass(self, X): xmu = X - mu # step3: following the lower branch - calculation denominator - sq = xmu ** 2 + sq = xmu**2 # step4: calculate variance var = 1.0 / N * np.sum(sq, axis=0) @@ -90,7 +90,9 @@ def forward_pass(self, X): out_flat = self._forward_pass(x_flat) return out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2) else: - raise NotImplementedError("Unknown model with dimensions = {}".format(len(X.shape))) + raise NotImplementedError( + "Unknown model with dimensions = {}".format(len(X.shape)) + ) def _backward_pass(self, delta): # unfold the variables stored in cache @@ -112,7 +114,7 @@ def _backward_pass(self, delta): dxmu1 = dxhat * ivar # step6 - dsqrtvar = -1.0 / (sqrtvar ** 2) * divar + dsqrtvar = -1.0 / (sqrtvar**2) * divar # step5 dvar = 0.5 * 1.0 / np.sqrt(var + self.eps) * dsqrtvar diff --git a/mla/neuralnet/layers/recurrent/lstm.py b/mla/neuralnet/layers/recurrent/lstm.py index e0b4ce0f..9997f612 100644 --- a/mla/neuralnet/layers/recurrent/lstm.py +++ b/mla/neuralnet/layers/recurrent/lstm.py @@ -15,7 +15,14 @@ class LSTM(Layer, ParamMixin): - def __init__(self, hidden_dim, activation="tanh", inner_init="orthogonal", parameters=None, return_sequences=True): + def __init__( + self, + hidden_dim, + activation="tanh", + inner_init="orthogonal", + parameters=None, + return_sequences=True, + ): self.return_sequences = return_sequences self.hidden_dim = hidden_dim self.inner_init = get_initializer(inner_init) @@ -84,13 +91,18 @@ def forward_pass(self, X): self.states = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) self.outputs = np.zeros((n_samples, n_timesteps + 1, self.hidden_dim)) - self.gates = {k: np.zeros((n_samples, n_timesteps, self.hidden_dim)) for k in ["i", "f", "o", "c"]} + self.gates = { + k: np.zeros((n_samples, n_timesteps, self.hidden_dim)) + for k in ["i", "f", "o", "c"] + } self.states[:, -1, :] = self.hprev self.outputs[:, -1, :] = self.oprev for i in range(n_timesteps): - t_gates = np.dot(X[:, i, :], self.W) + np.dot(self.outputs[:, i - 1, :], self.U) + t_gates = np.dot(X[:, i, :], self.W) + np.dot( + self.outputs[:, i - 1, :], self.U + ) # Input self.gates["i"][:, i, :] = sigmoid(t_gates[:, 0, :] + p["b_i"]) @@ -106,7 +118,9 @@ def forward_pass(self, X): self.states[:, i - 1, :] * self.gates["f"][:, i, :] + self.gates["i"][:, i, :] * self.gates["c"][:, i, :] ) - self.outputs[:, i, :] = self.gates["o"][:, i, :] * self.activation(self.states[:, i, :]) + self.outputs[:, i, :] = self.gates["o"][:, i, :] * self.activation( + self.states[:, i, :] + ) self.hprev = self.states[:, n_timesteps - 1, :].copy() self.oprev = self.outputs[:, n_timesteps - 1, :].copy() @@ -130,7 +144,12 @@ def backward_pass(self, delta): # Backpropagation through time for i in reversed(range(n_timesteps)): - dhi = delta[:, i, :] * self.gates["o"][:, i, :] * self.activation_d(self.states[:, i, :]) + dh_next + dhi = ( + delta[:, i, :] + * self.gates["o"][:, i, :] + * self.activation_d(self.states[:, i, :]) + + dh_next + ) og = delta[:, i, :] * self.activation(self.states[:, i, :]) de_o = og * self.sigmoid_d(self.gates["o"][:, i, :]) @@ -139,17 +158,23 @@ def backward_pass(self, delta): grad["U_o"] += np.dot(self.outputs[:, i - 1, :].T, de_o) grad["b_o"] += de_o.sum(axis=0) - de_f = (dhi * self.states[:, i - 1, :]) * self.sigmoid_d(self.gates["f"][:, i, :]) + de_f = (dhi * self.states[:, i - 1, :]) * self.sigmoid_d( + self.gates["f"][:, i, :] + ) grad["W_f"] += np.dot(self.last_input[:, i, :].T, de_f) grad["U_f"] += np.dot(self.outputs[:, i - 1, :].T, de_f) grad["b_f"] += de_f.sum(axis=0) - de_i = (dhi * self.gates["c"][:, i, :]) * self.sigmoid_d(self.gates["i"][:, i, :]) + de_i = (dhi * self.gates["c"][:, i, :]) * self.sigmoid_d( + self.gates["i"][:, i, :] + ) grad["W_i"] += np.dot(self.last_input[:, i, :].T, de_i) grad["U_i"] += np.dot(self.outputs[:, i - 1, :].T, de_i) grad["b_i"] += de_i.sum(axis=0) - de_c = (dhi * self.gates["i"][:, i, :]) * self.activation_d(self.gates["c"][:, i, :]) + de_c = (dhi * self.gates["i"][:, i, :]) * self.activation_d( + self.gates["c"][:, i, :] + ) grad["W_c"] += np.dot(self.last_input[:, i, :].T, de_c) grad["U_c"] += np.dot(self.outputs[:, i - 1, :].T, de_c) grad["b_c"] += de_c.sum(axis=0) diff --git a/mla/neuralnet/layers/recurrent/rnn.py b/mla/neuralnet/layers/recurrent/rnn.py index 3110a261..232daf10 100644 --- a/mla/neuralnet/layers/recurrent/rnn.py +++ b/mla/neuralnet/layers/recurrent/rnn.py @@ -10,7 +10,14 @@ class RNN(Layer, ParamMixin): """Vanilla RNN.""" - def __init__(self, hidden_dim, activation="tanh", inner_init="orthogonal", parameters=None, return_sequences=True): + def __init__( + self, + hidden_dim, + activation="tanh", + inner_init="orthogonal", + parameters=None, + return_sequences=True, + ): self.return_sequences = return_sequences self.hidden_dim = hidden_dim self.inner_init = get_initializer(inner_init) @@ -53,7 +60,11 @@ def forward_pass(self, X): p = self._params for i in range(n_timesteps): - states[:, i, :] = np.tanh(np.dot(X[:, i, :], p["W"]) + np.dot(states[:, i - 1, :], p["U"]) + p["b"]) + states[:, i, :] = np.tanh( + np.dot(X[:, i, :], p["W"]) + + np.dot(states[:, i - 1, :], p["U"]) + + p["b"] + ) self.states = states self.hprev = states[:, n_timesteps - 1, :].copy() diff --git a/mla/neuralnet/loss.py b/mla/neuralnet/loss.py index 8be4dbe3..30def7e9 100644 --- a/mla/neuralnet/loss.py +++ b/mla/neuralnet/loss.py @@ -1,4 +1,5 @@ from ..metrics import mse, logloss, mae, hinge, binary_crossentropy + categorical_crossentropy = logloss diff --git a/mla/neuralnet/nnet.py b/mla/neuralnet/nnet.py index 2809fb0b..c39b96be 100644 --- a/mla/neuralnet/nnet.py +++ b/mla/neuralnet/nnet.py @@ -23,7 +23,15 @@ class NeuralNet(BaseEstimator): fit_required = False def __init__( - self, layers, optimizer, loss, max_epochs=10, batch_size=64, metric="mse", shuffle=False, verbose=True + self, + layers, + optimizer, + loss, + max_epochs=10, + batch_size=64, + metric="mse", + shuffle=False, + verbose=True, ): self.verbose = verbose self.shuffle = shuffle diff --git a/mla/neuralnet/optimizers.py b/mla/neuralnet/optimizers.py index fc9ae1bb..1e2a68a5 100644 --- a/mla/neuralnet/optimizers.py +++ b/mla/neuralnet/optimizers.py @@ -45,7 +45,9 @@ def train_epoch(self, network): batch = zip(X_batch, y_batch) if network.verbose: - batch = tqdm(batch, total=int(np.ceil(network.n_samples / network.batch_size))) + batch = tqdm( + batch, total=int(np.ceil(network.n_samples / network.batch_size)) + ) for X, y in batch: loss = np.mean(network.update(X, y)) @@ -106,7 +108,7 @@ def update(self, network): for i, layer in enumerate(network.parametric_layers): for n in layer.parameters.keys(): grad = layer.parameters.grad[n] - self.accu[i][n] += grad ** 2 + self.accu[i][n] += grad**2 step = self.lr * grad / (np.sqrt(self.accu[i][n]) + self.eps) layer.parameters.step(n, -step) @@ -128,12 +130,20 @@ def update(self, network): for i, layer in enumerate(network.parametric_layers): for n in layer.parameters.keys(): grad = layer.parameters.grad[n] - self.accu[i][n] = self.rho * self.accu[i][n] + (1.0 - self.rho) * grad ** 2 - step = grad * np.sqrt(self.d_accu[i][n] + self.eps) / np.sqrt(self.accu[i][n] + self.eps) + self.accu[i][n] = ( + self.rho * self.accu[i][n] + (1.0 - self.rho) * grad**2 + ) + step = ( + grad + * np.sqrt(self.d_accu[i][n] + self.eps) + / np.sqrt(self.accu[i][n] + self.eps) + ) layer.parameters.step(n, -step * self.lr) # Update delta accumulator - self.d_accu[i][n] = self.rho * self.d_accu[i][n] + (1.0 - self.rho) * step ** 2 + self.d_accu[i][n] = ( + self.rho * self.d_accu[i][n] + (1.0 - self.rho) * step**2 + ) def setup(self, network): # Accumulators @@ -155,7 +165,9 @@ def update(self, network): for i, layer in enumerate(network.parametric_layers): for n in layer.parameters.keys(): grad = layer.parameters.grad[n] - self.accu[i][n] = (self.rho * self.accu[i][n]) + (1.0 - self.rho) * (grad ** 2) + self.accu[i][n] = (self.rho * self.accu[i][n]) + (1.0 - self.rho) * ( + grad**2 + ) step = self.lr * grad / (np.sqrt(self.accu[i][n]) + self.eps) layer.parameters.step(n, -step) @@ -169,7 +181,6 @@ def setup(self, network): class Adam(Optimizer): def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8): - self.epsilon = epsilon self.beta_2 = beta_2 self.beta_1 = beta_1 @@ -181,9 +192,17 @@ def update(self, network): for i, layer in enumerate(network.parametric_layers): for n in layer.parameters.keys(): grad = layer.parameters.grad[n] - self.ms[i][n] = (self.beta_1 * self.ms[i][n]) + (1.0 - self.beta_1) * grad - self.vs[i][n] = (self.beta_2 * self.vs[i][n]) + (1.0 - self.beta_2) * grad ** 2 - lr = self.lr * np.sqrt(1.0 - self.beta_2 ** self.t) / (1.0 - self.beta_1 ** self.t) + self.ms[i][n] = (self.beta_1 * self.ms[i][n]) + ( + 1.0 - self.beta_1 + ) * grad + self.vs[i][n] = (self.beta_2 * self.vs[i][n]) + ( + 1.0 - self.beta_2 + ) * grad**2 + lr = ( + self.lr + * np.sqrt(1.0 - self.beta_2**self.t) + / (1.0 - self.beta_1**self.t) + ) step = lr * self.ms[i][n] / (np.sqrt(self.vs[i][n]) + self.epsilon) layer.parameters.step(n, -step) @@ -201,7 +220,6 @@ def setup(self, network): class Adamax(Optimizer): def __init__(self, learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8): - self.epsilon = epsilon self.beta_2 = beta_2 self.beta_1 = beta_1 @@ -215,7 +233,12 @@ def update(self, network): self.ms[i][n] = self.beta_1 * self.ms[i][n] + (1.0 - self.beta_1) * grad self.us[i][n] = np.maximum(self.beta_2 * self.us[i][n], np.abs(grad)) - step = self.lr / (1 - self.beta_1 ** self.t) * self.ms[i][n] / (self.us[i][n] + self.epsilon) + step = ( + self.lr + / (1 - self.beta_1**self.t) + * self.ms[i][n] + / (self.us[i][n] + self.epsilon) + ) layer.parameters.step(n, -step) self.t += 1 diff --git a/mla/neuralnet/parameters.py b/mla/neuralnet/parameters.py index 65873c26..e81f18db 100644 --- a/mla/neuralnet/parameters.py +++ b/mla/neuralnet/parameters.py @@ -5,7 +5,14 @@ class Parameters(object): - def __init__(self, init="glorot_uniform", scale=0.5, bias=1.0, regularizers=None, constraints=None): + def __init__( + self, + init="glorot_uniform", + scale=0.5, + bias=1.0, + regularizers=None, + constraints=None, + ): """A container for layer's parameters. Parameters diff --git a/mla/neuralnet/regularizers.py b/mla/neuralnet/regularizers.py index 53bc3b37..723cccea 100644 --- a/mla/neuralnet/regularizers.py +++ b/mla/neuralnet/regularizers.py @@ -25,11 +25,11 @@ def _penalty(self, weights): class L2(Regularizer): def _penalty(self, weights): - return self.C * weights ** 2 + return self.C * weights**2 class ElasticNet(Regularizer): """Linear combination of L1 and L2 penalties.""" def _penalty(self, weights): - return 0.5 * self.C * weights ** 2 + (1.0 - self.C) * np.abs(weights) + return 0.5 * self.C * weights**2 + (1.0 - self.C) * np.abs(weights) diff --git a/mla/neuralnet/tests/test_activations.py b/mla/neuralnet/tests/test_activations.py index fc5de9ad..7bb095a6 100644 --- a/mla/neuralnet/tests/test_activations.py +++ b/mla/neuralnet/tests/test_activations.py @@ -14,6 +14,8 @@ def test_softplus(): # naive implementation of np.log(1 + np.exp(z_max)) will overflow # naive implementation of z + np.log(1 + 1 / np.exp(z_min)) will # throw ZeroDivisionError - outputs = np.array([np.log(2.0), np.log1p(np.exp(1.0)), np.log1p(np.exp(-1.0)), 0.0, z_max]) + outputs = np.array( + [np.log(2.0), np.log1p(np.exp(1.0)), np.log1p(np.exp(-1.0)), 0.0, z_max] + ) assert np.allclose(outputs, softplus(inputs)) diff --git a/mla/neuralnet/tests/test_optimizers.py b/mla/neuralnet/tests/test_optimizers.py index a42b5036..0c9c7d84 100644 --- a/mla/neuralnet/tests/test_optimizers.py +++ b/mla/neuralnet/tests/test_optimizers.py @@ -10,13 +10,20 @@ def clasifier(optimizer): X, y = make_classification( - n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 + n_samples=1000, + n_features=100, + n_informative=75, + random_state=1111, + n_classes=2, + class_sep=2.5, ) y = one_hot(y) X -= np.mean(X, axis=0) X /= np.std(X, axis=0) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.15, random_state=1111 + ) model = NeuralNet( layers=[ diff --git a/mla/pca.py b/mla/pca.py index 64d6a614..9155f919 100644 --- a/mla/pca.py +++ b/mla/pca.py @@ -47,10 +47,12 @@ def _decompose(self, X): s, Vh = np.linalg.eig(np.cov(X.T)) Vh = Vh.T - s_squared = s ** 2 + s_squared = s**2 variance_ratio = s_squared / s_squared.sum() - logging.info("Explained variance ratio: %s" % (variance_ratio[0: self.n_components])) - self.components = Vh[0: self.n_components] + logging.info( + "Explained variance ratio: %s" % (variance_ratio[0 : self.n_components]) + ) + self.components = Vh[0 : self.n_components] def transform(self, X): X = X.copy() diff --git a/mla/rbm.py b/mla/rbm.py index f74234ef..90143b7e 100644 --- a/mla/rbm.py +++ b/mla/rbm.py @@ -43,7 +43,6 @@ def fit(self, X, y=None): self._train() def _init_weights(self): - self.W = np.random.randn(self.n_visible, self.n_hidden) * 0.1 # Bias for visible and hidden units @@ -63,15 +62,29 @@ def _train(self): hidden_states = self._sample(positive_hidden) # sample hidden state h1 positive_associations = np.dot(batch.T, positive_hidden) - negative_visible = sigmoid(np.dot(hidden_states, self.W.T) + self.bias_v) - negative_visible = self._sample(negative_visible) # use the sampled hidden state h1 to sample v1 - negative_hidden = sigmoid(np.dot(negative_visible, self.W) + self.bias_h) + negative_visible = sigmoid( + np.dot(hidden_states, self.W.T) + self.bias_v + ) + negative_visible = self._sample( + negative_visible + ) # use the sampled hidden state h1 to sample v1 + negative_hidden = sigmoid( + np.dot(negative_visible, self.W) + self.bias_h + ) negative_associations = np.dot(negative_visible.T, negative_hidden) lr = self.lr / float(batch.shape[0]) - self.W += lr * ((positive_associations - negative_associations) / float(self.batch_size)) - self.bias_h += lr * (negative_hidden.sum(axis=0) - negative_associations.sum(axis=0)) - self.bias_v += lr * (np.asarray(batch.sum(axis=0)).squeeze() - negative_visible.sum(axis=0)) + self.W += lr * ( + (positive_associations - negative_associations) + / float(self.batch_size) + ) + self.bias_h += lr * ( + negative_hidden.sum(axis=0) - negative_associations.sum(axis=0) + ) + self.bias_v += lr * ( + np.asarray(batch.sum(axis=0)).squeeze() + - negative_visible.sum(axis=0) + ) error += np.sum((batch - negative_visible) ** 2) diff --git a/mla/rl/dqn.py b/mla/rl/dqn.py index ec8c6c06..42b58097 100644 --- a/mla/rl/dqn.py +++ b/mla/rl/dqn.py @@ -19,7 +19,14 @@ class DQN(object): def __init__( - self, n_episodes=500, gamma=0.99, batch_size=32, epsilon=1.0, decay=0.005, min_epsilon=0.1, memory_limit=500 + self, + n_episodes=500, + gamma=0.99, + batch_size=32, + epsilon=1.0, + decay=0.005, + min_epsilon=0.1, + memory_limit=500, ): """Deep Q learning implementation. @@ -48,7 +55,9 @@ def __init__( def init_environment(self, name="CartPole-v0", monitor=False): self.env = gym.make(name) if monitor: - self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False) + self.env = wrappers.Monitor( + self.env, name, force=True, video_callable=False + ) self.n_states = self.env.observation_space.shape[0] self.n_actions = self.env.action_space.n @@ -122,11 +131,14 @@ def train(self, render=False): while len(self.replay) > self.memory_limit: self.replay.pop(0) - self.epsilon = self.min_epsilon + (1.0 - self.min_epsilon) * np.exp(-self.decay * ep) + self.epsilon = self.min_epsilon + (1.0 - self.min_epsilon) * np.exp( + -self.decay * ep + ) max_reward = max(max_reward, total_reward) logger.info( - "Episode: %s, reward %s, epsilon %s, max reward %s" % (ep, total_reward, self.epsilon, max_reward) + "Episode: %s, reward %s, epsilon %s, max reward %s" + % (ep, total_reward, self.epsilon, max_reward) ) logging.info("Training finished.") diff --git a/mla/svm/svm.py b/mla/svm/svm.py index b9695e13..a4b38aea 100644 --- a/mla/svm/svm.py +++ b/mla/svm/svm.py @@ -71,15 +71,21 @@ def _train(self): self.alpha[j] -= (self.y[j] * (e_i - e_j)) / eta self.alpha[j] = self.clip(self.alpha[j], H, L) - self.alpha[i] = self.alpha[i] + self.y[i] * self.y[j] * (alpha_jo - self.alpha[j]) + self.alpha[i] = self.alpha[i] + self.y[i] * self.y[j] * ( + alpha_jo - self.alpha[j] + ) # Find intercept b1 = ( - self.b - e_i - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, i] + self.b + - e_i + - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, i] - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[i, j] ) b2 = ( - self.b - e_j - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[j, j] + self.b + - e_j + - self.y[j] * (self.alpha[j] - alpha_jo) * self.K[j, j] - self.y[i] * (self.alpha[i] - alpha_io) * self.K[i, j] ) if 0 < self.alpha[i] < self.C: diff --git a/mla/tests/test_classification_accuracy.py b/mla/tests/test_classification_accuracy.py index f4fb42aa..8698daa1 100644 --- a/mla/tests/test_classification_accuracy.py +++ b/mla/tests/test_classification_accuracy.py @@ -24,9 +24,17 @@ # Generate a random regression problem X, y = make_classification( - n_samples=750, n_features=10, n_informative=8, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 + n_samples=750, + n_features=10, + n_informative=8, + random_state=1111, + n_classes=2, + class_sep=2.5, + n_redundant=0, +) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.12, random_state=1111 ) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=1111) # All classifiers except convnet, RNN, LSTM. @@ -83,7 +91,9 @@ def test_mlp(): def test_gbm(): - model = GradientBoostingClassifier(n_estimators=25, max_depth=3, max_features=5, learning_rate=0.1) + model = GradientBoostingClassifier( + n_estimators=25, max_depth=3, max_features=5, learning_rate=0.1 + ) model.fit(X_train, y_train) predictions = model.predict(X_test) assert roc_auc_score(y_test, predictions) >= 0.95 diff --git a/mla/tests/test_reduction.py b/mla/tests/test_reduction.py index da87fc82..b9346147 100644 --- a/mla/tests/test_reduction.py +++ b/mla/tests/test_reduction.py @@ -16,7 +16,12 @@ def dataset(): # Generate a random binary classification problem. return make_classification( - n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 + n_samples=1000, + n_features=100, + n_informative=75, + random_state=1111, + n_classes=2, + class_sep=2.5, ) @@ -24,7 +29,9 @@ def dataset(): @pytest.mark.skip() def test_PCA(dataset): X, y = dataset - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.25, random_state=1111 + ) p = PCA(50, solver="eigen") # fit PCA with training set, not the entire dataset diff --git a/mla/tests/test_regression_accuracy.py b/mla/tests/test_regression_accuracy.py index 5c13b7f7..33cf2f57 100644 --- a/mla/tests/test_regression_accuracy.py +++ b/mla/tests/test_regression_accuracy.py @@ -14,9 +14,17 @@ # Generate a random regression problem X, y = make_regression( - n_samples=1000, n_features=10, n_informative=10, n_targets=1, noise=0.05, random_state=1111, bias=0.5 + n_samples=1000, + n_features=10, + n_informative=10, + n_targets=1, + noise=0.05, + random_state=1111, + bias=0.5, +) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.25, random_state=1111 ) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) def test_linear(): diff --git a/mla/tsne.py b/mla/tsne.py index c76dc89f..c2995d6d 100644 --- a/mla/tsne.py +++ b/mla/tsne.py @@ -19,7 +19,9 @@ class TSNE(BaseEstimator): y_required = False - def __init__(self, n_components=2, perplexity=30.0, max_iter=200, learning_rate=500): + def __init__( + self, n_components=2, perplexity=30.0, max_iter=200, learning_rate=500 + ): """A t-Distributed Stochastic Neighbor Embedding implementation. Parameters @@ -67,7 +69,9 @@ def fit_transform(self, X, y=None): grad = 4 * np.dot((pmul * P[i] - Q_n[i]) * Q[i], Y[i] - Y) grads[i] = grad - gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ((grads > 0) == (velocity > 0)) + gains = (gains + 0.2) * ((grads > 0) != (velocity > 0)) + (gains * 0.8) * ( + (grads > 0) == (velocity > 0) + ) gains = gains.clip(min=self.min_gain) velocity = momentum * velocity - self.lr * (gains * grads) From 8fb8bb4282f37ad797052930dc12d0faace7bdb6 Mon Sep 17 00:00:00 2001 From: Artem Golubin Date: Mon, 16 Jun 2025 01:18:12 +0400 Subject: [PATCH 3/3] Format using ruff --- examples/gbm.py | 33 +++++++++++++++++++++++++++------ examples/kmeans.py | 4 +++- examples/linear_models.py | 23 +++++++++++++++++++---- examples/naive_bayes.py | 12 ++++++++++-- examples/nearest_neighbors.py | 16 +++++++++++++--- examples/nnet_convnet_mnist.py | 9 ++++++++- examples/nnet_mlp.py | 24 ++++++++++++++++++++---- examples/nnet_rnn_binary_add.py | 4 +++- examples/pca.py | 11 +++++++++-- examples/random_forest.py | 31 +++++++++++++++++++++++++------ examples/rbm.py | 2 +- examples/svm.py | 15 ++++++++++++--- examples/t-sne.py | 8 +++++++- 13 files changed, 157 insertions(+), 35 deletions(-) diff --git a/examples/gbm.py b/examples/gbm.py index 99f14d55..f3f85fdf 100644 --- a/examples/gbm.py +++ b/examples/gbm.py @@ -18,11 +18,21 @@ def classification(): # Generate a random binary classification problem. X, y = make_classification( - n_samples=350, n_features=15, n_informative=10, random_state=1111, n_classes=2, class_sep=1.0, n_redundant=0 + n_samples=350, + n_features=15, + n_informative=10, + random_state=1111, + n_classes=2, + class_sep=1.0, + n_redundant=0, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.15, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) - model = GradientBoostingClassifier(n_estimators=50, max_depth=4, max_features=8, learning_rate=0.1) + model = GradientBoostingClassifier( + n_estimators=50, max_depth=4, max_features=8, learning_rate=0.1 + ) model.fit(X_train, y_train) predictions = model.predict(X_test) print(predictions) @@ -34,14 +44,25 @@ def classification(): def regression(): # Generate a random regression problem X, y = make_regression( - n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 + n_samples=500, + n_features=5, + n_informative=5, + n_targets=1, + noise=0.05, + random_state=1111, + bias=0.5, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) model = GradientBoostingRegressor(n_estimators=25, max_depth=5, max_features=3) model.fit(X_train, y_train) predictions = model.predict(X_test) - print("regression, mse: %s" % mean_squared_error(y_test.flatten(), predictions.flatten())) + print( + "regression, mse: %s" + % mean_squared_error(y_test.flatten(), predictions.flatten()) + ) if __name__ == "__main__": diff --git a/examples/kmeans.py b/examples/kmeans.py index 9e08a689..9714c43e 100644 --- a/examples/kmeans.py +++ b/examples/kmeans.py @@ -5,7 +5,9 @@ def kmeans_example(plot=False): - X, y = make_blobs(centers=4, n_samples=500, n_features=2, shuffle=True, random_state=42) + X, y = make_blobs( + centers=4, n_samples=500, n_features=2, shuffle=True, random_state=42 + ) clusters = len(np.unique(y)) k = KMeans(K=clusters, max_iters=150, init="++") k.fit(X) diff --git a/examples/linear_models.py b/examples/linear_models.py index e553661d..9bdb1cd0 100644 --- a/examples/linear_models.py +++ b/examples/linear_models.py @@ -17,9 +17,17 @@ def regression(): # Generate a random regression problem X, y = make_regression( - n_samples=10000, n_features=100, n_informative=75, n_targets=1, noise=0.05, random_state=1111, bias=0.5 + n_samples=10000, + n_features=100, + n_informative=75, + n_targets=1, + noise=0.05, + random_state=1111, + bias=0.5, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.25, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) model = LinearRegression(lr=0.01, max_iters=2000, penalty="l2", C=0.03) model.fit(X_train, y_train) @@ -30,9 +38,16 @@ def regression(): def classification(): # Generate a random binary classification problem. X, y = make_classification( - n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 + n_samples=1000, + n_features=100, + n_informative=75, + random_state=1111, + n_classes=2, + class_sep=2.5, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) model = LogisticRegression(lr=0.01, max_iters=500, penalty="l1", C=0.01) model.fit(X_train, y_train) diff --git a/examples/naive_bayes.py b/examples/naive_bayes.py index 383e997d..9e051d48 100644 --- a/examples/naive_bayes.py +++ b/examples/naive_bayes.py @@ -8,9 +8,17 @@ def classification(): # Generate a random binary classification problem. X, y = make_classification( - n_samples=1000, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 + n_samples=1000, + n_features=10, + n_informative=10, + random_state=1111, + n_classes=2, + class_sep=2.5, + n_redundant=0, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) model = NaiveBayesClassifier() model.fit(X_train, y_train) diff --git a/examples/nearest_neighbors.py b/examples/nearest_neighbors.py index d68bf208..f551ab05 100644 --- a/examples/nearest_neighbors.py +++ b/examples/nearest_neighbors.py @@ -13,9 +13,17 @@ def regression(): # Generate a random regression problem X, y = make_regression( - n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 + n_samples=500, + n_features=5, + n_informative=5, + n_targets=1, + noise=0.05, + random_state=1111, + bias=0.5, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.25, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) model = knn.KNNRegressor(k=5, distance_func=distance.euclidean) model.fit(X_train, y_train) @@ -35,7 +43,9 @@ def classification(): class_sep=1.5, ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, random_state=1111 + ) clf = knn.KNNClassifier(k=5, distance_func=distance.euclidean) diff --git a/examples/nnet_convnet_mnist.py b/examples/nnet_convnet_mnist.py index aff0b361..4161a060 100644 --- a/examples/nnet_convnet_mnist.py +++ b/examples/nnet_convnet_mnist.py @@ -3,7 +3,14 @@ from mla.datasets import load_mnist from mla.metrics import accuracy from mla.neuralnet import NeuralNet -from mla.neuralnet.layers import Activation, Convolution, MaxPooling, Flatten, Dropout, Parameters +from mla.neuralnet.layers import ( + Activation, + Convolution, + MaxPooling, + Flatten, + Dropout, + Parameters, +) from mla.neuralnet.layers import Dense from mla.neuralnet.optimizers import Adadelta from mla.utils import one_hot diff --git a/examples/nnet_mlp.py b/examples/nnet_mlp.py index f35a4120..484989b3 100644 --- a/examples/nnet_mlp.py +++ b/examples/nnet_mlp.py @@ -23,10 +23,17 @@ def classification(): # Generate a random binary classification problem. X, y = make_classification( - n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 + n_samples=1000, + n_features=100, + n_informative=75, + random_state=1111, + n_classes=2, + class_sep=2.5, ) y = one_hot(y) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.15, random_state=1111 + ) model = NeuralNet( layers=[ @@ -51,9 +58,18 @@ def classification(): def regression(): # Generate a random regression problem - X, y = make_regression(n_samples=5000, n_features=25, n_informative=25, n_targets=1, random_state=100, noise=0.05) + X, y = make_regression( + n_samples=5000, + n_features=25, + n_informative=25, + n_targets=1, + random_state=100, + noise=0.05, + ) y *= 0.01 - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, random_state=1111 + ) model = NeuralNet( layers=[ diff --git a/examples/nnet_rnn_binary_add.py b/examples/nnet_rnn_binary_add.py index d019201b..5057cc5b 100644 --- a/examples/nnet_rnn_binary_add.py +++ b/examples/nnet_rnn_binary_add.py @@ -38,7 +38,9 @@ def addition_dataset(dim=10, n_samples=10000, batch_size=64): # Generate target variable (a+b) y[i, :, 0] = list(reversed([int(x) for x in binary_format.format(a + b)])) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=1111 + ) # Round number of examples for batch processing train_b = (X_train.shape[0] // batch_size) * batch_size diff --git a/examples/pca.py b/examples/pca.py index 4b7bf3ac..10321ada 100644 --- a/examples/pca.py +++ b/examples/pca.py @@ -12,11 +12,18 @@ # Generate a random binary classification problem. X, y = make_classification( - n_samples=1000, n_features=100, n_informative=75, random_state=1111, n_classes=2, class_sep=2.5 + n_samples=1000, + n_features=100, + n_informative=75, + random_state=1111, + n_classes=2, + class_sep=2.5, ) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1111) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.25, random_state=1111 +) for s in ["svd", "eigen"]: p = PCA(15, solver=s) diff --git a/examples/random_forest.py b/examples/random_forest.py index ad0c2261..f2fcb44e 100644 --- a/examples/random_forest.py +++ b/examples/random_forest.py @@ -19,17 +19,25 @@ def classification(): # Generate a random binary classification problem. X, y = make_classification( - n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0 + n_samples=500, + n_features=10, + n_informative=10, + random_state=1111, + n_classes=2, + class_sep=2.5, + n_redundant=0, ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.15, random_state=1111 + ) model = RandomForestClassifier(n_estimators=10, max_depth=4) model.fit(X_train, y_train) predictions_prob = model.predict(X_test)[:, 1] predictions = np.argmax(model.predict(X_test), axis=1) - #print(predictions.shape) + # print(predictions.shape) print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions_prob)) print("classification, accuracy score: %s" % accuracy_score(y_test, predictions)) @@ -37,14 +45,25 @@ def classification(): def regression(): # Generate a random regression problem X, y = make_regression( - n_samples=500, n_features=5, n_informative=5, n_targets=1, noise=0.05, random_state=1111, bias=0.5 + n_samples=500, + n_features=5, + n_informative=5, + n_targets=1, + noise=0.05, + random_state=1111, + bias=0.5, + ) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.1, random_state=1111 ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1111) model = RandomForestRegressor(n_estimators=50, max_depth=10, max_features=3) model.fit(X_train, y_train) predictions = model.predict(X_test) - print("regression, mse: %s" % mean_squared_error(y_test.flatten(), predictions.flatten())) + print( + "regression, mse: %s" + % mean_squared_error(y_test.flatten(), predictions.flatten()) + ) if __name__ == "__main__": diff --git a/examples/rbm.py b/examples/rbm.py index 74f2a772..2d167644 100644 --- a/examples/rbm.py +++ b/examples/rbm.py @@ -13,7 +13,7 @@ def print_curve(rbm): def moving_average(a, n=25): ret = np.cumsum(a, dtype=float) ret[n:] = ret[n:] - ret[:-n] - return ret[n - 1:] / n + return ret[n - 1 :] / n plt.plot(moving_average(rbm.errors)) plt.show() diff --git a/examples/svm.py b/examples/svm.py index 19535a4d..062a8710 100644 --- a/examples/svm.py +++ b/examples/svm.py @@ -16,17 +16,26 @@ def classification(): # Generate a random binary classification problem. X, y = make_classification( - n_samples=1200, n_features=10, n_informative=5, random_state=1111, n_classes=2, class_sep=1.75 + n_samples=1200, + n_features=10, + n_informative=5, + random_state=1111, + n_classes=2, + class_sep=1.75, ) # Convert y to {-1, 1} y = (y * 2) - 1 - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1111) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=1111 + ) for kernel in [RBF(gamma=0.1), Linear()]: model = SVM(max_iter=500, kernel=kernel, C=0.6) model.fit(X_train, y_train) predictions = model.predict(X_test) - print("Classification accuracy (%s): %s" % (kernel, accuracy(y_test, predictions))) + print( + "Classification accuracy (%s): %s" % (kernel, accuracy(y_test, predictions)) + ) if __name__ == "__main__": diff --git a/examples/t-sne.py b/examples/t-sne.py index 36873e91..bd08581d 100644 --- a/examples/t-sne.py +++ b/examples/t-sne.py @@ -8,7 +8,13 @@ logging.basicConfig(level=logging.DEBUG) X, y = make_classification( - n_samples=500, n_features=10, n_informative=5, n_redundant=0, random_state=1111, n_classes=2, class_sep=2.5 + n_samples=500, + n_features=10, + n_informative=5, + n_redundant=0, + random_state=1111, + n_classes=2, + class_sep=2.5, ) p = TSNE(2, max_iter=500)