I found it useful to have a generic scikit-learn wrapper that takes a custom loss function and a custom parametric_model callable. The estimator then fits the parameters of the parametric model under bounds using scipy.optimize.minimize. From the interface point of view, fit does the minimization while predict just applies the model with the fitted optimal_params_. What do you think? Would this be useful? Should I contribute it to sklearn?
Example:
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_is_fitted
class MinimizeRegressor(BaseEstimator, RegressorMixin):
    """Generic regressor that fits a user-supplied parametric model by
    minimizing a user-supplied loss with scipy.optimize.minimize."""

    def __init__(self, parametric_model: callable, loss: callable,
                 param_names: list, initial_params: dict = None,
                 minimize_kwargs: dict = None):
        # sklearn convention: store constructor arguments unmodified so that
        # get_params / set_params / clone work; all defaulting happens in fit.
        # A **kwargs catch-all would be silently dropped by clone(), because
        # BaseEstimator.get_params only reports named __init__ parameters, so
        # the minimize options are taken as an explicit dict instead.
        self.parametric_model = parametric_model
        self.loss = loss
        self.param_names = param_names
        self.initial_params = initial_params
        self.minimize_kwargs = minimize_kwargs

    def _objective(self, params: np.ndarray, X: pd.DataFrame, y: np.ndarray) -> float:
        param_dict = dict(zip(self.param_names, params))
        y_pred = self.parametric_model(param_dict, X)
        return self.loss(y, y_pred, param_dict)

    def fit(self, X: pd.DataFrame, y: np.ndarray):
        # Validate shapes and dtypes, but keep the original DataFrame:
        # check_X_y returns a bare ndarray, which would break parametric
        # models that index columns by name.
        _, y = check_X_y(X, y, accept_sparse=False, ensure_min_features=2)
        initial_params = self.initial_params or {}
        x0 = np.array([initial_params.get(name, 0.0) for name in self.param_names])
        self.optimization_ = minimize(
            fun=self._objective,
            x0=x0,
            args=(X, y),
            **(self.minimize_kwargs or {}),
        )
        self.optimal_params_ = dict(zip(self.param_names, self.optimization_.x))
        return self

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        check_is_fitted(self)
        return self.parametric_model(self.optimal_params_, X)
# Complex non-linear use case --------------------------------------------------

def nonlinear_model(params: dict, X: pd.DataFrame) -> np.ndarray:
    """Model combining several non-linear components."""
    return (
        params['a'] * np.maximum(X['x1'], 0)                       # rectified linear component
        + params['b'] * np.exp(params['c'] * X['x2'])              # exponential growth/decay
        + params['d'] * np.log1p(np.abs(X['x3'])) * (X['x4'] > 0)  # conditional log term
    )

def relative_mse_loss(y_true: np.ndarray, y_pred: np.ndarray, params: dict) -> float:
    """Relative MSE with a small L2 penalty on the parameters for stability."""
    eps = 1e-8  # prevent division by zero
    relative_error = y_true / (y_pred + eps) - 1
    return np.mean(relative_error ** 2) + 1e-4 * np.sum(np.array(list(params.values())) ** 2)
# Configure and test the regressor
regressor = MinimizeRegressor(
    parametric_model=nonlinear_model,
    loss=relative_mse_loss,
    param_names=['a', 'b', 'c', 'd'],
    initial_params={'a': 1.0, 'b': 0.5, 'c': -0.1, 'd': 2.0},
    minimize_kwargs=dict(
        method='L-BFGS-B',
        bounds=[
            (0, None),     # a: non-negative coefficient for the ReLU term
            (0, None),     # b: non-negative scale for the exponential
            (None, 0),     # c: non-positive exponent (decay)
            (None, None),  # d: unbounded coefficient
        ],
        options={'maxiter': 1000, 'ftol': 1e-6},
    ),
)
# Generate synthetic data with complex relationships
np.random.seed(42)
X = pd.DataFrame({
    'x1': np.random.normal(2, 1, 100),
    'x2': np.random.uniform(0.1, 2, 100),
    'x3': np.random.lognormal(1, 0.5, 100),
    'x4': np.random.choice([-1, 1], 100),
})
true_params = {'a': 2.5, 'b': 1.2, 'c': -0.3, 'd': 3.0}
y = nonlinear_model(true_params, X) + np.random.normal(0, 0.5, 100)
# Fit and evaluate
regressor.fit(X, y)
predictions = regressor.predict(X)

print("Optimized parameters vs true values:")
print(pd.DataFrame({
    'True': true_params,
    'Estimated': regressor.optimal_params_,
}).T.round(3))

print("\nSample predictions vs actual:")
print(pd.DataFrame({
    'Actual': y[:5],
    'Predicted': predictions[:5],
}).round(3))

# Summary metric for the output below (mean absolute relative error assumed)
print(f"\nMean Relative Error: {np.mean(np.abs(y / predictions - 1)):.3f}")
Output:

Optimized parameters vs true values:
               a      b      c      d
True       2.500  1.200 -0.300  3.000
Estimated  2.485  1.186 -0.293  3.053

Sample predictions vs actual:
   Actual  Predicted
0   7.337      7.407
1   6.198      6.120
2   3.214      3.300
3   7.089      7.146
4   6.927      6.881

Mean Relative Error: 0.056
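
Since the wrapper follows the estimator contract (constructor arguments stored verbatim, fitted attributes with trailing underscores, fit returning self), it should compose with sklearn's model-selection machinery, which is the main argument for contributing it. A minimal compatibility sketch, assuming the code above; the cv=3 split is an arbitrary choice for illustration:

from sklearn.base import clone
from sklearn.model_selection import cross_val_score

# clone() rebuilds the estimator from get_params(); GridSearchCV and
# cross_val_score rely on this round-trip, so no settings may be lost.
assert clone(regressor).get_params() == regressor.get_params()

# RegressorMixin supplies the default R^2 scorer.
scores = cross_val_score(regressor, X, y, cv=3)
print("CV R^2 scores:", np.round(scores, 3))

Note that an actual sklearn contribution would also need to pass check_estimator from sklearn.utils.estimator_checks, which this sketch does not attempt: the estimator assumes named DataFrame columns and has required constructor arguments.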