sdpython
diff --git a/‎_doc/api/mlmodel.rst
Lines changed: 0 additions & 3 deletions b/‎_doc/api/mlmodel.rst
Lines changed: 0 additions & 3 deletions
diff --git a/‎_doc/conf.py
Lines changed: 35 additions & 0 deletions b/‎_doc/conf.py
Lines changed: 35 additions & 0 deletions
diff --git a/‎mlinsights/mlbatch/pipeline_cache.py
Lines changed: 4 additions & 5 deletions b/‎mlinsights/mlbatch/pipeline_cache.py
Lines changed: 4 additions & 5 deletions
diff --git a/‎mlinsights/mlmodel/categories_to_integers.py
Lines changed: 6 additions & 3 deletions b/‎mlinsights/mlmodel/categories_to_integers.py
Lines changed: 6 additions & 3 deletions
diff --git a/‎mlinsights/mlmodel/extended_features.py
Lines changed: 1 addition & 3 deletions b/‎mlinsights/mlmodel/extended_features.py
Lines changed: 1 addition & 3 deletions
diff --git a/‎mlinsights/mlmodel/kmeans_constraint.py
Lines changed: 1 addition & 0 deletions b/‎mlinsights/mlmodel/kmeans_constraint.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎mlinsights/mlmodel/kmeans_l1.py
Lines changed: 0 additions & 18 deletions b/‎mlinsights/mlmodel/kmeans_l1.py
Lines changed: 0 additions & 18 deletions
diff --git a/‎mlinsights/mlmodel/ml_featurizer.py
Lines changed: 4 additions & 4 deletions b/‎mlinsights/mlmodel/ml_featurizer.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎mlinsights/mlmodel/predictable_tsne.py
Lines changed: 1 addition & 1 deletion b/‎mlinsights/mlmodel/predictable_tsne.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlinsights/mlmodel/quantile_mlpregressor.py
Lines changed: 33 additions & 36 deletions b/‎mlinsights/mlmodel/quantile_mlpregressor.py
Lines changed: 33 additions & 36 deletions
@@ -170,7 +170,6 @@ SimpleRegressorCriterion
 ++++++++++++++++++++++++
 
 .. autoclass:: mlinsights.mlmodel.piecewise_tree_regression_criterion.SimpleRegressorCriterion
-    :members:
 
 SimpleRegressorCriterionFast
 ++++++++++++++++++++++++++++
@@ -179,7 +178,6 @@ A similar design but a much faster implementation close to what
 :epkg:`scikit-learn` implements.
 
 .. autoclass:: mlinsights.mlmodel.piecewise_tree_regression_criterion_fast.SimpleRegressorCriterionFast
-    :members:
 
 LinearRegressorCriterion
 ++++++++++++++++++++++++
@@ -190,4 +188,3 @@ a line. The mean square error is the error made with a linear regressor
 and not a constant anymore.
 
 .. autoclass:: mlinsights.mlmodel.piecewise_tree_regression_criterion_linear.LinearRegressorCriterion
-    :members:
@@ -107,7 +107,9 @@
 }
 
 epkg_dictionary = {
+    "bootstrap": "https://en.wikipedia.org/wiki/Bootstrapping_(statistics)",
     "cmake": "https://cmake.org/",
+    "CountVectorizer": "https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html",
     "CPUExecutionProvider": "https://onnxruntime.ai/docs/execution-providers/",
     "cublasLtMatmul": "https://docs.nvidia.com/cuda/cublas/index.html?highlight=cublasLtMatmul#cublasltmatmul",
     "CUDA": "https://developer.nvidia.com/",
@@ -116,10 +118,18 @@
     "CUDAExecutionProvider": "https://onnxruntime.ai/docs/execution-providers/",
     "custom_gemm.cu": "https://github.com/sdpython/mlinsights/blob/main/mlinsights/ortops/tutorial/cuda/custom_gemm.cu",
     "cython": "https://cython.org/",
+    "decision tree": "https://en.wikipedia.org/wiki/Decision_tree",
+    "dataframe": "https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html",
     "DOT": "https://graphviz.org/doc/info/lang.html",
     "eigen": "https://eigen.tuxfamily.org/",
     "gcc": "https://gcc.gnu.org/",
+    "Iris": "https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html",
     "JIT": "https://en.wikipedia.org/wiki/Just-in-time_compilation",
+    "KMeans": "https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html",
+    "k-means": "https://en.wikipedia.org/wiki/K-means_clustering",
+    "L1": "https://en.wikipedia.org/wiki/Norm_(mathematics)#Absolute-value_norm",
+    "L2": "https://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm",
+    "matplotlib": "https://matplotlib.org/",
     "nccl": "https://developer.nvidia.com/nccl",
     "numpy": "https://numpy.org/",
     "numba": "https://numba.pydata.org/",
@@ -137,16 +147,41 @@
         "model-optimizations/graph-optimizations.html"
     ),
     "openmp": "https://www.openmp.org/",
+    "pandas": (
+        "http://pandas.pydata.org/pandas-docs/stable/",
+        ("http://pandas.pydata.org/pandas-docs/stable/generated/pandas.{0}.html", 1),
+        (
+            "http://pandas.pydata.org/pandas-docs/stable/generated/pandas.{0}.{1}.html",
+            2,
+        ),
+    ),
+    "Pillow": "https://pillow.readthedocs.io/",
     "pybind11": "https://github.com/pybind/pybind11",
+    "Python": "https://www.python.org/",
     "python": "https://www.python.org/",
     "Python C API": "https://docs.python.org/3/c-api/index.html",
     "pytorch": "https://pytorch.org/",
+    "RandomForestRegressor": "https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html",
     "scikit-learn": "https://scikit-learn.org/stable/",
     "scipy": "https://scipy.org/",
+    "sklearn": (
+        "http://scikit-learn.org/stable/",
+        ("http://scikit-learn.org/stable/modules/generated/{0}.html", 1),
+        ("http://scikit-learn.org/stable/modules/generated/{0}.{1}.html", 2),
+    ),
     "sphinx-gallery": "https://github.com/sphinx-gallery/sphinx-gallery",
+    "t-SNE": "https://lvdmaaten.github.io/tsne/",
+    "TfidfVectorizer": "https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html",
     "torch": "https://pytorch.org/docs/stable/torch.html",
     "tqdm": "https://tqdm.github.io/",
     "TreeEnsembleClassifier": "https://onnx.ai/onnx/operators/onnx_aionnxml_TreeEnsembleClassifier.html",
     "TreeEnsembleRegressor": "https://onnx.ai/onnx/operators/onnx_aionnxml_TreeEnsembleRegressor.html",
+    "TSNE": "https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html",
     "WSL": "https://docs.microsoft.com/en-us/windows/wsl/install",
+    "*py": (
+        "https://docs.python.org/3/",
+        ("https://docs.python.org/3/library/{0}.html", 1),
+        ("https://docs.python.org/3/library/{0}.html#{0}.{1}", 2),
+        ("https://docs.python.org/3/library/{0}.html#{0}.{1}.{2}", 3),
+    ),
 }
@@ -20,11 +20,10 @@ class PipelineCache(Pipeline):
         If True, the time elapsed while fitting each step will be printed as it
         is completed.
 
-    Other attributes:
-
-    :param named_steps: bunch object, a dictionary with attribute access
-        Read-only attribute to access any step parameter by user given name.
-        Keys are step names and values are steps parameters.
+    The attribute *named_steps* is a bunch object, a dictionary
+    with attribute access. It is a read-only attribute to access
+    any step parameter by user given name. Keys are step names
+    and values are step parameters.
     """
 
     def __init__(self, steps, cache_name=None, verbose=False):
 
@@ -6,7 +6,8 @@
 class CategoriesToIntegers(BaseEstimator, TransformerMixin):
     """
     Does something similar to what
-    `DictVectorizer <http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.DictVectorizer.html>`_
+    `DictVectorizer
+    <http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.DictVectorizer.html>`_
     does but in a transformer. The method *fit* retains all categories,
     the method *transform* transforms categories into integers.
     Categories are sorted by columns. If the method *transform* tries to convert
@@ -32,7 +33,7 @@ class CategoriesToIntegers(BaseEstimator, TransformerMixin):
 
             import pandas
             from mlinsights.mlmodel import CategoriesToIntegers
-            df = pandas.DataFrame( [{"cat": "a"}, {"cat": "b"}] )
+            df = pandas.DataFrame([{"cat": "a"}, {"cat": "b"}])
             trans = CategoriesToIntegers()
             trans.fit(df)
             newdf = trans.transform(df)
@@ -64,6 +65,7 @@ def fit(self, X, y=None, **fit_params):
             Training data
         :param y: iterable, default=None
             Training targets.
+        :param fit_params: additional fitting parameters
         :return: self
         """
         if not isinstance(X, pandas.DataFrame):
@@ -112,7 +114,7 @@ def _build_schema(self):
 
         return schema, position, new_vector
 
-    def transform(self, X, y=None, **fit_params):
+    def transform(self, X, y=None):
         """
         Transforms categories in numerical features based on the list
         of categories found by method *fit*.
@@ -204,6 +206,7 @@ def fit_transform(self, X, y=None, **fit_params):
             Training data
         :param y: iterable, default=None
             Training targets.
+        :param fit_params: additional fitting parameters
         :return: Dataframe, *X* with categories.
         """
         return self.fit(X, y=y, **fit_params).transform(X, y)
@@ -119,6 +119,7 @@ def fit(self, X, y=None):
 
         :param X: array-like, shape (n_samples, n_features)
             The data.
+        :param y: targets
         :return: self : instance
         """
         self.n_input_features_ = X.shape[1]
@@ -144,9 +145,6 @@ def transform(self, X):
         :param X: array-like, shape [n_samples, n_features]
             The data to transform, row by row.
             rns
-        :param XP: numpy.ndarray, shape [n_samples, NP]
-            The matrix of features, where NP is the number of polynomial
-            features generated from the combination of inputs.
         """
         n_features = X.shape[1]
         if n_features != self.n_input_features_:
 
@@ -153,6 +153,7 @@ def constraint_kmeans(
         :param X: features
         :param sample_weight: sample weight
         :param state: state
+        :param learning_rate: learning rate
         :param history: keeps evolution of centers
         """
         labels, centers, inertia, weights, iter_, all_centers = constraint_kmeans(
 
@@ -446,17 +446,6 @@ class KMeansL1L2(KMeans):
         single run.
     :param tol: float, default=1e-4
         Relative tolerance with regards to inertia to declare convergence.
-    :param precompute_distances: default='lloyd'
-        Precompute distances (faster but takes more memory).
-
-        'lloyd' : do not precompute distances if n_samples * n_clusters > 12
-        million. This corresponds to about 100MB overhead per job using
-        double precision.
-
-        True : always precompute distances.
-
-        False : never precompute distances.
-
     :param verbose: int, default=0
         Verbosity mode.
     :param random_state: int, RandomState instance, default=None
@@ -471,13 +460,6 @@ class KMeansL1L2(KMeans):
         numerical differences may be introduced by subtracting and then adding
         the data mean, in this case it will also not ensure that data is
         C-contiguous which may cause a significant slowdown.
-    :param n_jobs: int, default=None
-        The number of jobs to use for the computation. This works by computing
-        each of the n_init runs in parallel.
-
-        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
-        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
-        for more details.
     :param algorithm: {"lloyd", "elkan"}, default="lloyd"
         K-means algorithm to use. The classical EM-style algorithm is "lloyd".
         The "elkan" variation is more efficient by using the triangle
 
@@ -18,11 +18,11 @@ def model_featurizer(model, **params):
     a vector into features produced by the model.
     It can be the output itself or intermediate results.
     The model can come from :epkg:`scikit-learn`,
-    :epkg:`keras` or :epkg:`torch`.
+    or :epkg:`torch`.
 
-    @param      model       model
-    @param      params      additional parameters
-    @return                 function
+    :param model: model
+    :param params: additional parameters
+    :return: function
     """
     tried = []
     if isinstance(model, LogisticRegression):
 
@@ -23,7 +23,7 @@ class PredictableTSNE(BaseEstimator, TransformerMixin):
     :param normalize: normalizes the outputs, centers and normalizes
         the output of the *t-SNE* and applies that same
         normalization to the prediction of the estimator
-    :param keep_tsne_output: if True, keep raw outputs of
+    :param keep_tsne_outputs: if True, the raw output of
         :epkg:`TSNE` is stored in member `tsne_outputs_`
     """
 
 
@@ -239,25 +239,23 @@ class QuantileMLPRegressor(CustomizedMultilayerPerceptron, RegressorMixin):
         hidden layer.
     :param activation: {'identity', 'logistic', 'tanh', 'relu'}, default 'relu'
         Activation function for the hidden layer.
-        - 'identity', no-op activation, useful to implement linear bottleneck,
-          returns :math:`f(x) = x`
-        - 'logistic', the logistic sigmoid function,
-          returns :math:`f(x) = 1 / (1 + exp(-x))`.
-        - 'tanh', the hyperbolic tan function,
-          returns :math:`f(x) = tanh(x)`.
-        - 'relu', the rectified linear unit function,
-          returns :math:`f(x) = \\max(0, x)`.
+        'identity', no-op activation, useful to implement linear bottleneck,
+        returns :math:`f(x) = x`.
+        'logistic', the logistic sigmoid function,
+        returns :math:`f(x) = 1 / (1 + exp(-x))`.
+        'tanh', the hyperbolic tan function, returns :math:`f(x) = tanh(x)`.
+        'relu', the rectified linear unit function,
+        returns :math:`f(x) = \\max(0, x)`.
     :param solver: ``{'lbfgs', 'sgd', 'adam'}``, default 'adam'
-        The solver for weight optimization.
-        - *'lbfgs'* is an optimizer in the family of quasi-Newton methods.
-        - *'sgd'* refers to stochastic gradient descent.
-        - *'adam'* refers to a stochastic gradient-based optimizer proposed by
-          Kingma, Diederik, and Jimmy Ba
+        The solver for weight optimization.
+        *'lbfgs'* is an optimizer in the family of quasi-Newton methods.
+        *'sgd'* refers to stochastic gradient descent.
+        *'adam'* refers to a stochastic gradient-based optimizer proposed by
+        Kingma, Diederik, and Jimmy Ba.
         Note: The default solver 'adam' works pretty well on relatively
         large datasets (with thousands of training samples or more) in terms of
-        both training time and validation score.
-        For small datasets, however, 'lbfgs' can converge faster and perform
-        better.
+        both training time and validation score. For small datasets, however,
+        'lbfgs' can converge faster and perform better.
     :param alpha: float, optional, default 0.0001
         :epkg:`L2` penalty (regularization term) parameter.
     :param batch_size: int, optional, default 'auto'
@@ -266,17 +264,15 @@ class QuantileMLPRegressor(CustomizedMultilayerPerceptron, RegressorMixin):
         When set to "auto", `batch_size=min(200, n_samples)`
     :param learning_rate: {'constant', 'invscaling', 'adaptive'}, default 'constant'
         Learning rate schedule for weight updates.
-        - 'constant' is a constant learning rate given by
-          'learning_rate_init'.
-        - 'invscaling' gradually decreases the learning rate ``learning_rate_``
-          at each time step 't' using an inverse scaling exponent of 'power_t'.
-          effective_learning_rate = learning_rate_init / pow(t, power_t)
-        - 'adaptive' keeps the learning rate constant to
-          'learning_rate_init' as long as training loss keeps decreasing.
-          Each time two consecutive epochs fail to decrease training loss by at
-          least tol, or fail to increase validation score by at least tol if
-          'early_stopping' is on, the current learning rate is divided by 5.
-        Only used when solver='sgd'.
+        'constant' is a constant learning rate given by 'learning_rate_init'.
+        'invscaling' gradually decreases the learning rate ``learning_rate_``
+        at each time step 't' using an inverse scaling exponent of 'power_t':
+        effective_learning_rate = learning_rate_init / pow(t, power_t).
+        'adaptive' keeps the learning rate constant to 'learning_rate_init'
+        as long as training loss keeps decreasing. Each time two consecutive
+        epochs fail to decrease training loss by at least tol, or fail to
+        increase validation score by at least tol if 'early_stopping' is on,
+        the current learning rate is divided by 5. Only used when solver='sgd'.
     :param learning_rate_init: double, optional, default 0.001
         The initial learning rate used. It controls the step-size
         in updating the weights. Only used when solver='sgd' or 'adam'.
@@ -337,25 +333,26 @@ class QuantileMLPRegressor(CustomizedMultilayerPerceptron, RegressorMixin):
     :param n_iter_no_change: int, optional, default 10
         Maximum number of epochs to not meet ``tol`` improvement.
         Only effective when solver='sgd' or 'adam'
+    :param kwargs: additional parameters sent to the constructor of the parent
 
     Fitted attributes:
 
     * `loss_`: float
-        The current loss computed with the loss function.
+      The current loss computed with the loss function.
     * `coefs_`: list, length n_layers - 1
-        The ith element in the list represents the weight matrix corresponding
-        to layer i.
+      The ith element in the list represents the weight matrix corresponding
+      to layer i.
     * `intercepts_`: list, length n_layers - 1
-        The ith element in the list represents the bias vector corresponding to
-        layer i + 1.
+      The ith element in the list represents the bias vector corresponding to
+      layer i + 1.
     * `n_iter_`: int,
-        The number of iterations the solver has ran.
+      The number of iterations the solver has run.
     * `n_layers_`: int
-        Number of layers.
+      Number of layers.
     * `n_outputs_`: int
-        Number of outputs.
+      Number of outputs.
     * `out_activation_`: string
-        Name of the output activation function.
+      Name of the output activation function.
     """
 
     def __init__(