diff --git a/.github/issue_template.md b/.github/ISSUE_TEMPLATE/bug_report.md similarity index 82% rename from .github/issue_template.md rename to .github/ISSUE_TEMPLATE/bug_report.md index d4fb0abe..ae757838 100644 --- a/.github/issue_template.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,3 +1,9 @@ +--- +name: Reproducible bug report +about: Create a reproducible bug report. Not for support requests. +labels: 'bug' +--- + #### Description @@ -42,3 +48,9 @@ $ pip show metric_learn | grep Version ) --> + +--- + +**Message from the maintainers**: + +Impacted by this bug? Give it a 👍. We prioritise the issues with the most 👍. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..415acfcd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,18 @@ +blank_issues_enabled: false + +contact_links: + - name: Have you read the docs? + url: http://contrib.scikit-learn.org/metric-learn/ + about: Much help can be found in the docs + - name: Ask a question + url: https://github.com/scikit-learn-contrib/metric-learn/discussions/new + about: Ask a question or start a discussion about metric-learn + - name: Stack Overflow + url: https://stackoverflow.com + about: Please ask and answer metric-learn usage questions (API, installation...) on Stack Overflow + - name: Cross Validated + url: https://stats.stackexchange.com + about: Please ask and answer metric learning questions (use cases, algorithms & theory...) on Cross Validated + - name: Blank issue + url: https://github.com/scikit-learn-contrib/metric-learn/issues/new + about: Please note that Github Discussions should be used in most cases instead diff --git a/.github/ISSUE_TEMPLATE/doc_improvement.md b/.github/ISSUE_TEMPLATE/doc_improvement.md new file mode 100644 index 00000000..753cf2f7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/doc_improvement.md @@ -0,0 +1,23 @@ +--- +name: Documentation improvement +about: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change. +labels: Documentation +--- + +#### Describe the issue linked to the documentation + + + +#### Suggest a potential alternative/fix + + + +--- + +**Message from the maintainers**: + +Confused by this part of the doc too? Give it a 👍. We prioritise the issues with the most 👍. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/enhancement_proposal.md b/.github/ISSUE_TEMPLATE/enhancement_proposal.md new file mode 100644 index 00000000..01dfb1d7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement_proposal.md @@ -0,0 +1,18 @@ +--- +name: Enhancement proposal +about: Propose an enhancement for metric-learn +labels: 'enhancement' +--- +# Summary + +What change needs making? + +# Use Cases + +When would you use this? + +--- + +**Message from the maintainers**: + +Want to see this feature happen? Give it a 👍. We prioritise the issues with the most 👍. 
\ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..0935a109 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,42 @@ +name: CI + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + # Run normal testing with the latest versions of all dependencies + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python-version: ['3.8', '3.9', '3.10', '3.11'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Run Tests without skggm + run: | + sudo apt-get install liblapack-dev + pip install --upgrade pip pytest + pip install wheel cython numpy scipy codecov pytest-cov scikit-learn + pytest test --cov + bash <(curl -s https://codecov.io/bash) + - name: Run Tests with skggm + env: + SKGGM_VERSION: a0ed406586c4364ea3297a658f415e13b5cbdaf8 + run: | + pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION} + pytest test --cov + bash <(curl -s https://codecov.io/bash) + - name: Syntax checking with flake8 + run: | + pip install flake8 + flake8 --extend-ignore=E111,E114 --show-source; diff --git a/.gitignore b/.gitignore index 8321c7d2..66eb3551 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,6 @@ htmlcov/ .cache/ .pytest_cache/ doc/auto_examples/* -doc/generated/* \ No newline at end of file +doc/generated/* +venv/ +.vscode/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0e510a9f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,27 +0,0 @@ -language: python -sudo: false -cache: pip -python: - - "2.7" - - "3.4" - - "3.6" -before_install: - - sudo apt-get install liblapack-dev - - pip install --upgrade pip pytest - - pip install wheel cython numpy scipy codecov pytest-cov - - if $TRAVIS_PYTHON_VERSION == "3.6"; then - pip install scikit-learn; - else - pip install scikit-learn==0.20.3; - fi - - if [[ ($TRAVIS_PYTHON_VERSION == "3.6") || - ($TRAVIS_PYTHON_VERSION == "2.7")]]; then - pip install git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8; - fi -script: - # we do coverage for all versions so that codecov will merge them: this - # way we will see that both paths (with or without skggm) are tested - - pytest test --cov; -after_success: - - bash <(curl -s https://codecov.io/bash) - diff --git a/README.rst b/README.rst index 027e5498..b2f6e6d4 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,9 @@ -|Travis-CI Build Status| |License| |PyPI version| |Code coverage| +|GitHub Actions Build Status| |License| |PyPI version| |Code coverage| -metric-learn -============= +metric-learn: Metric Learning in Python +======================================= -Metric Learning algorithms in Python. +metric-learn contains efficient Python implementations of several popular supervised and weakly-supervised metric learning algorithms. As part of `scikit-learn-contrib `_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in Python. This allows to use all the scikit-learn routines (for pipelining, model selection, etc) with metric learning algorithms through a unified interface. **Algorithms** @@ -11,6 +11,7 @@ Metric Learning algorithms in Python. 
- Information Theoretic Metric Learning (ITML) - Sparse Determinant Metric Learning (SDML) - Least Squares Metric Learning (LSML) +- Sparse Compositional Metric Learning (SCML) - Neighborhood Components Analysis (NCA) - Local Fisher Discriminant Analysis (LFDA) - Relative Components Analysis (RCA) @@ -19,36 +20,58 @@ Metric Learning algorithms in Python. **Dependencies** -- Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn>=0.20.3 +- Python 3.6+ (the last version supporting Python 2 and Python 3.5 was + `v0.5.0 `_) +- numpy>= 1.11.0, scipy>= 0.17.0, scikit-learn>=0.21.3 **Optional dependencies** - For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). + ``pip install 'git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8'`` to install the required version of skggm from GitHub. - For running the examples only: matplotlib **Installation/Setup** -Run ``pip install metric-learn`` to download and install from PyPI. +- If you use Anaconda: ``conda install -c conda-forge metric-learn``. See more options `here `_. -Run ``python setup.py install`` for default installation. +- To install from PyPI: ``pip install metric-learn``. -Run ``pytest test`` to run all tests (you will need to have the ``pytest`` -package installed). +- For a manual install of the latest code, download the source repository and run ``python setup.py install``. You may then run ``pytest test`` to run all tests (you will need to have the ``pytest`` package installed). **Usage** See the `sphinx documentation`_ for full documentation about installation, API, usage, and examples. +**Citation** -.. _sphinx documentation: http://metric-learn.github.io/metric-learn/ +If you use metric-learn in a scientific publication, we would appreciate +citations to the following paper: -.. |Travis-CI Build Status| image:: https://api.travis-ci.org/metric-learn/metric-learn.svg?branch=master - :target: https://travis-ci.org/metric-learn/metric-learn +`metric-learn: Metric Learning Algorithms in Python +`_, de Vazelhes +*et al.*, Journal of Machine Learning Research, 21(138):1-6, 2020. + +Bibtex entry:: + + @article{metric-learn, + title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython}, + author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and + {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien}, + journal = {Journal of Machine Learning Research}, + year = {2020}, + volume = {21}, + number = {138}, + pages = {1--6} + } + +.. _sphinx documentation: http://contrib.scikit-learn.org/metric-learn/ + +.. |GitHub Actions Build Status| image:: https://github.com/scikit-learn-contrib/metric-learn/workflows/CI/badge.svg + :target: https://github.com/scikit-learn-contrib/metric-learn/actions?query=event%3Apush+branch%3Amaster .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn -.. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg - :target: https://codecov.io/gh/metric-learn/metric-learn +.. 
|Code coverage| image:: https://codecov.io/gh/scikit-learn-contrib/metric-learn/branch/master/graph/badge.svg + :target: https://codecov.io/gh/scikit-learn-contrib/metric-learn diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py index 5973f7b8..05035085 100644 --- a/bench/benchmarks/iris.py +++ b/bench/benchmarks/iris.py @@ -5,15 +5,15 @@ CLASSES = { 'Covariance': metric_learn.Covariance(), - 'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200), + 'ITML_Supervised': metric_learn.ITML_Supervised(n_constraints=200), 'LFDA': metric_learn.LFDA(k=2, dim=2), - 'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False), - 'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200), + 'LMNN': metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False), + 'LSML_Supervised': metric_learn.LSML_Supervised(n_constraints=200), 'MLKR': metric_learn.MLKR(), 'NCA': metric_learn.NCA(max_iter=700, n_components=2), - 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30, + 'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, n_chunks=30, chunk_size=2), - 'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500) + 'SDML_Supervised': metric_learn.SDML_Supervised(n_constraints=1500) } diff --git a/doc/_static/css/styles.css b/doc/_static/css/styles.css new file mode 100644 index 00000000..6d350ae4 --- /dev/null +++ b/doc/_static/css/styles.css @@ -0,0 +1,36 @@ +.hatnote { + border-color: #e1e4e5 ; + border-style: solid ; + border-width: 1px ; + font-size: x-small ; + font-style: italic ; + margin-left: auto ; + margin-right: auto ; + margin-bottom: 24px; + padding: 12px; +} +.hatnote-gray { + background-color: #f5f5f5 +} +.hatnote li { + list-style-type: square; + margin-left: 12px !important; +} +.hatnote ul { + list-style-type: square; + margin-left: 0px !important; + margin-bottom: 0px !important; +} +.deprecated { + color: #b94a48; + background-color: #F3E5E5; + border-color: #eed3d7; + margin-top: 0.5rem; + padding: 0.5rem; + border-radius: 0.5rem; + margin-bottom: 0.5rem; +} + +.deprecated p { + margin-bottom: 0 !important; +} \ No newline at end of file diff --git a/doc/conf.py b/doc/conf.py index 5d1baeda..c472cc21 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import sys import os +import warnings extensions = [ 'sphinx.ext.autodoc', @@ -20,10 +21,12 @@ # General information about the project. project = u'metric-learn' -copyright = u'2015-2019, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier' -author = u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet, and Nathalie Vauquier' -version = '0.5.0' -release = '0.5.0' +copyright = (u'2015-2023, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien ' + u'Bellet and Nathalie Vauquier') +author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and ' + u'Nathalie Vauquier') +version = '0.7.0' +release = '0.7.0' language = 'en' exclude_patterns = ['_build'] @@ -35,9 +38,6 @@ html_static_path = ['_static'] htmlhelp_basename = 'metric-learndoc' -# Option to only need single backticks to refer to symbols -default_role = 'any' - # Option to hide doctests comments in the documentation (like # doctest: # +NORMALIZE_WHITESPACE for instance) trim_doctest_flags = True @@ -63,3 +63,20 @@ # generate autosummary even if no references autosummary_generate = True + + +# Temporary work-around for spacing problem between parameter and parameter +# type in the doc, see https://github.com/numpy/numpydoc/issues/215. 
The bug +# has been fixed in sphinx (https://github.com/sphinx-doc/sphinx/pull/5976) but +# through a change in sphinx basic.css except rtd_theme does not use basic.css. +# In an ideal world, this would get fixed in this PR: +# https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files +def setup(app): + app.add_js_file('js/copybutton.js') + app.add_css_file('css/styles.css') + + +# Remove matplotlib agg warnings from generated doc when using plt.show +warnings.filterwarnings("ignore", category=UserWarning, + message='Matplotlib is currently using agg, which is a' + ' non-GUI backend, so cannot show the figure.') diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 5a671d86..90b7c7ee 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -5,23 +5,28 @@ Getting started Installation and Setup ====================== -Run ``pip install metric-learn`` to download and install from PyPI. +**Installation** -Alternately, download the source repository and run: +metric-learn can be installed in either of the following ways: -- ``python setup.py install`` for default installation. -- ``python setup.py test`` to run all tests. +- If you use Anaconda: ``conda install -c conda-forge metric-learn``. See more options `here `_. + +- To install from PyPI: ``pip install metric-learn``. + +- For a manual install of the latest code, download the source repository and run ``python setup.py install``. You may then run ``pytest test`` to run all tests (you will need to have the ``pytest`` package installed). **Dependencies** -- Python 2.7+, 3.4+ -- numpy, scipy, scikit-learn>=0.20.3 +- Python 3.6+ (the last version supporting Python 2 and Python 3.5 was + `v0.5.0 `_) +- numpy>= 1.11.0, scipy>= 0.17.0, scikit-learn>=0.21.3 **Optional dependencies** - For SDML, using skggm will allow the algorithm to solve problematic cases (install from commit `a0ed406 `_). -- For running the examples only: matplotlib + ``pip install 'git+https://github.com/skggm/skggm.git@a0ed406586c4364ea3297a658f415e13b5cbdaf8'`` to install the required version of skggm from GitHub. +- For running the examples only: matplotlib Quick start =========== @@ -29,11 +34,14 @@ Quick start This example loads the iris dataset, and evaluates a k-nearest neighbors algorithm on an embedding space learned with `NCA`. ->>> from metric_learn import NCA ->>> from sklearn.datasets import load_iris ->>> from sklearn.model_selection import cross_val_score ->>> from sklearn.pipeline import make_pipeline ->>> ->>> X, y = load_iris(return_X_y=True) ->>> clf = make_pipeline(NCA(), KNeighborsClassifier()) ->>> cross_val_score(clf, X, y) +:: + + from metric_learn import NCA + from sklearn.datasets import load_iris + from sklearn.model_selection import cross_val_score + from sklearn.pipeline import make_pipeline + from sklearn.neighbors import KNeighborsClassifier + + X, y = load_iris(return_X_y=True) + clf = make_pipeline(NCA(), KNeighborsClassifier()) + cross_val_score(clf, X, y) diff --git a/doc/index.rst b/doc/index.rst index 9d303bee..f9dfd83d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,13 +1,36 @@ metric-learn: Metric Learning in Python ======================================= -|Travis-CI Build Status| |License| |PyPI version| |Code coverage| +|GitHub Actions Build Status| |License| |PyPI version| |Code coverage| -Metric-learn contains efficient Python implementations of several -popular supervised and weakly-supervised metric learning algorithms. 
The API -of metric-learn is compatible with `scikit-learn +`metric-learn `_ +contains efficient Python implementations of several popular supervised and +weakly-supervised metric learning algorithms. As part of `scikit-learn-contrib +`_, the API of metric-learn is compatible with `scikit-learn `_, the leading library for machine learning in -Python. This allows to use of all the scikit-learn routines (for pipelining, -model selection, etc) with metric learning algorithms. +Python. This allows to use all the scikit-learn routines (for pipelining, +model selection, etc) with metric learning algorithms through a unified +interface. + +If you use metric-learn in a scientific publication, we would appreciate +citations to the following paper: + +`metric-learn: Metric Learning Algorithms in Python +`_, de Vazelhes +*et al.*, Journal of Machine Learning Research, 21(138):1-6, 2020. + +Bibtex entry:: + + @article{metric-learn, + title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython}, + author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and + {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien}, + journal = {Journal of Machine Learning Research}, + year = {2020}, + volume = {21}, + number = {138}, + pages = {1--6} + } + Documentation outline --------------------- @@ -32,13 +55,13 @@ Documentation outline auto_examples/index -:ref:`genindex` | :ref:`modindex` | :ref:`search` +:ref:`genindex` | :ref:`search` -.. |Travis-CI Build Status| image:: https://api.travis-ci.org/metric-learn/metric-learn.svg?branch=master - :target: https://travis-ci.org/metric-learn/metric-learn +.. |GitHub Actions Build Status| image:: https://github.com/scikit-learn-contrib/metric-learn/workflows/CI/badge.svg + :target: https://github.com/scikit-learn-contrib/metric-learn/actions?query=event%3Apush+branch%3Amaster .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg :target: http://badge.fury.io/py/metric-learn .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat :target: http://badges.mit-license.org -.. |Code coverage| image:: https://codecov.io/gh/metric-learn/metric-learn/branch/master/graph/badge.svg - :target: https://codecov.io/gh/metric-learn/metric-learn +.. |Code coverage| image:: https://codecov.io/gh/scikit-learn-contrib/metric-learn/branch/master/graph/badge.svg + :target: https://codecov.io/gh/scikit-learn-contrib/metric-learn diff --git a/doc/introduction.rst b/doc/introduction.rst index 04ae1a18..e9ff0015 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -96,7 +96,7 @@ examples (for code illustrating some of these use-cases, see the metric learning provides a way to bias the clusters found by algorithms like K-Means towards the intended semantics. - Information retrieval: the learned metric can be used to retrieve the - elements of a database that are semantically closer to a query element. + elements of a database that are semantically closest to a query element. - Dimensionality reduction: metric learning may be seen as a way to reduce the data dimension in a (weakly) supervised setting. - More generally, the learned transformation :math:`L` can be used to project @@ -123,26 +123,3 @@ to the following resources: Survey `_ (2012) - **Book:** `Metric Learning `_ (2015) - -.. Methods [TO MOVE TO SUPERVISED/WEAK SECTIONS] -.. ============================================= - -.. Currently, each metric learning algorithm supports the following methods: - -.. - ``fit(...)``, which learns the model. -.. 
- ``get_mahalanobis_matrix()``, which returns a Mahalanobis matrix -.. - ``get_metric()``, which returns a function that takes as input two 1D - arrays and outputs the learned metric score on these two points -.. :math:`M = L^{\top}L` such that distance between vectors ``x`` and -.. ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`. -.. - ``components_from_metric(metric)``, which returns a transformation matrix -.. :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a -.. data matrix :math:`X \in \mathbb{R}^{n \times d}` to the -.. :math:`D`-dimensional learned metric space :math:`X L^{\top}`, -.. in which standard Euclidean distances may be used. -.. - ``transform(X)``, which applies the aforementioned transformation. -.. - ``score_pairs(pairs)`` which returns the distance between pairs of -.. points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs, -.. 2, n_features)``, or it can be a 2D array-like of pairs indicators of -.. shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more -.. details). \ No newline at end of file diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst index 930404d0..4d0676b9 100644 --- a/doc/metric_learn.rst +++ b/doc/metric_learn.rst @@ -13,7 +13,10 @@ Base Classes metric_learn.Constraints metric_learn.base_metric.BaseMetricLearner + metric_learn.base_metric.MetricTransformer + metric_learn.base_metric.MahalanobisMixin metric_learn.base_metric._PairsClassifierMixin + metric_learn.base_metric._TripletsClassifierMixin metric_learn.base_metric._QuadrupletsClassifierMixin Supervised Learning Algorithms @@ -32,6 +35,7 @@ Supervised Learning Algorithms metric_learn.MMC_Supervised metric_learn.SDML_Supervised metric_learn.RCA_Supervised + metric_learn.SCML_Supervised Weakly Supervised Learning Algorithms ------------------------------------- @@ -44,6 +48,7 @@ Weakly Supervised Learning Algorithms metric_learn.LSML metric_learn.MMC metric_learn.SDML + metric_learn.SCML Unsupervised Learning Algorithms -------------------------------- diff --git a/doc/supervised.rst b/doc/supervised.rst index 3c941b20..49548b83 100644 --- a/doc/supervised.rst +++ b/doc/supervised.rst @@ -50,7 +50,7 @@ classes will be large. To do so, we fit the metric learner (example: >>> from metric_learn import NCA >>> nca = NCA(random_state=42) >>> nca.fit(X, y) -NCA(init=None, max_iter=100, n_components=None, num_dims='deprecated', +NCA(init='auto', max_iter=100, n_components=None, preprocessor=None, random_state=42, tol=None, verbose=False) @@ -69,10 +69,10 @@ Also, as explained before, our metric learners has learn a distance between points. You can use this distance in two main ways: - You can either return the distance between pairs of points using the - `score_pairs` function: + `pair_distance` function: ->>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]]) -array([0.49627072, 3.65287282]) +>>> nca.pair_distance([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +array([0.49627072, 3.65287282, 6.06079877]) - Or you can return a function that will return the distance (in the new space) between two 1D arrays (the coordinates of the points in the original @@ -82,6 +82,18 @@ array([0.49627072, 3.65287282]) >>> metric_fun([3.5, 3.6], [5.6, 2.4]) 0.4962707194621285 +- Alternatively, you can use `pair_score` to return the **score** between + pairs of points (the larger the score, the more similar the pair). 
+ For Mahalanobis learners, it is equal to the opposite of the distance. + +>>> score = nca.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score +array([-0.49627072, -3.65287282, -6.06079877]) + +This is useful because `pair_score` matches the **score** semantic of +scikit-learn's `Classification metrics +`_. + .. note:: If the metric learner that you use learns a :ref:`Mahalanobis distance @@ -93,7 +105,6 @@ array([0.49627072, 3.65287282]) array([[0.43680409, 0.89169412], [0.89169412, 1.9542479 ]]) -.. TODO: remove the "like it is the case etc..." if it's not the case anymore Scikit-learn compatibility -------------------------- @@ -105,6 +116,7 @@ All supervised algorithms are scikit-learn estimators scikit-learn model selection routines (`sklearn.model_selection.cross_val_score`, `sklearn.model_selection.GridSearchCV`, etc). +You can also use some of the scoring functions from `sklearn.metrics`. Algorithms ========== @@ -131,16 +143,16 @@ The distance is learned by solving the following optimization problem: c\sum_{i, j, l}\eta_{ij}(1-y_{ij})[1+||\mathbf{L(x_i-x_j)}||^2-|| \mathbf{L(x_i-x_l)}||^2]_+) -where :math:`\mathbf{x}_i` is an data point, :math:`\mathbf{x}_j` is one -of its k nearest neighbors sharing the same label, and :math:`\mathbf{x}_l` +where :math:`\mathbf{x}_i` is a data point, :math:`\mathbf{x}_j` is one +of its k-nearest neighbors sharing the same label, and :math:`\mathbf{x}_l` are all the other instances within that region with different labels, :math:`\eta_{ij}, y_{ij} \in \{0, 1\}` are both the indicators, -:math:`\eta_{ij}` represents :math:`\mathbf{x}_{j}` is the k nearest -neighbors(with same labels) of :math:`\mathbf{x}_{i}`, :math:`y_{ij}=0` -indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class, +:math:`\eta_{ij}` represents :math:`\mathbf{x}_{j}` is the k-nearest +neighbors (with same labels) of :math:`\mathbf{x}_{i}`, :math:`y_{ij}=0` +indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes, :math:`[\cdot]_+=\max(0, \cdot)` is the Hinge loss. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -152,18 +164,18 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different class, X = iris_data['data'] Y = iris_data['target'] - lmnn = LMNN(k=5, learn_rate=1e-6) - lmnn.fit(X, Y, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) + lmnn.fit(X, Y) -.. topic:: References: +.. rubric:: References - .. [1] Weinberger et al. `Distance Metric Learning for Large Margin - Nearest Neighbor Classification - `_. - JMLR 2009 - .. [2] `Wikipedia entry on Large Margin Nearest Neighbor `_ - +.. container:: hatnote hatnote-gray + + [1]. Weinberger et al. `Distance Metric Learning for Large Margin Nearest Neighbor Classification `_. JMLR 2009. + + [2]. `Wikipedia entry on Large Margin Nearest Neighbor `_. + .. _nca: @@ -204,7 +216,7 @@ the sum of probability of being correctly classified: \mathbf{L} = \text{argmax}\sum_i p_i -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -219,13 +231,14 @@ the sum of probability of being correctly classified: nca = NCA(max_iter=1000) nca.fit(X, Y) -.. topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Goldberger et al. - `Neighbourhood Components Analysis `_. - NIPS 2005 + [1]. Goldberger et al. `Neighbourhood Components Analysis `_. NIPS 2005. - .. [2] `Wikipedia entry on Neighborhood Components Analysis `_ + [2]. 
`Wikipedia entry on Neighborhood Components Analysis `_. .. _lfda: @@ -235,7 +248,7 @@ the sum of probability of being correctly classified: Local Fisher Discriminant Analysis (:py:class:`LFDA `) -`LFDA` is a linear supervised dimensionality reduction method. It is +`LFDA` is a linear supervised dimensionality reduction method which effectively combines the ideas of `Linear Discriminant Analysis ` and Locality-Preserving Projection . It is particularly useful when dealing with multi-modality, where one ore more classes consist of separate clusters in input space. The core optimization problem of LFDA is solved as a generalized eigenvalue problem. @@ -261,23 +274,23 @@ where \,\,\mathbf{A}_{i,j}(1/n-1/n_l) \qquad y_i = y_j\end{aligned}\right.\\ here :math:`\mathbf{A}_{i,j}` is the :math:`(i,j)`-th entry of the affinity -matrix :math:`\mathbf{A}`:, which can be calculated with local scaling methods. +matrix :math:`\mathbf{A}`:, which can be calculated with local scaling methods, `n` and `n_l` are the total number of points and the number of points per cluster `l` respectively. Then the learning problem becomes derive the LFDA transformation matrix -:math:`\mathbf{T}_{LFDA}`: +:math:`\mathbf{L}_{LFDA}`: .. math:: - \mathbf{T}_{LFDA} = \arg\max_\mathbf{T} - [\text{tr}((\mathbf{T}^T\mathbf{S}^{(w)} - \mathbf{T})^{-1}\mathbf{T}^T\mathbf{S}^{(b)}\mathbf{T})] + \mathbf{L}_{LFDA} = \arg\max_\mathbf{L} + [\text{tr}((\mathbf{L}^T\mathbf{S}^{(w)} + \mathbf{L})^{-1}\mathbf{L}^T\mathbf{S}^{(b)}\mathbf{L})] -That is, it is looking for a transformation matrix :math:`\mathbf{T}` such that +That is, it is looking for a transformation matrix :math:`\mathbf{L}` such that nearby data pairs in the same class are made close and the data pairs in different classes are separated from each other; far apart data pairs in the same class are not imposed to be close. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -292,15 +305,19 @@ same class are not imposed to be close. lfda = LFDA(k=2, dim=2) lfda.fit(X, Y) -.. topic:: References: +.. note:: + LDFA suffers from a problem called “sign indeterminacy”, which means the sign of the ``components`` and the output from transform depend on a random state. This is directly related to the calculation of eigenvectors in the algorithm. The same input ran in different times might lead to different transforms, but both valid. + + To work around this, fit instances of this class to data once, then keep the instance around to do transformations. + +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local - Fisher Discriminant Analysis `_. - JMLR 2007 + [1]. Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis `_. JMLR 2007. - .. [2] Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering - `_. + [2]. Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering `_. .. _mlkr: @@ -326,9 +343,9 @@ empirical development. The Gaussian kernel is denoted as: where :math:`d(\cdot, \cdot)` is the squared distance under some metrics, here in the fashion of Mahalanobis, it should be :math:`d(\mathbf{x}_i, -\mathbf{x}_j) = ||\mathbf{A}(\mathbf{x}_i - \mathbf{x}_j)||`, the transition -matrix :math:`\mathbf{A}` is derived from the decomposition of Mahalanobis -matrix :math:`\mathbf{M=A^TA}`. 
+\mathbf{x}_j) = ||\mathbf{L}(\mathbf{x}_i - \mathbf{x}_j)||`, the transition +matrix :math:`\mathbf{L}` is derived from the decomposition of Mahalanobis +matrix :math:`\mathbf{M=L^TL}`. Since :math:`\sigma^2` can be integrated into :math:`d(\cdot)`, we can set :math:`\sigma^2=1` for the sake of simplicity. Here we use the cumulative @@ -346,7 +363,7 @@ calculating a weighted average of all the training samples: \hat{y}_i = \frac{\sum_{j\neq i}y_jk_{ij}}{\sum_{j\neq i}k_{ij}} -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -360,10 +377,12 @@ calculating a weighted average of all the training samples: mlkr = MLKR() mlkr.fit(X, Y) -.. topic:: References: +.. rubric:: References - .. [1] Weinberger et al. `Metric Learning for Kernel Regression `_. AISTATS 2007 + +.. container:: hatnote hatnote-gray + + [1]. Weinberger et al. `Metric Learning for Kernel Regression `_. AISTATS 2007. .. _supervised_version: @@ -374,7 +393,12 @@ Supervised versions of weakly-supervised algorithms Each :ref:`weakly-supervised algorithm ` has a supervised version of the form `*_Supervised` where similarity tuples are randomly generated from the labels information and passed to the underlying -algorithm. +algorithm. + +.. warning:: + Supervised versions of weakly-supervised algorithms interpret label -1 + (or any negative label) as a point with unknown label. + Those points are discarded in the learning process. For pairs learners (see :ref:`learning_on_pairs`), pairs (tuple of two points from the dataset), and pair labels (`int` indicating whether the two points @@ -383,8 +407,8 @@ are similar (+1) or dissimilar (-1)), are sampled with the function (of label +1), this method will look at all the samples from the same label and sample randomly a pair among them. To sample negative pairs (of label -1), this method will look at all the samples from a different class and sample randomly -a pair among them. The method will try to build `num_constraints` positive -pairs and `num_constraints` negative pairs, but sometimes it cannot find enough +a pair among them. The method will try to build `n_constraints` positive +pairs and `n_constraints` negative pairs, but sometimes it cannot find enough of one of those, so forcing `same_length=True` will return both times the minimum of the two lenghts. @@ -395,7 +419,7 @@ quadruplets, where for each quadruplet the two first points are from the same class, and the two last points are from a different class (so indeed the two last points should be less similar than the two first points). -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -406,5 +430,5 @@ last points should be less similar than the two first points). X = iris_data['data'] Y = iris_data['target'] - mmc = MMC_Supervised(num_constraints=200) + mmc = MMC_Supervised(n_constraints=200) mmc.fit(X, Y) diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst index 1191e805..110b07f9 100644 --- a/doc/unsupervised.rst +++ b/doc/unsupervised.rst @@ -20,7 +20,7 @@ It can be used for ZCA whitening of the data (see the Wikipedia page of `whitening transformation `_). -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -32,6 +32,9 @@ Whitening_transformation>`_). cov = Covariance().fit(iris) x = cov.transform(iris) -.. topic:: References: +.. rubric:: References - .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936 \ No newline at end of file + +.. container:: hatnote hatnote-gray + + [1]. On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936. 
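As a quick complement to the `Covariance` section above, one can inspect the matrix it learns and compare it with the inverse sample covariance of the data. This is a sketch only: it assumes `Covariance` uses the empirical covariance of `X`, and `get_mahalanobis_matrix` is the standard accessor available on Mahalanobis learners::

    import numpy as np
    from sklearn.datasets import load_iris
    from metric_learn import Covariance

    X = load_iris()['data']
    cov = Covariance().fit(X)

    # The learned Mahalanobis matrix M, used as (x - y)^T M (x - y).
    M = cov.get_mahalanobis_matrix()

    # Assumption: M is (close to) the pseudo-inverse of the sample covariance
    # of X, so the check below is only indicative of that convention.
    print(np.allclose(M, np.linalg.pinv(np.cov(X, rowvar=False))))

    # transform() maps the data so that Euclidean distances in the new space
    # match the learned Mahalanobis distance (ZCA-like whitening).
    X_whitened = cov.transform(X)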
\ No newline at end of file diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst index 38f08fbe..76f7c14e 100644 --- a/doc/weakly_supervised.rst +++ b/doc/weakly_supervised.rst @@ -57,13 +57,14 @@ learn: ^^^^^^^^^^^^^^^^^^ The most intuitive way to represent tuples is to provide the algorithm with a -3D array-like of tuples of shape `(n_tuples, t, n_features)`, where +3D array-like of tuples of shape `(n_tuples, tuple_size, n_features)`, where `n_tuples` is the number of tuples, `tuple_size` is the number of elements in a tuple (2 for pairs, 3 for triplets for instance), and `n_features` is the number of features of each point. -.. topic:: Example: - Here is an artificial dataset of 4 pairs of 2 points of 3 features each: +.. rubric:: Example Code + +Here is an artificial dataset of 4 pairs of 2 points of 3 features each: >>> import numpy as np >>> tuples = np.array([[[-0.12, -1.21, -0.20], @@ -94,7 +95,9 @@ would be to keep the dataset of points `X` aside, and just represent tuples as a collection of tuples of *indices* from the points in `X`. Since we loose the feature dimension there, the resulting array is 2D. -.. topic:: Example: An equivalent representation of the above pairs would be: +.. rubric:: Example Code + +An equivalent representation of the above pairs would be: >>> X = np.array([[-0.12, -1.21, -0.20], >>> [+0.05, -0.19, -0.05], @@ -134,8 +137,8 @@ are respected. >>> from metric_learn import MMC >>> mmc = MMC(random_state=42) >>> mmc.fit(tuples, y) -MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, - diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, +MMC(A0='deprecated', tol=0.001, diagonal=False, + diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000, preprocessor=None, random_state=42, verbose=False) Or alternatively (using a preprocessor): @@ -160,9 +163,9 @@ Also, as explained before, our metric learner has learned a distance between points. You can use this distance in two main ways: - You can either return the distance between pairs of points using the - `score_pairs` function: + `pair_distance` function: ->>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]], +>>> mmc.pair_distance([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]], ... [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]]) array([7.27607365, 0.88853014]) @@ -175,6 +178,18 @@ array([7.27607365, 0.88853014]) >>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7]) 7.276073646278203 +- Alternatively, you can use `pair_score` to return the **score** between + pairs of points (the larger the score, the more similar the pair). + For Mahalanobis learners, it is equal to the opposite of the distance. + +>>> score = mmc.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]]) +>>> score +array([-0.49627072, -3.65287282, -6.06079877]) + + This is useful because `pair_score` matches the **score** semantic of + scikit-learn's `Classification metrics + `_. + .. note:: If the metric learner that you use learns a :ref:`Mahalanobis distance @@ -187,8 +202,6 @@ array([[ 0.58603894, -5.69883982, -1.66614919], [-5.69883982, 55.41743549, 16.20219519], [-1.66614919, 16.20219519, 4.73697721]]) -.. TODO: remove the "like it is the case etc..." if it's not the case anymore - .. _sklearn_compat_ws: Prediction and scoring @@ -250,8 +263,8 @@ tuples). 
>>> y_pairs = np.array([1, -1]) >>> mmc = MMC(random_state=42) >>> mmc.fit(pairs, y_pairs) -MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, - diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, preprocessor=None, +MMC(tol=0.001, diagonal=False, + diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000, preprocessor=None, random_state=42, verbose=False) Here, we learned a metric that puts the two first points closer @@ -344,8 +357,8 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent). .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, - `get_metric` and `get_mahalanobis_matrix`. + not specific to learning on pairs, like `transform`, `pair_distance`, + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. Algorithms ---------- @@ -367,40 +380,40 @@ other methods, `ITML` does not rely on an eigenvalue computation or semi-definite programming. -Given a Mahalanobis distance parameterized by :math:`A`, its corresponding +Given a Mahalanobis distance parameterized by :math:`M`, its corresponding multivariate Gaussian is denoted as: .. math:: - p(\mathbf{x}; \mathbf{A}) = \frac{1}{Z}\exp(-\frac{1}{2}d_\mathbf{A} + p(\mathbf{x}; \mathbf{M}) = \frac{1}{Z}\exp(-\frac{1}{2}d_\mathbf{M} (\mathbf{x}, \mu)) - = \frac{1}{Z}\exp(-\frac{1}{2}((\mathbf{x} - \mu)^T\mathbf{A} + = \frac{1}{Z}\exp(-\frac{1}{2}((\mathbf{x} - \mu)^T\mathbf{M} (\mathbf{x} - \mu)) where :math:`Z` is the normalization constant, the inverse of Mahalanobis -matrix :math:`\mathbf{A}^{-1}` is the covariance of the Gaussian. +matrix :math:`\mathbf{M}^{-1}` is the covariance of the Gaussian. Given pairs of similar points :math:`S` and pairs of dissimilar points :math:`D`, the distance metric learning problem is to minimize the LogDet divergence, which is equivalent as minimizing :math:`\textbf{KL}(p(\mathbf{x}; -\mathbf{A}_0) || p(\mathbf{x}; \mathbf{A}))`: +\mathbf{M}_0) || p(\mathbf{x}; \mathbf{M}))`: .. math:: - \min_\mathbf{A} D_{\ell \mathrm{d}}\left(A, A_{0}\right) = - \operatorname{tr}\left(A A_{0}^{-1}\right)-\log \operatorname{det} - \left(A A_{0}^{-1}\right)-n\\ - \text{subject to } \quad d_\mathbf{A}(\mathbf{x}_i, \mathbf{x}_j) + \min_\mathbf{A} D_{\ell \mathrm{d}}\left(M, M_{0}\right) = + \operatorname{tr}\left(M M_{0}^{-1}\right)-\log \operatorname{det} + \left(M M_{0}^{-1}\right)-n\\ + \text{subject to } \quad d_\mathbf{M}(\mathbf{x}_i, \mathbf{x}_j) \leq u \qquad (\mathbf{x}_i, \mathbf{x}_j)\in S \\ - d_\mathbf{A}(\mathbf{x}_i, \mathbf{x}_j) \geq l \qquad (\mathbf{x}_i, + d_\mathbf{M}(\mathbf{x}_i, \mathbf{x}_j) \geq l \qquad (\mathbf{x}_i, \mathbf{x}_j)\in D where :math:`u` and :math:`l` is the upper and the lower bound of distance -for similar and dissimilar pairs respectively, and :math:`\mathbf{A}_0` +for similar and dissimilar pairs respectively, and :math:`\mathbf{M}_0` is the prior distance metric, set to identity matrix by default, :math:`D_{\ell \mathrm{d}}(\cdot)` is the log determinant. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -419,11 +432,14 @@ is the prior distance metric, set to identity matrix by default, itml = ITML() itml.fit(pairs, y) -.. topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning `_. ICML 2007 + [1]. Jason V. Davis, et al. `Information-theoretic Metric Learning `_. ICML 2007. - .. 
[2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ + [2]. Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ . .. _sdml: @@ -458,7 +474,7 @@ the sums of the row elements of :math:`\mathbf{K}`., :math:`||\cdot||_{1, off}` is the off-diagonal L1 norm. -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -476,19 +492,19 @@ is the off-diagonal L1 norm. sdml = SDML() sdml.fit(pairs, y) -.. topic:: References: +.. rubric:: References - .. [1] Qi et al. - `An efficient sparse metric learning in high-dimensional space via - L1-penalized log-determinant regularization `_. - ICML 2009. - .. [2] Adapted from https://gist.github.com/kcarnold/5439945 +.. container:: hatnote hatnote-gray + + [1]. Qi et al. `An efficient sparse metric learning in high-dimensional space via L1-penalized log-determinant regularization `_. ICML 2009. + + [2]. Code adapted from https://gist.github.com/kcarnold/5439945 . .. _rca: :py:class:`RCA ` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Relative Components Analysis (:py:class:`RCA `) @@ -512,33 +528,31 @@ where chunklet :math:`j` consists of :math:`\{\mathbf{x}_{ji}\}_{i=1}^{n_j}` with a mean :math:`\hat{m}_j`. The inverse of :math:`\mathbf{C}^{-1}` is used as the Mahalanobis matrix. -.. topic:: Example Code: +.. rubric:: Example Code :: from metric_learn import RCA - pairs = [[[1.2, 7.5], [1.3, 1.5]], - [[6.4, 2.6], [6.2, 9.7]], - [[1.3, 4.5], [3.2, 4.6]], - [[6.2, 5.5], [5.4, 5.4]]] - y = [1, 1, -1, -1] - - # in this task we want points where the first feature is close to be closer - # to each other, no matter how close the second feature is + X = [[-0.05, 3.0],[0.05, -3.0], + [0.1, -3.55],[-0.1, 3.55], + [-0.95, -0.05],[0.95, 0.05], + [0.4, 0.05],[-0.4, -0.05]] + chunks = [0, 0, 1, 1, 2, 2, 3, 3] rca = RCA() - rca.fit(pairs, y) + rca.fit(X, chunks) + +.. rubric:: References + -.. topic:: References: +.. container:: hatnote hatnote-gray - .. [1] Shental et al. `Adjustment learning and relevant component analysis - `_. ECCV 2002 + [1]. Shental et al. `Adjustment learning and relevant component analysis `_. ECCV 2002. - .. [2] Bar-Hillel et al. `Learning distance functions using equivalence relations `_. ICML 2003 + [2]. Bar-Hillel et al. `Learning distance functions using equivalence relations `_. ICML 2003. - .. [3] Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints `_. JMLR 2005 + [3]. Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints `_. JMLR 2005. .. _mmc: @@ -569,7 +583,7 @@ points, while constrains the sum of distances between dissimilar points: \qquad \qquad \text{s.t.} \qquad \sum_{(\mathbf{x}_i, \mathbf{x}_j) \in D} d^2_{\mathbf{M}}(\mathbf{x}_i, \mathbf{x}_j) \geq 1 -.. topic:: Example Code: +.. rubric:: Example Code :: @@ -587,13 +601,179 @@ points, while constrains the sum of distances between dissimilar points: mmc = MMC() mmc.fit(pairs, y) -.. topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray + + [1]. Xing et al. `Distance metric learning with application to clustering with side-information `_. NIPS 2002. + + [2]. Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz . + +.. _learning_on_triplets: + +Learning on triplets +==================== + +Some metric learning algorithms learn on triplets of samples. In this case, +one should provide the algorithm with `n_samples` triplets of points. 
The + semantic of each triplet is that the first point should be closer to the second point than to the third one. +Fitting +------- + +Here is an example for fitting on triplets (see :ref:`fit_ws` for more +details on the input data format and how to fit, in the general case of +learning on tuples). + +>>> from metric_learn import SCML +>>> triplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.1, 0.6]], +>>> [[4.5, 2.3], [2.1, 2.3], [7.3, 3.4]]]) +>>> scml = SCML(random_state=42) +>>> scml.fit(triplets) +SCML(beta=1e-5, B=None, max_iter=100000, verbose=False, + preprocessor=None, random_state=None) + +Or alternatively (using a preprocessor): + +>>> X = np.array([[1.2, 3.2], +>>> [2.3, 5.5], +>>> [2.4, 6.7], +>>> [2.1, 0.6], +>>> [4.5, 2.3], +>>> [2.1, 2.3], +>>> [0.6, 1.2], +>>> [7.3, 3.4]]) +>>> triplets_indices = np.array([[0, 1, 2], [3, 4, 5]]) +>>> scml = SCML(preprocessor=X, random_state=42) +>>> scml.fit(triplets_indices) +SCML(beta=1e-5, B=None, max_iter=100000, verbose=False, + preprocessor=array([[1.2, 3.2], + [2.3, 5.5], + [2.4, 6.7], + [2.1, 0.6], + [4.5, 2.3], + [2.1, 2.3], + [0.6, 1.2], + [7.3, 3.4]]), + random_state=None) + + +Here, we want to learn a metric that, for each of the two +`triplets`, will make the first point closer to the +second point than to the third one. + +.. _triplets_predicting: + +Prediction +---------- + +When a triplets learner is fitted, it is also able to predict, for an +upcoming triplet, whether the first point is closer to the second point +than to the third one (+1), or not (-1). + +>>> triplets_test = np.array( +... [[[5.6, 5.3], [2.2, 2.1], [1.2, 3.4]], +... [[6.0, 4.2], [4.3, 1.2], [0.1, 7.8]]]) +>>> scml.predict(triplets_test) +array([-1., 1.]) + +.. _triplets_scoring: + +Scoring +------- + +Triplet metric learners can also return a `decision_function` for a set of triplets, +which corresponds to the distance between the first two points minus the distance +between the first and last points of the triplet (the higher the value, the more +similar the first point to the second point compared to the last one). This "score" +can be interpreted as a measure of likeliness of having a +1 prediction for this +triplet. + +>>> scml.decision_function(triplets_test) +array([-1.75700306, 4.98982131]) + +In the above example, for the first triplet in `triplets_test`, the first +point is predicted less similar to the second point than to the last point +(they are further away in the transformed space). + +Unlike pairs learners, triplets learners do not allow to give a `y` when fitting: we +assume that the ordering of points within triplets is such that the training triplets +are all positive. Therefore, it is not possible to use scikit-learn scoring functions +(such as 'f1_score') for triplets learners. + +However, triplets learners do have a default scoring function, which will +basically return the accuracy score on a given test set, i.e. the proportion +of triplets that have the right predicted ordering. + +>>> scml.score(triplets_test) +0.5 + +.. note:: + See :ref:`fit_ws` for more details on metric learners functions that are + not specific to learning on pairs, like `transform`, `pair_distance`, + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. + + + + +Algorithms +---------- + +.. 
_scml: + +:py:class:`SCML ` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sparse Compositional Metric Learning +(:py:class:`SCML `) + +`SCML` learns a squared Mahalanobis distance from triplet constraints by +optimizing sparse positive weights assigned to a set of :math:`K` rank-one +PSD bases. This can be formulated as an optimization problem with only +:math:`K` parameters, that can be solved with an efficient stochastic +composite scheme. + +The Mahalanobis matrix :math:`M` is built from a basis set :math:`B = \{b_i\}_{i=\{1,...,K\}}` +weighted by a :math:`K` dimensional vector :math:`w = \{w_i\}_{i=\{1,...,K\}}` as: + +.. math:: + + M = \sum_{i=1}^K w_i b_i b_i^T = B \cdot diag(w) \cdot B^T \quad w_i \geq 0 + +Learning :math:`M` in this form makes it PSD by design, as it is a +nonnegative sum of PSD matrices. The basis set :math:`B` is fixed in advance +and it is possible to construct it from the data. The optimization problem +over :math:`w` is formulated as a classic margin-based hinge loss function +involving the set :math:`C` of triplets. A regularization :math:`\ell_1` +is added to yield a sparse combination. The formulation is the following: + +.. math:: + + \min_{w\geq 0} \sum_{(x_i,x_j,x_k)\in C} [1 + d_w(x_i,x_j)-d_w(x_i,x_k)]_+ + \beta||w||_1 + +where :math:`[\cdot]_+` is the hinge loss. + +.. rubric:: Example Code + +:: + + from metric_learn import SCML + + triplets = [[[1.2, 7.5], [1.3, 1.5], [6.2, 9.7]], + [[1.3, 4.5], [3.2, 4.6], [5.4, 5.4]], + [[3.2, 7.5], [3.3, 1.5], [8.2, 9.7]], + [[3.3, 4.5], [5.2, 4.6], [7.4, 5.4]]] + + scml = SCML() + scml.fit(triplets) + +.. rubric:: References + + +.. container:: hatnote hatnote-gray + + [1]. Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. `_. (AAAI), 2014. + + [2]. Adapted from original `Matlab implementation. `_. .. _learning_on_quadruplets: @@ -602,7 +782,7 @@ Learning on quadruplets ======================= Some metric learning algorithms learn on quadruplets of samples. In this case, -one should provide the algorithm with `n_samples` quadruplets of points. Th +one should provide the algorithm with `n_samples` quadruplets of points. The semantic of each quadruplet is that the first two points should be closer together than the last two points. @@ -669,14 +849,12 @@ array([-1., 1.]) Scoring ------- -Quadruplet metric learners can also -return a `decision_function` for a set of pairs. This is basically the "score" -which sign will be taken to find the prediction for the pair, which -corresponds to the difference between the distance between the two last points, -and the distance between the two last points of the quadruplet (higher -score means the two last points are more likely to be more dissimilar than -the two first points (i.e. more likely to have a +1 prediction since it's -the right ordering)). +Quadruplet metric learners can also return a `decision_function` for a set of +quadruplets, which corresponds to the distance between the first pair of points minus +the distance between the second pair of points of the triplet (the higher the value, +the more similar the first pair is than the last pair). +This "score" can be interpreted as a measure of likeliness of having a +1 prediction +for this quadruplet. >>> lsml.decision_function(quadruplets_test) array([-1.75700306, 4.98982131]) @@ -685,17 +863,10 @@ In the above example, for the first quadruplet in `quadruplets_test`, the two first points are predicted less similar than the two last points (they are further away in the transformed space). 
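To make the link between `predict`, `decision_function` and the default `score` of quadruplets learners concrete, here is a small sketch. It assumes the `lsml` estimator and the `quadruplets_test` array from the surrounding examples, and relies on the stated behaviour that the prediction is the sign of the decision function and that the default score is the accuracy of those predictions::

    import numpy as np

    # predict() returns +1 when the first two points of a quadruplet are
    # predicted closer than the last two, and -1 otherwise; as described
    # above, the prediction follows the sign of decision_function().
    predictions = lsml.predict(quadruplets_test)
    scores = lsml.decision_function(quadruplets_test)
    print(np.array_equal(predictions, np.sign(scores)))

    # The default score() is the accuracy of those predictions, i.e. the
    # fraction of quadruplets predicted in the right ordering (+1).
    print(np.isclose(lsml.score(quadruplets_test), np.mean(predictions == 1)))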
-Unlike for pairs learners, quadruplets learners don't allow to give a `y` -when fitting, which does not allow to use scikit-learn scoring functions -like: - ->>> from sklearn.model_selection import cross_val_score ->>> cross_val_score(lsml, quadruplets, scoring='f1_score') # this won't work - -(This is actually intentional, for more details -about that, see -`this comment `_ -on github.) +Like triplet learners, quadruplets learners do not allow to give a `y` when fitting: we +assume that the ordering of points within quadruplets is such that the training quadruplets +are all positive. Therefore, it is not possible to use scikit-learn scoring functions +(such as 'f1_score') for quadruplets learners. However, quadruplets learners do have a default scoring function, which will basically return the accuracy score on a given test set, i.e. the proportion @@ -706,8 +877,8 @@ of quadruplets have the right predicted ordering. .. note:: See :ref:`fit_ws` for more details on metric learners functions that are - not specific to learning on pairs, like `transform`, `score_pairs`, - `get_metric` and `get_mahalanobis_matrix`. + not specific to learning on pairs, like `transform`, `pair_distance`, + `pair_score`, `get_metric` and `get_mahalanobis_matrix`. @@ -733,13 +904,13 @@ extension leads to more stable estimation when the dimension is high and only a small amount of constraints is given. The loss function of each constraint -:math:`d(\mathbf{x}_a, \mathbf{x}_b) < d(\mathbf{x}_c, \mathbf{x}_d)` is +:math:`d(\mathbf{x}_i, \mathbf{x}_j) < d(\mathbf{x}_k, \mathbf{x}_l)` is denoted as: .. math:: - H(d_\mathbf{M}(\mathbf{x}_a, \mathbf{x}_b) - - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_d)) + H(d_\mathbf{M}(\mathbf{x}_i, \mathbf{x}_j) + - d_\mathbf{M}(\mathbf{x}_k, \mathbf{x}_l)) where :math:`H(\cdot)` is the squared Hinge loss function defined as: .. math:: H(x) = \left\{\begin{aligned}0 \qquad x\leq 0 \\ \,\,x^2 \qquad x>0\end{aligned}\right.\\ The summed loss function :math:`L(C)` is the simple sum over all constraints -:math:`C = \{(\mathbf{x}_a , \mathbf{x}_b , \mathbf{x}_c , \mathbf{x}_d) -: d(\mathbf{x}_a , \mathbf{x}_b) < d(\mathbf{x}_c , \mathbf{x}_d)\}`. The +:math:`C = \{(\mathbf{x}_i , \mathbf{x}_j , \mathbf{x}_k , \mathbf{x}_l) -: d(\mathbf{x}_i , \mathbf{x}_j) < d(\mathbf{x}_k , \mathbf{x}_l)\}`. The original paper suggested here should be a weighted sum since the confidence or probability of each constraint might differ. However, for the sake of simplicity and assumption of no extra knowledge provided, we just deploy @@ -762,9 +933,9 @@ knowledge: .. math:: - \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_a, - \mathbf{x}_b, \mathbf{x}_c, \mathbf{x}_d)\in C}H(d_\mathbf{M}( - \mathbf{x}_a, \mathbf{x}_b) - d_\mathbf{M}(\mathbf{x}_c, \mathbf{x}_c))\\ + \min_\mathbf{M}(D_{ld}(\mathbf{M, M_0}) + \sum_{(\mathbf{x}_i, + \mathbf{x}_j, \mathbf{x}_k, \mathbf{x}_l)\in C}H(d_\mathbf{M}( + \mathbf{x}_i, \mathbf{x}_j) - d_\mathbf{M}(\mathbf{x}_k, \mathbf{x}_l))\\ where :math:`\mathbf{M}_0` is the prior metric matrix, set as identity by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence: .. math:: D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet} (\mathbf{M}) -.. topic:: Example Code: +.. rubric:: Example Code :: from metric_learn import LSML quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], [[3.2, 7.5], [3.3, 1.5], [8.6, 2.6], [8.2, 9.7]], [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] # we want to make closer points where the first feature is close, and # further if the second feature is close lsml = LSML() lsml.fit(quadruplets) -.. 
topic:: References: +.. rubric:: References + + +.. container:: hatnote hatnote-gray - .. [1] Liu et al. - `Metric Learning from Relative Comparisons by Minimizing Squared - Residual `_. ICDM 2012 + [1]. Liu et al. `Metric Learning from Relative Comparisons by Minimizing Squared Residual `_. ICDM 2012. - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + [2]. Code adapted from https://gist.github.com/kcarnold/5439917 . diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py index 0d602cbb..32759636 100644 --- a/examples/plot_metric_learning_examples.py +++ b/examples/plot_metric_learning_examples.py @@ -15,7 +15,11 @@ ###################################################################### # Imports # ^^^^^^^ +# .. note:: # +# In order to show the charts of the examples you need a graphical +# ``matplotlib`` backend installed. For intance, use ``pip install pyqt5`` +# to get Qt graphical interface or use your favorite one. from sklearn.manifold import TSNE @@ -35,9 +39,9 @@ # We will be using a synthetic dataset to illustrate the plotting, # using the function `sklearn.datasets.make_classification` from # scikit-learn. The dataset will contain: -# - 100 points in 3 classes with 2 clusters per class -# - 5 features, among which 3 are informative (correlated with the class -# labels) and two are random noise with large magnitude +# - 100 points in 3 classes with 2 clusters per class +# - 5 features, among which 3 are informative (correlated with the class +# labels) and two are random noise with large magnitude X, y = make_classification(n_samples=100, n_classes=3, n_clusters_per_class=2, n_informative=3, class_sep=4., n_features=5, @@ -88,7 +92,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # distances between points for the task at hand. Especially in higher # dimensions when Euclidean distances are a poor way to measure distance, this # becomes very useful. -# +# # Basically, we learn this distance: # :math:`D(x, x') = \sqrt{(x-x')^\top M(x-x')}`. And we learn the parameters # :math:`M` of this distance to satisfy certain constraints on the distance @@ -113,12 +117,12 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Large Margin Nearest Neighbour # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # LMNN is a metric learning algorithm primarily designed for k-nearest # neighbor classification. The algorithm is based on semidefinite # programming, a sub-class of convex programming (as most Metric Learning # algorithms are). -# +# # The main intuition behind LMNN is to learn a pseudometric under which # all data instances in the training set are surrounded by at least k # instances that share the same class label. If this is achieved, the @@ -136,10 +140,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Fit and then transform! # ----------------------- -# +# # setting up LMNN -lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6) +lmnn = metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6) # fit the data! lmnn.fit(X, y) @@ -162,7 +166,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Pretty neat, huh? -# +# # The rest of this notebook will briefly explain the other Metric Learning # algorithms before plotting them. 
Also, while we have first run ``fit`` # and then ``transform`` to see our data transformed, we can also use @@ -172,10 +176,10 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Information Theoretic Metric Learning # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # ITML uses a regularizer that automatically enforces a Semi-Definite # Positive Matrix condition - the LogDet divergence. It uses soft -# must-link or cannot like constraints, and a simple algorithm based on +# must-link or cannot-link constraints, and a simple algorithm based on # Bregman projections. Unlike LMNN, ITML will implicitly enforce points from # the same class to belong to the same cluster, as you can see below. # @@ -231,7 +235,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Least Squares Metric Learning # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # LSML is a simple, yet effective, algorithm that learns a Mahalanobis # metric from a given set of relative comparisons. This is done by # formulating and minimizing a convex loss function that corresponds to @@ -277,7 +281,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Local Fisher Discriminant Analysis # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # LFDA is a linear supervised dimensionality reduction method. It is # particularly useful when dealing with multimodality, where one ore more # classes consist of separate clusters in input space. The core @@ -289,7 +293,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`LFDA # ` -lfda = metric_learn.LFDA(k=2, num_dims=2) +lfda = metric_learn.LFDA(k=2, n_components=2) X_lfda = lfda.fit_transform(X, y) plot_tsne(X_lfda, y) @@ -298,7 +302,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): ###################################################################### # Relative Components Analysis # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # RCA is another one of the older algorithms. It learns a full rank # Mahalanobis distance metric based on a weighted sum of in-class # covariance matrices. It applies a global linear transformation to assign @@ -310,7 +314,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): # - See more in the documentation of the class :py:class:`RCA # ` -rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2) +rca = metric_learn.RCA_Supervised(n_chunks=30, chunk_size=2) X_rca = rca.fit_transform(X, y) plot_tsne(X_rca, y) @@ -402,7 +406,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired): def create_constraints(labels): import itertools import random - + # aggregate indices of same class zeros = np.where(y == 0)[0] ones = np.where(y == 1)[0] @@ -413,7 +417,7 @@ def create_constraints(labels): twos_ = list(itertools.combinations(twos, 2)) # put them together! 
sim = np.array(zeros_ + ones_ + twos_) - + # similarily, put together indices in different classes dis = [] for zero in zeros: @@ -424,21 +428,25 @@ def create_constraints(labels): for one in ones: for two in twos: dis.append((one, two)) - + # pick up just enough dissimilar examples as we have similar examples dis = np.array(random.sample(dis, len(sim))) - - # return an array of pairs of indices of shape=(2*len(sim), 2), and the corresponding labels, array of shape=(2*len(sim)) - # Each pair of similar points have a label of +1 and each pair of dissimilar points have a label of -1 - return (np.vstack([np.column_stack([sim[:, 0], sim[:, 1]]), np.column_stack([dis[:, 0], dis[:, 1]])]), + + # return an array of pairs of indices of shape=(2*len(sim), 2), and the + # corresponding labels, array of shape=(2*len(sim)) + # Each pair of similar points have a label of +1 and each pair of + # dissimilar points have a label of -1 + return (np.vstack([np.column_stack([sim[:, 0], sim[:, 1]]), + np.column_stack([dis[:, 0], dis[:, 1]])]), np.concatenate([np.ones(len(sim)), -np.ones(len(sim))])) + pairs, pairs_labels = create_constraints(y) ###################################################################### # Now that we've created our constraints, let's see what it looks like! -# +# print(pairs) print(pairs_labels) diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py index 84e53d07..740852be 100644 --- a/examples/plot_sandwich.py +++ b/examples/plot_sandwich.py @@ -6,12 +6,20 @@ Sandwich demo based on code from http://nbviewer.ipython.org/6576096 """ +###################################################################### +# .. note:: +# +# In order to show the charts of the examples you need a graphical +# ``matplotlib`` backend installed. For intance, use ``pip install pyqt5`` +# to get Qt graphical interface or use your favorite one. 
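+#
+# A minimal sketch (assuming ``pyqt5`` has been installed as suggested above)
+# of selecting the Qt backend explicitly, ideally before ``pyplot`` is
+# imported:
+#
+#   import matplotlib
+#   matplotlib.use("Qt5Agg")         # switch to the Qt backend
+#   print(matplotlib.get_backend())  # sanity check, should report "Qt5Agg"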
+ import numpy as np from matplotlib import pyplot as plt from sklearn.metrics import pairwise_distances from sklearn.neighbors import NearestNeighbors -from metric_learn import LMNN, ITML_Supervised, LSML_Supervised, SDML_Supervised +from metric_learn import (LMNN, ITML_Supervised, LSML_Supervised, + SDML_Supervised) def sandwich_demo(): @@ -27,9 +35,9 @@ def sandwich_demo(): mls = [ LMNN(), - ITML_Supervised(num_constraints=200), - SDML_Supervised(num_constraints=200, balance_param=0.001), - LSML_Supervised(num_constraints=200), + ITML_Supervised(n_constraints=200), + SDML_Supervised(n_constraints=200, balance_param=0.001), + LSML_Supervised(n_constraints=200), ] for ax_num, ml in enumerate(mls, start=3): @@ -47,10 +55,10 @@ def sandwich_demo(): # TODO: use this somewhere def visualize_class_separation(X, labels): - _, (ax1,ax2) = plt.subplots(ncols=2) + _, (ax1, ax2) = plt.subplots(ncols=2) label_order = np.argsort(labels) ax1.imshow(pairwise_distances(X[label_order]), interpolation='nearest') - ax2.imshow(pairwise_distances(labels[label_order,None]), + ax2.imshow(pairwise_distances(labels[label_order, None]), interpolation='nearest') @@ -77,19 +85,19 @@ def sandwich_data(): for k, xc in enumerate(x_centers): data[i, k, 0] = np.random.normal(xc, 0.1) data[i, k, 1] = np.random.normal(yc, 0.1) - labels[i,:] = i + labels[i, :] = i return data.reshape((-1, 2)), labels.ravel() def plot_sandwich_data(x, y, axis=plt, colors='rbgmky'): for idx, val in enumerate(np.unique(y)): - xi = x[y==val] + xi = x[y == val] axis.scatter(*xi.T, s=50, facecolors='none', edgecolors=colors[idx]) def plot_neighborhood_graph(x, nn, y, axis=plt, colors='rbgmky'): for i, a in enumerate(x): - b = x[nn[i,1]] + b = x[nn[i, 1]] axis.plot((a[0], b[0]), (a[1], b[1]), colors[y[i]]) diff --git a/metric_learn/__init__.py b/metric_learn/__init__.py index b2b84559..92823fb1 100644 --- a/metric_learn/__init__.py +++ b/metric_learn/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from .constraints import Constraints from .covariance import Covariance from .itml import ITML, ITML_Supervised @@ -11,5 +9,12 @@ from .rca import RCA, RCA_Supervised from .mlkr import MLKR from .mmc import MMC, MMC_Supervised +from .scml import SCML, SCML_Supervised from ._version import __version__ + +__all__ = ['Constraints', 'Covariance', 'ITML', 'ITML_Supervised', + 'LMNN', 'LSML', 'LSML_Supervised', 'SDML', + 'SDML_Supervised', 'NCA', 'LFDA', 'RCA', 'RCA_Supervised', + 'MLKR', 'MMC', 'MMC_Supervised', 'SCML', + 'SCML_Supervised', '__version__'] diff --git a/metric_learn/_util.py b/metric_learn/_util.py index b476e70b..868ececa 100644 --- a/metric_learn/_util.py +++ b/metric_learn/_util.py @@ -1,6 +1,4 @@ import numpy as np -import scipy -import six from numpy.linalg import LinAlgError from sklearn.datasets import make_spd_matrix from sklearn.decomposition import PCA @@ -8,9 +6,10 @@ from sklearn.utils.validation import check_X_y, check_random_state from .exceptions import PreprocessorError, NonPSDError from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from scipy.linalg import pinvh +from scipy.linalg import pinvh, eigh import sys import time +import warnings # hack around lack of axis kwarg in older numpy versions try: @@ -283,7 +282,7 @@ def make_name(estimator): if a string is given """ if estimator is not None: - if isinstance(estimator, six.string_types): + if isinstance(estimator, str): estimator_name = estimator else: estimator_name = estimator.__class__.__name__ @@ -448,45 +447,45 @@ def 
_initialize_components(n_components, input, y=None, init='auto', The input labels (or not if there are no labels). init : string or numpy array, optional (default='auto') - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda' (see - the description of 'lda' init), as it uses labels information. If - not, but ``n_components < min(n_features, n_samples)``, we use 'pca', - as it projects data onto meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`). - This initialization is possible only if `has_classes == True`. - - 'identity' - The identity matrix. If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'lda', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda' (see + the description of 'lda' init), as it uses labels information. If + not, but ``n_components < min(n_features, n_samples)``, we use 'pca', + as it projects data onto meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`). + This initialization is possible only if `has_classes == True`. + + 'identity' + The identity matrix. If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. 
+ If ``n_components`` is not None, n_features_a must match it. verbose : bool Whether to print the details of the initialization or not. @@ -606,26 +605,26 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, The input samples (can be tuples or regular samples). init : string or numpy array, optional (default='identity') - Specification for the matrix to initialize. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). + Specification for the matrix to initialize. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The (pseudo-)inverse covariance matrix (raises an error if the - covariance matrix is not definite and `strict_pd == True`) + 'covariance' + The (pseudo-)inverse covariance matrix (raises an error if the + covariance matrix is not definite and `strict_pd == True`) - 'random' - A random positive definite (PD) matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + A random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - A PSD matrix (or strictly PD if strict_pd==True) of - shape (n_features, n_features), that will be used as such to - initialize the metric, or set the prior. + numpy array + A PSD matrix (or strictly PD if strict_pd==True) of + shape (n_features, n_features), that will be used as such to + initialize the metric, or set the prior. random_state : int or `numpy.RandomState` or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If @@ -678,17 +677,20 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, random_state = check_random_state(random_state) M = init - if isinstance(init, np.ndarray): - s, u = scipy.linalg.eigh(init) - init_is_definite = _check_sdp_from_eigen(s) + if isinstance(M, np.ndarray): + w, V = eigh(M, check_finite=False) + init_is_definite = _check_sdp_from_eigen(w) if strict_pd and not init_is_definite: raise LinAlgError("You should provide a strictly positive definite " "matrix as `{}`. This one is not definite. Try another" " {}, or an algorithm that does not " "require the {} to be strictly positive definite." 
.format(*((matrix_name,) * 3))) + elif return_inverse and not init_is_definite: + warnings.warn('The initialization matrix is not invertible: ' + 'using the pseudo-inverse instead.') if return_inverse: - M_inv = np.dot(u / s, u.T) + M_inv = _pseudo_inverse_from_eig(w, V) return M, M_inv else: return M @@ -702,20 +704,28 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None, elif init == 'covariance': if input.ndim == 3: # if the input are tuples, we need to form an X by deduplication - X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)}) + X = np.unique(np.vstack(input), axis=0) else: X = input # atleast2d is necessary to deal with scalar covariance matrices M_inv = np.atleast_2d(np.cov(X, rowvar=False)) - s, u = scipy.linalg.eigh(M_inv) - cov_is_definite = _check_sdp_from_eigen(s) + w, V = eigh(M_inv, check_finite=False) + cov_is_definite = _check_sdp_from_eigen(w) if strict_pd and not cov_is_definite: raise LinAlgError("Unable to get a true inverse of the covariance " "matrix since it is not definite. Try another " "`{}`, or an algorithm that does not " "require the `{}` to be strictly positive definite." .format(*((matrix_name,) * 2))) - M = np.dot(u / s, u.T) + elif not cov_is_definite: + warnings.warn('The covariance matrix is not invertible: ' + 'using the pseudo-inverse instead.' + 'To make the covariance matrix invertible' + ' you can remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' + 'for instance using `sklearn.decomposition.PCA` as a ' + 'preprocessing step.') + M = _pseudo_inverse_from_eig(w, V) if return_inverse: return M, M_inv else: @@ -742,3 +752,36 @@ def _check_n_components(n_features, n_components): if 0 < n_components <= n_features: return n_components raise ValueError('Invalid n_components, must be in [1, %d]' % n_features) + + +def _pseudo_inverse_from_eig(w, V, tol=None): + """Compute the (Moore-Penrose) pseudo-inverse of the EVD of a symetric + matrix. + + Parameters + ---------- + w : (..., M) ndarray + The eigenvalues in ascending order, each repeated according to + its multiplicity. + + v : {(..., M, M) ndarray, (..., M, M) matrix} + The column ``v[:, i]`` is the normalized eigenvector corresponding + to the eigenvalue ``w[i]``. Will return a matrix object if `a` is + a matrix object. + + tol : positive `float`, optional + Absolute eigenvalues below tol are considered zero. + + Returns + ------- + output : (..., M, N) array_like + The pseudo-inverse given by the EVD. + """ + if tol is None: + tol = np.amax(w) * np.max(w.shape) * np.finfo(w.dtype).eps + # discard small eigenvalues and invert the rest + large = np.abs(w) > tol + w = np.divide(1, w, where=large, out=w) + w[~large] = 0 + + return np.dot(V * w, np.conjugate(V).T) diff --git a/metric_learn/_version.py b/metric_learn/_version.py index 2b8877c5..a71c5c7f 100644 --- a/metric_learn/_version.py +++ b/metric_learn/_version.py @@ -1 +1 @@ -__version__ = '0.5.0' +__version__ = '0.7.0' diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py index 570172a9..47efe4b7 100644 --- a/metric_learn/base_metric.py +++ b/metric_learn/base_metric.py @@ -2,18 +2,17 @@ Base module. 
""" -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.extmath import stable_cumsum from sklearn.utils.validation import _is_arraylike, check_is_fitted from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve import numpy as np from abc import ABCMeta, abstractmethod -import six from ._util import ArrayIndexer, check_input, validate_vector import warnings -class BaseMetricLearner(six.with_metaclass(ABCMeta, BaseEstimator)): +class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta): """ Base class for all metric-learners. @@ -29,26 +28,98 @@ def __init__(self, preprocessor=None): @abstractmethod def score_pairs(self, pairs): - """Returns the score between pairs + """ + Returns the score between pairs (can be a similarity, or a distance/metric depending on the algorithm) + .. deprecated:: 0.7.0 + Refer to `pair_distance` and `pair_score`. + + .. warning:: + This method will be removed in 0.8.0. Please refer to `pair_distance` + or `pair_score`. This change will occur in order to add learners + that don't necessarily learn a Mahalanobis distance. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The score of every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference between `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + """ + + @abstractmethod + def pair_score(self, pairs): + """ + .. versionadded:: 0.7.0 Compute the similarity score between pairs + + Returns the similarity score between pairs of points (the larger the score, + the more similar the pair). For metric learners that learn a distance, + the score is simply the opposite of the distance between pairs. All + learners have access to this method. + Parameters ---------- - pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features) - 3D array of pairs. + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. Returns ------- - scores: `numpy.ndarray` of shape=(n_pairs,) + scores : `numpy.ndarray` of shape=(n_pairs,) The score of every pair. See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `pair_score` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + """ + + @abstractmethod + def pair_distance(self, pairs): + """ + .. versionadded:: 0.7.0 Compute the distance between pairs + + Returns the (pseudo) distance between pairs, when available. For metric + learners that do not learn a (pseudo) distance, an error is thrown + instead. 
+ + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs for which to compute the distance, with each + row corresponding to two points, for 2D array of indices of pairs + if the metric learner uses a preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The distance between every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `pair_distance` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. """ def _check_preprocessor(self): @@ -69,19 +140,19 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', Parameters ---------- - input: array-like + X : array-like The input data array to check. y : array-like The input labels array to check. - type_of_inputs: `str` {'classic', 'tuples'} + type_of_inputs : `str` {'classic', 'tuples'} The type of inputs to check. If 'classic', the input should be a 2D array-like of points or a 1D array like of indicators of points. If 'tuples', the input should be a 3D array-like of tuples or a 2D array-like of indicators of tuples. - **kwargs: dict + **kwargs : dict Arguments to pass to check_input. Returns @@ -89,21 +160,29 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic', X : `numpy.ndarray` The checked input data array. - y: `numpy.ndarray` (optional) + y : `numpy.ndarray` (optional) The checked input labels array. """ self._check_preprocessor() - return check_input(X, y, + + check_is_fitted(self, ['preprocessor_']) + outs = check_input(X, y, type_of_inputs=type_of_inputs, preprocessor=self.preprocessor_, estimator=self, tuple_size=getattr(self, '_tuple_size', None), **kwargs) + # Conform to SLEP010 + if not hasattr(self, 'n_features_in_'): + self.n_features_in_ = (outs if y is None else outs[0]).shape[1] + return outs @abstractmethod def get_metric(self): - """Returns a function that takes as input two 1D arrays and outputs the - learned metric score on these two points. + """Returns a function that takes as input two 1D arrays and outputs + the value of the learned metric on these two points. Depending on the + algorithm, it can return a distance or a similarity function between + pairs. This function will be independent from the metric learner that learned it (it will not be modified if the initial metric learner is modified), @@ -136,15 +215,25 @@ def get_metric(self): See Also -------- - score_pairs : a method that returns the metric score between several pairs - of points. Unlike `get_metric`, this is a method of the metric learner - and therefore can change if the metric learner changes. Besides, it can - use the metric learner's preprocessor, and works on concatenated arrays. + pair_distance : a method that returns the distance between several + pairs of points. Unlike `get_metric`, this is a method of the metric + learner and therefore can change if the metric learner changes. Besides, + it can use the metric learner's preprocessor, and works on concatenated + arrays. + + pair_score : a method that returns the similarity score between + several pairs of points. Unlike `get_metric`, this is a method of the + metric learner and therefore can change if the metric learner changes. + Besides, it can use the metric learner's preprocessor, and works on + concatenated arrays. 
""" -class MetricTransformer(six.with_metaclass(ABCMeta)): - +class MetricTransformer(metaclass=ABCMeta): + """ + Base class for all learners that can transform data into a new space + with the metric learned. + """ @abstractmethod def transform(self, X): """Applies the metric transformation. @@ -152,18 +241,18 @@ def transform(self, X): Parameters ---------- X : (n x d) matrix - Data to transform. + Data to transform. Returns ------- transformed : (n x d) matrix - Input data transformed to the metric space by :math:`XL^{\\top}` + Input data transformed to the metric space by :math:`XL^{\\top}` """ -class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, - MetricTransformer)): - """Mahalanobis metric learning algorithms. +class MahalanobisMixin(BaseMetricLearner, MetricTransformer, + metaclass=ABCMeta): + r"""Mahalanobis metric learning algorithms. Algorithm that learns a Mahalanobis (pseudo) distance :math:`d_M(x, x')`, defined between two column vectors :math:`x` and :math:`x'` by: :math:`d_M(x, @@ -178,20 +267,29 @@ class MahalanobisMixin(six.with_metaclass(ABCMeta, BaseMetricLearner, Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. """ def score_pairs(self, pairs): - """Returns the learned Mahalanobis distance between pairs. + r""" + Returns the learned Mahalanobis distance between pairs. - This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}` + This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` where ``M`` is the learned Mahalanobis matrix, for every pair of points ``x`` and ``x'``. This corresponds to the euclidean distance between embeddings of the points in a new space, obtained through a linear - transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e - + transformation. Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e - x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See :class:`MahalanobisMixin`). + .. deprecated:: 0.7.0 + Please use `pair_distance` instead. + + .. warning:: + This method will be removed in 0.8.0. Please refer to `pair_distance` + or `pair_score`. This change will occur in order to add learners + that don't necessarily learn a Mahalanobis distance. + Parameters ---------- pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) @@ -201,20 +299,91 @@ def score_pairs(self, pairs): Returns ------- - scores: `numpy.ndarray` of shape=(n_pairs,) + scores : `numpy.ndarray` of shape=(n_pairs,) The learned Mahalanobis distance for every pair. See Also -------- get_metric : a method that returns a function to compute the metric between - two points. The difference with `score_pairs` is that it works on two 1D - arrays and cannot use a preprocessor. Besides, the returned function is - independent of the metric learner and hence is not modified if the metric - learner is. + two points. The difference with `score_pairs` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. :ref:`mahalanobis_distances` : The section of the project documentation that describes Mahalanobis Distances. """ + dpr_msg = ("score_pairs will be deprecated in release 0.7.0. 
" + "Use pair_score to compute similarity scores, or " + "pair_distances to compute distances.") + warnings.warn(dpr_msg, category=FutureWarning) + return self.pair_distance(pairs) + + def pair_score(self, pairs): + """ + Returns the opposite of the learned Mahalanobis distance between pairs. + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The opposite of the learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `pair_score` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. + """ + return -1 * self.pair_distance(pairs) + + def pair_distance(self, pairs): + """ + Returns the learned Mahalanobis distance between pairs. + + This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}` + where ``M`` is the learned Mahalanobis matrix, for every pair of points + ``x`` and ``x'``. This corresponds to the euclidean distance between + embeddings of the points in a new space, obtained through a linear + transformation. Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e - + x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See + :class:`MahalanobisMixin`). + + Parameters + ---------- + pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2) + 3D Array of pairs to score, with each row corresponding to two points, + for 2D array of indices of pairs if the metric learner uses a + preprocessor. + + Returns + ------- + scores : `numpy.ndarray` of shape=(n_pairs,) + The learned Mahalanobis distance for every pair. + + See Also + -------- + get_metric : a method that returns a function to compute the metric between + two points. The difference with `pair_distance` is that it works on two + 1D arrays and cannot use a preprocessor. Besides, the returned function + is independent of the metric learner and hence is not modified if the + metric learner is. + + :ref:`mahalanobis_distances` : The section of the project documentation + that describes Mahalanobis Distances. + """ + check_is_fitted(self, ['preprocessor_']) pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=2) @@ -240,12 +409,14 @@ def transform(self, X): X_embedded : `numpy.ndarray`, shape=(n_samples, n_components) The embedded data points. """ + check_is_fitted(self, ['preprocessor_', 'components_']) X_checked = check_input(X, type_of_inputs='classic', estimator=self, - preprocessor=self.preprocessor_, - accept_sparse=True) + preprocessor=self.preprocessor_, + accept_sparse=True) return X_checked.dot(self.components_.T) def get_metric(self): + check_is_fitted(self, 'components_') components_T = self.components_.T.copy() def metric_fun(u, v, squared=False): @@ -266,7 +437,7 @@ def metric_fun(u, v, squared=False): Returns ------- - distance: float + distance : float The distance between u and v according to the new metric. 
""" u = validate_vector(u) @@ -281,15 +452,6 @@ def metric_fun(u, v, squared=False): get_metric.__doc__ = BaseMetricLearner.get_metric.__doc__ - def metric(self): - """Deprecated. Will be removed in v0.6.0. Use `get_mahalanobis_matrix` - instead""" - # TODO: remove this method in version 0.6.0 - warnings.warn(("`metric` is deprecated since version 0.5.0 and will be " - "removed in 0.6.0. Use `get_mahalanobis_matrix` instead."), - DeprecationWarning) - return self.get_mahalanobis_matrix() - def get_mahalanobis_matrix(self): """Returns a copy of the Mahalanobis matrix learned by the metric learner. @@ -298,20 +460,22 @@ def get_mahalanobis_matrix(self): M : `numpy.ndarray`, shape=(n_features, n_features) The copy of the learned Mahalanobis matrix. """ + check_is_fitted(self, 'components_') return self.components_.T.dot(self.components_) -class _PairsClassifierMixin(BaseMetricLearner): +class _PairsClassifierMixin(BaseMetricLearner, ClassifierMixin): """Base class for pairs learners. Attributes ---------- threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. """ + classes_ = np.array([0, 1]) _tuple_size = 2 # number of points in a tuple, 2 for pairs def predict(self, pairs): @@ -333,7 +497,12 @@ def predict(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted learned metric value between samples in every pair. """ - check_is_fitted(self, ['threshold_', 'components_']) + check_is_fitted(self, 'preprocessor_') + + if "threshold_" not in vars(self): + msg = ("A threshold for this estimator has not been set, " + "call its set_threshold or calibrate_threshold method.") + raise AttributeError(msg) return 2 * (- self.decision_function(pairs) <= self.threshold_) - 1 def decision_function(self, pairs): @@ -357,10 +526,11 @@ def decision_function(self, pairs): y_predicted : `numpy.ndarray` of floats, shape=(n_constraints,) The predicted decision function value for each pair. """ + check_is_fitted(self, 'preprocessor_') pairs = check_input(pairs, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return - self.score_pairs(pairs) + return self.pair_score(pairs) def score(self, pairs, y): """Computes score of pairs similarity prediction. @@ -407,7 +577,15 @@ def set_threshold(self, threshold): self : `_PairsClassifier` The pairs classifier with the new threshold set. """ - self.threshold_ = threshold + check_is_fitted(self, 'preprocessor_') + try: + self.threshold_ = float(threshold) + except TypeError: + raise ValueError('Parameter threshold must be a real number. ' + 'Got {} instead.'.format(type(threshold))) + except ValueError: + raise ValueError('Parameter threshold must be a real number. ' + 'Got {} instead.'.format(type(threshold))) return self def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', @@ -463,12 +641,13 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy', evaluation tool in clinical medicine, MH Zweig, G Campbell - Clinical chemistry, 1993 - .. [2] most of the code of this function is from scikit-learn's PR #10117 + .. 
[2] Most of the code of this function is from scikit-learn's PR #10117 See Also -------- sklearn.calibration : scikit-learn's module for calibrating classifiers """ + check_is_fitted(self, 'preprocessor_') self._validate_calibration_params(strategy, min_rate, beta) @@ -574,10 +753,98 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None, 'Got {} instead.'.format(type(beta))) -class _QuadrupletsClassifierMixin(BaseMetricLearner): - """Base class for quadruplets learners. +class _TripletsClassifierMixin(BaseMetricLearner, ClassifierMixin): + """ + Base class for triplets learners. + """ + + classes_ = np.array([0, 1]) + _tuple_size = 3 # number of points in a tuple, 3 for triplets + + def predict(self, triplets): + """Predicts the ordering between sample distances in input triplets. + + For each triplets, returns 1 if the first element is closer to the second + than to the last and -1 if not. + + Parameters + ---------- + triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3) + 3D array of triplets to predict, with each row corresponding to three + points, or 2D array of indices of triplets if the metric learner + uses a preprocessor. + + Returns + ------- + prediction : `numpy.ndarray` of floats, shape=(n_constraints,) + Predictions of the ordering of pairs, for each triplet. + """ + return 2 * (self.decision_function(triplets) > 0) - 1 + + def decision_function(self, triplets): + """Predicts differences between sample distances in input triplets. + + For each triplet (X_a, X_b, X_c) in the samples, computes the difference + between the learned distance of the second pair (X_a, X_c) minus the + learned distance of the first pair (X_a, X_b). The higher it is, the more + probable it is that the pairs in the triplets are presented in the right + order, i.e. that the label of the triplet is 1. The lower it is, the more + probable it is that the label of the triplet is -1. + + Parameters + ---------- + triplet : array-like, shape=(n_triplets, 3, n_features) or \ + (n_triplets, 3) + 3D array of triplets to predict, with each row corresponding to three + points, or 2D array of indices of triplets if the metric learner + uses a preprocessor. + + Returns + ------- + decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) + Metric differences. + """ + check_is_fitted(self, 'preprocessor_') + triplets = check_input(triplets, type_of_inputs='tuples', + preprocessor=self.preprocessor_, + estimator=self, tuple_size=self._tuple_size) + return (self.pair_score(triplets[:, :2]) - + self.pair_score(triplets[:, [0, 2]])) + + def score(self, triplets): + """Computes score on input triplets. + + Returns the accuracy score of the following classification task: a triplet + (X_a, X_b, X_c) is correctly classified if the predicted similarity between + the first pair (X_a, X_b) is higher than that of the second pair (X_a, X_c) + + Parameters + ---------- + triplets : array-like, shape=(n_triplets, 3, n_features) or \ + (n_triplets, 3) + 3D array of triplets to score, with each row corresponding to three + points, or 2D array of indices of triplets if the metric learner + uses a preprocessor. + + Returns + ------- + score : float + The triplets score. + """ + # Since the prediction is a vector of values in {-1, +1}, we need to + # rescale them to {0, 1} to compute the accuracy using the mean (because + # then 1 means a correctly classified result (pairs are in the right + # order), and a 0 an incorrectly classified result (pairs are in the + # wrong order). 
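+    # For example, if 9 out of 10 triplets are predicted +1 (right order)
+    # and 1 is predicted -1, the mean is 0.8 and the returned accuracy is
+    # 0.8 / 2 + 0.5 = 0.9, i.e. 9 correctly ordered triplets out of 10.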
+ return self.predict(triplets).mean() / 2 + 0.5 + + +class _QuadrupletsClassifierMixin(BaseMetricLearner, ClassifierMixin): + """ + Base class for quadruplets learners. """ + classes_ = np.array([0, 1]) _tuple_size = 4 # number of points in a tuple, 4 for quadruplets def predict(self, quadruplets): @@ -599,10 +866,6 @@ def predict(self, quadruplets): prediction : `numpy.ndarray` of floats, shape=(n_constraints,) Predictions of the ordering of pairs, for each quadruplet. """ - check_is_fitted(self, 'components_') - quadruplets = check_input(quadruplets, type_of_inputs='tuples', - preprocessor=self.preprocessor_, - estimator=self, tuple_size=self._tuple_size) return np.sign(self.decision_function(quadruplets)) def decision_function(self, quadruplets): @@ -628,11 +891,12 @@ def decision_function(self, quadruplets): decision_function : `numpy.ndarray` of floats, shape=(n_constraints,) Metric differences. """ + check_is_fitted(self, 'preprocessor_') quadruplets = check_input(quadruplets, type_of_inputs='tuples', preprocessor=self.preprocessor_, estimator=self, tuple_size=self._tuple_size) - return (self.score_pairs(quadruplets[:, 2:]) - - self.score_pairs(quadruplets[:, :2])) + return (self.pair_score(quadruplets[:, :2]) - + self.pair_score(quadruplets[:, 2:])) def score(self, quadruplets): """Computes score on input quadruplets diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index e42ef4b8..4993e9ef 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -4,87 +4,282 @@ """ import numpy as np import warnings -from six.moves import xrange -from scipy.sparse import coo_matrix from sklearn.utils import check_random_state +from sklearn.neighbors import NearestNeighbors + __all__ = ['Constraints'] class Constraints(object): """ - Class to build constraints from labels. + Class to build constraints from labeled data. + + See more in the :ref:`User Guide `. - See more in the :ref:`User Guide ` + Parameters + ---------- + partial_labels : `numpy.ndarray` of ints, shape=(n_samples,) + Array of labels, with -1 indicating unknown label. + + Attributes + ---------- + partial_labels : `numpy.ndarray` of ints, shape=(n_samples,) + Array of labels, with -1 indicating unknown label. """ + def __init__(self, partial_labels): - '''partial_labels : int arraylike, -1 indicating unknown label''' partial_labels = np.asanyarray(partial_labels, dtype=int) - self.num_points, = partial_labels.shape - self.known_label_idx, = np.where(partial_labels >= 0) - self.known_labels = partial_labels[self.known_label_idx] + self.partial_labels = partial_labels - def adjacency_matrix(self, num_constraints, random_state=None): - random_state = check_random_state(random_state) - a, b, c, d = self.positive_negative_pairs(num_constraints, - random_state=random_state) - row = np.concatenate((a, c)) - col = np.concatenate((b, d)) - data = np.ones_like(row, dtype=int) - data[len(a):] = -1 - adj = coo_matrix((data, (row, col)), shape=(self.num_points,)*2) - # symmetrize - return adj + adj.T - - def positive_negative_pairs(self, num_constraints, same_length=False, - random_state=None): + def positive_negative_pairs(self, n_constraints, same_length=False, + random_state=None, num_constraints='deprecated'): + """ + Generates positive pairs and negative pairs from labeled data. + + Positive pairs are formed by randomly drawing ``n_constraints`` pairs of + points with the same label. Negative pairs are formed by randomly drawing + ``n_constraints`` pairs of points with different label. 
+ + In the case where it is not possible to generate enough positive or + negative pairs, a smaller number of pairs will be returned with a warning. + + Parameters + ---------- + n_constraints : int + Number of positive and negative constraints to generate. + + same_length : bool, optional (default=False) + If True, forces the number of positive and negative pairs to be + equal by ignoring some pairs from the larger set. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 + + Returns + ------- + a : array-like, shape=(n_constraints,) + 1D array of indicators for the left elements of positive pairs. + + b : array-like, shape=(n_constraints,) + 1D array of indicators for the right elements of positive pairs. + + c : array-like, shape=(n_constraints,) + 1D array of indicators for the left elements of negative pairs. + + d : array-like, shape=(n_constraints,) + 1D array of indicators for the right elements of negative pairs. + """ + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints random_state = check_random_state(random_state) - a, b = self._pairs(num_constraints, same_label=True, + a, b = self._pairs(n_constraints, same_label=True, random_state=random_state) - c, d = self._pairs(num_constraints, same_label=False, + c, d = self._pairs(n_constraints, same_label=False, random_state=random_state) if same_length and len(a) != len(c): n = min(len(a), len(c)) return a[:n], b[:n], c[:n], d[:n] return a, b, c, d - def _pairs(self, num_constraints, same_label=True, max_iter=10, + def generate_knntriplets(self, X, k_genuine, k_impostor): + """ + Generates triplets from labeled data. + + For every point (X_a) the triplets (X_a, X_b, X_c) are constructed from all + the combinations of taking one of its `k_genuine`-nearest neighbors of the + same class (X_b) and taking one of its `k_impostor`-nearest neighbors of + other classes (X_c). + + In the case a class doesn't have enough points in the same class (other + classes) to yield `k_genuine` (`k_impostor`) neighbors a warning will be + raised and the maximum value of genuine (impostor) neighbors will be used + for that class. + + Parameters + ---------- + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + + k_genuine : int + Number of neighbors of the same class to be taken into account. + + k_impostor : int + Number of neighbors of different classes to be taken into account. + + Returns + ------- + triplets : array-like, shape=(n_constraints, 3) + 2D array of triplets of indicators. + """ + # Ignore unlabeled samples + known_labels_mask = self.partial_labels >= 0 + known_labels = self.partial_labels[known_labels_mask] + X = X[known_labels_mask] + + labels, labels_count = np.unique(known_labels, return_counts=True) + len_input = known_labels.shape[0] + + # Handle the case where there are too few elements to yield k_genuine or + # k_impostor neighbors for every class. 
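+    # For instance, a class with only 3 labeled points can provide at most
+    # 2 same-class neighbors, so k_genuine is capped at (count - 1) for that
+    # class; likewise k_impostor is capped at the number of labeled points
+    # belonging to the other classes.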
+ + k_genuine_vec = np.full_like(labels, k_genuine) + k_impostor_vec = np.full_like(labels, k_impostor) + + for i, count in enumerate(labels_count): + if k_genuine + 1 > count: + k_genuine_vec[i] = count-1 + warnings.warn("The class {} has {} elements, which is not sufficient " + "to generate {} genuine neighbors as specified by " + "k_genuine. Will generate {} genuine neighbors instead." + "\n" + .format(labels[i], count, k_genuine+1, + k_genuine_vec[i])) + if k_impostor > len_input - count: + k_impostor_vec[i] = len_input - count + warnings.warn("The class {} has {} elements of other classes, which is" + " not sufficient to generate {} impostor neighbors as " + "specified by k_impostor. Will generate {} impostor " + "neighbors instead.\n" + .format(labels[i], k_impostor_vec[i], k_impostor, + k_impostor_vec[i])) + + # The total number of possible triplets combinations per label comes from + # taking one of the k_genuine_vec[i] genuine neighbors and one of the + # k_impostor_vec[i] impostor neighbors for the labels_count[i] elements + comb_per_label = labels_count * k_genuine_vec * k_impostor_vec + + # Get start and finish for later triplet assigning + # append zero at the begining for start and get cumulative sum + start_finish_indices = np.hstack((0, comb_per_label)).cumsum() + + # Total number of triplets is the sum of all possible combinations per + # label + num_triplets = start_finish_indices[-1] + triplets = np.empty((num_triplets, 3), dtype=np.intp) + + neigh = NearestNeighbors() + + for i, label in enumerate(labels): + + # generate mask for current label + gen_mask = known_labels == label + gen_indx = np.where(gen_mask) + + # get k_genuine genuine neighbors + neigh.fit(X=X[gen_indx]) + # Take elements of gen_indx according to the yielded k-neighbors + gen_relative_indx = neigh.kneighbors(n_neighbors=k_genuine_vec[i], + return_distance=False) + gen_neigh = np.take(gen_indx, gen_relative_indx) + + # generate mask for impostors of current label + imp_indx = np.where(~gen_mask) + + # get k_impostor impostor neighbors + neigh.fit(X=X[imp_indx]) + # Take elements of imp_indx according to the yielded k-neighbors + imp_relative_indx = neigh.kneighbors(n_neighbors=k_impostor_vec[i], + X=X[gen_mask], + return_distance=False) + imp_neigh = np.take(imp_indx, imp_relative_indx) + + # length = len_label*k_genuine*k_impostor + start, finish = start_finish_indices[i:i+2] + + triplets[start:finish, :] = comb(gen_indx, gen_neigh, imp_neigh, + k_genuine_vec[i], + k_impostor_vec[i]) + + return triplets + + def _pairs(self, n_constraints, same_label=True, max_iter=10, random_state=np.random): - num_labels = len(self.known_labels) + known_label_idx, = np.where(self.partial_labels >= 0) + known_labels = self.partial_labels[known_label_idx] + num_labels = len(known_labels) ab = set() it = 0 - while it < max_iter and len(ab) < num_constraints: - nc = num_constraints - len(ab) + while it < max_iter and len(ab) < n_constraints: + nc = n_constraints - len(ab) for aidx in random_state.randint(num_labels, size=nc): if same_label: - mask = self.known_labels[aidx] == self.known_labels + mask = known_labels[aidx] == known_labels mask[aidx] = False # avoid identity pairs else: - mask = self.known_labels[aidx] != self.known_labels + mask = known_labels[aidx] != known_labels b_choices, = np.where(mask) if len(b_choices) > 0: ab.add((aidx, random_state.choice(b_choices))) it += 1 - if len(ab) < num_constraints: + if len(ab) < n_constraints: warnings.warn("Only generated %d %s constraints (requested %d)" % ( - 
len(ab), 'positive' if same_label else 'negative', num_constraints)) - ab = np.array(list(ab)[:num_constraints], dtype=int) - return self.known_label_idx[ab.T] + len(ab), 'positive' if same_label else 'negative', n_constraints)) + ab = np.array(list(ab)[:n_constraints], dtype=int) + return known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2, random_state=None): + def chunks(self, n_chunks=100, chunk_size=2, random_state=None, + num_chunks='deprecated'): """ - the random state object to be passed must be a numpy random seed + Generates chunks from labeled data. + + Each of ``n_chunks`` chunks is composed of ``chunk_size`` points from + the same class drawn at random. Each point can belong to at most 1 chunk. + + In the case where there is not enough points to generate ``n_chunks`` + chunks of size ``chunk_size``, a ValueError will be raised. + + Parameters + ---------- + n_chunks : int, optional (default=100) + Number of chunks to generate. + + chunk_size : int, optional (default=2) + Number of points in each chunk. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + + num_chunks : Renamed to n_chunks. Will be deprecated in 0.7.0 + + Returns + ------- + chunks : array-like, shape=(n_samples,) + 1D array of chunk indicators, where -1 indicates that the point does not + belong to any chunk. """ + if num_chunks != 'deprecated': + warnings.warn('"num_chunks" parameter has been renamed to' + ' "n_chunks". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks random_state = check_random_state(random_state) - chunks = -np.ones_like(self.known_label_idx, dtype=int) - uniq, lookup = np.unique(self.known_labels, return_inverse=True) - all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))] + chunks = -np.ones_like(self.partial_labels, dtype=int) + uniq, lookup = np.unique(self.partial_labels, return_inverse=True) + unknown_uniq = np.where(uniq < 0)[0] + all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq)) + if c not in unknown_uniq] + max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds])) + if max_chunks < n_chunks: + raise ValueError(('Not enough possible chunks of %d elements in each' + ' class to form expected %d chunks - maximum number' + ' of chunks is %d' + ) % (chunk_size, n_chunks, max_chunks)) idx = 0 - while idx < num_chunks and all_inds: + while idx < n_chunks and all_inds: if len(all_inds) == 1: c = 0 else: - c = random_state.randint(0, high=len(all_inds)-1) + c = random_state.randint(0, high=len(all_inds) - 1) inds = all_inds[c] if len(inds) < chunk_size: del all_inds[c] @@ -93,12 +288,18 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None): inds.difference_update(ii) chunks[ii] = idx idx += 1 - if idx < num_chunks: - raise ValueError('Unable to make %d chunks of %d examples each' % - (num_chunks, chunk_size)) return chunks +def comb(A, B, C, sizeB, sizeC): + # generate_knntriplets helper function + # generate an array with all combinations of choosing + # an element from A, B and C + return np.vstack((np.tile(A, (sizeB*sizeC, 1)).ravel(order='F'), + np.tile(np.hstack(B), (sizeC, 1)).ravel(order='F'), + np.tile(C, (1, sizeB)).ravel())).T + + def wrap_pairs(X, constraints): a = np.array(constraints[0]) b = np.array(constraints[1]) diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py index 7214dd62..2c05b28d 100644 --- 
a/metric_learn/covariance.py +++ b/metric_learn/covariance.py @@ -2,7 +2,6 @@ Covariance metric (baseline method) """ -from __future__ import absolute_import import numpy as np import scipy from sklearn.base import TransformerMixin @@ -43,6 +42,10 @@ def __init__(self, preprocessor=None): def fit(self, X, y=None): """ + Calculates the covariance matrix of the input data. + + Parameters + ---------- X : data matrix, (n x d) y : unused """ diff --git a/metric_learn/itml.py b/metric_learn/itml.py index c3b91fc4..9537eec2 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -2,17 +2,14 @@ Information Theoretic Metric Learning (ITML) """ -from __future__ import print_function, absolute_import -import warnings import numpy as np -from six.moves import xrange -from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_array from sklearn.base import TransformerMixin from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs from ._util import components_from_metric, _initialize_metric_mahalanobis +import warnings class _BaseITML(MahalanobisMixin): @@ -20,29 +17,31 @@ class _BaseITML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - prior='identity', A0='deprecated', verbose=False, - preprocessor=None, random_state=None): + def __init__(self, gamma=1., max_iter=1000, tol=1e-3, + prior='identity', verbose=False, + preprocessor=None, random_state=None, + convergence_threshold='deprecated'): + if convergence_threshold != 'deprecated': + warnings.warn('"convergence_threshold" parameter has been ' + ' renamed to "tol". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + tol = convergence_threshold + self.convergence_threshold = 'deprecated' # Avoid errors self.gamma = gamma self.max_iter = max_iter - self.convergence_threshold = convergence_threshold + self.tol = tol self.prior = prior - self.A0 = A0 self.verbose = verbose self.random_state = random_state super(_BaseITML, self).__init__(preprocessor) def _fit(self, pairs, y, bounds=None): - if self.A0 != 'deprecated': - warnings.warn('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.', - DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') # init bounds if bounds is None: - X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])}) + X = np.unique(np.vstack(pairs), axis=0) self.bounds_ = np.percentile(pairwise_distances(X), (5, 95)) else: bounds = check_array(bounds, allow_nd=False, ensure_min_samples=0, @@ -63,30 +62,31 @@ def _fit(self, pairs, y, bounds=None): num_neg = len(neg_pairs) _lambda = np.zeros(num_pos + num_neg) lambdaold = np.zeros_like(_lambda) - gamma_proj = 1. if gamma is np.inf else gamma/(gamma+1.) + gamma_proj = 1. if gamma is np.inf else gamma / (gamma + 1.) pos_bhat = np.zeros(num_pos) + self.bounds_[0] neg_bhat = np.zeros(num_neg) + self.bounds_[1] pos_vv = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] neg_vv = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] - for it in xrange(self.max_iter): + for it in range(self.max_iter): # update positives - for i,v in enumerate(pos_vv): + for i, v in enumerate(pos_vv): wtw = v.dot(A).dot(v) # scalar - alpha = min(_lambda[i], gamma_proj*(1./wtw - 1./pos_bhat[i])) + alpha = min(_lambda[i], gamma_proj * (1. 
/ wtw - 1. / pos_bhat[i])) _lambda[i] -= alpha - beta = alpha/(1 - alpha*wtw) - pos_bhat[i] = 1./((1 / pos_bhat[i]) + (alpha / gamma)) + beta = alpha / (1 - alpha * wtw) + pos_bhat[i] = 1. / ((1 / pos_bhat[i]) + (alpha / gamma)) Av = A.dot(v) A += np.outer(Av, Av * beta) # update negatives - for i,v in enumerate(neg_vv): + for i, v in enumerate(neg_vv): wtw = v.dot(A).dot(v) # scalar - alpha = min(_lambda[i+num_pos], gamma_proj*(1./neg_bhat[i] - 1./wtw)) - _lambda[i+num_pos] -= alpha - beta = -alpha/(1 + alpha*wtw) - neg_bhat[i] = 1./((1 / neg_bhat[i]) - (alpha / gamma)) + alpha = min(_lambda[i + num_pos], + gamma_proj * (1. / neg_bhat[i] - 1. / wtw)) + _lambda[i + num_pos] -= alpha + beta = -alpha / (1 + alpha * wtw) + neg_bhat[i] = 1. / ((1 / neg_bhat[i]) - (alpha / gamma)) Av = A.dot(v) A += np.outer(Av, Av * beta) @@ -95,7 +95,7 @@ def _fit(self, pairs, y, bounds=None): conv = np.inf break conv = np.abs(lambdaold - _lambda).sum() / normsum - if conv < self.convergence_threshold: + if conv < self.tol: break lambdaold = _lambda.copy() if self.verbose: @@ -125,91 +125,91 @@ class ITML(_BaseITML, _PairsClassifierMixin): Parameters ---------- - gamma : float, optional (default=1.) - Value for slack variables + gamma : float, optional (default=1.0) + Value for slack variables max_iter : int, optional (default=1000) - Maximum number of iteration of the optimization procedure. + Maximum number of iteration of the optimization procedure. - convergence_threshold : float, optional (default=1e-3) - Convergence tolerance. + tol : float, optional (default=1e-3) + Convergence tolerance. prior : string or numpy array, optional (default='identity') - The Mahalanobis matrix to use as a prior. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). + The Mahalanobis matrix to use as a prior. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The inverse covariance matrix. + 'covariance' + The inverse covariance matrix. - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. verbose : bool, optional (default=False) - If True, prints information while learning + If True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. 
random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. + + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. If - not provided at initialization, bounds_[0] and bounds_[1] are set at - train time to the 5th and 95th percentile of the pairwise distances among - all points present in the input `pairs`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. If + not provided at initialization, bounds_[0] and bounds_[1] are set at + train time to the 5th and 95th percentile of the pairwise distances among + all points present in the input `pairs`. n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. Examples -------- - >>> from metric_learn import ITML_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> itml = ITML_Supervised(num_constraints=200) - >>> itml.fit(X, Y) + >>> from metric_learn import ITML + >>> pairs = [[[1.2, 7.5], [1.3, 1.5]], + >>> [[6.4, 2.6], [6.2, 9.7]], + >>> [[1.3, 4.5], [3.2, 4.6]], + >>> [[6.2, 5.5], [5.4, 5.4]]] + >>> y = [1, 1, -1, -1] + >>> # in this task we want points where the first feature is close to be + >>> # closer to each other, no matter how close the second feature is + >>> itml = ITML() + >>> itml.fit(pairs, y) References ---------- - .. [1] `Information-theoretic Metric Learning - `_ Jason V. Davis, et al. + .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning + `_. ICML 2007. """ def fit(self, pairs, y, bounds=None, calibration_params=None): @@ -222,28 +222,31 @@ def fit(self, pairs, y, bounds=None, calibration_params=None): ---------- pairs: array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) - 3D Array of pairs with each row corresponding to two points, - or 2D array of indices of pairs if the metric learner uses a - preprocessor. + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. 
+ y: array-like, of shape (n_constraints,) - Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + bounds : array-like of two numbers - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. - If not provided at initialization, bounds_[0] and bounds_[1] will be - set to the 5th and 95th percentile of the pairwise distances among all - points present in the input `pairs`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points present in the input `pairs`. + calibration_params : `dict` or `None` - Dictionary of parameters to give to `calibrate_threshold` for the - threshold calibration step done at the end of `fit`. If `None` is - given, `calibrate_threshold` will use the default parameters. + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object - Returns the instance. + Returns the instance. """ calibration_params = (calibration_params if calibration_params is not None else dict()) @@ -262,76 +265,85 @@ class ITML_Supervised(_BaseITML, TransformerMixin): Parameters ---------- - gamma : float, optional - value for slack variables - max_iter : int, optional - convergence_threshold : float, optional - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - bounds : Not used - .. deprecated:: 0.5.0 - `bounds` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Set `bounds` at fit time instead : - `itml_supervised.fit(X, y, bounds=...)` + gamma : float, optional (default=1.0) + Value for slack variables + + max_iter : int, optional (default=1000) + Maximum number of iterations of the optimization procedure. + + tol : float, optional (default=1e-3) + Tolerance of the optimization procedure. + + n_constraints : int, optional (default=None) + Number of constraints to generate. If None, default to `20 * + num_classes**2`. prior : string or numpy array, optional (default='identity') - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of shape - (n_features, n_features). For ITML, the prior should be strictly - positive definite (PD). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - A0 : Not used - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. 
- verbose : bool, optional - if True, prints information while learning + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of shape + (n_features, n_features). For ITML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. In any - case, `random_state` is also used to randomly sample constraints from - labels. + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. In any + case, `random_state` is also used to randomly sample constraints from + labels. + + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 Attributes ---------- bounds_ : `numpy.ndarray`, shape=(2,) - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. - If not provided at initialization, bounds_[0] and bounds_[1] are set at - train time to the 5th and 95th percentile of the pairwise distances - among all points in the training data `X`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] are set at + train time to the 5th and 95th percentile of the pairwise distances + among all points in the training data `X`. n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) 
+ + Examples + -------- + >>> from metric_learn import ITML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> itml = ITML_Supervised(n_constraints=200) + >>> itml.fit(X, Y) See Also -------- @@ -340,75 +352,56 @@ class ITML_Supervised(_BaseITML, TransformerMixin): that describes the supervised version of weakly supervised estimators. """ - def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - num_labeled='deprecated', num_constraints=None, - bounds='deprecated', prior='identity', A0='deprecated', - verbose=False, preprocessor=None, random_state=None): + def __init__(self, gamma=1.0, max_iter=1000, tol=1e-3, + n_constraints=None, prior='identity', + verbose=False, preprocessor=None, random_state=None, + num_constraints='deprecated', + convergence_threshold='deprecated'): _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter, - convergence_threshold=convergence_threshold, - A0=A0, prior=prior, verbose=verbose, - preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled - self.num_constraints = num_constraints - self.bounds = bounds - - def fit(self, X, y, random_state='deprecated', bounds=None): + tol=tol, + prior=prior, verbose=verbose, + preprocessor=preprocessor, + random_state=random_state, + convergence_threshold=convergence_threshold) + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_constraints = num_constraints + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed should be set) + self.num_constraints = 'deprecated' + + def fit(self, X, y, bounds=None): """Create constraints from labels and learn the ITML model. Parameters ---------- X : (n x d) matrix - Input data, where each row corresponds to a single instance. + Input data, where each row corresponds to a single instance. y : (n) array-like - Data labels. - - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `ITML_Supervised` object). + Data labels. bounds : array-like of two numbers - Bounds on similarity, aside slack variables, s.t. - ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` - and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of - dissimilar points ``c`` and ``d``, with ``d`` the learned distance. - If not provided at initialization, bounds_[0] and bounds_[1] will be - set to the 5th and 95th percentile of the pairwise distances among all - points in the training data `X`. + Bounds on similarity, aside slack variables, s.t. + ``d(a, b) < bounds_[0]`` for all given pairs of similar points ``a`` + and ``b``, and ``d(c, d) > bounds_[1]`` for all given pairs of + dissimilar points ``c`` and ``d``, with ``d`` the learned distance. + If not provided at initialization, bounds_[0] and bounds_[1] will be + set to the 5th and 95th percentile of the pairwise distances among all + points in the training data `X`. """ - # TODO: remove these in v0.6.0 - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' 
- ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if self.bounds != 'deprecated': - warnings.warn('"bounds" parameter from initialization is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use the "bounds" parameter of this ' - 'fit method instead.', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `ITML_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseITML._fit(self, pairs, y, bounds=bounds) diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py index ffc4c885..82ae20eb 100644 --- a/metric_learn/lfda.py +++ b/metric_learn/lfda.py @@ -1,11 +1,9 @@ """ Local Fisher Discriminant Analysis (LFDA) """ -from __future__ import division, absolute_import import numpy as np import scipy import warnings -from six.moves import xrange from sklearn.metrics import pairwise_distances from sklearn.base import TransformerMixin @@ -27,27 +25,27 @@ class LFDA(MahalanobisMixin, TransformerMixin): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). + Dimensionality of reduced space (if None, defaults to dimension of X). - num_dims : Not used + k : int, optional (default=None) + Number of nearest neighbors used in local scaling method. If None, + defaults to min(7, n_features - 1). - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + embedding_type : str, optional (default: 'weighted') + Type of metric in the embedding space. - k : int, optional - Number of nearest neighbors used in local scaling method. - Defaults to min(7, n_components - 1). + 'weighted' + weighted eigenvectors - embedding_type : str, optional - Type of metric in the embedding space (default: 'weighted') - 'weighted' - weighted eigenvectors - 'orthonormalized' - orthonormalized - 'plain' - raw eigenvectors + 'orthonormalized' + orthonormalized + + 'plain' + raw eigenvectors preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. Attributes ---------- @@ -67,22 +65,22 @@ class LFDA(MahalanobisMixin, TransformerMixin): >>> lfda.fit(X, Y) References - ------------------ - .. 
[1] `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher - Discriminant Analysis `_ - Masashi Sugiyama. - - .. [2] `Local Fisher Discriminant Analysis on Beer Style Clustering - `_ Yuan Tang. + ---------- + .. [1] Masashi Sugiyama. `Dimensionality Reduction of Multimodal Labeled + Data by Local Fisher Discriminant Analysis + `_. JMLR 2007. + + .. [2] Yuan Tang. `Local Fisher Discriminant Analysis on Beer Style + Clustering + `_. ''' - def __init__(self, n_components=None, num_dims='deprecated', + def __init__(self, n_components=None, k=None, embedding_type='weighted', preprocessor=None): if embedding_type not in ('weighted', 'orthonormalized', 'plain'): raise ValueError('Invalid embedding_type: %r' % embedding_type) self.n_components = n_components - self.num_dims = num_dims self.embedding_type = embedding_type self.k = k super(LFDA, self).__init__(preprocessor) @@ -98,11 +96,6 @@ def fit(self, X, y): y : (n,) array-like Class labels, one per point of data. ''' - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) unique_classes, y = np.unique(y, return_inverse=True) n, d = X.shape @@ -113,15 +106,16 @@ def fit(self, X, y): if self.k is None: k = min(7, d - 1) elif self.k >= d: - warnings.warn('Chosen k (%d) too large, using %d instead.' % (self.k,d-1)) + warnings.warn('Chosen k (%d) too large, using %d instead.' + % (self.k, d - 1)) k = d - 1 else: k = int(self.k) - tSb = np.zeros((d,d)) - tSw = np.zeros((d,d)) + tSb = np.zeros((d, d)) + tSw = np.zeros((d, d)) - for c in xrange(num_classes): - Xc = X[y==c] + for c in range(num_classes): + Xc = X[y == c] nc = Xc.shape[0] # classwise affinity matrix @@ -132,14 +126,14 @@ def fit(self, X, y): local_scale = np.outer(sigma, sigma) with np.errstate(divide='ignore', invalid='ignore'): - A = np.exp(-dist/local_scale) - A[local_scale==0] = 0 + A = np.exp(-dist / local_scale) + A[local_scale == 0] = 0 - G = Xc.T.dot(A.sum(axis=0)[:,None] * Xc) - Xc.T.dot(A).dot(Xc) - tSb += G/n + (1-nc/n)*Xc.T.dot(Xc) + _sum_outer(Xc)/n - tSw += G/nc + G = Xc.T.dot(A.sum(axis=0)[:, None] * Xc) - Xc.T.dot(A).dot(Xc) + tSb += G / n + (1 - nc / n) * Xc.T.dot(Xc) + _sum_outer(Xc) / n + tSw += G / nc - tSb -= _sum_outer(X)/n - tSw + tSb -= _sum_outer(X) / n - tSw # symmetrize tSb = (tSb + tSb.T) / 2 @@ -148,7 +142,7 @@ def fit(self, X, y): vals, vecs = _eigh(tSb, tSw, dim) order = np.argsort(-vals)[:dim] vals = vals[order].real - vecs = vecs[:,order] + vecs = vecs[:, order] if self.embedding_type == 'weighted': vecs *= np.sqrt(vals) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index 2035588f..47bb065f 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -1,14 +1,11 @@ """ Large Margin Nearest Neighbor Metric learning (LMNN) """ -from __future__ import print_function, absolute_import import numpy as np -import warnings from collections import Counter -from six.moves import xrange -from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import euclidean_distances from sklearn.base import TransformerMixin +import warnings from ._util import _initialize_components, _check_n_components from .base_metric import MahalanobisMixin @@ -27,102 +24,91 @@ class LMNN(MahalanobisMixin, TransformerMixin): Parameters ---------- - init : None, string or numpy array, optional (default=None) - Initialization of the 
linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. - - k : int, optional - Number of neighbors to consider, not including self-edges. + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + n_neighbors : int, optional (default=3) + Number of neighbors to consider, not including self-edges. 
min_iter : int, optional (default=50) - Minimum number of iterations of the optimization procedure. + Minimum number of iterations of the optimization procedure. max_iter : int, optional (default=1000) - Maximum number of iterations of the optimization procedure. + Maximum number of iterations of the optimization procedure. learn_rate : float, optional (default=1e-7) - Learning rate of the optimization procedure + Learning rate of the optimization procedure tol : float, optional (default=0.001) - Tolerance of the optimization procedure. If the objective value varies - less than `tol`, we consider the algorithm has converged and stop it. - - use_pca : Not used - - .. deprecated:: 0.5.0 - `use_pca` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + Tolerance of the optimization procedure. If the objective value varies + less than `tol`, we consider the algorithm has converged and stop it. verbose : bool, optional (default=False) - Whether to print the progress of the optimization procedure. + Whether to print the progress of the optimization procedure. - regularization: float, optional - Weighting of pull and push terms, with 0.5 meaning equal weight. + regularization: float, optional (default=0.5) + Relative weight between pull and push terms, with 0.5 meaning equal + weight. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + Dimensionality of reduced space (if None, defaults to dimension of X). random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. + + k : Renamed to n_neighbors. Will be deprecated in 0.7.0 Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. Examples -------- @@ -133,56 +119,43 @@ class LMNN(MahalanobisMixin, TransformerMixin): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> lmnn = LMNN(k=5, learn_rate=1e-6) + >>> lmnn = LMNN(n_neighbors=5, learn_rate=1e-6) >>> lmnn.fit(X, Y, verbose=False) - Notes - ----- - - If a recent version of the Shogun Python modular (``modshogun``) library - is available, the LMNN implementation will use the fast C++ version from - there. Otherwise, the included pure-Python version will be used. - The two implementations differ slightly, and the C++ version is more - complete. 
- References ---------- - .. [1] `Distance Metric Learning for Large Margin Nearest Neighbor - Classification `_ - Kilian Q. Weinberger, John Blitzer, Lawrence K. Saul + .. [1] K. Q. Weinberger, J. Blitzer, L. K. Saul. `Distance Metric + Learning for Large Margin Nearest Neighbor Classification + `_. NIPS + 2005. """ - def __init__(self, init=None, k=3, min_iter=50, max_iter=1000, + def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000, learn_rate=1e-7, regularization=0.5, convergence_tol=0.001, - use_pca='deprecated', verbose=False, preprocessor=None, - n_components=None, num_dims='deprecated', random_state=None): + verbose=False, preprocessor=None, + n_components=None, random_state=None, k='deprecated'): self.init = init - self.k = k + if k != 'deprecated': + warnings.warn('"k" parameter has been renamed to' + ' "n_neighbors". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_neighbors = k + self.k = 'deprecated' # To avoid no_attribute error + self.n_neighbors = n_neighbors self.min_iter = min_iter self.max_iter = max_iter self.learn_rate = learn_rate self.regularization = regularization self.convergence_tol = convergence_tol - self.use_pca = use_pca self.verbose = verbose self.n_components = n_components - self.num_dims = num_dims self.random_state = random_state super(LMNN, self).__init__(preprocessor) def fit(self, X, y): - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) - if self.use_pca != 'deprecated': - warnings.warn('"use_pca" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0.', - DeprecationWarning) - k = self.k + k = self.n_neighbors reg = self.regularization learn_rate = self.learn_rate @@ -195,24 +168,11 @@ def fit(self, X, y): raise ValueError('Must have one label per point.') self.labels_ = np.arange(len(unique_labels)) - # if the init is the default (None), we raise a warning - if self.init is None: - # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous identity matrix. If you still want to use the identity " - "matrix as before, set init='identity'. 
This warning " - "will disappear in v0.6.0, and `init` parameter's default value " - "will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'auto' - else: - init = self.init - self.components_ = _initialize_components(output_dim, X, y, init, + self.components_ = _initialize_components(output_dim, X, y, self.init, self.verbose, random_state=self.random_state) required_k = np.bincount(label_inds).min() - if self.k > required_k: + if self.n_neighbors > required_k: raise ValueError('not enough class labels for specified k' ' (smallest class has %d)' % required_k) @@ -233,8 +193,12 @@ def fit(self, X, y): it = 1 # we already made one iteration + if self.verbose: + print("iter | objective | objective difference | active constraints", + "| learning rate") + # main loop - for it in xrange(2, self.max_iter): + for it in range(2, self.max_iter): # then at each iteration, we try to find a value of L that has better # objective than the previous L, following the gradient: while True: @@ -244,8 +208,8 @@ def fit(self, X, y): # we copy variables that can be modified by _loss_grad, because if we # retry we don t want to modify them several times (G_next, objective_next, total_active_next) = ( - self._loss_grad(X, L_next, dfG, k, reg, target_neighbors, - label_inds)) + self._loss_grad(X, L_next, dfG, k, reg, target_neighbors, + label_inds)) assert not np.isnan(objective) delta_obj = objective_next - objective if delta_obj > 0: @@ -298,7 +262,7 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): # compute the gradient total_active = 0 df = np.zeros((X.shape[1], X.shape[1])) - for nn_idx in reversed(xrange(k)): # note: reverse not useful here + for nn_idx in reversed(range(k)): # note: reverse not useful here act1 = g0 < g1[:, nn_idx] act2 = g0 < g2[:, nn_idx] total_active += act1.sum() + act2.sum() @@ -321,12 +285,12 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): return 2 * G, objective, total_active def _select_targets(self, X, label_inds): - target_neighbors = np.empty((X.shape[0], self.k), dtype=int) + target_neighbors = np.empty((X.shape[0], self.n_neighbors), dtype=int) for label in self.labels_: inds, = np.nonzero(label_inds == label) dd = euclidean_distances(X[inds], squared=True) np.fill_diagonal(dd, np.inf) - nn = np.argsort(dd)[..., :self.k] + nn = np.argsort(dd)[..., :self.n_neighbors] target_neighbors[inds] = inds[nn] return target_neighbors @@ -338,15 +302,15 @@ def _find_impostors(self, furthest_neighbors, X, label_inds, L): in_inds, = np.nonzero(label_inds == label) out_inds, = np.nonzero(label_inds > label) dist = euclidean_distances(Lx[out_inds], Lx[in_inds], squared=True) - i1,j1 = np.nonzero(dist < margin_radii[out_inds][:,None]) - i2,j2 = np.nonzero(dist < margin_radii[in_inds]) - i = np.hstack((i1,i2)) - j = np.hstack((j1,j2)) + i1, j1 = np.nonzero(dist < margin_radii[out_inds][:, None]) + i2, j2 = np.nonzero(dist < margin_radii[in_inds]) + i = np.hstack((i1, i2)) + j = np.hstack((j1, j2)) if i.size > 0: # get unique (i,j) pairs using index trickery - shape = (i.max()+1, j.max()+1) - tmp = np.ravel_multi_index((i,j), shape) - i,j = np.unravel_index(np.unique(tmp), shape) + shape = (i.max() + 1, j.max() + 1) + tmp = np.ravel_multi_index((i, j), shape) + i, j = np.unravel_index(np.unique(tmp), shape) impostors.append(np.vstack((in_inds[j], out_inds[i]))) if len(impostors) == 0: # No impostors detected @@ -361,19 +325,19 @@ def _inplace_paired_L2(A, B): def _count_edges(act1, act2, impostors, targets): - imp = 
impostors[0,act1] + imp = impostors[0, act1] c = Counter(zip(imp, targets[imp])) - imp = impostors[1,act2] + imp = impostors[1, act2] c.update(zip(imp, targets[imp])) if c: active_pairs = np.array(list(c.keys())) else: - active_pairs = np.empty((0,2), dtype=int) + active_pairs = np.empty((0, 2), dtype=int) return active_pairs, np.array(list(c.values())) def _sum_outer_products(data, a_inds, b_inds, weights=None): Xab = data[a_inds] - data[b_inds] if weights is not None: - return np.dot(Xab.T, Xab * weights[:,None]) + return np.dot(Xab.T, Xab * weights[:, None]) return np.dot(Xab.T, Xab) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 340e6bf2..af7fa95b 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -2,24 +2,21 @@ Metric Learning from Relative Comparisons by Minimizing Squared Residual (LSML) """ -from __future__ import print_function, absolute_import, division -import warnings import numpy as np import scipy.linalg -from six.moves import xrange from sklearn.base import TransformerMixin -from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin from .constraints import Constraints from ._util import components_from_metric, _initialize_metric_mahalanobis +import warnings class _BaseLSML(MahalanobisMixin): _tuple_size = 4 # constraints are quadruplets - def __init__(self, tol=1e-3, max_iter=1000, prior=None, + def __init__(self, tol=1e-3, max_iter=1000, prior='identity', verbose=False, preprocessor=None, random_state=None): self.prior = prior self.tol = tol @@ -42,22 +39,10 @@ def _fit(self, quadruplets, weights=None): else: self.w_ = weights self.w_ /= self.w_.sum() # weights must sum to 1 - # if the prior is the default (None), we raise a warning - if self.prior is None: - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - warnings.warn(msg, ChangedBehaviorWarning) - prior = 'identity' - else: - prior = self.prior - M, prior_inv = _initialize_metric_mahalanobis(quadruplets, prior, - return_inverse=True, strict_pd=True, matrix_name='prior', - random_state=self.random_state) + M, prior_inv = _initialize_metric_mahalanobis( + quadruplets, self.prior, + return_inverse=True, strict_pd=True, matrix_name='prior', + random_state=self.random_state) step_sizes = np.logspace(-10, 0, 10) # Keep track of the best step size and the loss at that step. 
@@ -65,7 +50,7 @@ def _fit(self, quadruplets, weights=None): s_best = self._total_loss(M, vab, vcd, prior_inv) if self.verbose: print('initial loss', s_best) - for it in xrange(1, self.max_iter+1): + for it in range(1, self.max_iter + 1): grad = self._gradient(M, vab, vcd, prior_inv) grad_norm = scipy.linalg.norm(grad) if grad_norm < self.tol: @@ -117,8 +102,8 @@ def _gradient(self, metric, vab, vcd, prior_inv): # TODO: vectorize for vab, dab, vcd, dcd in zip(vab[violations], dabs[violations], vcd[violations], dcds[violations]): - dMetric += ((1-np.sqrt(dcd/dab))*np.outer(vab, vab) + - (1-np.sqrt(dab/dcd))*np.outer(vcd, vcd)) + dMetric += ((1 - np.sqrt(dcd / dab)) * np.outer(vab, vab) + + (1 - np.sqrt(dab / dcd)) * np.outer(vcd, vcd)) return dMetric @@ -138,60 +123,66 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Parameters ---------- - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random positive definite - (PD) matrix of shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - tol : float, optional - max_iter : int, optional - verbose : bool, optional - if True, prints information while learning + prior : string or numpy array, optional (default='identity') + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + tol : float, optional (default=1e-3) + Convergence tolerance of the optimization procedure. + + max_iter : int, optional (default=1000) + Maximum number of iteration of the optimization procedure. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. 
Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) Examples -------- - >>> from metric_learn import LSML_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> lsml = LSML_Supervised(num_constraints=200) - >>> lsml.fit(X, Y) + >>> from metric_learn import LSML + >>> quadruplets = [[[1.2, 7.5], [1.3, 1.5], [6.4, 2.6], [6.2, 9.7]], + >>> [[1.3, 4.5], [3.2, 4.6], [6.2, 5.5], [5.4, 5.4]], + >>> [[3.2, 7.5], [3.3, 1.5], [8.4, 2.6], [8.2, 9.7]], + >>> [[3.3, 4.5], [5.2, 4.6], [8.2, 5.5], [7.4, 5.4]]] + >>> # we want to make closer points where the first feature is close, and + >>> # further if the second feature is close + >>> lsml = LSML() + >>> lsml.fit(quadruplets) References ---------- @@ -199,7 +190,7 @@ class LSML(_BaseLSML, _QuadrupletsClassifierMixin): Squared Residual `_. ICDM 2012. - .. [2] Adapted from https://gist.github.com/kcarnold/5439917 + .. [2] Code adapted from https://gist.github.com/kcarnold/5439917 See Also -------- @@ -216,18 +207,19 @@ def fit(self, quadruplets, weights=None): ---------- quadruplets : array-like, shape=(n_constraints, 4, n_features) or \ (n_constraints, 4) - 3D array-like of quadruplets of points or 2D array of quadruplets of - indicators. In order to supervise the algorithm in the right way, we - should have the four samples ordered in a way such that: - d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < - n_constraints. + 3D array-like of quadruplets of points or 2D array of quadruplets of + indicators. In order to supervise the algorithm in the right way, we + should have the four samples ordered in a way such that: + d(pairs[i, 0],X[i, 1]) < d(X[i, 2], X[i, 3]) for all 0 <= i < + n_constraints. + weights : (n_constraints,) array of floats, optional - scale factor for each constraint + scale factor for each constraint Returns ------- self : object - Returns the instance. + Returns the instance. """ return self._fit(quadruplets, weights=weights) @@ -243,112 +235,114 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): Parameters ---------- tol : float, optional (default=1e-3) - Tolerance for the convergence procedure. + Convergence tolerance of the optimization procedure. + max_iter : int, optional (default=1000) - Number of maximum iterations of the convergence procedure. - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For LSML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random positive definite - (PD) matrix of shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. 
- - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - weights : (m,) array of floats, optional - scale factor for each constraint - verbose : bool, optional - if True, prints information while learning + Number of maximum iterations of the optimization procedure. + + prior : string or numpy array, optional (default='identity') + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For LSML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random positive definite + (PD) matrix of shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + n_constraints: int, optional (default=None) + Number of constraints to generate. If None, default to `20 * + num_classes**2`. + + weights : (n_constraints,) array of floats, optional (default=None) + Relative weight given to each constraint. If None, defaults to uniform + weights. + + verbose : bool, optional (default=False) + If True, prints information while learning + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. In any case, `random_state` is also used to randomly sample - constraints from labels. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. + + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 + + Examples + -------- + >>> from metric_learn import LSML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> lsml = LSML_Supervised(n_constraints=200) + >>> lsml.fit(X, Y) Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) 
""" - def __init__(self, tol=1e-3, max_iter=1000, prior=None, - num_labeled='deprecated', num_constraints=None, weights=None, - verbose=False, preprocessor=None, random_state=None): + def __init__(self, tol=1e-3, max_iter=1000, prior='identity', + n_constraints=None, weights=None, + verbose=False, preprocessor=None, random_state=None, + num_constraints='deprecated'): _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled - self.num_constraints = num_constraints + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' self.weights = weights - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the LSML model. Parameters ---------- X : (n x d) matrix - Input data, where each row corresponds to a single instance. + Input data, where each row corresponds to a single instance. y : (n) array-like - Data labels. - - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `LSML_Supervised` object). + Data labels. """ - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `LSML_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. 
' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, same_length=True, + pos_neg = c.positive_negative_pairs(n_constraints, same_length=True, random_state=self.random_state) return _BaseLSML._fit(self, X[np.column_stack(pos_neg)], weights=self.weights) diff --git a/metric_learn/mlkr.py b/metric_learn/mlkr.py index 471694b6..01d185e7 100644 --- a/metric_learn/mlkr.py +++ b/metric_learn/mlkr.py @@ -1,21 +1,18 @@ """ Metric Learning for Kernel Regression (MLKR) """ -from __future__ import division, print_function import time import sys import warnings import numpy as np -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning -from sklearn.utils.fixes import logsumexp from scipy.optimize import minimize +from scipy.special import logsumexp from sklearn.base import TransformerMixin - +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import pairwise_distances -from metric_learn._util import _check_n_components from .base_metric import MahalanobisMixin -from ._util import _initialize_components +from ._util import _initialize_components, _check_n_components EPS = np.finfo(float).eps @@ -33,78 +30,65 @@ class MLKR(MahalanobisMixin, TransformerMixin): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components < min(n_features, n_samples)``, - we use 'pca', as it projects data in meaningful directions (those - of higher variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. - - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. - - A0: Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - - tol: float, optional (default=None) - Convergence tolerance for the optimization. 
- - max_iter: int, optional - Cap on number of conjugate gradient iterations. + Dimensionality of reduced space (if None, defaults to dimension of X). + + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components < min(n_features, n_samples)``, + we use 'pca', as it projects data in meaningful directions (those + of higher variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. + + tol : float, optional (default=None) + Convergence tolerance for the optimization. + + max_iter : int, optional (default=1000) + Cap on number of conjugate gradient iterations. verbose : bool, optional (default=False) - Whether to print progress messages or not. + Whether to print progress messages or not. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. Examples -------- @@ -119,18 +103,16 @@ class MLKR(MahalanobisMixin, TransformerMixin): References ---------- - .. [1] `Information-theoretic Metric Learning - `_ Jason V. Davis, et al. + .. [1] K.Q. Weinberger and G. Tesauro. `Metric Learning for Kernel + Regression `_. AISTATS 2007. 
""" - def __init__(self, n_components=None, num_dims='deprecated', init=None, - A0='deprecated', tol=None, max_iter=1000, verbose=False, + def __init__(self, n_components=None, init='auto', + tol=None, max_iter=1000, verbose=False, preprocessor=None, random_state=None): self.n_components = n_components - self.num_dims = num_dims self.init = init - self.A0 = A0 self.tol = tol self.max_iter = max_iter self.verbose = verbose @@ -146,18 +128,6 @@ def fit(self, X, y): X : (n x d) array of samples y : (n) data labels """ - if self.A0 != 'deprecated': - warnings.warn('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.', - DeprecationWarning) - - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) - X, y = self._prepare_inputs(X, y, y_numeric=True, ensure_min_samples=2) n, d = X.shape @@ -170,19 +140,7 @@ def fit(self, X, y): if m is None: m = d # if the init is the default (None), we raise a warning - if self.init is None: - # TODO: - # replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of 'pca'. " - "If you still want to use PCA as an init, set init='pca'. " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'auto' - else: - init = self.init - A = _initialize_components(m, X, y, init=init, + A = _initialize_components(m, X, y, init=self.init, random_state=self.random_state, # MLKR works on regression targets: has_classes=False) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index c8c52b24..5cf166fd 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -1,30 +1,34 @@ """Mahalanobis Metric for Clustering (MMC)""" -from __future__ import print_function, absolute_import, division -import warnings import numpy as np -from six.moves import xrange from sklearn.base import TransformerMixin from sklearn.utils.validation import assert_all_finite -from sklearn.exceptions import ChangedBehaviorWarning from .base_metric import _PairsClassifierMixin, MahalanobisMixin from .constraints import Constraints, wrap_pairs from ._util import components_from_metric, _initialize_metric_mahalanobis +import warnings class _BaseMMC(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, - init=None, A0='deprecated', diagonal=False, + def __init__(self, max_iter=100, max_proj=10000, tol=1e-3, + init='identity', diagonal=False, diagonal_c=1.0, verbose=False, preprocessor=None, - random_state=None): + random_state=None, + convergence_threshold='deprecated'): + if convergence_threshold != 'deprecated': + warnings.warn('"convergence_threshold" parameter has been ' + ' renamed to "tol". 
It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + tol = convergence_threshold + self.convergence_threshold = 'deprecated' # Avoid errors self.max_iter = max_iter self.max_proj = max_proj - self.convergence_threshold = convergence_threshold + self.tol = tol self.init = init - self.A0 = A0 self.diagonal = diagonal self.diagonal_c = diagonal_c self.verbose = verbose @@ -32,30 +36,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3, super(_BaseMMC, self).__init__(preprocessor) def _fit(self, pairs, y): - if self.A0 != 'deprecated': - warnings.warn('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.', - DeprecationWarning) pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') - if self.init is None: - # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'identity', instead of the " - "identity divided by a scaling factor of 10. " - "If you still want to use the same init as in previous " - "versions, set init=np.eye(d)/10, where d is the dimension " - "of your input space (d=pairs.shape[1]). " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'identity' - else: - init = self.init - - self.A_ = _initialize_metric_mahalanobis(pairs, init, + self.A_ = _initialize_metric_mahalanobis(pairs, self.init, random_state=self.random_state, matrix_name='init') @@ -70,14 +54,14 @@ def _fit_full(self, pairs, y): Parameters ---------- X : (n x d) data matrix - each row corresponds to a single instance + Each row corresponds to a single instance. constraints : 4-tuple of arrays - (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) - dissimilar pairs + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs. 
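(Editorial note, not part of the diff: a short sketch of what the backward-compatible `convergence_threshold` -> `tol` rename above means for callers. Both constructions below are assumed to configure the same tolerance; the second additionally emits a FutureWarning.)

from metric_learn import MMC

mmc_new = MMC(tol=1e-3)                      # new parameter name
mmc_old = MMC(convergence_threshold=1e-3)    # old name still accepted, warns FutureWarning
assert mmc_old.tol == mmc_new.tol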
""" num_dim = pairs.shape[2] - error1 = error2 = 1e10 + error2 = 1e10 eps = 0.01 # error-bound of iterative projection on C1 and C2 A = self.A_ @@ -105,16 +89,17 @@ def _fit_full(self, pairs, y): # constraint function grad2 = self._fD1(neg_pairs, A) # gradient of dissimilarity # constraint function - M = self._grad_projection(grad1, grad2) # gradient of fD1 orthogonal to fS1 + # gradient of fD1 orthogonal to fS1: + M = self._grad_projection(grad1, grad2) A_old = A.copy() - for cycle in xrange(self.max_iter): + for cycle in range(self.max_iter): # projection of constraints C1 and C2 satisfy = False - for it in xrange(self.max_proj): + for it in range(self.max_proj): # First constraint: # f(A) = \sum_{i,j \in S} d_ij' A d_ij <= t (1) @@ -133,7 +118,7 @@ def _fit_full(self, pairs, y): # PSD constraint A >= 0 # project A onto domain A>0 l, V = np.linalg.eigh((A + A.T) / 2) - A[:] = np.dot(V * np.maximum(0, l[None,:]), V.T) + A[:] = np.dot(V * np.maximum(0, l[None, :]), V.T) fDC2 = w.dot(A.ravel()) error2 = (fDC2 - t) / t @@ -169,12 +154,13 @@ def _fit_full(self, pairs, y): A[:] = A_old + alpha * M delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old) - if delta < self.convergence_threshold: + if delta < self.tol: break if self.verbose: - print('mmc iter: %d, conv = %f, projections = %d' % (cycle, delta, it+1)) + print('mmc iter: %d, conv = %f, projections = %d' % + (cycle, delta, it + 1)) - if delta > self.convergence_threshold: + if delta > self.tol: self.converged_ = False if self.verbose: print('mmc did not converge, conv = %f' % (delta,)) @@ -193,10 +179,10 @@ def _fit_diag(self, pairs, y): Parameters ---------- X : (n x d) data matrix - each row corresponds to a single instance + Each row corresponds to a single instance. constraints : 4-tuple of arrays - (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) - dissimilar pairs + (a,b,c,d) indices into X, with (a,b) specifying similar and (c,d) + dissimilar pairs. """ num_dim = pairs.shape[2] pos_pairs, neg_pairs = pairs[y == 1], pairs[y == -1] @@ -208,14 +194,16 @@ def _fit_diag(self, pairs, y): reduction = 2.0 w = np.diag(self.A_).copy() - while error > self.convergence_threshold and it < self.max_iter: + while error > self.tol and it < self.max_iter: fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w) obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0 fS_1st_d = s_sum # first derivative of the similarity constraints - gradient = fS_1st_d - self.diagonal_c * fD_1st_d # gradient of the objective - hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) # Hessian of the objective + # gradient of the objective: + gradient = fS_1st_d - self.diagonal_c * fD_1st_d + # Hessian of the objective: + hessian = -self.diagonal_c * fD_2nd_d + eps * np.eye(num_dim) step = np.dot(np.linalg.inv(hessian), gradient) # Newton-Rapshon update @@ -225,7 +213,7 @@ def _fit_diag(self, pairs, y): obj = (np.dot(s_sum, w_tmp) + self.diagonal_c * self._D_objective(neg_pairs, w_tmp)) assert_all_finite(obj) - obj_previous = obj + 1 # just to get the while-loop started + obj_previous = np.inf # just to get the while-loop started inner_it = 0 while obj < obj_previous: @@ -250,16 +238,17 @@ def _fit_diag(self, pairs, y): return self def _fD(self, neg_pairs, A): - """The value of the dissimilarity constraint function. + r"""The value of the dissimilarity constraint function. f = f(\sum_{ij \in D} distance(x_i, x_j)) i.e. 
distance can be L1: \sqrt{(x_i-x_j)A(x_i-x_j)'} """ diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] - return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + 1e-6) + return np.log(np.sum(np.sqrt(np.sum(np.dot(diff, A) * diff, axis=1))) + + 1e-6) def _fD1(self, neg_pairs, A): - """The gradient of the dissimilarity constraint function w.r.t. A. + r"""The gradient of the dissimilarity constraint function w.r.t. A. For example, let distance by L1 norm: f = f(\sum_{ij \in D} \sqrt{(x_i-x_j)A(x_i-x_j)'}) @@ -270,19 +259,19 @@ def _fD1(self, neg_pairs, A): df/dA = f'(\sum_{ij \in D} \sqrt{tr(d_ij'*d_ij*A)}) * 0.5*(\sum_{ij \in D} (1/sqrt{tr(d_ij'*d_ij*A)})*(d_ij'*d_ij)) """ - dim = neg_pairs.shape[2] diff = neg_pairs[:, 0, :] - neg_pairs[:, 1, :] # outer products of all rows in `diff` M = np.einsum('ij,ik->ijk', diff, diff) # faster version of: dist = np.sqrt(np.sum(M * A[None,:,:], axis=(1,2))) dist = np.sqrt(np.einsum('ijk,jk', M, A)) - # faster version of: sum_deri = np.sum(M / (2 * (dist[:,None,None] + 1e-6)), axis=0) + # faster version of: sum_deri = np.sum(M / + # (2 * (dist[:,None,None] + 1e-6)), axis=0) sum_deri = np.einsum('ijk,i->jk', M, 0.5 / (dist + 1e-6)) sum_dist = dist.sum() return sum_deri / (sum_dist + 1e-6) def _fS1(self, pos_pairs, A): - """The gradient of the similarity constraint function w.r.t. A. + r"""The gradient of the similarity constraint function w.r.t. A. f = \sum_{ij}(x_i-x_j)A(x_i-x_j)' = \sum_{ij}d_ij*A*d_ij' df/dA = d(d_ij*A*d_ij')/dA @@ -290,9 +279,9 @@ def _fS1(self, pos_pairs, A): Note that d_ij*A*d_ij' = tr(d_ij*A*d_ij') = tr(d_ij'*d_ij*A) so, d(d_ij*A*d_ij')/dA = d_ij'*d_ij """ - dim = pos_pairs.shape[2] diff = pos_pairs[:, 0, :] - pos_pairs[:, 1, :] - return np.einsum('ij,ik->jk', diff, diff) # sum of outer products of all rows in `diff` + # sum of outer products of all rows in `diff`: + return np.einsum('ij,ik->jk', diff, diff) def _grad_projection(self, grad1, grad2): grad2 = grad2 / np.linalg.norm(grad2) @@ -303,7 +292,7 @@ def _grad_projection(self, grad1, grad2): def _D_objective(self, neg_pairs, w): return np.log(np.sum(np.sqrt(np.sum(((neg_pairs[:, 0, :] - neg_pairs[:, 1, :]) ** 2) * - w[None,:], axis=1) + 1e-6))) + w[None, :], axis=1) + 1e-6))) def _D_constraint(self, neg_pairs, w): """Compute the value, 1st derivative, second derivative (Hessian) of @@ -317,13 +306,14 @@ def _D_constraint(self, neg_pairs, w): sum_deri2 = np.einsum( 'ij,ik->jk', diff_sq, - diff_sq / (-4 * np.maximum(1e-6, dist**3))[:,None] + diff_sq / (-4 * np.maximum(1e-6, dist**3))[:, None] ) sum_dist = dist.sum() return ( - np.log(sum_dist), - sum_deri1 / sum_dist, - sum_deri2 / sum_dist - np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) + np.log(sum_dist), + sum_deri1 / sum_dist, + sum_deri2 / sum_dist - + np.outer(sum_deri1, sum_deri1) / (sum_dist * sum_dist) ) @@ -346,94 +336,92 @@ class MMC(_BaseMMC, _PairsClassifierMixin): Parameters ---------- max_iter : int, optional (default=100) - Maximum number of iterations of the convergence procedure. + Maximum number of iterations of the optimization procedure. max_proj : int, optional (default=10000) - Maximum number of projection steps. + Maximum number of projection steps. - convergence_threshold : float, optional (default=1e-6) - Convergence threshold for the convergence procedure. + tol : float, optional (default=1e-3) + Convergence threshold for the optimization procedure. - init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. 
Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). + init : string or numpy array, optional (default='identity') + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). - 'identity' - An identity matrix of shape (n_features, n_features). + 'identity' + An identity matrix of shape (n_features, n_features). - 'covariance' - The (pseudo-)inverse of the covariance matrix. + 'covariance' + The (pseudo-)inverse of the covariance matrix. - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of - shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - numpy array - An SPD matrix of shape (n_features, n_features), that will - be used as such to initialize the metric. + numpy array + An SPD matrix of shape (n_features, n_features), that will + be used as such to initialize the metric. - verbose : bool, optional - if True, prints information while learning + diagonal : bool, optional (default=False) + If True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. + + diagonal_c : float, optional (default=1.0) + Weight of the dissimilarity constraint for diagonal + metric learning. Ignored if ``diagonal=False``. + + verbose : bool, optional (default=False) + If True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions. The initialization will then - be the diagonal coefficients of the matrix given as 'init'. - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. + + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. 
components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. Examples -------- - >>> from metric_learn import MMC_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> mmc = MMC_Supervised(num_constraints=200) - >>> mmc.fit(X, Y) + >>> from metric_learn import MMC + >>> pairs = [[[1.2, 7.5], [1.3, 1.5]], + >>> [[6.4, 2.6], [6.2, 9.7]], + >>> [[1.3, 4.5], [3.2, 4.6]], + >>> [[6.2, 5.5], [5.4, 5.4]]] + >>> y = [1, 1, -1, -1] + >>> # in this task we want points where the first feature is close to be + >>> # closer to each other, no matter how close the second feature is + >>> mmc = MMC() + >>> mmc.fit(pairs, y) References ---------- - .. [1] `Distance metric learning with application to clustering with - side-information `_ - Xing, Jordan, Russell, Ng. + .. [1] Xing, Jordan, Russell, Ng. `Distance metric learning with application + to clustering with side-information + `_. + NIPS 2002. See Also -------- @@ -452,19 +440,22 @@ def fit(self, pairs, y, calibration_params=None): ---------- pairs : array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) - 3D Array of pairs with each row corresponding to two points, - or 2D array of indices of pairs if the metric learner uses a - preprocessor. + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + y : array-like, of shape (n_constraints,) - Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + calibration_params : `dict` or `None` - Dictionary of parameters to give to `calibrate_threshold` for the - threshold calibration step done at the end of `fit`. If `None` is - given, `calibrate_threshold` will use the default parameters. + Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. + Returns ------- self : object - Returns the instance. + Returns the instance. """ calibration_params = (calibration_params if calibration_params is not None else dict()) @@ -483,129 +474,128 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): Parameters ---------- - max_iter : int, optional - max_proj : int, optional - convergence_threshold : float, optional - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints: int, optional - number of constraints to generate - init : None, string or numpy array, optional (default=None) - Initialization of the Mahalanobis matrix. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). 
If None, will be set - automatically to 'identity' (this is to raise a warning if - 'init' is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The (pseudo-)inverse of the covariance matrix. - - 'random' - The initial Mahalanobis matrix will be a random SPD matrix of - shape `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A numpy array of shape (n_features, n_features), that will - be used as such to initialize the metric. - - verbose : bool, optional - if True, prints information while learning + max_iter : int, optional (default=100) + Maximum number of iterations of the optimization procedure. + + max_proj : int, optional (default=10000) + Maximum number of projection steps. + + tol : float, optional (default=1e-3) + Convergence threshold for the optimization procedure. + + n_constraints: int, optional (default=None) + Number of constraints to generate. If None, default to `20 * + num_classes**2`. + + init : string or numpy array, optional (default='identity') + Initialization of the Mahalanobis matrix. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The (pseudo-)inverse of the covariance matrix. + + 'random' + The initial Mahalanobis matrix will be a random SPD matrix of + shape `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A numpy array of shape (n_features, n_features), that will + be used as such to initialize the metric. + + diagonal : bool, optional (default=False) + If True, a diagonal metric will be learned, + i.e., a simple scaling of dimensions. The initialization will then + be the diagonal coefficients of the matrix given as 'init'. + + diagonal_c : float, optional (default=1.0) + Weight of the dissimilarity constraint for diagonal + metric learning. Ignored if ``diagonal=False``. + + verbose : bool, optional (default=False) + If True, prints information while learning preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. - A0 : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'init' instead. - diagonal : bool, optional - if True, a diagonal metric will be learned, - i.e., a simple scaling of dimensions - diagonal_c : float, optional - weight of the dissimilarity constraint for diagonal - metric learning - verbose : bool, optional - if True, prints information while learning - preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. In any case, `random_state` is also used to - randomly sample constraints from labels. + A pseudo random number generator object or a seed for it if int. 
If + ``init='random'``, ``random_state`` is used to initialize the random + Mahalanobis matrix. In any case, `random_state` is also used to + randomly sample constraints from labels. - `MMC_Supervised` creates pairs of similar sample by taking same class - samples, and pairs of dissimilar samples by taking different class - samples. It then passes these pairs to `MMC` for training. + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 + + convergence_threshold : Renamed to tol. Will be deprecated in 0.7.0 + + Examples + -------- + >>> from metric_learn import MMC_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> mmc = MMC_Supervised(n_constraints=200) + >>> mmc.fit(X, Y) Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) """ - def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled='deprecated', num_constraints=None, init=None, - A0='deprecated', diagonal=False, diagonal_c=1.0, verbose=False, - preprocessor=None, random_state=None): + def __init__(self, max_iter=100, max_proj=10000, tol=1e-6, + n_constraints=None, init='identity', + diagonal=False, diagonal_c=1.0, verbose=False, + preprocessor=None, random_state=None, + num_constraints='deprecated', + convergence_threshold='deprecated'): _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj, - convergence_threshold=convergence_threshold, - init=init, A0=A0, diagonal=diagonal, + tol=tol, + init=init, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose, - preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled - self.num_constraints = num_constraints + preprocessor=preprocessor, + random_state=random_state, + convergence_threshold=convergence_threshold) + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the MMC model. Parameters ---------- X : (n x d) matrix - Input data, where each row corresponds to a single instance. + Input data, where each row corresponds to a single instance. + y : (n) array-like - Data labels. - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `MMC_Supervised` object). + Data labels. """ - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. 
Set `random_state` at initialization ' - 'instead (when instantiating a new `MMC_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/nca.py b/metric_learn/nca.py index 03abdc41..7b4423d3 100644 --- a/metric_learn/nca.py +++ b/metric_learn/nca.py @@ -2,16 +2,15 @@ Neighborhood Components Analysis (NCA) """ -from __future__ import absolute_import import warnings import time import sys import numpy as np from scipy.optimize import minimize -from sklearn.metrics import pairwise_distances -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning -from sklearn.utils.fixes import logsumexp +from scipy.special import logsumexp from sklearn.base import TransformerMixin +from sklearn.exceptions import ConvergenceWarning +from sklearn.metrics import pairwise_distances from ._util import _initialize_components, _check_n_components from .base_metric import MahalanobisMixin @@ -33,71 +32,63 @@ class NCA(MahalanobisMixin, TransformerMixin): Parameters ---------- - init : None, string or numpy array, optional (default=None) - Initialization of the linear transformation. Possible options are - 'auto', 'pca', 'identity', 'random', and a numpy array of shape - (n_features_a, n_features_b). If None, will be set automatically to - 'auto' (this option is to raise a warning if 'init' is not set, - and stays to its default value None, in v0.5.0). - - 'auto' - Depending on ``n_components``, the most reasonable initialization - will be chosen. If ``n_components <= n_classes`` we use 'lda', as - it uses labels information. If not, but - ``n_components < min(n_features, n_samples)``, we use 'pca', as - it projects data in meaningful directions (those of higher - variance). Otherwise, we just use 'identity'. - - 'pca' - ``n_components`` principal components of the inputs passed - to :meth:`fit` will be used to initialize the transformation. - (See `sklearn.decomposition.PCA`) - - 'lda' - ``min(n_components, n_classes)`` most discriminative - components of the inputs passed to :meth:`fit` will be used to - initialize the transformation. (If ``n_components > n_classes``, - the rest of the components will be zero.) (See - `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) - - 'identity' - If ``n_components`` is strictly smaller than the - dimensionality of the inputs passed to :meth:`fit`, the identity - matrix will be truncated to the first ``n_components`` rows. - - 'random' - The initial transformation will be a random array of shape - `(n_components, n_features)`. Each value is sampled from the - standard normal distribution. 
- - numpy array - n_features_b must match the dimensionality of the inputs passed to - :meth:`fit` and n_features_a must be less than or equal to that. - If ``n_components`` is not None, n_features_a must match it. + init : string or numpy array, optional (default='auto') + Initialization of the linear transformation. Possible options are + 'auto', 'pca', 'identity', 'random', and a numpy array of shape + (n_features_a, n_features_b). + + 'auto' + Depending on ``n_components``, the most reasonable initialization + will be chosen. If ``n_components <= n_classes`` we use 'lda', as + it uses labels information. If not, but + ``n_components < min(n_features, n_samples)``, we use 'pca', as + it projects data in meaningful directions (those of higher + variance). Otherwise, we just use 'identity'. + + 'pca' + ``n_components`` principal components of the inputs passed + to :meth:`fit` will be used to initialize the transformation. + (See `sklearn.decomposition.PCA`) + + 'lda' + ``min(n_components, n_classes)`` most discriminative + components of the inputs passed to :meth:`fit` will be used to + initialize the transformation. (If ``n_components > n_classes``, + the rest of the components will be zero.) (See + `sklearn.discriminant_analysis.LinearDiscriminantAnalysis`) + + 'identity' + If ``n_components`` is strictly smaller than the + dimensionality of the inputs passed to :meth:`fit`, the identity + matrix will be truncated to the first ``n_components`` rows. + + 'random' + The initial transformation will be a random array of shape + `(n_components, n_features)`. Each value is sampled from the + standard normal distribution. + + numpy array + n_features_b must match the dimensionality of the inputs passed to + :meth:`fit` and n_features_a must be less than or equal to that. + If ``n_components`` is not None, n_features_a must match it. n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + Dimensionality of reduced space (if None, defaults to dimension of X). max_iter : int, optional (default=100) Maximum number of iterations done by the optimization algorithm. tol : float, optional (default=None) - Convergence tolerance for the optimization. + Convergence tolerance for the optimization. verbose : bool, optional (default=False) Whether to print progress messages or not. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to initialize the random - transformation. If ``init='pca'``, ``random_state`` is passed as an - argument to PCA when initializing the transformation. + A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to initialize the random + transformation. If ``init='pca'``, ``random_state`` is passed as an + argument to PCA when initializing the transformation. Examples -------- @@ -114,28 +105,27 @@ class NCA(MahalanobisMixin, TransformerMixin): Attributes ---------- n_iter_ : `int` - The number of iterations the solver has run. + The number of iterations the solver has run. components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. References ---------- .. [1] J. Goldberger, G. 
Hinton, S. Roweis, R. Salakhutdinov. `Neighbourhood Components Analysis `_. - Advances in Neural Information Processing Systems. 17, 513-520, 2005. + NIPS 2005. .. [2] Wikipedia entry on `Neighborhood Components Analysis `_ """ - def __init__(self, init=None, n_components=None, num_dims='deprecated', + def __init__(self, init='auto', n_components=None, max_iter=100, tol=None, verbose=False, preprocessor=None, random_state=None): self.n_components = n_components self.init = init - self.num_dims = num_dims self.max_iter = max_iter self.tol = tol self.verbose = verbose @@ -147,11 +137,6 @@ def fit(self, X, y): X: data matrix, (n x d) y: scalar labels, (n) """ - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) X, labels = self._prepare_inputs(X, y, ensure_min_samples=2) n, d = X.shape n_components = _check_n_components(d, self.n_components) @@ -160,22 +145,8 @@ def fit(self, X, y): train_time = time.time() # Initialize A - # if the init is the default (None), we raise a warning - if self.init is None: - # TODO: replace init=None by init='auto' in v0.6.0 and remove the warning - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous scaling matrix. If you still want to use the same " - "scaling matrix as before, set " - "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)" - ", EPS))). This warning will disappear in v0.6.0, and `init` " - "parameter's default value will be set to 'auto'.") - warnings.warn(msg, ChangedBehaviorWarning) - init = 'auto' - else: - init = self.init - A = _initialize_components(n_components, X, labels, init, self.verbose, - self.random_state) + A = _initialize_components(n_components, X, labels, self.init, + self.verbose, self.random_state) # Run NCA mask = labels[:, np.newaxis] == labels[np.newaxis, :] diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 8471a1b1..253b9c92 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -2,13 +2,9 @@ Relative Components Analysis (RCA) """ -from __future__ import absolute_import import numpy as np import warnings -from six.moves import xrange -from sklearn import decomposition from sklearn.base import TransformerMixin -from sklearn.exceptions import ChangedBehaviorWarning from ._util import _check_n_components from .base_metric import MahalanobisMixin @@ -17,13 +13,13 @@ # mean center each chunklet separately def _chunk_mean_centering(data, chunks): - num_chunks = chunks.max() + 1 + n_chunks = chunks.max() + 1 chunk_mask = chunks != -1 # We need to ensure the data is float so that we can substract the # mean on it chunk_data = data[chunk_mask].astype(float, copy=False) chunk_labels = chunks[chunk_mask] - for c in xrange(num_chunks): + for c in range(n_chunks): mask = chunk_labels == c chunk_data[mask] -= chunk_data[mask].mean(axis=0) @@ -44,60 +40,49 @@ class RCA(MahalanobisMixin, TransformerMixin): Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used - - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. - - pca_comps : Not used - .. deprecated:: 0.5.0 - `pca_comps` was deprecated in version 0.5.0 and will - be removed in 0.6.0. 
+ Dimensionality of reduced space (if None, defaults to dimension of X). preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. Examples -------- - >>> from metric_learn import RCA_Supervised - >>> from sklearn.datasets import load_iris - >>> iris_data = load_iris() - >>> X = iris_data['data'] - >>> Y = iris_data['target'] - >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2) - >>> rca.fit(X, Y) + >>> from metric_learn import RCA + >>> X = [[-0.05, 3.0],[0.05, -3.0], + >>> [0.1, -3.55],[-0.1, 3.55], + >>> [-0.95, -0.05],[0.95, 0.05], + >>> [0.4, 0.05],[-0.4, -0.05]] + >>> chunks = [0, 0, 1, 1, 2, 2, 3, 3] + >>> rca = RCA() + >>> rca.fit(X, chunks) References - ------------------ - .. [1] `Adjustment learning and relevant component analysis - `_ Noam - Shental, et al. + ---------- + .. [1] Noam Shental, et al. `Adjustment learning and relevant component + analysis `_ . + ECCV 2002. Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. """ - def __init__(self, n_components=None, num_dims='deprecated', - pca_comps='deprecated', preprocessor=None): + def __init__(self, n_components=None, preprocessor=None): self.n_components = n_components - self.num_dims = num_dims - self.pca_comps = pca_comps super(RCA, self).__init__(preprocessor) def _check_dimension(self, rank, X): d = X.shape[1] + if rank < d: warnings.warn('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. ' - 'You should reduce the dimensionality of your input,' + 'You should remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' 'for instance using `sklearn.decomposition.PCA` as a ' 'preprocessing step.') @@ -110,34 +95,14 @@ def fit(self, X, chunks): Parameters ---------- data : (n x d) data matrix - Each row corresponds to a single instance + Each row corresponds to a single instance + chunks : (n,) array of ints - When ``chunks[i] == -1``, point i doesn't belong to any chunklet. - When ``chunks[i] == j``, point i belongs to chunklet j. + When ``chunks[i] == -1``, point i doesn't belong to any chunklet. + When ``chunks[i] == j``, point i belongs to chunklet j. """ - if self.num_dims != 'deprecated': - warnings.warn('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead', - DeprecationWarning) - - if self.pca_comps != 'deprecated': - warnings.warn( - '"pca_comps" parameter is not used. ' - 'It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' - 'you still want to do it, you could use ' - '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.', - DeprecationWarning) - X, chunks = self._prepare_inputs(X, chunks, ensure_min_samples=2) - warnings.warn( - "RCA will no longer center the data before training. If you want " - "to do some preprocessing, you should do it manually (you can also " - "use an `sklearn.pipeline.Pipeline` for instance). 
This warning " - "will disappear in version 0.6.0.", ChangedBehaviorWarning) - chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(X, chunks) @@ -147,7 +112,7 @@ def fit(self, X, chunks): # Fisher Linear Discriminant projection if dim < X.shape[1]: total_cov = np.cov(X[chunk_mask], rowvar=0) - tmp = np.linalg.lstsq(total_cov, inner_cov)[0] + tmp = np.linalg.lstsq(total_cov, inner_cov, rcond=None)[0] vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] A = vecs[:, inds] @@ -170,76 +135,83 @@ class RCA_Supervised(RCA): `RCA_Supervised` creates chunks of similar points by first sampling a class, taking `chunk_size` elements in it, and repeating the process - `num_chunks` times. + `n_chunks` times. Parameters ---------- n_components : int or None, optional (default=None) - Dimensionality of reduced space (if None, defaults to dimension of X). - - num_dims : Not used + Dimensionality of reduced space (if None, defaults to dimension of X). - .. deprecated:: 0.5.0 - `num_dims` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use `n_components` instead. + n_chunks: int, optional (default=100) + Number of chunks to generate. - num_chunks: int, optional - - chunk_size: int, optional + chunk_size: int, optional (default=2) + Number of points per chunk. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. - It is used to randomly sample constraints from labels. + A pseudo random number generator object or a seed for it if int. + It is used to randomly sample constraints from labels. + + num_chunks : Renamed to n_chunks. Will be deprecated in 0.7.0 + + Examples + -------- + >>> from metric_learn import RCA_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> rca = RCA_Supervised(n_chunks=30, chunk_size=2) + >>> rca.fit(X, Y) Attributes ---------- components_ : `numpy.ndarray`, shape=(n_components, n_features) - The learned linear transformation ``L``. + The learned linear transformation ``L``. """ - def __init__(self, num_dims='deprecated', n_components=None, - pca_comps='deprecated', num_chunks=100, chunk_size=2, - preprocessor=None, random_state=None): + def __init__(self, n_components=None, n_chunks=100, chunk_size=2, + preprocessor=None, random_state=None, + num_chunks='deprecated'): """Initialize the supervised version of `RCA`.""" - RCA.__init__(self, num_dims=num_dims, n_components=n_components, - pca_comps=pca_comps, preprocessor=preprocessor) - self.num_chunks = num_chunks + RCA.__init__(self, n_components=n_components, preprocessor=preprocessor) + if num_chunks != 'deprecated': + warnings.warn('"num_chunks" parameter has been renamed to' + ' "n_chunks". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + n_chunks = num_chunks + self.num_chunks = 'deprecated' # To avoid no_attribute error + self.n_chunks = n_chunks self.chunk_size = chunk_size self.random_state = random_state - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the RCA model. 
- Needs num_constraints specified in constructor. + Needs n_constraints specified in constructor. (Not true?) Parameters ---------- X : (n x d) data matrix - each row corresponds to a single instance + each row corresponds to a single instance + y : (n) data labels - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `RCA_Supervised` object). """ - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `RCA_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - chunks = Constraints(y).chunks(num_chunks=self.num_chunks, + chunks = Constraints(y).chunks(n_chunks=self.n_chunks, chunk_size=self.chunk_size, random_state=self.random_state) + + if self.n_chunks * (self.chunk_size - 1) < X.shape[1]: + warnings.warn('Due to the parameters of RCA_Supervised, ' + 'the inner covariance matrix is not invertible, ' + 'so the transformation matrix will contain Nan values. ' + 'Increase the number or size of the chunks to correct ' + 'this problem.' + ) + return RCA.fit(self, X, chunks) diff --git a/metric_learn/scml.py b/metric_learn/scml.py new file mode 100644 index 00000000..fedf393d --- /dev/null +++ b/metric_learn/scml.py @@ -0,0 +1,663 @@ +""" +Sparse Compositional Metric Learning (SCML) +""" + +from __future__ import print_function, absolute_import, division +import numpy as np +from .base_metric import _TripletsClassifierMixin, MahalanobisMixin +from ._util import components_from_metric +from sklearn.base import TransformerMixin +from .constraints import Constraints +from sklearn.preprocessing import normalize +from sklearn.neighbors import NearestNeighbors +from sklearn.cluster import KMeans +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.utils import check_array, check_random_state +import warnings + + +class _BaseSCML(MahalanobisMixin): + + _tuple_size = 3 # constraints are triplets + _authorized_basis = ['triplet_diffs'] + + def __init__(self, beta=1e-5, basis='triplet_diffs', n_basis=None, + gamma=5e-3, max_iter=10000, output_iter=500, batch_size=10, + verbose=False, preprocessor=None, random_state=None): + self.beta = beta + self.basis = basis + self.n_basis = n_basis + self.gamma = gamma + self.max_iter = max_iter + self.output_iter = output_iter + self.batch_size = batch_size + self.verbose = verbose + self.preprocessor = preprocessor + self.random_state = random_state + super(_BaseSCML, self).__init__(preprocessor) + + def _fit(self, triplets, basis=None, n_basis=None): + """ + Optimization procedure to find a sparse vector of weights to + construct the metric from the basis set. This is based on the + dual averaging method. 
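(Editorial note, not part of the diff: a toy rendering of the regularized dual-averaging update that the body of `_fit` below implements, with made-up shapes and data. `gamma`, `beta` and `delta` play the same roles as in the implementation.)

import numpy as np

rng = np.random.RandomState(0)
n_basis, batch_size = 4, 2
gamma, beta, delta = 5e-3, 1e-5, 0.001

w = np.zeros((1, n_basis))            # current weights over the basis set
avg_grad_w = np.zeros((1, n_basis))   # running average of subgradients
ada_grad_w = np.zeros((1, n_basis))   # running L2 norm of subgradients

dist_diff = rng.randn(batch_size, n_basis)          # d(a, p) - d(a, n) per basis
for it in range(3):
    slack = 1 + dist_diff.dot(w.T)                  # hinge slack per triplet
    violated = np.squeeze(slack > 0, axis=1)
    grad = dist_diff[violated].sum(axis=0, keepdims=True) / batch_size
    avg_grad_w = (it * avg_grad_w + grad) / (it + 1)
    ada_grad_w = np.sqrt(ada_grad_w ** 2 + grad ** 2)
    scale = -(it + 1) / (gamma * (delta + ada_grad_w))
    w = scale * np.minimum(avg_grad_w + beta, 0)    # proximal step keeps w >= 0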
+ """ + + if not isinstance(self.max_iter, int): + raise ValueError("max_iter should be an integer, instead it is of type" + " %s" % type(self.max_iter)) + if not isinstance(self.output_iter, int): + raise ValueError("output_iter should be an integer, instead it is of " + "type %s" % type(self.output_iter)) + if not isinstance(self.batch_size, int): + raise ValueError("batch_size should be an integer, instead it is of type" + " %s" % type(self.batch_size)) + + if self.output_iter > self.max_iter: + raise ValueError("The value of output_iter must be equal or smaller than" + " max_iter.") + + # Currently prepare_inputs makes triplets contain points and not indices + triplets = self._prepare_inputs(triplets, type_of_inputs='tuples') + + # TODO: + # This algorithm is built to work with indices, but in order to be + # compliant with the current handling of inputs it is converted + # back to indices by the following function. This should be improved + # in the future. + triplets, X = self._to_index_points(triplets) + + if basis is None: + basis, n_basis = self._initialize_basis(triplets, X) + + dist_diff = self._compute_dist_diff(triplets, X, basis) + + n_triplets = triplets.shape[0] + + # weight vector + w = np.zeros((1, n_basis)) + # avarage obj gradient wrt weights + avg_grad_w = np.zeros((1, n_basis)) + + # l2 norm in time of all obj gradients wrt weights + ada_grad_w = np.zeros((1, n_basis)) + # slack for not dividing by zero + delta = 0.001 + + best_obj = np.inf + + rng = check_random_state(self.random_state) + rand_int = rng.randint(low=0, high=n_triplets, + size=(self.max_iter, self.batch_size)) + for iter in range(self.max_iter): + + idx = rand_int[iter] + + slack_val = 1 + np.matmul(dist_diff[idx, :], w.T) + slack_mask = np.squeeze(slack_val > 0, axis=1) + + grad_w = np.sum(dist_diff[idx[slack_mask], :], + axis=0, keepdims=True)/self.batch_size + avg_grad_w = (iter * avg_grad_w + grad_w) / (iter+1) + + ada_grad_w = np.sqrt(np.square(ada_grad_w) + np.square(grad_w)) + + scale_f = -(iter+1) / (self.gamma * (delta + ada_grad_w)) + + # proximal operator with negative trimming equivalent + w = scale_f * np.minimum(avg_grad_w + self.beta, 0) + + if (iter + 1) % self.output_iter == 0: + # regularization part of obj function + obj1 = np.sum(w)*self.beta + + # Every triplet distance difference in the space given by L + # plus a slack of one + slack_val = 1 + np.matmul(dist_diff, w.T) + # Mask of places with positive slack + slack_mask = slack_val > 0 + + # loss function of learning task part of obj function + obj2 = np.sum(slack_val[slack_mask])/n_triplets + + obj = obj1 + obj2 + if self.verbose: + count = np.sum(slack_mask) + print("[%s] iter %d\t obj %.6f\t num_imp %d" % + (self.__class__.__name__, (iter+1), obj, count)) + + # update the best + if obj < best_obj: + best_obj = obj + best_w = w + + if self.verbose: + print("max iteration reached.") + + # return L matrix yielded from best weights + self.n_iter_ = iter + self.components_ = self._components_from_basis_weights(basis, best_w) + + return self + + def _compute_dist_diff(self, triplets, X, basis): + """ + Helper function to compute the distance difference of every triplet in the + space yielded by the basis set. 
+ """ + # Transformation of data by the basis set + XB = np.matmul(X, basis.T) + + n_triplets = triplets.shape[0] + # get all positive and negative pairs with lowest index first + # np.array (2*n_triplets,2) + triplets_pairs_sorted = np.sort(np.vstack((triplets[:, [0, 1]], + triplets[:, [0, 2]])), + kind='stable') + # calculate all unique pairs and their indices + uniqPairs, indices = np.unique(triplets_pairs_sorted, return_inverse=True, + axis=0) + # calculate L2 distance acording to bases only for unique pairs + dist = np.square(XB[uniqPairs[:, 0], :] - XB[uniqPairs[:, 1], :]) + + # return the diference of distances between all positive and negative + # pairs + return dist[indices[:n_triplets]] - dist[indices[n_triplets:]] + + def _components_from_basis_weights(self, basis, w): + """ + Get components matrix (L) from computed mahalanobis matrix. + """ + + # get rid of inactive bases + # TODO: Maybe have a tolerance over zero? + active_idx, = w > 0 + w = w[..., active_idx] + basis = basis[active_idx, :] + + n_basis, n_features = basis.shape + + if n_basis < n_features: # if metric is low-rank + warnings.warn("The number of bases with nonzero weight is less than the " + "number of features of the input, in consequence the " + "learned transformation reduces the dimension to %d." + % n_basis) + return np.sqrt(w.T)*basis # equivalent to np.diag(np.sqrt(w)).dot(basis) + + else: # if metric is full rank + return components_from_metric(np.matmul(basis.T, w.T*basis)) + + def _to_index_points(self, triplets): + shape = triplets.shape + X, triplets = np.unique(np.vstack(triplets), return_inverse=True, axis=0) + triplets = triplets.reshape(shape[:2]) + return triplets, X + + def _initialize_basis(self, triplets, X): + """ Checks if the basis array is well constructed or constructs it based + on one of the available options. + """ + n_features = X.shape[1] + + if isinstance(self.basis, np.ndarray): + # TODO: should copy? + basis = check_array(self.basis, copy=True) + if basis.shape[1] != n_features: + raise ValueError('The dimensionality ({}) of the provided bases must' + ' match the dimensionality of the data ' + '({}).'.format(basis.shape[1], n_features)) + elif self.basis not in self._authorized_basis: + raise ValueError( + "`basis` must be one of the options '{}' " + "or an array of shape (n_basis, n_features)." + .format("', '".join(self._authorized_basis))) + if self.basis == 'triplet_diffs': + basis, n_basis = self._generate_bases_dist_diff(triplets, X) + + return basis, n_basis + + def _generate_bases_dist_diff(self, triplets, X): + """ Constructs the basis set from the differences of positive and negative + pairs from the triplets constraints. + + The basis set is constructed iteratively by taking n_features triplets, + then adding and substracting respectively all the outerproducts of the + positive and negative pairs, and finally selecting the eigenvectors + of this matrix with positive eigenvalue. This is done until n_basis are + selected. 
+ """ + n_features = X.shape[1] + n_triplets = triplets.shape[0] + + if self.n_basis is None: + # TODO: Get a good default n_basis directive + n_basis = n_features*80 + warnings.warn('As no value for `n_basis` was selected, the number of ' + 'basis will be set to n_basis= %d' % n_basis) + elif isinstance(self.n_basis, int): + n_basis = self.n_basis + else: + raise ValueError("n_basis should be an integer, instead it is of type %s" + % type(self.n_basis)) + + if n_features > n_triplets: + raise ValueError( + "Number of features (%s) is greater than the number of triplets(%s).\n" + "Consider using dimensionality reduction or using another basis " + "generation scheme." % (n_features, n_triplets)) + + basis = np.zeros((n_basis, n_features)) + + # get all positive and negative pairs with lowest index first + # np.array (2*n_triplets,2) + triplets_pairs_sorted = np.sort(np.vstack((triplets[:, [0, 1]], + triplets[:, [0, 2]])), + kind='stable') + # calculate all unique pairs and their indices + uniqPairs, indices = np.unique(triplets_pairs_sorted, return_inverse=True, + axis=0) + # calculate differences only for unique pairs + diff = X[uniqPairs[:, 0], :] - X[uniqPairs[:, 1], :] + + diff_pos = diff[indices[:n_triplets], :] + diff_neg = diff[indices[n_triplets:], :] + + rng = check_random_state(self.random_state) + + start = 0 + finish = 0 + while finish != n_basis: + # Select triplets to yield diff + select_triplet = rng.choice(n_triplets, size=n_features, replace=False) + + # select n_features positive differences + d_pos = diff_pos[select_triplet, :] + + # select n_features negative differences + d_neg = diff_neg[select_triplet, :] + + # Yield matrix + diff_sum = d_pos.T.dot(d_pos) - d_neg.T.dot(d_neg) + + # Calculate eigenvalue and eigenvectors + w, v = np.linalg.eigh(diff_sum.T.dot(diff_sum)) + + # Add eigenvectors with positive eigenvalue to basis set + pos_eig_mask = w > 0 + start = finish + finish += pos_eig_mask.sum() + + try: + basis[start:finish, :] = v[pos_eig_mask] + except ValueError: + # if finish is greater than n_basis + basis[start:, :] = v[pos_eig_mask][:n_basis-start] + break + + # TODO: maybe add a warning in case there are no added bases, this could + # be caused by a bad triplet set. This would cause an infinite loop + + return basis, n_basis + + +class SCML(_BaseSCML, _TripletsClassifierMixin): + """Sparse Compositional Metric Learning (SCML) + + `SCML` learns an squared Mahalanobis distance from triplet constraints by + optimizing sparse positive weights assigned to a set of :math:`K` rank-one + PSD bases. This can be formulated as an optimization problem with only + :math:`K` parameters, that can be solved with an efficient stochastic + composite scheme. + + Read more in the :ref:`User Guide `. + + .. warning:: + SCML is still a bit experimental, don't hesitate to report if + something fails/doesn't work as expected. + + Parameters + ---------- + beta: float (default=1e-5) + L1 regularization parameter. + + basis : string or array-like, optional (default='triplet_diffs') + Set of bases to construct the metric. Possible options are + 'triplet_diffs', and an array-like of shape (n_basis, n_features). + + 'triplet_diffs' + The basis set is constructed iteratively from differences between points + of `n_features` positive or negative pairs randomly sampled from the + triplets constraints. Requires the number of training triplets to be + great or equal to `n_features`. 
+ + array-like + A matrix of shape (n_basis, n_features), that will be used as + the basis set for the metric construction. + + n_basis : int, optional + Number of basis to be yielded. In case it is not set it will be set based + on `basis`. If no value is selected a default will be computed based on + the input. + + gamma: float (default = 5e-3) + Learning rate for the optimization algorithm. + + max_iter : int (default = 10000) + Number of iterations for the algorithm. + + output_iter : int (default = 5000) + Number of iterations to check current weights performance and output this + information in case verbose is True. + + verbose : bool, optional + If True, prints information while learning. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get triplets from indices. If array-like, + triplets will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + + Attributes + ---------- + components_ : `numpy.ndarray`, shape=(n_features, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `_components_from_basis_weights`.) + + Examples + -------- + >>> from metric_learn import SCML + >>> triplets = [[[1.2, 7.5], [1.3, 1.5], [6.2, 9.7]], + >>> [[1.3, 4.5], [3.2, 4.6], [5.4, 5.4]], + >>> [[3.2, 7.5], [3.3, 1.5], [8.2, 9.7]], + >>> [[3.3, 4.5], [5.2, 4.6], [7.4, 5.4]]] + >>> scml = SCML() + >>> scml.fit(triplets) + + References + ---------- + .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. + `_. \ + (AAAI), 2014. + + .. [2] Adapted from original `Matlab implementation. \ + `_. + + See Also + -------- + metric_learn.SCML_Supervised : The supervised version of the algorithm. + + :ref:`supervised_version` : The section of the project documentation + that describes the supervised version of weakly supervised estimators. + """ + + def fit(self, triplets): + """Learn the SCML model. + + Parameters + ---------- + triplets : array-like, shape=(n_constraints, 3, n_features) or \ + (n_constraints, 3) + 3D array-like of triplets of points or 2D array of triplets of + indicators. Triplets are assumed to be ordered such that: + d(triplets[i, 0],triplets[i, 1]) < d(triplets[i, 0], triplets[i, 2]). + + Returns + ------- + self : object + Returns the instance. + """ + + return self._fit(triplets) + + +class SCML_Supervised(_BaseSCML, TransformerMixin): + """Supervised version of Sparse Compositional Metric Learning (SCML) + + `SCML_Supervised` creates triplets by taking `k_genuine` neighbours + of the same class and `k_impostor` neighbours from different classes for each + point and then runs the SCML algorithm on these triplets. + + Read more in the :ref:`User Guide `. + + .. warning:: + SCML is still a bit experimental, don't hesitate to report if + something fails/doesn't work as expected. + + Parameters + ---------- + beta: float (default=1e-5) + L1 regularization parameter. + + basis : string or an array-like, optional (default='lda') + Set of bases to construct the metric. Possible options are + 'lda', and an array-like of shape (n_basis, n_features). + + 'lda' + The `n_basis` basis set is constructed from the LDA of significant + local regions in the feature space via clustering, for each region + center k-nearest neighbors are used to obtain the LDA scalings, + which correspond to the locally discriminative basis. 
+ + array-like + A matrix of shape (n_basis, n_features), that will be used as + the basis set for the metric construction. + + n_basis : int, optional + Number of basis to be yielded. In case it is not set it will be set based + on `basis`. If no value is selected a default will be computed based on + the input. + + gamma: float (default = 5e-3) + Learning rate for the optimization algorithm. + + max_iter : int (default = 100000) + Number of iterations for the algorithm. + + output_iter : int (default = 5000) + Number of iterations to check current weights performance and output this + information in case verbose is True. + + verbose : bool, optional + If True, prints information while learning. + + preprocessor : array-like, shape=(n_samples, n_features) or callable + The preprocessor to call to get triplets from indices. If array-like, + triplets will be formed like this: X[indices]. + + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + + Attributes + ---------- + components_ : `numpy.ndarray`, shape=(n_features, n_features) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `_components_from_basis_weights`.) + + Examples + -------- + >>> from metric_learn import SCML_Supervised + >>> from sklearn.datasets import load_iris + >>> iris_data = load_iris() + >>> X = iris_data['data'] + >>> Y = iris_data['target'] + >>> scml = SCML_Supervised(random_state=33) + >>> scml.fit(X, Y) + SCML_Supervised(random_state=33) + >>> scml.score_pairs([[X[0], X[1]], [X[0], X[2]]]) + array([1.84640733, 1.55984363]) + >>> scml.get_metric()(X[0], X[1]) + 1.8464073327922157 + + References + ---------- + .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. + `_. \ + (AAAI), 2014. + + .. [2] Adapted from original `Matlab implementation. \ + `_. + + See Also + -------- + metric_learn.SCML : The weakly supervised version of this + algorithm. + """ + # Add supervised authorized basis construction options + _authorized_basis = _BaseSCML._authorized_basis + ['lda'] + + def __init__(self, k_genuine=3, k_impostor=10, beta=1e-5, basis='lda', + n_basis=None, gamma=5e-3, max_iter=10000, output_iter=500, + batch_size=10, verbose=False, preprocessor=None, + random_state=None): + self.k_genuine = k_genuine + self.k_impostor = k_impostor + _BaseSCML.__init__(self, beta=beta, basis=basis, n_basis=n_basis, + max_iter=max_iter, output_iter=output_iter, + batch_size=batch_size, verbose=verbose, + preprocessor=preprocessor, random_state=random_state) + + def fit(self, X, y): + """Create constraints from labels and learn the SCML model. + + Parameters + ---------- + X : (n x d) matrix + Input data, where each row corresponds to a single instance. + + y : (n) array-like + Data labels. + + Returns + ------- + self : object + Returns the instance. 
+ """ + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) + + basis, n_basis = self._initialize_basis_supervised(X, y) + + if not isinstance(self.k_genuine, int): + raise ValueError("k_genuine should be an integer, instead it is of type" + " %s" % type(self.k_genuine)) + if not isinstance(self.k_impostor, int): + raise ValueError("k_impostor should be an integer, instead it is of " + "type %s" % type(self.k_impostor)) + + constraints = Constraints(y) + triplets = constraints.generate_knntriplets(X, self.k_genuine, + self.k_impostor) + + triplets = X[triplets] + + return self._fit(triplets, basis, n_basis) + + def _initialize_basis_supervised(self, X, y): + """ Constructs the basis set following one of the supervised options in + case one is selected. + """ + + if isinstance(self.basis, str) and self.basis == 'lda': + basis, n_basis = self._generate_bases_LDA(X, y) + else: + basis, n_basis = None, None + + return basis, n_basis + + def _generate_bases_LDA(self, X, y): + """ Generates bases for the 'lda' option. + + The basis set is constructed using Linear Discriminant Analysis of + significant local regions in the feature space via clustering, for + each region center k-nearest neighbors are used to obtain the LDA scalings, + which correspond to the locally discriminative basis. Currently this is + done at two scales `k={10,20}` if `n_feature < 50` or else `k={20,50}`. + """ + + labels, class_count = np.unique(y, return_counts=True) + n_class = len(labels) + + n_features = X.shape[1] + # Number of basis yielded from each LDA + num_eig = min(n_class-1, n_features) + + if self.n_basis is None: + # TODO: Get a good default n_basis directive + n_basis = min(20*n_features, X.shape[0]*2*num_eig - 1) + warnings.warn('As no value for `n_basis` was selected, the number of ' + 'basis will be set to n_basis= %d' % n_basis) + + elif isinstance(self.n_basis, int): + n_basis = self.n_basis + else: + raise ValueError("n_basis should be an integer, instead it is of type %s" + % type(self.n_basis)) + + # Number of clusters needed for 2 scales given the number of basis + # yielded by every LDA + n_clusters = int(np.ceil(n_basis/(2 * num_eig))) + + if n_basis < n_class: + warnings.warn("The number of basis is less than the number of classes, " + "which may lead to poor discriminative performance.") + elif n_basis >= X.shape[0]*2*num_eig: + raise ValueError("Not enough samples to generate %d LDA bases, n_basis" + "should be smaller than %d" % + (n_basis, X.shape[0]*2*num_eig)) + + kmeans = KMeans(n_clusters=n_clusters, n_init=10, + random_state=self.random_state, algorithm='elkan').fit(X) + cX = kmeans.cluster_centers_ + + n_scales = 2 + if n_features > 50: + scales = [20, 50] + else: + scales = [10, 20] + + k_class = np.vstack((np.minimum(class_count, scales[0]), + np.minimum(class_count, scales[1]))) + + idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int64), + np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int64)] + + start_finish_indices = np.hstack((np.zeros((2, 1), np.int64), + k_class)).cumsum(axis=1) + + neigh = NearestNeighbors() + + for c in range(n_class): + sel_c = np.where(y == labels[c]) + + # get k_class same class neighbors + neigh.fit(X=X[sel_c]) + # Only take the neighbors once for the biggest scale + neighbors = neigh.kneighbors(X=cX, n_neighbors=k_class[-1, c], + return_distance=False) + + # add index set of neighbors for every cluster center for both scales + for s, k in enumerate(k_class[:, c]): + start, finish = start_finish_indices[s, c:c+2] + idx_set[s][:, 
start:finish] = np.take(sel_c, neighbors[:, :k]) + + # Compute basis for every cluster in both scales + basis = np.zeros((n_basis, n_features)) + lda = LinearDiscriminantAnalysis() + start_finish_indices = np.hstack((np.vstack((0, n_clusters * num_eig)), + np.full((2, n_clusters), + num_eig))).cumsum(axis=1) + + for s in range(n_scales): + for c in range(n_clusters): + lda.fit(X[idx_set[s][c, :]], y[idx_set[s][c, :]]) + start, finish = start_finish_indices[s, c:c+2] + normalized_scalings = normalize(lda.scalings_.T) + try: + basis[start: finish, :] = normalized_scalings + except ValueError: + # handle tail + basis[start:, :] = normalized_scalings[:n_basis-start] + break + + return basis, n_basis diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 2d67e0b8..c4c427b9 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -2,13 +2,18 @@ Sparse High-Dimensional Metric Learning (SDML) """ -from __future__ import absolute_import import warnings import numpy as np from sklearn.base import TransformerMixin from scipy.linalg import pinvh -from sklearn.covariance import graphical_lasso -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +try: + from sklearn.covariance._graph_lasso import ( + _graphical_lasso as graphical_lasso + ) +except ImportError: + from sklearn.covariance import graphical_lasso + +from sklearn.exceptions import ConvergenceWarning from .base_metric import MahalanobisMixin, _PairsClassifierMixin from .constraints import Constraints, wrap_pairs @@ -25,23 +30,17 @@ class _BaseSDML(MahalanobisMixin): _tuple_size = 2 # constraints are pairs - def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, - use_cov='deprecated', verbose=False, preprocessor=None, + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', + verbose=False, preprocessor=None, random_state=None): self.balance_param = balance_param self.sparsity_param = sparsity_param self.prior = prior - self.use_cov = use_cov self.verbose = verbose self.random_state = random_state super(_BaseSDML, self).__init__(preprocessor) def _fit(self, pairs, y): - if self.use_cov != 'deprecated': - warnings.warn('"use_cov" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.', - DeprecationWarning) if not HAS_SKGGM: if self.verbose: print("SDML will use scikit-learn's graphical lasso solver.") @@ -50,27 +49,16 @@ def _fit(self, pairs, y): print("SDML will use skggm's graphical lasso solver.") pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') + n_features = pairs.shape[2] + if n_features < 2: + raise ValueError(f"Cannot fit SDML with {n_features} feature(s)") # set up (the inverse of) the prior M # if the prior is the default (None), we raise a warning - if self.prior is None: - # TODO: - # replace prior=None by prior='identity' in v0.6.0 and remove the - # warning - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. 
This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - warnings.warn(msg, ChangedBehaviorWarning) - prior = 'identity' - else: - prior = self.prior - _, prior_inv = _initialize_metric_mahalanobis(pairs, prior, - return_inverse=True, strict_pd=True, matrix_name='prior', - random_state=self.random_state) + _, prior_inv = _initialize_metric_mahalanobis( + pairs, self.prior, + return_inverse=True, strict_pd=True, matrix_name='prior', + random_state=self.random_state) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix @@ -97,13 +85,14 @@ def _fit(self, pairs, y): msg=self.verbose, Theta0=theta0, Sigma0=sigma0) else: - _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, - verbose=self.verbose, - cov_init=sigma0) + _, M, *_ = graphical_lasso(emp_cov, alpha=self.sparsity_param, + verbose=self.verbose, + cov_init=sigma0) raised_error = None w_mahalanobis, _ = np.linalg.eigh(M) not_spd = any(w_mahalanobis < 0.) not_finite = not np.isfinite(M).all() + # TODO: Narrow this to the specific exceptions we expect. except Exception as e: raised_error = e not_spd = False # not_spd not applicable here so we set to False @@ -128,7 +117,7 @@ def _fit(self, pairs, y): class SDML(_BaseSDML, _PairsClassifierMixin): - """Sparse Distance Metric Learning (SDML) + r"""Sparse Distance Metric Learning (SDML) SDML is an efficient sparse metric learning in high-dimensional space via double regularization: an L1-penalization on the off-diagonal elements of the @@ -141,62 +130,55 @@ class SDML(_BaseSDML, _PairsClassifierMixin): Parameters ---------- - balance_param : float, optional - trade off between sparsity and M0 prior - - sparsity_param : float, optional - trade off between optimizer and sparseness (see graph_lasso) + balance_param : float, optional (default=0.5) + Trade off between sparsity and M0 prior. - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). + sparsity_param : float, optional (default=0.01) + Trade off between optimizer and sparseness (see graph_lasso). - 'identity' - An identity matrix of shape (n_features, n_features). + prior : string or numpy array, optional (default='identity') + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). - 'covariance' - The inverse covariance matrix. + 'identity' + An identity matrix of shape (n_features, n_features). - 'random' - The prior will be a random positive definite (PD) matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. + 'covariance' + The inverse covariance matrix. - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. + 'random' + The prior will be a random positive definite (PD) matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. - use_cov : Not used. - .. 
deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. verbose : bool, optional (default=False) - if True, prints information while learning + If True, prints information while learning. preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be gotten like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be gotten like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + A pseudo random number generator object or a seed for it if int. If + ``prior='random'``, ``random_state`` is used to set the prior. Attributes ---------- components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) threshold_ : `float` - If the distance metric between two points is lower than this threshold, - points will be classified as similar, otherwise they will be - classified as dissimilar. + If the distance metric between two points is lower than this threshold, + points will be classified as similar, otherwise they will be + classified as dissimilar. Examples -------- @@ -205,19 +187,17 @@ class SDML(_BaseSDML, _PairsClassifierMixin): >>> iris_data = load_iris() >>> X = iris_data['data'] >>> Y = iris_data['target'] - >>> sdml = SDML_Supervised(num_constraints=200) + >>> sdml = SDML_Supervised(n_constraints=200) >>> sdml.fit(X, Y) References ---------- + .. [1] Qi et al. `An efficient sparse metric learning in high-dimensional + space via L1-penalized log-determinant regularization + `_. + ICML 2009. - .. [1] Qi et al. - An efficient sparse metric learning in high-dimensional space via - L1-penalized log-determinant regularization. ICML 2009. - http://lms.comp.nus.edu.sg/sites/default/files/publication\ --attachments/icml09-guojun.pdf - - .. [2] Adapted from https://gist.github.com/kcarnold/5439945 + .. [2] Code adapted from https://gist.github.com/kcarnold/5439945 """ def fit(self, pairs, y, calibration_params=None): @@ -230,20 +210,22 @@ def fit(self, pairs, y, calibration_params=None): ---------- pairs : array-like, shape=(n_constraints, 2, n_features) or \ (n_constraints, 2) - 3D Array of pairs with each row corresponding to two points, - or 2D array of indices of pairs if the metric learner uses a - preprocessor. + 3D Array of pairs with each row corresponding to two points, + or 2D array of indices of pairs if the metric learner uses a + preprocessor. + y : array-like, of shape (n_constraints,) - Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + Labels of constraints. Should be -1 for dissimilar pair, 1 for similar. + calibration_params : `dict` or `None` - Dictionary of parameters to give to `calibrate_threshold` for the - threshold calibration step done at the end of `fit`. If `None` is - given, `calibrate_threshold` will use the default parameters. 
+ Dictionary of parameters to give to `calibrate_threshold` for the + threshold calibration step done at the end of `fit`. If `None` is + given, `calibrate_threshold` will use the default parameters. Returns ------- self : object - Returns the instance. + Returns the instance. """ calibration_params = (calibration_params if calibration_params is not None else dict()) @@ -263,60 +245,57 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): Parameters ---------- balance_param : float, optional (default=0.5) - trade off between sparsity and M0 prior + Trade off between sparsity and M0 prior. + sparsity_param : float, optional (default=0.01) - trade off between optimizer and sparseness (see graph_lasso) - prior : None, string or numpy array, optional (default=None) - Prior to set for the metric. Possible options are - 'identity', 'covariance', 'random', and a numpy array of - shape (n_features, n_features). For SDML, the prior should be strictly - positive definite (PD). If `None`, will be set - automatically to 'identity' (this is to raise a warning if - `prior` is not set, and stays to its default value (None), in v0.5.0). - - 'identity' - An identity matrix of shape (n_features, n_features). - - 'covariance' - The inverse covariance matrix. - - 'random' - The prior will be a random SPD matrix of shape - `(n_features, n_features)`, generated using - `sklearn.datasets.make_spd_matrix`. - - numpy array - A positive definite (PD) matrix of shape - (n_features, n_features), that will be used as such to set the - prior. - - use_cov : Not used. - .. deprecated:: 0.5.0 - `A0` was deprecated in version 0.5.0 and will - be removed in 0.6.0. Use 'prior' instead. - - num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. - num_constraints : int, optional (default=None) - number of constraints to generate + Trade off between optimizer and sparseness (see graph_lasso). + + prior : string or numpy array, optional (default='identity') + Prior to set for the metric. Possible options are + 'identity', 'covariance', 'random', and a numpy array of + shape (n_features, n_features). For SDML, the prior should be strictly + positive definite (PD). + + 'identity' + An identity matrix of shape (n_features, n_features). + + 'covariance' + The inverse covariance matrix. + + 'random' + The prior will be a random SPD matrix of shape + `(n_features, n_features)`, generated using + `sklearn.datasets.make_spd_matrix`. + + numpy array + A positive definite (PD) matrix of shape + (n_features, n_features), that will be used as such to set the + prior. + + n_constraints : int, optional (default=None) + Number of constraints to generate. If None, defaults to `20 * + num_classes**2`. + verbose : bool, optional (default=False) - if True, prints information while learning + If True, prints information while learning. + preprocessor : array-like, shape=(n_samples, n_features) or callable - The preprocessor to call to get tuples from indices. If array-like, - tuples will be formed like this: X[indices]. + The preprocessor to call to get tuples from indices. If array-like, + tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) - A pseudo random number generator object or a seed for it if int. If - ``init='random'``, ``random_state`` is used to set the random - prior. In any case, `random_state` is also used to randomly sample - constraints from labels. 
+ A pseudo random number generator object or a seed for it if int. If + ``init='random'``, ``random_state`` is used to set the random + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. + + num_constraints : Renamed to n_constraints. Will be deprecated in 0.7.0 Attributes ---------- components_ : `numpy.ndarray`, shape=(n_features, n_features) - The linear transformation ``L`` deduced from the learned Mahalanobis - metric (See function `components_from_metric`.) + The linear transformation ``L`` deduced from the learned Mahalanobis + metric (See function `components_from_metric`.) See Also -------- @@ -325,61 +304,48 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): that describes the supervised version of weakly supervised estimators. """ - def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, - use_cov='deprecated', num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None, - random_state=None): + def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity', + n_constraints=None, verbose=False, preprocessor=None, + random_state=None, num_constraints='deprecated'): _BaseSDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, prior=prior, - use_cov=use_cov, verbose=verbose, + verbose=verbose, preprocessor=preprocessor, random_state=random_state) - self.num_labeled = num_labeled - self.num_constraints = num_constraints + if num_constraints != 'deprecated': + warnings.warn('"num_constraints" parameter has been renamed to' + ' "n_constraints". It has been deprecated in' + ' version 0.6.3 and will be removed in 0.7.0' + '', FutureWarning) + self.n_constraints = num_constraints + else: + self.n_constraints = n_constraints + # Avoid test get_params from failing (all params passed sholud be set) + self.num_constraints = 'deprecated' - def fit(self, X, y, random_state='deprecated'): + def fit(self, X, y): """Create constraints from labels and learn the SDML model. Parameters ---------- X : array-like, shape (n, d) - data matrix, where each row corresponds to a single instance + data matrix, where each row corresponds to a single instance + y : array-like, shape (n,) - data labels, one for each instance - random_state : Not used - .. deprecated:: 0.5.0 - `random_state` in the `fit` function was deprecated in version 0.5.0 - and will be removed in 0.6.0. Set `random_state` at initialization - instead (when instantiating a new `SDML_Supervised` object). + data labels, one for each instance Returns ------- self : object - Returns the instance. + Returns the instance. """ - if self.num_labeled != 'deprecated': - warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0', DeprecationWarning) - if random_state != 'deprecated': - warnings.warn('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `SDML_Supervised` ' - 'object).', DeprecationWarning) - else: - warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. 
' - 'This warning will disappear in v0.6.0.', - ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) - num_constraints = self.num_constraints - if num_constraints is None: + n_constraints = self.n_constraints + if n_constraints is None: num_classes = len(np.unique(y)) - num_constraints = 20 * num_classes**2 + n_constraints = 20 * num_classes**2 c = Constraints(y) - pos_neg = c.positive_negative_pairs(num_constraints, + pos_neg = c.positive_negative_pairs(n_constraints, random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseSDML._fit(self, pairs, y) diff --git a/metric_learn/sklearn_shims.py b/metric_learn/sklearn_shims.py new file mode 100644 index 00000000..8d746890 --- /dev/null +++ b/metric_learn/sklearn_shims.py @@ -0,0 +1,25 @@ +"""This file is for fixing imports due to different APIs +depending on the scikit-learn version""" +import sklearn +from packaging import version +SKLEARN_AT_LEAST_0_22 = (version.parse(sklearn.__version__) + >= version.parse('0.22.0')) +if SKLEARN_AT_LEAST_0_22: + from sklearn.utils._testing import (set_random_state, + ignore_warnings, + assert_allclose_dense_sparse, + _get_args) + from sklearn.utils.estimator_checks import (_is_public_parameter + as is_public_parameter) + from sklearn.metrics._scorer import get_scorer +else: + from sklearn.utils.testing import (set_random_state, + ignore_warnings, + assert_allclose_dense_sparse, + _get_args) + from sklearn.utils.estimator_checks import is_public_parameter + from sklearn.metrics.scorer import get_scorer + +__all__ = ['set_random_state', 'set_random_state', + 'ignore_warnings', 'assert_allclose_dense_sparse', '_get_args', + 'is_public_parameter', 'get_scorer'] diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..ef3c8acb --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + integration: mark a test as integration + unit: mark a test as unit \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 8d95aa1e..bc7695e3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,6 @@ universal = 1 [metadata] -description-file = README.rst \ No newline at end of file +description-file = README.rst +license_files = + LICENSE.txt diff --git a/setup.py b/setup.py index dfb20fc0..23392077 100755 --- a/setup.py +++ b/setup.py @@ -3,6 +3,32 @@ from setuptools import setup import os import io +import sys + + +CURRENT_PYTHON = sys.version_info[:2] +REQUIRED_PYTHON = (3, 6) + +# This check and everything above must remain compatible with Python 2.7. +if CURRENT_PYTHON < REQUIRED_PYTHON: + sys.stderr.write(""" +========================== +Unsupported Python version +========================== +This version of metric-learn requires Python {}.{}, but you're trying to +install it on Python {}.{}. +This may be because you are using a version of pip that doesn't +understand the python_requires classifier. Make sure you +have pip >= 9.0 and setuptools >= 24.2, then try again: + $ python -m pip install --upgrade pip setuptools + $ python -m pip install django +This will install the latest version of metric-learn which works on your +version of Python. 
If you can't upgrade your pip (or Python), request +an older version of metric-learn: + $ python -m pip install "metric-learn<0.6.0" +""".format(*(REQUIRED_PYTHON + CURRENT_PYTHON))) + sys.exit(1) + version = {} with io.open(os.path.join('metric_learn', '_version.py')) as fp: @@ -16,27 +42,34 @@ version=version['__version__'], description='Python implementations of metric learning algorithms', long_description=long_description, - author=['CJ Carey', 'Yuan Tang'], + python_requires='>={}.{}'.format(*REQUIRED_PYTHON), + author=[ + 'CJ Carey', + 'Yuan Tang', + 'William de Vazelhes', + 'Aurélien Bellet', + 'Nathalie Vauquier' + ], author_email='ccarey@cs.umass.edu', - url='http://github.com/metric-learn/metric-learn', + url='http://github.com/scikit-learn-contrib/metric-learn', license='MIT', classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', + 'Programming Language :: Python :: 3', 'Operating System :: OS Independent', 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering' ], packages=['metric_learn'], install_requires=[ - 'numpy', - 'scipy', - 'scikit-learn', - 'six' + 'numpy>= 1.11.0', + 'scipy>= 0.17.0', + 'scikit-learn>=0.21.3', ], extras_require=dict( - docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'], + docs=['sphinx', 'sphinx_rtd_theme', 'numpydoc', 'sphinx-gallery', + 'matplotlib'], demo=['matplotlib'], sdml=['skggm>=0.2.9'] ), diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 00314ad0..d457b52d 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -1,40 +1,41 @@ +import warnings import unittest import re import pytest import numpy as np import scipy from scipy.optimize import check_grad, approx_fprime -from six.moves import xrange from sklearn.metrics import pairwise_distances, euclidean_distances from sklearn.datasets import (load_iris, make_classification, make_regression, make_spd_matrix) from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_allclose) -from sklearn.utils.testing import assert_warns_message -from sklearn.exceptions import ConvergenceWarning, ChangedBehaviorWarning +from sklearn.exceptions import ConvergenceWarning from sklearn.utils.validation import check_X_y +from sklearn.preprocessing import StandardScaler try: from inverse_covariance import quic + assert quic except ImportError: HAS_SKGGM = False else: HAS_SKGGM = True from metric_learn import (LMNN, NCA, LFDA, Covariance, MLKR, MMC, - LSML_Supervised, ITML_Supervised, SDML_Supervised, - RCA_Supervised, MMC_Supervised, SDML, RCA, ITML, - LSML) + SCML_Supervised, LSML_Supervised, + ITML_Supervised, SDML_Supervised, RCA_Supervised, + MMC_Supervised, SDML, RCA, ITML, SCML) # Import this specially for testing. 
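# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this patch or of the test suite): the SCML
# estimators added in metric_learn/scml.py above compose the learned
# Mahalanobis matrix from rank-one PSD bases b_i and sparse nonnegative
# weights w_i, i.e. M = sum_i w_i * b_i b_i^T. The toy helper below only
# shows how a triplet would be scored under such a composed metric; the
# function name and its arguments are hypothetical, not metric-learn API.
# ---------------------------------------------------------------------------
def _toy_scml_triplet_margin(x, x_pos, x_neg, bases, weights):
    """Hedged sketch: `bases` (shape (n_basis, n_features)) and `weights`
    (nonnegative, shape (n_basis,)) are toy placeholders, not values produced
    by the estimators above."""
    # M = sum_i w_i * b_i b_i^T, written compactly as B^T diag(w) B
    M = bases.T @ (weights[:, None] * bases)
    d_pos = (x - x_pos) @ M @ (x - x_pos)  # squared Mahalanobis distance to the similar point
    d_neg = (x - x_neg) @ M @ (x - x_neg)  # squared Mahalanobis distance to the dissimilar point
    # Triplets are ordered so that d(x, x_pos) should stay below d(x, x_neg);
    # a positive return value therefore means the triplet is satisfied.
    return d_neg - d_pos


# Example with toy values (a single basis aligned with the first feature):
# _toy_scml_triplet_margin(np.array([0., 0.]), np.array([0., 1.]),
#                          np.array([3., 0.]), np.eye(1, 2), np.ones(1)) > 0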
-from metric_learn.constraints import wrap_pairs +from metric_learn.constraints import wrap_pairs, Constraints from metric_learn.lmnn import _sum_outer_products def class_separation(X, labels): unique_labels, label_inds = np.unique(labels, return_inverse=True) ratio = 0 - for li in xrange(len(unique_labels)): - Xc = X[label_inds==li] - Xnc = X[label_inds!=li] - ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc,Xnc).mean() + for li in range(len(unique_labels)): + Xc = X[label_inds == li] + Xnc = X[label_inds != li] + ratio += pairwise_distances(Xc).mean() / pairwise_distances(Xc, Xnc).mean() return ratio / len(unique_labels) @@ -75,168 +76,272 @@ def test_singular_returns_pseudo_inverse(self): pseudo_inverse) +class TestSCML(object): + @pytest.mark.parametrize('basis', ('lda', 'triplet_diffs')) + def test_iris(self, basis): + """ + SCML applied to Iris dataset should give better results when + computing class separation. + """ + X, y = load_iris(return_X_y=True) + before = class_separation(X, y) + scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5, + random_state=42) + scml.fit(X, y) + after = class_separation(scml.transform(X), y) + assert before > after + 0.03 # It's better by a margin of 0.03 + + def test_big_n_features(self): + X, y = make_classification(n_samples=100, n_classes=3, n_features=60, + n_informative=60, n_redundant=0, n_repeated=0, + random_state=42) + X = StandardScaler().fit_transform(X) + scml = SCML_Supervised(random_state=42, n_basis=399) + scml.fit(X, y) + csep = class_separation(scml.transform(X), y) + assert csep < 0.7 + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.ones((3, 3, 3)),)), + (SCML_Supervised, (np.array([[0, 0], [0, 1], + [2, 0], [2, 1]]), + np.array([1, 0, 1, 0])))]) + def test_bad_basis(self, estimator, data): + model = estimator(basis='bad_basis', n_basis=33) # n_basis doesn't matter + msg = ("`basis` must be one of the options '{}' or an array of shape " + "(n_basis, n_features)." 
+ .format("', '".join(model._authorized_basis))) + with pytest.raises(ValueError) as raised_error: + model.fit(*data) + assert msg == raised_error.value.args[0] + + def test_dimension_reduction_msg(self): + scml = SCML(n_basis=2) + triplets = np.array([[[0, 1], [2, 1], [0, 0]], + [[2, 1], [0, 1], [2, 0]], + [[0, 0], [2, 0], [0, 1]], + [[2, 0], [0, 0], [2, 1]]]) + msg = ("The number of bases with nonzero weight is less than the " + "number of features of the input, in consequence the " + "learned transformation reduces the dimension to 1.") + with pytest.warns(UserWarning) as raised_warning: + scml.fit(triplets) + assert msg == raised_warning[0].message.args[0] + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.array([[[0, 1], [2, 1], [0, 0]], + [[2, 1], [0, 1], [2, 0]], + [[0, 0], [2, 0], [0, 1]], + [[2, 0], [0, 0], [2, 1]]]),)), + (SCML_Supervised, (np.array([[0, 0], [1, 1], + [3, 3]]), + np.array([1, 2, 3])))]) + def test_n_basis_wrong_type(self, estimator, data): + n_basis = 4.0 + model = estimator(n_basis=n_basis) + msg = ("n_basis should be an integer, instead it is of type %s" + % type(n_basis)) + with pytest.raises(ValueError) as raised_error: + model.fit(*data) + assert msg == raised_error.value.args[0] + + def test_small_n_basis_lda(self): + X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]]) + y = np.array([0, 0, 1, 1]) + + n_class = 2 + scml = SCML_Supervised(n_basis=n_class-1) + msg = ("The number of basis is less than the number of classes, which may" + " lead to poor discriminative performance.") + with pytest.warns(UserWarning) as raised_warning: + scml.fit(X, y) + assert msg == raised_warning[0].message.args[0] + + def test_big_n_basis_lda(self): + X = np.array([[0, 0], [1, 1], [3, 3]]) + y = np.array([1, 2, 3]) + + n_class = 3 + num_eig = min(n_class - 1, X.shape[1]) + n_basis = X.shape[0] * 2 * num_eig + + scml = SCML_Supervised(n_basis=n_basis) + msg = ("Not enough samples to generate %d LDA bases, n_basis" + "should be smaller than %d" % + (n_basis, n_basis)) + with pytest.raises(ValueError) as raised_error: + scml.fit(X, y) + assert msg == raised_error.value.args[0] + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.random.rand(3, 3, 2),)), + (SCML_Supervised, (np.array([[0, 0], [0, 1], + [2, 0], [2, 1]]), + np.array([1, 0, 1, 0])))]) + def test_array_basis(self, estimator, data): + """ Test that the proper error is raised when the shape of the input basis + array is not consistent with the input + """ + basis = np.eye(3) + scml = estimator(n_basis=3, basis=basis) + + msg = ('The dimensionality ({}) of the provided bases must match the ' + 'dimensionality of the data ({}).' 
+ .format(basis.shape[1], data[0].shape[-1])) + with pytest.raises(ValueError) as raised_error: + scml.fit(*data) + assert msg == raised_error.value.args[0] + + @pytest.mark.parametrize(('estimator', 'data'), + [(SCML, (np.array([[0, 1, 2], [0, 1, 3], [1, 0, 2], + [1, 0, 3], [2, 3, 1], [2, 3, 0], + [3, 2, 1], [3, 2, 0]]),)), + (SCML_Supervised, (np.array([0, 1, 2, 3]), + np.array([0, 0, 1, 1])))]) + def test_verbose(self, estimator, data, capsys): + # assert there is proper output when verbose = True + model = estimator(preprocessor=np.array([[0, 0], [1, 1], [2, 2], [3, 3]]), + max_iter=1, output_iter=1, batch_size=1, + basis='triplet_diffs', random_state=42, verbose=True) + model.fit(*data) + out, _ = capsys.readouterr() + expected_out = ('[%s] iter 1\t obj 0.569946\t num_imp 2\n' + 'max iteration reached.\n' % estimator.__name__) + assert out == expected_out + + def test_triplet_diffs_toy(self): + expected_n_basis = 10 + model = SCML_Supervised(n_basis=expected_n_basis) + X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]]) + triplets = np.array([[0, 1, 2], [0, 1, 3], [1, 0, 2], [1, 0, 3], + [2, 3, 1], [2, 3, 0], [3, 2, 1], [3, 2, 0]]) + basis, n_basis = model._generate_bases_dist_diff(triplets, X) + # All points are along the same line, so the only possible basis will be + # the vector along that line normalized. + expected_basis = np.ones((expected_n_basis, 2))/np.sqrt(2) + assert n_basis == expected_n_basis + np.testing.assert_allclose(basis, expected_basis) + + def test_lda_toy(self): + expected_n_basis = 7 + model = SCML_Supervised(n_basis=expected_n_basis) + X = np.array([[0, 0], [1, 1], [2, 2], [3, 3]]) + y = np.array([0, 0, 1, 1]) + basis, n_basis = model._generate_bases_LDA(X, y) + # All points are along the same line, so the only possible basis will be + # the vector along that line normalized. In this case it is possible to + # obtain it with positive or negative orientations. + expected_basis = np.ones((expected_n_basis, 2))/np.sqrt(2) + assert n_basis == expected_n_basis + np.testing.assert_allclose(np.abs(basis), expected_basis) + + @pytest.mark.parametrize('n_samples', [100, 500]) + @pytest.mark.parametrize('n_features', [10, 50, 100]) + @pytest.mark.parametrize('n_classes', [5, 10, 15]) + def test_triplet_diffs(self, n_samples, n_features, n_classes): + """ + Test that the correct value of n_basis is being generated with + different triplet constraints. + """ + X, y = make_classification(n_samples=n_samples, n_classes=n_classes, + n_features=n_features, n_informative=n_features, + n_redundant=0, n_repeated=0) + X = StandardScaler().fit_transform(X) + model = SCML_Supervised(n_basis=None) # Explicit n_basis=None + constraints = Constraints(y) + triplets = constraints.generate_knntriplets(X, model.k_genuine, + model.k_impostor) + + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + basis, n_basis = model._generate_bases_dist_diff(triplets, X) + assert msg in str(raised_warning[0].message) + + expected_n_basis = n_features * 80 + assert n_basis == expected_n_basis + assert basis.shape == (expected_n_basis, n_features) + + @pytest.mark.parametrize('n_samples', [100, 500]) + @pytest.mark.parametrize('n_features', [10, 50, 100]) + @pytest.mark.parametrize('n_classes', [5, 10, 15]) + def test_lda(self, n_samples, n_features, n_classes): + """ + Test that when n_basis=None, the correct n_basis is generated, + for SCML_Supervised and different values of n_samples, n_features + and n_classes. 
+ """ + X, y = make_classification(n_samples=n_samples, n_classes=n_classes, + n_features=n_features, n_informative=n_features, + n_redundant=0, n_repeated=0) + X = StandardScaler().fit_transform(X) + + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + model = SCML_Supervised(n_basis=None) # Explicit n_basis=None + basis, n_basis = model._generate_bases_LDA(X, y) + assert msg in str(raised_warning[0].message) + + num_eig = min(n_classes - 1, n_features) + expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1) + assert n_basis == expected_n_basis + assert basis.shape == (expected_n_basis, n_features) + + @pytest.mark.parametrize('name', ['max_iter', 'output_iter', 'batch_size', + 'n_basis']) + def test_int_inputs(self, name): + value = 1.0 + d = {name: value} + scml = SCML(**d) + triplets = np.array([[[0, 1], [2, 1], [0, 0]]]) + + msg = ("%s should be an integer, instead it is of type" + " %s" % (name, type(value))) + with pytest.raises(ValueError) as raised_error: + scml.fit(triplets) + assert msg == raised_error.value.args[0] + + @pytest.mark.parametrize('name', ['max_iter', 'output_iter', 'batch_size', + 'k_genuine', 'k_impostor', 'n_basis']) + def test_int_inputs_supervised(self, name): + value = 1.0 + d = {name: value} + scml = SCML_Supervised(**d) + X = np.array([[0, 0], [1, 1], [3, 3], [4, 4]]) + y = np.array([1, 1, 0, 0]) + msg = ("%s should be an integer, instead it is of type" + " %s" % (name, type(value))) + with pytest.raises(ValueError) as raised_error: + scml.fit(X, y) + assert msg == raised_error.value.args[0] + + def test_large_output_iter(self): + scml = SCML(max_iter=1, output_iter=2, n_basis=33) # n_basis don't matter + triplets = np.array([[[0, 1], [2, 1], [0, 0]]]) + msg = ("The value of output_iter must be equal or smaller than" + " max_iter.") + + with pytest.raises(ValueError) as raised_error: + scml.fit(triplets) + assert msg == raised_error.value.args[0] + + class TestLSML(MetricTestCase): def test_iris(self): - lsml = LSML_Supervised(num_constraints=200) + lsml = LSML_Supervised(n_constraints=200) lsml.fit(self.iris_points, self.iris_labels) csep = class_separation(lsml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.8) # it's pretty terrible - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised() - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. 
This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lsml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.], [-5., 3.], [5., 0.]], - [[0., 50.], [0., -60], [-10., 0.], [10., 0.]]]) - lsml = LSML() - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lsml.fit(pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised() - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `LSML_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - lsml_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lsml_supervised = LSML_Supervised() - msg = ('As of v0.5.0, `LSML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lsml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - class TestITML(MetricTestCase): def test_iris(self): - itml = ITML_Supervised(num_constraints=200) + itml = ITML_Supervised(n_constraints=200) itml.fit(self.iris_points, self.iris_labels) csep = class_separation(itml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) - - def test_deprecation_bounds(self): - # test that a deprecation message is thrown if bounds is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised(bounds=None) - msg = ('"bounds" parameter from initialization is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. 
Use the "bounds" parameter of this ' - 'fit method instead.') - assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) - - def test_deprecation_A0(self): - # test that a deprecation message is thrown if A0 is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised(A0=np.ones_like(X)) - msg = ('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - itml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - itml = ITML(A0=np.ones_like(X)) - with pytest.warns(DeprecationWarning) as raised_warning: - itml.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised() - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `ITML_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - itml_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - itml_supervised = ITML_Supervised() - msg = ('As of v0.5.0, `ITML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - itml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -277,7 +382,7 @@ def test_bounds_parameters_invalid(bounds): class TestLMNN(MetricTestCase): def test_iris(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.iris_points, self.iris_labels) csep = class_separation(lmnn.transform(self.iris_points), @@ -294,7 +399,7 @@ def test_loss_grad_lbfgs(self): L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1]) lmnn = LMNN() - k = lmnn.k + k = lmnn.n_neighbors reg = lmnn.regularization X, y = lmnn._prepare_inputs(X, y, dtype=float, @@ -327,35 +432,6 @@ def grad(x): np.linalg.norm(approx_fprime(L.ravel(), fun, epsilon))) np.testing.assert_almost_equal(rel_diff, 0., decimal=5) - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lmnn = LMNN(k=2) - msg = ("Warning, no init was set (`init=None`). 
As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous identity matrix. If you still want to use the identity " - "matrix as before, set init='identity'. This warning " - "will disappear in v0.6.0, and `init` parameter's default value " - "will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - lmnn.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_use_pca(self): - # test that a DeprecationWarning is thrown about use_pca, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lmnn = LMNN(k=2, use_pca=True) - msg = ('"use_pca" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0.') - assert_warns_message(DeprecationWarning, msg, lmnn.fit, X, y) - def test_loss_func(capsys): """Test the loss function (and its gradient) on a simple example, @@ -384,15 +460,15 @@ def loss_fn(L, X, y, target_neighbors, reg): for j in target_neighbors[i]: loss += (1 - reg) * np.sum((Lx[i] - Lx[j]) ** 2) grad += (1 - reg) * np.outer(Lx[i] - Lx[j], X[i] - X[j]) - for l in range(X.shape[0]): - if y[i] != y[l]: + for k in range(X.shape[0]): + if y[i] != y[k]: hin, active = hinge(1 + np.sum((Lx[i] - Lx[j])**2) - - np.sum((Lx[i] - Lx[l])**2)) + np.sum((Lx[i] - Lx[k])**2)) total_active += active if active: loss += reg * hin grad += (reg * (np.outer(Lx[i] - Lx[j], X[i] - X[j]) - - np.outer(Lx[i] - Lx[l], X[i] - X[l]))) + np.outer(Lx[i] - Lx[k], X[i] - X[k]))) grad = 2 * grad return grad, loss, total_active @@ -434,7 +510,7 @@ def __init__(self, callback, *args, **kwargs): def _loss_grad(self, *args, **kwargs): grad, objective, total_active = ( - super(LMNN_with_callback, self)._loss_grad(*args, **kwargs)) + super(LMNN_with_callback, self)._loss_grad(*args, **kwargs)) self.callback.append(grad) return grad, objective, total_active @@ -463,18 +539,18 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): out, _ = capsys.readouterr() lines = re.split("\n+", out) # we get every variable that is printed from the algorithm in verbose - num = '(-?\d+.?\d*(e[+|-]\d+)?)' - strings = [re.search("\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})" + num = r'(-?\d+.?\d*(e[+|-]\d+)?)' + strings = [re.search(r"\d+ (?:{}) (?:{}) (?:(\d+)) (?:{})" .format(num, num, num), s) for s in lines] objectives[name] = [float(match.group(1)) for match in strings if match is not None] obj_diffs[name] = [float(match.group(3)) for match in strings if match is - not None] + not None] total_active[name] = [float(match.group(5)) for match in strings if match is not None] learn_rate[name] = [float(match.group(6)) for match in strings if match is - not None] + not None] assert len(strings) >= 10 # we ensure that we actually did more than 10 # iterations assert total_active[name][0] >= 2 # we ensure that we have some active @@ -499,9 +575,9 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds): def test_toy_ex_lmnn(X, y, loss): """Test that the loss give the right result on a toy example""" L = np.array([[1]]) - lmnn = LMNN(k=1, regularization=0.5) + lmnn = LMNN(n_neighbors=1, regularization=0.5) - k = lmnn.k + k = lmnn.n_neighbors reg = lmnn.regularization X, y = lmnn._prepare_inputs(X, y, dtype=float, @@ -512,17 +588,15 @@ def test_toy_ex_lmnn(X, y, loss): lmnn.components_ = np.eye(n_components) target_neighbors = lmnn._select_targets(X, label_inds) - impostors = 
lmnn._find_impostors(target_neighbors[:, -1], X, label_inds, L) # sum outer products dfG = _sum_outer_products(X, target_neighbors.flatten(), np.repeat(np.arange(X.shape[0]), k)) - df = np.zeros_like(dfG) # storage - a1 = [None]*k - a2 = [None]*k - for nn_idx in xrange(k): + a1 = [None] * k + a2 = [None] * k + for nn_idx in range(k): a1[nn_idx] = np.array([]) a2[nn_idx] = np.array([]) @@ -530,9 +604,10 @@ def test_toy_ex_lmnn(X, y, loss): assert lmnn._loss_grad(X, L.reshape(-1, X.shape[1]), dfG, k, reg, target_neighbors, label_inds)[1] == loss + def test_convergence_simple_example(capsys): # LMNN should converge on this simple example, which it did not with - # this issue: https://github.com/metric-learn/metric-learn/issues/88 + # this issue: https://github.com/scikit-learn-contrib/metric-learn/issues/88 X, y = make_classification(random_state=0) lmnn = LMNN(verbose=True) lmnn.fit(X, y) @@ -542,7 +617,7 @@ def test_convergence_simple_example(capsys): def test_no_twice_same_objective(capsys): # test that the objective function never has twice the same value - # see https://github.com/metric-learn/metric-learn/issues/88 + # see https://github.com/scikit-learn-contrib/metric-learn/issues/88 X, y = make_classification(random_state=0) lmnn = LMNN(verbose=True) lmnn.fit(X, y) @@ -553,7 +628,7 @@ def test_no_twice_same_objective(capsys): # number), and which is followed by a (signed) float (delta obj). It # matches for instance: # 3 **1113.7665747189938** -3.182774197440267 46431.0200999999999998e-06 - objectives = [re.search("\d* (?:(\d*.\d*))[ | -]\d*.\d*", s) + objectives = [re.search(r"\d* (?:(\d*.\d*))[ | -]\d*.\d*", s) for s in lines] objectives = [match.group(1) for match in objectives if match is not None] # we remove the last element because it can be equal to the penultimate @@ -574,8 +649,7 @@ def test_sdml_supervised_raises_warning_msg_not_installed_skggm(self): # load_iris: dataset where we know scikit-learn's graphical lasso fails # with a Floating Point error X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=0.5, use_cov=True, - sparsity_param=0.01) + sdml_supervised = SDML_Supervised(balance_param=0.5, sparsity_param=0.01) msg = ("There was a problem in SDML when using scikit-learn's graphical " "lasso solver. skggm's graphical lasso can sometimes converge on " "non SPD cases where scikit-learn's graphical lasso fails to " @@ -661,12 +735,12 @@ def test_raises_no_warning_installed_skggm(self): pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]]) y_pairs = [1, -1] X, y = make_classification(random_state=42) - with pytest.warns(None) as records: + with warnings.catch_warnings(record=True) as records: sdml = SDML(prior='covariance') sdml.fit(pairs, y_pairs) for record in records: assert record.category is not ConvergenceWarning - with pytest.warns(None) as records: + with warnings.catch_warnings(record=True) as records: sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5) sdml_supervised.fit(X, y) for record in records: @@ -677,25 +751,13 @@ def test_iris(self): # TODO: un-flake it! 
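    # Constraints are now sampled with the random_state given at construction:
    # the seed below is forwarded through SDML_Supervised's constructor rather
    # than through fit(), matching the removal of the deprecated fit-time
    # random_state argument elsewhere in this patch.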
rs = np.random.RandomState(5555) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', - balance_param=5e-5) - sdml.fit(self.iris_points, self.iris_labels, random_state=rs) + sdml = SDML_Supervised(n_constraints=1500, prior='identity', + balance_param=5e-5, random_state=rs) + sdml.fit(self.iris_points, self.iris_labels) csep = class_separation(sdml.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.22) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42) - sdml_supervised = SDML_Supervised(num_labeled=np.inf, prior='identity', - balance_param=5e-5) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) - def test_sdml_raises_warning_non_psd(self): """Tests that SDML raises a warning on a toy example where we know the pseudo-covariance matrix is not PSD""" @@ -738,83 +800,6 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): random_state=np.random.RandomState(42)) sdml.fit(X, y) - def test_deprecation_use_cov(self): - # test that a deprecation message is thrown if use_cov is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - sdml_supervised = SDML_Supervised(use_cov=np.ones_like(X), - balance_param=1e-5) - msg = ('"use_cov" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "prior" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - sdml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - sdml = SDML(use_cov=np.ones_like(X), balance_param=1e-5) - with pytest.warns(DeprecationWarning) as raised_warning: - sdml.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used (except for the balance_param that we need - # to set for the algorithm to not diverge) - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - sdml_supervised = SDML_Supervised(balance_param=1e-5) - msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, " - "the default prior will now be set to " - "'identity', instead of 'covariance'. If you still want to use " - "the inverse of the covariance matrix as a prior, " - "set prior='covariance'. 
This warning will disappear in " - "v0.6.0, and `prior` parameter's default value will be set to " - "'identity'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - sdml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - sdml = SDML(balance_param=1e-5) - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - sdml.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=5e-5) - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `SDML_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - sdml_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X, y = load_iris(return_X_y=True) - sdml_supervised = SDML_Supervised(balance_param=5e-5) - msg = ('As of v0.5.0, `SDML_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - sdml_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -964,7 +949,7 @@ def test_singleton_class(self): X = X[[ind_0[0], ind_1[0], ind_2[0]]] y = y[[ind_0[0], ind_1[0], ind_2[0]]] - A = make_spd_matrix(X.shape[1], X.shape[1]) + A = make_spd_matrix(n_dim=X.shape[1], random_state=X.shape[1]) nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.components_, A) @@ -975,45 +960,11 @@ def test_one_class(self): X = self.iris_points[self.iris_labels == 0] y = self.iris_labels[self.iris_labels == 0] - A = make_spd_matrix(X.shape[1], X.shape[1]) + A = make_spd_matrix(n_dim=X.shape[1], random_state=X.shape[1]) nca = NCA(init=A, max_iter=30, n_components=X.shape[1]) nca.fit(X, y) assert_array_equal(nca.components_, A) - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA() - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of the " - "previous scaling matrix. If you still want to use the same " - "scaling matrix as before, set " - "init=np.eye(X.shape[1])/(np.maximum(X.max(axis=0)-X.min(axis=0)" - ", EPS))). 
This warning will disappear in v0.6.0, and `init` " - "parameter's default value will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - nca.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_nca(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - nca.fit(X, y) - assert (str(raised_warning[0].message) == msg) - class TestLFDA(MetricTestCase): def test_iris(self): @@ -1027,69 +978,13 @@ def test_iris(self): self.assertEqual(lfda.components_.shape, (2, 4)) -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_lfda(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - lfda = LFDA(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - lfda.fit(X, y) - assert (str(raised_warning[0].message) == msg) - - class TestRCA(MetricTestCase): def test_iris(self): - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.29) - def test_deprecation_pca_comps(self): - # test that a deprecation message is thrown if pca_comps is set at - # initialization - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(pca_comps=X.shape[1], num_chunks=20) - msg = ('"pca_comps" parameter is not used. ' - 'It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. RCA will not do PCA preprocessing anymore. If ' - 'you still want to do it, you could use ' - '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.') - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca_supervised.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - - rca = RCA(pca_comps=X.shape[1]) - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - - def test_changedbehaviorwarning_preprocessing(self): - # test that a ChangedBehaviorWarning is thrown when using RCA - # TODO: remove in v.0.6 - - msg = ("RCA will no longer center the data before training. If you want " - "to do some preprocessing, you should do it manually (you can also " - "use an `sklearn.pipeline.Pipeline` for instance). 
This warning " - "will disappear in version 0.6.0.") - - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(num_chunks=20) - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca_supervised.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - - rca = RCA() - with pytest.warns(ChangedBehaviorWarning) as expected_msg: - rca.fit(X, y) - assert any(str(w.message) == msg for w in expected_msg) - def test_rank_deficient_returns_warning(self): """Checks that if the covariance matrix is not invertible, we raise a warning message advising to use PCA""" @@ -1100,65 +995,49 @@ def test_rank_deficient_returns_warning(self): rca = RCA() msg = ('The inner covariance matrix is not invertible, ' 'so the transformation matrix may contain Nan values. ' - 'You should reduce the dimensionality of your input,' + 'You should remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' 'for instance using `sklearn.decomposition.PCA` as a ' 'preprocessing step.') - with pytest.warns(None) as raised_warnings: + + with warnings.catch_warnings(record=True) as raised_warnings: rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warnings) - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(num_chunks=20) - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `RCA_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - rca_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X, y = make_classification(random_state=42, n_samples=100) - rca_supervised = RCA_Supervised(num_chunks=20) - msg = ('As of v0.5.0, `RCA_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. ' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - rca_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_rca(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X, y = load_iris(return_X_y=True) - rca = RCA(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - rca.fit(X, y) - assert any(str(w.message) == msg for w in raised_warning) - - # we take a small number of chunks so that RCA works on iris - rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. 
Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - rca_supervised.fit(X, y) - assert any(str(w.message) == msg for w in raised_warning) + def test_unknown_labels(self): + n = 200 + n_chunks = 50 + X, y = make_classification(random_state=42, n_samples=2 * n, + n_features=6, n_informative=6, n_redundant=0) + y2 = np.concatenate((y[:n], -np.ones(n))) + + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) + rca.fit(X[:n], y[:n]) + + rca2 = RCA_Supervised(n_chunks=n_chunks, random_state=42) + rca2.fit(X, y2) + + assert not np.any(np.isnan(rca.components_)) + assert not np.any(np.isnan(rca2.components_)) + + np.testing.assert_array_equal(rca.components_, rca2.components_) + + def test_bad_parameters(self): + n = 200 + n_chunks = 3 + X, y = make_classification(random_state=42, n_samples=n, + n_features=6, n_informative=6, n_redundant=0) + + rca = RCA_Supervised(n_chunks=n_chunks, random_state=42) + msg = ('Due to the parameters of RCA_Supervised, ' + 'the inner covariance matrix is not invertible, ' + 'so the transformation matrix will contain Nan values. ' + 'Increase the number or size of the chunks to correct ' + 'this problem.' + ) + with warnings.catch_warnings(record=True) as raised_warning: + rca.fit(X, y) + assert any(str(w.message) == msg for w in raised_warning) class TestMLKR(MetricTestCase): @@ -1191,65 +1070,19 @@ def grad_fn(M): rel_diff = check_grad(fun, grad_fn, M.ravel()) / np.linalg.norm(grad_fn(M)) np.testing.assert_almost_equal(rel_diff, 0.) - def test_deprecation_A0(self): - # test that a deprecation message is thrown if A0 is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mlkr = MLKR(A0=np.ones_like(X)) - msg = ('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - mlkr.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([0.1, 0.2, 0.3, 0.4]) - mlkr = MLKR() - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'auto', instead of 'pca'. " - "If you still want to use PCA as an init, set init='pca'. " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mlkr.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - -@pytest.mark.parametrize('num_dims', [None, 2]) -def test_deprecation_num_dims_mlkr(num_dims): - # test that a deprecation message is thrown if num_dims is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mlkr = MLKR(num_dims=num_dims) - msg = ('"num_dims" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0. 
Use "n_components" instead') - with pytest.warns(DeprecationWarning) as raised_warning: - mlkr.fit(X, y) - assert (str(raised_warning[0].message) == msg) - class TestMMC(MetricTestCase): def test_iris(self): - # Generate full set of constraints for comparison with reference implementation - n = self.iris_points.shape[0] - mask = (self.iris_labels[None] == self.iris_labels[:,None]) + # Generate full set of constraints for comparison with reference + # implementation + mask = self.iris_labels[None] == self.iris_labels[:, None] a, b = np.nonzero(np.triu(mask, k=1)) c, d = np.nonzero(np.triu(~mask, k=1)) # Full metric n_features = self.iris_points.shape[1] - mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10) + mmc = MMC(tol=0.01, init=np.eye(n_features) / 10) mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [[+0.000514, +0.000868, -0.001195, -0.001703], [+0.000868, +0.001468, -0.002021, -0.002879], @@ -1260,7 +1093,7 @@ def test_iris(self): # Diagonal metric mmc = MMC(diagonal=True) - mmc.fit(*wrap_pairs(self.iris_points, [a,b,c,d])) + mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d])) expected = [0, 0, 1.210220, 1.228596] assert_array_almost_equal(np.diag(expected), mmc.get_mahalanobis_matrix(), decimal=6) @@ -1270,103 +1103,13 @@ def test_iris(self): mmc.fit(self.iris_points, self.iris_labels) csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.15) - + # Supervised Diagonal mmc = MMC_Supervised(diagonal=True) mmc.fit(self.iris_points, self.iris_labels) csep = class_separation(mmc.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.2) - def test_deprecation_num_labeled(self): - # test that a deprecation message is thrown if num_labeled is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised(num_labeled=np.inf) - msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - ' removed in 0.6.0') - assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) - - def test_deprecation_A0(self): - # test that a deprecation message is thrown if A0 is set at - # initialization - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised(A0=np.ones_like(X)) - msg = ('"A0" parameter is not used.' - ' It has been deprecated in version 0.5.0 and will be' - 'removed in 0.6.0. Use "init" instead.') - with pytest.warns(DeprecationWarning) as raised_warning: - mmc_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - mmc = MMC(A0=np.ones_like(X)) - with pytest.warns(DeprecationWarning) as raised_warning: - mmc.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning(self): - # test that a ChangedBehavior warning is thrown about the init, if the - # default parameters are used. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised() - msg = ("Warning, no init was set (`init=None`). As of version 0.5.0, " - "the default init will now be set to 'identity', instead of the " - "identity divided by a scaling factor of 10. 
" - "If you still want to use the same init as in previous " - "versions, set init=np.eye(d)/10, where d is the dimension " - "of your input space (d=pairs.shape[1]). " - "This warning will disappear in v0.6.0, and `init` parameter's" - " default value will be set to 'auto'.") - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mmc_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - pairs = np.array([[[-10., 0.], [10., 0.]], [[0., 50.], [0., -60]]]) - y_pairs = [1, -1] - mmc = MMC() - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mmc.fit(pairs, y_pairs) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_deprecation_random_state(self): - # test that a deprecation message is thrown if random_state is set at - # fit time - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised() - msg = ('"random_state" parameter in the `fit` function is ' - 'deprecated. Set `random_state` at initialization ' - 'instead (when instantiating a new `MMC_Supervised` ' - 'object).') - with pytest.warns(DeprecationWarning) as raised_warning: - mmc_supervised.fit(X, y, random_state=np.random) - assert any(msg == str(wrn.message) for wrn in raised_warning) - - def test_changed_behaviour_warning_random_state(self): - # test that a ChangedBehavior warning is thrown if the random_state is - # not set in fit. - # TODO: remove in v.0.6 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - mmc_supervised = MMC_Supervised() - msg = ('As of v0.5.0, `MMC_Supervised` now uses the ' - '`random_state` given at initialization to sample ' - 'constraints, not the default `np.random` from the `fit` ' - 'method, since this argument is now deprecated. 
' - 'This warning will disappear in v0.6.0.') - with pytest.warns(ChangedBehaviorWarning) as raised_warning: - mmc_supervised.fit(X, y) - assert any(msg == str(wrn.message) for wrn in raised_warning) - @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), @@ -1388,10 +1131,10 @@ def test_verbose(algo_class, dataset, capsys): for line in lines[3:-2]: # The following regex will match for instance: # '[NCA] 0 6.988936e+01 0.01' - assert re.match("\[" + algo_class.__name__ + "\]\ *\d+\ *\d\.\d{6}e[+|-]" - "\d+\ *\d+\.\d{2}", line) - assert re.match("\[" + algo_class.__name__ + "\] Training took\ *" - "\d+\.\d{2}s\.", lines[-2]) + assert re.match(r"\[" + algo_class.__name__ + r"\]\ *\d+\ *\d\.\d{6}e[+|-]" + r"\d+\ *\d+\.\d{2}", line) + assert re.match(r"\[" + algo_class.__name__ + r"\] Training took\ *" + r"\d+\.\d{2}s\.", lines[-2]) assert lines[-1] == '' @@ -1415,9 +1158,10 @@ def test_convergence_warning(dataset, algo_class): X, y = dataset model = algo_class(max_iter=2, verbose=True) cls_name = model.__class__.__name__ - assert_warns_message(ConvergenceWarning, - '[{}] {} did not converge'.format(cls_name, cls_name), - model.fit, X, y) + msg = '[{}] {} did not converge'.format(cls_name, cls_name) + with pytest.warns(Warning) as raised_warning: + model.fit(X, y) + assert any([msg in str(warn.message) for warn in raised_warning]) if __name__ == '__main__': diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 725df31a..b1e71020 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -1,126 +1,167 @@ +from numpy.core.numeric import array_equal +import warnings import pytest import re import unittest import metric_learn import numpy as np from sklearn import clone -from sklearn.utils.testing import set_random_state -from test.test_utils import ids_metric_learners, metric_learners +from test.test_utils import ids_metric_learners, metric_learners, remove_y +from metric_learn.sklearn_shims import set_random_state, SKLEARN_AT_LEAST_0_22 def remove_spaces(s): - return re.sub('\s+', '', s) + return re.sub(r'\s+', '', s) + + +def sk_repr_kwargs(def_kwargs, nndef_kwargs): + """Given the non-default arguments, and the default + keywords arguments, build the string that will appear + in the __repr__ of the estimator, depending on the + version of scikit-learn. 
+ """ + if SKLEARN_AT_LEAST_0_22: + def_kwargs = {} + def_kwargs.update(nndef_kwargs) + args_str = ",".join(f"{key}={repr(value)}" + for key, value in def_kwargs.items()) + return args_str class TestStringRepr(unittest.TestCase): def test_covariance(self): + def_kwargs = {'preprocessor': None} + nndef_kwargs = {} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual(remove_spaces(str(metric_learn.Covariance())), - remove_spaces("Covariance(preprocessor=None)")) + remove_spaces(f"Covariance({merged_kwargs})")) def test_lmnn(self): + def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'n_neighbors': 3, + 'learn_rate': 1e-07, 'max_iter': 1000, 'min_iter': 50, + 'n_components': None, 'preprocessor': None, + 'random_state': None, 'regularization': 0.5, + 'verbose': False} + nndef_kwargs = {'convergence_tol': 0.01, 'n_neighbors': 6} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) self.assertEqual( - remove_spaces(str(metric_learn.LMNN())), - remove_spaces( - "LMNN(convergence_tol=0.001, init=None, k=3, " - "learn_rate=1e-07, " - "max_iter=1000, min_iter=50, n_components=None, " - "num_dims='deprecated', preprocessor=None, random_state=None, " - "regularization=0.5, use_pca='deprecated', verbose=False)")) + remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, + n_neighbors=6))), + remove_spaces(f"LMNN({merged_kwargs})")) def test_nca(self): - self.assertEqual(remove_spaces(str(metric_learn.NCA())), - remove_spaces("NCA(init=None, max_iter=100," - "n_components=None, " - "num_dims='deprecated', " - "preprocessor=None, random_state=None, " - "tol=None, verbose=False)")) + def_kwargs = {'init': 'auto', 'max_iter': 100, 'n_components': None, + 'preprocessor': None, 'random_state': None, 'tol': None, + 'verbose': False} + nndef_kwargs = {'max_iter': 42} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.NCA(max_iter=42))), + remove_spaces(f"NCA({merged_kwargs})")) def test_lfda(self): - self.assertEqual(remove_spaces(str(metric_learn.LFDA())), - remove_spaces( - "LFDA(embedding_type='weighted', k=None, " - "n_components=None, num_dims='deprecated'," - "preprocessor=None)")) + def_kwargs = {'embedding_type': 'weighted', 'k': None, + 'n_components': None, 'preprocessor': None} + nndef_kwargs = {'k': 2} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.LFDA(k=2))), + remove_spaces(f"LFDA({merged_kwargs})")) def test_itml(self): - self.assertEqual(remove_spaces(str(metric_learn.ITML())), - remove_spaces(""" -ITML(A0='deprecated', convergence_threshold=0.001, gamma=1.0, - max_iter=1000, preprocessor=None, prior='identity', random_state=None, - verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.ITML_Supervised())), - remove_spaces(""" -ITML_Supervised(A0='deprecated', bounds='deprecated', - convergence_threshold=0.001, gamma=1.0, - max_iter=1000, num_constraints=None, num_labeled='deprecated', - preprocessor=None, prior='identity', random_state=None, verbose=False) -""")) + def_kwargs = {'tol': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'preprocessor': None, + 'prior': 'identity', 'random_state': None, 'verbose': False} + nndef_kwargs = {'gamma': 0.5} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))), + remove_spaces(f"ITML({merged_kwargs})")) + def_kwargs = {'tol': 0.001, 'gamma': 1.0, + 'max_iter': 1000, 'n_constraints': None, + 'preprocessor': None, 
'prior': 'identity', + 'random_state': None, 'verbose': False} + nndef_kwargs = {'n_constraints': 7} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual( + remove_spaces(str(metric_learn.ITML_Supervised(n_constraints=7))), + remove_spaces(f"ITML_Supervised({merged_kwargs})")) def test_lsml(self): - self.assertEqual(remove_spaces(str(metric_learn.LSML())), - remove_spaces(""" -LSML(max_iter=1000, preprocessor=None, prior=None, - random_state=None, tol=0.001, verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.LSML_Supervised())), - remove_spaces(""" -LSML_Supervised(max_iter=1000, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior=None, - random_state=None, tol=0.001, verbose=False, weights=None) -""")) + def_kwargs = {'max_iter': 1000, 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'tol': 0.001, 'verbose': False} + nndef_kwargs = {'tol': 0.1} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))), + remove_spaces(f"LSML({merged_kwargs})")) + def_kwargs = {'max_iter': 1000, 'n_constraints': None, + 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'tol': 0.001, 'verbose': False, + 'weights': None} + nndef_kwargs = {'verbose': True} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual( + remove_spaces(str(metric_learn.LSML_Supervised(verbose=True))), + remove_spaces(f"LSML_Supervised({merged_kwargs})")) def test_sdml(self): - self.assertEqual(remove_spaces(str(metric_learn.SDML())), - remove_spaces(""" -SDML(balance_param=0.5, preprocessor=None, prior=None, random_state=None, - sparsity_param=0.01, use_cov='deprecated', verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.SDML_Supervised())), - remove_spaces(""" -SDML_Supervised(balance_param=0.5, num_constraints=None, - num_labeled='deprecated', preprocessor=None, prior=None, - random_state=None, sparsity_param=0.01, use_cov='deprecated', - verbose=False) -""")) + def_kwargs = {'balance_param': 0.5, 'preprocessor': None, + 'prior': 'identity', 'random_state': None, + 'sparsity_param': 0.01, 'verbose': False} + nndef_kwargs = {'verbose': True} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))), + remove_spaces(f"SDML({merged_kwargs})")) + def_kwargs = {'balance_param': 0.5, 'n_constraints': None, + 'preprocessor': None, 'prior': 'identity', + 'random_state': None, 'sparsity_param': 0.01, + 'verbose': False} + nndef_kwargs = {'sparsity_param': 0.5} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual( + remove_spaces(str(metric_learn.SDML_Supervised(sparsity_param=0.5))), + remove_spaces(f"SDML_Supervised({merged_kwargs})")) def test_rca(self): - self.assertEqual(remove_spaces(str(metric_learn.RCA())), - remove_spaces("RCA(n_components=None, " - "num_dims='deprecated', " - "pca_comps='deprecated', " - "preprocessor=None)")) - self.assertEqual(remove_spaces(str(metric_learn.RCA_Supervised())), - remove_spaces( - "RCA_Supervised(chunk_size=2, " - "n_components=None, num_chunks=100, " - "num_dims='deprecated', pca_comps='deprecated', " - "preprocessor=None, random_state=None)")) + def_kwargs = {'n_components': None, 'preprocessor': None} + nndef_kwargs = {'n_components': 3} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))), + 
remove_spaces(f"RCA({merged_kwargs})")) + def_kwargs = {'chunk_size': 2, 'n_components': None, 'n_chunks': 100, + 'preprocessor': None, 'random_state': None} + nndef_kwargs = {'n_chunks': 5} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual( + remove_spaces(str(metric_learn.RCA_Supervised(n_chunks=5))), + remove_spaces(f"RCA_Supervised({merged_kwargs})")) def test_mlkr(self): - self.assertEqual(remove_spaces(str(metric_learn.MLKR())), - remove_spaces("MLKR(A0='deprecated', init=None," - "max_iter=1000, n_components=None," - "num_dims='deprecated', preprocessor=None," - "random_state=None, tol=None, " - "verbose=False)" - )) + def_kwargs = {'init': 'auto', 'max_iter': 1000, + 'n_components': None, 'preprocessor': None, + 'random_state': None, 'tol': None, 'verbose': False} + nndef_kwargs = {'max_iter': 777} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.MLKR(max_iter=777))), + remove_spaces(f"MLKR({merged_kwargs})")) def test_mmc(self): - self.assertEqual(remove_spaces(str(metric_learn.MMC())), - remove_spaces(""" -MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False, - diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, - preprocessor=None, random_state=None, verbose=False) -""")) - self.assertEqual(remove_spaces(str(metric_learn.MMC_Supervised())), - remove_spaces(""" -MMC_Supervised(A0='deprecated', convergence_threshold=1e-06, diagonal=False, - diagonal_c=1.0, init=None, max_iter=100, max_proj=10000, - num_constraints=None, num_labeled='deprecated', preprocessor=None, - random_state=None, verbose=False) -""")) + def_kwargs = {'tol': 0.001, 'diagonal': False, + 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, + 'max_proj': 10000, 'preprocessor': None, + 'random_state': None, 'verbose': False} + nndef_kwargs = {'diagonal': True} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))), + remove_spaces(f"MMC({merged_kwargs})")) + def_kwargs = {'tol': 1e-06, 'diagonal': False, + 'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100, + 'max_proj': 10000, 'n_constraints': None, + 'preprocessor': None, 'random_state': None, + 'verbose': False} + nndef_kwargs = {'max_iter': 1} + merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs) + self.assertEqual( + remove_spaces(str(metric_learn.MMC_Supervised(max_iter=1))), + remove_spaces(f"MMC_Supervised({merged_kwargs})")) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, @@ -135,12 +176,12 @@ def test_get_metric_is_independent_from_metric_learner(estimator, # we fit the metric learner on it and then we compute the metric on some # points - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) metric = model.get_metric() score = metric(X[0], X[1]) # then we refit the estimator on another dataset - model.fit(np.sin(input_data), labels) + model.fit(*remove_y(model, np.sin(input_data), labels)) # we recompute the distance between the two points: it should be the same score_bis = metric(X[0], X[1]) @@ -155,7 +196,7 @@ def test_get_metric_raises_error(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) metric = model.get_metric() list_test_get_metric_raises = [(X[0].tolist() + [5.2], X[1]), # vectors with @@ -178,7 +219,7 @@ def test_get_metric_works_does_not_raise(estimator, 
build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) metric = model.get_metric() list_test_get_metric_doesnt_raise = [(X[0], X[1]), @@ -186,7 +227,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): (X[0][None], X[1][None])] for u, v in list_test_get_metric_doesnt_raise: - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: metric(u, v) assert len(record) == 0 @@ -194,7 +235,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset): model.components_ = np.array([3.1]) metric = model.get_metric() for u, v in [(5, 6.7), ([5], [6.7]), ([[5]], [[6.7]])]: - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: metric(u, v) assert len(record) == 0 @@ -210,20 +251,20 @@ def test_n_components(estimator, build_dataset): if hasattr(model, 'n_components'): set_random_state(model) model.set_params(n_components=None) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert model.components_.shape == (X.shape[1], X.shape[1]) model = clone(estimator) set_random_state(model) model.set_params(n_components=X.shape[1] - 1) - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert model.components_.shape == (X.shape[1] - 1, X.shape[1]) model = clone(estimator) set_random_state(model) model.set_params(n_components=X.shape[1] + 1) with pytest.raises(ValueError) as expected_err: - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert (str(expected_err.value) == 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) @@ -231,10 +272,33 @@ def test_n_components(estimator, build_dataset): set_random_state(model) model.set_params(n_components=0) with pytest.raises(ValueError) as expected_err: - model.fit(input_data, labels) + model.fit(*remove_y(model, input_data, labels)) assert (str(expected_err.value) == 'Invalid n_components, must be in [1, {}]'.format(X.shape[1])) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_score_pairs_warning(estimator, build_dataset): + """Tests that score_pairs returns a FutureWarning regarding deprecation. + Also that score_pairs and pair_distance have the same behaviour""" + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + + # We fit the metric learner on it and then we call score_pairs on some + # points + model.fit(*remove_y(model, input_data, labels)) + + msg = ("score_pairs will be deprecated in release 0.7.0. 
" + "Use pair_score to compute similarity scores, or " + "pair_distances to compute distances.") + with pytest.warns(FutureWarning) as raised_warning: + score = model.score_pairs([[X[0], X[1]], ]) + dist = model.pair_distance([[X[0], X[1]], ]) + assert array_equal(score, dist) + assert any([str(warning.message) == msg for warning in raised_warning]) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py index c0a7bbd4..c6113957 100644 --- a/test/test_components_metric_conversion.py +++ b/test/test_components_metric_conversion.py @@ -1,11 +1,10 @@ import unittest import numpy as np import pytest -from numpy.linalg import LinAlgError from scipy.stats import ortho_group from sklearn.datasets import load_iris from numpy.testing import assert_array_almost_equal, assert_allclose -from sklearn.utils.testing import ignore_warnings +from metric_learn.sklearn_shims import ignore_warnings from metric_learn import ( LMNN, NCA, LFDA, Covariance, MLKR, @@ -30,27 +29,27 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) L = lsml.components_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - itml.fit(self.X, self.y, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) + itml.fit(self.X, self.y) L = itml.components_ assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix()) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) L = lmnn.components_ assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix()) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, prior='identity', + sdml = SDML_Supervised(n_constraints=1500, prior='identity', balance_param=1e-5, random_state=seed) sdml.fit(self.X, self.y) L = sdml.components_ @@ -58,7 +57,7 @@ def test_sdml_supervised(self): def test_nca(self): n = self.X.shape[0] - nca = NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) nca.fit(self.X, self.y) L = nca.components_ assert_array_almost_equal(L.T.dot(L), nca.get_mahalanobis_matrix()) @@ -70,9 +69,8 @@ def test_lfda(self): assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix()) def test_rca_supervised(self): - seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - rca.fit(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2) + rca.fit(self.X, self.y) L = rca.components_ assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix()) @@ -94,7 +92,7 @@ def test_components_from_metric_edge_cases(self): # matrix with all its coefficients very low (to check that the algorithm # does not consider it as a diagonal matrix)(non regression test for - # https://github.com/metric-learn/metric-learn/issues/175) + # https://github.com/scikit-learn-contrib/metric-learn/issues/175) M = np.diag([1e-15, 2e-16, 3e-15, 4e-16, 5e-15, 6e-16, 7e-15]) M = P.dot(M).dot(P.T) L = components_from_metric(M) @@ -118,17 +116,14 @@ def test_components_from_metric_edge_cases(self): L = 
components_from_metric(M) assert_allclose(L.T.dot(L), M) - # matrix with a determinant still high but which should be considered as a - # non-definite matrix (to check we don't test the definiteness with the - # determinant which is a bad strategy) + # matrix with a determinant still high but which is + # undefinite w.r.t to numpy standards M = np.diag([1e5, 1e5, 1e5, 1e5, 1e5, 1e5, 1e-20]) M = P.dot(M).dot(P.T) assert np.abs(np.linalg.det(M)) > 10 assert np.linalg.slogdet(M)[1] > 1 # (just to show that the computed # determinant is far from null) - with pytest.raises(LinAlgError) as err_msg: - np.linalg.cholesky(M) - assert str(err_msg.value) == 'Matrix is not positive definite' + assert np.linalg.matrix_rank(M) < M.shape[0] # (just to show that this case is indeed considered by numpy as an # indefinite case) L = components_from_metric(M) diff --git a/test/test_constraints.py b/test/test_constraints.py new file mode 100644 index 00000000..3429d9cc --- /dev/null +++ b/test/test_constraints.py @@ -0,0 +1,188 @@ +import pytest +import numpy as np +from sklearn.utils import shuffle +from metric_learn.constraints import Constraints +from sklearn.datasets import make_blobs + +SEED = 42 + + +def gen_labels_for_chunks(n_chunks, chunk_size, + n_classes=10, n_unknown_labels=5): + """Generates n_chunks*chunk_size labels that split in n_chunks chunks, + that are homogeneous in the label.""" + assert min(n_chunks, chunk_size) > 0 + classes = shuffle(np.arange(n_classes), random_state=SEED) + n_per_class = chunk_size * (n_chunks // n_classes) + n_maj_class = chunk_size * n_chunks - n_per_class * (n_classes - 1) + + first_labels = classes[0] * np.ones(n_maj_class, dtype=int) + remaining_labels = np.concatenate([k * np.ones(n_per_class, dtype=int) + for k in classes[1:]]) + unknown_labels = -1 * np.ones(n_unknown_labels, dtype=int) + + labels = np.concatenate([first_labels, remaining_labels, unknown_labels]) + return shuffle(labels, random_state=SEED) + + +@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_exact_num_points_for_chunks(n_chunks, chunk_size): + """Checks that the chunk generation works well with just enough points.""" + labels = gen_labels_for_chunks(n_chunks, chunk_size) + + constraints = Constraints(labels) + chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size, + random_state=SEED) + + chunk_no, size_each_chunk = np.unique(chunks[chunks >= 0], + return_counts=True) + + np.testing.assert_array_equal(size_each_chunk, chunk_size) + assert chunk_no.shape[0] == n_chunks + + +@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_chunk_case_one_miss_point(n_chunks, chunk_size): + """Checks that the chunk generation breaks when one point is missing.""" + labels = gen_labels_for_chunks(n_chunks, chunk_size) + + assert len(labels) >= 1 + constraints = Constraints(labels[1:]) + with pytest.raises(ValueError) as e: + constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size, + random_state=SEED) + + expected_message = (('Not enough possible chunks of %d elements in each' + ' class to form expected %d chunks - maximum number' + ' of chunks is %d' + ) % (chunk_size, n_chunks, n_chunks - 1)) + + assert str(e.value) == expected_message + + +@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)]) +def test_unknown_labels_not_in_chunks(n_chunks, chunk_size): + """Checks that unknown labels are not assigned to any chunk.""" + labels = gen_labels_for_chunks(n_chunks, chunk_size) + + constraints = Constraints(labels) + 
chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size, + random_state=SEED) + + assert np.all(chunks[labels < 0] < 0) + + +@pytest.mark.parametrize("k_genuine, k_impostor, T_test", + [(2, 2, + [[0, 1, 3], [0, 1, 4], [0, 2, 3], [0, 2, 4], + [1, 0, 3], [1, 0, 4], [1, 2, 3], [1, 2, 4], + [2, 0, 3], [2, 0, 4], [2, 1, 3], [2, 1, 4], + [3, 4, 1], [3, 4, 2], [3, 5, 1], [3, 5, 2], + [4, 3, 1], [4, 3, 2], [4, 5, 1], [4, 5, 2], + [5, 3, 1], [5, 3, 2], [5, 4, 1], [5, 4, 2]]), + (1, 3, + [[0, 1, 3], [0, 1, 4], [0, 1, 5], [1, 0, 3], + [1, 0, 4], [1, 0, 5], [2, 1, 3], [2, 1, 4], + [2, 1, 5], [3, 4, 0], [3, 4, 1], [3, 4, 2], + [4, 3, 0], [4, 3, 1], [4, 3, 2], [5, 4, 0], + [5, 4, 1], [5, 4, 2]]), + (1, 2, + [[0, 1, 3], [0, 1, 4], [1, 0, 3], [1, 0, 4], + [2, 1, 3], [2, 1, 4], [3, 4, 1], [3, 4, 2], + [4, 3, 1], [4, 3, 2], [5, 4, 1], [5, 4, 2]])]) +def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test): + """Checks under the edge cases of knn triplet construction with enough + neighbors""" + + X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]]) + y = np.array([1, 1, 1, 2, 2, 2, -1]) + + T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor) + + assert np.array_equal(sorted(T.tolist()), T_test) + + +@pytest.mark.parametrize("k_genuine, k_impostor,", + [(3, 3), (2, 4), (3, 4), (10, 9), (144, 33)]) +def test_generate_knntriplets(k_genuine, k_impostor): + """Checks edge and over the edge cases of knn triplet construction with not + enough neighbors""" + + T_test = [[0, 1, 3], [0, 1, 4], [0, 1, 5], [0, 2, 3], [0, 2, 4], [0, 2, 5], + [1, 0, 3], [1, 0, 4], [1, 0, 5], [1, 2, 3], [1, 2, 4], [1, 2, 5], + [2, 0, 3], [2, 0, 4], [2, 0, 5], [2, 1, 3], [2, 1, 4], [2, 1, 5], + [3, 4, 0], [3, 4, 1], [3, 4, 2], [3, 5, 0], [3, 5, 1], [3, 5, 2], + [4, 3, 0], [4, 3, 1], [4, 3, 2], [4, 5, 0], [4, 5, 1], [4, 5, 2], + [5, 3, 0], [5, 3, 1], [5, 3, 2], [5, 4, 0], [5, 4, 1], [5, 4, 2]] + + X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]]) + y = np.array([1, 1, 1, 2, 2, 2, -1]) + + msg1 = ("The class 1 has 3 elements, which is not sufficient to " + f"generate {k_genuine+1} genuine neighbors " + "as specified by k_genuine") + msg2 = ("The class 2 has 3 elements, which is not sufficient to " + f"generate {k_genuine+1} genuine neighbors " + "as specified by k_genuine") + msg3 = ("The class 1 has 3 elements of other classes, which is " + f"not sufficient to generate {k_impostor} impostor " + "neighbors as specified by k_impostor") + msg4 = ("The class 2 has 3 elements of other classes, which is " + f"not sufficient to generate {k_impostor} impostor " + "neighbors as specified by k_impostor") + msgs = [msg1, msg2, msg3, msg4] + with pytest.warns(UserWarning) as user_warning: + T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor) + assert any([[msg in str(warn.message) for msg in msgs] + for warn in user_warning]) + assert np.array_equal(sorted(T.tolist()), T_test) + + +def test_generate_knntriplets_k_genuine(): + """Checks the correct error raised when k_genuine is too big """ + X, y = shuffle(*make_blobs(random_state=SEED), + random_state=SEED) + + label, labels_count = np.unique(y, return_counts=True) + labels_count_min = np.min(labels_count) + idx_smallest_label, = np.where(labels_count == labels_count_min) + k_genuine = labels_count_min + + warn_msgs = [] + for idx in idx_smallest_label: + warn_msgs.append("The class {} has {} elements, which is not sufficient " + "to generate {} genuine neighbors as specified by " + "k_genuine. 
Will generate {} genuine neighbors instead." + "\n" + .format(label[idx], k_genuine, k_genuine+1, k_genuine-1)) + + with pytest.warns(UserWarning) as raised_warning: + Constraints(y).generate_knntriplets(X, k_genuine, 1) + for warn in raised_warning: + assert str(warn.message) in warn_msgs + + +def test_generate_knntriplets_k_impostor(): + """Checks the correct error raised when k_impostor is too big """ + X, y = shuffle(*make_blobs(random_state=SEED), + random_state=SEED) + + length = len(y) + label, labels_count = np.unique(y, return_counts=True) + labels_count_max = np.max(labels_count) + idx_biggest_label, = np.where(labels_count == labels_count_max) + k_impostor = length - labels_count_max + 1 + + warn_msgs = [] + for idx in idx_biggest_label: + warn_msgs.append("The class {} has {} elements of other classes, which is" + " not sufficient to generate {} impostor neighbors as " + "specified by k_impostor. Will generate {} impostor " + "neighbors instead.\n" + .format(label[idx], k_impostor-1, k_impostor, + k_impostor-1)) + + with pytest.warns(UserWarning) as raised_warning: + Constraints(y).generate_knntriplets(X, 1, k_impostor) + for warn in raised_warning: + assert str(warn.message) in warn_msgs diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index a9b2719e..246223b0 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -1,4 +1,3 @@ -import pytest import unittest import numpy as np from sklearn.datasets import load_iris @@ -30,47 +29,47 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml = LSML_Supervised(n_constraints=200, random_state=seed) res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml = ITML_Supervised(n_constraints=200, random_state=seed) res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_lmnn(self): - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) lmnn.fit(self.X, self.y) res_1 = lmnn.transform(self.X) - lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False) + lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False) res_2 = lmnn.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_sdml_supervised(self): seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) - sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, + sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5, prior='identity', random_state=seed) res_2 = sdml.fit_transform(self.X, self.y) @@ -78,11 +77,11 @@ def test_sdml_supervised(self): def test_nca(self): n = self.X.shape[0] - nca = 
NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) nca.fit(self.X, self.y) res_1 = nca.transform(self.X) - nca = NCA(max_iter=(100000//n)) + nca = NCA(max_iter=(100000 // n)) res_2 = nca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -100,13 +99,13 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2, random_state=seed) res_2 = rca.fit_transform(self.X, self.y) @@ -124,12 +123,12 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc = MMC_Supervised(n_constraints=200, random_state=seed) res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py index a812d185..9378ac60 100644 --- a/test/test_mahalanobis_mixin.py +++ b/test/test_mahalanobis_mixin.py @@ -3,39 +3,62 @@ import pytest import numpy as np from numpy.linalg import LinAlgError -from numpy.testing import assert_array_almost_equal, assert_allclose +from numpy.testing import assert_array_almost_equal, assert_allclose, \ + assert_array_equal from scipy.spatial.distance import pdist, squareform, mahalanobis from scipy.stats import ortho_group from sklearn import clone from sklearn.cluster import DBSCAN -from sklearn.datasets import make_spd_matrix -from sklearn.utils import check_random_state +from sklearn.datasets import make_spd_matrix, make_blobs +from sklearn.utils import check_random_state, shuffle from sklearn.utils.multiclass import type_of_target -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state -from metric_learn._util import make_context +from metric_learn._util import make_context, _initialize_metric_mahalanobis +from metric_learn.sdml import _BaseSDML from metric_learn.base_metric import (_QuadrupletsClassifierMixin, + _TripletsClassifierMixin, _PairsClassifierMixin) from metric_learn.exceptions import NonPSDError from test.test_utils import (ids_metric_learners, metric_learners, - remove_y_quadruplets, ids_classifiers) + remove_y, ids_classifiers) RNG = check_random_state(0) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_pairwise(estimator, build_dataset): +def test_pair_distance_pair_score_equivalent(estimator, build_dataset): + """ + For Mahalanobis learners, pair_score should be equivalent to the + opposite of the pair_distance result. 
+ """ + input_data, labels, _, X = build_dataset() + n_samples = 20 + X = X[:n_samples] + model = clone(estimator) + set_random_state(model) + model.fit(*remove_y(estimator, input_data, labels)) + + distances = model.pair_distance(np.array(list(product(X, X)))) + scores = model.pair_score(np.array(list(product(X, X)))) + + assert_array_equal(distances, -1 * scores) + + +@pytest.mark.parametrize('estimator, build_dataset', metric_learners, + ids=ids_metric_learners) +def test_pair_distance_pairwise(estimator, build_dataset): # Computing pairwise scores should return a euclidean distance matrix. input_data, labels, _, X = build_dataset() n_samples = 20 X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) - pairwise = model.score_pairs(np.array(list(product(X, X))))\ + pairwise = model.pair_distance(np.array(list(product(X, X))))\ .reshape(n_samples, n_samples) check_is_distance_matrix(pairwise) @@ -50,52 +73,52 @@ def test_score_pairs_pairwise(estimator, build_dataset): @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_toy_example(estimator, build_dataset): - # Checks that score_pairs works on a toy example +def test_pair_distance_toy_example(estimator, build_dataset): + # Checks that pair_distance works on a toy example input_data, labels, _, X = build_dataset() n_samples = 20 X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) pairs = np.stack([X[:10], X[10:20]], axis=1) embedded_pairs = pairs.dot(model.components_.T) distances = np.sqrt(np.sum((embedded_pairs[:, 1] - embedded_pairs[:, 0])**2, axis=-1)) - assert_array_almost_equal(model.score_pairs(pairs), distances) + assert_array_almost_equal(model.pair_distance(pairs), distances) @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_finite(estimator, build_dataset): +def test_pair_distance_finite(estimator, build_dataset): # tests that the score is finite input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) pairs = np.array(list(product(X, X))) - assert np.isfinite(model.score_pairs(pairs)).all() + assert np.isfinite(model.pair_distance(pairs)).all() @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) -def test_score_pairs_dim(estimator, build_dataset): +def test_pair_distance_dim(estimator, build_dataset): # scoring of 3D arrays should return 1D array (several tuples), # and scoring of 2D arrays (one tuple) should return an error (like # scikit-learn's error when scoring 1D arrays) input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) tuples = np.array(list(product(X, X))) - assert model.score_pairs(tuples).shape == (tuples.shape[0],) + assert model.pair_distance(tuples).shape == (tuples.shape[0],) context = make_context(estimator) msg = ("3D array of formed tuples expected{}. Found 2D array " "instead:\ninput={}. 
Reshape your data and/or use a preprocessor.\n" .format(context, tuples[1])) with pytest.raises(ValueError) as raised_error: - model.score_pairs(tuples[1]) + model.pair_distance(tuples[1]) assert str(raised_error.value) == msg @@ -118,7 +141,7 @@ def test_embed_toy_example(estimator, build_dataset): X = X[:n_samples] model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) embedded_points = X.dot(model.components_.T) assert_array_almost_equal(model.transform(X), embedded_points) @@ -130,7 +153,7 @@ def test_embed_dim(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert model.transform(X).shape == X.shape # assert that ValueError is thrown if input shape is 1D @@ -139,12 +162,12 @@ def test_embed_dim(estimator, build_dataset): "instead:\ninput={}. Reshape your data and/or use a " "preprocessor.\n".format(context, X[0])) with pytest.raises(ValueError) as raised_error: - model.score_pairs(model.transform(X[0, :])) + model.pair_distance(model.transform(X[0, :])) assert str(raised_error.value) == err_msg # we test that the shape is also OK when doing dimensionality reduction if hasattr(model, 'n_components'): model.set_params(n_components=2) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert model.transform(X).shape == (X.shape[0], 2) # assert that ValueError is thrown if input shape is 1D with pytest.raises(ValueError) as raised_error: @@ -159,7 +182,7 @@ def test_embed_finite(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert np.isfinite(model.transform(X)).all() @@ -170,7 +193,7 @@ def test_embed_is_linear(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert_array_almost_equal(model.transform(X[:10] + X[10:20]), model.transform(X[:10]) + model.transform(X[10:20])) @@ -189,12 +212,11 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator, input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] a, b = (rng.randn(n_features), rng.randn(n_features)) - expected_dist = mahalanobis(a[None], b[None], - VI=model.get_mahalanobis_matrix()) + expected_dist = mahalanobis(a, b, VI=model.get_mahalanobis_matrix()) assert_allclose(metric(a, b), expected_dist, rtol=1e-13) @@ -208,7 +230,7 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -225,24 +247,6 @@ def test_get_metric_is_pseudo_metric(estimator, build_dataset): np.isclose(metric(a, c), metric(a, b) + 
metric(b, c), rtol=1e-20)) -@pytest.mark.parametrize('estimator, build_dataset', metric_learners, - ids=ids_metric_learners) -def test_metric_raises_deprecation_warning(estimator, build_dataset): - """assert that a deprecation warning is raised if someones wants to call - the `metric` function""" - # TODO: remove this method in version 0.6.0 - input_data, labels, _, X = build_dataset() - model = clone(estimator) - set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) - - with pytest.warns(DeprecationWarning) as raised_warning: - model.metric() - assert (str(raised_warning[0].message) == - ("`metric` is deprecated since version 0.5.0 and will be removed " - "in 0.6.0. Use `get_mahalanobis_matrix` instead.")) - - @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): @@ -251,7 +255,7 @@ def test_get_metric_compatible_with_scikit_learn(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) clustering = DBSCAN(metric=model.get_metric()) clustering.fit(X) @@ -264,7 +268,7 @@ def test_get_squared_metric(estimator, build_dataset): input_data, labels, _, X = build_dataset() model = clone(estimator) set_random_state(model) - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) metric = model.get_metric() n_features = X.shape[1] @@ -284,26 +288,35 @@ def test_components_is_2D(estimator, build_dataset): model = clone(estimator) set_random_state(model) # test that it works for X.shape[1] features - model.fit(*remove_y_quadruplets(estimator, input_data, labels)) + model.fit(*remove_y(estimator, input_data, labels)) assert model.components_.shape == (X.shape[1], X.shape[1]) - # test that it works for 1 feature - trunc_data = input_data[..., :1] + if isinstance(estimator, _BaseSDML): + # SDML doesn't support running on a single feature. + return + + # test that it works for 1 feature. Use 2nd dimension, to avoid border cases + trunc_data = input_data[..., 1:2] # we drop duplicates that might have been formed, i.e. of the form # aabc or abcc or aabb for quadruplets, and aa for pairs. 
+ if isinstance(estimator, _QuadrupletsClassifierMixin): - for slice_idx in [slice(0, 2), slice(2, 4)]: - pairs = trunc_data[:, slice_idx, :] - diffs = pairs[:, 1, :] - pairs[:, 0, :] - to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) - trunc_data = trunc_data[to_keep] - labels = labels[to_keep] + pairs_idx = [[0, 1], [2, 3]] + elif isinstance(estimator, _TripletsClassifierMixin): + pairs_idx = [[0, 1], [0, 2]] elif isinstance(estimator, _PairsClassifierMixin): - diffs = trunc_data[:, 1, :] - trunc_data[:, 0, :] - to_keep = np.where(np.abs(diffs.ravel()) > 1e-9) + pairs_idx = [[0, 1]] + else: + pairs_idx = [] + + for pair_idx in pairs_idx: + pairs = trunc_data[:, pair_idx, :] + diffs = pairs[:, 1, :] - pairs[:, 0, :] + to_keep = np.abs(diffs.ravel()) > 1e-9 trunc_data = trunc_data[to_keep] labels = labels[to_keep] - model.fit(*remove_y_quadruplets(estimator, trunc_data, labels)) + + model.fit(*remove_y(estimator, trunc_data, labels)) assert model.components_.shape == (1, 1) # the components must be 2D @@ -429,7 +442,7 @@ def test_auto_init_transformation(n_samples, n_features, n_classes, random_state=rng) # To make the test work for LMNN: if 'LMNN' in model_base.__class__.__name__: - model_base.set_params(k=1) + model_base.set_params(n_neighbors=1) # To make the test faster for estimators that have a max_iter: if hasattr(model_base, 'max_iter'): model_base.set_params(max_iter=1) @@ -515,12 +528,12 @@ def test_init_mahalanobis(estimator, build_dataset): model.fit(input_data, labels) # Initialize with a random spd matrix - init = make_spd_matrix(X.shape[1], random_state=rng) + init = make_spd_matrix(n_dim=X.shape[1], random_state=rng) model.set_params(**{param: init}) model.fit(input_data, labels) # init.shape[1] must match X.shape[1] - init = make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1) + init = make_spd_matrix(n_dim=X.shape[1] + 1, random_state=rng) model.set_params(**{param: init}) msg = ('The input dimensionality {} of the given ' 'mahalanobis matrix `{}` must match the ' @@ -569,12 +582,12 @@ def test_init_mahalanobis(estimator, build_dataset): in zip(ids_metric_learners, metric_learners) if idml[:4] in ['ITML', 'SDML', 'LSML']]) -def test_singular_covariance_init_or_prior(estimator, build_dataset): +def test_singular_covariance_init_or_prior_strictpd(estimator, build_dataset): """Tests that when using the 'covariance' init or prior, it returns the appropriate error if the covariance matrix is singular, for algorithms that need a strictly PD prior or init (see - https://github.com/metric-learn/metric-learn/issues/202 and - https://github.com/metric-learn/metric-learn/pull/195#issuecomment + https://github.com/scikit-learn-contrib/metric-learn/issues/202 and + https://github.com/scikit-learn-contrib/metric-learn/pull/195#issuecomment -492332451) """ matrices_to_set = [] @@ -603,6 +616,48 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): assert str(raised_err.value) == msg +@pytest.mark.integration +@pytest.mark.parametrize('estimator, build_dataset', + [(ml, bd) for idml, (ml, bd) + in zip(ids_metric_learners, + metric_learners) + if idml[:3] in ['MMC']], + ids=[idml for idml, (ml, _) + in zip(ids_metric_learners, + metric_learners) + if idml[:3] in ['MMC']]) +def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset): + """Tests that when using the 'covariance' init or prior, it returns the + appropriate warning if the covariance matrix is singular, for algorithms + that don't need a strictly PD init. 
Also checks that the returned + inverse matrix has finite values + """ + input_data, labels, _, X = build_dataset() + model = clone(estimator) + set_random_state(model) + # We create a feature that is a linear combination of the first two + # features: + input_data = np.concatenate([input_data, input_data[:, ..., :2].dot([[2], + [3]])], + axis=-1) + model.set_params(init='covariance') + msg = ('The covariance matrix is not invertible: ' + 'using the pseudo-inverse instead.' + 'To make the covariance matrix invertible' + ' you can remove any linearly dependent features and/or ' + 'reduce the dimensionality of your input, ' + 'for instance using `sklearn.decomposition.PCA` as a ' + 'preprocessing step.') + with pytest.warns(UserWarning) as raised_warning: + model.fit(input_data, labels) + assert any([str(warning.message) == msg for warning in raised_warning]) + M, _ = _initialize_metric_mahalanobis(X, init='covariance', + random_state=RNG, + return_inverse=True, + strict_pd=False) + assert np.isfinite(M).all() + + @pytest.mark.integration @pytest.mark.parametrize('estimator, build_dataset', [(ml, bd) for idml, (ml, bd) @@ -614,12 +669,12 @@ def test_singular_covariance_init_or_prior(estimator, build_dataset): metric_learners) if idml[:4] in ['ITML', 'SDML', 'LSML']]) @pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) -def test_singular_array_init_or_prior(estimator, build_dataset, w0): +def test_singular_array_init_or_prior_strictpd(estimator, build_dataset, w0): """Tests that when using a custom array init (or prior), it returns the appropriate error if it is singular, for algorithms that need a strictly PD prior or init (see - https://github.com/metric-learn/metric-learn/issues/202 and - https://github.com/metric-learn/metric-learn/pull/195#issuecomment + https://github.com/scikit-learn-contrib/metric-learn/issues/202 and + https://github.com/scikit-learn-contrib/metric-learn/pull/195#issuecomment -492332451) """ matrices_to_set = [] @@ -654,6 +709,31 @@ def test_singular_array_init_or_prior(estimator, build_dataset, w0): assert str(raised_err.value) == msg +@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) +def test_singular_array_init_of_non_strict_pd(w0): + """Tests that when using a custom array init, it returns the + appropriate warning if it is singular. Also checks if the returned + inverse matrix is finite. This isn't checked for model fitting as no + model curently uses this setting. 
+ """ + rng = np.random.RandomState(42) + X, y = shuffle(*make_blobs(random_state=rng), + random_state=rng) + P = ortho_group.rvs(X.shape[1], random_state=rng) + w = np.abs(rng.randn(X.shape[1])) + w[0] = w0 + M = P.dot(np.diag(w)).dot(P.T) + msg = ('The initialization matrix is not invertible: ' + 'using the pseudo-inverse instead.') + with pytest.warns(UserWarning) as raised_warning: + _, M_inv = _initialize_metric_mahalanobis(X, init=M, + random_state=rng, + return_inverse=True, + strict_pd=False) + assert str(raised_warning[0].message) == msg + assert np.isfinite(M_inv).all() + + @pytest.mark.integration @pytest.mark.parametrize('estimator, build_dataset', metric_learners, ids=ids_metric_learners) @@ -668,9 +748,9 @@ def test_deterministic_initialization(estimator, build_dataset): model.set_params(prior='random') model1 = clone(model) set_random_state(model1, 42) - model1 = model1.fit(input_data, labels) + model1 = model1.fit(*remove_y(model, input_data, labels)) model2 = clone(model) set_random_state(model2, 42) - model2 = model2.fit(input_data, labels) + model2 = model2.fit(*remove_y(model, input_data, labels)) np.testing.assert_allclose(model1.get_mahalanobis_matrix(), model2.get_mahalanobis_matrix()) diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py index affc70f6..bfedefea 100644 --- a/test/test_pairs_classifiers.py +++ b/test/test_pairs_classifiers.py @@ -1,7 +1,6 @@ -from __future__ import division - from functools import partial +import warnings import pytest from numpy.testing import assert_array_equal from scipy.spatial.distance import euclidean @@ -13,7 +12,7 @@ from sklearn.model_selection import train_test_split from test.test_utils import pairs_learners, ids_pairs_learners -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn import clone import numpy as np from itertools import product @@ -51,14 +50,14 @@ def test_predict_monotonous(estimator, build_dataset, pairs_train, pairs_test, y_train, y_test = train_test_split(input_data, labels) estimator.fit(pairs_train, y_train) - distances = estimator.score_pairs(pairs_test) + scores = estimator.pair_score(pairs_test) predictions = estimator.predict(pairs_test) - min_dissimilar = np.min(distances[predictions == -1]) - max_similar = np.max(distances[predictions == 1]) - assert max_similar <= min_dissimilar - separator = np.mean([min_dissimilar, max_similar]) - assert (predictions[distances > separator] == -1).all() - assert (predictions[distances < separator] == 1).all() + max_dissimilar = np.max(scores[predictions == -1]) + min_similar = np.min(scores[predictions == 1]) + assert max_dissimilar <= min_similar + separator = np.mean([max_dissimilar, min_similar]) + assert (predictions[scores < separator] == -1).all() + assert (predictions[scores > separator] == 1).all() @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -66,12 +65,31 @@ def test_predict_monotonous(estimator, build_dataset, ids=ids_pairs_learners) def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, with_preprocessor): - """Test that a NotFittedError is raised if someone tries to predict and - the metric learner has not been fitted.""" + """Test that a NotFittedError is raised if someone tries to use + pair_score, score_pairs, decision_function, get_metric, transform or + get_mahalanobis_matrix on input data and the metric learner + has not been fitted.""" input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = 
clone(estimator) estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) + with pytest.raises(NotFittedError): # Remove in 0.8.0 + estimator.score_pairs(input_data) + with pytest.raises(NotFittedError): + estimator.pair_score(input_data) + with pytest.raises(NotFittedError): + estimator.decision_function(input_data) + with pytest.raises(NotFittedError): + estimator.get_metric() + with pytest.raises(NotFittedError): + estimator.transform(input_data) + with pytest.raises(NotFittedError): + estimator.get_mahalanobis_matrix() + with pytest.raises(NotFittedError): + estimator.calibrate_threshold(input_data, labels) + + with pytest.raises(NotFittedError): + estimator.set_threshold(0.5) with pytest.raises(NotFittedError): estimator.predict(input_data) @@ -119,7 +137,7 @@ def test_threshold_different_scores_is_finite(estimator, build_dataset, estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) estimator.fit(input_data, labels) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: estimator.calibrate_threshold(input_data, labels, **kwargs) assert len(record) == 0 @@ -133,10 +151,26 @@ def fit(self, pairs, y): pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') self.components_ = np.atleast_2d(np.identity(pairs.shape[2])) - self.threshold_ = 'I am not set.' + # self.threshold_ is not set. return self +def test_unset_threshold(): + """Tests that the "threshold is unset" error is raised when using predict + (performs binary classification on pairs) with an unset threshold.""" + identity_pairs_classifier = IdentityPairsClassifier() + pairs = np.array([[[0.], [1.]], [[1.], [3.]], [[2.], [5.]], [[3.], [7.]]]) + y = np.array([1, 1, -1, -1]) + identity_pairs_classifier.fit(pairs, y) + with pytest.raises(AttributeError) as e: + identity_pairs_classifier.predict(pairs) + + expected_msg = ("A threshold for this estimator has not been set, " + "call its set_threshold or calibrate_threshold method.") + + assert str(e.value) == expected_msg + + def test_set_threshold(): # test that set_threshold indeed sets the threshold identity_pairs_classifier = IdentityPairsClassifier() @@ -147,6 +181,25 @@ def test_set_threshold(): assert identity_pairs_classifier.threshold_ == 0.5 +@pytest.mark.parametrize('value', ["ABC", None, [1, 2, 3], {'key': None}, + (1, 2), set(), + np.array([[[0.], [1.]], [[1.], [3.]]])]) +def test_set_wrong_type_threshold(value): + """ + Test that `set_threshold` indeed sets the threshold + and cannot accept nothing but float or integers, but + being permissive with boolean True=1.0 and False=0.0 + """ + model = IdentityPairsClassifier() + model.fit(np.array([[[0.], [1.]]]), np.array([1])) + msg = ('Parameter threshold must be a real number. 
' + 'Got {} instead.'.format(type(value))) + + with pytest.raises(ValueError) as e: # String + model.set_threshold(value) + assert str(e.value).startswith(msg) + + def test_f_beta_1_is_f_1(): # test that putting beta to 1 indeed finds the best threshold to optimize # the f1_score @@ -331,7 +384,7 @@ def test_calibrate_threshold_valid_parameters(valid_args): pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20) pairs_learner = IdentityPairsClassifier() pairs_learner.fit(pairs, y) - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: pairs_learner.calibrate_threshold(pairs, y, **valid_args) assert len(record) == 0 @@ -347,6 +400,7 @@ class MockBadPairsClassifier(MahalanobisMixin, _PairsClassifierMixin): """ def fit(self, pairs, y, calibration_params=None): + self.preprocessor_ = 'not used' self.components_ = 'not used' self.calibrate_threshold(pairs, y, **(calibration_params if calibration_params is not None else @@ -465,7 +519,7 @@ def test_validate_calibration_params_valid_parameters( # test that no warning message is returned if valid arguments are given to # _validate_calibration_params for all pairs metric learners, as well as # a mocking example, and the class itself - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: estimator._validate_calibration_params(**valid_args) assert len(record) == 0 diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py index efe10030..a8319961 100644 --- a/test/test_quadruplets_classifiers.py +++ b/test/test_quadruplets_classifiers.py @@ -3,7 +3,7 @@ from sklearn.model_selection import train_test_split from test.test_utils import quadruplets_learners, ids_quadruplets_learners -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn import clone import numpy as np diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index b2056c09..798d9036 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -4,80 +4,85 @@ from sklearn.base import TransformerMixin from sklearn.pipeline import make_pipeline from sklearn.utils import check_random_state -from sklearn.utils.estimator_checks import is_public_parameter -from sklearn.utils.testing import (assert_allclose_dense_sparse, - set_random_state) - +from metric_learn.sklearn_shims import (assert_allclose_dense_sparse, + set_random_state, _get_args, + is_public_parameter, get_scorer) from metric_learn import (Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, - MMC_Supervised, RCA_Supervised, SDML_Supervised) + MMC_Supervised, RCA_Supervised, SDML_Supervised, + SCML_Supervised) from sklearn import clone import numpy as np from sklearn.model_selection import (cross_val_score, cross_val_predict, train_test_split, KFold) -from sklearn.metrics.scorer import get_scorer -from sklearn.utils.testing import _get_args from test.test_utils import (metric_learners, ids_metric_learners, mock_preprocessor, tuples_learners, ids_tuples_learners, pairs_learners, - ids_pairs_learners, remove_y_quadruplets, - quadruplets_learners) + ids_pairs_learners, remove_y, + metric_learners_pipeline, + ids_metric_learners_pipeline) class Stable_RCA_Supervised(RCA_Supervised): - def __init__(self, n_components=None, pca_comps=None, + def __init__(self, n_components=None, chunk_size=2, preprocessor=None, random_state=None): # this init makes RCA stable for scikit-learn examples. 
super(Stable_RCA_Supervised, self).__init__( - num_chunks=2, n_components=n_components, pca_comps=pca_comps, + n_chunks=2, n_components=n_components, chunk_size=chunk_size, preprocessor=preprocessor, random_state=random_state) class Stable_SDML_Supervised(SDML_Supervised): - def __init__(self, sparsity_param=0.01, num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None, + def __init__(self, sparsity_param=0.01, + n_constraints=None, verbose=False, preprocessor=None, random_state=None): # this init makes SDML stable for scikit-learn examples. super(Stable_SDML_Supervised, self).__init__( - sparsity_param=sparsity_param, num_labeled=num_labeled, - num_constraints=num_constraints, verbose=verbose, + sparsity_param=sparsity_param, + n_constraints=n_constraints, verbose=verbose, preprocessor=preprocessor, balance_param=1e-5, prior='identity', random_state=random_state) class TestSklearnCompat(unittest.TestCase): def test_covariance(self): - check_estimator(Covariance) + check_estimator(Covariance()) def test_lmnn(self): - check_estimator(LMNN) + check_estimator(LMNN()) def test_lfda(self): - check_estimator(LFDA) + check_estimator(LFDA()) def test_mlkr(self): - check_estimator(MLKR) + check_estimator(MLKR()) def test_nca(self): - check_estimator(NCA) + check_estimator(NCA()) def test_lsml(self): - check_estimator(LSML_Supervised) + check_estimator(LSML_Supervised()) def test_itml(self): - check_estimator(ITML_Supervised) + check_estimator(ITML_Supervised()) def test_mmc(self): - check_estimator(MMC_Supervised) + check_estimator(MMC_Supervised()) def test_sdml(self): - check_estimator(Stable_SDML_Supervised) + check_estimator(Stable_SDML_Supervised()) def test_rca(self): - check_estimator(Stable_RCA_Supervised) + check_estimator(Stable_RCA_Supervised()) + + def test_scml(self): + msg = "As no value for `n_basis` was selected, " + with pytest.warns(UserWarning) as raised_warning: + check_estimator(SCML_Supervised()) + assert msg in str(raised_warning[0].message) RNG = check_random_state(0) @@ -116,7 +121,8 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): # we subsample the data for the test to be more efficient input_data, _, labels, _ = train_test_split(input_data, labels, - train_size=20) + train_size=40, + random_state=42) X = X[:10] estimator = clone(estimator) @@ -125,8 +131,7 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): input_variants, label_variants = generate_array_like(input_data, labels) for input_variant in input_variants: for label_variant in label_variants: - estimator.fit(*remove_y_quadruplets(estimator, input_variant, - label_variant)) + estimator.fit(*remove_y(estimator, input_variant, label_variant)) if hasattr(estimator, "predict"): estimator.predict(input_variant) if hasattr(estimator, "predict_proba"): @@ -137,8 +142,7 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): estimator.decision_function(input_variant) if hasattr(estimator, "score"): for label_variant in label_variants: - estimator.score(*remove_y_quadruplets(estimator, input_variant, - label_variant)) + estimator.score(*remove_y(estimator, input_variant, label_variant)) X_variants, _ = generate_array_like(X) for X_variant in X_variants: @@ -146,8 +150,19 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor): pairs = np.array([[X[0], X[1]], [X[0], X[2]]]) pairs_variants, _ = generate_array_like(pairs) + + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the 
message that says + # "This learner does not have pair_distance" + for pairs_variant in pairs_variants: - estimator.score_pairs(pairs_variant) + estimator.pair_score(pairs_variant) # All learners have pair_score + + # But not all of them will have pair_distance + try: + estimator.pair_distance(pairs_variant) + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -157,7 +172,7 @@ def test_various_scoring_on_tuples_learners(estimator, build_dataset, with_preprocessor): """Tests that scikit-learn's scoring returns something finite, for other scoring than default scoring. (List of scikit-learn's scores can be - found in sklearn.metrics.scorer). For each type of output (predict, + found in sklearn.metrics._scorer). For each type of output (predict, predict_proba, decision_function), we test a bunch of scores. We only test on pairs learners because quadruplets don't have a y argument. """ @@ -199,13 +214,10 @@ def test_cross_validation_is_finite(estimator, build_dataset): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) assert np.isfinite(cross_val_score(estimator, - *remove_y_quadruplets(estimator, - input_data, - labels))).all() + *remove_y(estimator, input_data, labels) + )).all() assert np.isfinite(cross_val_predict(estimator, - *remove_y_quadruplets(estimator, - input_data, - labels) + *remove_y(estimator, input_data, labels) )).all() @@ -226,7 +238,7 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, n_splits = 3 kfold = KFold(shuffle=False, n_splits=n_splits) n_samples = input_data.shape[0] - fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int64) fold_sizes[:n_samples % n_splits] += 1 current = 0 scores, predictions = [], np.zeros(input_data.shape[0]) @@ -237,28 +249,26 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset, train_mask = np.ones(input_data.shape[0], bool) train_mask[test_slice] = False y_train, y_test = labels[train_mask], labels[test_slice] - estimator.fit(*remove_y_quadruplets(estimator, - input_data[train_mask], - y_train)) + estimator.fit(*remove_y(estimator, input_data[train_mask], y_train)) if hasattr(estimator, "score"): - scores.append(estimator.score(*remove_y_quadruplets( + scores.append(estimator.score(*remove_y( estimator, input_data[test_slice], y_test))) if hasattr(estimator, "predict"): predictions[test_slice] = estimator.predict(input_data[test_slice]) if hasattr(estimator, "score"): assert all(scores == cross_val_score( - estimator, *remove_y_quadruplets(estimator, input_data, labels), + estimator, *remove_y(estimator, input_data, labels), cv=kfold)) if hasattr(estimator, "predict"): assert all(predictions == cross_val_predict( estimator, - *remove_y_quadruplets(estimator, input_data, labels), + *remove_y(estimator, input_data, labels), cv=kfold)) def check_score(estimator, tuples, y): if hasattr(estimator, "score"): - score = estimator.score(*remove_y_quadruplets(estimator, tuples, y)) + score = estimator.score(*remove_y(estimator, tuples, y)) assert np.isfinite(score) @@ -282,7 +292,7 @@ def test_simple_estimator(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) set_random_state(estimator) - estimator.fit(*remove_y_quadruplets(estimator, tuples_train, y_train)) + estimator.fit(*remove_y(estimator, tuples_train, y_train)) check_score(estimator, 
tuples_test, y_test) check_predict(estimator, tuples_test) @@ -329,62 +339,53 @@ def test_estimators_fit_returns_self(estimator, build_dataset, input_data, labels, preprocessor, _ = build_dataset(with_preprocessor) estimator = clone(estimator) estimator.set_params(preprocessor=preprocessor) - assert estimator.fit(*remove_y_quadruplets(estimator, - input_data, - labels)) is estimator + assert estimator.fit(*remove_y(estimator, input_data, labels)) is estimator @pytest.mark.parametrize('with_preprocessor', [True, False]) -@pytest.mark.parametrize('estimator, build_dataset', metric_learners, - ids=ids_metric_learners) +@pytest.mark.parametrize('estimator, build_dataset', metric_learners_pipeline, + ids=ids_metric_learners_pipeline) def test_pipeline_consistency(estimator, build_dataset, with_preprocessor): # Adapted from scikit learn # check that make_pipeline(est) gives same score as est - # we do this test on all except quadruplets (since they don't have a y - # in fit): - if estimator.__class__.__name__ not in [e.__class__.__name__ - for (e, _) in - quadruplets_learners]: - input_data, y, preprocessor, _ = build_dataset(with_preprocessor) - - def make_random_state(estimator, in_pipeline): - rs = {} - name_estimator = estimator.__class__.__name__ - if name_estimator[-11:] == '_Supervised': - name_param = 'random_state' - if in_pipeline: - name_param = name_estimator.lower() + '__' + name_param - rs[name_param] = check_random_state(0) - return rs - estimator = clone(estimator) - estimator.set_params(preprocessor=preprocessor) - pipeline = make_pipeline(estimator) - estimator.fit(*remove_y_quadruplets(estimator, input_data, y), - **make_random_state(estimator, False)) - pipeline.fit(*remove_y_quadruplets(estimator, input_data, y), - **make_random_state(estimator, True)) - - if hasattr(estimator, 'score'): - result = estimator.score(*remove_y_quadruplets(estimator, - input_data, - y)) - result_pipe = pipeline.score(*remove_y_quadruplets(estimator, - input_data, - y)) - assert_allclose_dense_sparse(result, result_pipe) + input_data, y, preprocessor, _ = build_dataset(with_preprocessor) - if hasattr(estimator, 'predict'): - result = estimator.predict(input_data) - result_pipe = pipeline.predict(input_data) - assert_allclose_dense_sparse(result, result_pipe) + def make_random_state(estimator, in_pipeline): + rs = {} + name_estimator = estimator.__class__.__name__ + if name_estimator[-11:] == '_Supervised': + name_param = 'random_state' + if in_pipeline: + name_param = name_estimator.lower() + '__' + name_param + rs[name_param] = check_random_state(0) + return rs - if issubclass(estimator.__class__, TransformerMixin): - if hasattr(estimator, 'transform'): - result = estimator.transform(input_data) - result_pipe = pipeline.transform(input_data) - assert_allclose_dense_sparse(result, result_pipe) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor, + **make_random_state(estimator, False)) + pipeline = make_pipeline(estimator) + estimator.fit(input_data, y) + estimator.set_params(preprocessor=preprocessor) + pipeline.set_params(**make_random_state(estimator, True)) + pipeline.fit(input_data, y) + + if hasattr(estimator, 'score'): + result = estimator.score(input_data, y) + result_pipe = pipeline.score(input_data, y) + assert_allclose_dense_sparse(result, result_pipe) + + if hasattr(estimator, 'predict'): + result = estimator.predict(input_data) + result_pipe = pipeline.predict(input_data) + assert_allclose_dense_sparse(result, result_pipe) + + if 
issubclass(estimator.__class__, TransformerMixin): + if hasattr(estimator, 'transform'): + result = estimator.transform(input_data) + result_pipe = pipeline.transform(input_data) + assert_allclose_dense_sparse(result, result_pipe) @pytest.mark.parametrize('with_preprocessor', [True, False]) @@ -398,7 +399,7 @@ def test_dict_unchanged(estimator, build_dataset, with_preprocessor): estimator.set_params(preprocessor=preprocessor) if hasattr(estimator, "n_components"): estimator.n_components = 1 - estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) + estimator.fit(*remove_y(estimator, input_data, labels)) def check_dict(): assert estimator.__dict__ == dict_before, ( @@ -429,7 +430,7 @@ def test_dont_overwrite_parameters(estimator, build_dataset, estimator.n_components = 1 dict_before_fit = estimator.__dict__.copy() - estimator.fit(*remove_y_quadruplets(estimator, input_data, labels)) + estimator.fit(*remove_y(estimator, input_data, labels)) dict_after_fit = estimator.__dict__ public_keys_after_fit = [key for key in dict_after_fit.keys() diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py new file mode 100644 index 00000000..515a0a33 --- /dev/null +++ b/test/test_triplets_classifiers.py @@ -0,0 +1,127 @@ +import pytest +from sklearn.exceptions import NotFittedError +from sklearn.model_selection import train_test_split + +from metric_learn import SCML +from test.test_utils import ( + triplets_learners, + ids_triplets_learners, + build_triplets +) +from metric_learn.sklearn_shims import set_random_state +from sklearn import clone +import numpy as np +from numpy.testing import assert_array_equal + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_predict_only_one_or_minus_one(estimator, build_dataset, + with_preprocessor): + """Test that all predicted values are either +1 or -1""" + input_data, _, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + triplets_train, triplets_test = train_test_split(input_data) + estimator.fit(triplets_train) + predictions = estimator.predict(triplets_test) + + not_valid = [e for e in predictions if e not in [-1, 1]] + assert len(not_valid) == 0 + + +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_no_zero_prediction(estimator, build_dataset): + """ + Test that all predicted values are not zero, even when the + distances d(x,y) and d(x,z) are the same for a triplet of the + form (x, y, z), i.e. border cases. + """ + triplets, _, _, X = build_dataset(with_preprocessor=False) + # Force 3 dimensions only, to use cross product and get easy orthogonal vec.
+ triplets = np.array([[t[0][:3], t[1][:3], t[2][:3]] for t in triplets]) + X = X[:, :3] + # Dummy fit + estimator = clone(estimator) + set_random_state(estimator) + estimator.fit(triplets) + # We force the transformation to be identity, to force euclidean distance + estimator.components_ = np.eye(X.shape[1]) + + # Get two orthogonal vectors in respect to X[1] + k = X[1] / np.linalg.norm(X[1]) # Normalize first vector + x = X[2] - X[2].dot(k) * k # Get random orthogonal vector + x /= np.linalg.norm(x) # Normalize + y = np.cross(k, x) # Get orthogonal vector to x + # Assert these orthogonal vectors are different + with pytest.raises(AssertionError): + assert_array_equal(X[1], x) + with pytest.raises(AssertionError): + assert_array_equal(X[1], y) + # Assert the distance is the same for both + assert estimator.get_metric()(X[1], x) == estimator.get_metric()(X[1], y) + + # Form the three scenarios where predict() gives 0 with numpy.sign + triplets_test = np.array( # Critical examples + [[X[0], X[2], X[2]], + [X[1], X[1], X[1]], + [X[1], x, y]]) + # Predict + predictions = estimator.predict(triplets_test) + # Check there are no zero values + assert np.sum(predictions == 0) == 0 + + +@pytest.mark.parametrize('with_preprocessor', [True, False]) +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset, + with_preprocessor): + """Test that a NotFittedError is raised if someone tries to predict and + the metric learner has not been fitted.""" + input_data, _, preprocessor, _ = build_dataset(with_preprocessor) + estimator = clone(estimator) + estimator.set_params(preprocessor=preprocessor) + set_random_state(estimator) + with pytest.raises(NotFittedError): + estimator.predict(input_data) + + +@pytest.mark.parametrize('estimator, build_dataset', triplets_learners, + ids=ids_triplets_learners) +def test_accuracy_toy_example(estimator, build_dataset): + """Test that the default scoring for triplets (accuracy) works on some + toy example""" + triplets, _, _, X = build_dataset(with_preprocessor=False) + estimator = clone(estimator) + set_random_state(estimator) + estimator.fit(triplets) + # We take the two first points and we build 4 regularly spaced points on the + # line they define, so that it's easy to build triplets of different + # similarities. + X_test = X[0] + np.arange(4)[:, np.newaxis] * (X[0] - X[1]) / 4 + + triplets_test = np.array( + [[X_test[0], X_test[2], X_test[1]], + [X_test[1], X_test[3], X_test[0]], + [X_test[1], X_test[2], X_test[3]], + [X_test[3], X_test[0], X_test[2]]]) + # we force the transformation to be identity so that we control what it does + estimator.components_ = np.eye(X.shape[1]) + assert estimator.score(triplets_test) == 0.25 + + +def test_raise_big_number_of_features(): + triplets, _, _, X = build_triplets(with_preprocessor=False) + triplets = triplets[:3, :, :] + estimator = SCML(n_basis=320) + set_random_state(estimator) + with pytest.raises(ValueError) as exc_info: + estimator.fit(triplets) + assert exc_info.value.args[0] == \ + "Number of features (4) is greater than the number of triplets(3)." \ + "\nConsider using dimensionality reduction or using another basis " \ + "generation scheme." 
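A minimal usage sketch of the triplets API exercised by the new test file above; it is not part of the patch. It assumes the patched metric-learn (SCML and Constraints.generate_knntriplets, as used elsewhere in this diff) and borrows the iris data purely for illustration: fit takes raw (anchor, positive, negative) triplets, predict returns +1 when the anchor ends up closer to the positive than to the negative point and -1 otherwise, and score is the fraction of correctly ordered triplets.

import numpy as np
from sklearn.datasets import load_iris
from metric_learn import SCML, Constraints

X, y = load_iris(return_X_y=True)
# Build (anchor, positive, negative) index triplets from the labels, then
# form the corresponding 3D array of samples, as build_triplets does below.
triplet_idx = Constraints(y).generate_knntriplets(X, k_genuine=3, k_impostor=4)
triplets = X[triplet_idx]

scml = SCML(n_basis=320, random_state=42).fit(triplets)
predictions = scml.predict(triplets)  # array of +1 / -1, one value per triplet
accuracy = scml.score(triplets)       # fraction of correctly ordered triplets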
diff --git a/test/test_utils.py b/test/test_utils.py index 37abb307..c0383792 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,24 +1,26 @@ +import warnings import pytest +from scipy.linalg import eigh, pinvh from collections import namedtuple import numpy as np from numpy.testing import assert_array_equal, assert_equal from sklearn.model_selection import train_test_split -from sklearn.exceptions import DataConversionWarning from sklearn.utils import check_random_state, shuffle -from sklearn.utils.testing import set_random_state +from metric_learn.sklearn_shims import set_random_state from sklearn.base import clone from metric_learn._util import (check_input, make_context, preprocess_tuples, make_name, preprocess_points, check_collapsed_pairs, validate_vector, _check_sdp_from_eigen, _check_n_components, check_y_valid_values_for_pairs, - _auto_select_init) + _auto_select_init, _pseudo_inverse_from_eig) from metric_learn import (ITML, LSML, MMC, RCA, SDML, Covariance, LFDA, LMNN, MLKR, NCA, ITML_Supervised, LSML_Supervised, MMC_Supervised, RCA_Supervised, SDML_Supervised, - Constraints) + SCML, SCML_Supervised, Constraints) from metric_learn.base_metric import (ArrayIndexer, MahalanobisMixin, _PairsClassifierMixin, + _TripletsClassifierMixin, _QuadrupletsClassifierMixin) from metric_learn.exceptions import PreprocessorError, NonPSDError from sklearn.datasets import make_regression, make_blobs, load_iris @@ -59,11 +61,11 @@ def build_regression(with_preprocessor=False): def build_data(): input_data, labels = load_iris(return_X_y=True) X, y = shuffle(input_data, labels, random_state=SEED) - num_constraints = 50 + n_constraints = 50 constraints = Constraints(y) pairs = ( constraints - .positive_negative_pairs(num_constraints, same_length=True, + .positive_negative_pairs(n_constraints, same_length=True, random_state=check_random_state(SEED))) return X, pairs @@ -83,6 +85,19 @@ def build_pairs(with_preprocessor=False): return Dataset(X[c], target, None, X[c[:, 0]]) +def build_triplets(with_preprocessor=False): + input_data, labels = load_iris(return_X_y=True) + X, y = shuffle(input_data, labels, random_state=SEED) + constraints = Constraints(y) + triplets = constraints.generate_knntriplets(X, k_genuine=3, k_impostor=4) + if with_preprocessor: + # if preprocessor, we build a 2D array of triplets of indices + return Dataset(triplets, np.ones(len(triplets)), X, np.arange(len(X))) + else: + # if not, we build a 3D array of triplets of samples + return Dataset(X[triplets], np.ones(len(triplets)), None, X) + + def build_quadruplets(with_preprocessor=False): # builds a toy quadruplets problem X, indices = build_data() @@ -103,6 +118,11 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in quadruplets_learners])) +triplets_learners = [(SCML(n_basis=320), build_triplets)] +ids_triplets_learners = list(map(lambda x: x.__class__.__name__, + [learner for (learner, _) in + triplets_learners])) + pairs_learners = [(ITML(max_iter=2), build_pairs), # max_iter=2 to be faster (MMC(max_iter=2), build_pairs), # max_iter=2 to be faster (SDML(prior='identity', balance_param=1e-5), build_pairs)] @@ -118,9 +138,10 @@ def build_quadruplets(with_preprocessor=False): (ITML_Supervised(max_iter=5), build_classification), (LSML_Supervised(), build_classification), (MMC_Supervised(max_iter=5), build_classification), - (RCA_Supervised(num_chunks=5), build_classification), + (RCA_Supervised(n_chunks=5), build_classification), (SDML_Supervised(prior='identity', balance_param=1e-5), - 
build_classification)] + build_classification), + (SCML_Supervised(n_basis=80), build_classification)] ids_classifiers = list(map(lambda x: x.__class__.__name__, [learner for (learner, _) in classifiers])) @@ -130,10 +151,12 @@ def build_quadruplets(with_preprocessor=False): [learner for (learner, _) in regressors])) WeaklySupervisedClasses = (_PairsClassifierMixin, + _TripletsClassifierMixin, _QuadrupletsClassifierMixin) -tuples_learners = pairs_learners + quadruplets_learners -ids_tuples_learners = ids_pairs_learners + ids_quadruplets_learners +tuples_learners = pairs_learners + triplets_learners + quadruplets_learners +ids_tuples_learners = ids_pairs_learners + ids_triplets_learners \ + + ids_quadruplets_learners supervised_learners = classifiers + regressors ids_supervised_learners = ids_classifiers + ids_regressors @@ -141,14 +164,17 @@ def build_quadruplets(with_preprocessor=False): metric_learners = tuples_learners + supervised_learners ids_metric_learners = ids_tuples_learners + ids_supervised_learners +metric_learners_pipeline = pairs_learners + supervised_learners +ids_metric_learners_pipeline = ids_pairs_learners + ids_supervised_learners + -def remove_y_quadruplets(estimator, X, y): - """Quadruplets learners have no y in fit, but to write test for all - estimators, it is convenient to have this function, that will return X and y - if the estimator needs a y to fit on, and just X otherwise.""" +def remove_y(estimator, X, y): + """Quadruplets and triplets learners have no y in fit, but to write test for + all estimators, it is convenient to have this function, that will return X + and y if the estimator needs a y to fit on, and just X otherwise.""" + no_y_fit = quadruplets_learners + triplets_learners if estimator.__class__.__name__ in [e.__class__.__name__ - for (e, _) in - quadruplets_learners]: + for (e, _) in no_y_fit]: return (X,) else: return (X, y) @@ -328,7 +354,7 @@ def test_check_tuples_valid_tuple_size(tuple_size): checks that checking the number of tuples (pairs, quadruplets, etc) raises no warning if there is the right number of points in a tuple. 
""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples_prep(), type_of_inputs='tuples', preprocessor=mock_preprocessor, tuple_size=tuple_size) check_input(tuples_no_prep(), type_of_inputs='tuples', preprocessor=None, @@ -353,7 +379,7 @@ def test_check_tuples_valid_tuple_size(tuple_size): [[2.6, 2.3], [3.4, 5.0]]])]) def test_check_tuples_valid_with_preprocessor(tuples): """Test that valid inputs when using a preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples, type_of_inputs='tuples', preprocessor=mock_preprocessor) assert len(record) == 0 @@ -374,7 +400,7 @@ def test_check_tuples_valid_with_preprocessor(tuples): ((3, 1), (4, 4), (29, 4)))]) def test_check_tuples_valid_without_preprocessor(tuples): """Test that valid inputs when using no preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples, type_of_inputs='tuples', preprocessor=None) assert len(record) == 0 @@ -383,12 +409,12 @@ def test_check_tuples_behaviour_auto_dtype(): """Checks that check_tuples allows by default every type if using a preprocessor, and numeric types if using no preprocessor""" tuples_prep = [['img1.png', 'img2.png'], ['img3.png', 'img5.png']] - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples_prep, type_of_inputs='tuples', preprocessor=mock_preprocessor) assert len(record) == 0 - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(tuples_no_prep(), type_of_inputs='tuples') # numeric type assert len(record) == 0 @@ -524,7 +550,7 @@ def test_check_classic_invalid_dtype_not_convertible(preprocessor, points): [2.6, 2.3]])]) def test_check_classic_valid_with_preprocessor(points): """Test that valid inputs when using a preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points, type_of_inputs='classic', preprocessor=mock_preprocessor) assert len(record) == 0 @@ -545,7 +571,7 @@ def test_check_classic_valid_with_preprocessor(points): (3, 1, 4, 4, 29, 4))]) def test_check_classic_valid_without_preprocessor(points): """Test that valid inputs when using no preprocessor raises no warning""" - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points, type_of_inputs='classic', preprocessor=None) assert len(record) == 0 @@ -560,12 +586,12 @@ def test_check_classic_behaviour_auto_dtype(): """Checks that check_input (for points) allows by default every type if using a preprocessor, and numeric types if using no preprocessor""" points_prep = ['img1.png', 'img2.png', 'img3.png', 'img5.png'] - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points_prep, type_of_inputs='classic', preprocessor=mock_preprocessor) assert len(record) == 0 - with pytest.warns(None) as record: + with warnings.catch_warnings(record=True) as record: check_input(points_no_prep(), type_of_inputs='classic') # numeric type assert len(record) == 0 @@ -750,6 +776,8 @@ def test_array_like_indexer_array_like_valid_classic(input_data, indices): """Checks that any array-like is valid in the 'preprocessor' argument, and in the indices, for a classic input""" class MockMetricLearner(MahalanobisMixin): + def fit(self): + pass pass 
mock_algo = MockMetricLearner(preprocessor=input_data) @@ -764,6 +792,8 @@ def test_array_like_indexer_array_like_valid_tuples(input_data, indices): """Checks that any array-like is valid in the 'preprocessor' argument, and in the indices, for a classic input""" class MockMetricLearner(MahalanobisMixin): + def fit(self): + pass pass mock_algo = MockMetricLearner(preprocessor=input_data) @@ -792,13 +822,12 @@ def test_error_message_tuple_size(estimator, _): per tuple, it throws an error message""" estimator = clone(estimator) set_random_state(estimator) - invalid_pairs = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], - [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) + invalid_pairs = np.ones((2, 5, 2)) y = [1, 1] with pytest.raises(ValueError) as raised_err: - estimator.fit(*remove_y_quadruplets(estimator, invalid_pairs, y)) - expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 3 " - "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" + estimator.fit(*remove_y(estimator, invalid_pairs, y)) + expected_msg = ("Tuples of {} element(s) expected{}. Got tuples of 5 " + "element(s) instead (shape=(2, 5, 2)):\ninput={}.\n" .format(estimator._tuple_size, make_context(estimator), invalid_pairs)) assert str(raised_err.value) == expected_msg @@ -806,9 +835,9 @@ def test_error_message_tuple_size(estimator, _): @pytest.mark.parametrize('estimator, _', metric_learners, ids=ids_metric_learners) -def test_error_message_t_score_pairs(estimator, _): - """tests that if you want to score_pairs on triplets for instance, it returns - the right error message +def test_error_message_t_pair_distance_or_score(estimator, _): + """Tests that if you want to pair_distance or pair_score on triplets + for instance, it returns the right error message """ estimator = clone(estimator) set_random_state(estimator) @@ -816,12 +845,22 @@ def test_error_message_t_score_pairs(estimator, _): triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]], [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]]) with pytest.raises(ValueError) as raised_err: - estimator.score_pairs(triplets) + estimator.pair_score(triplets) expected_msg = ("Tuples of 2 element(s) expected{}. 
Got tuples of 3 " "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n" .format(make_context(estimator), triplets)) assert str(raised_err.value) == expected_msg + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + + # One exception will trigger for sure + with pytest.raises(Exception) as raised_exception: + estimator.pair_distance(triplets) + err_value = raised_exception.value.args[0] + assert err_value == expected_msg or err_value == not_implemented_msg + def test_preprocess_tuples_simple_example(): """Test the preprocessor on a very simple example of tuples to ensure the @@ -872,35 +911,21 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset): dataset_formed.data, random_state=SEED) - def make_random_state(estimator): - rs = {} - if estimator.__class__.__name__[-11:] == '_Supervised': - rs['random_state'] = check_random_state(SEED) - return rs - estimator_with_preprocessor = clone(estimator) set_random_state(estimator_with_preprocessor) estimator_with_preprocessor.set_params(preprocessor=X) - estimator_with_preprocessor.fit(*remove_y_quadruplets(estimator, - indices_train, - y_train), - **make_random_state(estimator)) + estimator_with_preprocessor.fit(*remove_y(estimator, indices_train, y_train)) estimator_without_preprocessor = clone(estimator) set_random_state(estimator_without_preprocessor) estimator_without_preprocessor.set_params(preprocessor=None) - estimator_without_preprocessor.fit(*remove_y_quadruplets(estimator, - formed_train, - y_train), - **make_random_state(estimator)) + estimator_without_preprocessor.fit(*remove_y(estimator, formed_train, + y_train)) estimator_with_prep_formed = clone(estimator) set_random_state(estimator_with_prep_formed) estimator_with_prep_formed.set_params(preprocessor=X) - estimator_with_prep_formed.fit(*remove_y_quadruplets(estimator, - indices_train, - y_train), - **make_random_state(estimator)) + estimator_with_prep_formed.fit(*remove_y(estimator, indices_train, y_train)) # test prediction methods for method in ["predict", "decision_function"]: @@ -916,31 +941,59 @@ def make_random_state(estimator): method)(formed_test) assert np.array(output_with_prep == output_with_prep_formed).all() - # test score_pairs - output_with_prep = estimator_with_preprocessor.score_pairs( - indicators_to_transform[[[[0, 2], [5, 3]]]]) - output_without_prep = estimator_without_preprocessor.score_pairs( - formed_points_to_transform[[[[0, 2], [5, 3]]]]) + # Test pair_score, all learners have it. 
+ idx1 = np.array([[0, 2], [5, 3]], dtype=int) + output_with_prep = estimator_with_preprocessor.pair_score( + indicators_to_transform[idx1]) + output_without_prep = estimator_without_preprocessor.pair_score( + formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - output_with_prep = estimator_with_preprocessor.score_pairs( - indicators_to_transform[[[[0, 2], [5, 3]]]]) - output_without_prep = estimator_with_prep_formed.score_pairs( - formed_points_to_transform[[[[0, 2], [5, 3]]]]) + output_with_prep = estimator_with_preprocessor.pair_score( + indicators_to_transform[idx1]) + output_without_prep = estimator_with_prep_formed.pair_score( + formed_points_to_transform[idx1]) assert np.array(output_with_prep == output_without_prep).all() - # test transform - output_with_prep = estimator_with_preprocessor.transform( - indicators_to_transform) - output_without_prep = estimator_without_preprocessor.transform( - formed_points_to_transform) - assert np.array(output_with_prep == output_without_prep).all() - - output_with_prep = estimator_with_preprocessor.transform( - indicators_to_transform) - output_without_prep = estimator_with_prep_formed.transform( - formed_points_to_transform) - assert np.array(output_with_prep == output_without_prep).all() + # Test pair_distance + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have pair_distance" + try: + output_with_prep = estimator_with_preprocessor.pair_distance( + indicators_to_transform[idx1]) + output_without_prep = estimator_without_preprocessor.pair_distance( + formed_points_to_transform[idx1]) + assert np.array(output_with_prep == output_without_prep).all() + + output_with_prep = estimator_with_preprocessor.pair_distance( + indicators_to_transform[idx1]) + output_without_prep = estimator_with_prep_formed.pair_distance( + formed_points_to_transform[idx1]) + assert np.array(output_with_prep == output_without_prep).all() + + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg + + # Test transform + not_implemented_msg = "" + # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says + # "This learner does not have transform" + try: + output_with_prep = estimator_with_preprocessor.transform( + indicators_to_transform) + output_without_prep = estimator_without_preprocessor.transform( + formed_points_to_transform) + assert np.array(output_with_prep == output_without_prep).all() + + output_with_prep = estimator_with_preprocessor.transform( + indicators_to_transform) + output_without_prep = estimator_with_prep_formed.transform( + formed_points_to_transform) + assert np.array(output_with_prep == output_without_prep).all() + + except Exception as raised_exception: + assert raised_exception.value.args[0] == not_implemented_msg def test_check_collapsed_pairs_raises_no_error(): @@ -964,6 +1017,7 @@ def test_check_collapsed_pairs_raises_error(): "the same as the right element), out of 3 pairs in" " total.") + def test__validate_vector(): """Replica of scipy.spatial.tests.test_distance.test__validate_vector""" x = [1, 2, 3] @@ -1040,6 +1094,53 @@ def test__check_sdp_from_eigen_returns_definiteness(w, is_definite): assert _check_sdp_from_eigen(w) == is_definite +@pytest.mark.unit +@pytest.mark.parametrize('w, tol, is_definite', + [(np.array([5., 3.]), 2, True), + (np.array([5., 1.]), 2, False), + (np.array([5., -1.]), 2, False)]) +def test__check_sdp_from_eigen_tol_psd(w, tol, 
is_definite): + """Tests that _check_sdp_from_eigen, for PSD matrices, returns + False if an eigenvalue is lower than tol""" + assert _check_sdp_from_eigen(w, tol=tol) == is_definite + + +@pytest.mark.unit +@pytest.mark.parametrize('w, tol', + [(np.array([5., -3.]), 2), + (np.array([1., -3.]), 2)]) +def test__check_sdp_from_eigen_tol_non_psd(w, tol): + """Tests that _check_sdp_from_eigen raises a NonPSDError + when there is a negative value with abs value higher than tol""" + with pytest.raises(NonPSDError): + _check_sdp_from_eigen(w, tol=tol) + + +@pytest.mark.unit +@pytest.mark.parametrize('w, is_definite', + [(np.array([1e5, 1e5, 1e5, 1e5, + 1e5, 1e5, 1e-20]), False), + (np.array([1e-10, 1e-10]), True)]) +def test__check_sdp_from_eigen_tol_default_psd(w, is_definite): + """Tests that the default tol argument gives good results for edge cases + like even if the determinant is high but clearly one eigenvalue is low + (indefinite so returns False) or when all eigenvalues are low (definite so + returns True)""" + assert _check_sdp_from_eigen(w, tol=None) == is_definite + + +@pytest.mark.unit +@pytest.mark.parametrize('w', + [np.array([1., -1.]), + np.array([-1e-10, 1e-10])]) +def test__check_sdp_from_eigen_tol_default_non_psd(w): + """Tests that the default tol argument is good for raising + NonPSDError, e.g. that when a value is clearly relatively + negative it raises such an error""" + with pytest.raises(NonPSDError): + _check_sdp_from_eigen(w, tol=None) + + def test__check_n_components(): """Checks that n_components returns what is expected (including the errors)""" @@ -1146,3 +1247,27 @@ def test__auto_select_init(has_classes, n_features, n_samples, n_components, """Checks that the auto selection of the init works as expected""" assert (_auto_select_init(has_classes, n_features, n_samples, n_components, n_classes) == result) + + +@pytest.mark.parametrize('w0', [1e-20, 0., -1e-20]) +def test_pseudo_inverse_from_eig_and_pinvh_singular(w0): + """Checks that _pseudo_inverse_from_eig returns the same result as + scipy.linalg.pinvh for a singular matrix""" + rng = np.random.RandomState(SEED) + A = rng.rand(100, 100) + A = A + A.T + w, V = eigh(A) + w[0] = w0 + A = V.dot(np.diag(w)).dot(V.T) + np.testing.assert_allclose(_pseudo_inverse_from_eig(w, V), pinvh(A), + rtol=1e-05) + + +def test_pseudo_inverse_from_eig_and_pinvh_nonsingular(): + """Checks that _pseudo_inverse_from_eig returns the same result as + scipy.linalg.pinvh for a non-singular matrix""" + rng = np.random.RandomState(SEED) + A = rng.rand(100, 100) + A = A + A.T + w, V = eigh(A, check_finite=False) + np.testing.assert_allclose(_pseudo_inverse_from_eig(w, V), pinvh(A))
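To close, a minimal sketch (not part of the patch) of what the two pseudo-inverse tests above compare: for a symmetric matrix A = V diag(w) V.T, a Moore-Penrose pseudo-inverse can be built by inverting only the eigenvalues that are significantly non-zero, which is essentially what scipy.linalg.pinvh does. The cutoff used below (1e-10) is purely illustrative and not necessarily the tolerance metric-learn's private helper applies.

import numpy as np
from scipy.linalg import eigh, pinvh

rng = np.random.RandomState(0)
A = rng.rand(6, 6)
A = A + A.T                       # symmetric test matrix
w, V = eigh(A)
w[0] = 0.                         # force singularity
A = V.dot(np.diag(w)).dot(V.T)

inv_w = np.zeros_like(w)
keep = np.abs(w) > 1e-10          # illustrative cutoff, not the library's
inv_w[keep] = 1. / w[keep]
pseudo_inverse = V.dot(np.diag(inv_w)).dot(V.T)

np.testing.assert_allclose(pseudo_inverse, pinvh(A), rtol=1e-5, atol=1e-8)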