diff --git a/.github/issue_template.md b/.github/ISSUE_TEMPLATE/bug_report.md
similarity index 82%
rename from .github/issue_template.md
rename to .github/ISSUE_TEMPLATE/bug_report.md
index d4fb0abe..ae757838 100644
--- a/.github/issue_template.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,3 +1,9 @@
+---
+name: Reproducible bug report
+about: Create a reproducible bug report. Not for support requests.
+labels: 'bug'
+---
+
 #### Description
 <!-- Describe your issue here.-->
 
@@ -42,3 +48,9 @@ $ pip show metric_learn | grep Version
 )
 -->
 <!-- Thanks for contributing! -->
+
+---
+<!-- Issue Author: Please keep this message; it encourages other users to support your issue! -->
+**Message from the maintainers**:
+
+Impacted by this bug? Give it a 👍. We prioritise the issues with the most 👍.
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..415acfcd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,18 @@
+blank_issues_enabled: false
+
+contact_links:
+  - name: Have you read the docs?
+    url: http://contrib.scikit-learn.org/metric-learn/
+    about: Much help can be found in the docs
+  - name: Ask a question
+    url: https://github.com/scikit-learn-contrib/metric-learn/discussions/new
+    about: Ask a question or start a discussion about metric-learn
+  - name: Stack Overflow
+    url: https://stackoverflow.com
+    about: Please ask and answer metric-learn usage questions (API, installation...) on Stack Overflow
+  - name: Cross Validated
+    url: https://stats.stackexchange.com
+    about: Please ask and answer metric learning questions (use cases, algorithms & theory...) on Cross Validated
+  - name: Blank issue
+    url: https://github.com/scikit-learn-contrib/metric-learn/issues/new
+    about: Please note that GitHub Discussions should be used in most cases instead
diff --git a/.github/ISSUE_TEMPLATE/doc_improvement.md b/.github/ISSUE_TEMPLATE/doc_improvement.md
new file mode 100644
index 00000000..753cf2f7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/doc_improvement.md
@@ -0,0 +1,23 @@
+---
+name: Documentation improvement
+about: Create a report to help us improve the documentation. Alternatively, you can just open a pull request with the suggested change.
+labels: Documentation
+---
+
+#### Describe the issue linked to the documentation
+
+<!--
+Tell us about the confusion introduced in the documentation.
+-->
+
+#### Suggest a potential alternative/fix
+
+<!--
+Tell us how we could improve the documentation in this regard.
+-->
+
+---
+<!-- Issue Author: Please keep this message; it encourages other users to support your issue! -->
+**Message from the maintainers**:
+
+Confused by this part of the doc too? Give it a 👍. We prioritise the issues with the most 👍.
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/enhancement_proposal.md b/.github/ISSUE_TEMPLATE/enhancement_proposal.md
new file mode 100644
index 00000000..01dfb1d7
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/enhancement_proposal.md
@@ -0,0 +1,18 @@
+---
+name: Enhancement proposal
+about: Propose an enhancement for metric-learn
+labels: 'enhancement'
+---
+# Summary
+
+What change needs making?
+
+# Use Cases
+
+When would you use this?
+
+---
+<!-- Issue Author: Please keep this message; it encourages other users to support your issue! -->
+**Message from the maintainers**:
+
+Want to see this feature happen? Give it a 👍. We prioritise the issues with the most 👍.
\ No newline at end of file
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 00000000..0935a109
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,42 @@
+name: CI
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on push or pull request events but only for the master branch
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+    
+jobs:
+  # Run normal testing with the latest versions of all dependencies
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Run Tests without skggm
+        run:  |
+          sudo apt-get install liblapack-dev
+          pip install --upgrade pip pytest
+          pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
+          pytest test --cov
+          bash <(curl -s https://codecov.io/bash)
+      - name: Run Tests with skggm
+        env:
+          SKGGM_VERSION: a0ed406586c4364ea3297a658f415e13b5cbdaf8
+        run:  |
+          pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION}
+          pytest test --cov
+          bash <(curl -s https://codecov.io/bash)
+      - name: Syntax checking with flake8
+        run: |
+          pip install flake8
+          flake8 --extend-ignore=E111,E114 --show-source;
diff --git a/.gitignore b/.gitignore
index 8321c7d2..66eb3551 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,6 @@ htmlcov/
 .cache/
 .pytest_cache/
 doc/auto_examples/*
-doc/generated/*
\ No newline at end of file
+doc/generated/*
+venv/
+.vscode/
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d294c294..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-language: python
-sudo: false
-cache: pip
-language: python
-env:
-  global:
-    - SKGGM_VERSION=a0ed406586c4364ea3297a658f415e13b5cbdaf8
-matrix:
-  include:
-  - name: "Pytest python 3.6 without skggm"
-    python: "3.6"
-    before_install:
-      - sudo apt-get install liblapack-dev
-      - pip install --upgrade pip pytest
-      - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
-    script:
-      - pytest test --cov;
-    after_success:
-      - bash <(curl -s https://codecov.io/bash)
-  - name: "Pytest python 3.6 with skggm"
-    python: "3.6"
-    before_install:
-      - sudo apt-get install liblapack-dev
-      - pip install --upgrade pip pytest
-      - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
-      - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION};
-    script:
-      - pytest test --cov;
-    after_success:
-      - bash <(curl -s https://codecov.io/bash)
-  - name: "Pytest python 3.7 with skggm"
-    python: "3.7"
-    before_install:
-      - sudo apt-get install liblapack-dev
-      - pip install --upgrade pip pytest
-      - pip install wheel cython numpy scipy codecov pytest-cov scikit-learn
-      - pip install git+https://github.com/skggm/skggm.git@${SKGGM_VERSION};
-    script:
-      - pytest test --cov;
-    after_success:
-      - bash <(curl -s https://codecov.io/bash)
-  - name: "Syntax checking with flake8"
-    python: "3.7"
-    before_install:
-      - pip install flake8
-    script:
-      - flake8 --extend-ignore=E111,E114 --show-source;
-      # Use this instead to have a syntax check only on the diff:
-      # - source ./build_tools/travis/flake8_diff.sh;
-branches:
-  only:
-  - master
diff --git a/README.rst b/README.rst
index ff770932..b2f6e6d4 100644
--- a/README.rst
+++ b/README.rst
@@ -1,4 +1,4 @@
-|Travis-CI Build Status| |License| |PyPI version| |Code coverage|
+|GitHub Actions Build Status| |License| |PyPI version| |Code coverage|
 
 metric-learn: Metric Learning in Python
 =======================================
@@ -22,7 +22,7 @@ metric-learn contains efficient Python implementations of several popular superv
 
 -  Python 3.6+ (the last version supporting Python 2 and Python 3.5 was
    `v0.5.0 <https://pypi.org/project/metric-learn/0.5.0/>`_)
--  numpy, scipy, scikit-learn>=0.20.3
+-  numpy>=1.11.0, scipy>=0.17.0, scikit-learn>=0.21.3
 
 **Optional dependencies**
 
@@ -49,23 +49,26 @@ If you use metric-learn in a scientific publication, we would appreciate
 citations to the following paper:
 
 `metric-learn: Metric Learning Algorithms in Python
-<https://arxiv.org/abs/1908.04710>`_, de Vazelhes
-*et al.*, arXiv:1908.04710, 2019.
+<http://www.jmlr.org/papers/volume21/19-678/19-678.pdf>`_, de Vazelhes
+*et al.*, Journal of Machine Learning Research, 21(138):1-6, 2020.
 
 Bibtex entry::
 
-  @techreport{metric-learn,
+  @article{metric-learn,
     title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython},
     author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and
               {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien},
-    institution = {arXiv:1908.04710},
-    year = {2019}
+    journal = {Journal of Machine Learning Research},
+    year = {2020},
+    volume = {21},
+    number = {138},
+    pages = {1--6}
   }
 
 .. _sphinx documentation: http://contrib.scikit-learn.org/metric-learn/
 
-.. |Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master
-   :target: https://travis-ci.org/scikit-learn-contrib/metric-learn
+.. |GitHub Actions Build Status| image:: https://github.com/scikit-learn-contrib/metric-learn/workflows/CI/badge.svg
+   :target: https://github.com/scikit-learn-contrib/metric-learn/actions?query=event%3Apush+branch%3Amaster
 .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat
    :target: http://badges.mit-license.org
 .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg
diff --git a/bench/benchmarks/iris.py b/bench/benchmarks/iris.py
index 5973f7b8..05035085 100644
--- a/bench/benchmarks/iris.py
+++ b/bench/benchmarks/iris.py
@@ -5,15 +5,15 @@
 
 CLASSES = {
     'Covariance': metric_learn.Covariance(),
-    'ITML_Supervised': metric_learn.ITML_Supervised(num_constraints=200),
+    'ITML_Supervised': metric_learn.ITML_Supervised(n_constraints=200),
     'LFDA': metric_learn.LFDA(k=2, dim=2),
-    'LMNN': metric_learn.LMNN(k=5, learn_rate=1e-6, verbose=False),
-    'LSML_Supervised': metric_learn.LSML_Supervised(num_constraints=200),
+    'LMNN': metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False),
+    'LSML_Supervised': metric_learn.LSML_Supervised(n_constraints=200),
     'MLKR': metric_learn.MLKR(),
     'NCA': metric_learn.NCA(max_iter=700, n_components=2),
-    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, num_chunks=30,
+    'RCA_Supervised': metric_learn.RCA_Supervised(dim=2, n_chunks=30,
                                                   chunk_size=2),
-    'SDML_Supervised': metric_learn.SDML_Supervised(num_constraints=1500)
+    'SDML_Supervised': metric_learn.SDML_Supervised(n_constraints=1500)
 }
 
 
diff --git a/build_tools/travis/flake8_diff.sh b/build_tools/travis/flake8_diff.sh
deleted file mode 100644
index aea926c8..00000000
--- a/build_tools/travis/flake8_diff.sh
+++ /dev/null
@@ -1,132 +0,0 @@
-# This file is not used yet but we keep it in case we need to check the pep8 difference
-# on the diff (see .travis.yml)
-#
-#!/bin/bash
-# copied-pasted and adapted from http://github.com/sklearn-contrib/imbalanced-learn
-# (more precisely: https://raw.githubusercontent.com/glemaitre/imbalanced-learn
-# /adcb9d8e6210b321dac2c1b06879e5e889d52d77/build_tools/travis/flake8_diff.sh)
-
-# This script is used in Travis to check that PRs do not add obvious
-# flake8 violations. It relies on two things:
-#   - find common ancestor between branch and
-#     scikit-learn/scikit-learn remote
-#   - run flake8 --diff on the diff between the branch and the common
-#     ancestor
-#
-# Additional features:
-#   - the line numbers in Travis match the local branch on the PR
-#     author machine.
-#   - ./build_tools/travis/flake8_diff.sh can be run locally for quick
-#     turn-around
-
-set -e
-# pipefail is necessary to propagate exit codes
-set -o pipefail
-
-PROJECT=scikit-learn-contrib/metric-learn
-PROJECT_URL=https://github.com/$PROJECT.git
-
-# Find the remote with the project name (upstream in most cases)
-REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '')
-
-# Add a temporary remote if needed. For example this is necessary when
-# Travis is configured to run in a fork. In this case 'origin' is the
-# fork and not the reference repo we want to diff against.
-if [[ -z "$REMOTE" ]]; then
-    TMP_REMOTE=tmp_reference_upstream
-    REMOTE=$TMP_REMOTE
-    git remote add $REMOTE $PROJECT_URL
-fi
-
-echo "Remotes:"
-echo '--------------------------------------------------------------------------------'
-git remote --verbose
-
-# Travis does the git clone with a limited depth (50 at the time of
-# writing). This may not be enough to find the common ancestor with
-# $REMOTE/master so we unshallow the git checkout
-if [[ -a .git/shallow ]]; then
-    echo -e '\nTrying to unshallow the repo:'
-    echo '--------------------------------------------------------------------------------'
-    git fetch --unshallow
-fi
-
-if [[ "$TRAVIS" == "true" ]]; then
-    if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]
-    then
-        # In main repo, using TRAVIS_COMMIT_RANGE to test the commits
-        # that were pushed into a branch
-        if [[ "$PROJECT" == "$TRAVIS_REPO_SLUG" ]]; then
-            if [[ -z "$TRAVIS_COMMIT_RANGE" ]]; then
-                echo "New branch, no commit range from Travis so passing this test by convention"
-                exit 0
-            fi
-            COMMIT_RANGE=$TRAVIS_COMMIT_RANGE
-        fi
-    else
-        # We want to fetch the code as it is in the PR branch and not
-        # the result of the merge into master. This way line numbers
-        # reported by Travis will match with the local code.
-        LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST
-        # In Travis the PR target is always origin
-        git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF
-    fi
-fi
-
-# If not using the commit range from Travis we need to find the common
-# ancestor between $LOCAL_BRANCH_REF and $REMOTE/master
-if [[ -z "$COMMIT_RANGE" ]]; then
-    if [[ -z "$LOCAL_BRANCH_REF" ]]; then
-        LOCAL_BRANCH_REF=$(git rev-parse --abbrev-ref HEAD)
-    fi
-    echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:"
-    echo '--------------------------------------------------------------------------------'
-    git log -2 $LOCAL_BRANCH_REF
-
-    REMOTE_MASTER_REF="$REMOTE/master"
-    # Make sure that $REMOTE_MASTER_REF is a valid reference
-    echo -e "\nFetching $REMOTE_MASTER_REF"
-    echo '--------------------------------------------------------------------------------'
-    git fetch $REMOTE master:refs/remotes/$REMOTE_MASTER_REF
-    LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF)
-    REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF)
-
-    COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \
-        echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)"
-
-    if [ -z "$COMMIT" ]; then
-        exit 1
-    fi
-
-    COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT)
-
-    echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\
-         "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:"
-    echo '--------------------------------------------------------------------------------'
-    git show --no-patch $COMMIT_SHORT_HASH
-
-    COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH"
-
-    if [[ -n "$TMP_REMOTE" ]]; then
-        git remote remove $TMP_REMOTE
-    fi
-
-else
-    echo "Got the commit range from Travis: $COMMIT_RANGE"
-fi
-
-echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \
-     "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):"
-echo '--------------------------------------------------------------------------------'
-
-# to not include the context (some lines before and after the modified lines), add the
-# flag --unified=0 (warning: it will not include some errors like for instance adding too
-# much blank lines
-check_files() {
-    git diff $COMMIT_RANGE | flake8 --diff --show-source --extend-ignore=E111,E114
-}
-
-check_files
-
-echo -e "No problem detected by flake8\n"
-
diff --git a/doc/_static/css/styles.css b/doc/_static/css/styles.css
new file mode 100644
index 00000000..6d350ae4
--- /dev/null
+++ b/doc/_static/css/styles.css
@@ -0,0 +1,36 @@
+.hatnote {
+    border-color: #e1e4e5 ;
+    border-style: solid ;
+    border-width: 1px ;
+    font-size: x-small ;
+    font-style: italic ;
+    margin-left: auto ;
+    margin-right: auto ;
+    margin-bottom: 24px;
+    padding: 12px;
+}
+.hatnote-gray {
+  background-color: #f5f5f5 
+}
+.hatnote li {
+  list-style-type: square;
+  margin-left: 12px !important;
+}
+.hatnote ul {
+  list-style-type: square;
+  margin-left: 0px !important;
+  margin-bottom: 0px !important;
+}
+.deprecated {
+  color: #b94a48;
+  background-color: #F3E5E5;
+  border-color: #eed3d7;
+  margin-top: 0.5rem;
+  padding: 0.5rem;
+  border-radius: 0.5rem;
+  margin-bottom: 0.5rem;
+}
+
+.deprecated p {
+  margin-bottom: 0 !important;
+}
\ No newline at end of file
diff --git a/doc/conf.py b/doc/conf.py
index eac09b38..c472cc21 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import sys
 import os
+import warnings
 
 extensions = [
     'sphinx.ext.autodoc',
@@ -20,12 +21,12 @@
 
 # General information about the project.
 project = u'metric-learn'
-copyright = (u'2015-2020, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien '
+copyright = (u'2015-2023, CJ Carey, Yuan Tang, William de Vazelhes, Aurélien '
              u'Bellet and Nathalie Vauquier')
 author = (u'CJ Carey, Yuan Tang, William de Vazelhes, Aurélien Bellet and '
           u'Nathalie Vauquier')
-version = '0.6.0'
-release = '0.6.0'
+version = '0.7.0'
+release = '0.7.0'
 language = 'en'
 
 exclude_patterns = ['_build']
@@ -37,9 +38,6 @@
 html_static_path = ['_static']
 htmlhelp_basename = 'metric-learndoc'
 
-# Option to only need single backticks to refer to symbols
-default_role = 'any'
-
 # Option to hide doctests comments in the documentation (like # doctest:
 # +NORMALIZE_WHITESPACE for instance)
 trim_doctest_flags = True
@@ -66,10 +64,6 @@
 # generate autosummary even if no references
 autosummary_generate = True
 
-# Switch to old behavior with html4, for a good display of references,
-# as described in https://github.com/sphinx-doc/sphinx/issues/6705
-html4_writer = True
-
 
 # Temporary work-around for spacing problem between parameter and parameter
 # type in the doc, see https://github.com/numpy/numpydoc/issues/215. The bug
@@ -78,5 +72,11 @@
 # In an ideal world, this would get fixed in this PR:
 # https://github.com/readthedocs/sphinx_rtd_theme/pull/747/files
 def setup(app):
-    app.add_javascript('js/copybutton.js')
-    app.add_stylesheet("basic.css")
+  app.add_js_file('js/copybutton.js')
+  app.add_css_file('css/styles.css')
+
+
+# Remove matplotlib agg warnings from generated doc when using plt.show
+warnings.filterwarnings("ignore", category=UserWarning,
+                        message='Matplotlib is currently using agg, which is a'
+                                ' non-GUI backend, so cannot show the figure.')
diff --git a/doc/getting_started.rst b/doc/getting_started.rst
index 44fd1436..90b7c7ee 100644
--- a/doc/getting_started.rst
+++ b/doc/getting_started.rst
@@ -19,7 +19,7 @@ metric-learn can be installed in either of the following ways:
 
 - Python 3.6+ (the last version supporting Python 2 and Python 3.5 was
   `v0.5.0 <https://pypi.org/project/metric-learn/0.5.0/>`_)
-- numpy, scipy, scikit-learn>=0.20.3
+- numpy>=1.11.0, scipy>=0.17.0, scikit-learn>=0.21.3
 
 **Optional dependencies**
 
diff --git a/doc/index.rst b/doc/index.rst
index 8f000246..f9dfd83d 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,6 +1,6 @@
 metric-learn: Metric Learning in Python
 =======================================
-|Travis-CI Build Status| |License| |PyPI version| |Code coverage|
+|GitHub Actions Build Status| |License| |PyPI version| |Code coverage|
 
 `metric-learn <https://github.com/scikit-learn-contrib/metric-learn>`_
 contains efficient Python implementations of several popular supervised and
@@ -15,17 +15,20 @@ If you use metric-learn in a scientific publication, we would appreciate
 citations to the following paper:
 
 `metric-learn: Metric Learning Algorithms in Python
-<https://arxiv.org/abs/1908.04710>`_, de Vazelhes
-*et al.*, arXiv:1908.04710, 2019.
+<http://www.jmlr.org/papers/volume21/19-678/19-678.pdf>`_, de Vazelhes
+*et al.*, Journal of Machine Learning Research, 21(138):1-6, 2020.
 
 Bibtex entry::
 
-  @techreport{metric-learn,
+  @article{metric-learn,
     title = {metric-learn: {M}etric {L}earning {A}lgorithms in {P}ython},
     author = {{de Vazelhes}, William and {Carey}, CJ and {Tang}, Yuan and
               {Vauquier}, Nathalie and {Bellet}, Aur{\'e}lien},
-    institution = {arXiv:1908.04710},
-    year = {2019}
+    journal = {Journal of Machine Learning Research},
+    year = {2020},
+    volume = {21},
+    number = {138},
+    pages = {1--6}
   }
 
 
@@ -54,8 +57,8 @@ Documentation outline
 
 :ref:`genindex` | :ref:`search`
 
-.. |Travis-CI Build Status| image:: https://api.travis-ci.org/scikit-learn-contrib/metric-learn.svg?branch=master
-   :target: https://travis-ci.org/scikit-learn-contrib/metric-learn
+.. |GitHub Actions Build Status| image:: https://github.com/scikit-learn-contrib/metric-learn/workflows/CI/badge.svg
+   :target: https://github.com/scikit-learn-contrib/metric-learn/actions?query=event%3Apush+branch%3Amaster
 .. |PyPI version| image:: https://badge.fury.io/py/metric-learn.svg
    :target: http://badge.fury.io/py/metric-learn
 .. |License| image:: http://img.shields.io/:license-mit-blue.svg?style=flat
diff --git a/doc/introduction.rst b/doc/introduction.rst
index 7d9f52d0..e9ff0015 100644
--- a/doc/introduction.rst
+++ b/doc/introduction.rst
@@ -123,26 +123,3 @@ to the following resources:
   Survey <http://dx.doi.org/10.1561/2200000019>`_ (2012)
 - **Book:** `Metric Learning
   <http://dx.doi.org/10.2200/S00626ED1V01Y201501AIM030>`_ (2015)
-
-.. Methods [TO MOVE TO SUPERVISED/WEAK SECTIONS]
-.. =============================================
-
-.. Currently, each metric learning algorithm supports the following methods:
-
-.. -  ``fit(...)``, which learns the model.
-.. -  ``get_mahalanobis_matrix()``, which returns a Mahalanobis matrix
-.. -  ``get_metric()``, which returns a function that takes as input two 1D
-      arrays and outputs the learned metric score on these two points
-..    :math:`M = L^{\top}L` such that distance between vectors ``x`` and
-..    ``y`` can be computed as :math:`\sqrt{\left(x-y\right)M\left(x-y\right)}`.
-.. -  ``components_from_metric(metric)``, which returns a transformation matrix
-..    :math:`L \in \mathbb{R}^{D \times d}`, which can be used to convert a
-..    data matrix :math:`X \in \mathbb{R}^{n \times d}` to the
-..    :math:`D`-dimensional learned metric space :math:`X L^{\top}`,
-..    in which standard Euclidean distances may be used.
-.. -  ``transform(X)``, which applies the aforementioned transformation.
-.. - ``score_pairs(pairs)`` which returns the distance between pairs of
-..   points. ``pairs`` should be a 3D array-like of pairs of shape ``(n_pairs,
-..   2, n_features)``, or it can be a 2D array-like of pairs indicators of
-..   shape ``(n_pairs, 2)`` (see section :ref:`preprocessor_section` for more
-..   details).
\ No newline at end of file
diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst
index 8f91d91c..4d0676b9 100644
--- a/doc/metric_learn.rst
+++ b/doc/metric_learn.rst
@@ -13,6 +13,8 @@ Base Classes
 
     metric_learn.Constraints
     metric_learn.base_metric.BaseMetricLearner
+    metric_learn.base_metric.MetricTransformer
+    metric_learn.base_metric.MahalanobisMixin
     metric_learn.base_metric._PairsClassifierMixin
     metric_learn.base_metric._TripletsClassifierMixin
     metric_learn.base_metric._QuadrupletsClassifierMixin
diff --git a/doc/supervised.rst b/doc/supervised.rst
index 1b1180e9..49548b83 100644
--- a/doc/supervised.rst
+++ b/doc/supervised.rst
@@ -69,10 +69,10 @@ Also, as explained before, our metric learners has learn a distance between
 points. You can use this distance in two main ways:
 
 - You can either return the distance between pairs of points using the
-  `score_pairs` function:
+  `pair_distance` function:
 
->>> nca.score_pairs([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]]])
-array([0.49627072, 3.65287282])
+>>> nca.pair_distance([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
+array([0.49627072, 3.65287282, 6.06079877])
 
 - Or you can return a function that will return the distance (in the new
   space) between two 1D arrays (the coordinates of the points in the original
@@ -82,6 +82,18 @@ array([0.49627072, 3.65287282])
 >>> metric_fun([3.5, 3.6], [5.6, 2.4])
 0.4962707194621285
 
+- Alternatively, you can use `pair_score` to return the **score** between
+  pairs of points (the larger the score, the more similar the pair).
+  For Mahalanobis learners, it is equal to the negative of the distance.
+
+>>> score = nca.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
+>>> score
+array([-0.49627072, -3.65287282, -6.06079877])
+
+This is useful because `pair_score` matches the **score** semantics of
+scikit-learn's `Classification metrics
+<https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics>`_.
+
 .. note::
 
     If the metric learner that you use learns a :ref:`Mahalanobis distance
@@ -93,7 +105,6 @@ array([0.49627072, 3.65287282])
     array([[0.43680409, 0.89169412],
            [0.89169412, 1.9542479 ]])
 
-.. TODO: remove the "like it is the case etc..." if it's not the case anymore
 
 Scikit-learn compatibility
 --------------------------
@@ -105,6 +116,7 @@ All supervised algorithms are scikit-learn estimators
 scikit-learn model selection routines 
 (`sklearn.model_selection.cross_val_score`,
 `sklearn.model_selection.GridSearchCV`, etc).
+You can also use some of the scoring functions from `sklearn.metrics`.
 
 Algorithms
 ==========
@@ -140,7 +152,7 @@ neighbors (with same labels) of :math:`\mathbf{x}_{i}`, :math:`y_{ij}=0`
 indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes, 
 :math:`[\cdot]_+=\max(0, \cdot)` is the Hinge loss.
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -152,18 +164,18 @@ indicates :math:`\mathbf{x}_{i}, \mathbf{x}_{j}` belong to different classes,
     X = iris_data['data']
     Y = iris_data['target']
 
-    lmnn = LMNN(k=5, learn_rate=1e-6)
-    lmnn.fit(X, Y, verbose=False)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
+    lmnn.fit(X, Y)
 
-.. topic:: References:
+.. rubric:: References
 
-    .. [1] Weinberger et al. `Distance Metric Learning for Large Margin
-       Nearest Neighbor Classification
-       <http://jmlr.csail.mit.edu/papers/volume10/weinberger09a/weinberger09a.pdf>`_.
-       JMLR 2009
 
-    .. [2] `Wikipedia entry on Large Margin Nearest Neighbor <https://en.wikipedia.org/wiki/Large_margin_nearest_neighbor>`_
-       
+.. container:: hatnote hatnote-gray
+
+  [1]. Weinberger et al. `Distance Metric Learning for Large Margin Nearest Neighbor Classification <http://jmlr.csail.mit.edu/papers/volume10/weinberger09a/weinberger09a.pdf>`_. JMLR 2009.
+
+  [2]. `Wikipedia entry on Large Margin Nearest Neighbor <https://en.wikipedia.org/wiki/Large_margin_nearest_neighbor>`_.
+             
 
 .. _nca:
 
@@ -204,7 +216,7 @@ the sum of probability of being correctly classified:
 
       \mathbf{L} = \text{argmax}\sum_i p_i
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -219,13 +231,14 @@ the sum of probability of being correctly classified:
     nca = NCA(max_iter=1000)
     nca.fit(X, Y)
 
-.. topic:: References:
+.. rubric:: References
+
 
-    .. [1] Goldberger et al.
-       `Neighbourhood Components Analysis <https://papers.nips.cc/paper/2566-neighbourhood-components-analysis.pdf>`_.
-       NIPS 2005
+.. container:: hatnote hatnote-gray
 
-    .. [2] `Wikipedia entry on Neighborhood Components Analysis <https://en.wikipedia.org/wiki/Neighbourhood_components_analysis>`_
+      [1]. Goldberger et al. `Neighbourhood Components Analysis <https://papers.nips.cc/paper/2566-neighbourhood-components-analysis.pdf>`_. NIPS 2005.
+
+      [2]. `Wikipedia entry on Neighborhood Components Analysis <https://en.wikipedia.org/wiki/Neighbourhood_components_analysis>`_.
        
 
 .. _lfda:
@@ -277,7 +290,7 @@ nearby data pairs in the same class are made close and the data pairs in
 different classes are separated from each other; far apart data pairs in the 
 same class are not imposed to be close.
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -292,15 +305,19 @@ same class are not imposed to be close.
     lfda = LFDA(k=2, dim=2)
     lfda.fit(X, Y)
 
-.. topic:: References:
+.. note::
+    LFDA suffers from a problem called “sign indeterminacy”, which means the sign of the ``components`` and the output from transform depend on a random state. This is directly related to the calculation of eigenvectors in the algorithm. The same input run at different times might lead to different transforms, both of which are valid.
+    
+    To work around this, fit an instance of this class to the data once, then keep that instance around to do transformations.
+
+.. rubric:: References
+
 
-    .. [1] Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local
-       Fisher Discriminant Analysis <http://www.jmlr.org/papers/volume8/sugiyama07b/sugiyama07b.pdf>`_.
-       JMLR 2007
+.. container:: hatnote hatnote-gray
 
-    .. [2] Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering
-       <https://gastrograph.com/resources/whitepapers/local-fisher
-       -discriminant-analysis-on-beer-style-clustering.html#>`_.
+      [1]. Sugiyama. `Dimensionality Reduction of Multimodal Labeled Data by Local Fisher Discriminant Analysis <http://www.jmlr.org/papers/volume8/sugiyama07b/sugiyama07b.pdf>`_. JMLR 2007.
+
+      [2]. Tang. `Local Fisher Discriminant Analysis on Beer Style Clustering <https://gastrograph.com/resources/whitepapers/local-fisher-discriminant-analysis-on-beer-style-clustering.html#>`_.
 
 .. _mlkr:
 
@@ -346,7 +363,7 @@ calculating a weighted average of all the training samples:
 
     \hat{y}_i = \frac{\sum_{j\neq i}y_jk_{ij}}{\sum_{j\neq i}k_{ij}}
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -360,10 +377,12 @@ calculating a weighted average of all the training samples:
     mlkr = MLKR()
     mlkr.fit(X, Y)
 
-.. topic:: References:
+.. rubric:: References
+
+
+.. container:: hatnote hatnote-gray
 
-    .. [1] Weinberger et al. `Metric Learning for Kernel Regression <http://proceedings.mlr.
-       press/v2/weinberger07a/weinberger07a.pdf>`_. AISTATS 2007
+    [1]. Weinberger et al. `Metric Learning for Kernel Regression <http://proceedings.mlr.press/v2/weinberger07a/weinberger07a.pdf>`_. AISTATS 2007.
 
 
 .. _supervised_version:
@@ -388,8 +407,8 @@ are similar (+1) or dissimilar (-1)), are sampled with the function
 (of label +1), this method will look at all the samples from the same label and
 sample randomly a pair among them. To sample negative pairs (of label -1), this
 method will look at all the samples from a different class and sample randomly
-a pair among them. The method will try to build `num_constraints` positive
-pairs and `num_constraints` negative pairs, but sometimes it cannot find enough
+a pair among them. The method will try to build `n_constraints` positive
+pairs and `n_constraints` negative pairs, but sometimes it cannot find enough
 of one of those, so forcing `same_length=True` will return both times the
 minimum of the two lenghts.
 
@@ -400,7 +419,7 @@ quadruplets, where for each quadruplet the two first points are from the same
 class, and the two last points are from a different class (so indeed the two
 last points should be less similar than the two first points).
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -411,5 +430,5 @@ last points should be less similar than the two first points).
     X = iris_data['data']
     Y = iris_data['target']
 
-    mmc = MMC_Supervised(num_constraints=200)
+    mmc = MMC_Supervised(n_constraints=200)
     mmc.fit(X, Y)
diff --git a/doc/unsupervised.rst b/doc/unsupervised.rst
index 1191e805..110b07f9 100644
--- a/doc/unsupervised.rst
+++ b/doc/unsupervised.rst
@@ -20,7 +20,7 @@ It can be used for ZCA whitening of the data (see the Wikipedia page of
 `whitening transformation <https://en.wikipedia.org/wiki/\
 Whitening_transformation>`_).
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -32,6 +32,9 @@ Whitening_transformation>`_).
     cov = Covariance().fit(iris)
     x = cov.transform(iris)
 
-.. topic:: References:
+.. rubric:: References
 
-    .. [1] On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936
\ No newline at end of file
+
+.. container:: hatnote hatnote-gray
+
+      [1]. On the Generalized Distance in Statistics, P.C.Mahalanobis, 1936.
\ No newline at end of file
diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
index 174210b8..76f7c14e 100644
--- a/doc/weakly_supervised.rst
+++ b/doc/weakly_supervised.rst
@@ -62,8 +62,9 @@ The most intuitive way to represent tuples is to provide the algorithm with a
 in a tuple (2 for pairs, 3 for triplets for instance), and `n_features` is
 the number of features of each point.
 
-.. topic:: Example:
-   Here is an artificial dataset of 4 pairs of 2 points of 3 features each:
+.. rubric:: Example Code
+
+Here is an artificial dataset of 4 pairs of 2 points of 3 features each:
 
 >>> import numpy as np
 >>> tuples = np.array([[[-0.12, -1.21, -0.20],
@@ -94,7 +95,9 @@ would be to keep the dataset of points `X` aside, and just represent tuples
 as a collection of tuples of *indices* from the points in `X`. Since we loose
 the feature dimension there, the resulting array is 2D.
 
-.. topic:: Example: An equivalent representation of the above pairs would be:
+.. rubric:: Example Code
+    
+An equivalent representation of the above pairs would be:
 
 >>> X = np.array([[-0.12, -1.21, -0.20],
 >>>               [+0.05, -0.19, -0.05],
@@ -134,7 +137,7 @@ are respected.
 >>> from metric_learn import MMC
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(tuples, y)
-MMC(A0='deprecated', convergence_threshold=0.001, diagonal=False,
+MMC(A0='deprecated', tol=0.001, diagonal=False,
   diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000,
   preprocessor=None, random_state=42, verbose=False)
 
@@ -160,9 +163,9 @@ Also, as explained before, our metric learner has learned a distance between
 points. You can use this distance in two main ways:
 
 - You can either return the distance between pairs of points using the
-  `score_pairs` function:
+  `pair_distance` function:
 
->>> mmc.score_pairs([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]],
+>>> mmc.pair_distance([[[3.5, 3.6, 5.2], [5.6, 2.4, 6.7]],
 ...                  [[1.2, 4.2, 7.7], [2.1, 6.4, 0.9]]])
 array([7.27607365, 0.88853014])
 
@@ -175,6 +178,18 @@ array([7.27607365, 0.88853014])
 >>> metric_fun([3.5, 3.6, 5.2], [5.6, 2.4, 6.7])
 7.276073646278203
 
+- Alternatively, you can use `pair_score` to return the **score** between
+  pairs of points (the larger the score, the more similar the pair).
+  For Mahalanobis learners, it is equal to the opposite of the distance.
+
+>>> score = mmc.pair_score([[[3.5, 3.6], [5.6, 2.4]], [[1.2, 4.2], [2.1, 6.4]], [[3.3, 7.8], [10.9, 0.1]]])
+>>> score
+array([-0.49627072, -3.65287282, -6.06079877])
+
+  This is useful because `pair_score` matches the **score** semantics of
+  scikit-learn's `Classification metrics
+  <https://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics>`_.
+
 .. note::
 
     If the metric learner that you use learns a :ref:`Mahalanobis distance
@@ -187,8 +202,6 @@ array([[ 0.58603894, -5.69883982, -1.66614919],
        [-5.69883982, 55.41743549, 16.20219519],
        [-1.66614919, 16.20219519,  4.73697721]])
 
-.. TODO: remove the "like it is the case etc..." if it's not the case anymore
-
 .. _sklearn_compat_ws:
 
 Prediction and scoring
@@ -250,7 +263,7 @@ tuples).
 >>> y_pairs = np.array([1, -1])
 >>> mmc = MMC(random_state=42)
 >>> mmc.fit(pairs, y_pairs)
-MMC(convergence_threshold=0.001, diagonal=False,
+MMC(tol=0.001, diagonal=False,
     diagonal_c=1.0, init='auto', max_iter=100, max_proj=10000, preprocessor=None,
     random_state=42, verbose=False)
 
@@ -344,8 +357,8 @@ returns the `sklearn.metrics.roc_auc_score` (which is threshold-independent).
 
 .. note::
    See :ref:`fit_ws` for more details on metric learners functions that are
-   not specific to learning on pairs, like `transform`, `score_pairs`,
-   `get_metric` and `get_mahalanobis_matrix`.
+   not specific to learning on pairs, like `transform`, `pair_distance`,
+   `pair_score`, `get_metric` and `get_mahalanobis_matrix`.
 
 Algorithms
 ----------
@@ -400,7 +413,7 @@ for similar and dissimilar pairs respectively, and :math:`\mathbf{M}_0`
 is the prior distance metric, set to identity matrix by default, 
 :math:`D_{\ell \mathrm{d}}(\cdot)` is the log determinant.
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -419,11 +432,14 @@ is the prior distance metric, set to identity matrix by default,
     itml = ITML()
     itml.fit(pairs, y)
 
-.. topic:: References:
+.. rubric:: References
 
-    .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning <https://icml.cc/imls/conferences/2007/proceedings/papers/404.pdf>`_. ICML 2007
 
-    .. [2] Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/
+.. container:: hatnote hatnote-gray
+
+      [1]. Jason V. Davis, et al. `Information-theoretic Metric Learning <https://icml.cc/imls/conferences/2007/proceedings/papers/404.pdf>`_. ICML 2007.
+
+      [2]. Adapted from Matlab code at http://www.cs.utexas.edu/users/pjain/itml/ .
 
 
 .. _sdml:
@@ -458,7 +474,7 @@ the sums of the row elements of :math:`\mathbf{K}`., :math:`||\cdot||_{1, off}`
 is the off-diagonal L1 norm.
 
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -476,19 +492,19 @@ is the off-diagonal L1 norm.
     sdml = SDML()
     sdml.fit(pairs, y)
 
-.. topic:: References:
+.. rubric:: References
+
+
+.. container:: hatnote hatnote-gray
 
-    .. [1] Qi et al.
-       `An efficient sparse metric learning in high-dimensional space via
-       L1-penalized log-determinant regularization <https://icml.cc/Conferences/2009/papers/46.pdf>`_.
-       ICML 2009.
+      [1]. Qi et al. `An efficient sparse metric learning in high-dimensional space via L1-penalized log-determinant regularization <https://icml.cc/Conferences/2009/papers/46.pdf>`_. ICML 2009.
 
-    .. [2] Code adapted from https://gist.github.com/kcarnold/5439945
+      [2]. Code adapted from https://gist.github.com/kcarnold/5439945 .
 
 .. _rca:
 
 :py:class:`RCA <metric_learn.RCA>`
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Relative Components Analysis (:py:class:`RCA <metric_learn.RCA>`)
 
@@ -512,7 +528,7 @@ where chunklet :math:`j` consists of :math:`\{\mathbf{x}_{ji}\}_{i=1}^{n_j}`
 with a mean :math:`\hat{m}_j`. The inverse of :math:`\mathbf{C}^{-1}` is used 
 as the Mahalanobis matrix.
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -527,15 +543,16 @@ as the Mahalanobis matrix.
     rca = RCA()
     rca.fit(X, chunks)
 
-.. topic:: References:
+.. rubric:: References
+
+
+.. container:: hatnote hatnote-gray
 
-    .. [1] Shental et al. `Adjustment learning and relevant component analysis
-       <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.19.2871
-       &rep=rep1&type=pdf>`_. ECCV 2002
+      [1]. Shental et al. `Adjustment learning and relevant component analysis <http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.19.2871&rep=rep1&type=pdf>`_. ECCV 2002.
 
-    .. [2] Bar-Hillel et al. `Learning distance functions using equivalence relations <https://aaai.org/Papers/ICML/2003/ICML03-005.pdf>`_. ICML 2003
+      [2]. Bar-Hillel et al. `Learning distance functions using equivalence relations <https://aaai.org/Papers/ICML/2003/ICML03-005.pdf>`_. ICML 2003.
 
-    .. [3] Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints <http://www.jmlr.org/papers/volume6/bar-hillel05a/bar-hillel05a.pdf>`_. JMLR 2005
+      [3]. Bar-Hillel et al. `Learning a Mahalanobis metric from equivalence constraints <http://www.jmlr.org/papers/volume6/bar-hillel05a/bar-hillel05a.pdf>`_. JMLR 2005.
 
 .. _mmc:
 
@@ -566,7 +583,7 @@ points, while constrains the sum of distances between dissimilar points:
       \qquad \qquad \text{s.t.} \qquad \sum_{(\mathbf{x}_i, \mathbf{x}_j)
       \in D} d^2_{\mathbf{M}}(\mathbf{x}_i, \mathbf{x}_j) \geq 1
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -584,13 +601,14 @@ points, while constrains the sum of distances between dissimilar points:
     mmc = MMC()
     mmc.fit(pairs, y)
 
-.. topic:: References:
+.. rubric:: References
+
+
+.. container:: hatnote hatnote-gray
 
-  .. [1] Xing et al. `Distance metric learning with application to clustering with
-        side-information <http://papers.nips
-        .cc/paper/2164-distance-metric-learning-with-application-to-clustering
-        -with-side-information.pdf>`_. NIPS 2002
-  .. [2] Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz
+    [1]. Xing et al. `Distance metric learning with application to clustering with side-information <http://papers.nips.cc/paper/2164-distance-metric-learning-with-application-to-clustering-with-side-information.pdf>`_. NIPS 2002.
+    
+    [2]. Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz .
 
 .. _learning_on_triplets:
 
@@ -691,8 +709,8 @@ of triplets that have the right predicted ordering.
 
 .. note::
    See :ref:`fit_ws` for more details on metric learners functions that are
-   not specific to learning on pairs, like `transform`, `score_pairs`,
-   `get_metric` and `get_mahalanobis_matrix`.
+   not specific to learning on pairs, like `transform`, `pair_distance`,
+   `pair_score`, `get_metric` and `get_mahalanobis_matrix`.
 
 
 
@@ -734,7 +752,7 @@ is added to yield a sparse combination. The formulation is the following:
 
 where :math:`[\cdot]_+` is the hinge loss. 
  
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -748,14 +766,14 @@ where :math:`[\cdot]_+` is the hinge loss.
     scml = SCML()
     scml.fit(triplets)
 
-.. topic:: References:
+.. rubric:: References
 
-  .. [1] Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning.
-         <http://researchers.lille.inria.fr/abellet/papers/aaai14.pdf>`_. \
-         (AAAI), 2014.
 
-  .. [2] Adapted from original \
-         `Matlab implementation.<https://github.com/bellet/SCML>`_.
+.. container:: hatnote hatnote-gray
+
+    [1]. Y. Shi, A. Bellet and F. Sha. `Sparse Compositional Metric Learning. <http://researchers.lille.inria.fr/abellet/papers/aaai14.pdf>`_. (AAAI), 2014.
+
+    [2]. Adapted from original `Matlab implementation. <https://github.com/bellet/SCML>`_.
 
 
 .. _learning_on_quadruplets:
@@ -859,8 +877,8 @@ of quadruplets have the right predicted ordering.
 
 .. note::
    See :ref:`fit_ws` for more details on metric learners functions that are
-   not specific to learning on pairs, like `transform`, `score_pairs`,
-   `get_metric` and `get_mahalanobis_matrix`.
+   not specific to learning on pairs, like `transform`, `pair_distance`,
+   `pair_score`, `get_metric` and `get_mahalanobis_matrix`.
 
 
 
@@ -927,7 +945,7 @@ by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence:
     D_{ld}(\mathbf{M, M_0}) = \text{tr}(\mathbf{MM_0}) − \text{logdet}
     (\mathbf{M})
 
-.. topic:: Example Code:
+.. rubric:: Example Code
 
 ::
 
@@ -944,12 +962,13 @@ by default, :math:`D_{ld}(\mathbf{\cdot, \cdot})` is the LogDet divergence:
     lsml = LSML()
     lsml.fit(quadruplets)
 
-.. topic:: References:
+.. rubric:: References
+
+
+.. container:: hatnote hatnote-gray
 
-    .. [1] Liu et al.
-       `Metric Learning from Relative Comparisons by Minimizing Squared
-       Residual <http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf>`_. ICDM 2012
+      [1]. Liu et al. `Metric Learning from Relative Comparisons by Minimizing Squared Residual <http://www.cs.ucla.edu/~weiwang/paper/ICDM12.pdf>`_. ICDM 2012.
 
-    .. [2] Code adapted from https://gist.github.com/kcarnold/5439917
+      [2]. Code adapted from https://gist.github.com/kcarnold/5439917 .
 
 
diff --git a/examples/plot_metric_learning_examples.py b/examples/plot_metric_learning_examples.py
index 71229554..32759636 100644
--- a/examples/plot_metric_learning_examples.py
+++ b/examples/plot_metric_learning_examples.py
@@ -15,7 +15,11 @@
 ######################################################################
 # Imports
 # ^^^^^^^
+# .. note::
 #
+#     In order to show the charts of the examples you need a graphical
+#     ``matplotlib`` backend installed. For instance, use ``pip install pyqt5``
+#     to get Qt graphical interface or use your favorite one.
 
 from sklearn.manifold import TSNE
 
@@ -35,9 +39,9 @@
 # We will be using a synthetic dataset to illustrate the plotting,
 # using the function `sklearn.datasets.make_classification` from
 # scikit-learn. The dataset will contain:
-#   - 100 points in 3 classes with 2 clusters per class
-#   - 5 features, among which 3 are informative (correlated with the class
-#     labels) and two are random noise with large magnitude
+# - 100 points in 3 classes with 2 clusters per class
+# - 5 features, among which 3 are informative (correlated with the class
+# labels) and two are random noise with large magnitude
 
 X, y = make_classification(n_samples=100, n_classes=3, n_clusters_per_class=2,
                            n_informative=3, class_sep=4., n_features=5,
@@ -139,7 +143,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 #
 
 # setting up LMNN
-lmnn = metric_learn.LMNN(k=5, learn_rate=1e-6)
+lmnn = metric_learn.LMNN(n_neighbors=5, learn_rate=1e-6)
 
 # fit the data!
 lmnn.fit(X, y)
@@ -310,7 +314,7 @@ def plot_tsne(X, y, colormap=plt.cm.Paired):
 # - See more in the documentation of the class :py:class:`RCA
 #   <metric_learn.RCA>`
 
-rca = metric_learn.RCA_Supervised(num_chunks=30, chunk_size=2)
+rca = metric_learn.RCA_Supervised(n_chunks=30, chunk_size=2)
 X_rca = rca.fit_transform(X, y)
 
 plot_tsne(X_rca, y)
diff --git a/examples/plot_sandwich.py b/examples/plot_sandwich.py
index d5856667..740852be 100644
--- a/examples/plot_sandwich.py
+++ b/examples/plot_sandwich.py
@@ -6,6 +6,13 @@
 Sandwich demo based on code from http://nbviewer.ipython.org/6576096
 """
 
+######################################################################
+# .. note::
+#
+#     In order to show the charts of the examples you need a graphical
+#     ``matplotlib`` backend installed. For instance, use ``pip install pyqt5``
+#     to get Qt graphical interface or use your favorite one.
+
 import numpy as np
 from matplotlib import pyplot as plt
 from sklearn.metrics import pairwise_distances
@@ -28,9 +35,9 @@ def sandwich_demo():
 
   mls = [
       LMNN(),
-      ITML_Supervised(num_constraints=200),
-      SDML_Supervised(num_constraints=200, balance_param=0.001),
-      LSML_Supervised(num_constraints=200),
+      ITML_Supervised(n_constraints=200),
+      SDML_Supervised(n_constraints=200, balance_param=0.001),
+      LSML_Supervised(n_constraints=200),
   ]
 
   for ax_num, ml in enumerate(mls, start=3):
diff --git a/metric_learn/_util.py b/metric_learn/_util.py
index 764a34c8..868ececa 100644
--- a/metric_learn/_util.py
+++ b/metric_learn/_util.py
@@ -704,7 +704,7 @@ def _initialize_metric_mahalanobis(input, init='identity', random_state=None,
   elif init == 'covariance':
     if input.ndim == 3:
       # if the input are tuples, we need to form an X by deduplication
-      X = np.vstack({tuple(row) for row in input.reshape(-1, n_features)})
+      X = np.unique(np.vstack(input), axis=0)
     else:
       X = input
     # atleast2d is necessary to deal with scalar covariance matrices
diff --git a/metric_learn/_version.py b/metric_learn/_version.py
index 8411e551..a71c5c7f 100644
--- a/metric_learn/_version.py
+++ b/metric_learn/_version.py
@@ -1 +1 @@
-__version__ = '0.6.1'
+__version__ = '0.7.0'
diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
index 721d7ba0..47efe4b7 100644
--- a/metric_learn/base_metric.py
+++ b/metric_learn/base_metric.py
@@ -2,13 +2,14 @@
 Base module.
 """
 
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.utils.extmath import stable_cumsum
 from sklearn.utils.validation import _is_arraylike, check_is_fitted
 from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve
 import numpy as np
 from abc import ABCMeta, abstractmethod
 from ._util import ArrayIndexer, check_input, validate_vector
+import warnings
 
 
 class BaseMetricLearner(BaseEstimator, metaclass=ABCMeta):
@@ -27,13 +28,24 @@ def __init__(self, preprocessor=None):
 
   @abstractmethod
   def score_pairs(self, pairs):
-    """Returns the score between pairs
+    """
+    Returns the score between pairs
     (can be a similarity, or a distance/metric depending on the algorithm)
 
+    .. deprecated:: 0.7.0
+        Refer to `pair_distance` and `pair_score`.
+
+    .. warning::
+        This method will be removed in 0.8.0. Please refer to `pair_distance`
+        or `pair_score`. This change will occur in order to add learners
+        that don't necessarily learn a Mahalanobis distance.
+
     Parameters
     ----------
-    pairs : `numpy.ndarray`, shape=(n_samples, 2, n_features)
-      3D array of pairs.
+    pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
+      3D Array of pairs to score, with each row corresponding to two points,
+      or 2D array of indices of pairs if the metric learner uses a
+      preprocessor.
 
     Returns
     -------
@@ -43,10 +55,71 @@ def score_pairs(self, pairs):
     See Also
     --------
     get_metric : a method that returns a function to compute the metric between
-      two points. The difference with `score_pairs` is that it works on two 1D
-      arrays and cannot use a preprocessor. Besides, the returned function is
-      independent of the metric learner and hence is not modified if the metric
-      learner is.
+      two points. The difference with `score_pairs` is that it works on two
+      1D arrays and cannot use a preprocessor. Besides, the returned function
+      is independent of the metric learner and hence is not modified if the
+      metric learner is.
+    """
+
+  @abstractmethod
+  def pair_score(self, pairs):
+    """
+    .. versionadded:: 0.7.0 Compute the similarity score between pairs
+
+    Returns the similarity score between pairs of points (the larger the score,
+    the more similar the pair). For metric learners that learn a distance,
+    the score is simply the opposite of the distance between pairs. All
+    learners have access to this method.
+
+    Parameters
+    ----------
+    pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
+      3D Array of pairs to score, with each row corresponding to two points,
+      or 2D array of indices of pairs if the metric learner uses a
+      preprocessor.
+
+    Returns
+    -------
+    scores : `numpy.ndarray` of shape=(n_pairs,)
+      The score of every pair.
+
+    See Also
+    --------
+    get_metric : a method that returns a function to compute the metric between
+      two points. The difference with `pair_score` is that it works on two
+      1D arrays and cannot use a preprocessor. Besides, the returned function
+      is independent of the metric learner and hence is not modified if the
+      metric learner is.
+    """
+
+  @abstractmethod
+  def pair_distance(self, pairs):
+    """
+    .. versionadded:: 0.7.0 Compute the distance between pairs
+
+    Returns the (pseudo) distance between pairs, when available. For metric
+    learners that do not learn a (pseudo) distance, an error is thrown
+    instead.
+
+    Parameters
+    ----------
+    pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
+      3D Array of pairs for which to compute the distance, with each
+      row corresponding to two points, or 2D array of indices of pairs
+      if the metric learner uses a preprocessor.
+
+    Returns
+    -------
+    scores : `numpy.ndarray` of shape=(n_pairs,)
+      The distance between every pair.
+
+    See Also
+    --------
+    get_metric : a method that returns a function to compute the metric between
+      two points. The difference with `pair_distance` is that it works on two
+      1D arrays and cannot use a preprocessor. Besides, the returned function
+      is independent of the metric learner and hence is not modified if the
+      metric learner is.
     """
 
   def _check_preprocessor(self):
@@ -93,17 +166,23 @@ def _prepare_inputs(self, X, y=None, type_of_inputs='classic',
     self._check_preprocessor()
 
     check_is_fitted(self, ['preprocessor_'])
-    return check_input(X, y,
+    outs = check_input(X, y,
                        type_of_inputs=type_of_inputs,
                        preprocessor=self.preprocessor_,
                        estimator=self,
                        tuple_size=getattr(self, '_tuple_size', None),
                        **kwargs)
+    # Conform to SLEP010
+    if not hasattr(self, 'n_features_in_'):
+      self.n_features_in_ = (outs if y is None else outs[0]).shape[1]
+    return outs
 
   @abstractmethod
   def get_metric(self):
-    """Returns a function that takes as input two 1D arrays and outputs the
-    learned metric score on these two points.
+    """Returns a function that takes as input two 1D arrays and outputs
+    the value of the learned metric on these two points. Depending on the
+    algorithm, it can return a distance or a similarity function between
+    pairs.
 
     This function will be independent from the metric learner that learned it
     (it will not be modified if the initial metric learner is modified),
@@ -136,15 +215,25 @@ def get_metric(self):
 
     See Also
     --------
-    score_pairs : a method that returns the metric score between several pairs
-      of points. Unlike `get_metric`, this is a method of the metric learner
-      and therefore can change if the metric learner changes. Besides, it can
-      use the metric learner's preprocessor, and works on concatenated arrays.
+    pair_distance : a method that returns the distance between several
+      pairs of points. Unlike `get_metric`, this is a method of the metric
+      learner and therefore can change if the metric learner changes. Besides,
+      it can use the metric learner's preprocessor, and works on concatenated
+      arrays.
+
+    pair_score : a method that returns the similarity score between
+      several pairs of points. Unlike `get_metric`, this is a method of the
+      metric learner and therefore can change if the metric learner changes.
+      Besides, it can use the metric learner's preprocessor, and works on
+      concatenated arrays.
     """
 
 
 class MetricTransformer(metaclass=ABCMeta):
-
+  """
+  Base class for all learners that can transform data into a new space
+  with the metric learned.
+  """
   @abstractmethod
   def transform(self, X):
     """Applies the metric transformation.
@@ -182,13 +271,92 @@ class MahalanobisMixin(BaseMetricLearner, MetricTransformer,
   """
 
   def score_pairs(self, pairs):
-    r"""Returns the learned Mahalanobis distance between pairs.
+    r"""
+    Returns the learned Mahalanobis distance between pairs.
+
+    This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}`
+    where ``M`` is the learned Mahalanobis matrix, for every pair of points
+    ``x`` and ``x'``. This corresponds to the euclidean distance between
+    embeddings of the points in a new space, obtained through a linear
+    transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e -
+    x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See
+    :class:`MahalanobisMixin`).
+
+    .. deprecated:: 0.7.0
+        Please use `pair_distance` instead.
+
+    .. warning::
+        This method will be removed in 0.8.0. Please refer to `pair_distance`
+        or `pair_score`. This change will occur in order to add learners
+        that don't necessarily learn a Mahalanobis distance.
+
+    Parameters
+    ----------
+    pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
+      3D Array of pairs to score, with each row corresponding to two points,
+      or 2D array of indices of pairs if the metric learner uses a
+      preprocessor.
+
+    Returns
+    -------
+    scores : `numpy.ndarray` of shape=(n_pairs,)
+      The learned Mahalanobis distance for every pair.
 
-    This distance is defined as: :math:`d_M(x, x') = \sqrt{(x-x')^T M (x-x')}`
+    See Also
+    --------
+    get_metric : a method that returns a function to compute the metric between
+      two points. The difference with `score_pairs` is that it works on two
+      1D arrays and cannot use a preprocessor. Besides, the returned function
+      is independent of the metric learner and hence is not modified if the
+      metric learner is.
+
+    :ref:`mahalanobis_distances` : The section of the project documentation
+      that describes Mahalanobis Distances.
+    """
+    dpr_msg = ("score_pairs will be deprecated in release 0.7.0. "
+               "Use pair_score to compute similarity scores, or "
+               "pair_distances to compute distances.")
+    warnings.warn(dpr_msg, category=FutureWarning)
+    return self.pair_distance(pairs)
+
+  def pair_score(self, pairs):
+    """
+    Returns the opposite of the learned Mahalanobis distance between pairs.
+
+    Parameters
+    ----------
+    pairs : array-like, shape=(n_pairs, 2, n_features) or (n_pairs, 2)
+      3D Array of pairs to score, with each row corresponding to two points,
+      or 2D array of indices of pairs if the metric learner uses a
+      preprocessor.
+
+    Returns
+    -------
+    scores : `numpy.ndarray` of shape=(n_pairs,)
+      The opposite of the learned Mahalanobis distance for every pair.
+
+    See Also
+    --------
+    get_metric : a method that returns a function to compute the metric between
+      two points. The difference with `pair_score` is that it works on two
+      1D arrays and cannot use a preprocessor. Besides, the returned function
+      is independent of the metric learner and hence is not modified if the
+      metric learner is.
+
+    :ref:`mahalanobis_distances` : The section of the project documentation
+      that describes Mahalanobis Distances.
+    """
+    return -1 * self.pair_distance(pairs)
+
+  def pair_distance(self, pairs):
+    """
+    Returns the learned Mahalanobis distance between pairs.
+
+    This distance is defined as: :math:`d_M(x, x') = \\sqrt{(x-x')^T M (x-x')}`
     where ``M`` is the learned Mahalanobis matrix, for every pair of points
     ``x`` and ``x'``. This corresponds to the euclidean distance between
     embeddings of the points in a new space, obtained through a linear
-    transformation. Indeed, we have also: :math:`d_M(x, x') = \sqrt{(x_e -
+    transformation. Indeed, we have also: :math:`d_M(x, x') = \\sqrt{(x_e -
     x_e')^T (x_e- x_e')}`, with :math:`x_e = L x` (See
     :class:`MahalanobisMixin`).
 
@@ -207,10 +375,10 @@ def score_pairs(self, pairs):
     See Also
     --------
     get_metric : a method that returns a function to compute the metric between
-      two points. The difference with `score_pairs` is that it works on two 1D
-      arrays and cannot use a preprocessor. Besides, the returned function is
-      independent of the metric learner and hence is not modified if the metric
-      learner is.
+      two points. The difference with `pair_distance` is that it works on two
+      1D arrays and cannot use a preprocessor. Besides, the returned function
+      is independent of the metric learner and hence is not modified if the
+      metric learner is.
 
     :ref:`mahalanobis_distances` : The section of the project documentation
       that describes Mahalanobis Distances.
@@ -296,7 +464,7 @@ def get_mahalanobis_matrix(self):
     return self.components_.T.dot(self.components_)
 
 
-class _PairsClassifierMixin(BaseMetricLearner):
+class _PairsClassifierMixin(BaseMetricLearner, ClassifierMixin):
   """Base class for pairs learners.
 
   Attributes
@@ -307,6 +475,7 @@ class _PairsClassifierMixin(BaseMetricLearner):
     classified as dissimilar.
   """
 
+  classes_ = np.array([0, 1])
   _tuple_size = 2  # number of points in a tuple, 2 for pairs
 
   def predict(self, pairs):
@@ -361,7 +530,7 @@ def decision_function(self, pairs):
     pairs = check_input(pairs, type_of_inputs='tuples',
                         preprocessor=self.preprocessor_,
                         estimator=self, tuple_size=self._tuple_size)
-    return - self.score_pairs(pairs)
+    return self.pair_score(pairs)
 
   def score(self, pairs, y):
     """Computes score of pairs similarity prediction.
@@ -409,8 +578,14 @@ def set_threshold(self, threshold):
       The pairs classifier with the new threshold set.
     """
     check_is_fitted(self, 'preprocessor_')
-
-    self.threshold_ = threshold
+    try:
+      self.threshold_ = float(threshold)
+    except TypeError:
+      raise ValueError('Parameter threshold must be a real number. '
+                       'Got {} instead.'.format(type(threshold)))
+    except ValueError:
+      raise ValueError('Parameter threshold must be a real number. '
+                       'Got {} instead.'.format(type(threshold)))
     return self
 
   def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
@@ -466,7 +641,7 @@ def calibrate_threshold(self, pairs_valid, y_valid, strategy='accuracy',
            evaluation tool in clinical medicine, MH Zweig, G Campbell -
            Clinical chemistry, 1993
 
-    .. [2] most of the code of this function is from scikit-learn's PR #10117
+    .. [2] Most of the code of this function is from scikit-learn's PR #10117
 
     See Also
     --------
@@ -578,10 +753,12 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None,
                          'Got {} instead.'.format(type(beta)))
 
 
-class _TripletsClassifierMixin(BaseMetricLearner):
-  """Base class for triplets learners.
+class _TripletsClassifierMixin(BaseMetricLearner, ClassifierMixin):
+  """
+  Base class for triplets learners.
   """
 
+  classes_ = np.array([0, 1])
   _tuple_size = 3  # number of points in a tuple, 3 for triplets
 
   def predict(self, triplets):
@@ -602,7 +779,7 @@ def predict(self, triplets):
     prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
       Predictions of the ordering of pairs, for each triplet.
     """
-    return np.sign(self.decision_function(triplets))
+    return 2 * (self.decision_function(triplets) > 0) - 1
 
   def decision_function(self, triplets):
     """Predicts differences between sample distances in input triplets.
@@ -631,8 +808,8 @@ def decision_function(self, triplets):
     triplets = check_input(triplets, type_of_inputs='tuples',
                            preprocessor=self.preprocessor_,
                            estimator=self, tuple_size=self._tuple_size)
-    return (self.score_pairs(triplets[:, [0, 2]]) -
-            self.score_pairs(triplets[:, :2]))
+    return (self.pair_score(triplets[:, :2]) -
+            self.pair_score(triplets[:, [0, 2]]))
 
   def score(self, triplets):
     """Computes score on input triplets.
@@ -662,10 +839,12 @@ def score(self, triplets):
     return self.predict(triplets).mean() / 2 + 0.5
 
 
-class _QuadrupletsClassifierMixin(BaseMetricLearner):
-  """Base class for quadruplets learners.
+class _QuadrupletsClassifierMixin(BaseMetricLearner, ClassifierMixin):
+  """
+  Base class for quadruplets learners.
   """
 
+  classes_ = np.array([0, 1])
   _tuple_size = 4  # number of points in a tuple, 4 for quadruplets
 
   def predict(self, quadruplets):
@@ -716,8 +895,8 @@ def decision_function(self, quadruplets):
     quadruplets = check_input(quadruplets, type_of_inputs='tuples',
                               preprocessor=self.preprocessor_,
                               estimator=self, tuple_size=self._tuple_size)
-    return (self.score_pairs(quadruplets[:, 2:]) -
-            self.score_pairs(quadruplets[:, :2]))
+    return (self.pair_score(quadruplets[:, :2]) -
+            self.pair_score(quadruplets[:, 2:]))
 
   def score(self, quadruplets):
     """Computes score on input quadruplets
diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index 2d86b819..4993e9ef 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -7,6 +7,7 @@
 from sklearn.utils import check_random_state
 from sklearn.neighbors import NearestNeighbors
 
+
 __all__ = ['Constraints']
 
 
@@ -31,21 +32,21 @@ def __init__(self, partial_labels):
     partial_labels = np.asanyarray(partial_labels, dtype=int)
     self.partial_labels = partial_labels
 
-  def positive_negative_pairs(self, num_constraints, same_length=False,
-                              random_state=None):
+  def positive_negative_pairs(self, n_constraints, same_length=False,
+                              random_state=None, num_constraints='deprecated'):
     """
     Generates positive pairs and negative pairs from labeled data.
 
-    Positive pairs are formed by randomly drawing ``num_constraints`` pairs of
+    Positive pairs are formed by randomly drawing ``n_constraints`` pairs of
     points with the same label. Negative pairs are formed by randomly drawing
-    ``num_constraints`` pairs of points with different label.
+    ``n_constraints`` pairs of points with different label.
 
     In the case where it is not possible to generate enough positive or
     negative pairs, a smaller number of pairs will be returned with a warning.
 
     Parameters
     ----------
-    num_constraints : int
+    n_constraints : int
       Number of positive and negative constraints to generate.
 
     same_length : bool, optional (default=False)
@@ -55,6 +56,8 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int.
 
+    num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
     Returns
     -------
     a : array-like, shape=(n_constraints,)
@@ -69,10 +72,18 @@ def positive_negative_pairs(self, num_constraints, same_length=False,
     d : array-like, shape=(n_constraints,)
       1D array of indicators for the right elements of negative pairs.
     """
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      # Honor the deprecated alias: it must override the local used below.
+      n_constraints = num_constraints
+    self.n_constraints = n_constraints
     random_state = check_random_state(random_state)
-    a, b = self._pairs(num_constraints, same_label=True,
+    a, b = self._pairs(n_constraints, same_label=True,
                        random_state=random_state)
-    c, d = self._pairs(num_constraints, same_label=False,
+    c, d = self._pairs(n_constraints, same_label=False,
                        random_state=random_state)
     if same_length and len(a) != len(c):
       n = min(len(a), len(c))
@@ -95,12 +106,14 @@ def generate_knntriplets(self, X, k_genuine, k_impostor):
 
     Parameters
     ----------
-      X : (n x d) matrix
-        Input data, where each row corresponds to a single instance.
-      k_genuine : int
-        Number of neighbors of the same class to be taken into account.
-      k_impostor : int
-        Number of neighbors of different classes to be taken into account.
+    X : (n x d) matrix
+      Input data, where each row corresponds to a single instance.
+
+    k_genuine : int
+      Number of neighbors of the same class to be taken into account.
+
+    k_impostor : int
+      Number of neighbors of different classes to be taken into account.
 
     Returns
     -------
@@ -188,15 +201,15 @@ def generate_knntriplets(self, X, k_genuine, k_impostor):
 
     return triplets
 
-  def _pairs(self, num_constraints, same_label=True, max_iter=10,
+  def _pairs(self, n_constraints, same_label=True, max_iter=10,
              random_state=np.random):
     known_label_idx, = np.where(self.partial_labels >= 0)
     known_labels = self.partial_labels[known_label_idx]
     num_labels = len(known_labels)
     ab = set()
     it = 0
-    while it < max_iter and len(ab) < num_constraints:
-      nc = num_constraints - len(ab)
+    while it < max_iter and len(ab) < n_constraints:
+      nc = n_constraints - len(ab)
       for aidx in random_state.randint(num_labels, size=nc):
         if same_label:
           mask = known_labels[aidx] == known_labels
@@ -207,25 +220,26 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10,
         if len(b_choices) > 0:
           ab.add((aidx, random_state.choice(b_choices)))
       it += 1
-    if len(ab) < num_constraints:
+    if len(ab) < n_constraints:
       warnings.warn("Only generated %d %s constraints (requested %d)" % (
-          len(ab), 'positive' if same_label else 'negative', num_constraints))
-    ab = np.array(list(ab)[:num_constraints], dtype=int)
+          len(ab), 'positive' if same_label else 'negative', n_constraints))
+    ab = np.array(list(ab)[:n_constraints], dtype=int)
     return known_label_idx[ab.T]
 
-  def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
+  def chunks(self, n_chunks=100, chunk_size=2, random_state=None,
+             num_chunks='deprecated'):
     """
     Generates chunks from labeled data.
 
-    Each of ``num_chunks`` chunks is composed of ``chunk_size`` points from
+    Each of ``n_chunks`` chunks is composed of ``chunk_size`` points from
     the same class drawn at random. Each point can belong to at most 1 chunk.
 
-    In the case where there is not enough points to generate ``num_chunks``
+    In the case where there is not enough points to generate ``n_chunks``
     chunks of size ``chunk_size``, a ValueError will be raised.
 
     Parameters
     ----------
-    num_chunks : int, optional (default=100)
+    n_chunks : int, optional (default=100)
       Number of chunks to generate.
 
     chunk_size : int, optional (default=2)
@@ -234,12 +248,20 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
     random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int.
 
+    num_chunks : Renamed to n_chunks. Will be removed in 0.7.0
+
     Returns
     -------
     chunks : array-like, shape=(n_samples,)
       1D array of chunk indicators, where -1 indicates that the point does not
       belong to any chunk.
     """
+    if num_chunks != 'deprecated':
+      warnings.warn('"num_chunks" parameter has been renamed to'
+                    ' "n_chunks". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_chunks = num_chunks
     random_state = check_random_state(random_state)
     chunks = -np.ones_like(self.partial_labels, dtype=int)
     uniq, lookup = np.unique(self.partial_labels, return_inverse=True)
@@ -247,13 +269,13 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
     all_inds = [set(np.where(lookup == c)[0]) for c in range(len(uniq))
                 if c not in unknown_uniq]
     max_chunks = int(np.sum([len(s) // chunk_size for s in all_inds]))
-    if max_chunks < num_chunks:
+    if max_chunks < n_chunks:
       raise ValueError(('Not enough possible chunks of %d elements in each'
                         ' class to form expected %d chunks - maximum number'
                         ' of chunks is %d'
-                        ) % (chunk_size, num_chunks, max_chunks))
+                        ) % (chunk_size, n_chunks, max_chunks))
     idx = 0
-    while idx < num_chunks and all_inds:
+    while idx < n_chunks and all_inds:
       if len(all_inds) == 1:
         c = 0
       else:
diff --git a/metric_learn/covariance.py b/metric_learn/covariance.py
index 3b218e6d..2c05b28d 100644
--- a/metric_learn/covariance.py
+++ b/metric_learn/covariance.py
@@ -42,6 +42,10 @@ def __init__(self, preprocessor=None):
 
   def fit(self, X, y=None):
     """
+    Calculates the covariance matrix of the input data.
+
+    Parameters
+    ----------
     X : data matrix, (n x d)
     y : unused
     """
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 43872b60..9537eec2 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -9,6 +9,7 @@
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseITML(MahalanobisMixin):
@@ -16,12 +17,20 @@ class _BaseITML(MahalanobisMixin):
 
   _tuple_size = 2  # constraints are pairs
 
-  def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
+  def __init__(self, gamma=1., max_iter=1000, tol=1e-3,
                prior='identity', verbose=False,
-               preprocessor=None, random_state=None):
+               preprocessor=None, random_state=None,
+               convergence_threshold='deprecated'):
+    if convergence_threshold != 'deprecated':
+      warnings.warn('"convergence_threshold" parameter has been '
+                    ' renamed to "tol". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      tol = convergence_threshold
+    self.convergence_threshold = 'deprecated'  # Avoid errors
     self.gamma = gamma
     self.max_iter = max_iter
-    self.convergence_threshold = convergence_threshold
+    self.tol = tol
     self.prior = prior
     self.verbose = verbose
     self.random_state = random_state
@@ -32,7 +41,7 @@ def _fit(self, pairs, y, bounds=None):
                                     type_of_inputs='tuples')
     # init bounds
     if bounds is None:
-      X = np.vstack({tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
+      X = np.unique(np.vstack(pairs), axis=0)
       self.bounds_ = np.percentile(pairwise_distances(X), (5, 95))
     else:
       bounds = check_array(bounds, allow_nd=False, ensure_min_samples=0,
@@ -86,7 +95,7 @@ def _fit(self, pairs, y, bounds=None):
         conv = np.inf
         break
       conv = np.abs(lambdaold - _lambda).sum() / normsum
-      if conv < self.convergence_threshold:
+      if conv < self.tol:
         break
       lambdaold = _lambda.copy()
       if self.verbose:
@@ -122,7 +131,7 @@ class ITML(_BaseITML, _PairsClassifierMixin):
   max_iter : int, optional (default=1000)
     Maximum number of iteration of the optimization procedure.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence tolerance.
 
   prior : string or numpy array, optional (default='identity')
@@ -158,6 +167,8 @@ class ITML(_BaseITML, _PairsClassifierMixin):
     A pseudo random number generator object or a seed for it if int. If
     ``prior='random'``, ``random_state`` is used to set the prior.
 
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Attributes
   ----------
   bounds_ : `numpy.ndarray`, shape=(2,)
@@ -198,7 +209,7 @@ class ITML(_BaseITML, _PairsClassifierMixin):
   ----------
   .. [1] Jason V. Davis, et al. `Information-theoretic Metric Learning
          <http://www.prateekjain.org/publications/all_papers\
-          /DavisKJSD07_ICML.pdf>`_. ICML 2007.
+         /DavisKJSD07_ICML.pdf>`_. ICML 2007.
   """
 
   def fit(self, pairs, y, bounds=None, calibration_params=None):
@@ -260,10 +271,10 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   max_iter : int, optional (default=1000)
     Maximum number of iterations of the optimization procedure.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Tolerance of the optimization procedure.
 
-  num_constraints : int, optional (default=None)
+  n_constraints : int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -302,6 +313,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
     case, `random_state` is also used to randomly sample constraints from
     labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
 
   Attributes
   ----------
@@ -328,7 +342,7 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> itml = ITML_Supervised(num_constraints=200)
+  >>> itml = ITML_Supervised(n_constraints=200)
   >>> itml.fit(X, Y)
 
   See Also
@@ -338,14 +352,26 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
     that describes the supervised version of weakly supervised estimators.
   """
 
-  def __init__(self, gamma=1.0, max_iter=1000, convergence_threshold=1e-3,
-               num_constraints=None, prior='identity',
-               verbose=False, preprocessor=None, random_state=None):
+  def __init__(self, gamma=1.0, max_iter=1000, tol=1e-3,
+               n_constraints=None, prior='identity',
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated',
+               convergence_threshold='deprecated'):
     _BaseITML.__init__(self, gamma=gamma, max_iter=max_iter,
-                       convergence_threshold=convergence_threshold,
+                       tol=tol,
                        prior=prior, verbose=verbose,
-                       preprocessor=preprocessor, random_state=random_state)
-    self.num_constraints = num_constraints
+                       preprocessor=preprocessor,
+                       random_state=random_state,
+                       convergence_threshold=convergence_threshold)
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_constraints = num_constraints
+    self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y, bounds=None):
     """Create constraints from labels and learn the ITML model.
@@ -369,13 +395,13 @@ def fit(self, X, y, bounds=None):
       points in the training data `X`.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
 
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseITML._fit(self, pairs, y, bounds=bounds)
diff --git a/metric_learn/lfda.py b/metric_learn/lfda.py
index bfa3275e..82ae20eb 100644
--- a/metric_learn/lfda.py
+++ b/metric_learn/lfda.py
@@ -65,7 +65,7 @@ class LFDA(MahalanobisMixin, TransformerMixin):
   >>> lfda.fit(X, Y)
 
   References
-  ------------------
+  ----------
   .. [1] Masashi Sugiyama. `Dimensionality Reduction of Multimodal Labeled
          Data by Local Fisher Discriminant Analysis
          <http://www.ms.k.u-tokyo.ac.jp/2007/LFDA.pdf>`_. JMLR 2007.
diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py
index 8bdc4bf0..47bb065f 100644
--- a/metric_learn/lmnn.py
+++ b/metric_learn/lmnn.py
@@ -5,6 +5,7 @@
 from collections import Counter
 from sklearn.metrics import euclidean_distances
 from sklearn.base import TransformerMixin
+import warnings
 
 from ._util import _initialize_components, _check_n_components
 from .base_metric import MahalanobisMixin
@@ -63,7 +64,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
       :meth:`fit` and n_features_a must be less than or equal to that.
       If ``n_components`` is not None, n_features_a must match it.
 
-  k : int, optional (default=3)
+  n_neighbors : int, optional (default=3)
     Number of neighbors to consider, not including self-edges.
 
   min_iter : int, optional (default=50)
@@ -99,6 +100,8 @@ class LMNN(MahalanobisMixin, TransformerMixin):
     transformation. If ``init='pca'``, ``random_state`` is passed as an
     argument to PCA when initializing the transformation.
 
+  k : Renamed to n_neighbors. Will be removed in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -116,7 +119,7 @@ class LMNN(MahalanobisMixin, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lmnn = LMNN(k=5, learn_rate=1e-6)
+  >>> lmnn = LMNN(n_neighbors=5, learn_rate=1e-6)
   >>> lmnn.fit(X, Y, verbose=False)
 
   References
@@ -128,12 +131,19 @@ class LMNN(MahalanobisMixin, TransformerMixin):
          2005.
   """
 
-  def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
+  def __init__(self, init='auto', n_neighbors=3, min_iter=50, max_iter=1000,
                learn_rate=1e-7, regularization=0.5, convergence_tol=0.001,
                verbose=False, preprocessor=None,
-               n_components=None, random_state=None):
+               n_components=None, random_state=None, k='deprecated'):
     self.init = init
-    self.k = k
+    if k != 'deprecated':
+      warnings.warn('"k" parameter has been renamed to'
+                    ' "n_neighbors". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_neighbors = k
+    self.k = 'deprecated'  # To avoid no_attribute error
+    self.n_neighbors = n_neighbors
     self.min_iter = min_iter
     self.max_iter = max_iter
     self.learn_rate = learn_rate
@@ -145,7 +155,7 @@ def __init__(self, init='auto', k=3, min_iter=50, max_iter=1000,
     super(LMNN, self).__init__(preprocessor)
 
   def fit(self, X, y):
-    k = self.k
+    k = self.n_neighbors
     reg = self.regularization
     learn_rate = self.learn_rate
 
@@ -162,7 +172,7 @@ def fit(self, X, y):
                                               self.verbose,
                                               random_state=self.random_state)
     required_k = np.bincount(label_inds).min()
-    if self.k > required_k:
+    if self.n_neighbors > required_k:
       raise ValueError('not enough class labels for specified k'
                        ' (smallest class has %d)' % required_k)
 
@@ -275,12 +285,12 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
     return 2 * G, objective, total_active
 
   def _select_targets(self, X, label_inds):
-    target_neighbors = np.empty((X.shape[0], self.k), dtype=int)
+    target_neighbors = np.empty((X.shape[0], self.n_neighbors), dtype=int)
     for label in self.labels_:
       inds, = np.nonzero(label_inds == label)
       dd = euclidean_distances(X[inds], squared=True)
       np.fill_diagonal(dd, np.inf)
-      nn = np.argsort(dd)[..., :self.k]
+      nn = np.argsort(dd)[..., :self.n_neighbors]
       target_neighbors[inds] = inds[nn]
     return target_neighbors
 
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index 28f65ce7..af7fa95b 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -9,6 +9,7 @@
 from .base_metric import _QuadrupletsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseLSML(MahalanobisMixin):
@@ -261,11 +262,11 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
-  weights : (num_constraints,) array of floats, optional (default=None)
+  weights : (n_constraints,) array of floats, optional (default=None)
     Relative weight given to each constraint. If None, defaults to uniform
     weights.
 
@@ -282,6 +283,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
     prior. In any case, `random_state` is also used to randomly sample
     constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import LSML_Supervised
@@ -289,7 +292,7 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> lsml = LSML_Supervised(num_constraints=200)
+  >>> lsml = LSML_Supervised(n_constraints=200)
   >>> lsml.fit(X, Y)
 
   Attributes
@@ -303,12 +306,22 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   """
 
   def __init__(self, tol=1e-3, max_iter=1000, prior='identity',
-               num_constraints=None, weights=None,
-               verbose=False, preprocessor=None, random_state=None):
+               n_constraints=None, weights=None,
+               verbose=False, preprocessor=None, random_state=None,
+               num_constraints='deprecated'):
     _BaseLSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior,
                        verbose=verbose, preprocessor=preprocessor,
                        random_state=random_state)
-    self.num_constraints = num_constraints
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
     self.weights = weights
 
   def fit(self, X, y):
@@ -323,13 +336,13 @@ def fit(self, X, y):
       Data labels.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
 
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
+    pos_neg = c.positive_negative_pairs(n_constraints, same_length=True,
                                         random_state=self.random_state)
     return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                           weights=self.weights)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 1ff30b1e..5cf166fd 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -6,19 +6,28 @@
 from .base_metric import _PairsClassifierMixin, MahalanobisMixin
 from .constraints import Constraints, wrap_pairs
 from ._util import components_from_metric, _initialize_metric_mahalanobis
+import warnings
 
 
 class _BaseMMC(MahalanobisMixin):
 
   _tuple_size = 2  # constraints are pairs
 
-  def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-3,
+  def __init__(self, max_iter=100, max_proj=10000, tol=1e-3,
                init='identity', diagonal=False,
                diagonal_c=1.0, verbose=False, preprocessor=None,
-               random_state=None):
+               random_state=None,
+               convergence_threshold='deprecated'):
+    if convergence_threshold != 'deprecated':
+      warnings.warn('"convergence_threshold" parameter has been '
+                    ' renamed to "tol". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      tol = convergence_threshold
+    self.convergence_threshold = 'deprecated'  # Avoid errors
     self.max_iter = max_iter
     self.max_proj = max_proj
-    self.convergence_threshold = convergence_threshold
+    self.tol = tol
     self.init = init
     self.diagonal = diagonal
     self.diagonal_c = diagonal_c
@@ -145,13 +154,13 @@ def _fit_full(self, pairs, y):
         A[:] = A_old + alpha * M
 
       delta = np.linalg.norm(alpha * M) / np.linalg.norm(A_old)
-      if delta < self.convergence_threshold:
+      if delta < self.tol:
         break
       if self.verbose:
         print('mmc iter: %d, conv = %f, projections = %d' %
               (cycle, delta, it + 1))
 
-    if delta > self.convergence_threshold:
+    if delta > self.tol:
       self.converged_ = False
       if self.verbose:
         print('mmc did not converge, conv = %f' % (delta,))
@@ -185,7 +194,7 @@ def _fit_diag(self, pairs, y):
     reduction = 2.0
     w = np.diag(self.A_).copy()
 
-    while error > self.convergence_threshold and it < self.max_iter:
+    while error > self.tol and it < self.max_iter:
 
       fD0, fD_1st_d, fD_2nd_d = self._D_constraint(neg_pairs, w)
       obj_initial = np.dot(s_sum, w) + self.diagonal_c * fD0
@@ -332,7 +341,7 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
   max_proj : int, optional (default=10000)
     Maximum number of projection steps.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
   init : string or numpy array, optional (default='identity')
@@ -377,6 +386,8 @@ class MMC(_BaseMMC, _PairsClassifierMixin):
     ``init='random'``, ``random_state`` is used to initialize the random
     transformation.
 
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Attributes
   ----------
   n_iter_ : `int`
@@ -469,10 +480,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   max_proj : int, optional (default=10000)
     Maximum number of projection steps.
 
-  convergence_threshold : float, optional (default=1e-3)
+  tol : float, optional (default=1e-3)
     Convergence threshold for the optimization procedure.
 
-  num_constraints: int, optional (default=None)
+  n_constraints: int, optional (default=None)
     Number of constraints to generate. If None, default to `20 *
     num_classes**2`.
 
@@ -518,6 +529,10 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     Mahalanobis matrix.  In any case, `random_state` is also used to
     randomly sample constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
+  convergence_threshold : Renamed to tol. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import MMC_Supervised
@@ -525,7 +540,7 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> mmc = MMC_Supervised(num_constraints=200)
+  >>> mmc = MMC_Supervised(n_constraints=200)
   >>> mmc.fit(X, Y)
 
   Attributes
@@ -538,16 +553,29 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
     metric (See function `components_from_metric`.)
   """
 
-  def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
-               num_constraints=None, init='identity',
+  def __init__(self, max_iter=100, max_proj=10000, tol=1e-6,
+               n_constraints=None, init='identity',
                diagonal=False, diagonal_c=1.0, verbose=False,
-               preprocessor=None, random_state=None):
+               preprocessor=None, random_state=None,
+               num_constraints='deprecated',
+               convergence_threshold='deprecated'):
     _BaseMMC.__init__(self, max_iter=max_iter, max_proj=max_proj,
-                      convergence_threshold=convergence_threshold,
+                      tol=tol,
                       init=init, diagonal=diagonal,
                       diagonal_c=diagonal_c, verbose=verbose,
-                      preprocessor=preprocessor, random_state=random_state)
-    self.num_constraints = num_constraints
+                      preprocessor=preprocessor,
+                      random_state=random_state,
+                      convergence_threshold=convergence_threshold)
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y):
     """Create constraints from labels and learn the MMC model.
@@ -561,13 +589,13 @@ def fit(self, X, y):
       Data labels.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
 
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseMMC._fit(self, pairs, y)
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 34f7f3ff..253b9c92 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -13,13 +13,13 @@
 
 # mean center each chunklet separately
 def _chunk_mean_centering(data, chunks):
-  num_chunks = chunks.max() + 1
+  n_chunks = chunks.max() + 1
   chunk_mask = chunks != -1
   # We need to ensure the data is float so that we can substract the
   # mean on it
   chunk_data = data[chunk_mask].astype(float, copy=False)
   chunk_labels = chunks[chunk_mask]
-  for c in range(num_chunks):
+  for c in range(n_chunks):
     mask = chunk_labels == c
     chunk_data[mask] -= chunk_data[mask].mean(axis=0)
 
@@ -58,7 +58,7 @@ class RCA(MahalanobisMixin, TransformerMixin):
   >>> rca.fit(X, chunks)
 
   References
-  ------------------
+  ----------
   .. [1] Noam Shental, et al. `Adjustment learning and relevant component
          analysis <http://citeseerx.ist.\
          psu.edu/viewdoc/download?doi=10.1.1.19.2871&rep=rep1&type=pdf>`_ .
@@ -112,7 +112,7 @@ def fit(self, X, chunks):
     # Fisher Linear Discriminant projection
     if dim < X.shape[1]:
       total_cov = np.cov(X[chunk_mask], rowvar=0)
-      tmp = np.linalg.lstsq(total_cov, inner_cov)[0]
+      tmp = np.linalg.lstsq(total_cov, inner_cov, rcond=None)[0]
       vals, vecs = np.linalg.eig(tmp)
       inds = np.argsort(vals)[:dim]
       A = vecs[:, inds]
@@ -135,14 +135,14 @@ class RCA_Supervised(RCA):
 
   `RCA_Supervised` creates chunks of similar points by first sampling a
   class, taking `chunk_size` elements in it, and repeating the process
-  `num_chunks` times.
+  `n_chunks` times.
 
   Parameters
   ----------
   n_components : int or None, optional (default=None)
     Dimensionality of reduced space (if None, defaults to dimension of X).
 
-  num_chunks: int, optional (default=100)
+  n_chunks: int, optional (default=100)
     Number of chunks to generate.
 
   chunk_size: int, optional (default=2)
@@ -156,6 +156,8 @@ class RCA_Supervised(RCA):
     A pseudo random number generator object or a seed for it if int.
     It is used to randomly sample constraints from labels.
 
+  num_chunks : Renamed to n_chunks. Will be removed in 0.7.0
+
   Examples
   --------
   >>> from metric_learn import RCA_Supervised
@@ -163,7 +165,7 @@ class RCA_Supervised(RCA):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> rca = RCA_Supervised(num_chunks=30, chunk_size=2)
+  >>> rca = RCA_Supervised(n_chunks=30, chunk_size=2)
   >>> rca.fit(X, Y)
 
   Attributes
@@ -172,17 +174,25 @@ class RCA_Supervised(RCA):
     The learned linear transformation ``L``.
   """
 
-  def __init__(self, n_components=None, num_chunks=100, chunk_size=2,
-               preprocessor=None, random_state=None):
+  def __init__(self, n_components=None, n_chunks=100, chunk_size=2,
+               preprocessor=None, random_state=None,
+               num_chunks='deprecated'):
     """Initialize the supervised version of `RCA`."""
     RCA.__init__(self, n_components=n_components, preprocessor=preprocessor)
-    self.num_chunks = num_chunks
+    if num_chunks != 'deprecated':
+      warnings.warn('"num_chunks" parameter has been renamed to'
+                    ' "n_chunks". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      n_chunks = num_chunks
+    self.num_chunks = 'deprecated'  # To avoid no_attribute error
+    self.n_chunks = n_chunks
     self.chunk_size = chunk_size
     self.random_state = random_state
 
   def fit(self, X, y):
     """Create constraints from labels and learn the RCA model.
-    Needs num_constraints specified in constructor.
+    Uses the n_chunks and chunk_size specified in the constructor.
 
     Parameters
     ----------
@@ -192,11 +202,11 @@ def fit(self, X, y):
     y : (n) data labels
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    chunks = Constraints(y).chunks(num_chunks=self.num_chunks,
+    chunks = Constraints(y).chunks(n_chunks=self.n_chunks,
                                    chunk_size=self.chunk_size,
                                    random_state=self.random_state)
 
-    if self.num_chunks * (self.chunk_size - 1) < X.shape[1]:
+    if self.n_chunks * (self.chunk_size - 1) < X.shape[1]:
       warnings.warn('Due to the parameters of RCA_Supervised, '
                     'the inner covariance matrix is not invertible, '
                     'so the transformation matrix will contain Nan values. '
diff --git a/metric_learn/scml.py b/metric_learn/scml.py
index c3fde272..fedf393d 100644
--- a/metric_learn/scml.py
+++ b/metric_learn/scml.py
@@ -53,7 +53,7 @@ def _fit(self, triplets, basis=None, n_basis=None):
       raise ValueError("batch_size should be an integer, instead it is of type"
                        " %s" % type(self.batch_size))
 
-    if(self.output_iter > self.max_iter):
+    if self.output_iter > self.max_iter:
       raise ValueError("The value of output_iter must be equal or smaller than"
                        " max_iter.")
 
@@ -240,6 +240,12 @@ def _generate_bases_dist_diff(self, triplets, X):
       raise ValueError("n_basis should be an integer, instead it is of type %s"
                        % type(self.n_basis))
 
+    if n_features > n_triplets:
+      raise ValueError(
+        "Number of features (%s) is greater than the number of triplets(%s).\n"
+        "Consider using dimensionality reduction or using another basis "
+        "generation scheme." % (n_features, n_triplets))
+
     basis = np.zeros((n_basis, n_features))
 
     # get all positive and negative pairs with lowest index first
@@ -260,11 +266,8 @@ def _generate_bases_dist_diff(self, triplets, X):
 
     start = 0
     finish = 0
-
-    while(finish != n_basis):
-
+    while finish != n_basis:
       # Select triplets to yield diff
-
       select_triplet = rng.choice(n_triplets, size=n_features, replace=False)
 
       # select n_features positive differences
@@ -322,9 +325,10 @@ class SCML(_BaseSCML, _TripletsClassifierMixin):
     'triplet_diffs', and an array-like of shape (n_basis, n_features).
 
     'triplet_diffs'
-      The basis set is constructed from the differences between points of
-      `n_basis` positive or negative pairs taken from the triplets
-      constrains.
+      The basis set is constructed iteratively from differences between points
+      of `n_features` positive or negative pairs randomly sampled from the
+      triplets constraints. Requires the number of training triplets to be
+      greater than or equal to `n_features`.
 
     array-like
         A matrix of shape (n_basis, n_features), that will be used as
@@ -338,7 +342,7 @@ class SCML(_BaseSCML, _TripletsClassifierMixin):
   gamma: float (default = 5e-3)
     Learning rate for the optimization algorithm.
 
-  max_iter : int (default = 100000)
+  max_iter : int (default = 10000)
     Number of iterations for the algorithm.
 
   output_iter : int (default = 5000)
@@ -377,8 +381,8 @@ class SCML(_BaseSCML, _TripletsClassifierMixin):
          <http://researchers.lille.inria.fr/abellet/papers/aaai14.pdf>`_. \
          (AAAI), 2014.
 
-  .. [2] Adapted from original \
-         `Matlab implementation.<https://github.com/bellet/SCML>`_.
+  .. [2] Adapted from original `Matlab implementation. \
+         <https://github.com/bellet/SCML>`_.
 
   See Also
   --------
@@ -473,13 +477,18 @@ class SCML_Supervised(_BaseSCML, TransformerMixin):
 
   Examples
   --------
-  >>> from metric_learn import SCML
-  >>> triplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.1, 0.6]],
-  >>>                      [[4.5, 2.3], [2.1, 2.3], [7.3, 3.4]]])
-  >>> scml = SCML(random_state=42)
-  >>> scml.fit(triplets)
-  SCML(beta=1e-5, B=None, max_iter=100000, verbose=False,
-      preprocessor=None, random_state=None)
+  >>> from metric_learn import SCML_Supervised
+  >>> from sklearn.datasets import load_iris
+  >>> iris_data = load_iris()
+  >>> X = iris_data['data']
+  >>> Y = iris_data['target']
+  >>> scml = SCML_Supervised(random_state=33)
+  >>> scml.fit(X, Y)
+  SCML_Supervised(random_state=33)
+  >>> scml.score_pairs([[X[0], X[1]], [X[0], X[2]]])
+  array([1.84640733, 1.55984363])
+  >>> scml.get_metric()(X[0], X[1])
+  1.8464073327922157
 
   References
   ----------
@@ -487,8 +496,8 @@ class SCML_Supervised(_BaseSCML, TransformerMixin):
          <http://researchers.lille.inria.fr/abellet/papers/aaai14.pdf>`_. \
          (AAAI), 2014.
 
-  .. [2] Adapted from original \
-         `Matlab implementation.<https://github.com/bellet/SCML>`_.
+  .. [2] Adapted from original `Matlab implementation. \
+         <https://github.com/bellet/SCML>`_.
 
   See Also
   --------
@@ -549,7 +558,7 @@ def _initialize_basis_supervised(self, X, y):
     case one is selected.
     """
 
-    if self.basis == 'lda':
+    if isinstance(self.basis, str) and self.basis == 'lda':
       basis, n_basis = self._generate_bases_LDA(X, y)
     else:
       basis, n_basis = None, None
@@ -597,8 +606,8 @@ def _generate_bases_LDA(self, X, y):
                        "should be smaller than %d" %
                        (n_basis, X.shape[0]*2*num_eig))
 
-    kmeans = KMeans(n_clusters=n_clusters, random_state=self.random_state,
-                    algorithm='elkan').fit(X)
+    kmeans = KMeans(n_clusters=n_clusters, n_init=10,
+                    random_state=self.random_state, algorithm='elkan').fit(X)
     cX = kmeans.cluster_centers_
 
     n_scales = 2
@@ -610,10 +619,10 @@ def _generate_bases_LDA(self, X, y):
     k_class = np.vstack((np.minimum(class_count, scales[0]),
                          np.minimum(class_count, scales[1])))
 
-    idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int),
-               np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int)]
+    idx_set = [np.zeros((n_clusters, sum(k_class[0, :])), dtype=np.int64),
+               np.zeros((n_clusters, sum(k_class[1, :])), dtype=np.int64)]
 
-    start_finish_indices = np.hstack((np.zeros((2, 1), np.int),
+    start_finish_indices = np.hstack((np.zeros((2, 1), np.int64),
                                      k_class)).cumsum(axis=1)
 
     neigh = NearestNeighbors()
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index a0736ffa..c4c427b9 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -6,7 +6,13 @@
 import numpy as np
 from sklearn.base import TransformerMixin
 from scipy.linalg import pinvh
-from sklearn.covariance import graphical_lasso
+try:
+  from sklearn.covariance._graph_lasso import (
+    _graphical_lasso as graphical_lasso
+  )
+except ImportError:
+  from sklearn.covariance import graphical_lasso
+
 from sklearn.exceptions import ConvergenceWarning
 
 from .base_metric import MahalanobisMixin, _PairsClassifierMixin
@@ -43,6 +49,9 @@ def _fit(self, pairs, y):
         print("SDML will use skggm's graphical lasso solver.")
     pairs, y = self._prepare_inputs(pairs, y,
                                     type_of_inputs='tuples')
+    n_features = pairs.shape[2]
+    if n_features < 2:
+      raise ValueError(f"Cannot fit SDML with {n_features} feature(s)")
 
     # set up (the inverse of) the prior M
     # if the prior is the default (None), we raise a warning
@@ -76,13 +85,14 @@ def _fit(self, pairs, y):
                                 msg=self.verbose,
                                 Theta0=theta0, Sigma0=sigma0)
       else:
-        _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
-                               verbose=self.verbose,
-                               cov_init=sigma0)
+        _, M, *_ = graphical_lasso(emp_cov, alpha=self.sparsity_param,
+                                   verbose=self.verbose,
+                                   cov_init=sigma0)
       raised_error = None
       w_mahalanobis, _ = np.linalg.eigh(M)
       not_spd = any(w_mahalanobis < 0.)
       not_finite = not np.isfinite(M).all()
+    # TODO: Narrow this to the specific exceptions we expect.
     except Exception as e:
       raised_error = e
       not_spd = False  # not_spd not applicable here so we set to False
@@ -177,7 +187,7 @@ class SDML(_BaseSDML, _PairsClassifierMixin):
   >>> iris_data = load_iris()
   >>> X = iris_data['data']
   >>> Y = iris_data['target']
-  >>> sdml = SDML_Supervised(num_constraints=200)
+  >>> sdml = SDML_Supervised(n_constraints=200)
   >>> sdml.fit(X, Y)
 
   References
@@ -262,7 +272,7 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
       (n_features, n_features), that will be used as such to set the
       prior.
 
-  num_constraints : int, optional (default=None)
+  n_constraints : int, optional (default=None)
     Number of constraints to generate. If None, defaults to `20 *
     num_classes**2`.
 
@@ -279,6 +289,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
     prior. In any case, `random_state` is also used to randomly sample
     constraints from labels.
 
+  num_constraints : Renamed to n_constraints. Will be removed in 0.7.0
+
   Attributes
   ----------
   components_ : `numpy.ndarray`, shape=(n_features, n_features)
@@ -293,13 +305,22 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
   """
 
   def __init__(self, balance_param=0.5, sparsity_param=0.01, prior='identity',
-               num_constraints=None, verbose=False, preprocessor=None,
-               random_state=None):
+               n_constraints=None, verbose=False, preprocessor=None,
+               random_state=None, num_constraints='deprecated'):
     _BaseSDML.__init__(self, balance_param=balance_param,
                        sparsity_param=sparsity_param, prior=prior,
                        verbose=verbose,
                        preprocessor=preprocessor, random_state=random_state)
-    self.num_constraints = num_constraints
+    if num_constraints != 'deprecated':
+      warnings.warn('"num_constraints" parameter has been renamed to'
+                    ' "n_constraints". It has been deprecated in'
+                    ' version 0.6.3 and will be removed in 0.7.0'
+                    '', FutureWarning)
+      self.n_constraints = num_constraints
+    else:
+      self.n_constraints = n_constraints
+    # Avoid test get_params from failing (all params passed should be set)
+    self.num_constraints = 'deprecated'
 
   def fit(self, X, y):
     """Create constraints from labels and learn the SDML model.
@@ -318,13 +339,13 @@ def fit(self, X, y):
       Returns the instance.
     """
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
-    num_constraints = self.num_constraints
-    if num_constraints is None:
+    n_constraints = self.n_constraints
+    if n_constraints is None:
       num_classes = len(np.unique(y))
-      num_constraints = 20 * num_classes**2
+      n_constraints = 20 * num_classes**2
 
     c = Constraints(y)
-    pos_neg = c.positive_negative_pairs(num_constraints,
+    pos_neg = c.positive_negative_pairs(n_constraints,
                                         random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseSDML._fit(self, pairs, y)
diff --git a/metric_learn/sklearn_shims.py b/metric_learn/sklearn_shims.py
new file mode 100644
index 00000000..8d746890
--- /dev/null
+++ b/metric_learn/sklearn_shims.py
@@ -0,0 +1,25 @@
+"""This file is for fixing imports due to different APIs
+depending on the scikit-learn version"""
+import sklearn
+from packaging import version
+SKLEARN_AT_LEAST_0_22 = (version.parse(sklearn.__version__)
+                         >= version.parse('0.22.0'))
+if SKLEARN_AT_LEAST_0_22:
+    from sklearn.utils._testing import (set_random_state,
+                                        ignore_warnings,
+                                        assert_allclose_dense_sparse,
+                                        _get_args)
+    from sklearn.utils.estimator_checks import (_is_public_parameter
+                                                as is_public_parameter)
+    from sklearn.metrics._scorer import get_scorer
+else:
+    from sklearn.utils.testing import (set_random_state,
+                                       ignore_warnings,
+                                       assert_allclose_dense_sparse,
+                                       _get_args)
+    from sklearn.utils.estimator_checks import is_public_parameter
+    from sklearn.metrics.scorer import get_scorer
+
+__all__ = ['SKLEARN_AT_LEAST_0_22', 'set_random_state',
+           'ignore_warnings', 'assert_allclose_dense_sparse', '_get_args',
+           'is_public_parameter', 'get_scorer']
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..ef3c8acb
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+  integration: mark a test as integration
+  unit: mark a test as unit
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 8677e7bf..23392077 100755
--- a/setup.py
+++ b/setup.py
@@ -63,12 +63,13 @@
       ],
       packages=['metric_learn'],
       install_requires=[
-          'numpy',
-          'scipy',
-          'scikit-learn>=0.20.3',
+          'numpy>= 1.11.0',
+          'scipy>= 0.17.0',
+          'scikit-learn>=0.21.3',
       ],
       extras_require=dict(
-          docs=['sphinx', 'shinx_rtd_theme', 'numpydoc'],
+          docs=['sphinx', 'sphinx_rtd_theme', 'numpydoc', 'sphinx-gallery',
+                'matplotlib'],
           demo=['matplotlib'],
           sdml=['skggm>=0.2.9']
       ),
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index 4db0a1fc..d457b52d 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -1,3 +1,4 @@
+import warnings
 import unittest
 import re
 import pytest
@@ -9,13 +10,12 @@
                               make_spd_matrix)
 from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_allclose)
-from sklearn.utils.testing import assert_warns_message
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.utils.validation import check_X_y
 from sklearn.preprocessing import StandardScaler
 try:
   from inverse_covariance import quic
-  assert(quic)
+  assert quic
 except ImportError:
   HAS_SKGGM = False
 else:
@@ -79,19 +79,24 @@ def test_singular_returns_pseudo_inverse(self):
 class TestSCML(object):
   @pytest.mark.parametrize('basis', ('lda', 'triplet_diffs'))
   def test_iris(self, basis):
+    """
+    SCML applied to Iris dataset should give better results when
+    computing class separation.
+    """
     X, y = load_iris(return_X_y=True)
+    before = class_separation(X, y)
     scml = SCML_Supervised(basis=basis, n_basis=85, k_genuine=7, k_impostor=5,
                            random_state=42)
     scml.fit(X, y)
-    csep = class_separation(scml.transform(X), y)
-    assert csep < 0.24
+    after = class_separation(scml.transform(X), y)
+    assert before > after + 0.03  # It's better by a margin of 0.03
 
   def test_big_n_features(self):
     X, y = make_classification(n_samples=100, n_classes=3, n_features=60,
                                n_informative=60, n_redundant=0, n_repeated=0,
                                random_state=42)
     X = StandardScaler().fit_transform(X)
-    scml = SCML_Supervised(random_state=42)
+    scml = SCML_Supervised(random_state=42, n_basis=399)
     scml.fit(X, y)
     csep = class_separation(scml.transform(X), y)
     assert csep < 0.7
@@ -102,7 +107,7 @@ def test_big_n_features(self):
                                                          [2, 0], [2, 1]]),
                                                np.array([1, 0, 1, 0])))])
   def test_bad_basis(self, estimator, data):
-    model = estimator(basis='bad_basis')
+    model = estimator(basis='bad_basis', n_basis=33)  # n_basis doesn't matter
     msg = ("`basis` must be one of the options '{}' or an array of shape "
            "(n_basis, n_features)."
            .format("', '".join(model._authorized_basis)))
@@ -234,16 +239,23 @@ def test_lda_toy(self):
   @pytest.mark.parametrize('n_features', [10, 50, 100])
   @pytest.mark.parametrize('n_classes', [5, 10, 15])
   def test_triplet_diffs(self, n_samples, n_features, n_classes):
+    """
+    Test that the correct value of n_basis is being generated with
+    different triplet constraints.
+    """
     X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                                n_features=n_features, n_informative=n_features,
                                n_redundant=0, n_repeated=0)
     X = StandardScaler().fit_transform(X)
-
-    model = SCML_Supervised()
+    model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
     constraints = Constraints(y)
     triplets = constraints.generate_knntriplets(X, model.k_genuine,
                                                 model.k_impostor)
-    basis, n_basis = model._generate_bases_dist_diff(triplets, X)
+
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      basis, n_basis = model._generate_bases_dist_diff(triplets, X)
+    assert msg in str(raised_warning[0].message)
 
     expected_n_basis = n_features * 80
     assert n_basis == expected_n_basis
@@ -253,13 +265,21 @@ def test_triplet_diffs(self, n_samples, n_features, n_classes):
   @pytest.mark.parametrize('n_features', [10, 50, 100])
   @pytest.mark.parametrize('n_classes', [5, 10, 15])
   def test_lda(self, n_samples, n_features, n_classes):
+    """
+    Test that when n_basis=None, the correct n_basis is generated,
+    for SCML_Supervised and different values of n_samples, n_features
+    and n_classes.
+    """
     X, y = make_classification(n_samples=n_samples, n_classes=n_classes,
                                n_features=n_features, n_informative=n_features,
                                n_redundant=0, n_repeated=0)
     X = StandardScaler().fit_transform(X)
 
-    model = SCML_Supervised()
-    basis, n_basis = model._generate_bases_LDA(X, y)
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      model = SCML_Supervised(n_basis=None)  # Explicit n_basis=None
+      basis, n_basis = model._generate_bases_LDA(X, y)
+    assert msg in str(raised_warning[0].message)
 
     num_eig = min(n_classes - 1, n_features)
     expected_n_basis = min(20 * n_features, n_samples * 2 * num_eig - 1)
@@ -295,7 +315,7 @@ def test_int_inputs_supervised(self, name):
     assert msg == raised_error.value.args[0]
 
   def test_large_output_iter(self):
-    scml = SCML(max_iter=1, output_iter=2)
+    scml = SCML(max_iter=1, output_iter=2, n_basis=33)  # n_basis is irrelevant
     triplets = np.array([[[0, 1], [2, 1], [0, 0]]])
     msg = ("The value of output_iter must be equal or smaller than"
            " max_iter.")
@@ -307,7 +327,7 @@ def test_large_output_iter(self):
 
 class TestLSML(MetricTestCase):
   def test_iris(self):
-    lsml = LSML_Supervised(num_constraints=200)
+    lsml = LSML_Supervised(n_constraints=200)
     lsml.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(lsml.transform(self.iris_points), self.iris_labels)
@@ -316,7 +336,7 @@ def test_iris(self):
 
 class TestITML(MetricTestCase):
   def test_iris(self):
-    itml = ITML_Supervised(num_constraints=200)
+    itml = ITML_Supervised(n_constraints=200)
     itml.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(itml.transform(self.iris_points), self.iris_labels)
@@ -362,7 +382,7 @@ def test_bounds_parameters_invalid(bounds):
 
 class TestLMNN(MetricTestCase):
   def test_iris(self):
-    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
     lmnn.fit(self.iris_points, self.iris_labels)
 
     csep = class_separation(lmnn.transform(self.iris_points),
@@ -379,7 +399,7 @@ def test_loss_grad_lbfgs(self):
     L = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1])
     lmnn = LMNN()
 
-    k = lmnn.k
+    k = lmnn.n_neighbors
     reg = lmnn.regularization
 
     X, y = lmnn._prepare_inputs(X, y, dtype=float,
@@ -555,9 +575,9 @@ def _loss_grad(self, X, L, dfG, k, reg, target_neighbors, label_inds):
 def test_toy_ex_lmnn(X, y, loss):
   """Test that the loss give the right result on a toy example"""
   L = np.array([[1]])
-  lmnn = LMNN(k=1, regularization=0.5)
+  lmnn = LMNN(n_neighbors=1, regularization=0.5)
 
-  k = lmnn.k
+  k = lmnn.n_neighbors
   reg = lmnn.regularization
 
   X, y = lmnn._prepare_inputs(X, y, dtype=float,
@@ -715,12 +735,12 @@ def test_raises_no_warning_installed_skggm(self):
     pairs = np.array([[[-10., 0.], [10., 0.]], [[0., -55.], [0., -60]]])
     y_pairs = [1, -1]
     X, y = make_classification(random_state=42)
-    with pytest.warns(None) as records:
+    with warnings.catch_warnings(record=True) as records:
       sdml = SDML(prior='covariance')
       sdml.fit(pairs, y_pairs)
     for record in records:
       assert record.category is not ConvergenceWarning
-    with pytest.warns(None) as records:
+    with warnings.catch_warnings(record=True) as records:
       sdml_supervised = SDML_Supervised(prior='identity', balance_param=1e-5)
       sdml_supervised.fit(X, y)
     for record in records:
@@ -731,7 +751,7 @@ def test_iris(self):
     # TODO: un-flake it!
     rs = np.random.RandomState(5555)
 
-    sdml = SDML_Supervised(num_constraints=1500, prior='identity',
+    sdml = SDML_Supervised(n_constraints=1500, prior='identity',
                            balance_param=5e-5, random_state=rs)
     sdml.fit(self.iris_points, self.iris_labels)
     csep = class_separation(sdml.transform(self.iris_points),
@@ -929,7 +949,7 @@ def test_singleton_class(self):
       X = X[[ind_0[0], ind_1[0], ind_2[0]]]
       y = y[[ind_0[0], ind_1[0], ind_2[0]]]
 
-      A = make_spd_matrix(X.shape[1], X.shape[1])
+      A = make_spd_matrix(n_dim=X.shape[1], random_state=X.shape[1])
       nca = NCA(init=A, max_iter=30, n_components=X.shape[1])
       nca.fit(X, y)
       assert_array_equal(nca.components_, A)
@@ -940,7 +960,7 @@ def test_one_class(self):
       X = self.iris_points[self.iris_labels == 0]
       y = self.iris_labels[self.iris_labels == 0]
 
-      A = make_spd_matrix(X.shape[1], X.shape[1])
+      A = make_spd_matrix(n_dim=X.shape[1], random_state=X.shape[1])
       nca = NCA(init=A, max_iter=30, n_components=X.shape[1])
       nca.fit(X, y)
       assert_array_equal(nca.components_, A)
@@ -960,7 +980,7 @@ def test_iris(self):
 
 class TestRCA(MetricTestCase):
   def test_iris(self):
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
+    rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2)
     rca.fit(self.iris_points, self.iris_labels)
     csep = class_separation(rca.transform(self.iris_points), self.iris_labels)
     self.assertLess(csep, 0.29)
@@ -980,21 +1000,21 @@ def test_rank_deficient_returns_warning(self):
            'for instance using `sklearn.decomposition.PCA` as a '
            'preprocessing step.')
 
-    with pytest.warns(None) as raised_warnings:
+    with warnings.catch_warnings(record=True) as raised_warnings:
       rca.fit(X, y)
     assert any(str(w.message) == msg for w in raised_warnings)
 
   def test_unknown_labels(self):
     n = 200
-    num_chunks = 50
+    n_chunks = 50
     X, y = make_classification(random_state=42, n_samples=2 * n,
                                n_features=6, n_informative=6, n_redundant=0)
     y2 = np.concatenate((y[:n], -np.ones(n)))
 
-    rca = RCA_Supervised(num_chunks=num_chunks, random_state=42)
+    rca = RCA_Supervised(n_chunks=n_chunks, random_state=42)
     rca.fit(X[:n], y[:n])
 
-    rca2 = RCA_Supervised(num_chunks=num_chunks, random_state=42)
+    rca2 = RCA_Supervised(n_chunks=n_chunks, random_state=42)
     rca2.fit(X, y2)
 
     assert not np.any(np.isnan(rca.components_))
@@ -1004,18 +1024,18 @@ def test_unknown_labels(self):
 
   def test_bad_parameters(self):
     n = 200
-    num_chunks = 3
+    n_chunks = 3
     X, y = make_classification(random_state=42, n_samples=n,
                                n_features=6, n_informative=6, n_redundant=0)
 
-    rca = RCA_Supervised(num_chunks=num_chunks, random_state=42)
+    rca = RCA_Supervised(n_chunks=n_chunks, random_state=42)
     msg = ('Due to the parameters of RCA_Supervised, '
            'the inner covariance matrix is not invertible, '
            'so the transformation matrix will contain Nan values. '
            'Increase the number or size of the chunks to correct '
            'this problem.'
            )
-    with pytest.warns(None) as raised_warning:
+    with warnings.catch_warnings(record=True) as raised_warning:
       rca.fit(X, y)
     assert any(str(w.message) == msg for w in raised_warning)
 
@@ -1062,7 +1082,7 @@ def test_iris(self):
 
     # Full metric
     n_features = self.iris_points.shape[1]
-    mmc = MMC(convergence_threshold=0.01, init=np.eye(n_features) / 10)
+    mmc = MMC(tol=0.01, init=np.eye(n_features) / 10)
     mmc.fit(*wrap_pairs(self.iris_points, [a, b, c, d]))
     expected = [[+0.000514, +0.000868, -0.001195, -0.001703],
                 [+0.000868, +0.001468, -0.002021, -0.002879],
@@ -1138,9 +1158,10 @@ def test_convergence_warning(dataset, algo_class):
     X, y = dataset
     model = algo_class(max_iter=2, verbose=True)
     cls_name = model.__class__.__name__
-    assert_warns_message(ConvergenceWarning,
-                         '[{}] {} did not converge'.format(cls_name, cls_name),
-                         model.fit, X, y)
+    msg = '[{}] {} did not converge'.format(cls_name, cls_name)
+    with pytest.warns(Warning) as raised_warning:
+      model.fit(X, y)
+    assert any([msg in str(warn.message) for warn in raised_warning])
 
 
 if __name__ == '__main__':
diff --git a/test/test_base_metric.py b/test/test_base_metric.py
index fed9018a..b1e71020 100644
--- a/test/test_base_metric.py
+++ b/test/test_base_metric.py
@@ -1,74 +1,167 @@
+from numpy import array_equal
+import warnings
 import pytest
 import re
 import unittest
 import metric_learn
 import numpy as np
 from sklearn import clone
-from sklearn.utils.testing import set_random_state
 from test.test_utils import ids_metric_learners, metric_learners, remove_y
+from metric_learn.sklearn_shims import set_random_state, SKLEARN_AT_LEAST_0_22
 
 
 def remove_spaces(s):
   return re.sub(r'\s+', '', s)
 
 
+def sk_repr_kwargs(def_kwargs, nndef_kwargs):
+  """Build the argument string expected inside an estimator's __repr__.
+
+  When SKLEARN_AT_LEAST_0_22 is true only the non-default arguments
+  (`nndef_kwargs`) are rendered; otherwise the defaults (`def_kwargs`)
+  are rendered too, overridden by the non-default values.
+  """
+  # Merge into a fresh dict so the caller's `def_kwargs` is never mutated.
+  shown = {} if SKLEARN_AT_LEAST_0_22 else dict(def_kwargs)
+  shown.update(nndef_kwargs)
+  return ",".join(f"{key}={repr(value)}"
+                  for key, value in shown.items())
+
+
 class TestStringRepr(unittest.TestCase):
 
   def test_covariance(self):
+    def_kwargs = {'preprocessor': None}
+    nndef_kwargs = {}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.Covariance())),
-                     remove_spaces("Covariance()"))
+                     remove_spaces(f"Covariance({merged_kwargs})"))
 
   def test_lmnn(self):
+    def_kwargs = {'convergence_tol': 0.001, 'init': 'auto', 'n_neighbors': 3,
+                  'learn_rate': 1e-07, 'max_iter': 1000, 'min_iter': 50,
+                  'n_components': None, 'preprocessor': None,
+                  'random_state': None, 'regularization': 0.5,
+                  'verbose': False}
+    nndef_kwargs = {'convergence_tol': 0.01, 'n_neighbors': 6}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(
-        remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01, k=6))),
-        remove_spaces("LMNN(convergence_tol=0.01, k=6)"))
+        remove_spaces(str(metric_learn.LMNN(convergence_tol=0.01,
+                                            n_neighbors=6))),
+        remove_spaces(f"LMNN({merged_kwargs})"))
 
   def test_nca(self):
+    def_kwargs = {'init': 'auto', 'max_iter': 100, 'n_components': None,
+                  'preprocessor': None, 'random_state': None, 'tol': None,
+                  'verbose': False}
+    nndef_kwargs = {'max_iter': 42}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.NCA(max_iter=42))),
-                     remove_spaces("NCA(max_iter=42)"))
+                     remove_spaces(f"NCA({merged_kwargs})"))
 
   def test_lfda(self):
+    def_kwargs = {'embedding_type': 'weighted', 'k': None,
+                  'n_components': None, 'preprocessor': None}
+    nndef_kwargs = {'k': 2}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.LFDA(k=2))),
-                     remove_spaces("LFDA(k=2)"))
+                     remove_spaces(f"LFDA({merged_kwargs})"))
 
   def test_itml(self):
+    def_kwargs = {'tol': 0.001, 'gamma': 1.0,
+                  'max_iter': 1000, 'preprocessor': None,
+                  'prior': 'identity', 'random_state': None, 'verbose': False}
+    nndef_kwargs = {'gamma': 0.5}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.ITML(gamma=0.5))),
-                     remove_spaces("ITML(gamma=0.5)"))
+                     remove_spaces(f"ITML({merged_kwargs})"))
+    def_kwargs = {'tol': 0.001, 'gamma': 1.0,
+                  'max_iter': 1000, 'n_constraints': None,
+                  'preprocessor': None, 'prior': 'identity',
+                  'random_state': None, 'verbose': False}
+    nndef_kwargs = {'n_constraints': 7}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(
-        remove_spaces(str(metric_learn.ITML_Supervised(num_constraints=7))),
-        remove_spaces("ITML_Supervised(num_constraints=7)"))
+        remove_spaces(str(metric_learn.ITML_Supervised(n_constraints=7))),
+        remove_spaces(f"ITML_Supervised({merged_kwargs})"))
 
   def test_lsml(self):
+    def_kwargs = {'max_iter': 1000, 'preprocessor': None, 'prior': 'identity',
+                  'random_state': None, 'tol': 0.001, 'verbose': False}
+    nndef_kwargs = {'tol': 0.1}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.LSML(tol=0.1))),
-                     remove_spaces("LSML(tol=0.1)"))
+                     remove_spaces(f"LSML({merged_kwargs})"))
+    def_kwargs = {'max_iter': 1000, 'n_constraints': None,
+                  'preprocessor': None, 'prior': 'identity',
+                  'random_state': None, 'tol': 0.001, 'verbose': False,
+                  'weights': None}
+    nndef_kwargs = {'verbose': True}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(
         remove_spaces(str(metric_learn.LSML_Supervised(verbose=True))),
-        remove_spaces("LSML_Supervised(verbose=True)"))
+        remove_spaces(f"LSML_Supervised({merged_kwargs})"))
 
   def test_sdml(self):
+    def_kwargs = {'balance_param': 0.5, 'preprocessor': None,
+                  'prior': 'identity', 'random_state': None,
+                  'sparsity_param': 0.01, 'verbose': False}
+    nndef_kwargs = {'verbose': True}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.SDML(verbose=True))),
-                     remove_spaces("SDML(verbose=True)"))
+                     remove_spaces(f"SDML({merged_kwargs})"))
+    def_kwargs = {'balance_param': 0.5, 'n_constraints': None,
+                  'preprocessor': None, 'prior': 'identity',
+                  'random_state': None, 'sparsity_param': 0.01,
+                  'verbose': False}
+    nndef_kwargs = {'sparsity_param': 0.5}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(
         remove_spaces(str(metric_learn.SDML_Supervised(sparsity_param=0.5))),
-        remove_spaces("SDML_Supervised(sparsity_param=0.5)"))
+        remove_spaces(f"SDML_Supervised({merged_kwargs})"))
 
   def test_rca(self):
+    def_kwargs = {'n_components': None, 'preprocessor': None}
+    nndef_kwargs = {'n_components': 3}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.RCA(n_components=3))),
-                     remove_spaces("RCA(n_components=3)"))
+                     remove_spaces(f"RCA({merged_kwargs})"))
+    def_kwargs = {'chunk_size': 2, 'n_components': None, 'n_chunks': 100,
+                  'preprocessor': None, 'random_state': None}
+    nndef_kwargs = {'n_chunks': 5}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(
-        remove_spaces(str(metric_learn.RCA_Supervised(num_chunks=5))),
-        remove_spaces("RCA_Supervised(num_chunks=5)"))
+        remove_spaces(str(metric_learn.RCA_Supervised(n_chunks=5))),
+        remove_spaces(f"RCA_Supervised({merged_kwargs})"))
 
   def test_mlkr(self):
+    def_kwargs = {'init': 'auto', 'max_iter': 1000,
+                  'n_components': None, 'preprocessor': None,
+                  'random_state': None, 'tol': None, 'verbose': False}
+    nndef_kwargs = {'max_iter': 777}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.MLKR(max_iter=777))),
-                     remove_spaces("MLKR(max_iter=777)"))
+                     remove_spaces(f"MLKR({merged_kwargs})"))
 
   def test_mmc(self):
+    def_kwargs = {'tol': 0.001, 'diagonal': False,
+                  'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100,
+                  'max_proj': 10000, 'preprocessor': None,
+                  'random_state': None, 'verbose': False}
+    nndef_kwargs = {'diagonal': True}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(remove_spaces(str(metric_learn.MMC(diagonal=True))),
-                     remove_spaces("MMC(diagonal=True)"))
+                     remove_spaces(f"MMC({merged_kwargs})"))
+    def_kwargs = {'tol': 1e-06, 'diagonal': False,
+                  'diagonal_c': 1.0, 'init': 'identity', 'max_iter': 100,
+                  'max_proj': 10000, 'n_constraints': None,
+                  'preprocessor': None, 'random_state': None,
+                  'verbose': False}
+    nndef_kwargs = {'max_iter': 1}
+    merged_kwargs = sk_repr_kwargs(def_kwargs, nndef_kwargs)
     self.assertEqual(
         remove_spaces(str(metric_learn.MMC_Supervised(max_iter=1))),
-        remove_spaces("MMC_Supervised(max_iter=1)"))
+        remove_spaces(f"MMC_Supervised({merged_kwargs})"))
 
 
 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
@@ -134,7 +227,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset):
                                        (X[0][None], X[1][None])]
 
   for u, v in list_test_get_metric_doesnt_raise:
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings(record=True) as record:
       metric(u, v)
     assert len(record) == 0
 
@@ -142,7 +235,7 @@ def test_get_metric_works_does_not_raise(estimator, build_dataset):
   model.components_ = np.array([3.1])
   metric = model.get_metric()
   for u, v in [(5, 6.7), ([5], [6.7]), ([[5]], [[6.7]])]:
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings(record=True) as record:
       metric(u, v)
     assert len(record) == 0
 
@@ -184,5 +277,28 @@ def test_n_components(estimator, build_dataset):
             'Invalid n_components, must be in [1, {}]'.format(X.shape[1]))
 
 
+@pytest.mark.parametrize('estimator, build_dataset', metric_learners,
+                         ids=ids_metric_learners)
+def test_score_pairs_warning(estimator, build_dataset):
+  """Checks that score_pairs emits a FutureWarning about its deprecation,
+  and that it returns the same values as pair_distance on the same pair."""
+  input_data, labels, _, X = build_dataset()
+  model = clone(estimator)
+  set_random_state(model)
+
+  # Fit the metric learner, then call score_pairs and pair_distance on a
+  # single pair built from the first two points
+  model.fit(*remove_y(model, input_data, labels))
+
+  msg = ("score_pairs will be deprecated in release 0.7.0. "
+         "Use pair_score to compute similarity scores, or "
+         "pair_distances to compute distances.")
+  with pytest.warns(FutureWarning) as raised_warning:
+    score = model.score_pairs([[X[0], X[1]], ])
+    dist = model.pair_distance([[X[0], X[1]], ])
+    assert array_equal(score, dist)
+  assert any([str(warning.message) == msg for warning in raised_warning])
+
+
 if __name__ == '__main__':
   unittest.main()
diff --git a/test/test_components_metric_conversion.py b/test/test_components_metric_conversion.py
index b9da87ed..c6113957 100644
--- a/test/test_components_metric_conversion.py
+++ b/test/test_components_metric_conversion.py
@@ -1,11 +1,10 @@
 import unittest
 import numpy as np
 import pytest
-from numpy.linalg import LinAlgError
 from scipy.stats import ortho_group
 from sklearn.datasets import load_iris
 from numpy.testing import assert_array_almost_equal, assert_allclose
-from sklearn.utils.testing import ignore_warnings
+from metric_learn.sklearn_shims import ignore_warnings
 
 from metric_learn import (
     LMNN, NCA, LFDA, Covariance, MLKR,
@@ -30,27 +29,27 @@ def test_cov(self):
 
   def test_lsml_supervised(self):
     seed = np.random.RandomState(1234)
-    lsml = LSML_Supervised(num_constraints=200, random_state=seed)
+    lsml = LSML_Supervised(n_constraints=200, random_state=seed)
     lsml.fit(self.X, self.y)
     L = lsml.components_
     assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix())
 
   def test_itml_supervised(self):
     seed = np.random.RandomState(1234)
-    itml = ITML_Supervised(num_constraints=200, random_state=seed)
+    itml = ITML_Supervised(n_constraints=200, random_state=seed)
     itml.fit(self.X, self.y)
     L = itml.components_
     assert_array_almost_equal(L.T.dot(L), itml.get_mahalanobis_matrix())
 
   def test_lmnn(self):
-    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
     lmnn.fit(self.X, self.y)
     L = lmnn.components_
     assert_array_almost_equal(L.T.dot(L), lmnn.get_mahalanobis_matrix())
 
   def test_sdml_supervised(self):
     seed = np.random.RandomState(1234)
-    sdml = SDML_Supervised(num_constraints=1500, prior='identity',
+    sdml = SDML_Supervised(n_constraints=1500, prior='identity',
                            balance_param=1e-5, random_state=seed)
     sdml.fit(self.X, self.y)
     L = sdml.components_
@@ -70,7 +69,7 @@ def test_lfda(self):
     assert_array_almost_equal(L.T.dot(L), lfda.get_mahalanobis_matrix())
 
   def test_rca_supervised(self):
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
+    rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2)
     rca.fit(self.X, self.y)
     L = rca.components_
     assert_array_almost_equal(L.T.dot(L), rca.get_mahalanobis_matrix())
@@ -117,17 +116,14 @@ def test_components_from_metric_edge_cases(self):
     L = components_from_metric(M)
     assert_allclose(L.T.dot(L), M)
 
-    # matrix with a determinant still high but which should be considered as a
-    # non-definite matrix (to check we don't test the definiteness with the
-    # determinant which is a bad strategy)
+    # matrix with a determinant still high but which is
+    # rank-deficient (not positive definite) by numpy standards
     M = np.diag([1e5, 1e5, 1e5, 1e5, 1e5, 1e5, 1e-20])
     M = P.dot(M).dot(P.T)
     assert np.abs(np.linalg.det(M)) > 10
     assert np.linalg.slogdet(M)[1] > 1  # (just to show that the computed
     # determinant is far from null)
-    with pytest.raises(LinAlgError) as err_msg:
-      np.linalg.cholesky(M)
-    assert str(err_msg.value) == 'Matrix is not positive definite'
+    assert np.linalg.matrix_rank(M) < M.shape[0]
     # (just to show that this case is indeed considered by numpy as an
     # indefinite case)
     L = components_from_metric(M)
diff --git a/test/test_constraints.py b/test/test_constraints.py
index 92876779..3429d9cc 100644
--- a/test/test_constraints.py
+++ b/test/test_constraints.py
@@ -7,14 +7,14 @@
 SEED = 42
 
 
-def gen_labels_for_chunks(num_chunks, chunk_size,
+def gen_labels_for_chunks(n_chunks, chunk_size,
                           n_classes=10, n_unknown_labels=5):
-  """Generates num_chunks*chunk_size labels that split in num_chunks chunks,
+  """Generates n_chunks*chunk_size labels that split in n_chunks chunks,
   that are homogeneous in the label."""
-  assert min(num_chunks, chunk_size) > 0
+  assert min(n_chunks, chunk_size) > 0
   classes = shuffle(np.arange(n_classes), random_state=SEED)
-  n_per_class = chunk_size * (num_chunks // n_classes)
-  n_maj_class = chunk_size * num_chunks - n_per_class * (n_classes - 1)
+  n_per_class = chunk_size * (n_chunks // n_classes)
+  n_maj_class = chunk_size * n_chunks - n_per_class * (n_classes - 1)
 
   first_labels = classes[0] * np.ones(n_maj_class, dtype=int)
   remaining_labels = np.concatenate([k * np.ones(n_per_class, dtype=int)
@@ -25,48 +25,48 @@ def gen_labels_for_chunks(num_chunks, chunk_size,
   return shuffle(labels, random_state=SEED)
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_exact_num_points_for_chunks(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_exact_num_points_for_chunks(n_chunks, chunk_size):
   """Checks that the chunk generation works well with just enough points."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   constraints = Constraints(labels)
-  chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+  chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                               random_state=SEED)
 
   chunk_no, size_each_chunk = np.unique(chunks[chunks >= 0],
                                         return_counts=True)
 
   np.testing.assert_array_equal(size_each_chunk, chunk_size)
-  assert chunk_no.shape[0] == num_chunks
+  assert chunk_no.shape[0] == n_chunks
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_chunk_case_one_miss_point(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_chunk_case_one_miss_point(n_chunks, chunk_size):
   """Checks that the chunk generation breaks when one point is missing."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   assert len(labels) >= 1
   constraints = Constraints(labels[1:])
   with pytest.raises(ValueError) as e:
-    constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+    constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                        random_state=SEED)
 
   expected_message = (('Not enough possible chunks of %d elements in each'
                        ' class to form expected %d chunks - maximum number'
                        ' of chunks is %d'
-                       ) % (chunk_size, num_chunks, num_chunks - 1))
+                       ) % (chunk_size, n_chunks, n_chunks - 1))
 
   assert str(e.value) == expected_message
 
 
-@pytest.mark.parametrize("num_chunks, chunk_size", [(5, 10), (10, 50)])
-def test_unknown_labels_not_in_chunks(num_chunks, chunk_size):
+@pytest.mark.parametrize("n_chunks, chunk_size", [(5, 10), (10, 50)])
+def test_unknown_labels_not_in_chunks(n_chunks, chunk_size):
   """Checks that unknown labels are not assigned to any chunk."""
-  labels = gen_labels_for_chunks(num_chunks, chunk_size)
+  labels = gen_labels_for_chunks(n_chunks, chunk_size)
 
   constraints = Constraints(labels)
-  chunks = constraints.chunks(num_chunks=num_chunks, chunk_size=chunk_size,
+  chunks = constraints.chunks(n_chunks=n_chunks, chunk_size=chunk_size,
                               random_state=SEED)
 
   assert np.all(chunks[labels < 0] < 0)
@@ -103,7 +103,7 @@ def test_generate_knntriplets_under_edge(k_genuine, k_impostor, T_test):
 
 
 @pytest.mark.parametrize("k_genuine, k_impostor,",
-                         [(2, 3), (3, 3), (2, 4), (3, 4)])
+                         [(3, 3), (2, 4), (3, 4), (10, 9), (144, 33)])
 def test_generate_knntriplets(k_genuine, k_impostor):
   """Checks edge and over the edge cases of knn triplet construction with not
      enough neighbors"""
@@ -118,8 +118,23 @@ def test_generate_knntriplets(k_genuine, k_impostor):
   X = np.array([[0, 0], [2, 2], [4, 4], [8, 8], [16, 16], [32, 32], [33, 33]])
   y = np.array([1, 1, 1, 2, 2, 2, -1])
 
-  T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
-
+  msg1 = ("The class 1 has 3 elements, which is not sufficient to "
+          f"generate {k_genuine+1} genuine neighbors "
+          "as specified by k_genuine")
+  msg2 = ("The class 2 has 3 elements, which is not sufficient to "
+          f"generate {k_genuine+1} genuine neighbors "
+          "as specified by k_genuine")
+  msg3 = ("The class 1 has 3 elements of other classes, which is "
+          f"not sufficient to generate {k_impostor} impostor "
+          "neighbors as specified by k_impostor")
+  msg4 = ("The class 2 has 3 elements of other classes, which is "
+          f"not sufficient to generate {k_impostor} impostor "
+          "neighbors as specified by k_impostor")
+  msgs = [msg1, msg2, msg3, msg4]
+  with pytest.warns(UserWarning) as user_warning:
+    T = Constraints(y).generate_knntriplets(X, k_genuine, k_impostor)
+  # Flat generator: the former nested list was truthy even with no match.
+  assert any(msg in str(w.message) for w in user_warning for msg in msgs)
   assert np.array_equal(sorted(T.tolist()), T_test)
 
 
diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py
index d4d4bfe0..246223b0 100644
--- a/test/test_fit_transform.py
+++ b/test/test_fit_transform.py
@@ -29,47 +29,47 @@ def test_cov(self):
 
   def test_lsml_supervised(self):
     seed = np.random.RandomState(1234)
-    lsml = LSML_Supervised(num_constraints=200, random_state=seed)
+    lsml = LSML_Supervised(n_constraints=200, random_state=seed)
     lsml.fit(self.X, self.y)
     res_1 = lsml.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    lsml = LSML_Supervised(num_constraints=200, random_state=seed)
+    lsml = LSML_Supervised(n_constraints=200, random_state=seed)
     res_2 = lsml.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
   def test_itml_supervised(self):
     seed = np.random.RandomState(1234)
-    itml = ITML_Supervised(num_constraints=200, random_state=seed)
+    itml = ITML_Supervised(n_constraints=200, random_state=seed)
     itml.fit(self.X, self.y)
     res_1 = itml.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    itml = ITML_Supervised(num_constraints=200, random_state=seed)
+    itml = ITML_Supervised(n_constraints=200, random_state=seed)
     res_2 = itml.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
   def test_lmnn(self):
-    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
     lmnn.fit(self.X, self.y)
     res_1 = lmnn.transform(self.X)
 
-    lmnn = LMNN(k=5, learn_rate=1e-6, verbose=False)
+    lmnn = LMNN(n_neighbors=5, learn_rate=1e-6, verbose=False)
     res_2 = lmnn.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
   def test_sdml_supervised(self):
     seed = np.random.RandomState(1234)
-    sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5,
+    sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5,
                            prior='identity', random_state=seed)
     sdml.fit(self.X, self.y)
     res_1 = sdml.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5,
+    sdml = SDML_Supervised(n_constraints=1500, balance_param=1e-5,
                            prior='identity', random_state=seed)
     res_2 = sdml.fit_transform(self.X, self.y)
 
@@ -99,13 +99,13 @@ def test_lfda(self):
 
   def test_rca_supervised(self):
     seed = np.random.RandomState(1234)
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2,
+    rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2,
                          random_state=seed)
     rca.fit(self.X, self.y)
     res_1 = rca.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2,
+    rca = RCA_Supervised(n_components=2, n_chunks=30, chunk_size=2,
                          random_state=seed)
     res_2 = rca.fit_transform(self.X, self.y)
 
@@ -123,12 +123,12 @@ def test_mlkr(self):
 
   def test_mmc_supervised(self):
     seed = np.random.RandomState(1234)
-    mmc = MMC_Supervised(num_constraints=200, random_state=seed)
+    mmc = MMC_Supervised(n_constraints=200, random_state=seed)
     mmc.fit(self.X, self.y)
     res_1 = mmc.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    mmc = MMC_Supervised(num_constraints=200, random_state=seed)
+    mmc = MMC_Supervised(n_constraints=200, random_state=seed)
     res_2 = mmc.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
diff --git a/test/test_mahalanobis_mixin.py b/test/test_mahalanobis_mixin.py
index ab7e972d..9378ac60 100644
--- a/test/test_mahalanobis_mixin.py
+++ b/test/test_mahalanobis_mixin.py
@@ -3,7 +3,8 @@
 import pytest
 import numpy as np
 from numpy.linalg import LinAlgError
-from numpy.testing import assert_array_almost_equal, assert_allclose
+from numpy.testing import assert_array_almost_equal, assert_allclose, \
+                          assert_array_equal
 from scipy.spatial.distance import pdist, squareform, mahalanobis
 from scipy.stats import ortho_group
 from sklearn import clone
@@ -11,9 +12,10 @@
 from sklearn.datasets import make_spd_matrix, make_blobs
 from sklearn.utils import check_random_state, shuffle
 from sklearn.utils.multiclass import type_of_target
-from sklearn.utils.testing import set_random_state
+from metric_learn.sklearn_shims import set_random_state
 
 from metric_learn._util import make_context, _initialize_metric_mahalanobis
+from metric_learn.sdml import _BaseSDML
 from metric_learn.base_metric import (_QuadrupletsClassifierMixin,
                                       _TripletsClassifierMixin,
                                       _PairsClassifierMixin)
@@ -27,7 +29,27 @@
 
 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
                          ids=ids_metric_learners)
-def test_score_pairs_pairwise(estimator, build_dataset):
+def test_pair_distance_pair_score_equivalent(estimator, build_dataset):
+  """
+  For Mahalanobis learners, pair_score should be equivalent to the
+  opposite of the pair_distance result.
+  """
+  input_data, labels, _, X = build_dataset()
+  n_samples = 20
+  X = X[:n_samples]
+  model = clone(estimator)
+  set_random_state(model)
+  model.fit(*remove_y(estimator, input_data, labels))
+
+  distances = model.pair_distance(np.array(list(product(X, X))))
+  scores = model.pair_score(np.array(list(product(X, X))))
+
+  assert_array_equal(distances, -1 * scores)
+
+
+@pytest.mark.parametrize('estimator, build_dataset', metric_learners,
+                         ids=ids_metric_learners)
+def test_pair_distance_pairwise(estimator, build_dataset):
   # Computing pairwise scores should return a euclidean distance matrix.
   input_data, labels, _, X = build_dataset()
   n_samples = 20
@@ -36,7 +58,7 @@ def test_score_pairs_pairwise(estimator, build_dataset):
   set_random_state(model)
   model.fit(*remove_y(estimator, input_data, labels))
 
-  pairwise = model.score_pairs(np.array(list(product(X, X))))\
+  pairwise = model.pair_distance(np.array(list(product(X, X))))\
       .reshape(n_samples, n_samples)
 
   check_is_distance_matrix(pairwise)
@@ -51,8 +73,8 @@ def test_score_pairs_pairwise(estimator, build_dataset):
 
 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
                          ids=ids_metric_learners)
-def test_score_pairs_toy_example(estimator, build_dataset):
-    # Checks that score_pairs works on a toy example
+def test_pair_distance_toy_example(estimator, build_dataset):
+    # Checks that pair_distance works on a toy example
     input_data, labels, _, X = build_dataset()
     n_samples = 20
     X = X[:n_samples]
@@ -64,24 +86,24 @@ def test_score_pairs_toy_example(estimator, build_dataset):
     distances = np.sqrt(np.sum((embedded_pairs[:, 1] -
                                 embedded_pairs[:, 0])**2,
                                axis=-1))
-    assert_array_almost_equal(model.score_pairs(pairs), distances)
+    assert_array_almost_equal(model.pair_distance(pairs), distances)
 
 
 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
                          ids=ids_metric_learners)
-def test_score_pairs_finite(estimator, build_dataset):
+def test_pair_distance_finite(estimator, build_dataset):
   # tests that the score is finite
   input_data, labels, _, X = build_dataset()
   model = clone(estimator)
   set_random_state(model)
   model.fit(*remove_y(estimator, input_data, labels))
   pairs = np.array(list(product(X, X)))
-  assert np.isfinite(model.score_pairs(pairs)).all()
+  assert np.isfinite(model.pair_distance(pairs)).all()
 
 
 @pytest.mark.parametrize('estimator, build_dataset', metric_learners,
                          ids=ids_metric_learners)
-def test_score_pairs_dim(estimator, build_dataset):
+def test_pair_distance_dim(estimator, build_dataset):
   # scoring of 3D arrays should return 1D array (several tuples),
   # and scoring of 2D arrays (one tuple) should return an error (like
   # scikit-learn's error when scoring 1D arrays)
@@ -90,13 +112,13 @@ def test_score_pairs_dim(estimator, build_dataset):
   set_random_state(model)
   model.fit(*remove_y(estimator, input_data, labels))
   tuples = np.array(list(product(X, X)))
-  assert model.score_pairs(tuples).shape == (tuples.shape[0],)
+  assert model.pair_distance(tuples).shape == (tuples.shape[0],)
   context = make_context(estimator)
   msg = ("3D array of formed tuples expected{}. Found 2D array "
          "instead:\ninput={}. Reshape your data and/or use a preprocessor.\n"
          .format(context, tuples[1]))
   with pytest.raises(ValueError) as raised_error:
-    model.score_pairs(tuples[1])
+    model.pair_distance(tuples[1])
   assert str(raised_error.value) == msg
 
 
@@ -140,7 +162,7 @@ def test_embed_dim(estimator, build_dataset):
              "instead:\ninput={}. Reshape your data and/or use a "
              "preprocessor.\n".format(context, X[0]))
   with pytest.raises(ValueError) as raised_error:
-    model.score_pairs(model.transform(X[0, :]))
+    model.pair_distance(model.transform(X[0, :]))
   assert str(raised_error.value) == err_msg
   # we test that the shape is also OK when doing dimensionality reduction
   if hasattr(model, 'n_components'):
@@ -194,8 +216,7 @@ def test_get_metric_equivalent_to_explicit_mahalanobis(estimator,
   metric = model.get_metric()
   n_features = X.shape[1]
   a, b = (rng.randn(n_features), rng.randn(n_features))
-  expected_dist = mahalanobis(a[None], b[None],
-                              VI=model.get_mahalanobis_matrix())
+  expected_dist = mahalanobis(a, b, VI=model.get_mahalanobis_matrix())
   assert_allclose(metric(a, b), expected_dist, rtol=1e-13)
 
 
@@ -270,8 +291,12 @@ def test_components_is_2D(estimator, build_dataset):
   model.fit(*remove_y(estimator, input_data, labels))
   assert model.components_.shape == (X.shape[1], X.shape[1])
 
-  # test that it works for 1 feature
-  trunc_data = input_data[..., :1]
+  if isinstance(estimator, _BaseSDML):
+    # SDML doesn't support running on a single feature.
+    return
+
+  # test that it works for 1 feature. Use 2nd dimension, to avoid border cases
+  trunc_data = input_data[..., 1:2]
   # we drop duplicates that might have been formed, i.e. of the form
   # aabc or abcc or aabb for quadruplets, and aa for pairs.
 
@@ -417,7 +442,7 @@ def test_auto_init_transformation(n_samples, n_features, n_classes,
                           random_state=rng)
     # To make the test work for LMNN:
     if 'LMNN' in model_base.__class__.__name__:
-      model_base.set_params(k=1)
+      model_base.set_params(n_neighbors=1)
     # To make the test faster for estimators that have a max_iter:
     if hasattr(model_base, 'max_iter'):
       model_base.set_params(max_iter=1)
@@ -503,12 +528,12 @@ def test_init_mahalanobis(estimator, build_dataset):
       model.fit(input_data, labels)
 
       # Initialize with a random spd matrix
-      init = make_spd_matrix(X.shape[1], random_state=rng)
+      init = make_spd_matrix(n_dim=X.shape[1], random_state=rng)
       model.set_params(**{param: init})
       model.fit(input_data, labels)
 
       # init.shape[1] must match X.shape[1]
-      init = make_spd_matrix(X.shape[1] + 1, X.shape[1] + 1)
+      init = make_spd_matrix(n_dim=X.shape[1] + 1, random_state=rng)
       model.set_params(**{param: init})
       msg = ('The input dimensionality {} of the given '
              'mahalanobis matrix `{}` must match the '
@@ -625,7 +650,7 @@ def test_singular_covariance_init_of_non_strict_pd(estimator, build_dataset):
            'preprocessing step.')
     with pytest.warns(UserWarning) as raised_warning:
       model.fit(input_data, labels)
-    assert np.any([str(warning.message) == msg for warning in raised_warning])
+    assert any([str(warning.message) == msg for warning in raised_warning])
     M, _ = _initialize_metric_mahalanobis(X, init='covariance',
                                           random_state=RNG,
                                           return_inverse=True,
diff --git a/test/test_pairs_classifiers.py b/test/test_pairs_classifiers.py
index c5ca27f4..bfedefea 100644
--- a/test/test_pairs_classifiers.py
+++ b/test/test_pairs_classifiers.py
@@ -1,5 +1,6 @@
 from functools import partial
 
+import warnings
 import pytest
 from numpy.testing import assert_array_equal
 from scipy.spatial.distance import euclidean
@@ -11,7 +12,7 @@
 from sklearn.model_selection import train_test_split
 
 from test.test_utils import pairs_learners, ids_pairs_learners
-from sklearn.utils.testing import set_random_state
+from metric_learn.sklearn_shims import set_random_state
 from sklearn import clone
 import numpy as np
 from itertools import product
@@ -49,14 +50,14 @@ def test_predict_monotonous(estimator, build_dataset,
   pairs_train, pairs_test, y_train, y_test = train_test_split(input_data,
                                                               labels)
   estimator.fit(pairs_train, y_train)
-  distances = estimator.score_pairs(pairs_test)
+  scores = estimator.pair_score(pairs_test)
   predictions = estimator.predict(pairs_test)
-  min_dissimilar = np.min(distances[predictions == -1])
-  max_similar = np.max(distances[predictions == 1])
-  assert max_similar <= min_dissimilar
-  separator = np.mean([min_dissimilar, max_similar])
-  assert (predictions[distances > separator] == -1).all()
-  assert (predictions[distances < separator] == 1).all()
+  max_dissimilar = np.max(scores[predictions == -1])
+  min_similar = np.min(scores[predictions == 1])
+  assert max_dissimilar <= min_similar
+  separator = np.mean([max_dissimilar, min_similar])
+  assert (predictions[scores < separator] == -1).all()
+  assert (predictions[scores > separator] == 1).all()
 
 
 @pytest.mark.parametrize('with_preprocessor', [True, False])
@@ -65,15 +66,17 @@ def test_predict_monotonous(estimator, build_dataset,
 def test_raise_not_fitted_error_if_not_fitted(estimator, build_dataset,
                                               with_preprocessor):
   """Test that a NotFittedError is raised if someone tries to use
-  score_pairs, decision_function, get_metric, transform or
+  pair_score, score_pairs, decision_function, get_metric, transform or
   get_mahalanobis_matrix on input data and the metric learner
   has not been fitted."""
   input_data, labels, preprocessor, _ = build_dataset(with_preprocessor)
   estimator = clone(estimator)
   estimator.set_params(preprocessor=preprocessor)
   set_random_state(estimator)
-  with pytest.raises(NotFittedError):
+  with pytest.raises(NotFittedError):  # Remove in 0.8.0
     estimator.score_pairs(input_data)
+  with pytest.raises(NotFittedError):
+    estimator.pair_score(input_data)
   with pytest.raises(NotFittedError):
     estimator.decision_function(input_data)
   with pytest.raises(NotFittedError):
@@ -134,7 +137,7 @@ def test_threshold_different_scores_is_finite(estimator, build_dataset,
   estimator.set_params(preprocessor=preprocessor)
   set_random_state(estimator)
   estimator.fit(input_data, labels)
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     estimator.calibrate_threshold(input_data, labels, **kwargs)
   assert len(record) == 0
 
@@ -178,6 +181,25 @@ def test_set_threshold():
   assert identity_pairs_classifier.threshold_ == 0.5
 
 
+@pytest.mark.parametrize('value', ["ABC", None, [1, 2, 3], {'key': None},
+                         (1, 2), set(),
+                         np.array([[[0.], [1.]], [[1.], [3.]]])])
+def test_set_wrong_type_threshold(value):
+  """
+  Test that `set_threshold` rejects any value that is not a
+  real number (float or integer) by raising a ValueError, while
+  being permissive with booleans (True=1.0 and False=0.0).
+  """
+  model = IdentityPairsClassifier()
+  model.fit(np.array([[[0.], [1.]]]), np.array([1]))
+  msg = ('Parameter threshold must be a real number. '
+         'Got {} instead.'.format(type(value)))
+
+  with pytest.raises(ValueError) as e:  # Any parametrized non-numeric value
+    model.set_threshold(value)
+  assert str(e.value).startswith(msg)
+
+
 def test_f_beta_1_is_f_1():
   # test that putting beta to 1 indeed finds the best threshold to optimize
   # the f1_score
@@ -362,7 +384,7 @@ def test_calibrate_threshold_valid_parameters(valid_args):
   pairs, y = rng.randn(20, 2, 5), rng.choice([-1, 1], size=20)
   pairs_learner = IdentityPairsClassifier()
   pairs_learner.fit(pairs, y)
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     pairs_learner.calibrate_threshold(pairs, y, **valid_args)
   assert len(record) == 0
 
@@ -497,7 +519,7 @@ def test_validate_calibration_params_valid_parameters(
   # test that no warning message is returned if valid arguments are given to
   # _validate_calibration_params for all pairs metric learners, as well as
   # a mocking example, and the class itself
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     estimator._validate_calibration_params(**valid_args)
   assert len(record) == 0
 
diff --git a/test/test_quadruplets_classifiers.py b/test/test_quadruplets_classifiers.py
index efe10030..a8319961 100644
--- a/test/test_quadruplets_classifiers.py
+++ b/test/test_quadruplets_classifiers.py
@@ -3,7 +3,7 @@
 from sklearn.model_selection import train_test_split
 
 from test.test_utils import quadruplets_learners, ids_quadruplets_learners
-from sklearn.utils.testing import set_random_state
+from metric_learn.sklearn_shims import set_random_state
 from sklearn import clone
 import numpy as np
 
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
index e18eb7f4..798d9036 100644
--- a/test/test_sklearn_compat.py
+++ b/test/test_sklearn_compat.py
@@ -4,10 +4,9 @@
 from sklearn.base import TransformerMixin
 from sklearn.pipeline import make_pipeline
 from sklearn.utils import check_random_state
-from sklearn.utils.estimator_checks import is_public_parameter
-from sklearn.utils.testing import (assert_allclose_dense_sparse,
-                                   set_random_state)
-
+from metric_learn.sklearn_shims import (assert_allclose_dense_sparse,
+                                        set_random_state, _get_args,
+                                        is_public_parameter, get_scorer)
 from metric_learn import (Covariance, LFDA, LMNN, MLKR, NCA,
                           ITML_Supervised, LSML_Supervised,
                           MMC_Supervised, RCA_Supervised, SDML_Supervised,
@@ -16,8 +15,6 @@
 import numpy as np
 from sklearn.model_selection import (cross_val_score, cross_val_predict,
                                      train_test_split, KFold)
-from sklearn.metrics.scorer import get_scorer
-from sklearn.utils.testing import _get_args
 from test.test_utils import (metric_learners, ids_metric_learners,
                              mock_preprocessor, tuples_learners,
                              ids_tuples_learners, pairs_learners,
@@ -32,7 +29,7 @@ def __init__(self, n_components=None,
                chunk_size=2, preprocessor=None, random_state=None):
     # this init makes RCA stable for scikit-learn examples.
     super(Stable_RCA_Supervised, self).__init__(
-        num_chunks=2, n_components=n_components,
+        n_chunks=2, n_components=n_components,
         chunk_size=chunk_size, preprocessor=preprocessor,
         random_state=random_state)
 
@@ -40,49 +37,52 @@ def __init__(self, n_components=None,
 class Stable_SDML_Supervised(SDML_Supervised):
 
   def __init__(self, sparsity_param=0.01,
-               num_constraints=None, verbose=False, preprocessor=None,
+               n_constraints=None, verbose=False, preprocessor=None,
                random_state=None):
     # this init makes SDML stable for scikit-learn examples.
     super(Stable_SDML_Supervised, self).__init__(
         sparsity_param=sparsity_param,
-        num_constraints=num_constraints, verbose=verbose,
+        n_constraints=n_constraints, verbose=verbose,
         preprocessor=preprocessor, balance_param=1e-5, prior='identity',
         random_state=random_state)
 
 
 class TestSklearnCompat(unittest.TestCase):
   def test_covariance(self):
-    check_estimator(Covariance)
+    check_estimator(Covariance())
 
   def test_lmnn(self):
-    check_estimator(LMNN)
+    check_estimator(LMNN())
 
   def test_lfda(self):
-    check_estimator(LFDA)
+    check_estimator(LFDA())
 
   def test_mlkr(self):
-    check_estimator(MLKR)
+    check_estimator(MLKR())
 
   def test_nca(self):
-    check_estimator(NCA)
+    check_estimator(NCA())
 
   def test_lsml(self):
-    check_estimator(LSML_Supervised)
+    check_estimator(LSML_Supervised())
 
   def test_itml(self):
-    check_estimator(ITML_Supervised)
+    check_estimator(ITML_Supervised())
 
   def test_mmc(self):
-    check_estimator(MMC_Supervised)
+    check_estimator(MMC_Supervised())
 
   def test_sdml(self):
-    check_estimator(Stable_SDML_Supervised)
+    check_estimator(Stable_SDML_Supervised())
 
   def test_rca(self):
-    check_estimator(Stable_RCA_Supervised)
+    check_estimator(Stable_RCA_Supervised())
 
   def test_scml(self):
-    check_estimator(SCML_Supervised)
+    msg = "As no value for `n_basis` was selected, "
+    with pytest.warns(UserWarning) as raised_warning:
+      check_estimator(SCML_Supervised())
+    assert msg in str(raised_warning[0].message)
 
 
 RNG = check_random_state(0)
@@ -121,7 +121,8 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor):
 
   # we subsample the data for the test to be more efficient
   input_data, _, labels, _ = train_test_split(input_data, labels,
-                                              train_size=20)
+                                              train_size=40,
+                                              random_state=42)
   X = X[:10]
 
   estimator = clone(estimator)
@@ -149,8 +150,19 @@ def test_array_like_inputs(estimator, build_dataset, with_preprocessor):
 
   pairs = np.array([[X[0], X[1]], [X[0], X[2]]])
   pairs_variants, _ = generate_array_like(pairs)
+
+  not_implemented_msg = ""
+  # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says
+  # "This learner does not have pair_distance"
+
   for pairs_variant in pairs_variants:
-    estimator.score_pairs(pairs_variant)
+    estimator.pair_score(pairs_variant)  # All learners have pair_score
+
+    # But not all of them will have pair_distance
+    try:
+      estimator.pair_distance(pairs_variant)
+    except Exception as raised_exception:  # plain exception, has no `.value`
+      assert raised_exception.args[0] == not_implemented_msg
 
 
 @pytest.mark.parametrize('with_preprocessor', [True, False])
@@ -160,7 +172,7 @@ def test_various_scoring_on_tuples_learners(estimator, build_dataset,
                                             with_preprocessor):
   """Tests that scikit-learn's scoring returns something finite,
   for other scoring than default scoring. (List of scikit-learn's scores can be
-  found in sklearn.metrics.scorer). For each type of output (predict,
+  found in sklearn.metrics._scorer). For each type of output (predict,
   predict_proba, decision_function), we test a bunch of scores.
   We only test on pairs learners because quadruplets don't have a y argument.
   """
@@ -226,7 +238,7 @@ def test_cross_validation_manual_vs_scikit(estimator, build_dataset,
     n_splits = 3
     kfold = KFold(shuffle=False, n_splits=n_splits)
     n_samples = input_data.shape[0]
-    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int)
+    fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int64)
     fold_sizes[:n_samples % n_splits] += 1
     current = 0
     scores, predictions = [], np.zeros(input_data.shape[0])
diff --git a/test/test_triplets_classifiers.py b/test/test_triplets_classifiers.py
index 10393919..515a0a33 100644
--- a/test/test_triplets_classifiers.py
+++ b/test/test_triplets_classifiers.py
@@ -2,10 +2,16 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.model_selection import train_test_split
 
-from test.test_utils import triplets_learners, ids_triplets_learners
-from sklearn.utils.testing import set_random_state
+from metric_learn import SCML
+from test.test_utils import (
+  triplets_learners,
+  ids_triplets_learners,
+  build_triplets
+)
+from metric_learn.sklearn_shims import set_random_state
 from sklearn import clone
 import numpy as np
+from numpy.testing import assert_array_equal
 
 
 @pytest.mark.parametrize('with_preprocessor', [True, False])
@@ -26,6 +32,49 @@ def test_predict_only_one_or_minus_one(estimator, build_dataset,
   assert len(not_valid) == 0
 
 
+@pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
+                         ids=ids_triplets_learners)
+def test_no_zero_prediction(estimator, build_dataset):
+  """
+  Test that no predicted value is zero, even when the
+  distances d(x, y) and d(x, z) are the same for a triplet of the
+  form (x, y, z), i.e. border cases.
+  """
+  triplets, _, _, X = build_dataset(with_preprocessor=False)
+  # Force 3 dimensions only, to use cross product and get easy orthogonal vec.
+  triplets = np.array([[t[0][:3], t[1][:3], t[2][:3]] for t in triplets])
+  X = X[:, :3]
+  # Dummy fit
+  estimator = clone(estimator)
+  set_random_state(estimator)
+  estimator.fit(triplets)
+  # We force the transformation to be identity, to force euclidean distance
+  estimator.components_ = np.eye(X.shape[1])
+
+  # Get two orthogonal vectors with respect to X[1]
+  k = X[1] / np.linalg.norm(X[1])  # Normalize first vector
+  x = X[2] - X[2].dot(k) * k  # Get random orthogonal vector
+  x /= np.linalg.norm(x)  # Normalize
+  y = np.cross(k, x)  # Get a vector orthogonal to both k and x
+  # Assert these orthogonal vectors are different
+  with pytest.raises(AssertionError):
+    assert_array_equal(X[1], x)
+  with pytest.raises(AssertionError):
+    assert_array_equal(X[1], y)
+  # Assert the distance is the same for both
+  assert estimator.get_metric()(X[1], x) == estimator.get_metric()(X[1], y)
+
+  # Form the three scenarios where predict() gives 0 with numpy.sign
+  triplets_test = np.array(  # Critical examples
+    [[X[0], X[2], X[2]],
+     [X[1], X[1], X[1]],
+     [X[1], x, y]])
+  # Predict
+  predictions = estimator.predict(triplets_test)
+  # Check there are no zero values
+  assert np.sum(predictions == 0) == 0
+
+
 @pytest.mark.parametrize('with_preprocessor', [True, False])
 @pytest.mark.parametrize('estimator, build_dataset', triplets_learners,
                          ids=ids_triplets_learners)
@@ -63,3 +112,16 @@ def test_accuracy_toy_example(estimator, build_dataset):
   # we force the transformation to be identity so that we control what it does
   estimator.components_ = np.eye(X.shape[1])
   assert estimator.score(triplets_test) == 0.25
+
+
+def test_raise_big_number_of_features():
+  triplets, _, _, X = build_triplets(with_preprocessor=False)
+  triplets = triplets[:3, :, :]
+  estimator = SCML(n_basis=320)
+  set_random_state(estimator)
+  with pytest.raises(ValueError) as exc_info:
+    estimator.fit(triplets)
+  assert exc_info.value.args[0] == \
+         "Number of features (4) is greater than the number of triplets(3)." \
+         "\nConsider using dimensionality reduction or using another basis " \
+         "generation scheme."
diff --git a/test/test_utils.py b/test/test_utils.py
index fdcb864a..c0383792 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,3 +1,4 @@
+import warnings
 import pytest
 from scipy.linalg import eigh, pinvh
 from collections import namedtuple
@@ -5,7 +6,7 @@
 from numpy.testing import assert_array_equal, assert_equal
 from sklearn.model_selection import train_test_split
 from sklearn.utils import check_random_state, shuffle
-from sklearn.utils.testing import set_random_state
+from metric_learn.sklearn_shims import set_random_state
 from sklearn.base import clone
 from metric_learn._util import (check_input, make_context, preprocess_tuples,
                                 make_name, preprocess_points,
@@ -60,11 +61,11 @@ def build_regression(with_preprocessor=False):
 def build_data():
   input_data, labels = load_iris(return_X_y=True)
   X, y = shuffle(input_data, labels, random_state=SEED)
-  num_constraints = 50
+  n_constraints = 50
   constraints = Constraints(y)
   pairs = (
       constraints
-      .positive_negative_pairs(num_constraints, same_length=True,
+      .positive_negative_pairs(n_constraints, same_length=True,
                                random_state=check_random_state(SEED)))
   return X, pairs
 
@@ -117,7 +118,7 @@ def build_quadruplets(with_preprocessor=False):
                                 [learner for (learner, _) in
                                  quadruplets_learners]))
 
-triplets_learners = [(SCML(), build_triplets)]
+triplets_learners = [(SCML(n_basis=320), build_triplets)]
 ids_triplets_learners = list(map(lambda x: x.__class__.__name__,
                              [learner for (learner, _) in
                               triplets_learners]))
@@ -137,10 +138,10 @@ def build_quadruplets(with_preprocessor=False):
                (ITML_Supervised(max_iter=5), build_classification),
                (LSML_Supervised(), build_classification),
                (MMC_Supervised(max_iter=5), build_classification),
-               (RCA_Supervised(num_chunks=5), build_classification),
+               (RCA_Supervised(n_chunks=5), build_classification),
                (SDML_Supervised(prior='identity', balance_param=1e-5),
                build_classification),
-               (SCML_Supervised(), build_classification)]
+               (SCML_Supervised(n_basis=80), build_classification)]
 ids_classifiers = list(map(lambda x: x.__class__.__name__,
                            [learner for (learner, _) in
                             classifiers]))
@@ -353,7 +354,7 @@ def test_check_tuples_valid_tuple_size(tuple_size):
   checks that checking the number of tuples (pairs, quadruplets, etc) raises
   no warning if there is the right number of points in a tuple.
   """
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(tuples_prep(), type_of_inputs='tuples',
                 preprocessor=mock_preprocessor, tuple_size=tuple_size)
     check_input(tuples_no_prep(), type_of_inputs='tuples', preprocessor=None,
@@ -378,7 +379,7 @@ def test_check_tuples_valid_tuple_size(tuple_size):
                                     [[2.6, 2.3], [3.4, 5.0]]])])
 def test_check_tuples_valid_with_preprocessor(tuples):
   """Test that valid inputs when using a preprocessor raises no warning"""
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(tuples, type_of_inputs='tuples',
                 preprocessor=mock_preprocessor)
   assert len(record) == 0
@@ -399,7 +400,7 @@ def test_check_tuples_valid_with_preprocessor(tuples):
                            ((3, 1), (4, 4), (29, 4)))])
 def test_check_tuples_valid_without_preprocessor(tuples):
   """Test that valid inputs when using no preprocessor raises no warning"""
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(tuples, type_of_inputs='tuples', preprocessor=None)
   assert len(record) == 0
 
@@ -408,12 +409,12 @@ def test_check_tuples_behaviour_auto_dtype():
   """Checks that check_tuples allows by default every type if using a
   preprocessor, and numeric types if using no preprocessor"""
   tuples_prep = [['img1.png', 'img2.png'], ['img3.png', 'img5.png']]
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(tuples_prep, type_of_inputs='tuples',
                 preprocessor=mock_preprocessor)
   assert len(record) == 0
 
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
       check_input(tuples_no_prep(), type_of_inputs='tuples')  # numeric type
   assert len(record) == 0
 
@@ -549,7 +550,7 @@ def test_check_classic_invalid_dtype_not_convertible(preprocessor, points):
                                     [2.6, 2.3]])])
 def test_check_classic_valid_with_preprocessor(points):
   """Test that valid inputs when using a preprocessor raises no warning"""
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(points, type_of_inputs='classic',
                 preprocessor=mock_preprocessor)
   assert len(record) == 0
@@ -570,7 +571,7 @@ def test_check_classic_valid_with_preprocessor(points):
                            (3, 1, 4, 4, 29, 4))])
 def test_check_classic_valid_without_preprocessor(points):
   """Test that valid inputs when using no preprocessor raises no warning"""
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(points, type_of_inputs='classic', preprocessor=None)
   assert len(record) == 0
 
@@ -585,12 +586,12 @@ def test_check_classic_behaviour_auto_dtype():
   """Checks that check_input (for points) allows by default every type if
   using a preprocessor, and numeric types if using no preprocessor"""
   points_prep = ['img1.png', 'img2.png', 'img3.png', 'img5.png']
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
     check_input(points_prep, type_of_inputs='classic',
                 preprocessor=mock_preprocessor)
   assert len(record) == 0
 
-  with pytest.warns(None) as record:
+  with warnings.catch_warnings(record=True) as record:
       check_input(points_no_prep(), type_of_inputs='classic')  # numeric type
   assert len(record) == 0
 
@@ -834,9 +835,9 @@ def test_error_message_tuple_size(estimator, _):
 
 @pytest.mark.parametrize('estimator, _', metric_learners,
                          ids=ids_metric_learners)
-def test_error_message_t_score_pairs(estimator, _):
-  """tests that if you want to score_pairs on triplets for instance, it returns
-  the right error message
+def test_error_message_t_pair_distance_or_score(estimator, _):
+  """Tests that if you want to pair_distance or pair_score on triplets
+  for instance, it returns the right error message
   """
   estimator = clone(estimator)
   set_random_state(estimator)
@@ -844,12 +845,22 @@ def test_error_message_t_score_pairs(estimator, _):
   triplets = np.array([[[1.3, 6.3], [3., 6.8], [6.5, 4.4]],
                        [[1.9, 5.3], [1., 7.8], [3.2, 1.2]]])
   with pytest.raises(ValueError) as raised_err:
-    estimator.score_pairs(triplets)
+    estimator.pair_score(triplets)
   expected_msg = ("Tuples of 2 element(s) expected{}. Got tuples of 3 "
                   "element(s) instead (shape=(2, 3, 2)):\ninput={}.\n"
                   .format(make_context(estimator), triplets))
   assert str(raised_err.value) == expected_msg
 
+  not_implemented_msg = ""
+  # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says
+  # "This learner does not have pair_distance"
+
+  # One exception will trigger for sure
+  with pytest.raises(Exception) as raised_exception:
+      estimator.pair_distance(triplets)
+  err_value = raised_exception.value.args[0]
+  assert err_value == expected_msg or err_value == not_implemented_msg
+
 
 def test_preprocess_tuples_simple_example():
   """Test the preprocessor on a very simple example of tuples to ensure the
@@ -930,31 +941,59 @@ def test_same_with_or_without_preprocessor(estimator, build_dataset):
                                         method)(formed_test)
       assert np.array(output_with_prep == output_with_prep_formed).all()
 
-  # test score_pairs
-  output_with_prep = estimator_with_preprocessor.score_pairs(
-      indicators_to_transform[[[[0, 2], [5, 3]]]])
-  output_without_prep = estimator_without_preprocessor.score_pairs(
-      formed_points_to_transform[[[[0, 2], [5, 3]]]])
+  # Test pair_score, all learners have it.
+  idx1 = np.array([[0, 2], [5, 3]], dtype=int)
+  output_with_prep = estimator_with_preprocessor.pair_score(
+      indicators_to_transform[idx1])
+  output_without_prep = estimator_without_preprocessor.pair_score(
+      formed_points_to_transform[idx1])
   assert np.array(output_with_prep == output_without_prep).all()
 
-  output_with_prep = estimator_with_preprocessor.score_pairs(
-      indicators_to_transform[[[[0, 2], [5, 3]]]])
-  output_without_prep = estimator_with_prep_formed.score_pairs(
-      formed_points_to_transform[[[[0, 2], [5, 3]]]])
+  output_with_prep = estimator_with_preprocessor.pair_score(
+      indicators_to_transform[idx1])
+  output_without_prep = estimator_with_prep_formed.pair_score(
+      formed_points_to_transform[idx1])
   assert np.array(output_with_prep == output_without_prep).all()
 
-  # test transform
-  output_with_prep = estimator_with_preprocessor.transform(
-      indicators_to_transform)
-  output_without_prep = estimator_without_preprocessor.transform(
-      formed_points_to_transform)
-  assert np.array(output_with_prep == output_without_prep).all()
-
-  output_with_prep = estimator_with_preprocessor.transform(
-      indicators_to_transform)
-  output_without_prep = estimator_with_prep_formed.transform(
-      formed_points_to_transform)
-  assert np.array(output_with_prep == output_without_prep).all()
+  # Test pair_distance
+  not_implemented_msg = ""
+  # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says
+  # "This learner does not have pair_distance"
+  try:
+    output_with_prep = estimator_with_preprocessor.pair_distance(
+        indicators_to_transform[idx1])
+    output_without_prep = estimator_without_preprocessor.pair_distance(
+        formed_points_to_transform[idx1])
+    assert np.array(output_with_prep == output_without_prep).all()
+
+    output_with_prep = estimator_with_preprocessor.pair_distance(
+        indicators_to_transform[idx1])
+    output_without_prep = estimator_with_prep_formed.pair_distance(
+        formed_points_to_transform[idx1])
+    assert np.array(output_with_prep == output_without_prep).all()
+
+  except Exception as raised_exception:  # plain exception, has no `.value`
+    assert raised_exception.args[0] == not_implemented_msg
+
+  # Test transform
+  not_implemented_msg = ""
+  # Todo in 0.7.0: Change 'not_implemented_msg' for the message that says
+  # "This learner does not have transform"
+  try:
+    output_with_prep = estimator_with_preprocessor.transform(
+        indicators_to_transform)
+    output_without_prep = estimator_without_preprocessor.transform(
+        formed_points_to_transform)
+    assert np.array(output_with_prep == output_without_prep).all()
+
+    output_with_prep = estimator_with_preprocessor.transform(
+        indicators_to_transform)
+    output_without_prep = estimator_with_prep_formed.transform(
+        formed_points_to_transform)
+    assert np.array(output_with_prep == output_without_prep).all()
+
+  except Exception as raised_exception:  # plain exception, has no `.value`
+    assert raised_exception.args[0] == not_implemented_msg
 
 
 def test_check_collapsed_pairs_raises_no_error():
@@ -1055,6 +1094,53 @@ def test__check_sdp_from_eigen_returns_definiteness(w, is_definite):
   assert _check_sdp_from_eigen(w) == is_definite
 
 
+@pytest.mark.unit
+@pytest.mark.parametrize('w, tol, is_definite',
+                         [(np.array([5., 3.]), 2, True),
+                          (np.array([5., 1.]), 2, False),
+                          (np.array([5., -1.]), 2, False)])
+def test__check_sdp_from_eigen_tol_psd(w, tol, is_definite):
+  """Tests that _check_sdp_from_eigen, for PSD matrices, returns
+  False if an eigenvalue is lower than tol"""
+  assert _check_sdp_from_eigen(w, tol=tol) == is_definite
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w, tol',
+                         [(np.array([5., -3.]), 2),
+                          (np.array([1., -3.]), 2)])
+def test__check_sdp_from_eigen_tol_non_psd(w, tol):
+  """Tests that _check_sdp_from_eigen raises a NonPSDError
+  when there is a negative value with abs value higher than tol"""
+  with pytest.raises(NonPSDError):
+    _check_sdp_from_eigen(w, tol=tol)
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w, is_definite',
+                         [(np.array([1e5, 1e5, 1e5, 1e5,
+                                     1e5, 1e5, 1e-20]), False),
+                          (np.array([1e-10, 1e-10]), True)])
+def test__check_sdp_from_eigen_tol_default_psd(w, is_definite):
+  """Tests that the default tol argument gives good results for edge cases:
+  when the determinant is high but one eigenvalue is clearly low (not
+  definite, so returns False), or when all eigenvalues are low (definite, so
+  returns True)"""
+  assert _check_sdp_from_eigen(w, tol=None) == is_definite
+
+
+@pytest.mark.unit
+@pytest.mark.parametrize('w',
+                         [np.array([1., -1.]),
+                          np.array([-1e-10, 1e-10])])
+def test__check_sdp_from_eigen_tol_default_non_psd(w):
+  """Tests that the default tol argument is good for raising
+  NonPSDError, e.g. that when a value is clearly relatively
+  negative it raises such an error"""
+  with pytest.raises(NonPSDError):
+    _check_sdp_from_eigen(w, tol=None)
+
+
 def test__check_n_components():
   """Checks that n_components returns what is expected
   (including the errors)"""