From 8ad7472b3ca7affec1cbd867121625722efc2fb5 Mon Sep 17 00:00:00 2001
From: Jonas Seiler <jse@plista.com>
Date: Mon, 19 May 2014 14:36:51 +0200
Subject: [PATCH 1/6] Allow multiple input feature with the same value for
 gaussian processes

---
 sklearn/gaussian_process/gaussian_process.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py
index 2c630ef20e747..3d08a61798e73 100644
--- a/sklearn/gaussian_process/gaussian_process.py
+++ b/sklearn/gaussian_process/gaussian_process.py
@@ -296,10 +296,6 @@ def fit(self, X, y):
 
         # Calculate matrix of distances D between samples
         D, ij = l1_cross_distances(X)
-        if (np.min(np.sum(D, axis=1)) == 0.
-                and self.corr != correlation.pure_nugget):
-            raise Exception("Multiple input features cannot have the same"
-                            " value.")
 
         # Regression matrix and parameters
         F = self.regr(X)
@@ -586,9 +582,6 @@ def reduced_likelihood_function(self, theta=None):
         if D is None:
             # Light storage mode (need to recompute D, ij and F)
             D, ij = l1_cross_distances(self.X)
-            if (np.min(np.sum(D, axis=1)) == 0.
-                    and self.corr != correlation.pure_nugget):
-                raise Exception("Multiple X are not allowed")
             F = self.regr(self.X)
 
         # Set up R
@@ -715,8 +708,8 @@ def minus_reduced_likelihood_function(log10t):
                     # Generate a random starting point log10-uniformly
                     # distributed between bounds
                     log10theta0 = np.log10(self.thetaL) \
-                        + rand(self.theta0.size).reshape(self.theta0.shape) \
-                        * np.log10(self.thetaU / self.thetaL)
+                                  + rand(self.theta0.size).reshape(self.theta0.shape) \
+                                  * np.log10(self.thetaU / self.thetaL)
                     theta0 = 10. ** log10theta0
 
                 # Run Cobyla
@@ -788,7 +781,7 @@ def corr_cut(t, d):
                     return corr(array2d(np.hstack([optimal_theta[0][0:i],
                                                    t[0],
                                                    optimal_theta[0][(i + 1)::]]
-                                                  )), d)
+                    )), d)
 
                 self.corr = corr_cut
                 optimal_theta[0, i], optimal_rlf_value, optimal_par = \
@@ -873,7 +866,7 @@ def _check_params(self, n_samples=None):
         if np.any(self.nugget) < 0.:
             raise ValueError("nugget must be positive or zero.")
         if (n_samples is not None
-                and self.nugget.shape not in [(), (n_samples,)]):
+            and self.nugget.shape not in [(), (n_samples,)]):
             raise ValueError("nugget must be either a scalar "
                              "or array of length n_samples.")
 

From 5d6bd5ad5c2c76b2478fca91197ec2baf332432a Mon Sep 17 00:00:00 2001
From: Jonas Seiler <jse@plista.com>
Date: Wed, 21 May 2014 14:00:11 +0200
Subject: [PATCH 2/6] do not allow multiple feature for non-noisy data.
 Unittest for noisy data #3162

---
 sklearn/gaussian_process/gaussian_process.py  |  17 ++-
 .../tests/test_gaussian_process.py            | 139 +++++++++++++++---
 2 files changed, 131 insertions(+), 25 deletions(-)

diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py
index 3d08a61798e73..e63998cc712d2 100644
--- a/sklearn/gaussian_process/gaussian_process.py
+++ b/sklearn/gaussian_process/gaussian_process.py
@@ -16,6 +16,7 @@
 from . import correlation_models as correlation
 
 MACHINE_EPSILON = np.finfo(np.double).eps
+DEFAULT_NUGGET = 10. * MACHINE_EPSILON
 
 
 def l1_cross_distances(X):
@@ -221,7 +222,7 @@ def __init__(self, regr='constant', corr='squared_exponential', beta0=None,
                  storage_mode='full', verbose=False, theta0=1e-1,
                  thetaL=None, thetaU=None, optimizer='fmin_cobyla',
                  random_start=1, normalize=True,
-                 nugget=10. * MACHINE_EPSILON, random_state=None):
+                 nugget=DEFAULT_NUGGET, random_state=None):
 
         self.regr = regr
         self.corr = corr
@@ -296,6 +297,9 @@ def fit(self, X, y):
 
         # Calculate matrix of distances D between samples
         D, ij = l1_cross_distances(X)
+        if self._prohibitMultipleInputFeature(D):
+            raise Exception("Multiple input features cannot have the same"
+                            " value for non-noisy data.")
 
         # Regression matrix and parameters
         F = self.regr(X)
@@ -521,6 +525,15 @@ def predict(self, X, eval_MSE=False, batch_size=None):
 
                 return y
 
+    def _prohibitMultipleInputFeature(self, D):
+        """
+        Multiple input features are only allowed for noisy data
+        """
+        return self.nugget <= DEFAULT_NUGGET and \
+               np.min(np.sum(D, axis=1)) == 0. and \
+               self.corr != correlation.pure_nugget
+
+
     def reduced_likelihood_function(self, theta=None):
         """
         This function determines the BLUP parameters and evaluates the reduced
@@ -582,6 +595,8 @@ def reduced_likelihood_function(self, theta=None):
         if D is None:
             # Light storage mode (need to recompute D, ij and F)
             D, ij = l1_cross_distances(self.X)
+            if self._prohibitMultipleInputFeature(D):
+                raise Exception("Multiple X only make sense for noisy data")
             F = self.regr(self.X)
 
         # Set up R
diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py
index 61d617690edb9..efc3f1511bc1d 100644
--- a/sklearn/gaussian_process/tests/test_gaussian_process.py
+++ b/sklearn/gaussian_process/tests/test_gaussian_process.py
@@ -9,6 +9,9 @@
 from nose.tools import assert_true
 
 import numpy as np
+import scipy.stats as ss
+
+import pylab as pl
 
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.gaussian_process import regression_models as regression
@@ -39,7 +42,7 @@ def test_1d(regr=regression.constant, corr=correlation.squared_exponential,
                 and np.allclose(MSE2, 0., atol=10))
 
 
-def test_2d(regr=regression.constant, corr=correlation.squared_exponential,
+def check2d(X, regr=regression.constant, corr=correlation.squared_exponential,
             random_start=10, beta0=None):
     """
     MLE estimation of a two-dimensional Gaussian Process model accounting for
@@ -49,14 +52,7 @@ def test_2d(regr=regression.constant, corr=correlation.squared_exponential,
     """
     b, kappa, e = 5., .5, .1
     g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
-    X = np.array([[-4.61611719, -6.00099547],
-                  [4.10469096, 5.32782448],
-                  [0.00000000, -0.50000000],
-                  [-6.17289014, -4.6984743],
-                  [1.3109306, -6.93271427],
-                  [-5.03823144, 3.10584743],
-                  [-2.87600388, 6.74310541],
-                  [5.21301203, 4.26386883]])
+
     y = g(X).ravel()
     gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
                          theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
@@ -68,7 +64,7 @@ def test_2d(regr=regression.constant, corr=correlation.squared_exponential,
     assert_true(np.allclose(y_pred, y) and np.allclose(MSE, 0.))
 
 
-def test_2d_2d(regr=regression.constant, corr=correlation.squared_exponential,
+def check2d_2d(X, regr=regression.constant, corr=correlation.squared_exponential,
                random_start=10, beta0=None):
     """
     MLE estimation of a two-dimensional Gaussian Process model accounting for
@@ -79,14 +75,7 @@ def test_2d_2d(regr=regression.constant, corr=correlation.squared_exponential,
     b, kappa, e = 5., .5, .1
     g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
     f = lambda x: np.vstack((g(x), g(x))).T
-    X = np.array([[-4.61611719, -6.00099547],
-                  [4.10469096, 5.32782448],
-                  [0.00000000, -0.50000000],
-                  [-6.17289014, -4.6984743],
-                  [1.3109306, -6.93271427],
-                  [-5.03823144, 3.10584743],
-                  [-2.87600388, 6.74310541],
-                  [5.21301203, 4.26386883]])
+
     y = f(X)
     gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
                          theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
@@ -112,10 +101,19 @@ def test_more_builtin_correlation_models(random_start=1):
     all_corr = ['absolute_exponential', 'squared_exponential', 'cubic',
                 'linear']
 
+    X = np.array([[-4.61611719, -6.00099547],
+                  [4.10469096, 5.32782448],
+                  [0.00000000, -0.50000000],
+                  [-6.17289014, -4.6984743],
+                  [1.3109306, -6.93271427],
+                  [-5.03823144, 3.10584743],
+                  [-2.87600388, 6.74310541],
+                  [5.21301203, 4.26386883]])
+
     for corr in all_corr:
         test_1d(regr='constant', corr=corr, random_start=random_start)
-        test_2d(regr='constant', corr=corr, random_start=random_start)
-        test_2d_2d(regr='constant', corr=corr, random_start=random_start)
+        check2d(X, regr='constant', corr=corr, random_start=random_start)
+        check2d_2d(X, regr='constant', corr=corr, random_start=random_start)
 
 
 def test_ordinary_kriging():
@@ -123,15 +121,108 @@ def test_ordinary_kriging():
     Repeat test_1d and test_2d with given regression weights (beta0) for
     different regression models (Ordinary Kriging).
     """
+
+    X = np.array([[-4.61611719, -6.00099547],
+                  [4.10469096, 5.32782448],
+                  [0.00000000, -0.50000000],
+                  [-6.17289014, -4.6984743],
+                  [1.3109306, -6.93271427],
+                  [-5.03823144, 3.10584743],
+                  [-2.87600388, 6.74310541],
+                  [5.21301203, 4.26386883]])
+
     test_1d(regr='linear', beta0=[0., 0.5])
     test_1d(regr='quadratic', beta0=[0., 0.5, 0.5])
-    test_2d(regr='linear', beta0=[0., 0.5, 0.5])
-    test_2d(regr='quadratic', beta0=[0., 0.5, 0.5, 0.5, 0.5, 0.5])
-    test_2d_2d(regr='linear', beta0=[0., 0.5, 0.5])
-    test_2d_2d(regr='quadratic', beta0=[0., 0.5, 0.5, 0.5, 0.5, 0.5])
+    check2d(X, regr='linear', beta0=[0., 0.5, 0.5])
+    check2d(X, regr='quadratic', beta0=[0., 0.5, 0.5, 0.5, 0.5, 0.5])
+    check2d_2d(X, regr='linear', beta0=[0., 0.5, 0.5])
+    check2d_2d(X, regr='quadratic', beta0=[0., 0.5, 0.5, 0.5, 0.5, 0.5])
 
 
 def test_no_normalize():
     gp = GaussianProcess(normalize=False).fit(X, y)
     y_pred = gp.predict(X)
     assert_true(np.allclose(y_pred, y))
+
+
+@raises(Exception)
+def test_no_multiple_feature():
+    '''
+    check that multiple features are not allowed for non-noisy data
+    '''
+    X = np.array([[-4.61611719, -6.00099547],
+                  [4.10469096, 5.32782448],
+                  [0.00000000, -0.50000000],
+                  [-6.17289014, -4.6984743],
+                  [-6.17289014, -4.6984743],
+                  [1.3109306, -6.93271427],
+                  [-5.03823144, 3.10584743],
+                  [-2.87600388, 6.74310541],
+                  [5.21301203, 4.26386883]])
+
+    check2d(X)
+
+
+def check2dNoisy(X, regr=regression.constant, corr=correlation.squared_exponential,
+                 random_start=10, beta0=None):
+    """
+    MLE estimation of a two-dimensional Gaussian Process model accounting for
+    anisotropy. Check random start optimization.
+
+    Test the interpolating property.
+    """
+    b, kappa, e = 5., .5, .1
+    g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
+
+    y = g(X).ravel() + np.random.normal(0, 0.1, X.shape[0])
+    gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
+                         theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
+                         thetaU=[1.] * 2,
+                         random_start=random_start, verbose=False, nugget=0.1)
+    gp.fit(X, y)
+    y_pred, MSE = gp.predict(X, eval_MSE=True)
+
+    fig = pl.figure()
+    pl.plot(X, f(X), 'r:', label=u'$f(x) = x\,\sin(x)$')
+    pl.plot(X, y, 'r.', markersize=10, label=u'Observations')
+    pl.plot(X, y_pred, 'b-', label=u'Prediction')
+    pl.fill(np.concatenate([X, X[::-1]]),
+            np.concatenate([y_pred - 1.9600 * MSE,
+                            (y_pred + 1.9600 * MSE)[::-1]]),
+            alpha=.5, fc='b', ec='None', label='95% confidence interval')
+    pl.xlabel('$x$')
+    pl.ylabel('$f(x)$')
+    pl.ylim(-10, 10)
+    pl.legend(loc='upper left')
+    pl.show()
+
+    print((np.abs(y_pred - y) <= ss.norm.ppf(0.999, y_pred, np.sqrt(MSE))))
+
+    assert_true((np.abs(y_pred - y) <= ss.norm.ppf(0.999, y_pred, np.sqrt(
+        MSE))).all())  #check that difference between prediction and truth is smaller than 99.9% conf int.
+
+
+def test_1d_noisy(regr=regression.constant, corr=correlation.absolute_exponential,
+                  random_start=10, beta0=None):
+    """
+    MLE estimation of a one-dimensional Gaussian Process model.
+    Check random start optimization with noisy / duplicate inputs.
+
+    Test the interpolating property.
+    """
+
+    X = np.atleast_2d([1., 3., 5., 6., 7., 8., 9., 10.] * 2).T
+    x = np.atleast_2d(np.linspace(0, 10, 50)).T
+
+    y = f(X).ravel() + np.random.normal(0, 0.1, len(X))
+
+    gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
+                         theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
+                         random_start=random_start, verbose=False, nugget=0.01).fit(X, y)
+    y_pred, MSE = gp.predict(x, eval_MSE=True)
+
+    y = f(x).ravel()
+    assert_true((np.abs(y_pred - y) <= np.abs(
+        ss.norm.ppf(0.025, y_pred, np.sqrt(MSE)))  ).all())  #check that true value is within 95% conf. int.
+
+

From d183e210adafa34f4dc48647fb512bcce336e96b Mon Sep 17 00:00:00 2001
From: Jonas Seiler <jse@plista.com>
Date: Wed, 21 May 2014 14:05:23 +0200
Subject: [PATCH 3/6] remove unused code

---
 .../tests/test_gaussian_process.py            | 39 -------------------
 1 file changed, 39 deletions(-)

diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py
index efc3f1511bc1d..86f7ea438cd2d 100644
--- a/sklearn/gaussian_process/tests/test_gaussian_process.py
+++ b/sklearn/gaussian_process/tests/test_gaussian_process.py
@@ -163,45 +163,6 @@ def test_no_multiple_feature():
     check2d(X)
 
 
-def check2dNoisy(X, regr=regression.constant, corr=correlation.squared_exponential,
-                 random_start=10, beta0=None):
-    """
-    MLE estimation of a two-dimensional Gaussian Process model accounting for
-    anisotropy. Check random start optimization.
-
-    Test the interpolating property.
-    """
-    b, kappa, e = 5., .5, .1
-    g = lambda x: b - x[:, 1] - kappa * (x[:, 0] - e) ** 2.
-
-    y = g(X).ravel() + np.random.normal(0, 0.1, X.shape[0])
-    gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
-                         theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
-                         thetaU=[1.] * 2,
-                         random_start=random_start, verbose=False, nugget=0.1)
-    gp.fit(X, y)
-    y_pred, MSE = gp.predict(X, eval_MSE=True)
-
-    fig = pl.figure()
-    pl.plot(X, f(X), 'r:', label=u'$f(x) = x\,\sin(x)$')
-    pl.plot(X, y, 'r.', markersize=10, label=u'Observations')
-    pl.plot(X, y_pred, 'b-', label=u'Prediction')
-    pl.fill(np.concatenate([X, X[::-1]]),
-            np.concatenate([y_pred - 1.9600 * MSE,
-                            (y_pred + 1.9600 * MSE)[::-1]]),
-            alpha=.5, fc='b', ec='None', label='95% confidence interval')
-    pl.xlabel('$x$')
-    pl.ylabel('$f(x)$')
-    pl.ylim(-10, 10)
-    pl.legend(loc='upper left')
-    pl.show()
-
-    print((np.abs(y_pred - y) <= ss.norm.ppf(0.999, y_pred, np.sqrt(MSE))))
-
-    assert_true((np.abs(y_pred - y) <= ss.norm.ppf(0.999, y_pred, np.sqrt(
-        MSE))).all())  #check that difference between prediction and truth is smaller than 99.9% conf int.
-
-
 def test_1d_noisy(regr=regression.constant, corr=correlation.absolute_exponential,
                   random_start=10, beta0=None):
     """

From 3240ae1dce0116d2f9f43bd6968f23bb389647b1 Mon Sep 17 00:00:00 2001
From: Jonas Seiler <jse@plista.com>
Date: Thu, 22 May 2014 09:52:37 +0200
Subject: [PATCH 4/6] remvoe unused import

---
 .../gaussian_process/tests/test_gaussian_process.py   | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py
index 86f7ea438cd2d..c81e8df70932c 100644
--- a/sklearn/gaussian_process/tests/test_gaussian_process.py
+++ b/sklearn/gaussian_process/tests/test_gaussian_process.py
@@ -4,6 +4,7 @@
 
 # Author: Vincent Dubourg <vincent.dubourg@gmail.com>
 # Licence: BSD 3 clause
+from macpath import norm_error
 
 from nose.tools import raises
 from nose.tools import assert_true
@@ -11,8 +12,6 @@
 import numpy as np
 import scipy.stats as ss
 
-import pylab as pl
-
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.gaussian_process import regression_models as regression
 from sklearn.gaussian_process import correlation_models as correlation
@@ -65,7 +64,7 @@ def check2d(X, regr=regression.constant, corr=correlation.squared_exponential,
 
 
 def check2d_2d(X, regr=regression.constant, corr=correlation.squared_exponential,
-               random_start=10, beta0=None):
+               random_start=10, beta0=None, theta0=[1e-2] * 2, thetaL=[1e-4] * 2, thetaU=[1e-1] * 2):
     """
     MLE estimation of a two-dimensional Gaussian Process model accounting for
     anisotropy. Check random start optimization.
@@ -78,8 +77,8 @@ def check2d_2d(X, regr=regression.constant, corr=correlation.squared_exponential
 
     y = f(X)
     gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
-                         theta0=[1e-2] * 2, thetaL=[1e-4] * 2,
-                         thetaU=[1e-1] * 2,
+                         theta0=theta0, thetaL=thetaL,
+                         thetaU=thetaU,
                          random_start=random_start, verbose=False)
     gp.fit(X, y)
     y_pred, MSE = gp.predict(X, eval_MSE=True)
@@ -185,5 +184,3 @@ def test_1d_noisy(regr=regression.constant, corr=correlation.absolute_exponentia
     y = f(x).ravel()
     assert_true((np.abs(y_pred - y) <= np.abs(
         ss.norm.ppf(0.025, y_pred, np.sqrt(MSE)))  ).all())  #check that true value is within 95% conf. int.
-
-

From 3fb6e7366b5363dc5e6c6f7e2e175281645f6049 Mon Sep 17 00:00:00 2001
From: Jonas Seiler <jse@plista.com>
Date: Thu, 22 May 2014 10:36:17 +0200
Subject: [PATCH 5/6] simpler assertion in gp test

---
 sklearn/gaussian_process/tests/test_gaussian_process.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py
index c81e8df70932c..b5460718192d7 100644
--- a/sklearn/gaussian_process/tests/test_gaussian_process.py
+++ b/sklearn/gaussian_process/tests/test_gaussian_process.py
@@ -4,8 +4,6 @@
 
 # Author: Vincent Dubourg <vincent.dubourg@gmail.com>
 # Licence: BSD 3 clause
-from macpath import norm_error
-
 from nose.tools import raises
 from nose.tools import assert_true
 
@@ -182,5 +180,5 @@ def test_1d_noisy(regr=regression.constant, corr=correlation.absolute_exponentia
     y_pred, MSE = gp.predict(x, eval_MSE=True)
 
     y = f(x).ravel()
-    assert_true((np.abs(y_pred - y) <= np.abs(
-        ss.norm.ppf(0.025, y_pred, np.sqrt(MSE)))  ).all())  #check that true value is within 95% conf. int.
+    assert_true((np.abs(y_pred - y) <= (1.96 * MSE)  ).all())  #check that true value is within 95% conf. int.
+

From b3e93744f4fa8d261b3fbd930401cc9ae212e903 Mon Sep 17 00:00:00 2001
From: Jonas Seiler <jse@plista.com>
Date: Fri, 20 Jun 2014 13:32:21 +0200
Subject: [PATCH 6/6] small cosmetic fixes

---
 sklearn/gaussian_process/gaussian_process.py     | 16 +++++++---------
 .../tests/test_gaussian_process.py               |  7 +++----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/sklearn/gaussian_process/gaussian_process.py b/sklearn/gaussian_process/gaussian_process.py
index e63998cc712d2..25acc81df27b2 100644
--- a/sklearn/gaussian_process/gaussian_process.py
+++ b/sklearn/gaussian_process/gaussian_process.py
@@ -297,9 +297,8 @@ def fit(self, X, y):
 
         # Calculate matrix of distances D between samples
         D, ij = l1_cross_distances(X)
-        if self._prohibitMultipleInputFeature(D):
-            raise Exception("Multiple input features cannot have the same"
-                            " value for non-noisy data.")
+        if self._repeated_samples_prohibited(D):
+            raise ValueError("Repeated samples (rows in X) are not allowed for noise-free models")
 
         # Regression matrix and parameters
         F = self.regr(X)
@@ -525,13 +524,12 @@ def predict(self, X, eval_MSE=False, batch_size=None):
 
                 return y
 
-    def _prohibitMultipleInputFeature(self, D):
+    def _repeated_samples_prohibited(self, D):
         """
         Multiple input features are only allowed for noisy data
         """
-        return self.nugget <= DEFAULT_NUGGET and \
-               np.min(np.sum(D, axis=1)) == 0. and \
-               self.corr != correlation.pure_nugget
+        return self.nugget <= DEFAULT_NUGGET and np.min(
+            np.sum(D, axis=1)) == 0. and self.corr != correlation.pure_nugget
 
 
     def reduced_likelihood_function(self, theta=None):
@@ -595,8 +593,8 @@ def reduced_likelihood_function(self, theta=None):
         if D is None:
             # Light storage mode (need to recompute D, ij and F)
             D, ij = l1_cross_distances(self.X)
-            if self._prohibitMultipleInputFeature(D):
-                raise Exception("Multiple X only make sense for noisy data")
+            if self._repeated_samples_prohibited(D):
+                raise ValueError("Repeated samples (rows in X) are not allowed for noise-free models")
             F = self.regr(self.X)
 
         # Set up R
diff --git a/sklearn/gaussian_process/tests/test_gaussian_process.py b/sklearn/gaussian_process/tests/test_gaussian_process.py
index b5460718192d7..96aab3a4f9b0e 100644
--- a/sklearn/gaussian_process/tests/test_gaussian_process.py
+++ b/sklearn/gaussian_process/tests/test_gaussian_process.py
@@ -8,7 +8,6 @@
 from nose.tools import assert_true
 
 import numpy as np
-import scipy.stats as ss
 
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.gaussian_process import regression_models as regression
@@ -142,7 +141,7 @@ def test_no_normalize():
     assert_true(np.allclose(y_pred, y))
 
 
-@raises(Exception)
+@raises(ValueError)
 def test_no_multiple_feature():
     '''
     check that multiple features are not allowed for non-noisy data
@@ -172,7 +171,8 @@ def test_1d_noisy(regr=regression.constant, corr=correlation.absolute_exponentia
     X = np.atleast_2d([1., 3., 5., 6., 7., 8., 9., 10.] * 2).T
     x = np.atleast_2d(np.linspace(0, 10, 50)).T
 
-    y = f(X).ravel() + np.random.normal(0, 0.1, len(X))
+    rs = np.random.RandomState(0)
+    y = f(X).ravel() + rs.normal(0, 0.1, len(X))
 
     gp = GaussianProcess(regr=regr, corr=corr, beta0=beta0,
                          theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
@@ -181,4 +181,3 @@ def test_1d_noisy(regr=regression.constant, corr=correlation.absolute_exponentia
 
     y = f(x).ravel()
     assert_true((np.abs(y_pred - y) <= (1.96 * MSE)  ).all())  #check that true value is within 95% conf. int.
-