diff --git a/examples/model_selection/gp_search.py b/examples/model_selection/gp_search.py
new file mode 100644
index 0000000000000..5005d3a1b023d
--- /dev/null
+++ b/examples/model_selection/gp_search.py
@@ -0,0 +1,246 @@
+from __future__ import print_function
+
+from sklearn.datasets import load_digits
+from sklearn.gp_search import GPSearchCV
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.linear_model import SGDClassifier
+from sklearn.pipeline import Pipeline
+
+import logging
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+def extend_result(n_tests, tmp_res):
+    # Pad tmp_res to length n_tests by repeating its last value.
+    res = np.zeros(n_tests)
+    last = len(tmp_res) - 1
+    for i in range(n_tests):
+        res[i] = tmp_res[min(i, last)]
+
+    return res
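+
+
+# A quick illustration of the padding behaviour: extend_result(5, [0.2, 0.5])
+# returns array([0.2, 0.5, 0.5, 0.5, 0.5]), i.e. the last best-so-far value
+# is repeated up to the requested length.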
+
+
+def test1():
+    digits = load_digits()
+    X, y = digits.data, digits.target
+    clf = RandomForestClassifier(n_estimators=20)
+
+    # specify parameters and distributions to sample from
+    parameters = {"max_depth": ['int', [3, 3]],
+                  "max_features": ['int', [1, 11]],
+                  "min_samples_split": ['int', [1, 11]],
+                  "min_samples_leaf": ['int', [1, 11]],
+                  "bootstrap": ['cat', [True, False]],
+                  "criterion": ['cat', ["gini", "entropy"]]}
+
+    search = GPSearchCV(parameters, estimator=clf, X=X, y=y, n_iter=20)
+    search._fit()
+
+
+def test2():
+    parameters = {'kernel': ['cat', ['rbf', 'poly']],
+                  'd': ['int', [1, 3]],
+                  'C': ['float', [1, 10]]}
+
+    def scoring_function(x):
+        # A callable estimator receives a parameter dict and returns a score.
+        return 0.5
+
+    search = GPSearchCV(parameters, estimator=scoring_function, n_iter=20)
+    search._fit()
+
+
+def test3():
+    # Display progress logs on stdout
+    logging.basicConfig(level=logging.INFO,
+                        format='%(asctime)s %(levelname)s %(message)s')
+
+    # Load some categories from the training set
+    categories = [
+        'alt.atheism',
+        'talk.religion.misc',
+    ]
+    # Uncomment the following to do the analysis on all the categories
+    # categories = None
+
+    print("Loading 20 newsgroups dataset for categories:")
+    print(categories)
+
+    data = fetch_20newsgroups(subset='train', categories=categories)
+    print("%d documents" % len(data.filenames))
+    print("%d categories" % len(data.target_names))
+    print()
+
+    # define a pipeline combining a text feature extractor with a simple
+    # classifier
+    pipeline = Pipeline([
+        ('vect', CountVectorizer()),
+        ('tfidf', TfidfTransformer()),
+        ('clf', SGDClassifier()),
+    ])
+
+    # uncommenting more parameters will give better exploring power but will
+    # increase processing time in a combinatorial way
+    parameters = {
+        'vect__max_df': ['float', [0.5, 1.]],
+        # 'vect__max_features': (None, 5000, 10000, 50000),
+        'vect__ngram_range': ['cat', [(1, 1), (1, 2)]],  # unigrams or bigrams
+        # 'tfidf__use_idf': (True, False),
+        # 'tfidf__norm': ('l1', 'l2'),
+        'clf__alpha': ['float', [0.000001, 0.00001]],
+        'clf__penalty': ['cat', ['l2', 'elasticnet']]
+        # 'clf__n_iter': (10, 50, 80),
+    }
+
+    search = GPSearchCV(parameters, estimator=pipeline, X=data.data,
+                        y=data.target, n_iter=20)
+    search._fit()
+
+
+def gp_vs_random_search(test_name, n_tests, search_length, save_data=False):
+    """
+    Compare GP-based search with a simple random search.
+    Choose test_name in {'iris', 'text'}.
+    """
+
+    n_iter_search = search_length
+
+    if test_name == 'iris':
+        digits = load_digits()
+        X, y = digits.data, digits.target
+        pipeline = RandomForestClassifier()
+
+        # specify parameters and distributions to sample from
+        parameters = {"max_depth": ['int', [3, 3]],
+                      "max_features": ['int', [1, 11]],
+                      "min_samples_split": ['int', [1, 11]],
+                      "min_samples_leaf": ['int', [1, 11]],
+                      "bootstrap": ['cat', [True, False]],
+                      "criterion": ['cat', ["gini", "entropy"]]}
+
+    elif test_name == 'text':
+        # Display progress logs on stdout
+        logging.basicConfig(level=logging.INFO,
+                            format='%(asctime)s %(levelname)s %(message)s')
+
+        # Load some categories from the training set
+        categories = [
+            'alt.atheism',
+            'talk.religion.misc',
+        ]
+        # Uncomment the following to do the analysis on all the categories
+        # categories = None
+        print("Loading 20 newsgroups dataset for categories:")
+        print(categories)
+
+        data = fetch_20newsgroups(subset='train', categories=categories)
+        print("%d documents" % len(data.filenames))
+        print("%d categories" % len(data.target_names))
+
+        X = data.data
+        y = data.target
+
+        # define a pipeline combining a text feature extractor with a simple
+        # classifier
+        pipeline = Pipeline([
+            ('vect', CountVectorizer()),
+            ('tfidf', TfidfTransformer()),
+            ('clf', SGDClassifier()),
+        ])
+
+        # uncommenting more parameters will give better exploring power but
+        # will increase processing time in a combinatorial way
+        parameters = {
+            'vect__max_df': ['float', [0.5, 1.]],
+            # 'vect__max_features': (None, 5000, 10000, 50000),
+            'vect__ngram_range': ['cat', [(1, 1), (1, 2)]],  # unigrams or bigrams
+            # 'tfidf__use_idf': (True, False),
+            # 'tfidf__norm': ('l1', 'l2'),
+            'clf__alpha': ['float', [0.000001, 0.00001]],
+            'clf__penalty': ['cat', ['l2', 'elasticnet']]
+            # 'clf__n_iter': (10, 50, 80),
+        }
+
+    else:
+        print('Dataset not available for test')
+        return
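+
+    # Note: each best-so-far curve below is a running maximum; it could be
+    # computed equivalently, without the explicit loops, as
+    # np.maximum.accumulate(scores).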
+
+    # GP UCB search
+    all_gp_ucb_results = []
+    print('GP_ucb search')
+    for i in range(n_tests):
+        ucb_search = GPSearchCV(parameters, estimator=pipeline, X=X, y=y,
+                                acquisition_function='UCB',
+                                n_iter=n_iter_search, n_init=20,
+                                verbose=False)
+        _, scores = ucb_search._fit()
+
+        max_scores = [scores[0]]
+        print('Test', i, '-', len(scores), 'parameters tested')
+
+        for j in range(1, len(scores)):
+            max_scores.append(max(max_scores[j-1], scores[j]))
+        all_gp_ucb_results.append(extend_result(n_iter_search, max_scores))
+    all_gp_ucb_results = np.asarray(all_gp_ucb_results)
+    print(all_gp_ucb_results.shape)
+    if save_data:
+        np.savetxt('gp_ucb_scores.csv', all_gp_ucb_results, delimiter=',')
+
+    # # GP EI search
+    # all_gp_ei_results = []
+    # print('GP_ei search')
+    # for i in range(n_tests):
+    #     ei_search = GPSearchCV(parameters, estimator=pipeline, X=X, y=y,
+    #                            acquisition_function='EI',
+    #                            n_iter=n_iter_search, n_init=20,
+    #                            verbose=False)
+    #     _, scores = ei_search._fit()
+
+    #     max_scores = [scores[0]]
+    #     print('Test', i, '-', len(scores), 'parameters tested')
+
+    #     for j in range(1, len(scores)):
+    #         max_scores.append(max(max_scores[j-1], scores[j]))
+    #     all_gp_ei_results.append(extend_result(n_iter_search, max_scores))
+    # all_gp_ei_results = np.asarray(all_gp_ei_results)
+    # print(all_gp_ei_results.shape)
+    # if save_data:
+    #     np.savetxt('gp_ei_scores.csv', all_gp_ei_results, delimiter=',')
+
+    # Randomized search (n_init == n_iter, so every step is a random draw)
+    print('Random search')
+    all_random_results = []
+    for i in range(n_tests):
+        random_search = GPSearchCV(parameters, estimator=pipeline, X=X, y=y,
+                                   n_iter=n_iter_search,
+                                   n_init=n_iter_search, verbose=False)
+        _, scores = random_search._fit()
+
+        max_scores = [scores[0]]
+        print('Test', i, '-', len(scores), 'parameters tested')
+
+        for j in range(1, len(scores)):
+            max_scores.append(max(max_scores[j-1], scores[j]))
+        all_random_results.append(extend_result(n_iter_search, max_scores))
+    all_random_results = np.asarray(all_random_results)
+    if save_data:
+        np.savetxt('rand_scores.csv', all_random_results, delimiter=',')
+
+    plt.figure()
+    # plt.plot(range(n_iter_search), np.mean(all_gp_ei_results, axis=0),
+    #          'r', label='GP-EI')
+    plt.plot(range(n_iter_search), np.mean(all_gp_ucb_results, axis=0),
+             'b', label='GP-UCB')
+    plt.plot(range(n_iter_search), np.mean(all_random_results, axis=0),
+             'g', label='Random')
+    plt.legend(loc=4)
+    plt.title('Test GP vs Random on ' + test_name + ' dataset - '
+              'Average on ' + str(n_tests) + ' trials')
+    plt.xlabel('Iterations')
+    plt.ylabel('Max CV performance')
+    plt.show()
+
+
+if __name__ == "__main__":
+
+    print('Routine Test')
+    test2()
+    test1()
+
+    # test_name = 'text'
+    # n_tests = 20
+    # search_length = 60
+    # print('\nTest GP vs Random on', test_name, 'dataset - Average on',
+    #       n_tests, 'trials')
+    # gp_vs_random_search(test_name, n_tests, search_length, save_data=True)
diff --git a/sklearn/gp_search.py b/sklearn/gp_search.py
new file mode 100644
index 0000000000000..cb667d992d088
--- /dev/null
+++ b/sklearn/gp_search.py
@@ -0,0 +1,420 @@
+"""
+The :mod:`sklearn.gp_search` module includes utilities to fine-tune the
+parameters of an estimator through a Gaussian Process model.
+"""
+from __future__ import print_function
+
+# Author: Sebastien Dubois
+# License: BSD 3 clause
+
+import numpy as np
+from scipy.stats import norm
+
+from .gaussian_process.gaussian_process import GaussianProcess
+from .cross_validation import check_cv
+from .cross_validation import _fit_and_score
+from .metrics.scorer import check_scoring
+from .base import is_classifier, clone
+
+
+# UTILS #
+
+def sample_candidates(n_candidates, param_bounds, param_isInt):
+    # Draw n_candidates points uniformly at random within param_bounds,
+    # rounding the integer-typed dimensions, and drop duplicate rows.
+    n_parameters = param_isInt.shape[0]
+    candidates = []
+
+    for k in range(n_parameters):
+        if param_isInt[k]:
+            k_sample = np.asarray(
+                np.random.rand(n_candidates)
+                * float(param_bounds[k][1] - param_bounds[k][0])
+                + param_bounds[k][0],
+                dtype=np.int32)
+        else:
+            k_sample = np.asarray(
+                np.random.rand(n_candidates)
+                * float(param_bounds[k][1] - param_bounds[k][0])
+                + param_bounds[k][0])
+        candidates.append(k_sample)
+
+    candidates = np.asarray(candidates)
+    candidates = candidates.T
+
+    return compute_unique(candidates)
+
+
+def compute_ei(predictions, sigma, y_best):
+    # Expected improvement of each candidate over the best score so far
+    # (maximization convention: improvement when mu(x) exceeds y_best).
+    ei_array = np.zeros(predictions.shape[0])
+    for i in range(ei_array.shape[0]):
+        z = (predictions[i] - y_best) / sigma[i]
+        ei_array[i] = sigma[i] * (z * norm.cdf(z) + norm.pdf(z))
+
+    return ei_array
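+
+
+# compute_ei uses the closed-form expected improvement for maximization:
+# EI(x) = sigma(x) * (z * Phi(z) + phi(z)) with z = (mu(x) - y_best) / sigma(x),
+# where Phi and phi are the standard normal CDF and PDF.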
+
+
+def compute_unique(a):
+    # keep only unique rows in the 2d array a
+    # http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array
+    b = np.ascontiguousarray(a).view(
+        np.dtype((np.void, a.dtype.itemsize * a.shape[1]))
+    )
+    _, idx = np.unique(b, return_index=True)
+    idx = np.sort(idx)
+
+    return a[idx]
+
+
+def is_in_ndarray(item, a):
+    # look for the row `item` in the 2d array a;
+    # returns (True, row_index) if found, and (False, 0) otherwise
+    k = 0
+    idx_val = np.asarray(range(a.shape[0]))
+    idxk = range(a.shape[0])
+    while k < a.shape[1]:
+        idxk = (a[idxk, k] == item[k])
+        if np.sum(idxk) > 0:
+            k += 1
+            idx_val = idx_val[idxk]
+            idxk = list(idx_val)
+        else:
+            return False, 0
+
+    return True, idx_val[0]
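+
+
+# For example, compute_unique(np.array([[1, 2], [1, 2], [3, 4]])) returns
+# array([[1, 2], [3, 4]]) (first occurrences, in their original order), and
+# is_in_ndarray(np.array([3, 4]), np.array([[1, 2], [3, 4]])) returns (True, 1).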
+
+
+# GPSearchCV #
+class GPSearchCV(object):
+    """
+    Parameters
+    ----------
+
+    parameters : dict, parameter space on which to optimize the estimator.
+        The keys of the dictionary should be the names of the parameters,
+        and the values should be lists of length 2; the first element being
+        the type of the parameter ('int', 'float' or 'cat' [for categorical]),
+        and the second element being a list of either the bounds between which
+        to search (for 'int' and 'float') or the values the parameter can take
+        (for 'cat').
+        Example : parameters = {'kernel': ['cat', ['rbf', 'poly']],
+                                'd': ['int', [1, 3]],
+                                'C': ['float', [1, 10]]}
+
+    estimator : 1) sklearn estimator or 2) callable
+        1 : an object type that implements the "fit" and "predict" methods,
+        such as a classifier or a pipeline
+        2 : a function that computes the score given a dictionary of
+        parameters. The returned value should be a float.
+
+    X : array-like, shape = [n_samples, n_features]
+        Training vector, where n_samples is the number of samples and
+        n_features is the number of features.
+
+    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
+        Target relative to X for classification or regression;
+        None for unsupervised learning.
+
+    fit_params : dict, optional
+        Parameters to pass to the fit method.
+
+    scoring : string, callable or None, optional
+        A string (see sklearn's model evaluation documentation) or
+        a scorer callable object / function with signature
+        ``scorer(estimator, X, y)``.
+        Default is None.
+
+    cv : integer or cross-validation generator, optional
+        Relevant if the estimator is an sklearn object.
+        If an integer is passed, it is the number of folds.
+        Specific cross-validation objects can be passed; see the
+        sklearn.cross_validation module for the list of possible objects.
+        Default is None, which means 3-fold cross-validation.
+
+    acquisition_function : string, optional
+        Function to maximize in order to choose the next parameter to test.
+        - Simple : maximize the predicted output
+        - UCB : maximize the upper confidence bound
+        - EI : maximize the expected improvement
+        Default is 'UCB'.
+
+    n_iter : int
+        Total number of iterations to perform, including the n_init random
+        initialization steps.
+        Default is 100.
+
+    n_init : int, optional
+        Number of random iterations to perform before the smart search.
+        Default is 10.
+
+    n_candidates : int, optional
+        Number of random candidates to sample for each GP iteration.
+        Default is 500.
+
+    gp_nugget : float, optional
+        The nugget to set for the Gaussian Process.
+        Default is 1.e-10.
+
+
+    Attributes
+    ----------
+
+    best_parameter_ : dict, the parameter set, from those tested by the
+        method _fit, that maximizes the mean of the cross-validation results.
+
+    tested_parameters_ : ndarray, the parameters tested by _fit
+
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> digits = load_digits()
+    >>> X, y = digits.data, digits.target
+    >>> clf = RandomForestClassifier(n_estimators=20)
+    >>> parameters = {"max_depth": ['int', [3, 3]],
+    ...               "max_features": ['int', [1, 11]],
+    ...               "min_samples_split": ['int', [1, 11]],
+    ...               "min_samples_leaf": ['int', [1, 11]],
+    ...               "bootstrap": ['cat', [True, False]],
+    ...               "criterion": ['cat', ["gini", "entropy"]]}
+    >>> search = GPSearchCV(parameters,
+    ...                     estimator=clf,
+    ...                     X=X,
+    ...                     y=y,
+    ...                     n_iter=20)
+    >>> search._fit()
+
+    """
+
+    def __init__(self,
+                 parameters,
+                 estimator,
+                 scoring=None,
+                 X=None, y=None,
+                 fit_params=None,
+                 refit=True,
+                 cv=None,
+                 acquisition_function='UCB',
+                 n_iter=100,
+                 n_init=10,
+                 n_candidates=500,
+                 gp_nugget=1.e-10,
+                 verbose=True):
+
+        self.parameters = parameters
+        self.n_parameters = len(parameters)
+        self.acquisition_function = acquisition_function
+        self.n_iter = n_iter
+        self.n_init = n_init
+        self.n_candidates = n_candidates
+        self.param_names = list(parameters.keys())
+        self.param_isInt = np.array([0 if (parameters[k][0] == 'float')
+                                     else 1 for k in self.param_names])
+        self.param_bounds = np.zeros((self.n_parameters, 2))
+        self.gp_nugget = gp_nugget
+        self.verbose = verbose
+        self.scoring = scoring
+        self.estimator = estimator
+        self.fit_params = fit_params if fit_params is not None else {}
+        self.cv = cv
+        self.X = X
+        self.y = y
+
+        self.best_parameter_ = None
+        self.tested_parameters_ = None
+        self.cv_scores_ = None
+
+        if callable(estimator):
+            self._callable_estimator = True
+            if verbose:
+                print('Estimator is a callable and not an sklearn Estimator')
+        else:
+            self._callable_estimator = False
+
+        if not self._callable_estimator:
+            self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
+
+        # init param_bounds
+        for i in range(self.n_parameters):
+            if parameters[self.param_names[i]][0] == 'cat':
+                # categorical parameters are encoded by their index
+                self.param_bounds[i, 0] = 0
+                self.param_bounds[i, 1] = \
+                    len(parameters[self.param_names[i]][1])
+            else:
+                self.param_bounds[i] = \
+                    np.array(parameters[self.param_names[i]][1])
+                if parameters[self.param_names[i]][0] == 'int':
+                    # upper bound is exclusive after the int cast
+                    self.param_bounds[i, 1] += 1
+
+        if self.verbose:
+            print(self.parameters)
+            print(self.param_names)
+            print(self.param_isInt)
+            print(self.param_bounds)
+
+    def vector_to_dict(self, vector_parameter):
+        # Map an internal parameter vector back to a {name: value} dict,
+        # decoding categorical indices and casting integer parameters.
+        dict_parameter = dict.fromkeys(self.param_names)
+        for i in range(self.n_parameters):
+            if self.parameters[self.param_names[i]][0] == 'cat':
+                dict_parameter[self.param_names[i]] = \
+                    (self.parameters[self.param_names[i]][1])[
+                        int(vector_parameter[i])]
+            elif self.parameters[self.param_names[i]][0] == 'int':
+                dict_parameter[self.param_names[i]] = int(vector_parameter[i])
+            else:
+                dict_parameter[self.param_names[i]] = vector_parameter[i]
+
+        return dict_parameter
+
+    def score(self, test_parameter):
+        """
+        The score function to call in order to evaluate the quality
+        of the parameter test_parameter
+
+        Parameters
+        ----------
+        test_parameter : dict, the parameter set to test
+
+        Returns
+        -------
+        score : the mean of the CV scores
+        """
+
+        if not self._callable_estimator:
+            cv = check_cv(self.cv, self.X, self.y,
+                          classifier=is_classifier(self.estimator))
+            cv_score = [_fit_and_score(
+                clone(self.estimator), self.X, self.y, self.scorer_,
+                train, test, False, test_parameter,
+                self.fit_params, return_parameters=True)
+                for train, test in cv]
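+
+            # Average the fold scores, weighting each fold by the number
+            # of test samples it contains.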
+            n_test_samples = 0
+            score = 0
+            for tmp_score, tmp_n_test_samples, _, _ in cv_score:
+                tmp_score *= tmp_n_test_samples
+                n_test_samples += tmp_n_test_samples
+                score += tmp_score
+            score /= float(n_test_samples)
+
+        else:
+            score = self.estimator(test_parameter)
+
+        return score
+
+    def _fit(self):
+        """
+        Run the hyper-parameter optimization process
+
+        Returns
+        -------
+        tested_parameters_ : ndarray, the parameters tested during the process
+
+        cv_scores_ : array of the mean CV results of the parameters tested
+        """
+
+        n_tested_parameters = 0
+        tested_parameters = np.zeros((self.n_iter, self.n_parameters))
+        cv_scores = np.zeros(self.n_iter)
+
+        # Initialize with random candidates #
+        init_candidates = sample_candidates(
+            self.n_init, self.param_bounds, self.param_isInt)
+        self.n_init = init_candidates.shape[0]
+
+        for i in range(self.n_init):
+            dict_candidate = self.vector_to_dict(init_candidates[i, :])
+            cv_score = self.score(dict_candidate)
+
+            if self.verbose:
+                print('Step ' + str(i) + ' - Hyperparameter '
+                      + str(dict_candidate) + ' ' + str(cv_score))
+
+            is_in, idx = is_in_ndarray(
+                init_candidates[i, :],
+                tested_parameters[:n_tested_parameters, :])
+            if not is_in:
+                tested_parameters[n_tested_parameters, :] = \
+                    init_candidates[i, :]
+                cv_scores[n_tested_parameters] = cv_score
+                n_tested_parameters += 1
+            else:
+                if self.verbose:
+                    print('Hyperparameter already tested')
+                cv_scores[idx] = (cv_scores[idx] + cv_score) / 2.
+
+        for i in range(self.n_iter - self.n_init):
+
+            # Model the CV score with a Gaussian Process
+            gp = GaussianProcess(theta0=1. * np.ones(self.n_parameters),
+                                 thetaL=0.001 * np.ones(self.n_parameters),
+                                 thetaU=10. * np.ones(self.n_parameters),
+                                 random_start=3,
+                                 nugget=self.gp_nugget)
+            gp.fit(tested_parameters[:n_tested_parameters, :],
+                   cv_scores[:n_tested_parameters])
+
+            # Sample candidates and predict their corresponding
+            # acquisition values
+            candidates = sample_candidates(self.n_candidates,
+                                           self.param_bounds,
+                                           self.param_isInt)
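+
+            # Acquisition: UCB ranks candidates by the optimistic estimate
+            # mu(x) + 1.96 * sigma(x) (upper bound of an approximate 95%
+            # confidence interval); EI ranks them by the expected improvement
+            # over the best CV score observed so far.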
+            if self.acquisition_function == 'UCB':
+                predictions, MSE = gp.predict(candidates, eval_MSE=True)
+                upper_bound = predictions + 1.96 * np.sqrt(MSE)
+                best_candidate = candidates[np.argmax(upper_bound)]
+
+            elif self.acquisition_function == 'EI':
+                predictions, MSE = gp.predict(candidates, eval_MSE=True)
+                y_best = np.max(cv_scores)
+                ei = compute_ei(predictions, np.sqrt(MSE), y_best)
+                best_candidate = candidates[np.argmax(ei)]
+
+            else:
+                raise ValueError('acquisition_function not implemented yet: '
+                                 + self.acquisition_function)
+
+            dict_candidate = self.vector_to_dict(best_candidate)
+            cv_score = self.score(dict_candidate)
+            if self.verbose:
+                print('Step ' + str(i + self.n_init) + ' - Hyperparameter '
+                      + str(dict_candidate) + ' ' + str(cv_score))
+
+            is_in, idx = is_in_ndarray(
+                best_candidate,
+                tested_parameters[:n_tested_parameters, :])
+            if not is_in:
+                tested_parameters[n_tested_parameters, :] = best_candidate
+                cv_scores[n_tested_parameters] = cv_score
+                n_tested_parameters += 1
+            else:
+                if self.verbose:
+                    print('Hyperparameter already tested')
+                cv_scores[idx] = (cv_scores[idx] + cv_score) / 2.
+
+        best_idx = np.argmax(cv_scores[:n_tested_parameters])
+        vector_best_param = tested_parameters[best_idx]
+        best_parameter = self.vector_to_dict(vector_best_param)
+
+        # store
+        self.best_parameter_ = best_parameter
+        self.tested_parameters_ = tested_parameters[:n_tested_parameters, :]
+        self.cv_scores_ = cv_scores[:n_tested_parameters]
+
+        if self.verbose:
+            print('\nTested ' + str(n_tested_parameters) + ' parameters')
+            print('Max cv score ' + str(cv_scores[best_idx]))
+            print('Best parameter ' + str(tested_parameters[best_idx]))
+            print(best_parameter)
+
+        return tested_parameters[:n_tested_parameters, :], \
+            cv_scores[:n_tested_parameters]
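+
+
+# Minimal usage sketch (hypothetical values; see the GPSearchCV docstring
+# above for the full parameter-space syntax):
+#
+#     from sklearn.datasets import load_digits
+#     from sklearn.ensemble import RandomForestClassifier
+#     digits = load_digits()
+#     params = {'max_features': ['int', [1, 11]],
+#               'criterion': ['cat', ['gini', 'entropy']]}
+#     search = GPSearchCV(params, estimator=RandomForestClassifier(),
+#                         X=digits.data, y=digits.target, n_iter=30)
+#     tested_params, scores = search._fit()
+#     print(search.best_parameter_)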