From 366037ecc88d64e296b9bec7f6b6dd4871946bac Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Sat, 11 May 2019 23:55:00 -0400 Subject: [PATCH 01/18] refactoring predict --- deslib/base.py | 115 +++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/deslib/base.py b/deslib/base.py index a72f2fdc..c6aa69e4 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -506,11 +506,7 @@ def predict(self, X): # IF the DFP pruning is considered, calculate the DFP mask # for all samples in X - if self.DFP: - DFP_mask = self._frienemy_pruning(neighbors) - else: - DFP_mask = np.ones( - (ind_ds_classifier.size, self.n_classifiers_)) + DFP_mask = self._apply_dfp(ind_ds_classifier, neighbors) # Get the real indices_ of the samples that will be classified # using a DS algorithm. @@ -575,62 +571,17 @@ def predict_proba(self, X): if ind_disagreement.size: X_DS = X[ind_disagreement, :] - # Always calculating the neighborhood. Passing that to classify - # later - # TODO: Check problems with DES Clustering method. Maybe add a - # check to prevent that here. (or do clustering instead) - # Then, we estimate the nearest neighbors for all samples that we - # need to call DS routines distances, neighbors = self._get_region_competence(X_DS) - if self.with_IH: - # if IH is used, calculate the hardness level associated with - # each sample - hardness = hardness_region_competence(neighbors, - self.DSEL_target_, - self.safe_k) - - # Get the index associated with the easy and hard samples. - # Samples with low hardness are passed down to the knn - # classifier while samples with high hardness are passed down - # to the DS methods. So, here we split the samples that are - # passed to down to each stage by calculating their indices_. - easy_samples_mask = hardness < self.IH_rate - ind_knn_classifier = np.where(easy_samples_mask)[0] - ind_ds_classifier = np.where(~easy_samples_mask)[0] - - if ind_knn_classifier.size: - # all samples with low hardness should be classified by - # the knn method here: - # First get the class associated with each neighbor - - # Accessing which samples in the original matrix are - # associated with the low instance hardness indices_. - ind_knn_original_matrix = ind_disagreement[ - ind_knn_classifier] - - predicted_proba[ind_knn_original_matrix] = \ - self.roc_algorithm_.predict_proba( - X_DS[ind_knn_classifier]) - - # Remove from the neighbors and distance matrices the - # samples that were classified using the KNN - neighbors = np.delete(neighbors, ind_knn_classifier, - axis=0) - distances = np.delete(distances, ind_knn_classifier, - axis=0) - else: - # IH was not considered. 
So all samples with disagreement are - # passed down to the DS algorithm - ind_ds_classifier = np.arange(ind_disagreement.size) + distances, ind_ds_classifier, neighbors = self._IH_prediction(X_DS, + distances, + ind_disagreement, + neighbors, + predicted_proba) if ind_ds_classifier.size: # Check if the dynamic frienemy pruning should be used - if self.DFP: - DFP_mask = self._frienemy_pruning(neighbors) - else: - DFP_mask = np.ones( - (ind_ds_classifier.size, self.n_classifiers_)) + DFP_mask = self._apply_dfp(ind_ds_classifier, neighbors) ind_ds_original_matrix = ind_disagreement[ind_ds_classifier] @@ -648,6 +599,58 @@ def predict_proba(self, X): return predicted_proba + def _apply_dfp(self, ind_ds_classifier, neighbors): + if self.DFP: + DFP_mask = self._frienemy_pruning(neighbors) + else: + DFP_mask = np.ones( + (ind_ds_classifier.size, self.n_classifiers_)) + return DFP_mask + + def _IH_prediction(self, X_DS, distances, ind_disagreement, neighbors, + predicted_proba): + if self.with_IH: + # if IH is used, calculate the hardness level associated with + # each sample + hardness = hardness_region_competence(neighbors, + self.DSEL_target_, + self.safe_k) + + # Get the index associated with the easy and hard samples. + # Samples with low hardness are passed down to the knn + # classifier while samples with high hardness are passed down + # to the DS methods. So, here we split the samples that are + # passed to down to each stage by calculating their indices_. + easy_samples_mask = hardness < self.IH_rate + ind_knn_classifier = np.where(easy_samples_mask)[0] + ind_ds_classifier = np.where(~easy_samples_mask)[0] + + if ind_knn_classifier.size: + # all samples with low hardness should be classified by + # the knn method here: + # First get the class associated with each neighbor + + # Accessing which samples in the original matrix are + # associated with the low instance hardness indices_. + ind_knn_original_matrix = ind_disagreement[ + ind_knn_classifier] + + predicted_proba[ind_knn_original_matrix] = \ + self.roc_algorithm_.predict_proba( + X_DS[ind_knn_classifier]) + + # Remove from the neighbors and distance matrices the + # samples that were classified using the KNN + neighbors = np.delete(neighbors, ind_knn_classifier, + axis=0) + distances = np.delete(distances, ind_knn_classifier, + axis=0) + else: + # IH was not considered. So all samples with disagreement are + # passed down to the DS algorithm + ind_ds_classifier = np.arange(ind_disagreement.size) + return distances, ind_ds_classifier, neighbors + def _frienemy_pruning(self, neighbors): """Implements the Online Pruning method (frienemy) to remove base classifiers that do not cross the region of competence. We consider From 13befa8623fe1558bff8167dae87cccae946c415 Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Sun, 1 Dec 2019 13:53:52 -0500 Subject: [PATCH 02/18] reducing code duplication --- deslib/base.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/deslib/base.py b/deslib/base.py index c6aa69e4..1b5d3633 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -797,12 +797,7 @@ def _preprocess_dsel_scores(self): classifier in the generated_pool for each sample in X. 
""" - scores = np.empty( - (self.n_samples_, self.n_classifiers_, self.n_classes_)) - for index, clf in enumerate(self.pool_classifiers_): - scores[:, index, :] = clf.predict_proba(self.DSEL_data_) - - return scores + return self._predict_proba_base(self.DSEL_data_) @staticmethod def _all_classifier_agree(predictions): From 3e3ea5f8c88ebb6b0594ac0fc7da374c4556d18e Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Mon, 9 Mar 2020 23:57:43 -0400 Subject: [PATCH 03/18] refactoring predict method --- deslib/base.py | 292 ++++++++++++++++++++++--------------------------- 1 file changed, 128 insertions(+), 164 deletions(-) diff --git a/deslib/base.py b/deslib/base.py index 1b5d3633..605359ca 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -413,31 +413,14 @@ def predict(self, X): # Check if X is a valid input X = check_array(X) - self._check_num_features(X) n_samples = X.shape[0] predicted_labels = np.empty(n_samples, dtype=np.intp) - if self.needs_proba: - base_probabilities = self._predict_proba_base(X) - base_predictions = base_probabilities.argmax(axis=2) - else: - base_probabilities = None - base_predictions = self._predict_base(X) + base_predictions, base_probabilities = self._preprocess_predictions(X) - all_agree_vector = BaseDS._all_classifier_agree(base_predictions) - ind_all_agree = np.where(all_agree_vector)[0] - - # Since the predictions are always the same, get the predictions of the - # first base classifier. - if ind_all_agree.size: - predicted_labels[ind_all_agree] = base_predictions[ - ind_all_agree, 0] - - # For the samples with disagreement, perform the dynamic selection - # steps. First step is to collect the samples with disagreement - # between base classifiers - ind_disagreement = np.where(~all_agree_vector)[0] + ind_disagreement = self._prediction_by_agreement(base_predictions, + predicted_labels) if ind_disagreement.size: X_DS = X[ind_disagreement, :] @@ -451,81 +434,18 @@ def predict(self, X): # we need to call DS routines distances, neighbors = self._get_region_competence(X_DS) - if self.with_IH: - # if IH is used, calculate the hardness level associated with - # each sample - hardness = hardness_region_competence(neighbors, - self.DSEL_target_, - self.safe_k) - - # Get the index associated with the easy and hard samples. - # Samples with low hardness are passed down to the knn - # classifier while samples with high hardness are passed down - # to the DS methods. So, here we split the samples that are - # passed to down to each stage by calculating their indices_. - easy_samples_mask = hardness < self.IH_rate - ind_knn_classifier = np.where(easy_samples_mask)[0] - ind_ds_classifier = np.where(~easy_samples_mask)[0] - - if ind_knn_classifier.size: - # all samples with low hardness should be classified by - # the knn method here: - # First get the class associated with each neighbor - y_neighbors = self.DSEL_target_[ - neighbors[ind_knn_classifier, :self.safe_k]] - - # Accessing which samples in the original matrix are - # associated with the low instance hardness indices_. 
This - # is important since the low hardness indices - # ind_knn_classifier was estimated based on a subset - # of samples - ind_knn_original_matrix = ind_disagreement[ - ind_knn_classifier] - prediction_knn, _ = mode(y_neighbors, axis=1) - predicted_labels[ - ind_knn_original_matrix] = prediction_knn.reshape(-1, ) - - # Remove from the neighbors and distance matrices the - # samples that were classified using the KNN - neighbors = np.delete(neighbors, ind_knn_classifier, - axis=0) - distances = np.delete(distances, ind_knn_classifier, - axis=0) - else: - # IH was not considered. So all samples with disagreement are - # passed down to the DS algorithm - ind_ds_classifier = np.arange(ind_disagreement.size) - - # At this stage the samples which all base classifiers agrees or - # that are associated with low hardness were already classified. - # The remaining samples are now passed down to the DS techniques - # for classification. + distances, ind_ds_classifier, neighbors = self._IH_prediction( + X_DS, distances, ind_disagreement, + neighbors, predicted_labels, False + ) # First check whether there are still samples to be classified. if ind_ds_classifier.size: - # IF the DFP pruning is considered, calculate the DFP mask - # for all samples in X - DFP_mask = self._apply_dfp(ind_ds_classifier, neighbors) - - # Get the real indices_ of the samples that will be classified - # using a DS algorithm. - ind_ds_original_matrix = ind_disagreement[ind_ds_classifier] - - if self.needs_proba: - selected_probabilities = base_probabilities[ - ind_ds_original_matrix] - else: - selected_probabilities = None - - pred_ds = self.classify_with_ds(X_DS[ind_ds_classifier], - base_predictions[ - ind_ds_original_matrix], - selected_probabilities, - neighbors=neighbors, - distances=distances, - DFP_mask=DFP_mask) - predicted_labels[ind_ds_original_matrix] = pred_ds + self._predict_DS(X_DS, base_predictions, base_probabilities, + distances, ind_disagreement, + ind_ds_classifier, neighbors, + predicted_labels) return self.classes_.take(predicted_labels) @@ -542,17 +462,12 @@ def predict_proba(self, X): predicted_proba : array of shape = [n_samples, n_classes] Probabilities estimates for each sample in X. """ - # Check if the DS model was trained check_is_fitted(self, ["DSEL_processed_", "DSEL_data_", "DSEL_target_"]) - # Check if X is a valid input X = check_array(X, ensure_2d=False) - # Check if the base classifiers are able to estimate posterior - # probabilities (implements predict_proba method). 
self._check_predict_proba() - base_probabilities = self._predict_proba_base(X) base_predictions = base_probabilities.argmax(axis=2) @@ -577,7 +492,8 @@ def predict_proba(self, X): distances, ind_disagreement, neighbors, - predicted_proba) + predicted_proba, + True) if ind_ds_classifier.size: # Check if the dynamic frienemy pruning should be used @@ -599,57 +515,128 @@ def predict_proba(self, X): return predicted_proba - def _apply_dfp(self, ind_ds_classifier, neighbors): - if self.DFP: - DFP_mask = self._frienemy_pruning(neighbors) + def _preprocess_predictions(self, X, req_proba=False): + if self.needs_proba or req_proba: + base_probabilities = self._predict_proba_base(X) + base_predictions = base_probabilities.argmax(axis=2) else: - DFP_mask = np.ones( - (ind_ds_classifier.size, self.n_classifiers_)) - return DFP_mask + base_probabilities = None + base_predictions = self._predict_base(X) + return base_predictions, base_probabilities + + def _prediction_by_agreement(self, base_predictions, predicted_labels): + all_agree_vector = BaseDS._all_classifier_agree(base_predictions) + ind_all_agree = np.where(all_agree_vector)[0] + # Since the predictions are always the same, get the predictions of the + # first base classifier. + if ind_all_agree.size: + predicted_labels[ind_all_agree] = base_predictions[ + ind_all_agree, 0] + # return samples with disagreement + ind_disagreement = np.where(~all_agree_vector)[0] + return ind_disagreement def _IH_prediction(self, X_DS, distances, ind_disagreement, neighbors, - predicted_proba): + predicted_proba, is_proba=False): + + # TODO: make this if outside? if self.with_IH: - # if IH is used, calculate the hardness level associated with - # each sample - hardness = hardness_region_competence(neighbors, - self.DSEL_target_, - self.safe_k) - - # Get the index associated with the easy and hard samples. - # Samples with low hardness are passed down to the knn - # classifier while samples with high hardness are passed down - # to the DS methods. So, here we split the samples that are - # passed to down to each stage by calculating their indices_. - easy_samples_mask = hardness < self.IH_rate - ind_knn_classifier = np.where(easy_samples_mask)[0] - ind_ds_classifier = np.where(~easy_samples_mask)[0] - - if ind_knn_classifier.size: - # all samples with low hardness should be classified by - # the knn method here: - # First get the class associated with each neighbor - - # Accessing which samples in the original matrix are - # associated with the low instance hardness indices_. - ind_knn_original_matrix = ind_disagreement[ - ind_knn_classifier] - - predicted_proba[ind_knn_original_matrix] = \ - self.roc_algorithm_.predict_proba( - X_DS[ind_knn_classifier]) - - # Remove from the neighbors and distance matrices the - # samples that were classified using the KNN - neighbors = np.delete(neighbors, ind_knn_classifier, - axis=0) - distances = np.delete(distances, ind_knn_classifier, - axis=0) + ind_hard, ind_easy = self._split_easy_samples(neighbors) + distances, neighbors = self._predict_easy_samples(X_DS, distances, + ind_disagreement, + ind_easy, + neighbors, + predicted_proba, + is_proba) else: # IH was not considered. 
So all samples with disagreement are # passed down to the DS algorithm - ind_ds_classifier = np.arange(ind_disagreement.size) - return distances, ind_ds_classifier, neighbors + ind_hard = np.arange(ind_disagreement.size) + return distances, ind_hard, neighbors + + def _predict_easy_samples(self, X_DS, distances, ind_disagreement, + ind_easy, neighbors, predictions, is_proba): + # TODO: Make this if outside? + if ind_easy.size: + # all samples with low hardness should be classified by + # the knn method here: + # First get the class associated with each neighbor + + # Accessing which samples in the original matrix are + # associated with the low instance hardness indices_. + ind_knn_original_matrix = ind_disagreement[ind_easy] + + if is_proba: + predictions[ind_knn_original_matrix] = \ + self.roc_algorithm_.predict_proba( + X_DS[ind_easy]) + else: + y_neighbors = self.DSEL_target_[neighbors[ind_easy, + :self.safe_k]] + predictions_knn, _ = mode(y_neighbors, axis=1) + predictions[ind_knn_original_matrix] = predictions_knn.reshape( + -1, ) + + # Remove from the neighbors and distance matrices the + # samples that were classified using the KNN + neighbors = np.delete(neighbors, ind_easy, + axis=0) + distances = np.delete(distances, ind_easy, + axis=0) + return distances, neighbors + + def _split_easy_samples(self, neighbors): + # if IH is used, calculate the hardness level associated with + # each sample + hardness = hardness_region_competence(neighbors, + self.DSEL_target_, + self.safe_k) + # Get the index associated with the easy and hard samples. + # Samples with low hardness are passed down to the knn + # classifier while samples with high hardness are passed down + # to the DS method. So, here we split the samples that are + # passed to down to each stage by calculating their indices. + easy_samples_mask = hardness < self.IH_rate + ind_knn_classifier = np.where(easy_samples_mask)[0] + ind_ds_classifier = np.where(~easy_samples_mask)[0] + return ind_ds_classifier, ind_knn_classifier + + def _predict_DS(self, X_DS, base_predictions, base_probabilities, + distances, ind_disagreement, ind_ds_classifier, neighbors, + predicted, is_proba=False): + + # IF the DFP pruning is considered, calculate the DFP mask + # for all samples in X + DFP_mask = self._apply_dfp(ind_ds_classifier, neighbors) + # Get the real indices_ of the samples that will be classified + # using a DS algorithm. + ind_ds_original_matrix = ind_disagreement[ind_ds_classifier] + if self.needs_proba or is_proba: + selected_probabilities = base_probabilities[ + ind_ds_original_matrix] + else: + selected_probabilities = None + + args = [X_DS[ind_ds_classifier], + base_predictions[ind_ds_original_matrix], + selected_probabilities, + neighbors, + distances, + DFP_mask] + if is_proba: + preds = self.predict_proba_with_ds(*args) + else: + preds = self.classify_with_ds(*args) + + predicted[ind_ds_original_matrix] = preds + + def _apply_dfp(self, ind_ds_classifier, neighbors): + if self.DFP: + DFP_mask = self._frienemy_pruning(neighbors) + else: + DFP_mask = np.ones( + (ind_ds_classifier.size, self.n_classifiers_)) + return DFP_mask def _frienemy_pruning(self, neighbors): """Implements the Online Pruning method (frienemy) to remove base @@ -866,29 +853,6 @@ def _validate_pool(self): raise ValueError("n_classifiers must be greater than zero, " "got {}.".format(self.n_classifiers_)) - def _check_num_features(self, X): - """ Verify if the number of features (n_features) of X is equals to - the number of features used to fit the model. 
Raises an error if
-        n_features is different.
-
-        Parameters
-        ----------
-        X : array of shape = [classes, n_features]
-            The input data.
-
-        Raises
-        -------
-        ValueError
-            If X has a different dimensionality than the training data.
-        """
-        n_features = X.shape[1]
-        if self.n_features_ != n_features:
-            raise ValueError("Number of features of the model must "
-                             "match the input. Model n_features_ is {} and "
-                             "input n_features_ is {} ".format(
-                                 self.n_features_,
-                                 n_features))
-
     def _check_predict_proba(self):
         """ Checks if each base classifier in the pool implements the
         predict_proba method.

From 43296a686e51ee3bd20d3262838b5597ba667a0a Mon Sep 17 00:00:00 2001
From: Rafael Menelau Oliveira e Cruz
Date: Tue, 19 May 2020 01:24:43 -0400
Subject: [PATCH 04/18] removing redundant code pieces

---
 deslib/base.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index a72f2fdc..ac9b7f64 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -659,11 +659,6 @@ def _frienemy_pruning(self, neighbors):
         DFP_mask : array of shape = [n_samples, n_classifiers]
                    Mask containing 1 for the selected base classifier and 0
                    otherwise.
-
-        neighbors : array of shale = [n_samples, n_neighbors]
-            indices of the k nearest neighbors according to each
-            instance
-
         References
         ----------
         Oliveira, D.V.R., Cavalcanti, G.D.C. and Sabourin, R., Online Pruning

From 79fd182e23fab4868382f55902427fc7be66a943 Mon Sep 17 00:00:00 2001
From: Rafael Menelau Oliveira e Cruz
Date: Tue, 19 May 2020 00:33:56 -0400
Subject: [PATCH 08/18] updating fixtures

---
 deslib/tests/conftest.py  | 9 +++++----
 deslib/tests/test_base.py | 5 ++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/deslib/tests/conftest.py b/deslib/tests/conftest.py
index a9edd947..56c449e4 100644
--- a/deslib/tests/conftest.py
+++ b/deslib/tests/conftest.py
@@ -135,18 +135,19 @@ def create_base_classifier(return_value, return_prob=None):
 @pytest.fixture
 def create_pool_classifiers():
     clf_0 = create_base_classifier(return_value=np.zeros(1),
-                                   return_prob=np.atleast_2d([0.5, 0.5]))
+                                   return_prob=np.array([[0.5, 0.5]]))
     clf_1 = create_base_classifier(return_value=np.ones(1),
-                                   return_prob=np.atleast_2d([1.0, 0.0]))
+                                   return_prob=np.array([[1.0, 0.0]]))
     clf_2 = create_base_classifier(return_value=np.zeros(1),
-                                   return_prob=np.atleast_2d([0.33, 0.67]))
+                                   return_prob=np.array([[0.33, 0.67]]))
     pool_classifiers = [clf_0, clf_1, clf_2]
     return pool_classifiers
 
 
 @pytest.fixture
 def create_pool_all_agree():
-    return [create_base_classifier(return_value=np.zeros(1))] * 100
+    return [create_base_classifier(return_value=np.zeros(1),
+                                   return_prob=np.array([[0.61, 0.39]]))] * 100
 
 
 @pytest.fixture
diff --git a/deslib/tests/test_base.py b/deslib/tests/test_base.py
index e6915732..290f491c 100644
--- a/deslib/tests/test_base.py
+++ b/deslib/tests/test_base.py
@@ -288,15 +288,14 @@ def test_input_IH_rate(IH_rate):
 
 
 def test_predict_proba_all_agree(example_estimate_competence,
-                                 create_pool_classifiers):
+                                 create_pool_all_agree):
     X, y, _, _, _, dsel_scores = example_estimate_competence
 
     query = np.atleast_2d([1, 1])
-    ds_test = BaseDS(create_pool_classifiers)
+    ds_test = BaseDS(create_pool_all_agree)
     ds_test.fit(X, y)
     ds_test.DSEL_scores = dsel_scores
 
     backup_all_agree = BaseDS._all_classifier_agree
-
     BaseDS._all_classifier_agree = MagicMock(return_value=np.array([True]))
     proba = ds_test.predict_proba(query)
     BaseDS._all_classifier_agree = backup_all_agree

From ac13c1a71228d8df79fbe5b51ffd3dab2dcc8012 Mon Sep 17 00:00:00 2001
From: Rafael Menelau Oliveira e Cruz
Date: Tue, 19 May 2020 00:34:16 -0400
Subject: [PATCH 09/18] updating predict_proba method

---
 deslib/base.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/deslib/base.py b/deslib/base.py
index a464c2cc..a3830d0e 100644
--- a/deslib/base.py
+++ b/deslib/base.py
@@ -410,15 +410,10 @@ def predict(self, X):
         # Check if the DS model was trained
         check_is_fitted(self, ["DSEL_processed_", "DSEL_data_",
                                "DSEL_target_"])
-
-        # Check if X is a valid input
         X = check_array(X)
-
-        n_samples = X.shape[0]
-        predicted_labels = np.empty(n_samples, dtype=np.intp)
+        predicted_labels = np.empty(X.shape[0], dtype=np.intp)
 
         base_predictions, base_probabilities = self._preprocess_predictions(X)
-
         ind_disagreement =
self._prediction_by_agreement(base_predictions, predicted_labels) if ind_disagreement.size: @@ -471,9 +466,10 @@ def predict_proba(self, X): base_probabilities = self._predict_proba_base(X) base_predictions = base_probabilities.argmax(axis=2) - n_samples = X.shape[0] - predicted_proba = np.zeros((n_samples, self.n_classes_)) - + predicted_proba = np.zeros((X.shape[0], self.n_classes_)) + ind_disagreement = self._prediction_by_agreement(base_predictions, + predicted_proba, + base_probabilities) all_agree_vector = BaseDS._all_classifier_agree(base_predictions) ind_all_agree = np.where(all_agree_vector)[0] @@ -524,14 +520,19 @@ def _preprocess_predictions(self, X, req_proba=False): base_predictions = self._predict_base(X) return base_predictions, base_probabilities - def _prediction_by_agreement(self, base_predictions, predicted_labels): + def _prediction_by_agreement(self, base_predictions, predictions, + base_probabilities=None): all_agree_vector = BaseDS._all_classifier_agree(base_predictions) ind_all_agree = np.where(all_agree_vector)[0] # Since the predictions are always the same, get the predictions of the # first base classifier. if ind_all_agree.size: - predicted_labels[ind_all_agree] = base_predictions[ - ind_all_agree, 0] + if base_probabilities is not None: + predictions[ind_all_agree] = base_probabilities[ + ind_all_agree].mean(axis=1) + else: + predictions[ind_all_agree] = base_predictions[ + ind_all_agree, 0] # return samples with disagreement ind_disagreement = np.where(~all_agree_vector)[0] return ind_disagreement From 95d5b551d84fbc6025038a35937eeed39464da8c Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Tue, 19 May 2020 00:52:06 -0400 Subject: [PATCH 10/18] removing redundancy --- deslib/base.py | 15 --------------- deslib/dcs/a_posteriori.py | 2 +- deslib/dcs/a_priori.py | 2 +- deslib/des/knop.py | 2 +- deslib/des/meta_des.py | 2 +- deslib/des/probabilistic/base.py | 2 +- deslib/tests/test_base.py | 2 +- 7 files changed, 6 insertions(+), 21 deletions(-) diff --git a/deslib/base.py b/deslib/base.py index a3830d0e..77477916 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -767,21 +767,6 @@ def _predict_proba_base(self, X): probabilities[:, index] = clf.predict_proba(X) return probabilities - def _preprocess_dsel_scores(self): - """Compute the output profiles of the dynamic selection dataset (DSEL) - Each position of the output profiles vector is the score obtained by a - base classifier :math:`c_{i}` - for the classes of the input sample. - - Returns - ------- - scores : array of shape = [n_samples, n_classifiers, n_classes] - Scores (probabilities) for each class obtained by each base - classifier in the generated_pool - for each sample in X. - """ - return self._predict_proba_base(self.DSEL_data_) - @staticmethod def _all_classifier_agree(predictions): """Check whether there is a difference in opinion among the classifiers diff --git a/deslib/dcs/a_posteriori.py b/deslib/dcs/a_posteriori.py index 7c57e1bd..311162ad 100644 --- a/deslib/dcs/a_posteriori.py +++ b/deslib/dcs/a_posteriori.py @@ -146,7 +146,7 @@ class labels of each example in X. 
super(APosteriori, self).fit(X, y) self._check_predict_proba() - self.dsel_scores_ = self._preprocess_dsel_scores() + self.dsel_scores_ = self._predict_proba_base(self.DSEL_data_) return self def estimate_competence(self, query, neighbors, distances, diff --git a/deslib/dcs/a_priori.py b/deslib/dcs/a_priori.py index f563fc0c..a6cb6005 100644 --- a/deslib/dcs/a_priori.py +++ b/deslib/dcs/a_priori.py @@ -139,7 +139,7 @@ class labels of each example in X. super(APriori, self).fit(X, y) self._check_predict_proba() - self.dsel_scores_ = self._preprocess_dsel_scores() + self.dsel_scores_ = self._predict_proba_base(self.DSEL_data_) return self def estimate_competence(self, query, neighbors, distances, diff --git a/deslib/des/knop.py b/deslib/des/knop.py index 12200baa..a15a2527 100644 --- a/deslib/des/knop.py +++ b/deslib/des/knop.py @@ -142,7 +142,7 @@ class labels of each example in X. raise ValueError( "Error. KNOP does not accept one class datasets!") self._check_predict_proba() - self.dsel_scores_ = self._preprocess_dsel_scores() + self.dsel_scores_ = self._predict_proba_base(self.DSEL_data_) # Reshape DSEL_scores as a 2-D array for nearest neighbor calculations dsel_output_profiles = self.dsel_scores_.reshape(self.n_samples_, self.n_classifiers_ * diff --git a/deslib/des/meta_des.py b/deslib/des/meta_des.py index ca8b3439..bb5b8cad 100644 --- a/deslib/des/meta_des.py +++ b/deslib/des/meta_des.py @@ -192,7 +192,7 @@ class labels of each example in X. # Check if the base classifier is able to estimate probabilities self._check_predict_proba() - self.dsel_scores_ = self._preprocess_dsel_scores() + self.dsel_scores_ = self._predict_proba_base(self.DSEL_data_) # Reshape DSEL_scores as a 2-D array for nearest neighbor calculations dsel_output_profiles = self.dsel_scores_.reshape(self.n_samples_, diff --git a/deslib/des/probabilistic/base.py b/deslib/des/probabilistic/base.py index cf486688..0c109a3e 100644 --- a/deslib/des/probabilistic/base.py +++ b/deslib/des/probabilistic/base.py @@ -72,7 +72,7 @@ class labels of each example in X. self._check_predict_proba() - self.dsel_scores_ = self._preprocess_dsel_scores() + self.dsel_scores_ = self._predict_proba_base(self.DSEL_data_) # Pre process the source of competence for the entire DSEL, # making the method faster during generalization. diff --git a/deslib/tests/test_base.py b/deslib/tests/test_base.py index 290f491c..91154db1 100644 --- a/deslib/tests/test_base.py +++ b/deslib/tests/test_base.py @@ -246,7 +246,7 @@ def test_preprocess_dsel_scores(create_X_y, create_pool_classifiers): X, y = create_X_y ds_test = BaseDS(create_pool_classifiers) ds_test.fit(X, y) - dsel_scores = ds_test._preprocess_dsel_scores() + dsel_scores = ds_test._predict_proba_base(X) expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]]) expected = np.tile(expected, (15, 1, 1)) assert np.array_equal(dsel_scores, expected) From eeb2acc31ebdceace0bfb8681ba86f255032c77f Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Tue, 19 May 2020 01:13:20 -0400 Subject: [PATCH 11/18] removing redundant code pieces --- deslib/base.py | 44 ++++---------------------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/deslib/base.py b/deslib/base.py index 77477916..260b5030 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -243,7 +243,7 @@ class labels of each example in X. 
# validate the value of k self._validate_k() self._set_region_of_competence_algorithm() - self._fit_region_competence(X_dsel, y_dsel) + self.roc_algorithm_.fit(X_dsel, y_dsel) # validate the IH if self.with_IH: @@ -301,24 +301,10 @@ def _encode_base_labels(self, y): else: return self.enc_.transform(y) - def _fit_region_competence(self, X, y): - """Fit the k-NN classifier inside the dynamic selection method. - - Parameters - ---------- - X : array of shape = [n_samples, n_features] - The Input data. - - y : array of shape = [n_samples] - class labels of each sample in X. - - """ - self.roc_algorithm_.fit(X, y) - def _set_dsel(self, X, y): """Pre-Process the input X and y data into the dynamic selection dataset(DSEL) and get information about the structure of the data - (e.g., n_classes, N_samples, classes) + (e.g., n_classes, n_samples, classes) Parameters ---------- @@ -333,7 +319,8 @@ class labels of each sample in X. self.n_classes_ = self.classes_.size self.n_features_ = X.shape[1] self.n_samples_ = self.DSEL_target_.size - self.DSEL_processed_, self.BKS_DSEL_ = self._preprocess_dsel() + self.BKS_DSEL_ = self._predict_base(self.DSEL_data_) + self.DSEL_processed_ = self.BKS_DSEL_ == y[:, np.newaxis] def _set_region_of_competence_algorithm(self): @@ -701,27 +688,6 @@ def _frienemy_pruning(self, neighbors): return mask - def _preprocess_dsel(self): - """Compute the prediction of each base classifier for - all samples in DSEL. Used to speed-up the test phase, by - not requiring to re-classify training samples during test. - - Returns - ------- - DSEL_processed_ : array of shape = [n_samples, n_classifiers]. - Each element indicates whether the base classifier - predicted the correct label for the corresponding - sample (True), otherwise (False). - - BKS_DSEL_ : array of shape = [n_samples, n_classifiers] - Predicted labels of each base classifier for all samples - in DSEL. - """ - BKS_dsel = self._predict_base(self.DSEL_data_) - processed_dsel = BKS_dsel == self.DSEL_target_[:, np.newaxis] - - return processed_dsel, BKS_dsel - def _predict_base(self, X): """ Get the predictions of each base classifier in the pool for all samples in X. @@ -810,11 +776,9 @@ def _validate_parameters(self): "parameter safe_k must be equal or less than parameter k." "input safe_k is {} and k is {}".format(self.k, self.safe_k)) - if not isinstance(self.IH_rate, float): raise TypeError( "parameter IH_rate should be a float between [0.0, 0.5]") - if self.IH_rate < 0 or self.IH_rate > 0.5: raise ValueError("Parameter IH_rate should be between [0.0, 0.5]." 
"IH_rate = {}".format(self.IH_rate)) From c08482b4fe785654fe44c95fe552ea94a354aacf Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Wed, 20 May 2020 02:37:50 -0400 Subject: [PATCH 12/18] Merge branch 'refactor_predict' of https://github.com/scikit-learn-contrib/DESlib into refactor_predict # Conflicts: # deslib/base.py --- deslib/base.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/deslib/base.py b/deslib/base.py index 260b5030..261153c1 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -468,16 +468,15 @@ def predict_proba(self, X): if ind_disagreement.size: X_DS = X[ind_disagreement, :] - distances, neighbors = self._get_region_competence(X_DS) - - distances, ind_ds_classifier, neighbors = self._IH_prediction(X_DS, - distances, - ind_disagreement, - neighbors, - predicted_proba, - True) - + distances, ind_ds_classifier, neighbors = self._IH_prediction( + X_DS, + distances, + ind_disagreement, + neighbors, + predicted_proba, + True + ) if ind_ds_classifier.size: # Check if the dynamic frienemy pruning should be used DFP_mask = self._apply_dfp(ind_ds_classifier, neighbors) From 652c0d0199764cb85896537139cdd9bdd025c184 Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Fri, 29 May 2020 14:53:08 -0400 Subject: [PATCH 13/18] fixing dfp after merge --- deslib/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deslib/base.py b/deslib/base.py index 763b9e39..d2ad35cf 100644 --- a/deslib/base.py +++ b/deslib/base.py @@ -620,7 +620,9 @@ def _predict_DS(self, X_DS, base_predictions, base_probabilities, def _apply_dfp(self, ind_ds_classifier, neighbors): if self.DFP: - DFP_mask = self._frienemy_pruning(neighbors) + DFP_mask = frienemy_pruning_preprocessed(neighbors, + self.DSEL_target_, + self.DSEL_processed_) else: DFP_mask = np.ones( (ind_ds_classifier.size, self.n_classifiers_)) From c7b84aeb206f9cdec99fd4401a5296036e9932f3 Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Tue, 2 Jun 2020 22:56:10 -0400 Subject: [PATCH 14/18] standardizing method to get region of comeptence for desclustering --- deslib/des/des_clustering.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deslib/des/des_clustering.py b/deslib/des/des_clustering.py index 8c30f628..42d14264 100644 --- a/deslib/des/des_clustering.py +++ b/deslib/des/des_clustering.py @@ -176,6 +176,11 @@ class labels of each example in X. self._preprocess_clusters() return self + def _get_region_competence(self, query, k=None): + distances = self.clustering_.transform(query) + region = self.clustering_.predict(query) + return distances, region + def _preprocess_clusters(self): """Preprocess the competence as well as the average diversity of each base classifier for each specific cluster. 
From 40f0b4634b47976fd66200dae37f049d7db44ea1 Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Wed, 3 Jun 2020 01:42:11 -0400 Subject: [PATCH 15/18] initial bpso implementation --- deslib/util/bpso.py | 335 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 deslib/util/bpso.py diff --git a/deslib/util/bpso.py b/deslib/util/bpso.py new file mode 100644 index 00000000..c543f7e3 --- /dev/null +++ b/deslib/util/bpso.py @@ -0,0 +1,335 @@ +# coding=utf-8 + +# Author: Rafael Menelau Oliveira e Cruz +# +# License: BSD 3 clause + +import copy +from typing import List +from typing import Optional +from typing import Union + +import numpy as np + +# Limits +X_MAX = 10 +X_MIN = -X_MAX +MI = 100 +POS_MAX = 100 +POS_MIN = -100 + +# Auxiliary variables +z = 0 + + +def s_shaped_transfer(X): + result = 1.0 / (1.0 + np.power(np.e, -2.0 * X)) + result[np.isnan(result)] = 1 + return result + + +def v_shaped_transfer(X): + return np.abs((2.0 / np.pi) * np.arctan((np.pi / 2.0) * X)) + + +class Particle: + """ + Class representing a particle in a swarm. + + Parameters + ---------- + inertia : float + Initial inertia of the swarm + + c1 : float + Self coefficient + + c2 : float + Group coefficient + + Attributes + ---------- + n_dimensions : int + Particle dimensionality + pbest : array-like + Particle best position + best_fitness : float + Best fitness values obtained by the particle + fitness : float + Current fitness value from the particle + velocity : + Velocity vector. Each element corresponds to the velocity in the + corresponding dimension. + phi : float + Coefficient + history : List[Float] + Fitness evolution of the given particle. + """ + + def __init__(self, + position: Union[List[float], np.ndarray], + inertia: float, + c1: float, + c2: float, + ): + self.position = np.asarray(position) + self.c1 = c1 + self.c2 = c2 + self.inertia = inertia + + # class variables + self.n_dimensions = position.size + self.best_fitness = None + self.fitness = None + self.phi = 0 + self.pbest = np.copy(self.position) + self.velocity = np.zeros(self.n_dimensions) + self.history = [] + + +class BPSO: + """ + Bibary Particle Swarm Optimization (BPSO) with self updating mechanism. + Conversion from continuous to binary representation is conducted using + either the V-shaped and S-shaped transfer functions + + Parameters + ---------- + max_iter : int, default 100 + Number of iterations in the optimization. + n_particles : int, default 20 + Number of particles used in the optimization. + init_inertia : float + Initial inertia of the swarm + final_inertia : float + Final inertia of the swarm + c1 : float + Self coefficient + c2 : float + Group coefficient + + Attributes + ---------- + n_particles_ : int + Number of particles in the swarm + particles_ : List[Particle] + List of particles in the swarm. + g_best_ : Particle + Particle containing the best fitness in the swarm history + + References + ---------- + Kennedy, James, and Russell Eberhart. "Particle swarm optimization." + In Proceedings of IJCNN'95-International Conference on Neural Networks, + vol. 4, pp. 1942-1948. IEEE, 1995. + + Mirjalili, Seyedali, and Andrew Lewis. "S-shaped versus V-shaped transfer + functions for binary particle swarm optimization." Swarm and Evolutionary + Computation 9 (2013): 1-14. + + Zhang, Ying Chao, Xiong Xiong, and QiDong Zhang. "An improved self-adaptive + PSO algorithm with detection function for multimodal function optimization + problems." 
Mathematical Problems in Engineering 2013 (2013). + """ + def __init__(self, + max_iter: int, + n_particles: int, + n_dim: int, + init_inertia: float, + final_inertia: float, + c1: float, + c2: float, + transfer_function: str = 'v-shaped', + max_iter_no_change=None, + random_state: Optional[int] = None, + ): + self.max_iter = max_iter + self.n_particles = n_particles + self.n_dim = n_dim + self.init_inertia = init_inertia + self.final_inertia = final_inertia + self.initial_c1 = c1 + self.initial_c2 = c2 + self.transfer_function = transfer_function + self.verbose = verbose + self.max_iter_no_change = max_iter_no_change + self.random_state = random_state + + def _create_swarm(self): + + self.particles_ = [] + self.gbest_ = None + + positions = np.random.uniform(0, 1, (self.n_particles, self.n_dim)) + positions = (positions > 0.5).astype(int) + for idx in range(self.n_particles): + particle = Particle(positions[idx], + inertia=self.init_inertia, + c1=self.initial_c1, + c2=self.initial_c2) + + self.particles_.append(particle) + + def _update_velocity(self): + """ + Update the velocity of each particle. + """ + for particle in self.particles_: + for dim in range(len(particle.position)): + tmp_c1 = particle.pbest[dim] - particle.position[dim] + tmp_c2 = self.gbest_.position[dim] - particle.position[dim] + + inertia = particle.inertia * particle.velocity[dim] + cognitive = ( + (particle.c1 * np.random.rand()) * tmp_c1) + social = (particle.c2 * np.random.rand()) * tmp_c2 + + particle.velocity[dim] = inertia + cognitive + social + + # Limit velocity + if particle.velocity[dim] >= X_MAX: + particle.velocity[dim] = X_MAX + elif particle.velocity[dim] <= X_MIN: + particle.velocity[dim] = X_MIN + + def _update_particles(self): + + for particle in self.particles_: + for dim in range(len(particle.position)): + particle.position[dim] = particle.position[dim] + \ + particle.velocity[dim] + if particle.position[dim] >= POS_MAX: + particle.position[dim] = POS_MAX + elif particle.position[dim] <= POS_MIN: + particle.position[dim] = POS_MIN + + def _update_binary_particles(self): + for particle in self.particles_: + velocity = self._transfer_function(particle.velocity) + pos = (np.random.rand(self.n_dim) < velocity).astype(np.int) + particle.position[pos == 1] = particle.position[pos == 1] ^ 1 + + def _transfer_function(self, velocity): + if self.transfer_function == 's-shape': + velocity = s_shaped_transfer(velocity) + else: + velocity = v_shaped_transfer(velocity) + return velocity + + def _self_update(self): + # Compute phi for each particle + for particle in self.particles_: + tmp1 = 0 + tmp2 = 0 + for j in range(len(particle.position)): + tmp1 = tmp1 + self.gbest_.position[j] - particle.position[ + j] + tmp2 = tmp2 + particle.pbest[j] - particle.position[j] + if tmp1 == 0: + tmp1 = 1 + if tmp2 == 0: + tmp2 = 1 + particle.phi = abs(tmp1 / tmp2) + ln = np.log(particle.phi) + tmp = particle.phi * (self.iter_ - ((1 + ln) * self.max_iter) / MI) + particle.inertia = ((self.init_inertia - self.final_inertia) / ( + 1 + np.exp(tmp))) + self.final_inertia + particle.c1 = self.initial_c1 * (particle.phi ** (-1)) + particle.c2 = self.initial_c2 * particle.phi + + def _update_pbest(self): + """ + Method used to update the position of each particle. 
+ """ + for particle in self.particles_: + if (particle.best_fitness is None or + particle.best_fitness >= particle.fitness): + particle.pbest = particle.position + particle.best_fitness = particle.fitness + + def _update_gbest(self): + """ + Method used to update the best particle in the swarm. + """ + for particle in self.particles_: + if self.gbest_ is None or particle.fitness < self.gbest_.fitness: + self.gbest_ = copy.deepcopy(particle) + self._n_iter_no_change = 0 + + def optimize(self): + """ + Run the PSO algorithm. + + Return + ------ + gbest_ : Particle + Particle with the best fitness value. + + """ + self._create_swarm() + self._n_iter_no_change = 0 + self.iter_ = 0 + + while not self._stop(): + self.iter_ = self.iter_ + 1 + self._n_iter_no_change += 1 + self._compute_fitness() + self._update_gbest() + self._update_pbest() + self._update_velocity() + self._self_update() + self._update_binary_particles() + + return self.gbest_ + + def _stop(self): + """ + Function to check if the optimization should stop. + """ + # check early stopping + if (self.max_iter_no_change is not None + and self._n_iter_no_change >= self.max_iter_no_change): + return True + # check reached maximum number of iteration + if self.iter_ >= self.max_iter: + return True + + @staticmethod + def fitness_function(position): + """ + Compute fitness + + Parameters + ---------- + position : Numpy array + A particle in the swarm + + Returns + ------- + fitness : float + Fitness of the particle. + + """ + return np.sum(position == 1) + + def _compute_fitness(self): + """ + Compute the fitness of each particle + """ + for particle in self.particles_: + particle.fitness = self.fitness_function( + particle.position) + + @staticmethod + def fitness(particle, X, y, metric='euclidean', gamma=0.5): + """X must be normalized a priori""" + X_p = X[:, particle] + score = BPSO.compute_knn_score(X_p, y, metric) + distance = BPSO.computer_inner_outer_distances(X_p, y, metric) + fitness = ((gamma * score) + ((1 - gamma) * distance)) + return fitness + + +def main(): + swarm = BPSO(1000, 10, 200, 1, 0.3, c1=2, c2=2, max_iter_no_change=50,) + swarm.optimize() \ No newline at end of file From 9c73fd676545d0959125548d9f523293ff9c997b Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Thu, 4 Jun 2020 21:25:40 -0400 Subject: [PATCH 16/18] fixing dfp after merge --- deslib/util/bpso.py | 99 +++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 71 deletions(-) diff --git a/deslib/util/bpso.py b/deslib/util/bpso.py index c543f7e3..c1fc8c66 100644 --- a/deslib/util/bpso.py +++ b/deslib/util/bpso.py @@ -6,8 +6,6 @@ import copy from typing import List -from typing import Optional -from typing import Union import numpy as np @@ -66,12 +64,7 @@ class Particle: Fitness evolution of the given particle. """ - def __init__(self, - position: Union[List[float], np.ndarray], - inertia: float, - c1: float, - c2: float, - ): + def __init__(self, position, inertia, c1, c2): self.position = np.asarray(position) self.c1 = c1 self.c2 = c2 @@ -132,16 +125,16 @@ class BPSO: problems." Mathematical Problems in Engineering 2013 (2013). 
""" def __init__(self, - max_iter: int, - n_particles: int, - n_dim: int, - init_inertia: float, - final_inertia: float, - c1: float, - c2: float, - transfer_function: str = 'v-shaped', + max_iter, + n_particles, + n_dim, + init_inertia, + final_inertia, + c1, + c2, + transfer_function='v-shaped', max_iter_no_change=None, - random_state: Optional[int] = None, + random_state=None, ): self.max_iter = max_iter self.n_particles = n_particles @@ -156,10 +149,8 @@ def __init__(self, self.random_state = random_state def _create_swarm(self): - self.particles_ = [] self.gbest_ = None - positions = np.random.uniform(0, 1, (self.n_particles, self.n_dim)) positions = (positions > 0.5).astype(int) for idx in range(self.n_particles): @@ -254,82 +245,48 @@ def _update_gbest(self): for particle in self.particles_: if self.gbest_ is None or particle.fitness < self.gbest_.fitness: self.gbest_ = copy.deepcopy(particle) - self._n_iter_no_change = 0 + self.n_iter_no_change_ = 0 - def optimize(self): + def optimize(self, fitness_function): """ Run the PSO algorithm. + Parameters + ---------- + fitness_function : function + Function used to estimate the fitness of a binary particle. + Return ------ gbest_ : Particle - Particle with the best fitness value. - + Global best solution from the whole swarm. """ self._create_swarm() - self._n_iter_no_change = 0 + self.n_iter_no_change_ = 0 self.iter_ = 0 while not self._stop(): - self.iter_ = self.iter_ + 1 - self._n_iter_no_change += 1 - self._compute_fitness() + # compute fitness of each particle + for particle in self.particles_: + particle.fitness = fitness_function(particle.position) + self._update_gbest() self._update_pbest() self._update_velocity() self._self_update() self._update_binary_particles() - + self.iter_ = self.iter_ + 1 + self.n_iter_no_change_ += 1 return self.gbest_ def _stop(self): """ Function to check if the optimization should stop. """ - # check early stopping + # Early stopping if (self.max_iter_no_change is not None - and self._n_iter_no_change >= self.max_iter_no_change): + and self.n_iter_no_change_ >= self.max_iter_no_change): return True - # check reached maximum number of iteration + # Reached maximum number of iteration if self.iter_ >= self.max_iter: return True - - @staticmethod - def fitness_function(position): - """ - Compute fitness - - Parameters - ---------- - position : Numpy array - A particle in the swarm - - Returns - ------- - fitness : float - Fitness of the particle. 
- - """ - return np.sum(position == 1) - - def _compute_fitness(self): - """ - Compute the fitness of each particle - """ - for particle in self.particles_: - particle.fitness = self.fitness_function( - particle.position) - - @staticmethod - def fitness(particle, X, y, metric='euclidean', gamma=0.5): - """X must be normalized a priori""" - X_p = X[:, particle] - score = BPSO.compute_knn_score(X_p, y, metric) - distance = BPSO.computer_inner_outer_distances(X_p, y, metric) - fitness = ((gamma * score) + ((1 - gamma) * distance)) - return fitness - - -def main(): - swarm = BPSO(1000, 10, 200, 1, 0.3, c1=2, c2=2, max_iter_no_change=50,) - swarm.optimize() \ No newline at end of file From ffbc30c43b18965f08619e207fee061c798e285d Mon Sep 17 00:00:00 2001 From: Rafael Menelau Oliveira e Cruz Date: Wed, 10 Jun 2020 18:47:06 -0400 Subject: [PATCH 17/18] vectorizing code --- deslib/util/bpso.py | 79 ++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/deslib/util/bpso.py b/deslib/util/bpso.py index c1fc8c66..49ab1af9 100644 --- a/deslib/util/bpso.py +++ b/deslib/util/bpso.py @@ -82,7 +82,7 @@ def __init__(self, position, inertia, c1, c2): class BPSO: """ - Bibary Particle Swarm Optimization (BPSO) with self updating mechanism. + Binary Particle Swarm Optimization (BPSO) with self updating mechanism. Conversion from continuous to binary representation is conducted using either the V-shaped and S-shaped transfer functions @@ -144,7 +144,6 @@ def __init__(self, self.initial_c1 = c1 self.initial_c2 = c2 self.transfer_function = transfer_function - self.verbose = verbose self.max_iter_no_change = max_iter_no_change self.random_state = random_state @@ -162,37 +161,44 @@ def _create_swarm(self): self.particles_.append(particle) def _update_velocity(self): - """ - Update the velocity of each particle. 
-        """
+        for p in self.particles_:
+            tmp_c1 = p.pbest - p.position
+            tmp_c2 = self.gbest_.position - p.position
+            inertia = p.inertia * p.velocity
+            cognitive = p.c1 * np.random.rand(p.n_dimensions) * tmp_c1
+            social = p.c2 * np.random.rand(p.n_dimensions) * tmp_c2
+            p.velocity = inertia + cognitive + social
+            p.velocity = p.velocity.clip(X_MIN, X_MAX)
+
+        # for dim in range(len(particle.position)):
+        #     tmp_c1 = particle.pbest[dim] - particle.position[dim]
+        #     tmp_c2 = self.gbest_.position[dim] - particle.position[dim]
+        #
+        #     inertia = particle.inertia * particle.velocity[dim]
+        #     cognitive = (
+        #             (particle.c1 * np.random.rand()) * tmp_c1)
+        #     social = (particle.c2 * np.random.rand()) * tmp_c2
+        #
+        #     particle.velocity[dim] = inertia + cognitive + social
+        #
+        #     # Limit velocity
+        #     if particle.velocity[dim] >= X_MAX:
+        #         particle.velocity[dim] = X_MAX
+        #     elif particle.velocity[dim] <= X_MIN:
+        #         particle.velocity[dim] = X_MIN
 
     def _update_particles(self):
-
         for particle in self.particles_:
-            for dim in range(len(particle.position)):
-                particle.position[dim] = particle.position[dim] + \
-                    particle.velocity[dim]
-                if particle.position[dim] >= POS_MAX:
-                    particle.position[dim] = POS_MAX
-                elif particle.position[dim] <= POS_MIN:
-                    particle.position[dim] = POS_MIN
+            # Non-inplace add: position starts as an int array, so += cannot
+            # cast the float velocity. Note np.clip expects (min, max).
+            particle.position = particle.position + particle.velocity
+            particle.position = particle.position.clip(POS_MIN, POS_MAX)
+
+            # for dim in range(len(particle.position)):
+            #     particle.position[dim] = particle.position[dim] + \
+            #                              particle.velocity[dim]
+            #     if particle.position[dim] >= POS_MAX:
+            #         particle.position[dim] = POS_MAX
+            #     elif particle.position[dim] <= POS_MIN:
+            #         particle.position[dim] = POS_MIN
 
     def _update_binary_particles(self):
@@ -290,3 +296,25 @@ def _stop(self):
         # Reached maximum number of iteration
         if self.iter_ >= self.max_iter:
             return True
+
+
+def main():
+    from sklearn.datasets import make_classification
+    from sklearn.model_selection import train_test_split
+    from sklearn.neighbors import KNeighborsClassifier
+
+    X, y = make_classification(n_samples=2000, n_features=100,
+                               n_redundant=50, n_informative=20)
+    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)
+
+    def fitness(position):
+        # BPSO minimizes, so return the validation error of a KNN trained
+        # only on the features selected by the binary position vector.
+        mask = position.astype(bool)
+        if not mask.any():
+            return 1.0
+        knn = KNeighborsClassifier().fit(X_train[:, mask], y_train)
+        return 1.0 - knn.score(X_val[:, mask], y_val)
+
+    swarm = BPSO(1000, 10, 100, 1, 0.3, c1=2, c2=2, max_iter_no_change=50)
+    swarm.optimize(fitness)
\ No newline at end of file
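The bit-flip step that the vectorized velocity update above feeds into can be illustrated in isolation. A minimal sketch of what `_update_binary_particles` does with the default V-shaped transfer; the velocity and position values here are made up for illustration, and it assumes the `deslib.util.bpso` module from the patches above is importable:

    import numpy as np

    from deslib.util.bpso import v_shaped_transfer

    velocity = np.array([-3.0, 0.0, 3.0])  # hypothetical per-dimension velocities
    position = np.array([1, 0, 1])         # current binary position

    # The V-shaped transfer maps each velocity to a flip probability in
    # [0, 1): a zero velocity never flips its bit, while a velocity of
    # large magnitude almost always does.
    flip = np.random.rand(velocity.size) < v_shaped_transfer(velocity)
    position[flip] ^= 1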
From dd7e2979ff61353b3e016057bd5ac55747d48bf2 Mon Sep 17 00:00:00 2001
From: Rafael Menelau Oliveira e Cruz
Date: Wed, 10 Jun 2020 18:54:41 -0400
Subject: [PATCH 18/18] adding BPSO to init and organizing documentation

---
 deslib/util/__init__.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/deslib/util/__init__.py b/deslib/util/__init__.py
index 43f0e319..dd73f4bf 100644
--- a/deslib/util/__init__.py
+++ b/deslib/util/__init__.py
@@ -23,12 +23,22 @@
 deslib.util.knne - Implementation of the K-Nearest Neighbors Equality
 technique
+
+deslib.util.dfp - General Dynamic Frienemy Pruning (DFP) implementation.
+This implementation allows using the DFP method with any ensemble model,
+not only dynamic ones.
+
+deslib.util.bpso - V-shaped and S-shaped Binary Particle Swarm Optimization
+used for feature selection.
 """
 from .aggregation import *
+from .bpso import BPSO
+from .datasets import *
+from .dfp import frienemy_pruning
+from .dfp import frienemy_pruning_preprocessed
 from .diversity import *
+from .faiss_knn_wrapper import FaissKNNClassifier
 from .instance_hardness import *
-from .prob_functions import *
-from .datasets import *
 from .knne import KNNE
-from .faiss_knn_wrapper import FaissKNNClassifier
+from .prob_functions import *
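With the full series applied, BPSO is importable straight from deslib.util. A minimal smoke test of the new export, not part of the patches themselves: the toy fitness below just counts the selected bits, so the minimizing swarm should drive the position towards all zeros. It assumes a NumPy contemporary with these patches, since `_update_binary_particles` still relies on the legacy `np.int` alias:

    import numpy as np

    from deslib.util import BPSO

    def fitness(position):
        # BPSO minimizes, so fewer active bits means a better fitness.
        return float(np.sum(position))

    swarm = BPSO(max_iter=50, n_particles=10, n_dim=8, init_inertia=0.9,
                 final_inertia=0.4, c1=2, c2=2, max_iter_no_change=10)
    best = swarm.optimize(fitness)
    print(best.position, best.fitness)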