Merge pull request oegedijk#92 from oegedijk/dev

v0.3.2.2
tunayokumus · Mar 3, 2021 · dfc4b5a · dfc4b5a
2 parents 4f55d9d + 31addb9
commit dfc4b5a
Show file tree

Hide file tree

Showing 19 changed files with 773 additions and 434 deletions.
diff --git a/README.md b/README.md
@@ -498,7 +498,10 @@ In order to reduce the memory footprint there are a number of things you can do:
     them up elsewhere, you can also replace the index dropdowns with a simple free
     text field with `index_dropdown=False`. Only valid indexes (i.e. in the 
     `get_index_list()` list) get propagated
-    to other components by default, but this can be overriden with `index_check=False`.
+    to other components by default, but this can be overridden with `index_check=False`.
+    Instead of an ``index_list_func`` you can also set an
+    ``explainer.set_index_exists_func(func)``, where ``func`` should return a bool
+    indicating whether the ``index`` exists.
 
     Important: these functions can be called multiple times by multiple independent
     components, so it is probably best to implement some kind of caching functionality.

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,5 +1,28 @@
 # Release Notes
 
+
+## Version 0.3.2.2:
+
+`index_dropdown=False` now works for indexes not listed in `set_index_list_func()`
+    as long as they can be found by `set_index_exists_func`.
+### New Features
+- adds `set_index_exists_func` to register a function that checks whether an
+    index exists, in addition to those listed by `set_index_list_func()`
+
+### Bug Fixes
+- bug fix to make `shap.KernelExplainer` (used with explainer parameter `shap='kernel'`)
+    work with `RegressionExplainer`
+- bug fix when no explicit `labels` are passed with index selector
+- components only update if `explainer.index_exists()`: no more `IndexNotFoundErrors`.
+- fixed bug where the title of the regression index selector was labeled 'Custom'
+- `get_y()` now returns `.item()` when necessary
+- removed ticks from confusion matrix plot when no `labels` param passed 
+    (this bug got reintroduced in recent plotly release)
+
+### Improvements
+- new helper function `get_shap_row(index)` to calculate or look up a single 
+    row of shap values.
+
 ## Version 0.3.2:
 
 Highlights:

diff --git a/TODO.md b/TODO.md
@@ -5,14 +5,16 @@
 ## Bugs:
 
 ## Plots:
+- add SHAP decision plots:
+    https://towardsdatascience.com/introducing-shap-decision-plots-52ed3b4a1cba
+- add winsor to shap dependence
 - make plot background transparent?
 - Only use ScatterGl above a certain cutoff
 - separate standard shap plots for shap_interaction plots 
     - using some kind of inheritance?
 - change lines and annotation to this:
     - https://community.plotly.com/t/announcing-plotly-py-4-12-horizontal-and-vertical-lines-and-rectangles/46783
-- add some of these:
-    https://towardsdatascience.com/introducing-shap-decision-plots-52ed3b4a1cba
+
 
 ### Classifier plots:
 - pdp: add multiclass option
@@ -57,6 +59,7 @@
 
 
 ### Components
+- add winsor to shap dependence
 - add predictions list to whatif composite:
     - https://github.com/oegedijk/explainerdashboard/issues/85
 - add circular callbacks to cutoff - cutoff percentile

diff --git a/docs/source/buildcustom.rst b/docs/source/buildcustom.rst
diff --git a/docs/source/custom.rst b/docs/source/custom.rst
diff --git a/docs/source/deployment.rst b/docs/source/deployment.rst
@@ -226,6 +226,12 @@ footprint there are a number of things you can do:
       that index.
     - with ``explainer.set_index_list_func()`` you can set a function 
       that returns a list of available indexes that can be queried.
+
+    If the list of indexes is too long to fit in a dropdown you can pass
+    ``index_dropdown=False``, which turns the dropdowns into free text fields.
+    Instead of an ``index_list_func`` you can also set an
+    ``explainer.set_index_exists_func(func)``, where ``func`` should return a bool
+    indicating whether the ``index`` exists.
 
     Important: these functions can be called multiple times by multiple independent
     components, so it is probably best to implement some kind of caching functionality.

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -152,6 +152,7 @@ in the github repo.
    tabs
    components
    custom
+   buildcustom
    deployment
    license
    help

diff --git a/docs/source/screenshots/custom.png b/docs/source/screenshots/custom.png
diff --git a/explainerdashboard/dashboard_components/classifier_components.py b/explainerdashboard/dashboard_components/classifier_components.py
@@ -350,21 +350,22 @@ def component_callbacks(self, app):
                 [Input('clas-prediction-index-'+self.name, 'value'),
                 Input('pos-label-'+self.name, 'value')])
             def update_output_div(index, pos_label):
-                if index is not None:
-                    fig = self.explainer.plot_prediction_result(index, showlegend=False)
+                if index is None or not self.explainer.index_exists(index):
+                    raise PreventUpdate
+                fig = self.explainer.plot_prediction_result(index, showlegend=False)
 
-                    preds_df = self.explainer.prediction_result_df(index, round=self.round, logodds=True)                
-                    preds_df.probability = np.round(100*preds_df.probability.values, self.round).astype(str)
-                    preds_df.probability = preds_df.probability + ' %'
-                    preds_df.logodds = np.round(preds_df.logodds.values, self.round).astype(str)
+                preds_df = self.explainer.prediction_result_df(index, round=self.round, logodds=True)                
+                preds_df.probability = np.round(100*preds_df.probability.values, self.round).astype(str)
+                preds_df.probability = preds_df.probability + ' %'
+                preds_df.logodds = np.round(preds_df.logodds.values, self.round).astype(str)
+
+                if self.explainer.model_output!='logodds':
+                    preds_df = preds_df[['label', 'probability']]
 
-                    if self.explainer.model_output!='logodds':
-                        preds_df = preds_df[['label', 'probability']]
-
-                    preds_table = dbc.Table.from_dataframe(preds_df, 
-                                        striped=False, bordered=False, hover=False)  
-                    return preds_table, fig
-                raise PreventUpdate
+                preds_table = dbc.Table.from_dataframe(preds_df, 
+                                    striped=False, bordered=False, hover=False)  
+                return preds_table, fig
+
         else:
             @app.callback(
                 [Output('clas-prediction-div-'+self.name, 'children'),

diff --git a/explainerdashboard/dashboard_components/decisiontree_components.py b/explainerdashboard/dashboard_components/decisiontree_components.py
@@ -143,14 +143,15 @@ def component_callbacks(self, app):
             [Input('decisiontrees-index-'+self.name, 'value'),
              Input('decisiontrees-highlight-'+self.name, 'value'),
              Input('pos-label-'+self.name, 'value')],
+            [State("decisiontrees-graph-"+self.name, 'figure')]
         )
-        def update_tree_graph(index, highlight, pos_label):
-            if index is not None:
-                highlight = None if highlight is None else int(highlight)
-                return self.explainer.plot_trees(index, 
-                        highlight_tree=highlight, pos_label=pos_label,
-                        higher_is_better=self.higher_is_better)
-            return {}
+        def update_tree_graph(index, highlight, pos_label, old_fig):
+            if index is None or not self.explainer.index_exists(index):
+                return old_fig
+            highlight = None if highlight is None else int(highlight)
+            return self.explainer.plot_trees(index, 
+                    highlight_tree=highlight, pos_label=pos_label,
+                    higher_is_better=self.higher_is_better)
 
         @app.callback(
             Output('decisiontrees-highlight-'+self.name, 'value'),
@@ -211,7 +212,7 @@ def __init__(self, explainer, title="Decision path table", name=None,
         if self.description is None: self.description = """
         Shows the path that an observation took down a specific decision tree.
         """
-        self.register_dependencies("decision_trees")
+        self.register_dependencies("shadow_trees")
 
     def layout(self):
         return dbc.Card([
@@ -262,11 +263,11 @@ def component_callbacks(self, app):
              Input('pos-label-'+self.name, 'value')],
         )
         def update_decisiontree_table(index, highlight, pos_label):
-            if index is not None and highlight is not None:
-                get_decisionpath_df = self.explainer.get_decisionpath_summary_df(
-                    int(highlight), index, pos_label=pos_label)
-                return dbc.Table.from_dataframe(get_decisionpath_df)
-            raise PreventUpdate
+            if index is None or highlight is None or not self.explainer.index_exists(index):
+                raise PreventUpdate
+            get_decisionpath_df = self.explainer.get_decisionpath_summary_df(
+                int(highlight), index, pos_label=pos_label)
+            return dbc.Table.from_dataframe(get_decisionpath_df)
 
 
 class DecisionPathGraphComponent(ExplainerComponent):
@@ -318,6 +319,7 @@ def __init__(self, explainer, title="Decision path graph", name=None,
         self.selector = PosLabelSelector(explainer, name=self.name, pos_label=pos_label)
         self.index_selector = IndexSelector(explainer, 'decisionpath-index-'+self.name,
                                     index=index, index_dropdown=index_dropdown)
+        self.register_dependencies("shadow_trees")
 
     def layout(self):
         return dbc.Card([
@@ -379,8 +381,8 @@ def component_callbacks(self, app):
              State('pos-label-'+self.name, 'value')]
         )
         def update_tree_graph(n_clicks, index, highlight, pos_label):
-            if (n_clicks is not None 
-                and index is not None 
-                and highlight is not None):
+            if index is None or not self.explainer.index_exists(index):
+                raise PreventUpdate
+            if n_clicks is not None and highlight is not None:
                 return self.explainer.decisiontree_encoded(int(highlight), index)
             raise PreventUpdate
diff --git a/explainerdashboard/dashboard_components/overview_components.py b/explainerdashboard/dashboard_components/overview_components.py
@@ -465,6 +465,8 @@ def update_pdp_sort_div(col):
                  Input('pos-label-'+self.name, 'value')]
             )
             def update_pdp_graph(index, col, drop_na, sample, gridlines, gridpoints, sort, pos_label):
+                if index is None or not self.explainer.index_exists(index):
+                    raise PreventUpdate
                 return self.explainer.plot_pdp(col, index, 
                     drop_na=bool(drop_na), sample=sample, gridlines=gridlines, 
                     gridpoints=gridpoints, sort=sort, pos_label=pos_label)
@@ -650,7 +652,7 @@ def component_callbacks(self, app):
             [Input('feature-input-index-'+self.name, 'value')]
         )
         def update_whatif_inputs(index):
-            if index is None:
+            if index is None or not self.explainer.index_exists(index):
                 raise PreventUpdate
             X_row = self.explainer.get_X_row(index, merge=True)[self.explainer.columns_ranked_by_shap()]
             return X_row.values[0].tolist()

diff --git a/explainerdashboard/dashboard_components/regression_components.py b/explainerdashboard/dashboard_components/regression_components.py
@@ -77,13 +77,13 @@ def __init__(self, explainer, title=None, name=None,
             description (str, optional): Tooltip to display when hover over
                 component title. When None default text is shown. 
         """
-        super().__init__(explainer, title, name)
+        super().__init__(explainer, title or f"Select {explainer.index_name}", name)
         assert self.explainer.is_regression, \
             ("explainer is not a RegressionExplainer so the RegressionRandomIndexComponent "
              "will not work. Try using the ClassifierRandomIndexComponent instead.")
 
-        if self.title is None:
-            self.title = f"Select {self.explainer.index_name}"
+        # if self.title is None:
+        #     self.title = f"Select {self.explainer.index_name}"
 
         self.index_name = 'random-index-reg-index-'+self.name
         self.index_selector = IndexSelector(explainer, self.index_name,
@@ -464,12 +464,12 @@ def component_callbacks(self, app):
                 Output('reg-prediction-div-'+self.name, 'children'),
                 [Input('reg-prediction-index-'+self.name, 'value')])
             def update_output_div(index):
-                if index is not None:
-                    preds_df = self.explainer.prediction_result_df(index, round=self.round)
-                    return make_hideable(
-                        dbc.Table.from_dataframe(preds_df, striped=False, bordered=False, hover=False),
-                        hide=self.hide_table)  
-                raise PreventUpdate
+                if index is None or not self.explainer.index_exists(index):
+                    raise PreventUpdate
+                preds_df = self.explainer.prediction_result_df(index, round=self.round)
+                return make_hideable(
+                    dbc.Table.from_dataframe(preds_df, striped=False, bordered=False, hover=False),
+                    hide=self.hide_table)  
         else:
             @app.callback(
                 Output('reg-prediction-div-'+self.name, 'children'),

diff --git a/explainerdashboard/dashboard_components/shap_components.py b/explainerdashboard/dashboard_components/shap_components.py
@@ -1082,7 +1082,7 @@ def component_callbacks(self, app):
                  Input('contributions-graph-orientation-'+self.name, 'value'),
                  Input('pos-label-'+self.name, 'value')])
             def update_output_div(index, depth, sort, orientation, pos_label):
-                if index is None:
+                if index is None or not self.explainer.index_exists(index):
                     raise PreventUpdate
                 depth = None if depth is None else int(depth)
                 plot = self.explainer.plot_contributions(str(index), topx=depth, 
@@ -1236,7 +1236,7 @@ def component_callbacks(self, app):
                 Input('contributions-table-sorting-'+self.name, 'value'),
                 Input('pos-label-'+self.name, 'value')])
             def update_output_div(index, depth, sort, pos_label):
-                if index is None:
+                if index is None or not self.explainer.index_exists(index):
                     raise PreventUpdate
                 depth = None if depth is None else int(depth)
                 contributions_table = dbc.Table.from_dataframe(

diff --git a/explainerdashboard/explainer_methods.py b/explainerdashboard/explainer_methods.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_numeric_dtype
+from pandas.api.types import is_numeric_dtype, is_categorical_dtype
 
 from dtreeviz.trees import ShadowDecTree
 
@@ -266,7 +266,10 @@ def merge_categorical_columns(X, onehot_dict=None, cols=None, not_encoded_dict=N
                                     sep).astype("category")
         else:
             if not drop_regular:
-                X_cats.loc[:, col_name] = X[col_name].values
+                if is_categorical_dtype(X[col_name]):
+                    X_cats[col_name] = pd.Categorical(X[col_name])
+                else:
+                    X_cats.loc[:, col_name] = X[col_name].values
     if cols:
         return X_cats[cols]
     else:

diff --git a/explainerdashboard/explainer_plots.py b/explainerdashboard/explainer_plots.py
@@ -1254,12 +1254,18 @@ def plotly_confusion_matrix(cm, labels = None, percentage=True):
     layout = go.Layout(
             title="Confusion Matrix",
             xaxis=dict(title='predicted',
-                       constrain="domain"),
+                       constrain="domain",
+                       tickmode = 'array',
+                       tickvals = [f" {lab}" for lab in labels],
+                       ticktext = [f" {lab}" for lab in labels]),
             yaxis=dict(title=dict(text='observed',standoff=20),
                        autorange="reversed", 
                        side='left',
                        scaleanchor='x', 
-                       scaleratio=1),
+                       scaleratio=1,
+                       tickmode = 'array',
+                       tickvals = [f" {lab}" for lab in labels],
+                       ticktext = [f" {lab}" for lab in labels]),
             plot_bgcolor = '#fff',
         )
     fig = go.Figure(data, layout)
-Original file line number
+Diff line change
@@ Expand Up / @@ -152,6 +152,7 @@ in the github repo. @@
        tabs
        components
        custom
+       buildcustom
        deployment
        license
        help
@@ Expand Down @@