First of all, kudos for this lib, it's amazing how many models you already support (sklearn, skopt, {xgb,cat,light}gbm).
My test works for RandomForest, with basically the same current performance limitations. Having looked at the code, maybe the C extension (cext_acv),
which should speed things up, is not yet implemented.
Since the syntax changed a little from the previous lib, I followed one of the notebook examples for the C parameter
(maybe I'm wrong there).
n = 100
C = [[]]
# columns = list of features
# already fitted model of type "lightgbm.sklearn.LGBMClassifier"
acvtree = ACVTree(model, X_train[:n].values)
sdp_importance_m, sdp_importance, sdp_importance_proba, sdp_importance_coal_count, sdp_importance_variable_count = acvtree.global_sdp_importance_clf(data=X_test[:n].values[y_test[:n]<1], data_bground=X_train[:n].values, columns_names=columns, global_proba=0.9, decay=0.7, threshold=0.6, proba=0.9,verbose=1,C=C, verbose=0)
~/.virtualenvs/venv/lib/python3.8/site-packages/acv_explainers/acv_tree.py in global_sdp_importance_clf(self, data, data_bground, columns_names, global_proba, decay, threshold, proba, C, verbose)
64 proba, C, verbose):
65
---> 66 return global_sdp_importance(data, data_bground, columns_names, global_proba, decay, threshold,
67 proba, C, verbose, self.compute_sdp_clf, self.predict)
68
~/.virtualenvs/venv/lib/python3.8/site-packages/acv_explainers/py_acv.py in global_sdp_importance(data, data_bground, columns_names, global_proba, decay, threshold, proba, C, verbose, cond_func, predict)
475 fx = predict(np.expand_dims(ind, 0))[0]
476
--> 477 local_sdp(ind, fx, threshold, proba, index, data_bground, final_coal, decay,
478 C=C, verbose=verbose, cond_func=cond_func)
479
~/.virtualenvs/venv/lib/python3.8/site-packages/acv_explainers/py_acv.py in local_sdp(x, f, threshold, proba, index, data, final_coal, decay, C, verbose, cond_func)
405 if c not in C_off:
406
--> 407 value = cond_func(x, f, threshold, S=chain_l(c), data=data)
408 c_value[size][str(c)] = value
409
~/.virtualenvs/venv/lib/python3.8/site-packages/acv_explainers/acv_tree.py in compute_sdp_clf(self, x, fx, tx, S, data)
37
38 def compute_sdp_clf(self, x, fx, tx, S, data):
---> 39 sdp = cond_sdp_forest_clf(x, fx, tx, self.trees, S, data=data)
40 return sdp
41
~/.virtualenvs/venv/lib/python3.8/site-packages/acv_explainers/py_acv.py in cond_sdp_forest_clf(x, fx, tx, forest, S, data)
239
240 s = (mean_forest['all'] - mean_forest['down']) / (mean_forest['up'] - mean_forest['down'])
--> 241 sdp += 0 * (s[int(fx)] < 0) + 1 * (s[int(fx)] > 1) + s[int(fx)] * (0 <= s[int(fx)] <= 1)
242 # sdp = 0 * (sdp[int(fx)] < 0) + 1 * (sdp[int(fx)] > 1) + sdp[int(fx)] * (0 <= sdp[int(fx)] <= 1)
243 return sdp/n_trees
IndexError: index 1 is out of bounds for axis 0 with size 1