Source code for shapiq.tree.base

"""The base class for tree model conversion."""

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from .utils import compute_empty_prediction

if TYPE_CHECKING:
    from numpy.typing import NDArray


[docs] class TreeModel: """Internal representation of a single tree used by the shapiq tree explainers. Each library-specific converter (scikit-learn, XGBoost, LightGBM, CatBoost) targets this common format so that the downstream algorithms (TreeSHAP-IQ, LinearTreeSHAP, InterventionalTreeSHAP) only need to understand one node-array layout. Constructor arguments that fall back to a computed default when ``None`` is passed are documented on :meth:`__init__`. The attributes below describe what is available on a fully initialized instance. Attributes: children_left: The left children of each node in a tree. Leaf nodes are ``-1``. children_right: The right children of each node in a tree. Leaf nodes are ``-1``. children_missing: The child each node routes missing-value samples to. Used together with ``children_left`` to derive ``children_left_default`` during ``__init__``. children_left_default: Boolean mask. ``True`` at index ``i`` if missing-value samples at node ``i`` are routed to ``children_left[i]``. Derived from ``children_missing``. features: The feature indices of the decision nodes in a tree. Leaf nodes are ``-2``. thresholds: The thresholds of the decision nodes in a tree. Leaf nodes are ``np.nan``. values: The leaf-node values, flattened to a 1-D array. Non-leaf nodes are set to ``0``. node_sample_weight: The sample weights of the nodes in a tree. empty_prediction: The empty prediction of the tree model (weighted mean of leaf values). leaf_mask: The boolean mask of the leaf nodes in a tree. n_features_in_tree: The number of distinct features actually used by decision nodes. max_feature_id: The maximum feature index used by any decision node (or ``0`` if none). feature_ids: The set of feature indices used by decision nodes. root_node_id: The root node id of the tree model. Defaults to ``0``. n_nodes: The number of nodes in the tree model. decision_type: The split comparison used by :meth:`decision_function`. Either ``"<="`` (default) or ``"<"``. nodes: The node ids of the tree model as ``np.arange(n_nodes)``. feature_map_original_internal: Mapping of feature indices from the original feature indices (as in the model) to the internal feature indices (as in the tree model). feature_map_internal_original: Mapping of feature indices from the internal feature indices (as in the tree model) to the original feature indices (as in the model). original_output_type: The original output type of the tree model. Defaults to ``"raw"``. Currently not used by downstream algorithms. intercepts: Per-leaf intercept terms for linear-leaf tree models. Currently unused. coeffs: Per-leaf coefficient vectors for linear-leaf tree models. Currently unused. """ children_left: NDArray[np.int_] children_right: NDArray[np.int_] children_missing: NDArray[np.int_] features: NDArray[np.int_] thresholds: NDArray[np.floating] values: NDArray[np.floating] node_sample_weight: NDArray[np.floating] children_left_default: NDArray[np.bool_] empty_prediction: float leaf_mask: NDArray[np.bool_] n_features_in_tree: int max_feature_id: int feature_ids: set[int] root_node_id: int n_nodes: int decision_type: str nodes: NDArray[np.int_] feature_map_original_internal: dict[int, int] feature_map_internal_original: dict[int, int] original_output_type: str = "raw" # not used at the moment intercepts: NDArray[np.floating] coeffs: NDArray[np.floating] def __init__( self, children_left: NDArray[np.int_], children_right: NDArray[np.int_], children_missing: NDArray[np.int_], features: NDArray[np.int_], thresholds: NDArray[np.floating], values: NDArray[np.floating], node_sample_weight: NDArray[np.floating], empty_prediction: float | None = None, leaf_mask: NDArray[np.bool_] | None = None, n_features_in_tree: int | None = None, max_feature_id: int | None = None, feature_ids: set[int] | None = None, root_node_id: int | None = None, n_nodes: int | None = None, nodes: NDArray[np.int_] | None = None, decision_type: str | None = None, feature_map_original_internal: dict[int, int] | None = None, feature_map_internal_original: dict[int, int] | None = None, original_output_type: str = "raw", # noqa: ARG002 intercepts: NDArray[np.floating] | None = None, # noqa: ARG002 coeffs: NDArray[np.floating] | None = None, # noqa: ARG002 ) -> None: """Initialize the :class:`TreeModel`. All numpy-array arguments must share a common node ordering. Arguments listed as ``None``-able fall back to a value computed from the mandatory arrays. Args: children_left: Left-child node ids; ``-1`` denotes a leaf. children_right: Right-child node ids; ``-1`` denotes a leaf. children_missing: Node id to which missing-value samples are routed. features: Decision-node feature indices. Leaf positions are sanitized to ``-2``. thresholds: Decision-node thresholds. Leaf positions are sanitized to ``np.nan``. values: Leaf-node values. Higher-dim arrays are flattened to 1-D; non-leaf positions are forced to ``0``. node_sample_weight: Per-node sample weights. ``NaN`` at leaves is replaced with ``1``. empty_prediction: Pre-computed empty prediction. ``None`` triggers :meth:`compute_empty_prediction`. leaf_mask: Boolean mask of leaf nodes. ``None`` derives it from ``children_left == -1``. n_features_in_tree: Number of distinct features used by decision nodes. ``None`` derives it from the unique values in ``features`` (excluding ``-2``). max_feature_id: Largest feature index used. ``None`` derives it from ``features``. feature_ids: Set of feature indices used by decision nodes. ``None`` derives it from ``features``. root_node_id: Root node id. ``None`` defaults to ``0``. n_nodes: Number of nodes. ``None`` derives it from ``len(children_left)``. nodes: Node-id array. ``None`` defaults to ``np.arange(n_nodes)``. decision_type: Split comparison used by :meth:`decision_function` (``"<="`` or ``"<"``). ``None`` defaults to ``"<="``. feature_map_original_internal: Mapping from original to internal feature indices. ``None`` defaults to the identity mapping on ``feature_ids``. feature_map_internal_original: Mapping from internal to original feature indices. ``None`` defaults to the identity mapping on ``feature_ids``. original_output_type: Currently unused; accepted for forward compatibility. intercepts: Currently unused; accepted for forward compatibility with linear-leaf trees. coeffs: Currently unused; accepted for forward compatibility with linear-leaf trees. """ self.children_left = children_left self.children_right = children_right self.children_missing = children_missing # Set children_missing to 1 if equal to children_left else 0 if equal to children_right self.children_left_default = self.children_missing == self.children_left self.features = features self.thresholds = thresholds self.values = values self.node_sample_weight = node_sample_weight # setup leaf mask if leaf_mask is None: self.leaf_mask = np.asarray(self.children_left == -1) else: self.leaf_mask = leaf_mask # sanitize features self.features = np.where(self.leaf_mask, -2, self.features) self.features = self.features.astype(int) # make features integer type # sanitize thresholds self.thresholds = np.where(self.leaf_mask, np.nan, self.thresholds) # sanitize node sample weights self.node_sample_weight[self.leaf_mask] = np.where( np.isnan(self.node_sample_weight[self.leaf_mask]), 1.0, self.node_sample_weight[self.leaf_mask], ) # setup empty prediction if empty_prediction is None: self.compute_empty_prediction() else: self.empty_prediction = empty_prediction unique_features = set(np.unique(self.features)) unique_features.discard(-2) # remove leaf node "features" # setup number of features if n_features_in_tree is None: self.n_features_in_tree = len(unique_features) else: self.n_features_in_tree = n_features_in_tree # setup max feature id if max_feature_id is None and len(unique_features) > 0: self.max_feature_id = max(unique_features) elif max_feature_id is not None: self.max_feature_id = max_feature_id else: self.max_feature_id = 0 # setup feature names if feature_ids is None: self.feature_ids = unique_features else: self.feature_ids = feature_ids # setup root node id if root_node_id is None: self.root_node_id = 0 else: self.root_node_id = root_node_id # setup number of nodes if n_nodes is None: self.n_nodes = len(self.children_left) else: self.n_nodes = n_nodes # setup nodes if nodes is None: self.nodes = np.arange(self.n_nodes) else: self.nodes = nodes # setup original feature mapping if feature_map_original_internal is None: self.feature_map_original_internal = {i: i for i in unique_features} else: self.feature_map_original_internal = feature_map_original_internal # setup new feature mapping if feature_map_internal_original is None: self.feature_map_internal_original = {i: i for i in unique_features} else: self.feature_map_internal_original = feature_map_internal_original # flatten values if necessary if self.values.ndim > 1: if self.values.shape[1] != 1: msg = "Values array has more than one column." raise ValueError(msg) self.values = self.values.flatten() # set all values of non leaf nodes to zero self.values[~self.leaf_mask] = 0 # Set decision function self.decision_type = decision_type if decision_type is not None else "<="
[docs] def decision_function(self, value: float, threshold: float, *, left_default: bool) -> bool: """Decision function for split nodes. The function compares the input value to the threshold using the specified decision type. If the value is NaN, the function returns the left_default. Args: value: The feature value to compare. threshold: The threshold to compare the feature value against. left_default: The default direction to take if the value is NaN. True for left, False for right. Returns: A boolean indicating whether to go left (True) or right (False) at the split node. """ if self.decision_type == "<": return (value < threshold) if not np.isnan(value) else left_default return (value <= threshold) if not np.isnan(value) else left_default
[docs] def compute_empty_prediction(self) -> None: """Compute the empty prediction of the tree model. The method computes the empty prediction of the tree model by taking the weighted average of the leaf node values. The method modifies the tree model in place. """ try: self.empty_prediction = compute_empty_prediction( self.values[self.leaf_mask], self.node_sample_weight[self.leaf_mask], ) except Exception as e: msg = f"Could not compute empty prediction: {e}" raise ValueError(msg) from e
def __post_init__(self) -> None: """No-op hook retained for forward compatibility. Kept so existing callers (e.g. dataclass-aware subclasses) can still invoke it; all initialization happens in :meth:`__init__`. """
[docs] def reduce_feature_complexity(self) -> None: """Reduces the feature complexity of the tree model. The method reduces the feature complexity of the tree model by removing unused features and reindexing the feature indices of the decision nodes in the tree. The method modifies the tree model in place. To see the original feature mappings after the reduction, use the ``feature_map_original_internal`` and ``feature_map_internal_original`` attributes. For example, consider a tree model with the following feature indices: [0, 1, 8] The method will remove the unused feature indices and reindex the feature indices of the decision nodes in the tree to the following: [0, 1, 2] Feature ``'8'`` is 'renamed' to ``'2'`` such that in the internal representation a one-hot vector (and matrices) of length ``3`` suffices to represent the feature indices. """ if self.n_features_in_tree < self.max_feature_id + 1: new_feature_ids = set(range(self.n_features_in_tree)) mapping_old_new = {old_id: new_id for new_id, old_id in enumerate(self.feature_ids)} mapping_new_old = dict(enumerate(self.feature_ids)) new_features = np.zeros_like(self.features) for i, old_feature in enumerate(self.features): new_value = -2 if old_feature == -2 else mapping_old_new[old_feature] new_features[i] = new_value self.features = new_features self.feature_ids = new_feature_ids self.feature_map_original_internal = mapping_old_new self.feature_map_internal_original = mapping_new_old self.n_features_in_tree = len(new_feature_ids) self.max_feature_id = self.n_features_in_tree - 1
[docs] def predict_one(self, x: NDArray[np.floating]) -> float: """Predicts the output of a single instance. Args: x: The instance to predict as a 1-dimensional array. Returns: The prediction of the instance with the tree model. """ node = self.root_node_id is_leaf = self.leaf_mask[node] while not is_leaf: feature_id_internal = self.features[node] feature_id_original = self.feature_map_internal_original[feature_id_internal] if self.decision_function( x[feature_id_original], self.thresholds[node], left_default=self.children_left_default[node], ): node = self.children_left[node] else: node = self.children_right[node] is_leaf = self.leaf_mask[node] return float(self.values[node])
class EdgeTree: """Edge-based representation of a tree used by the TreeSHAP-IQ algorithm. Built from a :class:`TreeModel` via :func:`~shapiq.tree.conversion.edges.create_edge_tree`, this structure pre-computes the per-edge quantities that TreeSHAP-IQ needs to traverse the tree only once per explained instance. Attributes: parents: Parent node id for each node (root parent is ``-1``). ancestors: For each node, the id of the closest ancestor that splits on the same feature (``-1`` if no such ancestor exists). ancestor_nodes: Mapping ``node_id -> per-feature ancestor id array`` for non-root nodes. p_e_values: Per-edge probability factors used by the summary polynomial. p_e_storages: Cached storage of ``p_e`` values along each path. split_weights: Per-edge split weights (fraction of samples taking each branch). empty_predictions: Per-leaf contribution to the empty prediction. edge_heights: Per-node edge height used by the Chebyshev interpolation. max_depth: Maximum depth of the tree. last_feature_node_in_path: Per-node id of the last decision node along the path that split on the same feature. interaction_height_store: Mapping ``order -> per-node interaction-height array`` used to decide which interactions a node contributes to. has_ancestors: Boolean mask; ``True`` at node ``i`` if ``ancestors[i] != -1``. """ parents: np.ndarray ancestors: np.ndarray ancestor_nodes: dict[int, np.ndarray] p_e_values: np.ndarray p_e_storages: np.ndarray split_weights: np.ndarray empty_predictions: np.ndarray edge_heights: np.ndarray max_depth: int last_feature_node_in_path: np.ndarray interaction_height_store: dict[int, np.ndarray] has_ancestors: np.ndarray def __init__( self, parents: np.ndarray, ancestors: np.ndarray, ancestor_nodes: dict[int, np.ndarray], p_e_values: np.ndarray, p_e_storages: np.ndarray, split_weights: np.ndarray, empty_predictions: np.ndarray, edge_heights: np.ndarray, max_depth: int, last_feature_node_in_path: np.ndarray, interaction_height_store: dict[int, np.ndarray], *, has_ancestors: np.ndarray | None = None, ) -> None: """Initialize an :class:`EdgeTree` from pre-computed per-node / per-edge arrays. See the class docstring for the meaning of each attribute. ``has_ancestors`` is derived from ``ancestors > -1`` when not supplied. """ self.parents = parents self.ancestors = ancestors self.ancestor_nodes = ancestor_nodes self.p_e_values = p_e_values self.p_e_storages = p_e_storages self.split_weights = split_weights self.empty_predictions = empty_predictions self.edge_heights = edge_heights self.max_depth = max_depth self.last_feature_node_in_path = last_feature_node_in_path self.interaction_height_store = interaction_height_store if has_ancestors is None: self.has_ancestors = self.ancestors > -1 else: self.has_ancestors = has_ancestors