Source code for shapiq.imputer.base
"""Base class for all Imputers."""
from __future__ import annotations
from abc import abstractmethod
from typing import Generic, TypeVar
import numpy as np
from shapiq.explainer import utils
from shapiq.game import Game
# Type variable for the model type that ``Imputer`` is generic over (see ``Imputer.model``).
TModel = TypeVar("TModel")
[docs]
class Imputer(Game, Generic[TModel]):
    """Base class for Imputers.

    An imputer stores a model together with background data and initializes itself as a
    :class:`Game` whose number of players equals the number of features in the data.
    """

    model: TModel
    """The model to impute missing values for."""

    empty_prediction: float
    """The model's prediction on an empty data point (all features missing)."""

    n_features: int
    """The number of features in the data (equals the number of players in the game)."""

    data: np.ndarray
    """The background data to use for the imputer."""

    random_state: int | None
    """The random state to use for sampling."""

    @abstractmethod
    def __init__(
        self,
        model: TModel,
        data: np.ndarray,
        x: np.ndarray | None = None,
        *,
        sample_size: int | None = 100,
        categorical_features: list[int] | None = None,
        random_state: int | None = None,
        verbose: bool = False,
    ) -> None:
        """Initializes the base imputer.

        Args:
            model: The model to explain as a callable function expecting a data points as input and
                returning the model's predictions.

            data: The background data to use for the explainer as a 2-dimensional array with shape
                ``(n_samples, n_features)``. A 1-dimensional array is reshaped to a single row.

            x: The explanation point to use the imputer on either as a 2-dimensional array with
                shape ``(1, n_features)`` or as a vector with shape ``(n_features,)``.

            sample_size: The number of samples to draw from the background data. Defaults to ``100``
                but is usually overwritten in the subclasses.

            categorical_features: A list of indices of the categorical features in the background
                data.

            random_state: The random state to use for sampling. Defaults to ``None``.

            verbose: A flag to enable verbose imputation, which will print a progress bar for model
                evaluation. Note that this can slow down the imputation process. Defaults to
                ``False``.

        Raises:
            ValueError: If the model is neither a plain callable nor carries a
                ``_shapiq_predict_function`` attribute.
        """
        # Resolve how the model is evaluated. The checks form ONE if/elif/else chain so that
        # every path either assigns ``self._predict_function`` or raises; a nested ``if`` here
        # would leave a path on which nothing is assigned and ``predict`` fails much later.
        if callable(model) and not hasattr(model, "_predict_function"):
            self._predict_function = utils.predict_callable
        # shapiq.Explainer adds a _shapiq_predict_function to the model to make it callable
        elif hasattr(model, "_shapiq_predict_function"):
            self._predict_function = model._shapiq_predict_function  # noqa: SLF001  # pyright: ignore [reportAttributeAccessIssue]
        else:
            msg = "The model must be callable or have a predict function."
            raise ValueError(msg)
        self.model = model

        # check if data is a vector and promote it to a single-row matrix
        if data.ndim == 1:
            data = data.reshape(1, data.shape[0])
        self.data = data
        self._sample_size = sample_size
        self.empty_prediction = 0.0
        self.n_features = self.data.shape[1]
        self._cat_features: list = [] if categorical_features is None else categorical_features
        self.random_state = random_state
        self._rng = np.random.default_rng(self.random_state)

        # fit x
        self._x: np.ndarray | None = None
        if x is not None:
            self.fit(x)

        # init the game
        # developer note: the normalization_value needs to be set in the subclass
        super().__init__(n_players=self.n_features, normalize=False, verbose=verbose)

    @property
    def x(self) -> np.ndarray:
        """Returns a copy of the explanation point if it is set.

        Raises:
            AttributeError: If no explanation point has been set via :meth:`fit`.
        """
        if self._x is None:
            msg = "The imputer has not yet been fitted yet."
            raise AttributeError(msg)
        return self._x.copy()

    @property
    def sample_size(self) -> int:
        """Returns the sample size.

        Raises:
            AttributeError: If the sample size is ``None``.
        """
        if self._sample_size is None:
            msg = "The sample size is not set."
            raise AttributeError(msg)
        return self._sample_size

    def set_random_state(self, random_state: int | None = None) -> None:
        """Sets the random state for the imputer.

        Stores the given value in ``random_state`` and re-creates the imputer's internal random
        number generator from it.

        Args:
            random_state: The random state to set. Defaults to ``None``, which will set a not
                deterministic random state.
        """
        self.random_state = random_state
        self._rng = np.random.default_rng(random_state)

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Provides a unified prediction interface.

        The call is delegated to the predict function selected during initialization, which
        receives the stored model and ``x``.

        Args:
            x: The data point to predict the model's output for.

        Returns:
            The model's prediction for the given data point as a vector.
        """
        return self._predict_function(self.model, x)  # type: ignore[call-arg, operator]

    def fit(self, x: np.ndarray) -> Imputer:
        """Fits the imputer to the explanation point.

        A copy of ``x`` is stored, so later changes to the caller's array do not affect the
        imputer. A vector is reshaped to a single-row matrix.

        Args:
            x: The explanation point to use the imputer on either as a 2-dimensional array with
                shape ``(1, n_features)`` or as a vector with shape ``(n_features,)``.

        Returns:
            The fitted imputer.
        """
        self._x = x.copy()
        if self._x.ndim == 1:
            self._x = self._x.reshape(1, x.shape[0])
        return self

    def insert_empty_value(self, outputs: np.ndarray, coalitions: np.ndarray) -> np.ndarray:
        """Inserts the empty value into the outputs.

        Note that ``outputs`` is modified in place and the same array is returned.

        Args:
            outputs: The model's predictions on the imputed data points.

            coalitions: The coalitions for which the model's predictions were made.

        Returns:
            The model's predictions with the empty value inserted for the empty coalitions.
        """
        # rows of ``coalitions`` without any truthy entry are the empty coalitions
        outputs[~np.any(coalitions, axis=1)] = self.empty_prediction
        return outputs