Source code for shapiq.imputer.base

"""Base class for all Imputers."""

from __future__ import annotations

from abc import abstractmethod
from typing import Generic, TypeVar

import numpy as np

from shapiq.explainer import utils
from shapiq.game import Game

TModel = TypeVar("TModel")



[docs]
class Imputer(Game, Generic[TModel]):
    """Base class for Imputers.

    An imputer wraps a model together with background data and exposes it as a
    :class:`Game` whose players are the features of the data. Subclasses implement the
    actual imputation strategy; this base class only stores the shared state, resolves
    how the model is called, and offers small helpers (``predict``, ``fit``,
    ``insert_empty_value``).
    """

    model: TModel
    """The model to impute missing values for."""

    empty_prediction: float
    """The model's prediction on an empty data point (all features missing)."""

    n_features: int
    """The number of features in the data (equals the number of players in the game)."""

    data: np.ndarray
    """The background data to use for the imputer."""

    random_state: int | None
    """The random state to use for sampling."""

    @abstractmethod
    def __init__(
        self,
        model: TModel,
        data: np.ndarray,
        x: np.ndarray | None = None,
        *,
        sample_size: int | None = 100,
        categorical_features: list[int] | None = None,
        random_state: int | None = None,
        verbose: bool = False,
    ) -> None:
        """Initializes the base imputer.

        Args:
            model: The model to explain as a callable function expecting a data points as input and
                returning the model's predictions. Alternatively, an object carrying a
                ``_shapiq_predict_function`` attribute, which is then called as
                ``_shapiq_predict_function(model, x)``.

            data: The background data to use for the explainer as a 2-dimensional array with shape
                ``(n_samples, n_features)``. A 1-dimensional array is treated as a single sample
                and reshaped to ``(1, n_features)``.

            x: The explanation point to use the imputer on either as a 2-dimensional array with
                shape ``(1, n_features)`` or as a vector with shape ``(n_features,)``. If given,
                :meth:`fit` is called with it during initialization.

            sample_size: The number of samples to draw from the background data. Defaults to ``100``
                but is usually overwritten in the subclasses.

            categorical_features: A list of indices of the categorical features in the background
                data. ``None`` is stored as an empty list.

            random_state: The random state to use for sampling. Defaults to ``None``.

            verbose: A flag to enable verbose imputation, which will print a progress bar for model
                evaluation. Note that this can slow down the imputation process. Defaults to
                ``False``.

        Raises:
            ValueError: If ``model`` is neither callable nor carries a
                ``_shapiq_predict_function`` attribute.

        """
        # Resolve how ``predict`` will invoke the model. Every branch either assigns
        # ``self._predict_function`` or raises, so ``predict`` can never hit an unset attribute.
        if callable(model) and not hasattr(model, "_predict_function"):
            # plain callable: evaluate the model directly
            self._predict_function = utils.predict_callable
        # shapiq.Explainer adds a _shapiq_predict_function to the model to make it callable
        elif hasattr(model, "_shapiq_predict_function"):
            self._predict_function = model._shapiq_predict_function  # noqa: SLF001  # pyright: ignore [reportAttributeAccessIssue]
        elif callable(model):
            # Callable that carries a ``_predict_function`` attribute but no shapiq hook.
            # This case used to fall through without assigning anything, which only surfaced
            # later as an AttributeError inside ``predict``; calling the model directly is the
            # only option left.
            self._predict_function = utils.predict_callable
        else:
            msg = "The model must be callable or have a predict function."
            raise ValueError(msg)
        self.model = model

        # a vector of background data is interpreted as one sample with ``n_features`` entries
        if data.ndim == 1:
            data = data.reshape(1, data.shape[0])
        self.data = data
        self.n_features = self.data.shape[1]

        self._sample_size = sample_size
        # placeholder value; see the developer note on normalization below
        self.empty_prediction = 0.0
        self._cat_features: list = [] if categorical_features is None else categorical_features
        self.random_state = random_state
        self._rng = np.random.default_rng(self.random_state)

        # fit x (goes through ``self.fit`` so that subclass overrides are honoured)
        self._x: np.ndarray | None = None
        if x is not None:
            self.fit(x)

        # init the game
        # developer note: the normalization_value needs to be set in the subclass
        super().__init__(n_players=self.n_features, normalize=False, verbose=verbose)

    @property
    def x(self) -> np.ndarray:
        """Returns a copy of the explanation point if it is set.

        Raises:
            AttributeError: If no explanation point has been set via :meth:`fit` yet.

        """
        if self._x is None:
            msg = "The imputer has not yet been fitted yet."
            raise AttributeError(msg)
        # hand out a copy so callers cannot modify the stored explanation point
        return self._x.copy()

    @property
    def sample_size(self) -> int:
        """Returns the sample size.

        Raises:
            AttributeError: If the sample size is ``None``.

        """
        if self._sample_size is None:
            msg = "The sample size is not set."
            raise AttributeError(msg)
        return self._sample_size

    def set_random_state(self, random_state: int | None = None) -> None:
        """Sets the random state of the imputer and re-creates its random number generator.

        Only the imputer's own state (``random_state`` and the internal generator) is
        updated; the wrapped model is not touched.

        Args:
            random_state: The random state to set. Defaults to ``None``, which will set a not
                deterministic random state.

        """
        self.random_state = random_state
        self._rng = np.random.default_rng(random_state)

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Provides a unified prediction interface.

        The prediction function resolved during initialization is called as
        ``predict_function(model, x)``.

        Args:
            x: The data point to predict the model's output for.

        Returns:
            The model's prediction for the given data point as a vector.

        """
        return self._predict_function(self.model, x)  # type: ignore[call-arg, operator]

    def fit(self, x: np.ndarray) -> Imputer:
        """Fits the imputer to the explanation point.

        A copy of ``x`` is stored, so later changes to the passed array do not affect the
        imputer. A vector is reshaped to a single row.

        Args:
            x: The explanation point to use the imputer on either as a 2-dimensional array with
                shape ``(1, n_features)`` or as a vector with shape ``(n_features,)``.

        Returns:
            The fitted imputer.

        """
        self._x = x.copy()
        if self._x.ndim == 1:
            self._x = self._x.reshape(1, x.shape[0])
        return self

    def insert_empty_value(self, outputs: np.ndarray, coalitions: np.ndarray) -> np.ndarray:
        """Inserts the empty value into the outputs.

        Note:
            ``outputs`` is modified in place; the returned array is the same object.

        Args:
            outputs: The model's predictions on the imputed data points.
            coalitions: The coalitions for which the model's predictions were made, as a
                2-dimensional array with one coalition per row.

        Returns:
            The model's predictions with the empty value inserted for the empty coalitions.

        """
        # a coalition is empty when none of its entries is truthy
        empty_coalitions = ~np.any(coalitions, axis=1)
        outputs[empty_coalitions] = self.empty_prediction
        return outputs