Source code for shapiq.approximator.marginals.owen

"""The Owen Sampling approximator for the Shapley value."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal

import numpy as np

from shapiq.approximator.base import Approximator
from shapiq.interaction_values import InteractionValues

if TYPE_CHECKING:
    from collections.abc import Callable

    from shapiq.game import Game

ValidOwenSamplingSVIndices = Literal["SV"]
"""The valid indices for the Owen Sampling SV approximator."""


[docs] class OwenSamplingSV(Approximator[ValidOwenSamplingSVIndices]): """Owen Sampling approximator for the Shapley values. The Owen Sampling algorithm estimates the Shapley values (SV) by sampling random marginal contributions for each player and each coalition size. The marginal contributions are used to update an integral representation of the SV. For more information, see :cite:t:`Okhrati.2020`. The number of anchor points M at which the integral is to be palpated share the available budget for each player equally. A higher `n_anchor_points` increases the resolution of the integral reducing bias while reducing the accuracy of the estimation at each point. Attributes: n: The number of players. iteration_cost: The cost of a single iteration of the approximator. """ valid_indices: tuple[ValidOwenSamplingSVIndices, ...] = ("SV",) """The valid indices for this approximator.""" def __init__( self, n: int, n_anchor_points: int = 10, random_state: int | None = None, **kwargs: Any, # noqa: ARG002 ) -> None: """Initialize the Owen Sampling SV approximator. Args: n: The number of players. n_anchor_points: The number of anchor points at which the integral is to be palpated. Defaults to ``10``. random_state: The random state to use for the permutation sampling. Defaults to ``None``. **kwargs: Additional arguments not used. """ super().__init__(n, max_order=1, index="SV", top_order=False, random_state=random_state) self.iteration_cost: int = 2 self.n_anchor_points = n_anchor_points
[docs] def approximate( self, budget: int, game: Game | Callable[[np.ndarray], np.ndarray], *args: Any, # noqa: ARG002 **kwargs: Any, # noqa: ARG002 ) -> InteractionValues: """Approximates the Shapley values using Owen Sampling. Args: budget: The number of game evaluations for approximation game: The game function as a callable that takes a set of players and returns the value. *args: Additional positional arguments not used in this method. **kwargs: Additional keyword arguments not used in this method. Returns: The estimated interaction values. """ used_budget = 0 empty_value = game(np.zeros(self.n, dtype=bool))[0] used_budget += 1 anchors = self.get_anchor_points(self.n_anchor_points) estimates = np.zeros((self.n, self.n_anchor_points), dtype=float) counts = np.zeros((self.n, self.n_anchor_points), dtype=int) # main sampling loop while used_budget + 2 <= budget: # iterate over anchor points from which a marginal contribution can be drawn for m in range(len(anchors)): q = anchors[m] # iterate over players for whom a marginal contribution is to be drawn for player in range(self.n): if used_budget + 2 <= budget: # draw a subset of players without player: all are inserted independently # with probability q coalition = self._rng.choice( [True, False], self.n - 1, replace=True, p=[q, 1 - q], ) # add information that player is absent coalition = np.insert(coalition, player, False) marginal_con = -game(coalition)[0] # add information that player is present to complete marginal contribution coalition[player] = True marginal_con += game(coalition)[0] used_budget += 2 # update the affected strata estimate estimates[player][m] += marginal_con counts[player][m] += 1 # aggregate the anchor estimates: divide each anchor sum by its sample number, sum up the # means, divide by the number of valid anchor estimates estimates = np.divide(estimates, counts, out=estimates, where=counts != 0) result = np.sum(estimates, axis=1) non_zeros = np.count_nonzero(counts, axis=1) result = np.divide(result, non_zeros, out=result, where=non_zeros != 0) # create vector of interaction values with correct length and order result_to_finalize = self._init_result(dtype=float) result_to_finalize[self._interaction_lookup[()]] = empty_value for player in range(self.n): idx = self._interaction_lookup[(player,)] result_to_finalize[idx] = result[player] return InteractionValues( n_players=self.n, values=result_to_finalize, index=self.approximation_index, interaction_lookup=self._interaction_lookup, baseline_value=empty_value, min_order=self.min_order, max_order=self.max_order, estimated=True, estimation_budget=used_budget, target_index=self.index, )
[docs] @staticmethod def get_anchor_points(n_anchor_points: int) -> np.ndarray: """Returns the anchor points for the Owen Sampling approximation. Args: n_anchor_points: The number of anchor points. Returns: An array of anchor points. Raises: ValueError: If the number of anchor points is less than or equal to 0. """ if n_anchor_points <= 0: msg = "The number of anchor points needs to be greater than 0." raise ValueError(msg) if n_anchor_points == 1: return np.array([0.5]) return np.linspace(0.0, 1.0, num=n_anchor_points)