Source code for shapiq.approximator.marginals.stratified

"""This module contains the Stratified Sampling approximation method for the Shapley values."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal

import numpy as np

from shapiq.approximator.base import Approximator
from shapiq.interaction_values import InteractionValues

if TYPE_CHECKING:
    from collections.abc import Callable

    from shapiq.game import Game

ValidStratifiedSamplingSVIndices = Literal["SV"]
"""The valid indices for the Stratified Sampling SV approximator."""


[docs] class StratifiedSamplingSV(Approximator[ValidStratifiedSamplingSVIndices]): """The Stratified Sampling algorithm for estimating the Shapley values. The Stratified Sampling algorithm estimates the Shapley values (SV) by sampling random marginal contributions for each player and each coalition size. The marginal contributions are grouped into strata by size. The strata are aggregated for each player after sampling to obtain the final estimate. For more information, see Maleki et al. (2013) :cite:t:`Maleki.2013`. See Also: - :class:`~shapiq.approximator.montecarlo.svarmiq.SVARM`: The SVARM approximator - :class:`~shapiq.approximator.montecarlo.svarmiq.SVARMIQ`: The SVARMIQ approximator """ valid_indices: tuple[ValidStratifiedSamplingSVIndices, ...] = ("SV",) def __init__( self, n: int, random_state: int | None = None, **kwargs: Any, # noqa: ARG002 ) -> None: """Initialize the Stratified Sampling SV approximator. Args: n: The number of players. random_state: The random state to use for the permutation sampling. Defaults to ``None``. **kwargs: Additional arguments not used. """ super().__init__(n, max_order=1, index="SV", top_order=False, random_state=random_state) self.iteration_cost: int = 2
[docs] def approximate( self, budget: int, game: Game | Callable[[np.ndarray], np.ndarray], *args: Any, # noqa: ARG002 **kwargs: Any, # noqa: ARG002 ) -> InteractionValues: """Approximates the Shapley values using ApproShapley. Args: budget: The number of game evaluations for approximation game: The game function as a callable that takes a set of players and returns the value. *args: Additional positional arguments (not used). **kwargs: Additional keyword arguments (not used). Returns: The estimated interaction values. """ used_budget = 0 # get value of empty coalition and grand coalition empty_value = game(np.zeros(self.n, dtype=bool))[0] full_value = game(np.ones(self.n, dtype=bool))[0] used_budget += 2 strata = np.zeros((self.n, self.n), dtype=float) counts = np.zeros((self.n, self.n), dtype=int) # main sampling loop while used_budget < budget: # iterate over coalition size to which a marginal contribution can be drawn for size in range(self.n): # iterate over players for whom a marginal contribution is to be drawn for player in range(self.n): # check if enough budget is available to sample a marginal contribution if ((size == 0 or size == self.n - 1) and used_budget < budget) or ( size in range(1, self.n - 1) and used_budget + 2 <= budget ): # if coalition size is 0 or n-1, empty or grand coalition value can be reuse if size == 0: coalition = np.zeros(self.n, dtype=bool) coalition[player] = True marginal_con = game(coalition)[0] - empty_value used_budget += 1 elif size == self.n - 1: coalition = np.ones(self.n, dtype=bool) coalition[player] = False marginal_con = full_value - game(coalition)[0] used_budget += 1 # otherwise both coalitions that make up the marginal contribution have # to eb evaluated else: available_players = list(self._grand_coalition_set) available_players.remove(player) # draw a subset of the player set without player of size stratum # uniformly at random coalition_list = list( self._rng.choice(available_players, size, replace=False), ) coalition = np.zeros(self.n, dtype=bool) coalition[coalition_list] = True marginal_con = -game(coalition)[0] coalition[player] = True marginal_con += game(coalition)[0] used_budget += 2 # update the affected strata estimate strata[player][size] += marginal_con counts[player][size] += 1 # aggregate the stratum estimates: divide each stratum sum by its sample number, sum up # the means, divide by the number of valid stratum estimates strata = np.divide(strata, counts, out=strata, where=counts != 0) result = np.sum(strata, axis=1) non_zeros = np.count_nonzero(counts, axis=1) result = np.divide(result, non_zeros, out=result, where=non_zeros != 0) # create vector of interaction values with correct length and order result_to_finalize = self._init_result(dtype=float) result_to_finalize[self._interaction_lookup[()]] = empty_value for player in range(self.n): idx = self._interaction_lookup[(player,)] result_to_finalize[idx] = result[player] return InteractionValues( n_players=self.n, values=result_to_finalize, index=self.approximation_index, interaction_lookup=self._interaction_lookup, baseline_value=float(empty_value), min_order=self.min_order, max_order=self.max_order, estimated=True, estimation_budget=used_budget, target_index=self.index, )