Module ablation.baseline

Expand source code
from typing import Optional

import numpy as np

from . import distributions
from .distributions import (
    constant,
    constant_mean,
    constant_median,
    gaussian,
    gaussian_blur,
    gaussian_blur_permutation,
    max_distance,
    nearest_neighbors,
    nearest_neighbors_counterfactual,
    opposite_class,
    training,
)
from .utils.general import sample
from .utils.transform import le_to_ohe, ohe_to_le

# Names of the baseline methods handled by generate_baseline_distribution;
# every entry corresponds to one branch of that function's method dispatch.
BASELINES = [
    "gaussian_blur",
    "gaussian_blur_permutation",
    "constant",
    "constant_mean",
    "constant_median",
    "gaussian",
    "training",
    "max_distance",
    "nearest_neighbors",
    "nearest_neighbors_counterfactual",
    "opposite_class",
]


class OneToOneBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is in none of distributions.SAMPLE,
    distributions.MANY_TO_ONE or distributions.CONSTANT.
    """


class ManyToOneBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is listed in distributions.MANY_TO_ONE.
    """


class SampleBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is listed in distributions.SAMPLE, after the baseline
    has been passed through sample().
    """


class ConstantBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is listed in distributions.CONSTANT.
    """


def generate_baseline_distribution(
    method: str,
    X: np.ndarray,
    X_obs: np.ndarray,
    y: Optional[np.ndarray] = None,
    y_obs: Optional[np.ndarray] = None,
    nsamples: Optional[int] = None,
    random_state: Optional[int] = None,
    **kwargs,
) -> np.ndarray:
    """Build the baseline array for the requested baseline method.

    Args:
        method (str): name of the baseline method to dispatch to
        X (np.ndarray): data for source distribution
        X_obs (np.ndarray): data observations to explain with baselines
        y (Optional[np.ndarray]): classes of source distribution
        y_obs (Optional[np.ndarray]): predicted classes of observations
            to explain
        nsamples (Optional[int]): number of samples; must not be None when
            the method is listed in distributions.SAMPLE
        random_state (Optional[int]): random seed, handed to
            np.random.seed and to sample()
        **kwargs: forwarded to the selected distribution function; the
            "baseline" key is always overwritten with True

    Returns:
        np.ndarray: baseline, viewed as SampleBaseline, ManyToOneBaseline,
        ConstantBaseline or OneToOneBaseline depending on which group of
        the distributions module the method belongs to

    Raises:
        ValueError: if nsamples is None for a method in
            distributions.SAMPLE, or if the method name is not recognised
    """

    # Tell the distribution functions they are being called to build a
    # baseline; this overrides any caller-supplied "baseline" value.
    kwargs["baseline"] = True

    # Seeds NumPy's global random state.
    np.random.seed(random_state)

    if nsamples is None:
        if method in distributions.SAMPLE:
            raise ValueError(f"nsamples cannot be None for method: {method}")

    # Each entry is a zero-argument callable so that only the selected
    # distribution function is actually invoked.
    builders = {
        "gaussian_blur": lambda: gaussian_blur(X, **kwargs),
        "gaussian_blur_permutation": lambda: gaussian_blur_permutation(
            X, **kwargs
        ),
        "constant": lambda: constant(X, **kwargs),
        "constant_mean": lambda: constant_mean(X, **kwargs),
        "constant_median": lambda: constant_median(X, **kwargs),
        "gaussian": lambda: gaussian(X, **kwargs),
        "training": lambda: training(X, **kwargs),
        "max_distance": lambda: max_distance(X, X_obs, **kwargs),
        "nearest_neighbors": lambda: nearest_neighbors(X, X_obs, **kwargs),
        "nearest_neighbors_counterfactual": (
            lambda: nearest_neighbors_counterfactual(
                X, y, X_obs, y_obs, **kwargs
            )
        ),
        "opposite_class": lambda: opposite_class(
            X, y, y_obs, nsamples, **kwargs
        ),
    }

    build = builders.get(method)
    if build is None:
        raise ValueError(f"Baseline method '{method}' does not exist!")
    baseline = build()

    # Sampling methods are subsampled before being tagged.
    if method in distributions.SAMPLE:
        drawn = sample(baseline, nsamples, random_state)
        return drawn.view(SampleBaseline)

    if method in distributions.MANY_TO_ONE:
        view_type = ManyToOneBaseline
    elif method in distributions.CONSTANT:
        view_type = ConstantBaseline
    else:
        view_type = OneToOneBaseline
    return baseline.view(view_type)

Functions

def generate_baseline_distribution(method: str, X: numpy.ndarray, X_obs: numpy.ndarray, y: Optional[numpy.ndarray] = None, y_obs: Optional[numpy.ndarray] = None, nsamples: Optional[int] = None, random_state: Optional[int] = None, **kwargs) ‑> numpy.ndarray

Generate baseline distribution for explanations

Args

method : str
baseline method
X : np.ndarray
data for source distribution
X_obs : np.ndarray
data observations to explain with baselines
y : Optional[np.ndarray]
classes of source distribution
y_obs : Optional[np.ndarray]
predicted classes of observations to explain
nsamples : Optional[int]
number of samples
random_state : Optional[int]
random seed

Returns

np.ndarray
baseline
Expand source code
def generate_baseline_distribution(
    method: str,
    X: np.ndarray,
    X_obs: np.ndarray,
    y: Optional[np.ndarray] = None,
    y_obs: Optional[np.ndarray] = None,
    nsamples: Optional[int] = None,
    random_state: Optional[int] = None,
    **kwargs,
) -> np.ndarray:
    """Build the baseline array for the requested baseline method.

    Args:
        method (str): name of the baseline method to dispatch to
        X (np.ndarray): data for source distribution
        X_obs (np.ndarray): data observations to explain with baselines
        y (Optional[np.ndarray]): classes of source distribution
        y_obs (Optional[np.ndarray]): predicted classes of observations
            to explain
        nsamples (Optional[int]): number of samples; must not be None when
            the method is listed in distributions.SAMPLE
        random_state (Optional[int]): random seed, handed to
            np.random.seed and to sample()
        **kwargs: forwarded to the selected distribution function; the
            "baseline" key is always overwritten with True

    Returns:
        np.ndarray: baseline, viewed as SampleBaseline, ManyToOneBaseline,
        ConstantBaseline or OneToOneBaseline depending on which group of
        the distributions module the method belongs to

    Raises:
        ValueError: if nsamples is None for a method in
            distributions.SAMPLE, or if the method name is not recognised
    """

    # Tell the distribution functions they are being called to build a
    # baseline; this overrides any caller-supplied "baseline" value.
    kwargs["baseline"] = True

    # Seeds NumPy's global random state.
    np.random.seed(random_state)

    if nsamples is None:
        if method in distributions.SAMPLE:
            raise ValueError(f"nsamples cannot be None for method: {method}")

    # Each entry is a zero-argument callable so that only the selected
    # distribution function is actually invoked.
    builders = {
        "gaussian_blur": lambda: gaussian_blur(X, **kwargs),
        "gaussian_blur_permutation": lambda: gaussian_blur_permutation(
            X, **kwargs
        ),
        "constant": lambda: constant(X, **kwargs),
        "constant_mean": lambda: constant_mean(X, **kwargs),
        "constant_median": lambda: constant_median(X, **kwargs),
        "gaussian": lambda: gaussian(X, **kwargs),
        "training": lambda: training(X, **kwargs),
        "max_distance": lambda: max_distance(X, X_obs, **kwargs),
        "nearest_neighbors": lambda: nearest_neighbors(X, X_obs, **kwargs),
        "nearest_neighbors_counterfactual": (
            lambda: nearest_neighbors_counterfactual(
                X, y, X_obs, y_obs, **kwargs
            )
        ),
        "opposite_class": lambda: opposite_class(
            X, y, y_obs, nsamples, **kwargs
        ),
    }

    build = builders.get(method)
    if build is None:
        raise ValueError(f"Baseline method '{method}' does not exist!")
    baseline = build()

    # Sampling methods are subsampled before being tagged.
    if method in distributions.SAMPLE:
        drawn = sample(baseline, nsamples, random_state)
        return drawn.view(SampleBaseline)

    if method in distributions.MANY_TO_ONE:
        view_type = ManyToOneBaseline
    elif method in distributions.CONSTANT:
        view_type = ConstantBaseline
    else:
        view_type = OneToOneBaseline
    return baseline.view(view_type)

Classes

class ConstantBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)

Arrays should be constructed using array, zeros or empty (refer to the See Also section below). The parameters given here refer to a low-level method (ndarray(…)) for instantiating an array.

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the __new__ method; see Notes below)

shape : tuple of ints
Shape of created array.
dtype : data-type, optional
Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional
Used to fill the array with data.
offset : int, optional
Offset of array data in buffer.
strides : tuple of ints, optional
Strides of data in memory.
order : {'C', 'F'}, optional
Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray
Transpose of the array.
data : buffer
The array's elements, in memory.
dtype : dtype object
Describes the format of the elements in the array.
flags : dict
Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object
Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray
Imaginary part of the array.
real : ndarray
Real part of the array.
size : int
Number of elements in the array.
itemsize : int
The memory use of each array element in bytes.
nbytes : int
The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int
The array's number of dimensions.
shape : tuple of ints
Shape of the array.
strides : tuple of ints
The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object
Class containing properties of the array needed for interaction with ctypes.
base : ndarray
If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

See Also

array
Construct an array.
zeros
Create an array, each element of which is zero.
empty
Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
Create a data-type.
numpy.typing.NDArray
An ndarray alias that is generic with respect to its dtype.type (numpy.dtype.type).

Notes

There are two modes of creating an array using __new__:

  1. If buffer is None, then only shape, dtype, and order are used.
  2. If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class ConstantBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is listed in distributions.CONSTANT.
    """

Ancestors

  • numpy.ndarray
class ManyToOneBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)

Arrays should be constructed using array, zeros or empty (refer to the See Also section below). The parameters given here refer to a low-level method (ndarray(…)) for instantiating an array.

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the __new__ method; see Notes below)

shape : tuple of ints
Shape of created array.
dtype : data-type, optional
Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional
Used to fill the array with data.
offset : int, optional
Offset of array data in buffer.
strides : tuple of ints, optional
Strides of data in memory.
order : {'C', 'F'}, optional
Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray
Transpose of the array.
data : buffer
The array's elements, in memory.
dtype : dtype object
Describes the format of the elements in the array.
flags : dict
Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object
Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray
Imaginary part of the array.
real : ndarray
Real part of the array.
size : int
Number of elements in the array.
itemsize : int
The memory use of each array element in bytes.
nbytes : int
The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int
The array's number of dimensions.
shape : tuple of ints
Shape of the array.
strides : tuple of ints
The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object
Class containing properties of the array needed for interaction with ctypes.
base : ndarray
If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

See Also

array
Construct an array.
zeros
Create an array, each element of which is zero.
empty
Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
Create a data-type.
numpy.typing.NDArray
An ndarray alias that is generic with respect to its dtype.type (numpy.dtype.type).

Notes

There are two modes of creating an array using __new__:

  1. If buffer is None, then only shape, dtype, and order are used.
  2. If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class ManyToOneBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is listed in distributions.MANY_TO_ONE.
    """

Ancestors

  • numpy.ndarray
class OneToOneBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)

Arrays should be constructed using array, zeros or empty (refer to the See Also section below). The parameters given here refer to a low-level method (ndarray(…)) for instantiating an array.

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the __new__ method; see Notes below)

shape : tuple of ints
Shape of created array.
dtype : data-type, optional
Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional
Used to fill the array with data.
offset : int, optional
Offset of array data in buffer.
strides : tuple of ints, optional
Strides of data in memory.
order : {'C', 'F'}, optional
Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray
Transpose of the array.
data : buffer
The array's elements, in memory.
dtype : dtype object
Describes the format of the elements in the array.
flags : dict
Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object
Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray
Imaginary part of the array.
real : ndarray
Real part of the array.
size : int
Number of elements in the array.
itemsize : int
The memory use of each array element in bytes.
nbytes : int
The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int
The array's number of dimensions.
shape : tuple of ints
Shape of the array.
strides : tuple of ints
The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object
Class containing properties of the array needed for interaction with ctypes.
base : ndarray
If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

See Also

array
Construct an array.
zeros
Create an array, each element of which is zero.
empty
Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
Create a data-type.
numpy.typing.NDArray
An ndarray alias that is generic with respect to its dtype.type (numpy.dtype.type).

Notes

There are two modes of creating an array using __new__:

  1. If buffer is None, then only shape, dtype, and order are used.
  2. If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class OneToOneBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is in none of distributions.SAMPLE,
    distributions.MANY_TO_ONE or distributions.CONSTANT.
    """

Ancestors

  • numpy.ndarray
class SampleBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)

Arrays should be constructed using array, zeros or empty (refer to the See Also section below). The parameters given here refer to a low-level method (ndarray(…)) for instantiating an array.

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the __new__ method; see Notes below)

shape : tuple of ints
Shape of created array.
dtype : data-type, optional
Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional
Used to fill the array with data.
offset : int, optional
Offset of array data in buffer.
strides : tuple of ints, optional
Strides of data in memory.
order : {'C', 'F'}, optional
Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray
Transpose of the array.
data : buffer
The array's elements, in memory.
dtype : dtype object
Describes the format of the elements in the array.
flags : dict
Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object
Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray
Imaginary part of the array.
real : ndarray
Real part of the array.
size : int
Number of elements in the array.
itemsize : int
The memory use of each array element in bytes.
nbytes : int
The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int
The array's number of dimensions.
shape : tuple of ints
Shape of the array.
strides : tuple of ints
The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object
Class containing properties of the array needed for interaction with ctypes.
base : ndarray
If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

See Also

array
Construct an array.
zeros
Create an array, each element of which is zero.
empty
Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
Create a data-type.
numpy.typing.NDArray
An ndarray alias that is generic with respect to its dtype.type (numpy.dtype.type).

Notes

There are two modes of creating an array using __new__:

  1. If buffer is None, then only shape, dtype, and order are used.
  2. If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class SampleBaseline(np.ndarray):
    """Marker ndarray subclass with no added behaviour.

    generate_baseline_distribution returns its result viewed as this type
    when the method is listed in distributions.SAMPLE, after the baseline
    has been passed through sample().
    """

Ancestors

  • numpy.ndarray