Module `ablation.baseline`

Expand source code

from typing import Optional

import numpy as np

from . import distributions
from .distributions import (
    constant,
    constant_mean,
    constant_median,
    gaussian,
    gaussian_blur,
    gaussian_blur_permutation,
    max_distance,
    nearest_neighbors,
    nearest_neighbors_counterfactual,
    opposite_class,
    training,
)
from .utils.general import sample
from .utils.transform import le_to_ohe, ohe_to_le

# Names of the baseline methods that generate_baseline_distribution
# dispatches on.
BASELINES = [
    "gaussian_blur",
    "gaussian_blur_permutation",
    "constant",
    "constant_mean",
    "constant_median",
    "gaussian",
    "training",
    "max_distance",
    "nearest_neighbors",
    "nearest_neighbors_counterfactual",
    "opposite_class",
]


class OneToOneBaseline(np.ndarray):
    """Array type tag for baselines that fall outside the other groups.

    ``generate_baseline_distribution`` returns a view of this type when the
    method is not listed in ``distributions.SAMPLE``,
    ``distributions.MANY_TO_ONE`` or ``distributions.CONSTANT``. The class
    adds nothing to ``np.ndarray``; it only labels the array.
    """


class ManyToOneBaseline(np.ndarray):
    """Array type tag for baselines from ``distributions.MANY_TO_ONE`` methods.

    ``generate_baseline_distribution`` returns a view of this type when the
    method is listed in ``distributions.MANY_TO_ONE`` and not in
    ``distributions.SAMPLE``. The class adds nothing to ``np.ndarray``; it
    only labels the array.
    """


class SampleBaseline(np.ndarray):
    """Array type tag for baselines from ``distributions.SAMPLE`` methods.

    ``generate_baseline_distribution`` returns a view of this type, after
    passing the baseline through ``sample``, when the method is listed in
    ``distributions.SAMPLE``. The class adds nothing to ``np.ndarray``; it
    only labels the array.
    """


class ConstantBaseline(np.ndarray):
    """Array type tag for baselines from ``distributions.CONSTANT`` methods.

    ``generate_baseline_distribution`` returns a view of this type when the
    method is listed in ``distributions.CONSTANT`` and in neither
    ``distributions.SAMPLE`` nor ``distributions.MANY_TO_ONE``. The class
    adds nothing to ``np.ndarray``; it only labels the array.
    """


def generate_baseline_distribution(
    method: str,
    X: np.ndarray,
    X_obs: np.ndarray,
    y: Optional[np.ndarray] = None,
    y_obs: Optional[np.ndarray] = None,
    nsamples: Optional[int] = None,
    random_state: Optional[int] = None,
    **kwargs,
) -> np.ndarray:
    """Build the baseline array used when explaining observations.

    Args:
        method (str): name of the baseline method to run
        X (np.ndarray): data for source distribution
        X_obs (np.ndarray): data observations to explain with baselines
        y (Optional[np.ndarray]): classes of source distribution
        y_obs (Optional[np.ndarray]): predicted classes of observations
            to explain
        nsamples (Optional[int]): number of samples; must be given when
            ``method`` is listed in ``distributions.SAMPLE``
        random_state (Optional[int]): random seed, handed to
            ``np.random.seed`` and to ``sample``
        **kwargs: extra keyword arguments forwarded to the selected
            distribution function

    Returns:
        np.ndarray: baseline, viewed as ``SampleBaseline``,
            ``ManyToOneBaseline``, ``ConstantBaseline`` or
            ``OneToOneBaseline`` depending on the group ``method`` is in

    Raises:
        ValueError: if ``nsamples`` is None for a method in
            ``distributions.SAMPLE``, or if ``method`` is not a known
            baseline method
    """

    # Flag passed to the distribution functions so they know the call
    # comes from baseline generation.
    kwargs["baseline"] = True

    # Seeds NumPy's global random generator.
    np.random.seed(random_state)

    if nsamples is None:
        if method in distributions.SAMPLE:
            raise ValueError(f"nsamples cannot be None for method: {method}")

    # One zero-argument builder per method. The lambdas defer the actual
    # call, so only the selected distribution function is ever invoked.
    builders = {
        "gaussian_blur": lambda: gaussian_blur(X, **kwargs),
        "gaussian_blur_permutation": lambda: gaussian_blur_permutation(
            X, **kwargs
        ),
        "constant": lambda: constant(X, **kwargs),
        "constant_mean": lambda: constant_mean(X, **kwargs),
        "constant_median": lambda: constant_median(X, **kwargs),
        "gaussian": lambda: gaussian(X, **kwargs),
        "training": lambda: training(X, **kwargs),
        "max_distance": lambda: max_distance(X, X_obs, **kwargs),
        "nearest_neighbors": lambda: nearest_neighbors(X, X_obs, **kwargs),
        "nearest_neighbors_counterfactual": (
            lambda: nearest_neighbors_counterfactual(
                X, y, X_obs, y_obs, **kwargs
            )
        ),
        "opposite_class": lambda: opposite_class(
            X, y, y_obs, nsamples, **kwargs
        ),
    }

    build = builders.get(method)
    if build is None:
        raise ValueError(f"Baseline method '{method}' does not exist!")
    baseline = build()

    # Pick the ndarray subclass that labels the kind of baseline produced.
    if method in distributions.SAMPLE:
        baseline = sample(baseline, nsamples, random_state)
        view_type = SampleBaseline
    elif method in distributions.MANY_TO_ONE:
        view_type = ManyToOneBaseline
    elif method in distributions.CONSTANT:
        view_type = ConstantBaseline
    else:
        view_type = OneToOneBaseline
    return baseline.view(view_type)

Functions

def generate_baseline_distribution(method: str, X: numpy.ndarray, X_obs: numpy.ndarray, y: Optional[numpy.ndarray] = None, y_obs: Optional[numpy.ndarray] = None, nsamples: Optional[int] = None, random_state: Optional[int] = None, **kwargs) ‑> numpy.ndarray

Generate baseline distribution for explanations

Args

method : str: baseline method
X : np.ndarray: data for source distribution
X_obs : np.ndarray: data observations to explain with baselines
y : Optional[np.ndarray]: classes of source distribution
y_obs : Optional[np.ndarray]: predicted classes of observations to explain
nsamples : Optional[int]: number of samples
random_state : Optional[int]: random seed

Returns

np.ndarray: baseline

Expand source code

def generate_baseline_distribution(
    method: str,
    X: np.ndarray,
    X_obs: np.ndarray,
    y: Optional[np.ndarray] = None,
    y_obs: Optional[np.ndarray] = None,
    nsamples: Optional[int] = None,
    random_state: Optional[int] = None,
    **kwargs,
) -> np.ndarray:
    """Build the baseline array used when explaining observations.

    Args:
        method (str): name of the baseline method to run
        X (np.ndarray): data for source distribution
        X_obs (np.ndarray): data observations to explain with baselines
        y (Optional[np.ndarray]): classes of source distribution
        y_obs (Optional[np.ndarray]): predicted classes of observations
            to explain
        nsamples (Optional[int]): number of samples; must be given when
            ``method`` is listed in ``distributions.SAMPLE``
        random_state (Optional[int]): random seed, handed to
            ``np.random.seed`` and to ``sample``
        **kwargs: extra keyword arguments forwarded to the selected
            distribution function

    Returns:
        np.ndarray: baseline, viewed as ``SampleBaseline``,
            ``ManyToOneBaseline``, ``ConstantBaseline`` or
            ``OneToOneBaseline`` depending on the group ``method`` is in

    Raises:
        ValueError: if ``nsamples`` is None for a method in
            ``distributions.SAMPLE``, or if ``method`` is not a known
            baseline method
    """

    # Flag passed to the distribution functions so they know the call
    # comes from baseline generation.
    kwargs["baseline"] = True

    # Seeds NumPy's global random generator.
    np.random.seed(random_state)

    if nsamples is None:
        if method in distributions.SAMPLE:
            raise ValueError(f"nsamples cannot be None for method: {method}")

    # One zero-argument builder per method. The lambdas defer the actual
    # call, so only the selected distribution function is ever invoked.
    builders = {
        "gaussian_blur": lambda: gaussian_blur(X, **kwargs),
        "gaussian_blur_permutation": lambda: gaussian_blur_permutation(
            X, **kwargs
        ),
        "constant": lambda: constant(X, **kwargs),
        "constant_mean": lambda: constant_mean(X, **kwargs),
        "constant_median": lambda: constant_median(X, **kwargs),
        "gaussian": lambda: gaussian(X, **kwargs),
        "training": lambda: training(X, **kwargs),
        "max_distance": lambda: max_distance(X, X_obs, **kwargs),
        "nearest_neighbors": lambda: nearest_neighbors(X, X_obs, **kwargs),
        "nearest_neighbors_counterfactual": (
            lambda: nearest_neighbors_counterfactual(
                X, y, X_obs, y_obs, **kwargs
            )
        ),
        "opposite_class": lambda: opposite_class(
            X, y, y_obs, nsamples, **kwargs
        ),
    }

    build = builders.get(method)
    if build is None:
        raise ValueError(f"Baseline method '{method}' does not exist!")
    baseline = build()

    # Pick the ndarray subclass that labels the kind of baseline produced.
    if method in distributions.SAMPLE:
        baseline = sample(baseline, nsamples, random_state)
        view_type = SampleBaseline
    elif method in distributions.MANY_TO_ONE:
        view_type = ManyToOneBaseline
    elif method in distributions.CONSTANT:
        view_type = ConstantBaseline
    else:
        view_type = OneToOneBaseline
    return baseline.view(view_type)

Classes

class ConstantBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)

Arrays should be constructed using array, zeros or empty (refer to the See Also section below). The parameters given here refer to a low-level method (ndarray(…)) for instantiating an array.

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the `__new__` method; see Notes below)

shape : tuple of ints: Shape of created array.
dtype : data-type, optional: Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional: Used to fill the array with data.
offset : int, optional: Offset of array data in buffer.
strides : tuple of ints, optional: Strides of data in memory.
order : {'C', 'F'}, optional: Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray: Transpose of the array.
data : buffer: The array's elements, in memory.
dtype : dtype object: Describes the format of the elements in the array.
flags : dict: Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object: Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray: Imaginary part of the array.
real : ndarray: Real part of the array.
size : int: Number of elements in the array.
itemsize : int: The memory use of each array element in bytes.
nbytes : int: The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int: The array's number of dimensions.
shape : tuple of ints: Shape of the array.
strides : tuple of ints: The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object: Class containing properties of the array needed for interaction with ctypes.
base : ndarray: If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

Notes

There are two modes of creating an array using __new__:

If buffer is None, then only shape, dtype, and order are used.
If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])

Expand source code

class ConstantBaseline(np.ndarray):
    """Array type tag for baselines from ``distributions.CONSTANT`` methods.

    ``generate_baseline_distribution`` returns a view of this type when the
    method is listed in ``distributions.CONSTANT`` and in neither
    ``distributions.SAMPLE`` nor ``distributions.MANY_TO_ONE``. The class
    adds nothing to ``np.ndarray``; it only labels the array.
    """

Ancestors

numpy.ndarray

class ManyToOneBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the `__new__` method; see Notes below)

shape : tuple of ints: Shape of created array.
dtype : data-type, optional: Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional: Used to fill the array with data.
offset : int, optional: Offset of array data in buffer.
strides : tuple of ints, optional: Strides of data in memory.
order : {'C', 'F'}, optional: Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray: Transpose of the array.
data : buffer: The array's elements, in memory.
dtype : dtype object: Describes the format of the elements in the array.
flags : dict: Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object: Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray: Imaginary part of the array.
real : ndarray: Real part of the array.
size : int: Number of elements in the array.
itemsize : int: The memory use of each array element in bytes.
nbytes : int: The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int: The array's number of dimensions.
shape : tuple of ints: Shape of the array.
strides : tuple of ints: The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object: Class containing properties of the array needed for interaction with ctypes.
base : ndarray: If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

Notes

There are two modes of creating an array using __new__:

If buffer is None, then only shape, dtype, and order are used.
If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])

Expand source code

class ManyToOneBaseline(np.ndarray):
    """Array type tag for baselines from ``distributions.MANY_TO_ONE`` methods.

    ``generate_baseline_distribution`` returns a view of this type when the
    method is listed in ``distributions.MANY_TO_ONE`` and not in
    ``distributions.SAMPLE``. The class adds nothing to ``np.ndarray``; it
    only labels the array.
    """

Ancestors

numpy.ndarray

class OneToOneBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the `__new__` method; see Notes below)

shape : tuple of ints: Shape of created array.
dtype : data-type, optional: Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional: Used to fill the array with data.
offset : int, optional: Offset of array data in buffer.
strides : tuple of ints, optional: Strides of data in memory.
order : {'C', 'F'}, optional: Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray: Transpose of the array.
data : buffer: The array's elements, in memory.
dtype : dtype object: Describes the format of the elements in the array.
flags : dict: Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object: Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray: Imaginary part of the array.
real : ndarray: Real part of the array.
size : int: Number of elements in the array.
itemsize : int: The memory use of each array element in bytes.
nbytes : int: The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int: The array's number of dimensions.
shape : tuple of ints: Shape of the array.
strides : tuple of ints: The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object: Class containing properties of the array needed for interaction with ctypes.
base : ndarray: If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

Notes

There are two modes of creating an array using __new__:

If buffer is None, then only shape, dtype, and order are used.
If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])

Expand source code

class OneToOneBaseline(np.ndarray):
    """Array type tag for baselines that fall outside the other groups.

    ``generate_baseline_distribution`` returns a view of this type when the
    method is not listed in ``distributions.SAMPLE``,
    ``distributions.MANY_TO_ONE`` or ``distributions.CONSTANT``. The class
    adds nothing to ``np.ndarray``; it only labels the array.
    """

Ancestors

numpy.ndarray

class SampleBaseline (...)

ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)

For more information, refer to the numpy module and examine the methods and attributes of an array.

Parameters

(for the `__new__` method; see Notes below)

shape : tuple of ints: Shape of created array.
dtype : data-type, optional: Any object that can be interpreted as a numpy data type.
buffer : object exposing buffer interface, optional: Used to fill the array with data.
offset : int, optional: Offset of array data in buffer.
strides : tuple of ints, optional: Strides of data in memory.
order : {'C', 'F'}, optional: Row-major (C-style) or column-major (Fortran-style) order.

Attributes

T : ndarray: Transpose of the array.
data : buffer: The array's elements, in memory.
dtype : dtype object: Describes the format of the elements in the array.
flags : dict: Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat : numpy.flatiter object: Flattened version of the array as an iterator. The iterator allows assignments, e.g., x.flat = 3 (See ndarray.flat for assignment examples; TODO).
imag : ndarray: Imaginary part of the array.
real : ndarray: Real part of the array.
size : int: Number of elements in the array.
itemsize : int: The memory use of each array element in bytes.
nbytes : int: The total number of bytes required to store the array data, i.e., itemsize * size.
ndim : int: The array's number of dimensions.
shape : tuple of ints: Shape of the array.
strides : tuple of ints: The step-size required to move from one element to the next in memory. For example, a contiguous (3, 4) array of type int16 in C-order has strides (8, 2). This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (2 * 4).
ctypes : ctypes object: Class containing properties of the array needed for interaction with ctypes.
base : ndarray: If the array is a view into another array, that array is its base (unless that array is also a view). The base array is where the array data is actually stored.

Notes

There are two modes of creating an array using __new__:

If buffer is None, then only shape, dtype, and order are used.
If buffer is an object exposing the buffer interface, then all keywords are interpreted.

No __init__ method is needed because the array is fully initialized after the __new__ method.

Examples

These examples illustrate the low-level ndarray constructor. Refer to the See Also section above for easier ways of constructing an ndarray.

First mode, buffer is None:

>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])

Second mode:

>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])

Expand source code

class SampleBaseline(np.ndarray):
    """Array type tag for baselines from ``distributions.SAMPLE`` methods.

    ``generate_baseline_distribution`` returns a view of this type, after
    passing the baseline through ``sample``, when the method is listed in
    ``distributions.SAMPLE``. The class adds nothing to ``np.ndarray``; it
    only labels the array.
    """

Ancestors

numpy.ndarray