Module ablation.baseline
Expand source code
from typing import Optional
import numpy as np
from . import distributions
from .distributions import (
constant,
constant_mean,
constant_median,
gaussian,
gaussian_blur,
gaussian_blur_permutation,
max_distance,
nearest_neighbors,
nearest_neighbors_counterfactual,
opposite_class,
training,
)
from .utils.general import sample
from .utils.transform import le_to_ohe, ohe_to_le
# Every method name that generate_baseline_distribution dispatches on,
# listed in the same order as its branches.
BASELINES = [
    # called with the source data X only
    "gaussian_blur", "gaussian_blur_permutation",
    "constant", "constant_mean", "constant_median",
    "gaussian", "training",
    # additionally called with the observations X_obs
    "max_distance", "nearest_neighbors",
    # additionally called with class labels (y, y_obs)
    "nearest_neighbors_counterfactual", "opposite_class",
]
class OneToOneBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is in none of ``distributions.SAMPLE``,
    ``distributions.MANY_TO_ONE`` or ``distributions.CONSTANT``.
    """
class ManyToOneBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is listed in ``distributions.MANY_TO_ONE`` (and
    not in ``distributions.SAMPLE``, which is checked first).
    """
class SampleBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is listed in ``distributions.SAMPLE``, after the
    generated array has been passed through ``sample``.
    """
class ConstantBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is listed in ``distributions.CONSTANT`` (and in
    neither ``distributions.SAMPLE`` nor ``distributions.MANY_TO_ONE``,
    which are checked first).
    """
def generate_baseline_distribution(
    method: str,
    X: np.ndarray,
    X_obs: np.ndarray,
    y: Optional[np.ndarray] = None,
    y_obs: Optional[np.ndarray] = None,
    nsamples: Optional[int] = None,
    random_state: Optional[int] = None,
    **kwargs,
) -> np.ndarray:
    """Build the baseline array for the requested baseline method.

    Args:
        method (str): name of the baseline method to dispatch on
        X (np.ndarray): data for the source distribution
        X_obs (np.ndarray): observations to explain; forwarded to
            ``max_distance``, ``nearest_neighbors`` and
            ``nearest_neighbors_counterfactual``
        y (Optional[np.ndarray]): classes of the source distribution;
            forwarded to ``nearest_neighbors_counterfactual`` and
            ``opposite_class``
        y_obs (Optional[np.ndarray]): predicted classes of the observations
            to explain; forwarded to the same two methods as ``y``
        nsamples (Optional[int]): number of samples; required when
            ``method`` is listed in ``distributions.SAMPLE``
        random_state (Optional[int]): seed given to ``np.random.seed`` and
            to ``sample``
        **kwargs: forwarded to the selected distribution function; the
            ``"baseline"`` key is always set to ``True``

    Returns:
        np.ndarray: the baseline, viewed as ``SampleBaseline``,
        ``ManyToOneBaseline``, ``ConstantBaseline`` or ``OneToOneBaseline``
        according to which ``distributions`` group ``method`` belongs to

    Raises:
        ValueError: if ``nsamples`` is None for a method in
            ``distributions.SAMPLE``, or if ``method`` is not recognised
    """
    # Signal to the distribution functions that they are being used for
    # baseline generation.
    kwargs["baseline"] = True
    # Note: this reseeds NumPy's global random generator.
    np.random.seed(random_state)

    if nsamples is None and method in distributions.SAMPLE:
        raise ValueError(f"nsamples cannot be None for method: {method}")

    # Zero-argument thunks, so that only the selected distribution
    # function is ever evaluated.
    builders = {
        "gaussian_blur": lambda: gaussian_blur(X, **kwargs),
        "gaussian_blur_permutation": lambda: gaussian_blur_permutation(
            X, **kwargs
        ),
        "constant": lambda: constant(X, **kwargs),
        "constant_mean": lambda: constant_mean(X, **kwargs),
        "constant_median": lambda: constant_median(X, **kwargs),
        "gaussian": lambda: gaussian(X, **kwargs),
        "training": lambda: training(X, **kwargs),
        "max_distance": lambda: max_distance(X, X_obs, **kwargs),
        "nearest_neighbors": lambda: nearest_neighbors(X, X_obs, **kwargs),
        "nearest_neighbors_counterfactual": (
            lambda: nearest_neighbors_counterfactual(
                X, y, X_obs, y_obs, **kwargs
            )
        ),
        "opposite_class": lambda: opposite_class(
            X, y, y_obs, nsamples, **kwargs
        ),
    }
    build = builders.get(method)
    if build is None:
        raise ValueError(f"Baseline method '{method}' does not exist!")
    result = build()

    # Tag the result with the marker subclass for the method's group; the
    # groups are tested in the order SAMPLE, MANY_TO_ONE, CONSTANT.
    if method in distributions.SAMPLE:
        drawn = sample(result, nsamples, random_state)
        return drawn.view(SampleBaseline)
    if method in distributions.MANY_TO_ONE:
        return result.view(ManyToOneBaseline)
    if method in distributions.CONSTANT:
        return result.view(ConstantBaseline)
    return result.view(OneToOneBaseline)
Functions
def generate_baseline_distribution(method: str, X: numpy.ndarray, X_obs: numpy.ndarray, y: Optional[numpy.ndarray] = None, y_obs: Optional[numpy.ndarray] = None, nsamples: Optional[int] = None, random_state: Optional[int] = None, **kwargs) ‑> numpy.ndarray
-
Generate baseline distribution for explanations
Args
method
:str
- baseline method
X
:np.ndarray
- data for source distribution
X_obs
:np.ndarray
- data observations to explain with baselines
y
:Optional[np.ndarray]
- classes of source distribution
y_obs
:Optional[np.ndarray]
- predicted classes of observations to explain
nsamples
:Optional[int]
- number of samples
random_state
:Optional[int]
- random seed
Returns
np.ndarray
- baseline
Expand source code
def generate_baseline_distribution(
    method: str,
    X: np.ndarray,
    X_obs: np.ndarray,
    y: Optional[np.ndarray] = None,
    y_obs: Optional[np.ndarray] = None,
    nsamples: Optional[int] = None,
    random_state: Optional[int] = None,
    **kwargs,
) -> np.ndarray:
    """Build the baseline array for the requested baseline method.

    Args:
        method (str): name of the baseline method to dispatch on
        X (np.ndarray): data for the source distribution
        X_obs (np.ndarray): observations to explain; forwarded to
            ``max_distance``, ``nearest_neighbors`` and
            ``nearest_neighbors_counterfactual``
        y (Optional[np.ndarray]): classes of the source distribution;
            forwarded to ``nearest_neighbors_counterfactual`` and
            ``opposite_class``
        y_obs (Optional[np.ndarray]): predicted classes of the observations
            to explain; forwarded to the same two methods as ``y``
        nsamples (Optional[int]): number of samples; required when
            ``method`` is listed in ``distributions.SAMPLE``
        random_state (Optional[int]): seed given to ``np.random.seed`` and
            to ``sample``
        **kwargs: forwarded to the selected distribution function; the
            ``"baseline"`` key is always set to ``True``

    Returns:
        np.ndarray: the baseline, viewed as ``SampleBaseline``,
        ``ManyToOneBaseline``, ``ConstantBaseline`` or ``OneToOneBaseline``
        according to which ``distributions`` group ``method`` belongs to

    Raises:
        ValueError: if ``nsamples`` is None for a method in
            ``distributions.SAMPLE``, or if ``method`` is not recognised
    """
    # Signal to the distribution functions that they are being used for
    # baseline generation.
    kwargs["baseline"] = True
    # Note: this reseeds NumPy's global random generator.
    np.random.seed(random_state)

    if nsamples is None and method in distributions.SAMPLE:
        raise ValueError(f"nsamples cannot be None for method: {method}")

    # Zero-argument thunks, so that only the selected distribution
    # function is ever evaluated.
    builders = {
        "gaussian_blur": lambda: gaussian_blur(X, **kwargs),
        "gaussian_blur_permutation": lambda: gaussian_blur_permutation(
            X, **kwargs
        ),
        "constant": lambda: constant(X, **kwargs),
        "constant_mean": lambda: constant_mean(X, **kwargs),
        "constant_median": lambda: constant_median(X, **kwargs),
        "gaussian": lambda: gaussian(X, **kwargs),
        "training": lambda: training(X, **kwargs),
        "max_distance": lambda: max_distance(X, X_obs, **kwargs),
        "nearest_neighbors": lambda: nearest_neighbors(X, X_obs, **kwargs),
        "nearest_neighbors_counterfactual": (
            lambda: nearest_neighbors_counterfactual(
                X, y, X_obs, y_obs, **kwargs
            )
        ),
        "opposite_class": lambda: opposite_class(
            X, y, y_obs, nsamples, **kwargs
        ),
    }
    build = builders.get(method)
    if build is None:
        raise ValueError(f"Baseline method '{method}' does not exist!")
    result = build()

    # Tag the result with the marker subclass for the method's group; the
    # groups are tested in the order SAMPLE, MANY_TO_ONE, CONSTANT.
    if method in distributions.SAMPLE:
        drawn = sample(result, nsamples, random_state)
        return drawn.view(SampleBaseline)
    if method in distributions.MANY_TO_ONE:
        return result.view(ManyToOneBaseline)
    if method in distributions.CONSTANT:
        return result.view(ConstantBaseline)
    return result.view(OneToOneBaseline)
Classes
class ConstantBaseline (...)
-
ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)
An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)
Arrays should be constructed using `array`, `zeros` or `empty` (refer to the See Also section below). The parameters given here refer to a low-level method (`ndarray(...)`) for instantiating an array.
For more information, refer to the `numpy` module and examine the methods and attributes of an array.
Parameters
(for the new method; see Notes below)
shape
:tuple
ofints
- Shape of created array.
dtype
:data-type
, optional- Any object that can be interpreted as a numpy data type.
buffer
:object exposing buffer interface
, optional- Used to fill the array with data.
offset
:int
, optional- Offset of array data in buffer.
strides
:tuple
ofints
, optional- Strides of data in memory.
order
:{'C', 'F'}
, optional- Row-major (C-style) or column-major (Fortran-style) order.
Attributes
T
:ndarray
- Transpose of the array.
data
:buffer
- The array's elements, in memory.
dtype
:dtype object
- Describes the format of the elements in the array.
flags
:dict
- Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat
:numpy.flatiter object
- Flattened version of the array as an iterator. The iterator allows assignments, e.g., `x.flat = 3` (see `ndarray.flat` for assignment examples; TODO).
imag
:ndarray
- Imaginary part of the array.
real
:ndarray
- Real part of the array.
size
:int
- Number of elements in the array.
itemsize
:int
- The memory use of each array element in bytes.
nbytes
:int
- The total number of bytes required to store the array data,
i.e.,
itemsize * size
. ndim
:int
- The array's number of dimensions.
shape
:tuple
ofints
- Shape of the array.
strides
:tuple of ints
- The step-size required to move from one element to the next in memory. For example, a contiguous `(3, 4)` array of type `int16` in C-order has strides `(8, 2)`. This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (`2 * 4`).
ctypes
:ctypes object
- Class containing properties of the array needed for interaction with ctypes.
base
:ndarray
- If the array is a view into another array, that array is its
base
(unless that array is also a view). Thebase
array is where the array data is actually stored.
See Also
array
- Construct an array.
zeros
- Create an array, each element of which is zero.
empty
- Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
- Create a data-type.
numpy.typing.NDArray
- An ndarray alias :term:
generic <generic type>
w.r.t. itsdtype.type <numpy.dtype.type>
.
Notes
There are two modes of creating an array using `__new__`:
- If `buffer` is None, then only `shape`, `dtype`, and `order` are used.
- If `buffer` is an object exposing the buffer interface, then all keywords are interpreted.
No `__init__` method is needed because the array is fully initialized after the `__new__` method.
Examples
These examples illustrate the low-level `ndarray` constructor. Refer to the See Also section above for easier ways of constructing an ndarray.
First mode, `buffer` is None:
>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])
Second mode:
>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class ConstantBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is listed in ``distributions.CONSTANT`` (and in
    neither ``distributions.SAMPLE`` nor ``distributions.MANY_TO_ONE``,
    which are checked first).
    """
Ancestors
- numpy.ndarray
class ManyToOneBaseline (...)
-
ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)
An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)
Arrays should be constructed using `array`, `zeros` or `empty` (refer to the See Also section below). The parameters given here refer to a low-level method (`ndarray(...)`) for instantiating an array.
For more information, refer to the `numpy` module and examine the methods and attributes of an array.
Parameters
(for the new method; see Notes below)
shape
:tuple
ofints
- Shape of created array.
dtype
:data-type
, optional- Any object that can be interpreted as a numpy data type.
buffer
:object exposing buffer interface
, optional- Used to fill the array with data.
offset
:int
, optional- Offset of array data in buffer.
strides
:tuple
ofints
, optional- Strides of data in memory.
order
:{'C', 'F'}
, optional- Row-major (C-style) or column-major (Fortran-style) order.
Attributes
T
:ndarray
- Transpose of the array.
data
:buffer
- The array's elements, in memory.
dtype
:dtype object
- Describes the format of the elements in the array.
flags
:dict
- Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat
:numpy.flatiter object
- Flattened version of the array as an iterator. The iterator allows assignments, e.g., `x.flat = 3` (see `ndarray.flat` for assignment examples; TODO).
imag
:ndarray
- Imaginary part of the array.
real
:ndarray
- Real part of the array.
size
:int
- Number of elements in the array.
itemsize
:int
- The memory use of each array element in bytes.
nbytes
:int
- The total number of bytes required to store the array data,
i.e.,
itemsize * size
. ndim
:int
- The array's number of dimensions.
shape
:tuple
ofints
- Shape of the array.
strides
:tuple of ints
- The step-size required to move from one element to the next in memory. For example, a contiguous `(3, 4)` array of type `int16` in C-order has strides `(8, 2)`. This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (`2 * 4`).
ctypes
:ctypes object
- Class containing properties of the array needed for interaction with ctypes.
base
:ndarray
- If the array is a view into another array, that array is its
base
(unless that array is also a view). Thebase
array is where the array data is actually stored.
See Also
array
- Construct an array.
zeros
- Create an array, each element of which is zero.
empty
- Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
- Create a data-type.
numpy.typing.NDArray
- An ndarray alias :term:
generic <generic type>
w.r.t. itsdtype.type <numpy.dtype.type>
.
Notes
There are two modes of creating an array using `__new__`:
- If `buffer` is None, then only `shape`, `dtype`, and `order` are used.
- If `buffer` is an object exposing the buffer interface, then all keywords are interpreted.
No `__init__` method is needed because the array is fully initialized after the `__new__` method.
Examples
These examples illustrate the low-level `ndarray` constructor. Refer to the See Also section above for easier ways of constructing an ndarray.
First mode, `buffer` is None:
>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])
Second mode:
>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class ManyToOneBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is listed in ``distributions.MANY_TO_ONE`` (and
    not in ``distributions.SAMPLE``, which is checked first).
    """
Ancestors
- numpy.ndarray
class OneToOneBaseline (...)
-
ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)
An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)
Arrays should be constructed using `array`, `zeros` or `empty` (refer to the See Also section below). The parameters given here refer to a low-level method (`ndarray(...)`) for instantiating an array.
For more information, refer to the `numpy` module and examine the methods and attributes of an array.
Parameters
(for the new method; see Notes below)
shape
:tuple
ofints
- Shape of created array.
dtype
:data-type
, optional- Any object that can be interpreted as a numpy data type.
buffer
:object exposing buffer interface
, optional- Used to fill the array with data.
offset
:int
, optional- Offset of array data in buffer.
strides
:tuple
ofints
, optional- Strides of data in memory.
order
:{'C', 'F'}
, optional- Row-major (C-style) or column-major (Fortran-style) order.
Attributes
T
:ndarray
- Transpose of the array.
data
:buffer
- The array's elements, in memory.
dtype
:dtype object
- Describes the format of the elements in the array.
flags
:dict
- Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat
:numpy.flatiter object
- Flattened version of the array as an iterator. The iterator allows assignments, e.g., `x.flat = 3` (see `ndarray.flat` for assignment examples; TODO).
imag
:ndarray
- Imaginary part of the array.
real
:ndarray
- Real part of the array.
size
:int
- Number of elements in the array.
itemsize
:int
- The memory use of each array element in bytes.
nbytes
:int
- The total number of bytes required to store the array data,
i.e.,
itemsize * size
. ndim
:int
- The array's number of dimensions.
shape
:tuple
ofints
- Shape of the array.
strides
:tuple of ints
- The step-size required to move from one element to the next in memory. For example, a contiguous `(3, 4)` array of type `int16` in C-order has strides `(8, 2)`. This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (`2 * 4`).
ctypes
:ctypes object
- Class containing properties of the array needed for interaction with ctypes.
base
:ndarray
- If the array is a view into another array, that array is its
base
(unless that array is also a view). Thebase
array is where the array data is actually stored.
See Also
array
- Construct an array.
zeros
- Create an array, each element of which is zero.
empty
- Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
- Create a data-type.
numpy.typing.NDArray
- An ndarray alias :term:
generic <generic type>
w.r.t. itsdtype.type <numpy.dtype.type>
.
Notes
There are two modes of creating an array using `__new__`:
- If `buffer` is None, then only `shape`, `dtype`, and `order` are used.
- If `buffer` is an object exposing the buffer interface, then all keywords are interpreted.
No `__init__` method is needed because the array is fully initialized after the `__new__` method.
Examples
These examples illustrate the low-level `ndarray` constructor. Refer to the See Also section above for easier ways of constructing an ndarray.
First mode, `buffer` is None:
>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])
Second mode:
>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class OneToOneBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is in none of ``distributions.SAMPLE``,
    ``distributions.MANY_TO_ONE`` or ``distributions.CONSTANT``.
    """
Ancestors
- numpy.ndarray
class SampleBaseline (...)
-
ndarray(shape, dtype=float, buffer=None, offset=0, strides=None, order=None)
An array object represents a multidimensional, homogeneous array of fixed-size items. An associated data-type object describes the format of each element in the array (its byte-order, how many bytes it occupies in memory, whether it is an integer, a floating point number, or something else, etc.)
Arrays should be constructed using `array`, `zeros` or `empty` (refer to the See Also section below). The parameters given here refer to a low-level method (`ndarray(...)`) for instantiating an array.
For more information, refer to the `numpy` module and examine the methods and attributes of an array.
Parameters
(for the new method; see Notes below)
shape
:tuple
ofints
- Shape of created array.
dtype
:data-type
, optional- Any object that can be interpreted as a numpy data type.
buffer
:object exposing buffer interface
, optional- Used to fill the array with data.
offset
:int
, optional- Offset of array data in buffer.
strides
:tuple
ofints
, optional- Strides of data in memory.
order
:{'C', 'F'}
, optional- Row-major (C-style) or column-major (Fortran-style) order.
Attributes
T
:ndarray
- Transpose of the array.
data
:buffer
- The array's elements, in memory.
dtype
:dtype object
- Describes the format of the elements in the array.
flags
:dict
- Dictionary containing information related to memory use, e.g., 'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
flat
:numpy.flatiter object
- Flattened version of the array as an iterator. The iterator allows assignments, e.g., `x.flat = 3` (see `ndarray.flat` for assignment examples; TODO).
imag
:ndarray
- Imaginary part of the array.
real
:ndarray
- Real part of the array.
size
:int
- Number of elements in the array.
itemsize
:int
- The memory use of each array element in bytes.
nbytes
:int
- The total number of bytes required to store the array data,
i.e.,
itemsize * size
. ndim
:int
- The array's number of dimensions.
shape
:tuple
ofints
- Shape of the array.
strides
:tuple of ints
- The step-size required to move from one element to the next in memory. For example, a contiguous `(3, 4)` array of type `int16` in C-order has strides `(8, 2)`. This implies that to move from element to element in memory requires jumps of 2 bytes. To move from row-to-row, one needs to jump 8 bytes at a time (`2 * 4`).
ctypes
:ctypes object
- Class containing properties of the array needed for interaction with ctypes.
base
:ndarray
- If the array is a view into another array, that array is its
base
(unless that array is also a view). Thebase
array is where the array data is actually stored.
See Also
array
- Construct an array.
zeros
- Create an array, each element of which is zero.
empty
- Create an array, but leave its allocated memory unchanged (i.e., it contains "garbage").
dtype
- Create a data-type.
numpy.typing.NDArray
- An ndarray alias :term:
generic <generic type>
w.r.t. itsdtype.type <numpy.dtype.type>
.
Notes
There are two modes of creating an array using `__new__`:
- If `buffer` is None, then only `shape`, `dtype`, and `order` are used.
- If `buffer` is an object exposing the buffer interface, then all keywords are interpreted.
No `__init__` method is needed because the array is fully initialized after the `__new__` method.
Examples
These examples illustrate the low-level `ndarray` constructor. Refer to the See Also section above for easier ways of constructing an ndarray.
First mode, `buffer` is None:
>>> np.ndarray(shape=(2,2), dtype=float, order='F')
array([[0.0e+000, 0.0e+000], # random
       [     nan, 2.5e-323]])
Second mode:
>>> np.ndarray((2,), buffer=np.array([1,2,3]),
...            offset=np.int_().itemsize,
...            dtype=int) # offset = 1*itemsize, i.e. skip first element
array([2, 3])
Expand source code
class SampleBaseline(np.ndarray):
    """Marker ``ndarray`` subclass that adds no behaviour of its own.

    ``generate_baseline_distribution`` returns its result as a view of this
    type when the method is listed in ``distributions.SAMPLE``, after the
    generated array has been passed through ``sample``.
    """
Ancestors
- numpy.ndarray