Module ablation.utils.transform
Expand source code
from typing import List
import numpy as np
def ohe_to_le(X: np.ndarray, agg_map: List[List[int]]) -> np.ndarray:
    """
    Collapse one-hot-encoded categoricals in mixed data into label encoding.

    Each entry of ``agg_map`` describes one output (dense) column by listing
    the input (sparse) column indices it is built from. For example:

        agg_map = [[1, 2, 3], [4], [5]]

    Here dense column 0 is built from sparse columns 1, 2 and 3 (a
    categorical that was one-hot-encoded into three columns), while dense
    columns 1 and 2 each map one-to-one onto sparse columns 4 and 5
    (non-categoricals).

    Args:
        X (np.ndarray): Mixed data to be transformed, indexed as
            ``X[:, columns]``.
        agg_map (list[list[int]]): Aggregation map. If ``None``, ``X`` is
            returned unchanged.

    Returns:
        np.ndarray: Float array with one column per ``agg_map`` entry.
        Multi-index entries hold the position (within the entry) of the
        largest value in each row; single-index entries are copied through.
    """
    # Without a map there is nothing to aggregate.
    if agg_map is None:
        return X

    n_rows = X.shape[0]
    encoded = np.zeros((n_rows, len(agg_map)))

    for dense_col, sparse_cols in enumerate(agg_map):
        if len(sparse_cols) <= 1:
            # One-to-one mapping: the value passes through untouched.
            encoded[:, dense_col] = X[:, sparse_cols[0]]
            continue

        # Several sparse columns: the label is the position of the row-wise
        # maximum inside the one-hot group.
        one_hot_group = X[:, sparse_cols]
        encoded[:, dense_col] = np.argmax(one_hot_group, axis=1)

    return encoded
def le_to_ohe(X: np.ndarray, agg_map: List[List[int]]) -> np.ndarray:
    """
    Transforms mixed data with Label Encoded categoricals
    to mixed data with One-Hot-Encoded categoricals.

    Each entry of ``agg_map`` lists the output (sparse) column indices that
    the corresponding input (dense) column expands to, e.g.
    ``[[1, 2, 3], [4], [5]]``. The output is wide enough to hold every
    listed index, so maps that do not start at column 0 (or that skip
    columns) are supported; columns not referenced by the map stay zero.

    Args:
        X (np.ndarray): Mixed data with label encoded categoricals
        agg_map (list[list[int]]): Aggregation map. If ``None``, ``X`` is
            returned unchanged.

    Returns:
        np.ndarray: Mixed data with one-hot-encoded categoricals

    Raises:
        IndexError: If a rounded label does not index a row of the
            ``len(mapping)``-sized identity matrix used for the expansion.
    """
    # If agg_map not provided, return input.
    if agg_map is None:
        return X

    # Size the output by the largest referenced column, not only by the
    # number of listed columns: a map such as [[1, 2, 3], [4], [5]] lists
    # five columns but writes to index 5, which needs six columns.
    n_listed = sum(len(mapping) for mapping in agg_map)
    highest_col = max(
        (col for mapping in agg_map for col in mapping), default=-1
    )
    n_sparse = max(n_listed, highest_col + 1)

    X_OHE = np.zeros((X.shape[0], n_sparse))
    for idx, mapping in enumerate(agg_map):
        if len(mapping) > 1:
            # Categorical mapping.
            # NOTE: Inputs are rounded during LE-to-OHE transformation, so
            # non-integer labels snap to the nearest category.
            labels = np.round(X[:, idx]).astype(int)
            X_OHE[:, mapping] = np.eye(len(mapping))[labels]
        else:
            # Numerical mapping: copy the value through.
            X_OHE[:, mapping[0]] = X[:, idx]
    return X_OHE
Functions
def le_to_ohe(X: numpy.ndarray, agg_map: List[List[int]]) ‑> numpy.ndarray
-
Transforms mixed data with Label Encoded categoricals to mixed data with One-Hot-Encoded categoricals.
Args
X
:np.ndarray
- Mixed data with label encoded categoricals
agg_map
:list[list[int]]
- Aggregation map
Returns
np.ndarray
- Mixed data with one-hot-encoded categoricals
Expand source code
def le_to_ohe(X: np.ndarray, agg_map: List[List[int]]) -> np.ndarray:
    """
    Transforms mixed data with Label Encoded categoricals
    to mixed data with One-Hot-Encoded categoricals.

    Each entry of ``agg_map`` lists the output (sparse) column indices that
    the corresponding input (dense) column expands to. The output is wide
    enough to hold every listed index, so maps that do not start at
    column 0 are supported; unreferenced columns stay zero.

    Args:
        X (np.ndarray): Mixed data with label encoded categoricals
        agg_map (list[list[int]]): Aggregation map. If ``None``, ``X`` is
            returned unchanged.

    Returns:
        np.ndarray: Mixed data with one-hot-encoded categoricals

    Raises:
        IndexError: If a rounded label does not index a row of the
            ``len(mapping)``-sized identity matrix used for the expansion.
    """
    # If agg_map not provided, return input.
    if agg_map is None:
        return X

    # The width must cover the largest referenced column index; counting
    # the listed columns alone is too small when the map skips column 0.
    n_listed = sum(len(mapping) for mapping in agg_map)
    highest_col = max(
        (col for mapping in agg_map for col in mapping), default=-1
    )
    X_OHE = np.zeros((X.shape[0], max(n_listed, highest_col + 1)))

    for idx, mapping in enumerate(agg_map):
        # If categorical mapping
        if len(mapping) > 1:
            # NOTE: Inputs are rounded during LE-to-OHE transformation.
            X_OHE[:, mapping] = np.eye(len(mapping))[
                np.round(X[:, idx]).astype(int)
            ]
        # else, numerical mapping
        else:
            X_OHE[:, mapping[0]] = X[:, idx]
    return X_OHE
def ohe_to_le(X: numpy.ndarray, agg_map: List[List[int]]) ‑> numpy.ndarray
-
Transforms mixed data with One-Hot-Encoded categoricals to mixed data with Label Encoded categoricals.
An example of an aggregation map is:
agg_map = [[1,2,3],[4],[5]]
In this example, the dense feature index 0 maps to sparse feature indices 1,2,3. Dense feature index 0 is likely a categorical that was split into three features via one-hot-encoding. Dense feature indices 1 and 2 are one-to-one mappings, meaning that they represent non-categoricals.
Args
X
:np.ndarray
- Mixed data to be transformed
agg_map
:list[list[int]]
- Aggregation map
Returns
np.ndarray
- Mixed data with label encoded categoricals
Expand source code
def ohe_to_le(X: np.ndarray, agg_map: List[List[int]]) -> np.ndarray:
    """
    Convert mixed data with one-hot-encoded categoricals into mixed data
    with label-encoded categoricals.

    An example aggregation map is:

        agg_map = [[1, 2, 3], [4], [5]]

    In this example, dense feature index 0 maps to sparse feature indices
    1, 2 and 3, i.e. a categorical that was split into three features via
    one-hot-encoding. Dense feature indices 1 and 2 are one-to-one
    mappings, meaning that they represent non-categoricals.

    Args:
        X (np.ndarray): Mixed data to be transformed
        agg_map (list[list[int]]): Aggregation map. If ``None``, ``X`` is
            returned unchanged.

    Returns:
        np.ndarray: Mixed data with label encoded categoricals, one float
        column per ``agg_map`` entry.
    """
    if agg_map is None:
        return X

    label_encoded = np.zeros((X.shape[0], len(agg_map)))
    for out_col, in_cols in enumerate(agg_map):
        is_categorical = len(in_cols) > 1
        if is_categorical:
            # The label is the position of the row-wise maximum within the
            # group of one-hot columns.
            column = np.argmax(X[:, in_cols], axis=1)
        else:
            # Single source column: copy it through.
            column = X[:, in_cols[0]]
        label_encoded[:, out_col] = column
    return label_encoded