Pandas utils - WatchTower Documentation

`flatten_levels(data, column_reducer=None, index_reducer=None, column_name=..., index_name=..., inplace=False)`

Flattens the columns and/or index of a pandas DataFrame or Series using reducer functions.

Parameters:

Name	Type	Description	Default
`data`	`DataFrame \| Series`	DataFrame or Series whose MultiIndex index/columns should be flattened.	required
`column_reducer`	`None \| ReducerType`	The reducer function to apply to the columns. If None, the columns will not be flattened. Defaults to None.	`None`
`index_reducer`	`None \| ReducerType`	The reducer function to apply to the index. If None, the index will not be flattened. Defaults to None.	`None`
`column_name`	`str \| None`	Name of the new column. If unset, will create a name.	`...`
`index_name`	`str \| None`	Name of the new index. If unset, will create a name.	`...`
`inplace`	`bool`	If True, the flattening is done in-place. If False, a copy is made. Defaults to False.	`False`

Returns:

Type	Description
`DataFrame \| Series \| None`	pd.DataFrame \| pd.Series \| None: If inplace is False, returns the flattened DataFrame or Series. If inplace is True, returns None.

Source code in wt_ml/utils/pandas_utils.py

def flatten_levels(
    data: pd.DataFrame | pd.Series,
    column_reducer: None | ReducerType = None,
    index_reducer: None | ReducerType = None,
    column_name: str | None = ...,
    index_name: str | None = ...,
    inplace: bool = False,
) -> pd.DataFrame | pd.Series | None:
    """
    Flattens the columns and/or index of a pandas DataFrame or Series using reducer functions.

    An axis is only touched when its reducer is truthy and the axis is currently a
    ``pd.MultiIndex``; otherwise it is left as it is. Columns are only considered for
    DataFrames.

    Args:
        data (pd.DataFrame | pd.Series): DataFrame or Series whose MultiIndex index/columns
            should be flattened.
        column_reducer (None | ReducerType, optional): The reducer to apply to the columns.
            If None, the columns will not be flattened. Defaults to None.
        index_reducer (None | ReducerType, optional): The reducer to apply to the index.
            If None, the index will not be flattened. Defaults to None.
        column_name (str | None, optional): Name of the flattened columns, forwarded to
            ``flatten_multi_index``. If unset (``...``), a name is created.
        index_name (str | None, optional): Name of the flattened index, forwarded to
            ``flatten_multi_index``. If unset (``...``), a name is created.
        inplace (bool, optional): If truthy, ``data`` itself is modified. If falsy, a copy is
            made and modified instead. Defaults to False.

    Returns:
        pd.DataFrame | pd.Series | None: The flattened copy when ``inplace`` is falsy,
            otherwise None.
    """
    # Use truthiness rather than `inplace is False`: an identity check would treat falsy
    # non-bool values such as 0 or numpy.False_ as a request to mutate the caller's data.
    if not inplace:
        data = data.copy()

    if isinstance(data, pd.DataFrame) and column_reducer and isinstance(data.columns, pd.MultiIndex):
        data.columns = flatten_multi_index(data.columns, column_reducer, column_name)

    if index_reducer and isinstance(data.index, pd.MultiIndex):
        data.index = flatten_multi_index(data.index, index_reducer, index_name)

    # In-place operations follow the usual convention of returning None.
    return None if inplace else data

`flatten_multi_index(multi_index, reducer, name=...)`

Flattens a pandas MultiIndex into a single-level Index using a reducer function.

Parameters:

Name	Type	Description	Default
`multi_index`	`MultiIndex`	The pandas MultiIndex to flatten.	required
`reducer`	`ReducerType`	The reducer function to apply to each row of the MultiIndex. If a string is provided, it must be a key in the REDUCER_DICT, and the corresponding function will be used as the reducer.	required
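`name`	`str \| None`	Name of the new Index. If unset (`...`), a name is created by applying the reducer to the non-None level names (or None when no level is named). If None, the new Index is left unnamed.	`...`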

Returns:

Type	Description
`Index`	pd.Index: A single-level Index with flattened values.

Source code in wt_ml/utils/pandas_utils.py

def flatten_multi_index(multi_index: pd.MultiIndex, reducer: ReducerType, name: str | None = ...) -> pd.Index:
    """
    Flattens a pandas MultiIndex into a single-level Index using a reducer function.

    Every level value is converted to ``str`` before being reduced, so the reducer
    receives one list of strings per row of the MultiIndex.

    Args:
        multi_index (pd.MultiIndex): The pandas MultiIndex to flatten.
        reducer (ReducerType): The reducer function to apply to each row of the MultiIndex.
            If a string is provided, it must be a key in the REDUCER_DICT, and the corresponding
            function will be used as the reducer.
        name (str | None, optional): Name of the new Index. If unset (``...``), the name is
            created by applying the reducer to the list of non-None level names, or is None
            when no level is named. If None, the new Index is left unnamed.

    Returns:
        pd.Index: A single-level Index with flattened values.

    Raises:
        ValueError: If ``reducer`` is a string that is not a key of REDUCER_DICT.
    """
    if isinstance(reducer, str):
        if reducer not in REDUCER_DICT:
            raise ValueError(f"Invalid reducer '{reducer}'. Accepted values: {REDUCER_DICT.keys()}")
        reducer = REDUCER_DICT[reducer]

    # NOTE: This has been memory and time profiled against reduce and other algorithms.
    # This was the fastest with least memory footprint.
    multi_index_list = multi_index.to_frame(index=False).astype(str).values.tolist()
    if name is ...:
        # Hand the reducer a list, exactly like the rows below, so reducers that index
        # into or take the length of their input work for the name as well.
        level_names = [str(n) for n in multi_index.names if n is not None]
        name = reducer(level_names) if level_names else None

    str_index = pd.Index(map(reducer, multi_index_list), dtype=str, name=name, copy=False)
    return str_index

`unflatten_index(index, splitter, names=...)`

Unflattens a pandas str Index into a multi-level MultiIndex using a splitter function.

Parameters:

Name	Type	Description	Default
`index`	`Index`	The flat pandas Index to unflatten.	required
`splitter`	`SplitterType`	The splitter function to apply to each row of the Index. If a string is provided, it must be a key in the SPLITTER_DICT, and the corresponding function will be used as the splitter.	required
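`names`	`list[str] \| None`	Names of the new MultiIndex levels. If unset (`...`), the names are created by applying the splitter to the index name (or None when the index has no name). If None, no names are set.	`...`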

Returns:

Type	Description
`MultiIndex`	pd.MultiIndex: A multi-level MultiIndex with unflattened values.

Source code in wt_ml/utils/pandas_utils.py

def unflatten_index(index: pd.Index, splitter: SplitterType, names: list[str] | None = ...) -> pd.MultiIndex:
    """
    Unflattens a pandas str Index into a multi-level MultiIndex using a splitter function.

    Args:
        index (pd.Index): The flat pandas Index to unflatten.
        splitter (SplitterType): The splitter function to apply to each entry of the Index.
            If a string is provided, it must be a key in the SPLITTER_DICT, and the corresponding
            function will be used as the splitter.
        names (list[str] | None, optional): Names of the new MultiIndex levels. If unset
            (``...``), the names are created by applying the splitter to ``index.name``, or
            are None when the index has no name. If None, no names are set.

    Returns:
        pd.MultiIndex: A multi-level MultiIndex with unflattened values.

    Raises:
        ValueError: If ``splitter`` is a string that is not a key of SPLITTER_DICT.
    """
    if isinstance(splitter, str):
        if splitter not in SPLITTER_DICT:
            raise ValueError(f"Invalid splitter '{splitter}'. Accepted values: {SPLITTER_DICT.keys()}")
        splitter = SPLITTER_DICT[splitter]

    if names is ...:
        # The flat name is split the same way as the values to recover the level names.
        names = None if index.name is None else splitter(index.name)

    multi_index = pd.MultiIndex.from_tuples(index.map(splitter), names=names)
    return multi_index

`unflatten_levels(data, column_splitter=None, index_splitter=None, column_names=..., index_names=..., inplace=False)`

Unflatten the columns and/or index of a pandas DataFrame or Series using splitter functions.

Parameters:

Name	Type	Description	Default
`data`	`DataFrame \| Series`	DataFrame or Series whose flat index/columns should be unflattened into a MultiIndex.	required
`column_splitter`	`None \| SplitterType`	The splitter function to apply to the columns. If None, the columns will not be unflattened. Defaults to None.	`None`
`index_splitter`	`None \| SplitterType`	The splitter function to apply to the index. If None, the index will not be unflattened. Defaults to None.	`None`
`column_names`	`list[str] \| None`	New column names. If unset, will create names.	`...`
`index_names`	`list[str] \| None`	New index names. If unset, will create names.	`...`
`inplace`	`bool`	If True, the unflattening is done in-place. If False, a copy is made. Defaults to False.	`False`

Returns:

Type	Description
`DataFrame \| Series \| None`	pd.DataFrame \| pd.Series \| None: If inplace is False, returns the unflattened DataFrame or Series. If inplace is True, returns None.

Source code in wt_ml/utils/pandas_utils.py

def unflatten_levels(
    data: pd.DataFrame | pd.Series,
    column_splitter: None | SplitterType = None,
    index_splitter: None | SplitterType = None,
    column_names: list[str] | None = ...,
    index_names: list[str] | None = ...,
    inplace: bool = False,
) -> pd.DataFrame | pd.Series | None:
    """
    Unflatten the columns and/or index of a pandas DataFrame or Series using splitter functions.

    An axis is only touched when its splitter is truthy and the axis is not already a
    ``pd.MultiIndex``; otherwise it is left as it is. Columns are only considered for
    DataFrames.

    Args:
        data (pd.DataFrame | pd.Series): DataFrame or Series whose flat index/columns should
            be unflattened into a MultiIndex.
        column_splitter (None | SplitterType, optional): The splitter to apply to the columns.
            If None, the columns will not be unflattened. Defaults to None.
        index_splitter (None | SplitterType, optional): The splitter to apply to the index.
            If None, the index will not be unflattened. Defaults to None.
        column_names (list[str] | None, optional): New column level names, forwarded to
            ``unflatten_index``. If unset (``...``), names are created.
        index_names (list[str] | None, optional): New index level names, forwarded to
            ``unflatten_index``. If unset (``...``), names are created.
        inplace (bool, optional): If truthy, ``data`` itself is modified. If falsy, a copy is
            made and modified instead. Defaults to False.

    Returns:
        pd.DataFrame | pd.Series | None: The unflattened copy when ``inplace`` is falsy,
            otherwise None.
    """
    # Use truthiness rather than `inplace is False`: an identity check would treat falsy
    # non-bool values such as 0 or numpy.False_ as a request to mutate the caller's data.
    if not inplace:
        data = data.copy()

    # pd.MultiIndex is a subclass of pd.Index, so an `isinstance(..., pd.Index)` guard is
    # always true. Skip axes that already are a MultiIndex instead of splitting their tuples.
    if isinstance(data, pd.DataFrame) and column_splitter and not isinstance(data.columns, pd.MultiIndex):
        data.columns = unflatten_index(data.columns, column_splitter, column_names)

    if index_splitter and not isinstance(data.index, pd.MultiIndex):
        data.index = unflatten_index(data.index, index_splitter, index_names)

    # In-place operations follow the usual convention of returning None.
    return None if inplace else data