flatten_levels(data, column_reducer=None, index_reducer=None, column_name=..., index_name=..., inplace=False)

Flattens the columns and/or index of a pandas DataFrame or Series using reducer functions.

Parameters:

Name Type Description Default
data DataFrame | Series

DataFrame or Series to flatten its MultiIndex index/columns.

required
column_reducer None | ReducerType

The reducer function to apply to the columns. If None, the columns will not be flattened. Defaults to None.

None
index_reducer None | ReducerType

The reducer function to apply to the index. If None, the index will not be flattened. Defaults to None.

None
column_name str | None

Name of the new column. If unset, will create a name.

...
index_name str | None

Name of the new index. If unset, will create a name.

...
inplace bool

If True, the flattening is done in-place. If False, a copy is made. Defaults to False.

False

Returns:

Type Description
DataFrame | Series | None

pd.DataFrame | pd.Series | None: If inplace is False, returns the flattened DataFrame or Series. If inplace is True, returns None.

Source code in wt_ml/utils/pandas_utils.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def flatten_levels(
    data: pd.DataFrame | pd.Series,
    column_reducer: None | ReducerType = None,
    index_reducer: None | ReducerType = None,
    column_name: str | None = ...,
    index_name: str | None = ...,
    inplace=False,
) -> pd.DataFrame | pd.Series | None:
    """
    Collapse MultiIndex columns and/or a MultiIndex index of a DataFrame or Series into single levels.

    An axis is only touched when a truthy reducer is supplied for it and that axis currently
    holds a pd.MultiIndex; otherwise it is left as is. Columns are only considered for DataFrames.

    Args:
        data (pd.DataFrame | pd.Series): Object whose MultiIndex index/columns should be flattened.
        column_reducer (None | ReducerType, optional): Reducer forwarded to flatten_multi_index for the
            columns. If None, the columns are not flattened. Defaults to None.
        index_reducer (None | ReducerType, optional): Reducer forwarded to flatten_multi_index for the
            index. If None, the index is not flattened. Defaults to None.
        column_name (str | None, optional): Name forwarded to flatten_multi_index for the new columns.
            Defaults to the Ellipsis sentinel ("unset").
        index_name (str | None, optional): Name forwarded to flatten_multi_index for the new index.
            Defaults to the Ellipsis sentinel ("unset").
        inplace (bool, optional): If exactly False, a copy of data is modified and returned. Any other
            value modifies data itself and nothing is returned. Defaults to False.

    Returns:
        pd.DataFrame | pd.Series | None: The flattened copy when inplace is False, otherwise None.
    """
    # Identity check on purpose: only the literal False triggers the copy-and-return path.
    work_on_copy = inplace is False
    target = data.copy() if work_on_copy else data

    if isinstance(target, pd.DataFrame) and column_reducer:
        columns = target.columns
        if isinstance(columns, pd.MultiIndex):
            target.columns = flatten_multi_index(columns, column_reducer, column_name)

    if index_reducer:
        row_index = target.index
        if isinstance(row_index, pd.MultiIndex):
            target.index = flatten_multi_index(row_index, index_reducer, index_name)

    return target if work_on_copy else None

flatten_multi_index(multi_index, reducer, name=...)

Flattens a pandas MultiIndex into a single-level Index using a reducer function.

Parameters:

Name Type Description Default
multi_index MultiIndex

The pandas MultiIndex to flatten.

required
reducer ReducerType

The reducer function to apply to each row of the MultiIndex. If a string is provided, it must be a key in the REDUCER_DICT, and the corresponding function will be used as the reducer.

required
name str | None

Name of the new Index. If unset, a name is created from the level names (or left as None when no level is named). An explicit value, including None, is used as is.

...

Returns:

Type Description
Index

pd.Index: A single-level Index with flattened values.

Source code in wt_ml/utils/pandas_utils.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def flatten_multi_index(multi_index: pd.MultiIndex, reducer: ReducerType, name: str | None = ...) -> pd.Index:
    """
    Flattens a pandas MultiIndex into a single-level Index using a reducer function.

    Args:
        multi_index (pd.MultiIndex): The pandas MultiIndex to flatten.
        reducer (ReducerType): The reducer function to apply to each row of the MultiIndex. It is called
            with a list holding the row's level values converted to strings. If a string is provided,
            it must be a key in the REDUCER_DICT, and the corresponding function will be used as the reducer.
        name (str | None, optional): Name of the new Index. If left unset (the Ellipsis default), the name
            is built by applying the reducer to the list of non-None level names, or is None when every
            level is unnamed. An explicit value, including None, is used as is.

    Returns:
        pd.Index: A single-level Index with flattened values.

    Raises:
        ValueError: If reducer is a string that is not a key in REDUCER_DICT.
    """
    if isinstance(reducer, str):
        if reducer not in REDUCER_DICT:
            raise ValueError(f"Invalid reducer '{reducer}'. Accepted values: {REDUCER_DICT.keys()}")
        reducer = REDUCER_DICT[reducer]

    # NOTE: This has been memory and time profiled against reduce and other algorithms.
    # This was the fastest with least memory footprint.
    multi_index_list = multi_index.to_frame(index=False).astype(str).values.tolist()
    if name is ...:
        # Hand the reducer a list, exactly like it receives for every row, so reducers that
        # index into or take the length of their argument also work when building the name.
        level_names = [str(n) for n in multi_index.names if n is not None]
        name = reducer(level_names) if level_names else None

    str_index = pd.Index(map(reducer, multi_index_list), dtype=str, name=name, copy=False)
    return str_index

unflatten_index(index, splitter, names=...)

Unflattens a pandas str Index into a multi-level MultiIndex using a splitter function.

Parameters:

Name Type Description Default
index Index

The pandas Index to unflatten.

required
splitter SplitterType

The splitter function to apply to each row of the Index. If a string is provided, it must be a key in the SPLITTER_DICT, and the corresponding function will be used as the splitter.

required
names list[str] | None

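Names of the new MultiIndex levels. If unset, names are created by splitting the name of the Index (or left as None when the Index is unnamed). An explicit value, including None, is used as is.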

...

Returns:

Type Description
MultiIndex

pd.MultiIndex: A multi-level MultiIndex with unflattened values.

Source code in wt_ml/utils/pandas_utils.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def unflatten_index(index: pd.Index, splitter: SplitterType, names: list[str] | None = ...) -> pd.MultiIndex:
    """
    Unflattens a pandas str Index into a multi-level MultiIndex using a splitter function.

    Args:
        index (pd.Index): The pandas Index to unflatten.
        splitter (SplitterType): The splitter function to apply to each row of the Index.
            If a string is provided, it must be a key in the SPLITTER_DICT, and the corresponding
            function will be used as the splitter.
        names (list[str] | None, optional): Names of the new MultiIndex levels. If left unset (the
            Ellipsis default), the names are obtained by applying the splitter to the name of the
            Index, or are None when the Index has no name. An explicit value, including None, is
            passed to the MultiIndex as is.

    Returns:
        pd.MultiIndex: A multi-level MultiIndex with unflattened values.

    Raises:
        ValueError: If splitter is a string that is not a key in SPLITTER_DICT.
    """
    if isinstance(splitter, str):
        if splitter not in SPLITTER_DICT:
            # The message names the splitter (it previously said "reducer" by mistake).
            raise ValueError(f"Invalid splitter '{splitter}'. Accepted values: {SPLITTER_DICT.keys()}")
        splitter = SPLITTER_DICT[splitter]

    if names is ...:
        # Derive the level names from the flat name with the same splitter used for the values.
        names = None if index.name is None else splitter(index.name)

    multi_index = pd.MultiIndex.from_tuples(index.map(splitter), names=names)
    return multi_index

unflatten_levels(data, column_splitter=None, index_splitter=None, column_names=..., index_names=..., inplace=False)

Unflatten the columns and/or index of a pandas DataFrame or Series using splitter functions.

Parameters:

Name Type Description Default
data DataFrame | Series

DataFrame or Series whose flat index/columns should be unflattened into a MultiIndex.

required
column_splitter None | SplitterType

The splitter function to apply to the columns. If None, the columns will not be unflattened. Defaults to None.

None
index_splitter None | SplitterType

The splitter function to apply to the index. If None, the index will not be unflattened. Defaults to None.

None
column_names list[str] | None

New column names. If unset, will create names.

...
index_names list[str] | None

New index names. If unset, will create names.

...
inplace bool

If True, the unflattening is done in-place. If False, a copy is made. Defaults to False.

False

Returns:

Type Description
DataFrame | Series | None

pd.DataFrame | pd.Series | None: If inplace is False, returns the unflattened DataFrame or Series. If inplace is True, returns None.

Source code in wt_ml/utils/pandas_utils.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def unflatten_levels(
    data: pd.DataFrame | pd.Series,
    column_splitter: None | SplitterType = None,
    index_splitter: None | SplitterType = None,
    column_names: list[str] | None = ...,
    index_names: list[str] | None = ...,
    inplace=False,
) -> pd.DataFrame | pd.Series | None:
    """
    Expand the flat columns and/or index of a DataFrame or Series into MultiIndex levels.

    An axis is only touched when a truthy splitter is supplied for it and that axis is a pd.Index.
    Columns are only considered for DataFrames.

    Args:
        data (pd.DataFrame | pd.Series): Object whose index/columns should be unflattened.
        column_splitter (None | SplitterType, optional): Splitter forwarded to unflatten_index for the
            columns. If None, the columns are not unflattened. Defaults to None.
        index_splitter (None | SplitterType, optional): Splitter forwarded to unflatten_index for the
            index. If None, the index is not unflattened. Defaults to None.
        column_names (list[str] | None, optional): Names forwarded to unflatten_index for the new
            column levels. Defaults to the Ellipsis sentinel ("unset").
        index_names (list[str] | None, optional): Names forwarded to unflatten_index for the new
            index levels. Defaults to the Ellipsis sentinel ("unset").
        inplace (bool, optional): If exactly False, a copy of data is modified and returned. Any other
            value modifies data itself and nothing is returned. Defaults to False.

    Returns:
        pd.DataFrame | pd.Series | None: The unflattened copy when inplace is False, otherwise None.
    """
    # Identity check on purpose: only the literal False triggers the copy-and-return path.
    work_on_copy = inplace is False
    target = data.copy() if work_on_copy else data

    if isinstance(target, pd.DataFrame) and column_splitter:
        columns = target.columns
        if isinstance(columns, pd.Index):
            target.columns = unflatten_index(columns, column_splitter, column_names)

    if index_splitter:
        row_index = target.index
        if isinstance(row_index, pd.Index):
            target.index = unflatten_index(row_index, index_splitter, index_names)

    return target if work_on_copy else None