build_allocations(df, agg_lvl, start_date='2023-01-01', end_date='2023-12-31', drop_zero=False)

Build a series where groupby(agg_lvl).sum() equals 1.0 or 0.0 for every group, because the values represent a percentage allocation

Source code in wt_ml/dataset/dma_utils.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def build_allocations(
    df: pd.DataFrame | pd.Series,
    agg_lvl: list[str],
    start_date: str | None = "2023-01-01",
    end_date: str | None = "2023-12-31",
    drop_zero: bool = False,
) -> pd.Series:
    """Build a series where groupby(agg_lvl).sum() = 1.0 or 0.0 because it represent a percentage allocation"""
    if isinstance(df, pd.DataFrame):
        assert df.index.name in ["time", "date"], "if you pass a dataframe to build_allocations, the index must be time"
        df = df.loc[start_date:end_date].sum(0)
    agg_lvl = [lvl for lvl in agg_lvl if lvl in df.index.names]
    out_series = (df / df.groupby(agg_lvl).transform("sum")).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    if drop_zero:
        out_series = out_series[out_series > 0.0]
    return out_series

map_midx(df, lvl_name, lvl_map, axis=1, name=None)

Modify a multiindex of a pandas object to have new values via mapping different names on a level

Parameters:

Name Type Description Default
df PandasObject

object in which to modify the multi index

required
lvl_name str

level name of the multi index to modify

required
lvl_map dict[str, str]

specification of how to change the names of values in the level of the multi index

required
axis int

If 1, modify columns, if 0 modify index. Defaults to 1.

1
name str

if not None, this will insert a new level of this name above the level named lvl_name. Used for things like parent_signal and converting state to region.

None

Returns:

Name Type Description
PandasObject PandasObject

Copy of original df object but with the corresponding updated multi index

Source code in wt_ml/dataset/dma_utils.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def map_midx(
    df: PandasObject,
    lvl_name: str,
    lvl_map: dict[str, str],
    axis: int = 1,
    name: str | None = None,
) -> PandasObject:
    """Modify a multiindex of a pandas object to have new values via mapping different names on a level

    Args:
        df (PandasObject): object in which to modify the multi index
        lvl_name (str): level name of the multi index to modify
        lvl_map (dict[str, str]): specification of how to change the names of values in the level of the mult index
        axis (int, optional): If 1, modify columns, if 0 modify index. Defaults to 1.
        name (str, optional): if not None, this will insert a new level of this name above the level with level_name.
                                used for things like parent_signal and converting state to region.

    Returns:
        PandasObject: Copy of original `df` object but with the corresponding updated multi index
    """
    index = df.columns if axis == 1 else df.index
    assert lvl_name in index.names
    idx_arrays = [
        (index.get_level_values(name) if name != lvl_name else index.get_level_values(name).map(lvl_map))
        for name in index.names
    ]
    new_names = index.names.copy()
    if name is not None:
        idx_arrays.insert(index.names.index(lvl_name) + 1, index.get_level_values(lvl_name))
        new_names.insert(index.names.index(lvl_name), name)
    new_index = pd.MultiIndex.from_arrays(idx_arrays, names=new_names)
    df = df.copy()
    if axis == 1:
        df.columns = new_index
    else:
        df.index = new_index
    return df