Df wrapper - WatchTower Documentation

`Ext`

Source code in wt_ml/dataset/df_wrapper.py

@pd.api.extensions.register_dataframe_accessor("ext")
class Ext:
    """DataFrame accessor (``df.ext``) with n-dimensional shape/value helpers and dataset metadata.

    Metadata (``dataset_name``, ``freq`` and ``cache_len``) is stored in the
    wrapped object's ``attrs`` dictionary.
    """

    # Largest median gap (in days) between consecutive dates still treated as weekly.
    # A weekly gap should be 7; 10 leaves room for a few missing dates.
    _WEEKLY_MAX_GAP_DAYS = 10

    def __init__(self, pandas_obj):
        self._obj = pandas_obj
        self._ensure_attrs()
        # -1 never equals len(df), so the first `freq` access through this accessor recomputes
        # the frequency instead of trusting a value already present in `attrs`.
        self._obj.attrs["cache_len"] = -1

    def _ensure_attrs(self):
        """Create an empty ``attrs`` dict on the wrapped object when it has none."""
        if not hasattr(self._obj, "attrs"):
            # NOTE: this is a pandas experimental feature. Making sure it exists always in subsequent versions
            self._obj.attrs = {}

    @staticmethod
    def _unique_sorted_dates(values):
        """Return the distinct values of ``values`` in ascending order as a Series named "date"."""
        return pd.Series(values.sort_values().unique(), name="date")

    @property
    def shape_nd(self):
        """Shape of the frame with every index/column level counted as its own dimension.

        For the index, ``levshape`` is used when available, otherwise the plain index shape.
        For columns that expose ``levshape``, each level contributes the number of distinct
        values found in that level; otherwise the plain columns shape is used.

        Returns:
            tuple: Index dimensions followed by column dimensions.
        """
        index = self._obj.index
        idx_shape = index.levshape if hasattr(index, "levshape") else index.shape
        cols = self._obj.columns
        if hasattr(cols, "levshape"):
            col_shape = [cols.get_level_values(i).nunique() for i in range(cols.nlevels)]
        else:
            col_shape = cols.shape
        return (*idx_shape, *col_shape)

    @property
    def shape_nd_1(self):
        """
        A useful property to expand the dimensions for shape_nd which can be used in model placeholders.
        Every dataframe is expected to have a column level named "sim".
        If it has one, shape_nd is returned unchanged (a tuple).
        Otherwise a trailing "sim" dimension of size 1 is appended (and a list is returned), which keeps
        the placeholder shapes consistent.
        """
        shape_nd = self.shape_nd
        if "sim" not in self._obj.columns.names:
            # expands to include "sim" dimension
            shape_nd = list(shape_nd) + [1]
        return shape_nd

    @property
    def shape_1(self):
        """The 2-D ``DataFrame.shape`` as a list, with a trailing 1 appended when no "sim" column level exists."""
        full_shape = list(self._obj.shape)
        if "sim" not in self._obj.columns.names:
            full_shape.append(1)
        return full_shape

    @property
    def values_nd(self):
        """``DataFrame.values`` reshaped to ``shape_nd``."""
        return self._obj.values.reshape(self.shape_nd)

    @property
    def values_nd_1(self):
        """
        A useful property to expand the dimensions for values_nd which can be used in model placeholders.
        Every dataframe is expected to have a column level named "sim".
        If it has one, values_nd is returned unchanged.
        Otherwise a trailing "sim" axis of size 1 is added to values_nd, which keeps the placeholder
        shapes consistent.
        """
        if "sim" in self._obj.columns.names:
            return self.values_nd
        return np.expand_dims(self.values_nd, -1)

    @property
    def values_1(self):
        """
        Adds a sim axis to `dataframe.values`

        The result is 3-D: the first and last dimensions are taken from `shape_nd_1` and every
        dimension in between is flattened into the middle axis.
        """
        full_shape = self.shape_nd_1
        return self._obj.values.reshape(full_shape[0], -1, full_shape[-1])

    @property
    def name(self):
        """The name of the dataset, or None when no name has been stored."""
        if hasattr(self._obj, "attrs"):
            return self._obj.attrs.get("dataset_name")
        return None  # default no name

    @name.setter
    def name(self, value):
        """
        We can save the dataset name into the `attrs` attribute of pandas.
        This is a debug feature which allows us to know which dataset was loaded in the variable.
        NOTE: `attrs` is an experimental feature, meaning it could be removed in subsequent versions of pandas.
        The `attrs` attribute, ensures that the `attrs` dictionary are passed through subsequent pandas chained
        functions. Find more info here,
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.attrs.html

        Args:
            value (str): The name of the dataset.
        """
        self._ensure_attrs()
        self._obj.attrs["dataset_name"] = value

    @property
    def freq(self) -> str:
        """Returns the frequency of the dataframe based on date columns/index.

        Dates are looked up in this order: the first ``DatetimeIndex`` level of a multi-level
        index, a ``DatetimeIndex`` index, a "date" column, a "timedesc" column. In every case the
        distinct dates are sorted before the gaps between them are measured, so repeated or
        unordered dates do not distort the result.

        The result is cached in ``attrs["freq"]`` and reused while ``len(df)`` still matches
        ``attrs["cache_len"]``.

        Raises:
            AttributeError: Will be raised if date or timedesc is not found in column/index

        Returns:
            str: The frequency. 'W-SAT': weekly frequency. 'M': monthly frequency.
        """
        df = self._obj
        self._ensure_attrs()
        if "freq" in df.attrs and len(df) == df.attrs.get("cache_len", -1):
            return df.attrs["freq"]
        self._obj.attrs["cache_len"] = len(df)

        date = None
        if df.index.nlevels > 1:
            for lev in df.index.levels:
                if isinstance(lev, pd.DatetimeIndex):
                    date = self._unique_sorted_dates(lev)
                    break
        if date is None:
            if isinstance(df.index, pd.DatetimeIndex):
                # Sort and de-duplicate exactly like the other branches; a raw index with repeated
                # or unordered dates would yield zero/negative gaps and always look weekly.
                date = self._unique_sorted_dates(df.index)
            elif "date" in df.columns:
                date = self._unique_sorted_dates(df["date"])
            elif "timedesc" in df.columns:
                date = self._unique_sorted_dates(df["timedesc"])
            else:
                raise AttributeError("DataFrame doesn't have date column.")

        # difference should be 7 if freq is in weeks. here we take 10 just incase if there are missing dates
        median_gap_days = date.diff().median().days
        freq = "W-SAT" if median_gap_days <= self._WEEKLY_MAX_GAP_DAYS else "M"
        self._obj.attrs["freq"] = freq
        return freq

`freq: str` `property`

Returns the frequency of the dataframe based on date columns/index.

Raises:

Type	Description
`AttributeError`	Will be raised if date or timedesc is not found in column/index

Returns:

Name	Type	Description
`str`	`str`	The frequency. 'W-SAT': weekly frequency. 'M': monthly frequency.

`name` `property` `writable`

The name of the dataset

`shape_nd_1` `property`

A useful property that expands the dimensions of shape_nd so it can be used in model placeholders. Every dataframe is expected to have a column level named "sim". If it has one, shape_nd is returned unchanged; otherwise shape_nd is extended with a trailing "sim" dimension of size 1. This keeps the placeholder shapes consistent.

`values_1` `property`

Adds a sim axis to dataframe.values

`values_nd_1` `property`

A useful property that expands the dimensions of values_nd so it can be used in model placeholders. Every dataframe is expected to have a column level named "sim". If it has one, values_nd is returned unchanged; otherwise values_nd is extended with a trailing "sim" axis of size 1. This keeps the placeholder shapes consistent.

Ext

freq: str property

name property writable

shape_nd_1 property