AllUniqueError

Bases: ValueError

When there are no hierarchical columns because everything is unique within them.

Source code in wt_ml/layers/hier_embedding.py
class AllUniqueError(ValueError):
    """When there are no hierchical columns because everything is unique within it."""

BetaGammaDecay

Bases: Module

Class to learn decayed impacts for the ensuing time periods after a spend in a media vehicle
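
The decay is geometric: on top of the instant impact, a spend keeps contributing with weights beta, beta*gamma, beta*gamma^2, ... over a DECAY_LENGTH-week window, so the total lift equals the instant impact times 1 + beta * (1 - gamma**DECAY_LENGTH) / (1 - gamma), which is the decayed_impact_mult computed in __call__ below. A minimal NumPy sketch of that arithmetic, with purely illustrative values:

import numpy as np

beta, gamma = 0.4, 0.6   # illustrative decay parameters inside [beta_min, beta_max] and [gamma_min, gamma_max]
decay_length = 13        # stand-in for DECAY_LENGTH

# Closed-form multiplier applied to the instant impact (matches decayed_impact_mult below).
total_mult = 1 + beta * (1 - gamma**decay_length) / (1 - gamma)

# Equivalent week-by-week view: the instant impact plus a geometric tail.
instant_impact = 100.0
tail = [instant_impact * beta * gamma**k for k in range(decay_length)]
print(total_mult, instant_impact * total_mult, instant_impact + sum(tail))  # ~1.999, ~199.9, ~199.9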

Source code in wt_ml/layers/beta_gamma_decay.py
class BetaGammaDecay(Module):
    """Class to learn decayed impacts for the ensuing time periods after a spend in a media vehicle"""

    def __init__(
        self,
        encodings: dict[str, Any],
        hierarchy_categories: list[str | list[str]] | None = None,
        hyperparameters: Hyperparams | None = None,
        name: str | None = None,
    ):
        """Creates a betagammadecay object to learn decayed impacts using beta and gamma parameters.

        Args:
            hierarchy (pd.DataFrame): The hierarchy that the impact learns on.
            hyperparameters (Hyperparams | None, optional): Dictionary of hyperparameters for buidling this layer.
                                                            Defaults to None.
            name (str | None, optional): Name of the layer. Defaults to None.
        """
        super().__init__(hyperparameters=hyperparameters, name=name)
        self.encodings = encodings
        self.hierarchy_categories = hierarchy_categories

    def build(self, input_shapes):  # noqa: U100
        """Build the layer parameters needed for calculating decays.

        Args:
            input_shapes (InputShapes): The effect and hierarchy shapes.
        """
        self.gamma_min = self.hyperparameters.get_float(
            "gamma_min",
            default=0.01,
            min=0.00,
            max=1.0,
            help="The minimum possible value to learn for the exponential decay factor.",
        )
        self.gamma_max = self.hyperparameters.get_float(
            "gamma_max",
            default=1.0,
            min=self.gamma_min,
            max=1.0,
            help="The maximum possible value to learn for the exponential decay factor.",
        )
        self.beta_min = self.hyperparameters.get_float(
            "beta_min",
            default=0.01,
            min=0.00,
            max=1.0,
            help="The minimum possible value to learn for the first step of the decay.",
        )
        self.beta_max = self.hyperparameters.get_float(
            "beta_max",
            default=1.0,
            min=self.beta_min,
            max=1.0,
            help="The maximum possible value to learn for the first step of the decay.",
        )
        self.betagamma_emb_layer = self.hyperparameters.get_submodule(
            name="betagamma_hier",
            module_type=HierchicalEmbedding,
            kwargs=dict(
                encodings=self.encodings,
                columns=self.hierarchy_categories,
                dropped_columns=[],
                shape=[2],
                feature_names=["beta", "gamma"],
            ),
            help="The embedding layer for the decay parameters.",
        )

    @property
    def gamma_range(self) -> float:
        return self.gamma_max - self.gamma_min

    @property
    def beta_range(self) -> float:
        return self.beta_max - self.beta_min

    def __call__(
        self, batch: BetaGammaDecayInput, training: bool = False, debug: bool = False, skip_metrics: bool = False
    ) -> BetaGammaDecayIntermediaries:
        """Calculate decays, total impacts using the learned beta gamma parameters

        Args:
            impact_by_signal_instant (TensorLike): Instant impacts
            hierarchy (dict[str, TensorLike]): Hierarchy placeholder for Hierarchial embedding variable.
            training (bool, optional): Whether this is a training or inference run. Defaults to False.

        Returns:
            BetaGammaDecayIntermediaries: Intermediate calculations for beta gamma decay - beta, gamma, impacts etc.
        """
        # batch x vehicles x 2
        betagamma_emb = self.betagamma_emb_layer(
            batch.hierarchy, training=training, skip_metrics=skip_metrics, debug=debug
        )
        beta_emb, gamma_emb = tf.unstack(betagamma_emb, num=2, axis=2)
        if self.gamma_max < 1:
            gamma = 1.0 - 1.0 / (
                transform_softbounded(
                    gamma_emb,
                    max_val=3.0,
                    min_val=-3,
                    name="gamma",
                    add_loss=self.add_loss,
                    mult=0.1,
                    enabled=not skip_metrics,
                )
                * (1 / (1 - self.gamma_max) - 1 / (1 - self.gamma_min))
                + 1 / (1 - self.gamma_min)
            )
        else:
            gamma = 1.0 - 1.0 / (softplus(gamma_emb) + 1 / (1 - self.gamma_min))
        # batch x vehicles
        beta = (
            transform_softbounded(
                beta_emb,
                max_val=3.0,
                min_val=-3,
                name="beta",
                mult=0.1,
                add_loss=self.add_loss,
                enabled=not skip_metrics,
            )
            * self.beta_range
            + self.beta_min
        )
        # batch x 1 x vehicle
        decayed_impact_mult = tf.expand_dims(1 + beta * (1 - tf.math.pow(gamma, DECAY_LENGTH)) / (1 - gamma), 1)
        impact_by_signal_total = batch.impact_by_signal_instant * decayed_impact_mult
        impact_by_signal_decayed = exp_moving_avg(
            batch.impact_by_signal_instant,
            beta,
            gamma,
            name="impact_by_signal_decayed",
            decay_length=DECAY_LENGTH,
        )
        impact = tf.math.reduce_sum(impact_by_signal_decayed, axis=2)
        return BetaGammaDecayIntermediaries(
            beta_emb=beta_emb if debug else None,
            gamma_emb=gamma_emb if debug else None,
            beta=beta,
            gamma=gamma,
            decayed_impact_mult=decayed_impact_mult,
            impact_by_signal_total=impact_by_signal_total,
            impact_by_signal=impact_by_signal_decayed,
            impact=impact,
            signal_names=tf.gather(
                tf.convert_to_tensor(tuple(f"{vehicle}_decayed" for vehicle in get_lookups(self.encodings["vehicle"]))),
                batch.hierarchy["vehicle"][0],
            ),
        )

__call__(batch, training=False, debug=False, skip_metrics=False)

Calculate decays and total impacts using the learned beta and gamma parameters.

Parameters:

    batch (BetaGammaDecayInput): Input holding the instant impacts (impact_by_signal_instant) and the hierarchy
        placeholders for the Hierarchical embedding variable. Required.
    training (bool): Whether this is a training or inference run. Defaults to False.

Returns:

    BetaGammaDecayIntermediaries: Intermediate calculations for beta gamma decay - beta, gamma, impacts etc.

Source code in wt_ml/layers/beta_gamma_decay.py
def __call__(
    self, batch: BetaGammaDecayInput, training: bool = False, debug: bool = False, skip_metrics: bool = False
) -> BetaGammaDecayIntermediaries:
    """Calculate decays, total impacts using the learned beta gamma parameters

    Args:
        impact_by_signal_instant (TensorLike): Instant impacts
        hierarchy (dict[str, TensorLike]): Hierarchy placeholder for Hierarchial embedding variable.
        training (bool, optional): Whether this is a training or inference run. Defaults to False.

    Returns:
        BetaGammaDecayIntermediaries: Intermediate calculations for beta gamma decay - beta, gamma, impacts etc.
    """
    # batch x vehicles x 2
    betagamma_emb = self.betagamma_emb_layer(
        batch.hierarchy, training=training, skip_metrics=skip_metrics, debug=debug
    )
    beta_emb, gamma_emb = tf.unstack(betagamma_emb, num=2, axis=2)
    if self.gamma_max < 1:
        gamma = 1.0 - 1.0 / (
            transform_softbounded(
                gamma_emb,
                max_val=3.0,
                min_val=-3,
                name="gamma",
                add_loss=self.add_loss,
                mult=0.1,
                enabled=not skip_metrics,
            )
            * (1 / (1 - self.gamma_max) - 1 / (1 - self.gamma_min))
            + 1 / (1 - self.gamma_min)
        )
    else:
        gamma = 1.0 - 1.0 / (softplus(gamma_emb) + 1 / (1 - self.gamma_min))
    # batch x vehicles
    beta = (
        transform_softbounded(
            beta_emb,
            max_val=3.0,
            min_val=-3,
            name="beta",
            mult=0.1,
            add_loss=self.add_loss,
            enabled=not skip_metrics,
        )
        * self.beta_range
        + self.beta_min
    )
    # batch x 1 x vehicle
    decayed_impact_mult = tf.expand_dims(1 + beta * (1 - tf.math.pow(gamma, DECAY_LENGTH)) / (1 - gamma), 1)
    impact_by_signal_total = batch.impact_by_signal_instant * decayed_impact_mult
    impact_by_signal_decayed = exp_moving_avg(
        batch.impact_by_signal_instant,
        beta,
        gamma,
        name="impact_by_signal_decayed",
        decay_length=DECAY_LENGTH,
    )
    impact = tf.math.reduce_sum(impact_by_signal_decayed, axis=2)
    return BetaGammaDecayIntermediaries(
        beta_emb=beta_emb if debug else None,
        gamma_emb=gamma_emb if debug else None,
        beta=beta,
        gamma=gamma,
        decayed_impact_mult=decayed_impact_mult,
        impact_by_signal_total=impact_by_signal_total,
        impact_by_signal=impact_by_signal_decayed,
        impact=impact,
        signal_names=tf.gather(
            tf.convert_to_tensor(tuple(f"{vehicle}_decayed" for vehicle in get_lookups(self.encodings["vehicle"]))),
            batch.hierarchy["vehicle"][0],
        ),
    )
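
The transforms above keep the learned parameters inside their configured ranges. Assuming transform_softbounded squashes its input into (0, 1) (a plain sigmoid stands in for it here, since that helper is internal to wt_ml), the gamma expression interpolates between gamma_min and gamma_max, and beta is a linear rescaling into [beta_min, beta_max]. A small sketch of that algebra:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

gamma_min, gamma_max = 0.01, 0.9
beta_min, beta_max = 0.01, 1.0

def gamma_from_emb(gamma_emb):
    # Same algebra as the gamma_max < 1 branch of __call__, with sigmoid standing in
    # for transform_softbounded (assumed to yield a value in (0, 1)).
    t = sigmoid(gamma_emb)
    return 1.0 - 1.0 / (t * (1 / (1 - gamma_max) - 1 / (1 - gamma_min)) + 1 / (1 - gamma_min))

def beta_from_emb(beta_emb):
    t = sigmoid(beta_emb)
    return t * (beta_max - beta_min) + beta_min

# t -> 0 recovers the minimum and t -> 1 the maximum, so both stay inside their bounds.
print(gamma_from_emb(-10.0), gamma_from_emb(10.0))  # ~0.01, ~0.9
print(beta_from_emb(-10.0), beta_from_emb(10.0))    # ~0.01, ~1.0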

__init__(encodings, hierarchy_categories=None, hyperparameters=None, name=None)

Creates a BetaGammaDecay object to learn decayed impacts using beta and gamma parameters.

Parameters:

    encodings (dict[str, Any]): Encodings of the hierarchy columns used by the embedding layer. Required.
    hierarchy_categories (list[str | list[str]] | None): Hierarchy levels to learn the decay parameters on.
        Defaults to None.
    hyperparameters (Hyperparams | None): Dictionary of hyperparameters for building this layer. Defaults to None.
    name (str | None): Name of the layer. Defaults to None.
Source code in wt_ml/layers/beta_gamma_decay.py
def __init__(
    self,
    encodings: dict[str, Any],
    hierarchy_categories: list[str | list[str]] | None = None,
    hyperparameters: Hyperparams | None = None,
    name: str | None = None,
):
    """Creates a betagammadecay object to learn decayed impacts using beta and gamma parameters.

    Args:
        hierarchy (pd.DataFrame): The hierarchy that the impact learns on.
        hyperparameters (Hyperparams | None, optional): Dictionary of hyperparameters for buidling this layer.
                                                        Defaults to None.
        name (str | None, optional): Name of the layer. Defaults to None.
    """
    super().__init__(hyperparameters=hyperparameters, name=name)
    self.encodings = encodings
    self.hierarchy_categories = hierarchy_categories
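
For reference, encodings and hierarchy_categories are consumed by the HierchicalEmbedding documented further below: encodings maps each hierarchy column either to a {value: integer_code} lookup or to the string "continuous", and hierarchy_categories lists the levels (single columns or crossed column groups) that get their own beta/gamma deviations. A hypothetical construction, with illustrative names only:

from wt_ml.layers.beta_gamma_decay import BetaGammaDecay  # assumed import path, matching the source path above

# Hypothetical encodings: categorical columns map raw values to integer codes,
# continuous columns are flagged with the string "continuous".
encodings = {
    "vehicle": {"tv": 0, "search": 1, "social": 2},
    "brand": {"brand_a": 0, "brand_b": 1},
    "price_index": "continuous",
}

# One decay per vehicle plus a vehicle-by-brand correction on top of it.
hierarchy_categories = ["vehicle", ["vehicle", "brand"]]

decay_layer = BetaGammaDecay(encodings=encodings, hierarchy_categories=hierarchy_categories)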

build(input_shapes)

Build the layer parameters needed for calculating decays.

Parameters:

    input_shapes (InputShapes): The effect and hierarchy shapes. Required.
Source code in wt_ml/layers/beta_gamma_decay.py
def build(self, input_shapes):  # noqa: U100
    """Build the layer parameters needed for calculating decays.

    Args:
        input_shapes (InputShapes): The effect and hierarchy shapes.
    """
    self.gamma_min = self.hyperparameters.get_float(
        "gamma_min",
        default=0.01,
        min=0.00,
        max=1.0,
        help="The minimum possible value to learn for the exponential decay factor.",
    )
    self.gamma_max = self.hyperparameters.get_float(
        "gamma_max",
        default=1.0,
        min=self.gamma_min,
        max=1.0,
        help="The maximum possible value to learn for the exponential decay factor.",
    )
    self.beta_min = self.hyperparameters.get_float(
        "beta_min",
        default=0.01,
        min=0.00,
        max=1.0,
        help="The minimum possible value to learn for the first step of the decay.",
    )
    self.beta_max = self.hyperparameters.get_float(
        "beta_max",
        default=1.0,
        min=self.beta_min,
        max=1.0,
        help="The maximum possible value to learn for the first step of the decay.",
    )
    self.betagamma_emb_layer = self.hyperparameters.get_submodule(
        name="betagamma_hier",
        module_type=HierchicalEmbedding,
        kwargs=dict(
            encodings=self.encodings,
            columns=self.hierarchy_categories,
            dropped_columns=[],
            shape=[2],
            feature_names=["beta", "gamma"],
        ),
        help="The embedding layer for the decay parameters.",
    )

HierchicalEmbedding

Bases: Module

Hierarchical Embedding creates embeddings for a layer with different input hierarchy levels as trainable weights such that the deviations from the expected deviations are penalized. These trained embeddings are used to calculate the model parameters for a layer.
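
In other words, every hierarchy level keeps its own embedding rows (plus a shared bias), and an entity's parameter is the sum of the rows its hierarchy selects, so coarse levels carry shared behaviour and finer levels only learn regularized deviations from it. A toy NumPy illustration of that lookup-and-sum (values are made up, not the layer's real weights):

import numpy as np

# Toy embedding tables for two hierarchy levels plus a bias, each with 2 features.
bias = np.array([0.5, 0.0])
brand_emb = {"brand_a": np.array([0.2, -0.1]), "brand_b": np.array([-0.3, 0.4])}
vehicle_emb = {"tv": np.array([0.1, 0.1]), "search": np.array([0.0, -0.2])}

def parameters(brand, vehicle):
    # The final parameter is the bias plus the contribution of every hierarchy level.
    return bias + brand_emb[brand] + vehicle_emb[vehicle]

print(parameters("brand_a", "tv"))      # [0.8, 0.0]
print(parameters("brand_b", "search"))  # [0.2, 0.2]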

Source code in wt_ml/layers/hier_embedding.py
class HierchicalEmbedding(Module):
    """Hierarchical Embedding creates embeddings for a layer with different input hierarchy levels
    as trainable weights such that the deviations from the expected deviations are penalized.
    These trained embeddings are used to calculate the model parameters for a layer.
    """

    def __init__(
        self,
        shape: list[int],
        encodings: dict[str, Any],
        columns: list[str | list[str]] | None = None,
        use_bias: bool = True,
        dropped_columns=[],
        initializer: Initializer = 0.0,
        bias_initializer: Initializer = 0.0,
        hyperparameters: Hyperparams | None = None,
        feature_names: list[list[str]] | list[str] | None = None,
        name: str | None = None,
        increase_lr: float | None = None,
    ):
        """Initializes the hierarchical embedding object with hierarchy levels, parameter shape
        and other initializers.

        Args:
            shape (list[int]): Desired dimensions of model parameters only within final result.
            hierarchy (pd.DataFrame): The hierarchy for which embeddings are trained.
            columns (list[str  |  list[str]] | None, optional): Hierarchy levels to learn embeddings. Defaults to None.
            use_bias (bool, optional): Whether to include bias. Defaults to True.
            dropped_columns (list, optional): Columns to exclude in hierarchy.
                                        Defaults to ["granular", "region", "coastal", "populationdensity", "medianage"].
            initializer (Initializer, optional): Initializer for embeddings(weights). Defaults to 0.0.
            bias_initializer (Initializer, optional): Initializer for bias. Defaults to 0.0.
            hyperparameters (Hyperparams | None, optional): Dictionary of hyperparameters for buidling this layer.
                                                            Defaults to None.
            name (str | None, optional): Name of the layer. Defaults to None.
        """
        super().__init__(hyperparameters=hyperparameters, name=name)
        self.use_bias = use_bias
        encodings_dropped = {k: v for k, v in encodings.items() if k not in dropped_columns}
        assert encodings_dropped, "No cols in hierarchy."
        self.encodings = encodings_dropped
        self.shape = shape
        self.increase_lr = increase_lr
        self.initializer = initializer
        self.bias_initializer = bias_initializer
        self.created_reg = False
        self.feature_names = feature_names
        if self.feature_names is not None and isinstance(self.feature_names[0], list):
            self.feature_names = [elem for sublist in self.feature_names for elem in sublist]
        if columns is not None:
            self._process_columns(columns)
        else:
            self.columns = list(self.encodings.keys())
            self.used_cols = set(self.columns)

    def _process_columns(self, columns: list[str | list[str]]):
        """
        Process columns and remove duplicates or column(s) which have unique hierarchies.
        Sets `columns` and `used_cols` attribute.

        Args:
            columns (list[str  |  list[str]]): Hierarchy levels to learn embeddings.
        """
        used_cols = set(tf.nest.flatten(columns))
        missing_cols = used_cols.difference(self.encodings.keys())
        assert not missing_cols, f"Column(s) passed not in hierarchy. {missing_cols}"

        issues: list[str] = []
        new_columns: list[str | tuple[str]] = []
        for column in columns:
            if isinstance(column, (list, tuple)):
                new_column = []
                for sub_col in column:
                    encodings = self.encodings[sub_col]
                    if encodings == "continuous" or len(encodings) > 1:
                        new_column.append(sub_col)
                    else:
                        issues.append(f"{sub_col} in {column} has single encoding.")

                if len(new_column) == 1:
                    # convert to str so duplicates can be detected easily.
                    new_columns.append(new_column[0])
                elif len(new_column) > 1:
                    new_columns.append(tuple(new_column))
                else:
                    issues.append(f"Dropping {column} as it has unique encodings.")
            else:
                encodings = self.encodings[column]
                if encodings == "continuous" or len(encodings) > 1:
                    new_columns.append(column)
                else:
                    issues.append(f"Dropping {column} as it has single encoding.")

        org_col_len = len(new_columns)
        # if any column is duplicated, we need to get rid of it.
        new_columns = list(dict.fromkeys(new_columns))
        assert len(new_columns), "All columns are dropped since they are all unique."
        if len(new_columns) != org_col_len:
            issues.append("Duplicate hierarchies removed.")

        if issues:
            warn_issues(self.name, issues, new_columns, columns)

        self.columns = new_columns
        self.used_cols = set(tf.nest.flatten(self.columns))

    def build(self, input_shapes):  # noqa: U100
        """Builds hyperparamters, deviations, embeddings(weights), bias and other intermediate variables.

        Args:
            input_shapes (InputShapes): The effect and hierarchy shapes.

        Raises:
            AllUniqueError: When there are no hierarchical columns because everything is unique within them.
        """
        self.use_l2_squared = self.hyperparameters.get_bool(
            "use_l2_squared",
            default=False,
            help="Use the l2 norm to the fourth power instead of only using it for large values for stability.",
        )
        self.desired_stddev = self.hyperparameters.get_float(
            "desired_stddev",
            default=0.10,
            min=0.01,
            max=100.0,
            help="The desired maximum value for the stddev along the full hierarchy.",
        )
        self.use_inv_sqrt = self.hyperparameters.get_bool(
            "use_inv_sqrt",
            default=True,
            help="Scale the stddev for each category by the inverse square root of the number of unique values.",
        )

        if self.use_bias:
            self.reg_bias = self.hyperparameters.get_float(
                "reg_bias",
                default=0.0,
                min=0.0,
                max=1e4,
                help="The strength of l2 regularization to apply to the bias term.",
            )
        self.offsets = {}
        self.col_counts = {
            k: (
                self.encodings[k]
                if isinstance(self.encodings[k], (float, int))
                else ((max(self.encodings[k].values()) + 1) if not isinstance(self.encodings[k], str) else 1)
            )
            for k in tf.nest.flatten(self.columns)
        }
        var_counts = [
            self.col_counts[col] if isinstance(col, str) else np.prod([self.col_counts[k] for k in col])
            for col in self.columns
        ]

        self.columns = [col for col, count in zip(self.columns, var_counts) if count > 1 or self.is_continuous(col)]
        count = 0
        desired_stddevs = []
        # Scatters is the inverse of gathering from num_regularized_categories + 1 to count of weights
        self.scatters = []
        self.penalty_mults = []
        multipliers = []
        reg_counts = []
        flattened = int(np.prod(self.shape)) if len(self.shape) > 0 else 1
        for col_names in self.columns:
            if isinstance(col_names, str):
                col_names = [col_names]
            cat_cols = [col for col in col_names if self.encodings[col] != "continuous"]
            name = self.stitched_cols(col_names)
            num_cont_cols = len(col_names) - len(cat_cols)
            if num_cont_cols > 1:
                raise ValueError(
                    "You can only have one continuous hierarchical variable within a single hierarchical level"
                )
            number = int(np.prod([self.col_counts[k] for k in col_names]))
            multipliers.append(1 / number)
            self.scatters += [len(multipliers)] * number
            desired_stddevs.append(1 / np.sqrt(number) if self.use_inv_sqrt else 1)
            reg_counts.append(max(1, number - 1))
            self.offsets[name] = count
            count += number
            self.penalty_mults.append(self.get_reg_mult(col_names))

        if count == 0 or len(multipliers) == 0:
            raise AllUniqueError("There are no hierarchical columns; everything is unique.")
        # scatters is shape (count,)
        self.scatters = np.array(self.scatters)
        # multipliers is shape (1 + regularized_counts,)
        self.multipliers = np.array([0] + multipliers, dtype=np.float32)
        self.penalty_mults = np.array(self.penalty_mults, dtype=np.float32)
        self.dense_shape = [len(self.multipliers), flattened]
        # desired_* is shape (regularized_counts,)
        self.desired_stddevs = (
            self.desired_stddev * np.array(desired_stddevs or [1], dtype=np.float32) / np.sqrt(max(1, len(multipliers)))
        )
        self.desired_l2norms = (
            np.array(reg_counts, dtype=np.float32) * self.desired_stddevs**2 / (1 if self.use_l2_squared else 2)
        )
        self.weights = self.create_var(
            "weights", shape=[count, flattened], dtype=tf.float32, trainable=True, initializer=self.initializer
        )
        if self.use_bias:
            self.bias = self.create_var(
                "bias", shape=[flattened], dtype=tf.float32, trainable=True, initializer=self.bias_initializer
            )

    def stitched_cols(self, col_names: str | list[str] | tuple) -> str:
        """Returns a string representation of the columns."""
        return col_names if isinstance(col_names, str) else "-".join(col_names)

    def get_reg_config(self, col_names: tuple[str] | list[str]) -> tuple[str, float]:
        """Creates name used for regularization and default value for the penalty multiplier.
        If all columns are categorical, we can just join their names in order to find penalty.
        Otherwise, when different continuous features are paired with a same categorical column,
        the resulting hierarchical categories share same penalty. Always, suffix the continuous
        string to the end of the name.
        Example: brand-DEM and brand-GOP have the same penalty called reg_brand-continuous.
        Examples of mixed categories: reg_brand-continuous, reg_vehicle-continuous.

        Args:
            col_names (list[str]): Hierarchical column names.

        Returns:
            tuple[str, float]: Regularization penalty name and the default value.
        """
        count = int(np.prod([self.col_counts[k] for k in col_names]))
        if count == 1:
            # Purely continuous features
            default_value = 0.0
        else:
            default_value = 1.0
        names = [name for name in col_names if not self.is_continuous(name)]
        names.append("continuous") if self.is_continuous(col_names) else None
        reg_name = f"reg_{self.stitched_cols(names)}"
        return reg_name, default_value

    def get_reg_mult(self, col_names: list[str] | tuple[str]) -> float:
        """Returns the penalty multiplier for hierarchy level reg loss."""
        reg_name, default = self.get_reg_config(col_names)
        mult = self.hyperparameters.get_float(
            name=reg_name,
            default=default,
            help="Penalty multiplier for hierarchy level reg loss.",
        )
        return mult

    def is_continuous(self, k: str | Iterable[str]) -> bool:
        return is_continuous(k, self.encodings)

    def get_hierarchical_parameters(self, hierarchy: Mapping[str, TensorLike]) -> tuple[tf.Tensor, tf.Tensor]:
        """Returns the model parameters' for every hierarchical level (non-aggregated weights)

        Args:
            hierarchy (dict[str, TensorLike]): Hierarchy placeholder for Hierarchical embedding variable.

        NOTE: this currently does not depend on training flag. Possible we change how things work such that it will.

        Returns:

            tuple[tf.Tensor, tf.Tensor]: weights, indices

                the 1st list[tf.Tensor]=A: A[i] corresponds to the multiplicative data for the continuous aspects
                                                of the hierarchy in self.columns[i]
                the 2nd list[tf.Tensor]=B: B[i] corresponds to the indices in self.weights that corresponds to the
                                                correct learned coefficients of the hierarchy in self.columns[i]
        """
        # Shape is [count, ...] for both of these
        weights = []
        indices = []
        for col_names in self.columns:
            if isinstance(col_names, str):
                # We want to assume col_names is a list of column names
                col_names = [col_names]
            num_cols = len(col_names)
            cat_cols = [col for col in col_names if self.encodings[col] != "continuous"]
            cont_cols = [col for col in col_names if self.encodings[col] == "continuous"]
            num_cat_cols = len(cat_cols)
            num_cont_cols = num_cols - num_cat_cols

            name = self.stitched_cols(col_names)
            # The start of the region for this weight
            start = self.offsets[name]
            if num_cont_cols == 0:
                shape = tf.shape(hierarchy[cat_cols[0]])
                weight = tf.ones(shape, dtype=tf.float32, name=f"{name}_weights")
            else:
                # you can only have 1 cont col in col_names
                weight = hierarchy[cont_cols[0]]

            if num_cat_cols == 0:
                index = tf.cast(
                    tf.fill(tf.shape(weight), start),
                    dtype=tf.int64,
                    name=f"{name}_indices",
                )
                # with no categorical columns, every entry points at this level's single row (the continuous value is the weight)
            else:
                # The standard encoding of left to right indices given base col_counts[col] for each col
                offsets = np.cumprod([1] + [self.col_counts[col] for col in cat_cols[:-1]])

                # The index in weights where we look up the first of the embeddings for this set of columns
                # This lets us concatenate all embeddings into a single weights matrix rather than defining
                # them separately, and deterministically derive the index in this larger weight matrix.
                index = start + tf.math.add_n(
                    [
                        # hierarchy[col] is column of dataframe
                        tf.constant(offset, dtype=tf.int64) * tf.cast(hierarchy[col], dtype=tf.int64)
                        for offset, col in zip(offsets, cat_cols)
                    ],
                    name=f"{name}_indices",
                )

            # store index for hierarchical parameters and the corresponding continuous weightage
            indices.append(index)
            weights.append(weight)

        # len(self.columns), *shape(hierarchy[<any>])
        weights = tf.stack(weights, axis=0, name="weights_stacked")
        indices = tf.stack(indices, axis=0, name="indices")
        return weights, indices

    def __call__(
        self,
        hierarchy: dict[str, TensorLike] | tuple[tf.Tensor, tf.Tensor],
        training: bool = False,  # noqa: U100
        debug: bool = False,  # noqa: U100
        skip_metrics: bool = False,
    ) -> tf.Tensor:
        """Returns the model parameters' embeddings calculated from the weights.
        Adds l2 regularization penalties to loss based on deviations and bias.

        Args:
            hierarchy (dict[str, TensorLike]): Hierarchy placeholder for Hierarchical embedding variable.
            training (bool, optional): Whether this is a training or inference run. Defaults to False.

        Returns:
            tf.Tensor: Model parameters' embeddings.
        """

        if isinstance(hierarchy, tuple):
            weights, indices = hierarchy
        else:
            # get the hierarchical parameters that correspond to the input hierarchy
            # NOTE: weights is the proper multiplicative relationship using continuous hierarchical variables, not
            #       something from self.weights. Probably should change name in future for readability.
            weights, indices = self.get_hierarchical_parameters(hierarchy)
        # Look up embeddings by indices
        # len(self.columns), *shape(hierarchy[<any>]), np.prod(self.shape)

        if self.increase_lr is not None:
            lr_scaled_weights = self.weights * tf.constant(self.increase_lr, dtype=tf.float32)
        else:
            lr_scaled_weights = self.weights

        looked_up = tf.gather(lr_scaled_weights, indices, name="embeds")
        # Optimization and convert to tensor
        # counts,
        scatters = tf.constant(self.scatters, dtype=tf.int64)
        # Do a matrix multiply to sum over columns
        # *shape(hierarchy[<any>]), np.prod(self.shape)
        weighted = tf.einsum("c...f,c...->...f", looked_up, weights, name="weighted")

        # This is num_regularized_categories x flattened using the same scatter trick as for means
        # len(self.multipliers), np.prod(self.shape)
        cur_l2_norm = tf.scatter_nd(
            scatters[:, None],
            tf.math.square(lr_scaled_weights, name="shifted_squared"),
            shape=self.dense_shape,
            name="cur_l2_norm",
        )[1:]
        # We want to apply l2 regularization so that this ratio is pushed to be 1 or less.
        # len(self.multipliers), np.prod(self.shape)
        cur_ratio = cur_l2_norm / tf.constant(self.desired_l2norms[:, None] + EPSILON, dtype=tf.float32)
        # Old negative feedback was roughly cur_ratio ** 2 (in the steady state). This just makes it explicit.
        # We don't care if it is over 0 so we shift down by 1 then up by 1 to get it to be the same scale
        if self.use_l2_squared:
            hier_reg = tf.math.reduce_sum(
                tf.math.square(cur_ratio) * tf.constant(self.penalty_mults[:, None], dtype=tf.float32),
                name="hier_reg",
            )
        else:
            hier_reg = cur_ratio * tf.constant(self.penalty_mults[:, None], dtype=tf.float32)
        if not skip_metrics:
            self.add_loss("hier_reg", hier_reg, category="hier")
        if self.use_bias:
            if self.increase_lr is not None:
                lr_scaled_bias = self.bias * tf.constant(self.increase_lr, dtype=tf.float32)
            else:
                lr_scaled_bias = self.bias

            if self.reg_bias > 0 and not skip_metrics:
                bias_loss = tf.math.reduce_sum(tf.math.square(lr_scaled_bias))
                self.add_loss("reg_bias", bias_loss, category="aux", mult=self.reg_bias)

            result = tf.nn.bias_add(weighted, lr_scaled_bias, name="biased")
        else:
            result = weighted

        # We want to undo the flattening we did for simpler logic.
        initial_shape = [tf.shape(result)[i] for i in range(len(result.shape) - 1)]
        # *shape(hierarchy[<any>]), *self.shape
        return tf.reshape(result, [*initial_shape, *self.shape], name="final_var")

    def get_tensors(
        self, dy_dweights: tf.Tensor | tf.Variable | None = None, dy_dbias: tf.Tensor | tf.Variable | None = None
    ) -> tuple[dict[str, tf.Tensor], dict[str, list[dict] | pd.MultiIndex], list[str | int]]:
        """Get the learned weights for a HierarchicalEmbedding layer"""
        output_tensors: dict[str, tf.Tensor] = {}
        output_indices: dict[str, pd.Index | pd.MultiIndex] = {}

        weights = self.weights if dy_dweights is None else dy_dweights
        feature_names = self._get_feature_names(weights)
        if self.use_bias:
            bias = self.bias if dy_dbias is None else dy_dbias
            self._process_bias(bias, output_tensors, output_indices)
        self._process_columns_in_tensors(output_tensors, output_indices, weights)

        return output_tensors, output_indices, feature_names

    def _get_feature_names(self, weights: tf.Tensor | tf.Variable) -> list[str]:
        n_features = weights.shape[-1]
        feature_names = list(range(n_features)) if self.feature_names is None else self.feature_names
        if len(feature_names) != n_features:
            if n_features % len(feature_names) == 0:
                num_dups = n_features // len(feature_names)
                feature_names = [f"{name}_{i+1}" for name in feature_names for i in range(num_dups)]
            else:
                raise ValueError(
                    f"feature_names must be a list of size {n_features}, but got size {len(feature_names)}"
                )
        return feature_names

    def _process_bias(
        self,
        bias: tf.Tensor | tf.Variable,
        output_tensors: dict[str, tf.Tensor],
        output_indices: dict[str, pd.Index | pd.MultiIndex],
    ):
        output_tensors["bias"] = tf.expand_dims(bias, axis=0)
        output_indices["bias"] = pd.Index(["bias"])

    def _process_columns_in_tensors(
        self,
        output_tensors: dict[str, tf.Tensor],
        output_indices: dict[str, pd.Index | pd.MultiIndex],
        weights: tf.Tensor,
    ):
        n_features = weights.shape[-1]
        for col_names in self.columns:
            if isinstance(col_names, str):
                col_names = [col_names]
            hierarchy, output_index = self._get_hierarchy_and_output_index(col_names)
            learned_weights = self._get_learned_weights(hierarchy, col_names, weights)
            learned_weights = self._reshape_learned_weights_if_needed(learned_weights, n_features)
            output_tensors[self.stitched_cols(col_names)] = learned_weights
            output_indices[self.stitched_cols(col_names)] = output_index

    def _get_hierarchy_and_output_index(
        self, col_names: list[str]
    ) -> tuple[dict[str, NDArray], pd.Index | pd.MultiIndex]:
        cat_cols = [col for col in col_names if self.encodings[col] != "continuous"]
        cont_cols = [col for col in col_names if self.encodings[col] == "continuous"]
        num_cat_cols = len(cat_cols)

        if num_cat_cols == 0:
            hierarchy = {cont_cols[0]: np.asarray([1.0])}
            output_index = pd.Index([cont_cols[0]])
        else:
            midx = pd.MultiIndex.from_product([self.encodings[c].values() for c in cat_cols], names=cat_cols)
            output_index = pd.MultiIndex.from_product([self.encodings[c].keys() for c in cat_cols], names=cat_cols)
            if len(cont_cols) > 0:
                # TODO (@RyanSaxe): why is cont_cols[0] is used? adding a comment will be helpful.
                midx = pd.concat({1.0: pd.DataFrame(index=midx)}, names=[cont_cols[0]]).index
            hierarchy = {h: midx.get_level_values(h).to_numpy() for h in midx.names}

        return hierarchy, output_index

    def _get_learned_weights(
        self, hierarchy: dict[str, NDArray], col_names: list[str], weights: tf.Tensor
    ) -> tf.Tensor:
        name = self.stitched_cols(col_names)
        start = self.offsets[name]
        cat_cols = [col for col in col_names if self.encodings[col] != "continuous"]
        cont_cols = [col for col in col_names if self.encodings[col] == "continuous"]

        if len(cont_cols) == 0:
            shape = tf.shape(hierarchy[list(hierarchy.keys())[0]])
            weight = tf.ones(shape, dtype=tf.float32, name=f"{self.stitched_cols(col_names)}_weights")
        else:
            weight = hierarchy[cont_cols[0]]
        if len(cat_cols) == 0:
            index = tf.cast(
                tf.fill(tf.shape(weight), self.offsets[name]),
                dtype=tf.int64,
                name=f"{name}_indices",
            )
        else:
            offsets = np.cumprod([1] + [self.col_counts[col] for col in cat_cols[:-1]])
            index = start + tf.math.add_n(
                [
                    tf.constant(offset, dtype=tf.int64) * tf.cast(hierarchy[col], dtype=tf.int64)
                    for offset, col in zip(offsets, cat_cols)
                ]
            )

        return tf.gather(weights, index, name="embeds")

    def _reshape_learned_weights_if_needed(self, learned_weights: tf.Tensor, n_features: int) -> tf.Tensor:
        if len(learned_weights.shape) > 2:
            flattened_shape = prod(learned_weights.shape[:-1])
            return tf.reshape(learned_weights, (flattened_shape, n_features))
        return learned_weights

    def get_dfs(
        self, dy_dweights: tf.Tensor | None = None, dy_dbias: tf.Tensor | None = None
    ) -> dict[str, pd.DataFrame]:
        """Get the learned weights for a HierarchicalEmbedding layer as a DataFrame"""
        # NOTE: separated this function so we could more easily differentiate
        output_tensors, output_indices, feature_names = self.get_tensors(dy_dweights=dy_dweights, dy_dbias=dy_dbias)
        return {
            key: pd.DataFrame(tensor, index=output_indices[key], columns=feature_names)
            for key, tensor in output_tensors.items()
        }

    @property
    def dfs(self) -> dict[str, pd.DataFrame]:
        return self.get_dfs()

__call__(hierarchy, training=False, debug=False, skip_metrics=False)

Returns the model parameters' embeddings calculated from the weights. Adds l2 regularization penalties to loss based on deviations and bias.

Parameters:

    hierarchy (dict[str, TensorLike] | tuple[tf.Tensor, tf.Tensor]): Hierarchy placeholder for the Hierarchical
        embedding variable. Required.
    training (bool): Whether this is a training or inference run. Defaults to False.

Returns:

    tf.Tensor: Model parameters' embeddings.
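
Internally, the embedding rows selected for each hierarchy level are combined with tf.einsum("c...f,c...->...f", ...): every level's looked-up embedding (axis c) is scaled by that level's continuous weight (1.0 for purely categorical levels) and the levels are summed. A self-contained illustration of that contraction:

import tensorflow as tf

# 2 hierarchy levels (c), 3 entities in the batch (...), 2 features (f).
looked_up = tf.constant(
    [[[0.2, -0.1], [0.2, -0.1], [-0.3, 0.4]],   # level 1 (e.g. brand) embeddings per entity
     [[0.1, 0.1], [0.0, -0.2], [0.1, 0.1]]],    # level 2 (e.g. vehicle) embeddings per entity
    dtype=tf.float32,
)
# Multiplicative weights per level and entity: all ones for categorical levels.
weights = tf.ones([2, 3], dtype=tf.float32)

# Sum over the level axis; result has shape (3, 2): one combined embedding per entity.
weighted = tf.einsum("c...f,c...->...f", looked_up, weights)
print(weighted.numpy())  # first row is [0.3, 0.0], i.e. brand + vehicle contributions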

Source code in wt_ml/layers/hier_embedding.py
def __call__(
    self,
    hierarchy: dict[str, TensorLike] | tuple[tf.Tensor, tf.Tensor],
    training: bool = False,  # noqa: U100
    debug: bool = False,  # noqa: U100
    skip_metrics: bool = False,
) -> tf.Tensor:
    """Returns the model parameters' embeddings calculated from the weights.
    Adds l2 regularization penalties to loss based on deviations and bias.

    Args:
        hierarchy (dict[str, TensorLike]): Hierarchy placeholder for Hierarchical embedding variable.
        training (bool, optional): Whether this is a training or inference run. Defaults to False.

    Returns:
        tf.Tensor: Model parameters' embeddings.
    """

    if isinstance(hierarchy, tuple):
        weights, indices = hierarchy
    else:
        # get the hierarchical parameters that correspond to the input hierarchy
        # NOTE: weights is the proper multiplicative relationship using continuous hierarchical variables, not
        #       something from self.weights. Probably should change name in future for readability.
        weights, indices = self.get_hierarchical_parameters(hierarchy)
    # Look up embeddings by indices
    # len(self.columns), *shape(hierarchy[<any>]), np.prod(self.shape)

    if self.increase_lr is not None:
        lr_scaled_weights = self.weights * tf.constant(self.increase_lr, dtype=tf.float32)
    else:
        lr_scaled_weights = self.weights

    looked_up = tf.gather(lr_scaled_weights, indices, name="embeds")
    # Optimization and convert to tensor
    # counts,
    scatters = tf.constant(self.scatters, dtype=tf.int64)
    # Do a matrix multiply to sum over columns
    # *shape(hierarchy[<any>]), np.prod(self.shape)
    weighted = tf.einsum("c...f,c...->...f", looked_up, weights, name="weighted")

    # This is num_regularized_categories x flattened using the same scatter trick as for means
    # len(self.multipliers), np.prod(self.shape)
    cur_l2_norm = tf.scatter_nd(
        scatters[:, None],
        tf.math.square(lr_scaled_weights, name="shifted_squared"),
        shape=self.dense_shape,
        name="cur_l2_norm",
    )[1:]
    # We want to apply l2 regularization so that this ratio is pushed to be 1 or less.
    # len(self.multipliers), np.prod(self.shape)
    cur_ratio = cur_l2_norm / tf.constant(self.desired_l2norms[:, None] + EPSILON, dtype=tf.float32)
    # Old negative feedback was roughly cur_ratio ** 2 (in the steady state). This just makes it explicit.
    # We don't care if it is over 0 so we shift down by 1 then up by 1 to get it to be the same scale
    if self.use_l2_squared:
        hier_reg = tf.math.reduce_sum(
            tf.math.square(cur_ratio) * tf.constant(self.penalty_mults[:, None], dtype=tf.float32),
            name="hier_reg",
        )
    else:
        hier_reg = cur_ratio * tf.constant(self.penalty_mults[:, None], dtype=tf.float32)
    if not skip_metrics:
        self.add_loss("hier_reg", hier_reg, category="hier")
    if self.use_bias:
        if self.increase_lr is not None:
            lr_scaled_bias = self.bias * tf.constant(self.increase_lr, dtype=tf.float32)
        else:
            lr_scaled_bias = self.bias

        if self.reg_bias > 0 and not skip_metrics:
            bias_loss = tf.math.reduce_sum(tf.math.square(lr_scaled_bias))
            self.add_loss("reg_bias", bias_loss, category="aux", mult=self.reg_bias)

        result = tf.nn.bias_add(weighted, lr_scaled_bias, name="biased")
    else:
        result = weighted

    # We want to undo the flattening we did for simpler logic.
    initial_shape = [tf.shape(result)[i] for i in range(len(result.shape) - 1)]
    # *shape(hierarchy[<any>]), *self.shape
    return tf.reshape(result, [*initial_shape, *self.shape], name="final_var")
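
The hier_reg term above leans on tf.scatter_nd summing updates that land on the same index: self.scatters maps every weight row to its hierarchy-level id (index 0 is a throwaway bucket that gets sliced off), so scattering the squared weights yields one l2 norm per level in a single op. A small sketch of that trick with two levels of 2 and 3 rows:

import tensorflow as tf

# Rows 0-1 belong to level 1, rows 2-4 to level 2 (bucket 0 stays unused).
scatters = tf.constant([1, 1, 2, 2, 2], dtype=tf.int64)
weights = tf.constant([[1.0], [2.0], [1.0], [1.0], [3.0]], dtype=tf.float32)

per_level_l2 = tf.scatter_nd(
    scatters[:, None],       # duplicate indices are summed by scatter_nd
    tf.math.square(weights),
    shape=[3, 1],            # 1 + number of regularized levels, flattened feature dim
)[1:]                        # drop the unused bucket at index 0
print(per_level_l2.numpy())  # [[5.], [11.]] -> sum of squared weights per level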

__init__(shape, encodings, columns=None, use_bias=True, dropped_columns=[], initializer=0.0, bias_initializer=0.0, hyperparameters=None, feature_names=None, name=None, increase_lr=None)

Initializes the hierarchical embedding object with hierarchy levels, parameter shape and other initializers.

Parameters:

    shape (list[int]): Desired dimensions of model parameters only within final result. Required.
    encodings (dict[str, Any]): Encodings of the hierarchy columns for which embeddings are trained. Required.
    columns (list[str | list[str]] | None): Hierarchy levels to learn embeddings. Defaults to None.
    use_bias (bool): Whether to include bias. Defaults to True.
    dropped_columns (list): Columns to exclude from the hierarchy. Defaults to [].
    initializer (Initializer): Initializer for embeddings (weights). Defaults to 0.0.
    bias_initializer (Initializer): Initializer for bias. Defaults to 0.0.
    hyperparameters (Hyperparams | None): Dictionary of hyperparameters for building this layer. Defaults to None.
    feature_names (list[list[str]] | list[str] | None): Names for the features along the embedding's last dimension.
        Defaults to None.
    name (str | None): Name of the layer. Defaults to None.
    increase_lr (float | None): Multiplier applied to the stored weights and bias, effectively scaling their
        learning rate. Defaults to None.
Source code in wt_ml/layers/hier_embedding.py
def __init__(
    self,
    shape: list[int],
    encodings: dict[str, Any],
    columns: list[str | list[str]] | None = None,
    use_bias: bool = True,
    dropped_columns=[],
    initializer: Initializer = 0.0,
    bias_initializer: Initializer = 0.0,
    hyperparameters: Hyperparams | None = None,
    feature_names: list[list[str]] | list[str] | None = None,
    name: str | None = None,
    increase_lr: float | None = None,
):
    """Initializes the hierarchical embedding object with hierarchy levels, parameter shape
    and other initializers.

    Args:
        shape (list[int]): Desired dimensions of model parameters only within final result.
        hierarchy (pd.DataFrame): The hierarchy for which embeddings are trained.
        columns (list[str  |  list[str]] | None, optional): Hierarchy levels to learn embeddings. Defaults to None.
        use_bias (bool, optional): Whether to include bias. Defaults to True.
        dropped_columns (list, optional): Columns to exclude in hierarchy.
                                    Defaults to ["granular", "region", "coastal", "populationdensity", "medianage"].
        initializer (Initializer, optional): Initializer for embeddings(weights). Defaults to 0.0.
        bias_initializer (Initializer, optional): Initializer for bias. Defaults to 0.0.
        hyperparameters (Hyperparams | None, optional): Dictionary of hyperparameters for buidling this layer.
                                                        Defaults to None.
        name (str | None, optional): Name of the layer. Defaults to None.
    """
    super().__init__(hyperparameters=hyperparameters, name=name)
    self.use_bias = use_bias
    encodings_dropped = {k: v for k, v in encodings.items() if k not in dropped_columns}
    assert encodings_dropped, "No cols in hierarchy."
    self.encodings = encodings_dropped
    self.shape = shape
    self.increase_lr = increase_lr
    self.initializer = initializer
    self.bias_initializer = bias_initializer
    self.created_reg = False
    self.feature_names = feature_names
    if self.feature_names is not None and isinstance(self.feature_names[0], list):
        self.feature_names = [elem for sublist in self.feature_names for elem in sublist]
    if columns is not None:
        self._process_columns(columns)
    else:
        self.columns = list(self.encodings.keys())
        self.used_cols = set(self.columns)

build(input_shapes)

Builds hyperparameters, deviations, embeddings (weights), bias and other intermediate variables.

Parameters:

    input_shapes (InputShapes): The effect and hierarchy shapes. Required.

Raises:

    AllUniqueError: When there are no hierarchical columns because everything is unique within them.

Source code in wt_ml/layers/hier_embedding.py
def build(self, input_shapes):  # noqa: U100
    """Builds hyperparamters, deviations, embeddings(weights), bias and other intermediate variables.

    Args:
        input_shapes (InputShapes): The effect and hierarchy shapes.

    Raises:
        AllUniqueError: When there are no hierarchical columns because everything is unique within them.
    """
    self.use_l2_squared = self.hyperparameters.get_bool(
        "use_l2_squared",
        default=False,
        help="Use the l2 norm to the fourth power instead of only using it for large values for stability.",
    )
    self.desired_stddev = self.hyperparameters.get_float(
        "desired_stddev",
        default=0.10,
        min=0.01,
        max=100.0,
        help="The desired maximum value for the stddev along the full hierarchy.",
    )
    self.use_inv_sqrt = self.hyperparameters.get_bool(
        "use_inv_sqrt",
        default=True,
        help="Scale the stddev for each category by the inverse square root of the number of unique values.",
    )

    if self.use_bias:
        self.reg_bias = self.hyperparameters.get_float(
            "reg_bias",
            default=0.0,
            min=0.0,
            max=1e4,
            help="The strength of l2 regularization to apply to the bias term.",
        )
    self.offsets = {}
    self.col_counts = {
        k: (
            self.encodings[k]
            if isinstance(self.encodings[k], (float, int))
            else ((max(self.encodings[k].values()) + 1) if not isinstance(self.encodings[k], str) else 1)
        )
        for k in tf.nest.flatten(self.columns)
    }
    var_counts = [
        self.col_counts[col] if isinstance(col, str) else np.prod([self.col_counts[k] for k in col])
        for col in self.columns
    ]

    self.columns = [col for col, count in zip(self.columns, var_counts) if count > 1 or self.is_continuous(col)]
    count = 0
    desired_stddevs = []
    # Scatters is the inverse of gathering from num_regularized_categories + 1 to count of weights
    self.scatters = []
    self.penalty_mults = []
    multipliers = []
    reg_counts = []
    flattened = int(np.prod(self.shape)) if len(self.shape) > 0 else 1
    for col_names in self.columns:
        if isinstance(col_names, str):
            col_names = [col_names]
        cat_cols = [col for col in col_names if self.encodings[col] != "continuous"]
        name = self.stitched_cols(col_names)
        num_cont_cols = len(col_names) - len(cat_cols)
        if num_cont_cols > 1:
            raise ValueError(
                "You can only have one continuous hierarchical variable within a single hierarchical level"
            )
        number = int(np.prod([self.col_counts[k] for k in col_names]))
        multipliers.append(1 / number)
        self.scatters += [len(multipliers)] * number
        desired_stddevs.append(1 / np.sqrt(number) if self.use_inv_sqrt else 1)
        reg_counts.append(max(1, number - 1))
        self.offsets[name] = count
        count += number
        self.penalty_mults.append(self.get_reg_mult(col_names))

    if count == 0 or len(multipliers) == 0:
        raise AllUniqueError("There are no hierarchical columns; everything is unique.")
    # scatters is shape (count,)
    self.scatters = np.array(self.scatters)
    # multipliers is shape (1 + regularized_counts,)
    self.multipliers = np.array([0] + multipliers, dtype=np.float32)
    self.penalty_mults = np.array(self.penalty_mults, dtype=np.float32)
    self.dense_shape = [len(self.multipliers), flattened]
    # desired_* is shape (regularized_counts,)
    self.desired_stddevs = (
        self.desired_stddev * np.array(desired_stddevs or [1], dtype=np.float32) / np.sqrt(max(1, len(multipliers)))
    )
    self.desired_l2norms = (
        np.array(reg_counts, dtype=np.float32) * self.desired_stddevs**2 / (1 if self.use_l2_squared else 2)
    )
    self.weights = self.create_var(
        "weights", shape=[count, flattened], dtype=tf.float32, trainable=True, initializer=self.initializer
    )
    if self.use_bias:
        self.bias = self.create_var(
            "bias", shape=[flattened], dtype=tf.float32, trainable=True, initializer=self.bias_initializer
        )
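
As a concrete instance of the targets computed above: with the defaults (desired_stddev = 0.1, use_inv_sqrt = True, use_l2_squared = False) and two hierarchy levels of made-up sizes 3 and 12, the regularization budgets work out as follows, mirroring the formulas in build:

import numpy as np

desired_stddev = 0.10
level_sizes = np.array([3, 12], dtype=np.float32)    # `number` per hierarchy level
reg_counts = np.maximum(1, level_sizes - 1)          # degrees of freedom per level

# Each level's stddev budget shrinks with its size and with the number of levels.
desired_stddevs = desired_stddev * (1 / np.sqrt(level_sizes)) / np.sqrt(len(level_sizes))
desired_l2norms = reg_counts * desired_stddevs**2 / 2  # use_l2_squared = False

print(desired_stddevs)   # ~[0.0408, 0.0204]
print(desired_l2norms)   # ~[0.0017, 0.0023]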

get_dfs(dy_dweights=None, dy_dbias=None)

Get the learned weights for a HierarchicalEmbedding layer as a DataFrame

Source code in wt_ml/layers/hier_embedding.py
def get_dfs(
    self, dy_dweights: tf.Tensor | None = None, dy_dbias: tf.Tensor | None = None
) -> dict[str, pd.DataFrame]:
    """Get the learned weights for a HierarchicalEmbedding layer as a DataFrame"""
    # NOTE: separated this function so we could more easily differentiate
    output_tensors, output_indices, feature_names = self.get_tensors(dy_dweights=dy_dweights, dy_dbias=dy_dbias)
    return {
        key: pd.DataFrame(tensor, index=output_indices[key], columns=feature_names)
        for key, tensor in output_tensors.items()
    }

get_hierarchical_parameters(hierarchy)

Returns the model parameters for every hierarchical level (non-aggregated weights)

Parameters:

    hierarchy (dict[str, TensorLike]): Hierarchy placeholder for the Hierarchical embedding variable. Required.

NOTE: this currently does not depend on training flag. Possible we change how things work such that it will.

Returns:

tuple[tf.Tensor, tf.Tensor]: weights, indices

    the 1st list[tf.Tensor]=A: A[i] corresponds to the multiplicative data for the continuous aspects
                                    of the hierarchy in self.columns[i]
    the 2nd list[tf.Tensor]=B: B[i] corresponds to the indices in self.weights that corresponds to the
                                    correct learned coefficients of the hierarchy in self.columns[i]
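
The categorical part of each index is a mixed-radix code: offsets = cumprod([1, count_0, count_1, ...]) gives place values so that every combination of category codes maps to a distinct row inside that level's slice of the shared weights matrix, shifted by the level's start offset. A self-contained sketch with toy sizes:

import numpy as np

# Two crossed categorical columns: 3 vehicles x 2 brands, with this level's slice starting at row 5.
col_counts = {"vehicle": 3, "brand": 2}
cat_cols = ["vehicle", "brand"]
start = 5

# Same construction as in the layer: base-`col_counts` place values, left to right.
offsets = np.cumprod([1] + [col_counts[c] for c in cat_cols[:-1]])  # [1, 3]

vehicle_codes = np.array([0, 1, 2, 0])
brand_codes = np.array([0, 0, 0, 1])
index = start + offsets[0] * vehicle_codes + offsets[1] * brand_codes
print(index)  # [5 6 7 8]; rows 5..10 cover all 6 (vehicle, brand) combinations
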
Source code in wt_ml/layers/hier_embedding.py
def get_hierarchical_parameters(self, hierarchy: Mapping[str, TensorLike]) -> tuple[tf.Tensor, tf.Tensor]:
    """Returns the model parameters' for every hierarchical level (non-aggregated weights)

    Args:
        hierarchy (dict[str, TensorLike]): Hierarchy placeholder for Hierarchial embedding variable.

    NOTE: this currently does not depend on training flag. Possible we change how things work such that it will.

    Returns:

        tuple[tf.Tensor, tf.Tensor]: weights, indices

            the 1st list[tf.Tensor]=A: A[i] corresponds to the multiplicative data for the continuous aspects
                                            of the hierarchy in self.columns[i]
            the 2nd list[tf.Tensor]=B: B[i] corresponds to the indices in self.weights that correspond to the
                                            correct learned coefficients of the hierarchy in self.columns[i]
    """
    # Shape is [count, ...] for both of these
    weights = []
    indices = []
    for col_names in self.columns:
        if isinstance(col_names, str):
            # We want to assume col_names is a list of column names
            col_names = [col_names]
        num_cols = len(col_names)
        cat_cols = [col for col in col_names if self.encodings[col] != "continuous"]
        cont_cols = [col for col in col_names if self.encodings[col] == "continuous"]
        num_cat_cols = len(cat_cols)
        num_cont_cols = num_cols - num_cat_cols

        name = self.stitched_cols(col_names)
        # The start of the region for this weight
        start = self.offsets[name]
        if num_cont_cols == 0:
            shape = tf.shape(hierarchy[cat_cols[0]])
            weight = tf.ones(shape, dtype=tf.float32, name=f"{name}_weights")
        else:
            # you can only have 1 cont col in col_names
            weight = hierarchy[cont_cols[0]]

        if num_cat_cols == 0:
            index = tf.cast(
                tf.fill(tf.shape(weight), start),
                dtype=tf.int64,
                name=f"{name}_indices",
            )
            # if no categorical columns, we have the value of continuous column as the index
        else:
            # The standard encoding of left to right indices given base col_counts[col] for each col
            offsets = np.cumprod([1] + [self.col_counts[col] for col in cat_cols[:-1]])

            # The index in weights where we look up the first of the embeddings for this set of columns
            # This lets us concatenate all embeddings into a single weights matrix rather than defining
            # them separately, while still being able to deterministically derive the index in this larger weight matrix.
            index = start + tf.math.add_n(
                [
                    # hierarchy[col] is column of dataframe
                    tf.constant(offset, dtype=tf.int64) * tf.cast(hierarchy[col], dtype=tf.int64)
                    for offset, col in zip(offsets, cat_cols)
                ],
                name=f"{name}_indices",
            )

        # store the index for hierarchical parameters and the corresponding continuous weighting
        indices.append(index)
        weights.append(weight)

    # len(self.columns), *shape(hierarchy[<any>])
    weights = tf.stack(weights, axis=0, name="weights_stacked")
    indices = tf.stack(indices, axis=0, name="indices")
    return weights, indices
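The index arithmetic above is a mixed-radix encoding: each categorical column contributes its code times the product of the cardinalities of the columns to its left. A small numpy sketch with made-up cardinalities shows the idea:

import numpy as np

col_counts = {"brand": 3, "wholesaler": 4}  # hypothetical cardinalities
cat_cols = ["brand", "wholesaler"]
offsets = np.cumprod([1] + [col_counts[c] for c in cat_cols[:-1]])  # -> [1, 3]

# A batch of (brand, wholesaler) codes.
hierarchy = {"brand": np.array([0, 2, 1]), "wholesaler": np.array([0, 3, 2])}
index = sum(off * hierarchy[c] for off, c in zip(offsets, cat_cols))
print(index)  # [ 0 11  7] -- a unique row per (brand, wholesaler) pair, 12 rows in total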

get_reg_config(col_names)

Creates the name used for regularization and the default value for the penalty multiplier. If all columns are categorical, we can just join their names to find the penalty. Otherwise, when different continuous features are paired with the same categorical column, the resulting hierarchical categories share the same penalty; the continuous string is always suffixed to the end of the name. Example: brand-DEM and brand-GOP share the same penalty, called reg_brand-continuous. Examples of mixed categories: reg_brand-continuous, reg_vehicle-continuous.

Parameters:

    col_names (list[str]): Hierarchical column names. Required.

Returns:

    tuple[str, float]: Regularization penalty name and the default value.

Source code in wt_ml/layers/hier_embedding.py
def get_reg_config(self, col_names: tuple[str] | list[str]) -> tuple[str, float]:
    """Creates name used for regularization and default value for the penalty multiplier.
    If all columns are categorical, we can just join their names in order to find penalty.
    Otherwise, when different continuous features are paired with a same categorical column,
    the resulting hierarchical categories share same penalty. Always, suffix the continuous
    string to the end of the name.
    Example: brand-DEM and brand-GOP have the same penalty called reg_brand-continuous.
    Examples of mixed categories: reg_brand-continuous, reg_vehicle-continuous.

    Args:
        col_names (list[str]): Hierarchical column names.

    Returns:
        tuple[str, float]: Regularization penalty name and the default value.
    """
    count = int(np.prod([self.col_counts[k] for k in col_names]))
    if count == 1:
        # Purely continuous features
        default_value = 0.0
    else:
        default_value = 1.0
    names = [name for name in col_names if not self.is_continuous(name)]
    names.append("continuous") if self.is_continuous(col_names) else None
    reg_name = f"reg_{self.stitched_cols(names)}"
    return reg_name, default_value
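Illustrative calls (a sketch, assuming `emb` is a built instance, "brand" and "vehicle" are categorical columns, and "DEM"/"GOP" are continuous columns as in the docstring; exact defaults depend on col_counts):

# Purely categorical level: the names are simply joined.
emb.get_reg_config(["brand", "vehicle"])   # -> ("reg_brand-vehicle", 1.0)

# Categorical + continuous: the continuous column collapses to the suffix "continuous",
# so brand-DEM and brand-GOP share one penalty.
emb.get_reg_config(["brand", "DEM"])       # -> ("reg_brand-continuous", 1.0)
emb.get_reg_config(["brand", "GOP"])       # -> ("reg_brand-continuous", 1.0)

# Purely continuous level: count == 1, so the default penalty is 0.0.
emb.get_reg_config(["DEM"])                # -> ("reg_continuous", 0.0)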

get_reg_mult(col_names)

Returns the penalty multiplier for hierarchy level reg loss.

Source code in wt_ml/layers/hier_embedding.py
def get_reg_mult(self, col_names: list[str] | tuple[str]) -> float:
    """Returns the penalty multiplier for hierarchy level reg loss."""
    reg_name, default = self.get_reg_config(col_names)
    mult = self.hyperparameters.get_float(
        name=reg_name,
        default=default,
        help="Penalty multiplier for hierarchy level reg loss.",
    )
    return mult

get_tensors(dy_dweights=None, dy_dbias=None)

Get the learned weights for a HierarchicalEmbedding layer

Source code in wt_ml/layers/hier_embedding.py
def get_tensors(
    self, dy_dweights: tf.Tensor | tf.Variable | None = None, dy_dbias: tf.Tensor | tf.Variable | None = None
) -> tuple[dict[str, tf.Tensor], dict[str, list[dict] | pd.MultiIndex], list[str | int]]:
    """Get the learned weights for a HierarchicalEmbedding layer"""
    output_tensors: dict[str, tf.Tensor] = {}
    output_indices: dict[str, pd.Index | pd.MultiIndex] = {}

    weights = self.weights if dy_dweights is None else dy_dweights
    feature_names = self._get_feature_names(weights)
    if self.use_bias:
        bias = self.bias if dy_dbias is None else dy_dbias
        self._process_bias(bias, output_tensors, output_indices)
    self._process_columns_in_tensors(output_tensors, output_indices, weights)

    return output_tensors, output_indices, feature_names

stitched_cols(col_names)

Returns a string representation of the columns.

Source code in wt_ml/layers/hier_embedding.py
def stitched_cols(self, col_names: str | list[str] | tuple) -> str:
    """Returns a string representation of the columns."""
    return col_names if isinstance(col_names, str) else "-".join(col_names)

LinearBaseline

Bases: Module

Source code in wt_ml/layers/linear_baseline.py
class LinearBaseline(Module):
    def __init__(
        self,
        starting_sales: np.ndarray,
        num_starts: int,
        encodings: dict[str, int],
        hyperparameters: Hyperparams | None = None,
        name: str | None = None,
    ):
        """Class initialization to create linear regression lines for calculating baseline, for each granularity.

        Args:
            starting_sales (np.ndarray): Sales at the start of each baseline. shape = num_starts x num_granularity.
            num_starts (int): No. of starting points for each granularity.
            hyperparameters (Hyperparams, optional): All hyperparameters.
            name (str | None, optional): Name of the layer. Defaults to None.
        """
        super().__init__(hyperparameters=hyperparameters, name=name)
        self.starting_sales = starting_sales
        self.num_starts = num_starts
        self.encodings = encodings

    def build(self, input_shapes):
        """Build the layer parameters needed for calculating linear baseline.

        Args:
            input_shapes (Tuple[tf.Tensor, ...]): Tuple of tensor shapes of `*args`(without the defaults)
                                                  passed to `__call__()`.
        """
        self.num_starts = self.num_starts if input_shapes.sales_num_restarts is not ... else 1
        shape = [self.num_starts, len(self.encodings["wholesaler"]), len(self.encodings["brand"])]
        self.use_perfect_adjustment = self.hyperparameters.get_bool(
            "use_perfect_adjustment",
            default=False,
            help="Instead of using gradient descent change directly to the optimal values.",
        )
        if self.use_perfect_adjustment:
            self.lr_scale = 1.0
            if self.num_starts > 1:
                raise ValueError("Perfect adjustments does not support restarts at this time.")
            self.scalar_so_softplus_approaches_0 = 1.0
            self.allow_slope = False
            self.perfection_speed = self.hyperparameters.get_float(
                "perfection_speed",
                default=0.99,
                min=0.01,
                max=1.0,
                help="How close to make the step to the optimal value.",
            )
            self.relative_scale_cap = self.hyperparameters.get_float(
                "relative_scale_cap",
                default=10.0,
                min=1.0,
                max=1000.0,
                help="The maximum ratio it can have to the provided initial value.",
            )
            self.upper_bound = np.tile(
                (
                    (
                        self.relative_scale_cap * self.starting_sales
                        + np.log(
                            -np.expm1(
                                -self.relative_scale_cap * self.starting_sales * self.scalar_so_softplus_approaches_0
                            )
                            + EPSILON
                        )
                        / self.scalar_so_softplus_approaches_0
                    )
                    / self.lr_scale
                ),
                (self.num_starts, 1, 1),
            )
        else:
            self.lr_scale = self.hyperparameters.get_float(
                "lr_scale",
                default=10.0,
                min=1.0,
                max=1000.0,
                help="A factor to multiply the raw weights by so they get larger gradients.",
            )
            self.scalar_so_softplus_approaches_0 = self.hyperparameters.get_float(
                "scalar_so_softplus_approaches_0",
                default=1.0,
                min=1.0,
                max=100.0,
                help="The temperature to apply to softplus to max it better approximate the relu function.",
            )
            self.allow_slope = (
                self.hyperparameters.get_bool(
                    "allow_slope",
                    default=False,
                    help="Whether to allow slope in baseline",
                )
                and input_shapes.dates_since_start is not ...
            )
        self.baseline_intercept = self.create_var(
            # Start a little lower to allow roicurves to start higher.
            "intercept",
            shape=shape,
            dtype=tf.float32,
            initializer=np.tile(
                (
                    (
                        0.8 * self.starting_sales
                        + np.log(-np.expm1(-0.8 * self.starting_sales * self.scalar_so_softplus_approaches_0) + EPSILON)
                        / self.scalar_so_softplus_approaches_0
                    )
                    / self.lr_scale
                ).astype(np.float32),
                (self.num_starts, 1, 1),
            ),
            trainable=not self.use_perfect_adjustment,
        )
        if self.allow_slope:
            self.baseline_slope = self.create_var(
                "slope", shape=shape, dtype=tf.float32, trainable=not self.use_perfect_adjustment
            )
            self.base_under_0_lambda = self.hyperparameters.get_float(
                "base_under_0_lambda",
                default=1.0e-03,
                min=1.0e-08,
                max=1.0,
                help="The weight for the loss applied to the baseline being below -10 before the softplus.",
            )

    def do_perfect_adjustment(self, batch: EconomicModelInput, intermediaries: "EconomicIntermediaries"):
        if not self.use_perfect_adjustment:
            logger.warning("Cannot do perfect adjustment if the hyperparameter is not enabled.")
            return
        y_mask = intermediaries.mask if intermediaries.mask is not None else tf.ones_like(intermediaries.y_smooth)
        multiplicative_impact = prod_n(intermediaries.impacts.multiplicative_impacts)
        additive_impact = tf.math.add_n(intermediaries.impacts.additive_impacts)
        # I thought this needed to be divided by 2, not sure why it doesn't.
        post_softplus_unclipped = tf.einsum(
            "bt,bt,bt->b", y_mask, multiplicative_impact, intermediaries.y_smooth - additive_impact
        ) / (tf.einsum("bt,bt,bt->b", y_mask, multiplicative_impact, multiplicative_impact) + EPSILON)
        post_softplus = tf.math.maximum(post_softplus_unclipped, EPSILON)
        pre_softplus = (
            post_softplus
            + tf.math.log(-tf.math.expm1(-self.scalar_so_softplus_approaches_0 * post_softplus))
            / self.scalar_so_softplus_approaches_0
        ) / self.lr_scale
        gather_indices = tf.squeeze(self.get_indices(batch.wholesaler_index, batch.brand_index, None), 1)
        existing_values = self.baseline_intercept.gather_nd(gather_indices)
        max_values = tf.gather_nd(tf.constant(self.upper_bound, dtype=tf.float32), gather_indices)
        pre_softplus = tf.math.minimum(pre_softplus, max_values)
        self.baseline_intercept.scatter_nd_update(
            gather_indices, self.perfection_speed * pre_softplus + (1 - self.perfection_speed) * existing_values
        )

    def get_indices(self, wholesaler_index: tf.Tensor, brand_index: tf.Tensor, sales_num_restarts: tf.Tensor | None):
        wholesaler_indices = tf.tile(
            tf.cast(wholesaler_index[:, None], dtype=tf.int32),
            [1, tf.shape(sales_num_restarts)[1] if sales_num_restarts is not None else 1],
        )
        brand_indices = tf.tile(
            tf.cast(brand_index[:, None], dtype=tf.int32),
            [1, tf.shape(sales_num_restarts)[1] if sales_num_restarts is not None else 1],
        )
        return tf.cast(
            tf.stack(
                [
                    sales_num_restarts if sales_num_restarts is not None else tf.zeros_like(brand_indices),
                    wholesaler_indices,
                    brand_indices,
                ],
                axis=2,
            ),
            tf.int64,
        )

    def __call__(
        self,
        batch: LinearBaselineInput,
        training=False,  # noqa: U100
        debug=False,
        skip_metrics=False,  # noqa: U100
    ) -> LinearBaselineIntermediaries:
        """Calcuate baseline using slope-intercept form (y=mx+c).

        Args:
            dates_since_start (TensorLike): Number of timestamps since the last restart.
                                            shape = num_time x num_granular.
            sales_num_restarts (TensorLike): Number of restarts that occurred before this point.
                                             shape = num_time x num_granular.
            hierarchy (dict[str, TensorLike]): The lookup tables for categorical values.
            mask (TensorLike): Filter for 0 sales or unrealistic sales.
            training (bool, optional): Whether training the layer parameters or not.
                                       Defaults to False.

        Returns:
            LinearBaselineIntermediaries: Intermediate calculations for baseline like slope, intercept, etc.
        """
        indices = self.get_indices(batch.hierarchy["wholesaler"], batch.hierarchy["brand"], batch.sales_num_restarts)
        # For each granularity, gathering the initial baseline intercept across the time axis
        # The purpose is to use the same intercept for a given baseline across all the data points
        # The shape is converted to `num_time x num_granular` from `num_starts x num_granular`
        # The indices to gather for each baseline are stored in `sales_num_restarts`
        # NOTE: Issue in M1 Macbook version of tensorflow causes gather_nd to break when operating
        #       on a variable. tf.convert_to_tensor solves this. Open github issue can be found here:
        #       https://github.com/tensorflow/tensorflow/issues/57549
        broadcasted_intercept = tf.gather_nd(tf.convert_to_tensor(self.baseline_intercept), indices) * tf.constant(
            self.lr_scale, dtype=tf.float32
        )
        if self.allow_slope:
            broadcasted_slope = tf.gather_nd(tf.convert_to_tensor(self.baseline_slope), indices)
            # to convert, run: dates_since_start = tf.cast(
            # dates_since_start, dtype=tf.float32, name="dates_since_start")
            slope_impact = (
                broadcasted_slope
                * batch.dates_since_start
                * 2.0
                / (tf.math.reduce_max(batch.dates_since_start, axis=1, keepdims=True, name="max_dates") + EPSILON)
            )

            baseline_raw = slope_impact + broadcasted_intercept
        else:
            broadcasted_slope = tf.zeros_like(broadcasted_intercept)
            slope_impact = tf.zeros_like(broadcasted_intercept)
            baseline_raw = broadcasted_intercept
        if self.allow_slope and not skip_metrics:
            mask_weekly = (
                tf.cast(batch.mask, dtype=tf.float32) if batch.mask is not None else tf.ones_like(baseline_raw)
            )
            baseline_raw_masked_for_min = baseline_raw * mask_weekly + (1.0 - mask_weekly) * LARGE_EPSILON
            min_base = tf.reduce_min(baseline_raw_masked_for_min, axis=0, keepdims=True)
            # if our minimum value is very negative, then get a loss
            # this is just a softplus with a scale of .1, per product. if the product has negative min_base,
            # (more neg than -0.5), then this will start to be more and more positive
            min_base_with_0_soft = softplus(-10.0 - min_base, AUX_SCALE, name="min_base_with_0_soft")
            # take the MSE of that signal. above
            base_under_0 = tf.reduce_sum(tf.square(min_base_with_0_soft), name="base_under_0")
            self.add_loss("base_under_0", base_under_0, "aux", self.base_under_0_lambda)
        baseline = softplus(baseline_raw, scale=1 / self.scalar_so_softplus_approaches_0)
        return LinearBaselineIntermediaries(
            intercept=broadcasted_intercept if debug else None,
            slope=broadcasted_slope if debug else None,
            slope_impact=slope_impact if debug else None,
            baseline_raw=baseline_raw if debug else None,
            baseline=baseline,
        )
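The closed-form update in do_perfect_adjustment above can be read as a one-parameter weighted least-squares fit. Assuming the prediction for each week $t$ is $b \cdot m_t + a_t$ (with $b$ the post-softplus baseline level, $m_t$ the product of the multiplicative impacts and $a_t$ the sum of the additive impacts), minimizing the masked squared error over $b$ gives exactly the expression computed for post_softplus_unclipped:

$\min_b \sum_t w_t \left(b\,m_t + a_t - y_t\right)^2 \;\Rightarrow\; b^{*} = \dfrac{\sum_t w_t\, m_t\,(y_t - a_t)}{\sum_t w_t\, m_t^2}$

Here $w_t$ is the mask and $y_t$ the smoothed sales. The factor of 2 from differentiating the square cancels on both sides (which is why no division by 2 is needed), EPSILON only guards against division by zero, and $b^{*}$ is then pushed through the inverse softplus, capped by upper_bound, and blended into the intercept at the rate perfection_speed.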

__call__(batch, training=False, debug=False, skip_metrics=False)

Calculate baseline using slope-intercept form (y=mx+c).

Parameters:

    dates_since_start (TensorLike): Number of timestamps since the last restart. shape = num_time x num_granular. Required.
    sales_num_restarts (TensorLike): Number of restarts that occurred before this point. shape = num_time x num_granular. Required.
    hierarchy (dict[str, TensorLike]): The lookup tables for categorical values. Required.
    mask (TensorLike): Filter for 0 sales or unrealistic sales. Required.
    training (bool, optional): Whether training the layer parameters or not. Defaults to False.

Returns:

    LinearBaselineIntermediaries: Intermediate calculations for baseline like slope, intercept, etc.

Source code in wt_ml/layers/linear_baseline.py
def __call__(
    self,
    batch: LinearBaselineInput,
    training=False,  # noqa: U100
    debug=False,
    skip_metrics=False,  # noqa: U100
) -> LinearBaselineIntermediaries:
    """Calcuate baseline using slope-intercept form (y=mx+c).

    Args:
        dates_since_start (TensorLike): Number of timestamps since the last restart.
                                        shape = num_time x num_granular.
        sales_num_restarts (TensorLike): Number of restarts that occurred before this point.
                                         shape = num_time x num_granular.
        hierarchy (dict[str, TensorLike]): The lookup tables for categorical values.
        mask (TensorLike): Filter for 0 sales or unrealistic sales.
        training (bool, optional): Whether training the layer parameters or not.
                                   Defaults to False.

    Returns:
        LinearBaselineIntermediaries: Intermediate calculations for baseline like slope, intercept, etc.
    """
    indices = self.get_indices(batch.hierarchy["wholesaler"], batch.hierarchy["brand"], batch.sales_num_restarts)
    # For each granularity, gathering the initial baseline intercept across the time axis
    # The purpose is to use the same intercept for a given baseline across all the data points
    # The shape is converted to `num_time x num_granular` from `num_starts x num_granular`
    # The indices to gather for each baseline are stored in `sales_num_restarts`
    # NOTE: Issue in M1 Macbook version of tensorflow causes gather_nd to break when operating
    #       on a variable. tf.convert_to_tensor solves this. Open github issue can be found here:
    #       https://github.com/tensorflow/tensorflow/issues/57549
    broadcasted_intercept = tf.gather_nd(tf.convert_to_tensor(self.baseline_intercept), indices) * tf.constant(
        self.lr_scale, dtype=tf.float32
    )
    if self.allow_slope:
        broadcasted_slope = tf.gather_nd(tf.convert_to_tensor(self.baseline_slope), indices)
        # to convert, run: dates_since_start = tf.cast(
        # dates_since_start, dtype=tf.float32, name="dates_since_start")
        slope_impact = (
            broadcasted_slope
            * batch.dates_since_start
            * 2.0
            / (tf.math.reduce_max(batch.dates_since_start, axis=1, keepdims=True, name="max_dates") + EPSILON)
        )

        baseline_raw = slope_impact + broadcasted_intercept
    else:
        broadcasted_slope = tf.zeros_like(broadcasted_intercept)
        slope_impact = tf.zeros_like(broadcasted_intercept)
        baseline_raw = broadcasted_intercept
    if self.allow_slope and not skip_metrics:
        mask_weekly = (
            tf.cast(batch.mask, dtype=tf.float32) if batch.mask is not None else tf.ones_like(baseline_raw)
        )
        baseline_raw_masked_for_min = baseline_raw * mask_weekly + (1.0 - mask_weekly) * LARGE_EPSILON
        min_base = tf.reduce_min(baseline_raw_masked_for_min, axis=0, keepdims=True)
        # if our minimum value is very negative, then get a loss
        # this is just a softplus with a scale of .1, per product. if the product has negative min_base,
        # (more neg than -0.5), then this will start to be more and more positive
        min_base_with_0_soft = softplus(-10.0 - min_base, AUX_SCALE, name="min_base_with_0_soft")
        # take the MSE of that signal. above
        base_under_0 = tf.reduce_sum(tf.square(min_base_with_0_soft), name="base_under_0")
        self.add_loss("base_under_0", base_under_0, "aux", self.base_under_0_lambda)
    baseline = softplus(baseline_raw, scale=1 / self.scalar_so_softplus_approaches_0)
    return LinearBaselineIntermediaries(
        intercept=broadcasted_intercept if debug else None,
        slope=broadcasted_slope if debug else None,
        slope_impact=slope_impact if debug else None,
        baseline_raw=baseline_raw if debug else None,
        baseline=baseline,
    )
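A quick numeric illustration of the slope term (a sketch, not library code, using a single series rather than the batched axis=1 reduction above): dates_since_start is rescaled so the most recent week contributes roughly twice the learned slope, keeping the slope's magnitude comparable across series of different lengths.

import numpy as np

slope = 0.5                                           # hypothetical learned slope for one series
dates_since_start = np.arange(10, dtype=np.float32)   # 0..9 weeks since the last restart
EPSILON = 1e-6

slope_impact = slope * dates_since_start * 2.0 / (dates_since_start.max() + EPSILON)
print(slope_impact[0], slope_impact[-1])  # 0.0 at the restart, ~1.0 (= 2 * slope) at the last week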

__init__(starting_sales, num_starts, encodings, hyperparameters=None, name=None)

Class initialization to create linear regression lines for calculating baseline, for each granularity.

Parameters:

    starting_sales (ndarray): Sales at the start of each baseline. shape = num_starts x num_granularity. Required.
    num_starts (int): No. of starting points for each granularity. Required.
    hyperparameters (Hyperparams, optional): All hyperparameters. Defaults to None.
    name (str | None, optional): Name of the layer. Defaults to None.
Source code in wt_ml/layers/linear_baseline.py
def __init__(
    self,
    starting_sales: np.ndarray,
    num_starts: int,
    encodings: dict[str, int],
    hyperparameters: Hyperparams | None = None,
    name: str | None = None,
):
    """Class initialization to create linear regression lines for calculating baseline, for each granularity.

    Args:
        starting_sales (np.ndarray): Sales at the start of each baseline. shape = num_starts x num_granularity.
        num_starts (int): No. of starting points for each granularity.
        hyperparameters (Hyperparams, optional): All hyperparameters.
        name (str | None, optional): Name of the layer. Defaults to None.
    """
    super().__init__(hyperparameters=hyperparameters, name=name)
    self.starting_sales = starting_sales
    self.num_starts = num_starts
    self.encodings = encodings

build(input_shapes)

Build the layer parameters needed for calculating linear baseline.

Parameters:

    input_shapes (Tuple[Tensor, ...]): Tuple of tensor shapes of *args (without the defaults) passed to __call__(). Required.
Source code in wt_ml/layers/linear_baseline.py
def build(self, input_shapes):
    """Build the layer parameters needed for calculating linear baseline.

    Args:
        input_shapes (Tuple[tf.Tensor, ...]): Tuple of tensor shapes of `*args`(without the defaults)
                                              passed to `__call__()`.
    """
    self.num_starts = self.num_starts if input_shapes.sales_num_restarts is not ... else 1
    shape = [self.num_starts, len(self.encodings["wholesaler"]), len(self.encodings["brand"])]
    self.use_perfect_adjustment = self.hyperparameters.get_bool(
        "use_perfect_adjustment",
        default=False,
        help="Instead of using gradient descent change directly to the optimal values.",
    )
    if self.use_perfect_adjustment:
        self.lr_scale = 1.0
        if self.num_starts > 1:
            raise ValueError("Perfect adjustments does not support restarts at this time.")
        self.scalar_so_softplus_approaches_0 = 1.0
        self.allow_slope = False
        self.perfection_speed = self.hyperparameters.get_float(
            "perfection_speed",
            default=0.99,
            min=0.01,
            max=1.0,
            help="How close to make the step to the optimal value.",
        )
        self.relative_scale_cap = self.hyperparameters.get_float(
            "relative_scale_cap",
            default=10.0,
            min=1.0,
            max=1000.0,
            help="The maximum ratio it can have to the provided initial value.",
        )
        self.upper_bound = np.tile(
            (
                (
                    self.relative_scale_cap * self.starting_sales
                    + np.log(
                        -np.expm1(
                            -self.relative_scale_cap * self.starting_sales * self.scalar_so_softplus_approaches_0
                        )
                        + EPSILON
                    )
                    / self.scalar_so_softplus_approaches_0
                )
                / self.lr_scale
            ),
            (self.num_starts, 1, 1),
        )
    else:
        self.lr_scale = self.hyperparameters.get_float(
            "lr_scale",
            default=10.0,
            min=1.0,
            max=1000.0,
            help="A factor to multiply the raw weights by so they get larger gradients.",
        )
        self.scalar_so_softplus_approaches_0 = self.hyperparameters.get_float(
            "scalar_so_softplus_approaches_0",
            default=1.0,
            min=1.0,
            max=100.0,
            help="The temperature to apply to softplus to max it better approximate the relu function.",
        )
        self.allow_slope = (
            self.hyperparameters.get_bool(
                "allow_slope",
                default=False,
                help="Whether to allow slope in baseline",
            )
            and input_shapes.dates_since_start is not ...
        )
    self.baseline_intercept = self.create_var(
        # Start a little lower to allow roicurves to start higher.
        "intercept",
        shape=shape,
        dtype=tf.float32,
        initializer=np.tile(
            (
                (
                    0.8 * self.starting_sales
                    + np.log(-np.expm1(-0.8 * self.starting_sales * self.scalar_so_softplus_approaches_0) + EPSILON)
                    / self.scalar_so_softplus_approaches_0
                )
                / self.lr_scale
            ).astype(np.float32),
            (self.num_starts, 1, 1),
        ),
        trainable=not self.use_perfect_adjustment,
    )
    if self.allow_slope:
        self.baseline_slope = self.create_var(
            "slope", shape=shape, dtype=tf.float32, trainable=not self.use_perfect_adjustment
        )
        self.base_under_0_lambda = self.hyperparameters.get_float(
            "base_under_0_lambda",
            default=1.0e-03,
            min=1.0e-08,
            max=1.0,
            help="The weight for the loss applied to the baseline being below -10 before the softplus.",
        )
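The intercept initializer is the inverse of the scaled softplus applied later in __call__, so the baseline starts out at roughly 0.8 × starting_sales. A small numeric check (a sketch; EPSILON and the softplus helper below are stand-ins for the library's own constants and helper, assuming the usual temperature-scaled softplus):

import numpy as np

EPSILON = 1e-6
k = 1.0            # scalar_so_softplus_approaches_0
lr_scale = 10.0
s = 10.0           # hypothetical starting sales for one (wholesaler, brand) cell

def softplus(x, scale):
    # temperature-scaled softplus: scale * log(1 + exp(x / scale))
    return np.log1p(np.exp(x / scale)) * scale

init = (0.8 * s + np.log(-np.expm1(-0.8 * s * k) + EPSILON) / k) / lr_scale
baseline = softplus(init * lr_scale, scale=1 / k)
print(baseline)    # ~8.0, i.e. 0.8 * starting_sales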

MonotonicPositiveUnboundedLayer

Bases: Module, IMixedEffect

Source code in wt_ml/layers/monotonic_positive_unbounded.py
class MonotonicPositiveUnboundedLayer(Module, IMixedEffect):
    def __init__(
        self,
        encodings: Encodings,
        signal_type: str,
        hierarchy_categories: list[str | list[str]] | None = None,
        has_time: bool = False,
        has_signal: bool = False,
        hyperparameters: Hyperparams | None = None,
        non_pos: bool = False,
        non_neg: bool = False,
        non_pos_by_signal: list[bool] | None = None,
        non_neg_by_signal: list[bool] | None = None,
        maximum_strength: float | None = None,
        use_bias: bool | None = None,
        increase_lr: float | None = None,
        name: str | None = None,
    ):
        """Monotonic multiplicative factors affecting sales that also scales ROIs of investments.

        Args:
            hierarchy (pd.DataFrame): The hierarchy that the impact learns on.
            n_instances (int): Number of mixed effect signals. Axis index 2 of effect.
            has_time (bool, optional): Whether the hierarchy is on the time axis. Defaults to False.
            hyperparameters (Hyperparams, optional): Dictionary of hyperparameters for building this layer.
            name (str | None, optional): Name of the mixed effect captured. Module parent class sets
                                         to the name of the class if passed as None.
        """
        super().__init__(hyperparameters=hyperparameters, name=name)
        self.signal_type = signal_type
        self.encodings: Encodings = encodings
        self.has_time = has_time
        self.has_signal = has_signal
        self.hierarchy_categories = hierarchy_categories
        self.non_neg = non_neg
        self.non_pos = non_pos
        self.non_pos_by_signal = non_pos_by_signal
        self.non_neg_by_signal = non_neg_by_signal
        self.maximum_strength = maximum_strength
        self.use_bias = use_bias
        self.increase_lr = increase_lr

    def build(self, input_shapes: InputShapes):
        """Builds the sales_mult hierarchical variable.

        Args:
            input_shapes (InputShapes): The effect and hierarchy shapes.
        """
        self.n_instances = input_shapes.signals[2] if len(input_shapes.signals) > 2 else 1
        if self.has_signal:
            self.n_instances = 1
        shape = [self.n_instances]
        if self.use_bias is None:
            self.use_bias = not (self.has_time or self.has_signal)

        if not self.has_signal:
            signal_enc = self.encodings[self.signal_type]
            if TYPE_CHECKING:
                assert isinstance(signal_enc, Mapping)
            feature_names = tuple(get_lookups(signal_enc))  # pyright: ignore [reportArgumentType]
        else:
            feature_names = ("sales_mult",)

        self.sales_mult = self.hyperparameters.get_submodule(
            "effect_mult",
            module_type=HierchicalEmbedding,
            kwargs=dict(
                encodings=self.encodings,
                columns=self.hierarchy_categories,
                shape=shape,
                use_bias=self.use_bias,
                bias_initializer=0.01,
                increase_lr=self.increase_lr,
                feature_names=feature_names,
            ),
            help="The embedding for the multiplier to apply to each signal before exponentiation.",
        )
        self.use_softplus = self.hyperparameters.get_bool(
            "use_softplus",
            default=True,
            help="Whether to use softplus or exp for the standardization to positive multipliers.",
        )
        self.use_mono = self.hyperparameters.get_bool(
            "use_monotonic", default=False, help="Whether to use a monotonic concave layer to combine signals."
        )
        if self.use_mono:
            self.mono_effect = self.hyperparameters.get_submodule(
                "concave_effect_mult",
                module_type=MonoEffect,
                kwargs=dict(
                    encodings=self.encodings,
                    signal_type=self.signal_type,
                    n_instances=self.n_instances,
                    hierarchy_categories=self.hierarchy_categories,
                    has_signal=self.has_signal,
                ),
                help="Neural Network model that learns the monotonic effect.",
            )

    def __call__(
        self,
        batch: MonotonicPositiveUnboundedInput,
        training: bool = False,
        debug: bool = False,
        skip_metrics: bool = False,
    ) -> MonotonicPositiveUnboundedIntermediaries | DistIntermediaries:
        signals = batch.signals
        if self.use_mono:
            mono_effect_intermediaries = self.mono_effect(
                MonoEffectInput(
                    signals=batch.signals,
                    hierarchy=batch.hierarchy,
                ),
                training=training,
                debug=debug,
                skip_metrics=skip_metrics,
            )
            signals = mono_effect_intermediaries.signals
        # num_gran x num_inst if not has_time and not has_signal
        # num_gran x num_time x num_inst if has_time and not has_signal
        # num_gran x num_inst x 1 if not has_time and has_signal
        # num_gran x num_time x num_inst x 1 if has_time and has_signal
        baseline_sales_effect_raw = self.sales_mult(batch.hierarchy, training=training, skip_metrics=skip_metrics)
        if not self.has_time:
            baseline_sales_effect_raw = tf.expand_dims(baseline_sales_effect_raw, 1)
        if self.has_signal:
            baseline_sales_effect_raw = tf.squeeze(baseline_sales_effect_raw, -1)
        if self.signal_type == "distribution":
            baseline_sales_effect_raw = baseline_sales_effect_raw + tf.constant(3.0, dtype=tf.float32)
        # At this point baseline_sales_effect_raw is always broadcastable to # num_gran x num_time x num_inst
        # learns weightage of each effect signal and applies on it!
        # This is batch x time x n_instances
        # Shifted by -3 to make initialization more sane (before this, impacts were very large in the initial state).
        softplus_baseline_effect_raw = softplus(baseline_sales_effect_raw - tf.constant(3.0, dtype=tf.float32))
        if self.non_neg:
            baseline_sales_effect_raw = softplus_baseline_effect_raw
        if self.non_pos:
            baseline_sales_effect_raw = -softplus_baseline_effect_raw
        if self.non_pos_by_signal:
            non_pos_by_signal: tf.Tensor = tf.gather(
                tf.constant(self.non_pos_by_signal, dtype=tf.float32, name="non_pos_by_signal"), batch.signal_index
            )
            baseline_sales_effect_raw = baseline_sales_effect_raw * (
                tf.constant(1.0, dtype=tf.float32) - non_pos_by_signal
            ) - (non_pos_by_signal * softplus_baseline_effect_raw)
        if self.non_neg_by_signal:
            non_neg_by_signal = tf.gather(
                tf.constant(self.non_neg_by_signal, dtype=tf.float32, name="non_neg_by_signal"), batch.signal_index
            )
            baseline_sales_effect_raw = (
                baseline_sales_effect_raw * (1 - non_neg_by_signal) + non_neg_by_signal * softplus_baseline_effect_raw
            )
        if self.maximum_strength is not None:
            baseline_sales_effect_raw = (
                tf.math.tanh(baseline_sales_effect_raw / self.maximum_strength) * self.maximum_strength
            )
        baseline_sales_effect = tf.math.multiply(baseline_sales_effect_raw, signals, name="sales_effect")
        baseline_sales_effect = tf.grad_pass_through(lambda x: tf.maximum(-16.0, x, "baseline_sales_effect_clipped"))(
            baseline_sales_effect
        )
        if self.use_softplus:
            # Force 0 to map to 1 after softplus.
            baseline_sales_mult_by_signal = softplus(baseline_sales_effect + np.log(np.e - 1), name="mult_by_signal")
        else:
            baseline_sales_mult_by_signal = tf.math.exp(baseline_sales_effect, name="mult_by_signal")
        # batch x time
        baseline_sales_mult = tf.reduce_prod(baseline_sales_mult_by_signal, 2, name="impact")
        return MonotonicPositiveUnboundedIntermediaries(
            baseline_sales_effect_raw=baseline_sales_effect_raw if debug else None,
            baseline_sales_effect=baseline_sales_effect if debug else None,
            impact_by_signal=baseline_sales_mult_by_signal,
            baseline_sales_mult=baseline_sales_mult if debug else None,
            impact=baseline_sales_mult,
            signal_names=tf.gather(
                tf.convert_to_tensor(get_lookups(self.encodings[self.signal_type])), batch.signal_index
            ),
        )
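The `+ np.log(np.e - 1)` shift above is what makes a zero effect neutral: softplus(0 + log(e - 1)) = log(1 + (e - 1)) = 1, so a signal with no effect multiplies sales by exactly 1. A quick check (a sketch using a plain, unscaled softplus):

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

for effect in (-1.0, 0.0, 1.0):
    print(effect, softplus(effect + np.log(np.e - 1)))
# 0.0 maps to exactly 1.0; negative effects give multipliers below 1, positive effects above 1.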

__init__(encodings, signal_type, hierarchy_categories=None, has_time=False, has_signal=False, hyperparameters=None, non_pos=False, non_neg=False, non_pos_by_signal=None, non_neg_by_signal=None, maximum_strength=None, use_bias=None, increase_lr=None, name=None)

Monotonic multiplicative factors affecting sales that also scale ROIs of investments.

Parameters:

    hierarchy (DataFrame): The hierarchy that the impact learns on. Required.
    n_instances (int): Number of mixed effect signals. Axis index 2 of effect. Required.
    has_time (bool, optional): Whether the hierarchy is on the time axis. Defaults to False.
    hyperparameters (Hyperparams, optional): Dictionary of hyperparameters for building this layer. Defaults to None.
    name (str | None, optional): Name of the mixed effect captured. Module parent class sets it to the name of the class if passed as None. Defaults to None.
Source code in wt_ml/layers/monotonic_positive_unbounded.py
def __init__(
    self,
    encodings: Encodings,
    signal_type: str,
    hierarchy_categories: list[str | list[str]] | None = None,
    has_time: bool = False,
    has_signal: bool = False,
    hyperparameters: Hyperparams | None = None,
    non_pos: bool = False,
    non_neg: bool = False,
    non_pos_by_signal: list[bool] | None = None,
    non_neg_by_signal: list[bool] | None = None,
    maximum_strength: float | None = None,
    use_bias: bool | None = None,
    increase_lr: float | None = None,
    name: str | None = None,
):
    """Monotonic multiplicative factors affecting sales that also scales ROIs of investments.

    Args:
        hierarchy (pd.DataFrame): The hierarchy that the impact learns on.
        n_instances (int): Number of mixed effect signals. Axis index 2 of effect.
        has_time (bool, optional): Whether the hierarchy is on the time axis. Defaults to False.
        hyperparameters (Hyperparams, optional): Dictionary of hyperparameters for building this layer.
        name (str | None, optional): Name of the mixed effect captured. Module parent class sets
                                     to the name of the class if passed as None.
    """
    super().__init__(hyperparameters=hyperparameters, name=name)
    self.signal_type = signal_type
    self.encodings: Encodings = encodings
    self.has_time = has_time
    self.has_signal = has_signal
    self.hierarchy_categories = hierarchy_categories
    self.non_neg = non_neg
    self.non_pos = non_pos
    self.non_pos_by_signal = non_pos_by_signal
    self.non_neg_by_signal = non_neg_by_signal
    self.maximum_strength = maximum_strength
    self.use_bias = use_bias
    self.increase_lr = increase_lr

build(input_shapes)

Builds the sales_mult hierarchical variable.

Parameters:

    input_shapes (InputShapes): The effect and hierarchy shapes. Required.
Source code in wt_ml/layers/monotonic_positive_unbounded.py
def build(self, input_shapes: InputShapes):
    """Builds the sales_mult hierarchical variable.

    Args:
        input_shapes (InputShapes): The effect and hierarchy shapes.
    """
    self.n_instances = input_shapes.signals[2] if len(input_shapes.signals) > 2 else 1
    if self.has_signal:
        self.n_instances = 1
    shape = [self.n_instances]
    if self.use_bias is None:
        self.use_bias = not (self.has_time or self.has_signal)

    if not self.has_signal:
        signal_enc = self.encodings[self.signal_type]
        if TYPE_CHECKING:
            assert isinstance(signal_enc, Mapping)
        feature_names = tuple(get_lookups(signal_enc))  # pyright: ignore [reportArgumentType]
    else:
        feature_names = ("sales_mult",)

    self.sales_mult = self.hyperparameters.get_submodule(
        "effect_mult",
        module_type=HierchicalEmbedding,
        kwargs=dict(
            encodings=self.encodings,
            columns=self.hierarchy_categories,
            shape=shape,
            use_bias=self.use_bias,
            bias_initializer=0.01,
            increase_lr=self.increase_lr,
            feature_names=feature_names,
        ),
        help="The embedding for the multiplier to apply to each signal before exponentiation.",
    )
    self.use_softplus = self.hyperparameters.get_bool(
        "use_softplus",
        default=True,
        help="Whether to use softplus or exp for the standardization to positive multipliers.",
    )
    self.use_mono = self.hyperparameters.get_bool(
        "use_monotonic", default=False, help="Whether to use a monotonic concave layer to combine signals."
    )
    if self.use_mono:
        self.mono_effect = self.hyperparameters.get_submodule(
            "concave_effect_mult",
            module_type=MonoEffect,
            kwargs=dict(
                encodings=self.encodings,
                signal_type=self.signal_type,
                n_instances=self.n_instances,
                hierarchy_categories=self.hierarchy_categories,
                has_signal=self.has_signal,
            ),
            help="Neural Network model that learns the monotonic effect.",
        )

Pricing

Bases: Module, IMixedEffect

Source code in wt_ml/layers/pricing.py
class Pricing(Module, IMixedEffect):
    def __init__(
        self,
        encodings: dict[str, Any],
        hierarchy_categories: list[str | list[str]] | None = None,
        hyperparameters: Hyperparams | None = None,
        name: str | None = None,
    ):
        """Multiplicative price elasticity factor affecting baseline sales that also scales ROI of investments.

        Args:
            hierarchy (pd.DataFrame): The hierarchy used to build features learnt by the model to generate impacts.
            hyperparameters (Hyperparams | None, optional): An instance of `FileHyperparameterConfig` class
                                                            that stores all the hyperparameters of Pricing layer.
                                                            `Module` parent class sets these hyperparameters if None.
            name (str | None, optional): Name of the Pricing Layer.
                                         `Module` parent class sets name of the class if None.
        """
        super().__init__(hyperparameters=hyperparameters, name=name)
        self.encodings = encodings
        self.hierarchy_categories = hierarchy_categories

    def build(self, input_shapes: InputShapes):  # noqa: U100
        """Builds the `price_params_emb_layer` hierarchical variable
        for generating price elasticity curve for each granularity.
        Shape of the variable: (num_granular, 2). 2 denotes offset and exponent.

        Args:
            input_shapes (InputShapes): A tuple of tensor shapes of `price` and `hierarchy` passed to `__call__`.
        """
        n_instances = input_shapes.prices[2]
        self.pricing_params_emb_layer = self.hyperparameters.get_submodule(
            "pricing_params_emb_layer",
            module_type=HierchicalEmbedding,
            kwargs=dict(
                encodings=self.encodings,
                columns=self.hierarchy_categories,
                # 2 here denotes offset and exponent
                shape=[n_instances, 2],
                # This initializes to a state where any change in price reduces revenue while still making learning
                # other distributions easy. You can roughly think of this as price * (mult / (price + 1) ** 2)
                # Where mult is a specially calculated value so that the result with price = 1 is 1.
                # Order is offset (softplus), exponent (softplus + 1)
                bias_initializer=tf.constant_initializer(np.tile([[0.6, 0.6]], (n_instances, 1)).reshape(-1)),
                feature_names=[
                    [f"{signal}_offset", f"{signal}_exponent"] for signal in get_lookups(self.encodings["price_dev"])
                ],
            ),
            help="The embedding for the parameters for the pricing elasticity curve.",
        )

    def __call__(
        self, batch: PricingInput, training: bool = False, debug: bool = False, skip_metrics: bool = False
    ) -> PricingIntermediaries:
        """Pricing Layer Forward Propagation.
        We take in the mean normalized $price$ signal of shape `(num_time, num_granular, n_sim)`.
        Then we take the $offset$ and $exponent$ learnt by the model, each of shape `(num_granular,)`.
        The impact is calculated as follows:

        $volume = \\frac{normalization\\_mult} {(price + offset) ^ {exponent}}$

        $normalization\\_{mult} = (1 + offset) ^ {exponent}$

        $impact = volume * price$

        This $impact$ is of shape `(num_time, num_granular, n_sim)`

        > NOTE: normalization\\_mult is a factor to neglect the impact of prices which equal the average price.

        Args:
            price (TensorLike): mean normalized price_per_hl for each granularity each week.
                                Shape: (num_time, num_granular, n_sim)
            hierarchy (dict[str, TensorLike]): Hierarchical Placeholder for creating hierarchical variable.
            training (bool, optional): Whether this is a training or inference run. Defaults to False.

        Returns:
            PricingIntermediaries: Intermediate calculations like offset, asymptote, exponent, etc., and final impact.
        """
        params_emb = self.pricing_params_emb_layer(batch.hierarchy, training=training, skip_metrics=skip_metrics)
        offset_emb, exponent_emb = tf.unstack(params_emb, axis=2)
        offset = softplus(offset_emb * 10) + 0.01
        exponent = monotonic_sigmoid(exponent_emb / 4) * 4 + 1
        # Here we introduce a mult which is a normalization parameter
        # the equation for volume by price is: volume = mult / ((price + offset) ** exponent)
        # We want volume = 1 when price = 1 so we need to solve
        # 1 = mult / ((1 + offset) ** exponent)
        # mult = (1 + offset) ** exponent
        normalization_mult = (1 + offset) ** exponent
        volume = normalization_mult[:, None] / ((batch.prices + offset[:, None]) ** exponent[:, None])
        # Impact is the revenue of this volume at that price, so just the product.
        revenue = tf.math.multiply(
            volume, tf.where(batch.prices > 0, batch.prices, tf.math.reciprocal_no_nan(volume)), name="impact"
        )
        impact_by_signal = revenue
        # Reduce over signal axis
        impact = tf.math.reduce_prod(impact_by_signal, axis=2, name="impact")
        return PricingIntermediaries(
            offset_emb=offset_emb if debug else None,
            exponent_emb=exponent_emb if debug else None,
            offset=offset if debug else None,
            exponent=exponent if debug else None,
            volume=volume if debug else None,
            revenue=revenue if debug else None,
            impact_by_signal=impact_by_signal,
            impact=impact,
            signal_names=tf.gather(tf.convert_to_tensor(get_lookups(self.encodings["price_dev"])), batch.signal_index),
        )

__call__(batch, training=False, debug=False, skip_metrics=False)

Pricing Layer Forward Propagation. We take in the mean normalized \(price\) signal of shape (num_time, num_granular, n_sim). Then we take the \(offset\) and \(exponent\) learnt by the model, each of shape (num_granular,). The impact is calculated as follows:

\(volume = \frac{normalization\_mult} {(price + offset) ^ {exponent}}\)

\(normalization\_{mult} = (1 + offset) ^ {exponent}\)

\(impact = volume * price\)

This \(impact\) is of shape (num_time, num_granular, n_sim)

NOTE: normalization_mult is a factor to neglect the impact of prices which equal the average price.
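As a quick numeric illustration (values assumed for clarity; they match the initializer comment's rough picture of price * (mult / (price + 1) ** 2), i.e. offset = 1 and exponent = 2):

import numpy as np

offset, exponent = 1.0, 2.0
normalization_mult = (1 + offset) ** exponent          # 4.0, so that volume == 1 at price == 1

price = np.array([0.8, 1.0, 1.2])                      # mean-normalized prices
volume = normalization_mult / (price + offset) ** exponent
impact = volume * price
print(impact)  # ~[0.988, 1.0, 0.992] -- impact is 1 at the average price and falls off on either side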

Parameters:

    price (TensorLike): Mean normalized price_per_hl for each granularity each week. Shape: (num_time, num_granular, n_sim). Required.
    hierarchy (dict[str, TensorLike]): Hierarchical placeholder for creating the hierarchical variable. Required.
    training (bool, optional): Whether this is a training or inference run. Defaults to False.

Returns:

    PricingIntermediaries: Intermediate calculations like offset, asymptote, exponent, etc., and the final impact.

Source code in wt_ml/layers/pricing.py
def __call__(
    self, batch: PricingInput, training: bool = False, debug: bool = False, skip_metrics: bool = False
) -> PricingIntermediaries:
    """Pricing Layer Forward Propagation.
    We take in the mean normalized $price$ signal of shape `(num_time, num_granular, n_sim)`.
    Then we take the $offset$ and $exponent$ learnt by the model, each of shape `(num_granular,)`.
    The impact is calculated as follows:

    $volume = \\frac{normalization\\_mult} {(price + offset) ^ {exponent}}$

    $normalization\\_{mult} = (1 + offset) ^ {exponent}$

    $impact = volume * price$

    This $impact$ is of shape `(num_time, num_granular, n_sim)`

    > NOTE: normalization\\_mult is a factor to neglect the impact of prices which equal the average price.

    Args:
        price (TensorLike): mean normalized price_per_hl for each granularity each week.
                            Shape: (num_time, num_granular, n_sim)
        hierarchy (dict[str, TensorLike]): Hierarchical Placeholder for creating hierarchical variable.
        training (bool, optional): Whether this is a training or inference run. Defaults to False.

    Returns:
        PricingIntermediaries: Intermediate calculations like offset, asymptote, exponent, etc., and final impact.
    """
    params_emb = self.pricing_params_emb_layer(batch.hierarchy, training=training, skip_metrics=skip_metrics)
    offset_emb, exponent_emb = tf.unstack(params_emb, axis=2)
    offset = softplus(offset_emb * 10) + 0.01
    exponent = monotonic_sigmoid(exponent_emb / 4) * 4 + 1
    # Here we introduce a mult which is a normalization parameter
    # the equation for volume by price is: volume = mult / ((price + offset) ** exponent)
    # We want volume = 1 when price = 1 so we need to solve
    # 1 = mult / ((1 + offset) ** exponent)
    # mult = (1 + offset) ** exponent
    normalization_mult = (1 + offset) ** exponent
    volume = normalization_mult[:, None] / ((batch.prices + offset[:, None]) ** exponent[:, None])
    # Impact is the revenue of this volume at that price, so just the product.
    revenue = tf.math.multiply(
        volume, tf.where(batch.prices > 0, batch.prices, tf.math.reciprocal_no_nan(volume)), name="impact"
    )
    impact_by_signal = revenue
    # Reduce over signal axis
    impact = tf.math.reduce_prod(impact_by_signal, axis=2, name="impact")
    return PricingIntermediaries(
        offset_emb=offset_emb if debug else None,
        exponent_emb=exponent_emb if debug else None,
        offset=offset if debug else None,
        exponent=exponent if debug else None,
        volume=volume if debug else None,
        revenue=revenue if debug else None,
        impact_by_signal=impact_by_signal,
        impact=impact,
        signal_names=tf.gather(tf.convert_to_tensor(get_lookups(self.encodings["price_dev"])), batch.signal_index),
    )
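
The transforms in the forward pass above map unconstrained embedding values into valid curve parameters: a strictly positive offset and an exponent above 1. The sketch below illustrates the idea with standard TensorFlow ops; `tf.nn.softplus` and `tf.sigmoid` stand in for the library's `softplus` and `monotonic_sigmoid` helpers, which may differ in detail, and the embedding values are made up.

```python
# Illustrative-only: squash unconstrained embeddings into curve parameters.
import tensorflow as tf

offset_emb = tf.constant([-0.3, 0.0, 0.6])    # hypothetical embedding values
exponent_emb = tf.constant([-1.0, 0.0, 1.0])

offset = tf.nn.softplus(offset_emb * 10) + 0.01   # strictly positive offset
exponent = tf.sigmoid(exponent_emb / 4) * 4 + 1   # exponent kept in (1, 5)

print(offset.numpy(), exponent.numpy())
```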

__init__(encodings, hierarchy_categories=None, hyperparameters=None, name=None)

Multiplicative price elasticity factor affecting baseline sales that also scales ROI of investments.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| hierarchy | DataFrame | The hierarchy used to build features learnt by the model to generate impacts. | required |
| hyperparameters | Hyperparams \| None | An instance of the FileHyperparameterConfig class that stores all the hyperparameters of the Pricing layer. The Module parent class sets these hyperparameters if None. | None |
| name | str \| None | Name of the Pricing Layer. The Module parent class sets the name of the class if None. | None |
Source code in wt_ml/layers/pricing.py
def __init__(
    self,
    encodings: dict[str, Any],
    hierarchy_categories: list[str | list[str]] | None = None,
    hyperparameters: Hyperparams | None = None,
    name: str | None = None,
):
    """Multiplicative price elasticity factor affecting baseline sales that also scales ROI of investments.

    Args:
        hierarchy (pd.DataFrame): The hierarchy used to build features learnt by the model to generate impacts.
        hyperparameters (Hyperparams | None, optional): An instance of `FileHyperparameterConfig` class
                                                        that stores all the hyperparameters of Pricing layer.
                                                        `Module` parent class sets these hyperparameters if None.
        name (str | None, optional): Name of the Pricing Layer.
                                     `Module` parent class sets name of the class if None.
    """
    super().__init__(hyperparameters=hyperparameters, name=name)
    self.encodings = encodings
    self.hierarchy_categories = hierarchy_categories

build(input_shapes)

Builds the pricing_params_emb_layer hierarchical variable used to generate the price elasticity curve for each granularity. Shape of the variable: (num_granular, 2), where 2 denotes offset and exponent.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| input_shapes | InputShapes | A tuple of tensor shapes of price and hierarchy passed to __call__. | required |
Source code in wt_ml/layers/pricing.py
def build(self, input_shapes: InputShapes):  # noqa: U100
    """Builds the `price_params_emb_layer` hierarchical variable
    for generating price elasticity curve for each granularity.
    Shape of the variable: (num_granular, 2). 2 denotes offset and exponent.

    Args:
        input_shapes (InputShapes): A tuple of tensor shapes of `price` and `hierarchy` passed to `__call__`.
    """
    n_instances = input_shapes.prices[2]
    self.pricing_params_emb_layer = self.hyperparameters.get_submodule(
        "pricing_params_emb_layer",
        module_type=HierchicalEmbedding,
        kwargs=dict(
            encodings=self.encodings,
            columns=self.hierarchy_categories,
            # 2 here denotes offset and exponent
            shape=[n_instances, 2],
            # This initializes to a state where any change in price reduces revenue while still making learning
            # other distributions easy. You can roughly think of this as price * (mult / (price + 1) ** 2)
            # Where mult is a specially calculated value so that the result with price = 1 is 1.
            # Order is offset (softplus), exponent (softplus + 1)
            bias_initializer=tf.constant_initializer(np.tile([[0.6, 0.6]], (n_instances, 1)).reshape(-1)),
            feature_names=[
                [f"{signal}_offset", f"{signal}_exponent"] for signal in get_lookups(self.encodings["price_dev"])
            ],
        ),
        help="The embedding for the parameters for the pricing elasticity curve.",
    )
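
For intuition about the bias_initializer above, the NumPy-only sketch below (with a hypothetical n_instances) shows the flat vector it constructs: every instance starts from the same (offset_emb, exponent_emb) pair of 0.6.

```python
# Show the flat bias vector the initializer above is built from (hypothetical n_instances).
import numpy as np

n_instances = 3
flat_bias = np.tile([[0.6, 0.6]], (n_instances, 1)).reshape(-1)
print(flat_bias)                          # [0.6 0.6 0.6 0.6 0.6 0.6]
print(flat_bias.reshape(n_instances, 2))  # one (offset_emb, exponent_emb) row per instance
```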

apply_impacts(baseline, multiplicative_impacts, additive_impacts)

Apply the impacts on top of the baseline to get yhat.

\[ yhat = \left( baseline \cdot \prod_{m} {multiplicativeEffect}_{m} \right) + \sum_{a} {additiveEffect}_{a} \]

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| baseline | Tensor | The baseline impact. This is the starting point that the impacts are applied on. | required |
| multiplicative_impacts | list[Tensor] | These impacts scale the baseline multiplicatively (larger effect). | required |
| additive_impacts | list[Tensor] | These impacts increase the baseline additively (smaller effect). | required |

Returns:

| Type | Description |
| ---- | ----------- |
| Tensor | The yhat after applying all the impacts on the baseline. |

Source code in wt_ml/layers/impact_utils.py
def apply_impacts(
    baseline: tf.Tensor,
    multiplicative_impacts: list[tf.Tensor] | tuple[tf.Tensor, ...],
    additive_impacts: list[tf.Tensor] | tuple[tf.Tensor, ...],
) -> tf.Tensor:
    """Apply the impacts on top of the basline to get yhat.

    $$
    yhat = (baseline*\\underset{}{\\overset{m}{\\prod }}({multiplicativeEffect}_{m}))
        + \\underset{}{\\overset{a}{\\sum }}({additiveEffect}_{a})
    $$

    Args:
        baseline (tf.Tensor): The baseline impact. This will be the starting point where impacts are applied on.
        multiplicative_impacts (list[tf.Tensor]): These impacts scale the baseline multiplicatively (larger effect).
        additive_impacts (list[tf.Tensor]): These impacts increase the baseline additively (smaller effect).

    Returns:
        tf.Tensor: The yhat after applying all the impacts on the baseline.
    """  # noqa: E501
    yhat = baseline
    for impact in multiplicative_impacts:
        yhat = yhat * impact
    for impact in additive_impacts:
        yhat = yhat + impact
    return yhat
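
Assuming apply_impacts is importable from wt_ml.layers.impact_utils (the source path shown above), a toy usage sketch might look like this; the tensors and impact names are purely illustrative:

```python
# Toy usage of apply_impacts; values and impact names are illustrative only.
import tensorflow as tf

from wt_ml.layers.impact_utils import apply_impacts  # path per the docs above

baseline = tf.constant([10.0, 20.0])
pricing_impact = tf.constant([1.1, 0.9])  # multiplicative: scales the baseline
holiday_bump = tf.constant([0.5, 0.0])    # additive: added on top

yhat = apply_impacts(baseline, [pricing_impact], [holiday_bump])
print(yhat.numpy())  # [10*1.1 + 0.5, 20*0.9 + 0.0] -> [11.5, 18.0]
```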

apply_inverse_impacts(y, multiplicative_impacts, additive_impacts)

Remove the impacts from y to get the baseline back: baseline = (yhat - Σ(additive effects)) / ∏(multiplicative effects)

NOTE: These are the inverse impacts, i.e.,

inverse multiplicative effect = 1 / multiplicative effect
inverse additive effect = -1 * additive effect

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| y | Tensor | The y total impact. | required |
| multiplicative_impacts | list[Tensor] | Multiplicative impacts; y is divided by these (larger effect). | required |
| additive_impacts | list[Tensor] | Additive impacts; these are subtracted off the y (smaller effect). | required |

Returns:

| Type | Description |
| ---- | ----------- |
| Tensor | The baseline after all the impacts are removed from y. |

Source code in wt_ml/layers/impact_utils.py
def apply_inverse_impacts(
    y: tf.Tensor, multiplicative_impacts: list[tf.Tensor], additive_impacts: list[tf.Tensor]
) -> tf.Tensor:
    """Remove the impacts from `y` to get the baseline back.
    baseline = (yhat - Σ(additive effects)) / ∏(multiplicative effects)

    NOTE: These are the inverse impacts, i.e.,
        inverse multiplicative effect = 1/multiplicative effect
        inverse additive effect = -1 * additive effect

    Args:
        y (tf.Tensor): The `y` total impact.
        multiplicative_impacts (list[tf.Tensor]): Multiplicative impacts; `y` is divided by these (larger effect).
        additive_impacts (list[tf.Tensor]): Additive impacts; these are subtracted off the `y` (smaller effect).

    Returns:
        tf.Tensor: The baseline after all the impacts are removed from `y`.
    """
    baseline = y
    for impact in additive_impacts:
        baseline = baseline - impact
    for impact in multiplicative_impacts:
        baseline = tf.math.divide_no_nan(baseline, impact)
    return baseline
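
As a sanity check, applying apply_impacts and then apply_inverse_impacts with the same impacts should recover the original baseline (toy values; same import assumption as in the sketch above):

```python
# Round trip: apply impacts, then remove them again to recover the baseline.
import tensorflow as tf

from wt_ml.layers.impact_utils import apply_impacts, apply_inverse_impacts

baseline = tf.constant([10.0, 20.0])
mult = [tf.constant([1.1, 0.9])]
add = [tf.constant([0.5, 0.0])]

yhat = apply_impacts(baseline, mult, add)
recovered = apply_inverse_impacts(yhat, mult, add)
print(recovered.numpy())  # ~[10., 20.], up to floating-point error
```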