Linear baseline - WatchTower Documentation

`LinearBaseline`

Bases: Module

Source code in wt_ml/layers/linear_baseline.py

class LinearBaseline(Module):
    """Baseline-sales layer with one intercept (and optional slope) per restart/wholesaler/brand.

    ``build`` creates an intercept variable of shape ``num_starts x num_wholesalers x num_brands``
    seeded from ``starting_sales``. ``__call__`` gathers the entries addressed by the batch, scales
    them by ``lr_scale``, optionally adds a slope term and applies ``softplus``. When the
    ``use_perfect_adjustment`` hyperparameter is enabled the intercept is created non-trainable and
    is updated by ``do_perfect_adjustment`` instead.
    """

    def __init__(
        self,
        starting_sales: np.ndarray,
        num_starts: int,
        encodings: dict[str, int],
        hyperparameters: Hyperparams | None = None,
        name: str | None = None,
    ):
        """Class initialization to create linear regression lines for calculating baseline, for each granularity.

        Args:
            starting_sales (np.ndarray): Sales level used to seed the intercept. ``build`` tiles it with
                ``(num_starts, 1, 1)`` into a ``num_starts x num_wholesalers x num_brands`` variable, so it
                presumably has shape ``num_wholesalers x num_brands`` (optionally with a leading axis of
                size 1) - TODO confirm against the caller.
            num_starts (int): No. of starting points for each granularity.
            encodings (dict[str, int]): Lookup keyed by hierarchy level. ``build`` takes
                ``len(encodings["wholesaler"])`` and ``len(encodings["brand"])``.
                NOTE(review): that requires sized values, which disagrees with the ``int`` annotation - confirm.
            hyperparameters (Hyperparams, optional): All hyperparameters.
            name (str | None, optional): Name of the layer. Defaults to None.
        """
        super().__init__(hyperparameters=hyperparameters, name=name)
        self.starting_sales = starting_sales
        self.num_starts = num_starts
        self.encodings = encodings

    @staticmethod
    def _inverse_softplus(target: np.ndarray, temperature: float) -> np.ndarray:
        """Return ``x`` such that ``log1p(exp(x * temperature)) / temperature`` is approximately ``target``.

        Evaluated as ``target + log(1 - exp(-target * temperature) + EPSILON) / temperature``. The added
        ``EPSILON`` keeps the logarithm finite when ``target`` is 0 (assumes ``EPSILON`` is a small
        positive constant).
        """
        return target + np.log(-np.expm1(-target * temperature) + EPSILON) / temperature

    def build(self, input_shapes):
        """Build the layer parameters needed for calculating linear baseline.

        Reads the hyperparameters, then creates the ``intercept`` variable and, when slopes are
        allowed, the ``slope`` variable.

        Args:
            input_shapes: Object exposing ``sales_num_restarts`` and ``dates_since_start`` attributes.
                An attribute equal to ``...`` (Ellipsis) is treated as an input that is not available:
                restarts are then disabled (``num_starts`` becomes 1) and slopes are not allowed.

        Raises:
            ValueError: If ``use_perfect_adjustment`` is enabled together with more than one start.
        """
        self.num_starts = self.num_starts if input_shapes.sales_num_restarts is not ... else 1
        shape = [self.num_starts, len(self.encodings["wholesaler"]), len(self.encodings["brand"])]
        self.use_perfect_adjustment = self.hyperparameters.get_bool(
            "use_perfect_adjustment",
            default=False,
            help="Instead of using gradient descent change directly to the optimal values.",
        )
        if self.use_perfect_adjustment:
            # Perfect adjustment writes raw values directly, so no rescaling, temperature or slope is used.
            self.lr_scale = 1.0
            if self.num_starts > 1:
                raise ValueError("Perfect adjustments does not support restarts at this time.")
            self.scalar_so_softplus_approaches_0 = 1.0
            self.allow_slope = False
            self.perfection_speed = self.hyperparameters.get_float(
                "perfection_speed",
                default=0.99,
                min=0.01,
                max=1.0,
                help="How close to make the step to the optimal value.",
            )
            self.relative_scale_cap = self.hyperparameters.get_float(
                "relative_scale_cap",
                default=10.0,
                min=1.0,
                max=1000.0,
                help="The maximum ratio it can have to the provided initial value.",
            )
            # Largest raw intercept allowed: the pre-softplus value of `relative_scale_cap * starting_sales`.
            self.upper_bound = np.tile(
                self._inverse_softplus(
                    self.relative_scale_cap * self.starting_sales, self.scalar_so_softplus_approaches_0
                )
                / self.lr_scale,
                (self.num_starts, 1, 1),
            )
        else:
            self.lr_scale = self.hyperparameters.get_float(
                "lr_scale",
                default=10.0,
                min=1.0,
                max=1000.0,
                help="A factor to multiply the raw weights by so they get larger gradients.",
            )
            self.scalar_so_softplus_approaches_0 = self.hyperparameters.get_float(
                "scalar_so_softplus_approaches_0",
                default=1.0,
                min=1.0,
                max=100.0,
                help="The temperature to apply to softplus to max it better approximate the relu function.",
            )
            self.allow_slope = (
                self.hyperparameters.get_bool(
                    "allow_slope",
                    default=False,
                    help="Whether to allow slope in baseline",
                )
                and input_shapes.dates_since_start is not ...
            )
        self.baseline_intercept = self.create_var(
            # Start a little lower (80% of starting sales) to allow roicurves to start higher.
            "intercept",
            shape=shape,
            dtype=tf.float32,
            initializer=np.tile(
                (
                    self._inverse_softplus(0.8 * self.starting_sales, self.scalar_so_softplus_approaches_0)
                    / self.lr_scale
                ).astype(np.float32),
                (self.num_starts, 1, 1),
            ),
            trainable=not self.use_perfect_adjustment,
        )
        if self.allow_slope:
            self.baseline_slope = self.create_var(
                "slope", shape=shape, dtype=tf.float32, trainable=not self.use_perfect_adjustment
            )
            self.base_under_0_lambda = self.hyperparameters.get_float(
                "base_under_0_lambda",
                default=1.0e-03,
                min=1.0e-08,
                max=1.0,
                help="The weight for the loss applied to the baseline being below -10 before the softplus.",
            )

    def do_perfect_adjustment(self, batch: EconomicModelInput, intermediaries: "EconomicIntermediaries") -> None:
        """Move the intercepts of the batch towards their closed-form least-squares optimum.

        For every batch row the baseline ``b`` minimising
        ``sum_t mask * (y_smooth - additive - multiplicative * b) ** 2`` is computed, floored at
        ``EPSILON``, mapped back through the inverse softplus, capped at ``upper_bound`` and blended
        with the stored value using ``perfection_speed``. Logs a warning and does nothing when
        ``use_perfect_adjustment`` is disabled.

        Args:
            batch (EconomicModelInput): Provides ``wholesaler_index`` and ``brand_index`` used to address
                the intercept variable.
            intermediaries (EconomicIntermediaries): Provides ``mask`` (may be None), ``y_smooth`` and
                ``impacts.multiplicative_impacts`` / ``impacts.additive_impacts``.
        """
        if not self.use_perfect_adjustment:
            logger.warning("Cannot do perfect adjustment if the hyperparameter is not enabled.")
            return
        if intermediaries.mask is not None:
            # einsum needs all operands in one dtype; `__call__` casts its mask as well.
            y_mask = tf.cast(intermediaries.mask, intermediaries.y_smooth.dtype)
        else:
            y_mask = tf.ones_like(intermediaries.y_smooth)
        multiplicative_impact = prod_n(intermediaries.impacts.multiplicative_impacts)
        additive_impact = tf.math.add_n(intermediaries.impacts.additive_impacts)
        # Weighted least squares: b = sum(m * M * (y - a)) / sum(m * M * M).
        # (Original author's note: expected a division by 2 here, but it is not needed.)
        post_softplus_unclipped = tf.einsum(
            "bt,bt,bt->b", y_mask, multiplicative_impact, intermediaries.y_smooth - additive_impact
        ) / (tf.einsum("bt,bt,bt->b", y_mask, multiplicative_impact, multiplicative_impact) + EPSILON)
        # The inverse softplus below needs a strictly positive argument.
        post_softplus = tf.math.maximum(post_softplus_unclipped, EPSILON)
        pre_softplus = (
            post_softplus
            + tf.math.log(-tf.math.expm1(-self.scalar_so_softplus_approaches_0 * post_softplus))
            / self.scalar_so_softplus_approaches_0
        ) / self.lr_scale
        # get_indices returns one (restart=0, wholesaler, brand) triple per row in a length-1 middle axis.
        gather_indices = tf.squeeze(self.get_indices(batch.wholesaler_index, batch.brand_index, None), 1)
        existing_values = self.baseline_intercept.gather_nd(gather_indices)
        max_values = tf.gather_nd(tf.constant(self.upper_bound, dtype=tf.float32), gather_indices)
        pre_softplus = tf.math.minimum(pre_softplus, max_values)
        self.baseline_intercept.scatter_nd_update(
            gather_indices, self.perfection_speed * pre_softplus + (1 - self.perfection_speed) * existing_values
        )

    def get_indices(
        self, wholesaler_index: tf.Tensor, brand_index: tf.Tensor, sales_num_restarts: tf.Tensor | None
    ) -> tf.Tensor:
        """Build ``(restart, wholesaler, brand)`` index triples for gathering from the layer variables.

        Args:
            wholesaler_index (tf.Tensor): 1-D wholesaler index, one entry per batch row.
            brand_index (tf.Tensor): 1-D brand index, one entry per batch row.
            sales_num_restarts (tf.Tensor | None): 2-D restart index whose first axis matches the batch
                rows. When None a single column with restart index 0 is produced.

        Returns:
            tf.Tensor: int64 tensor of shape ``rows x columns x 3``.
        """
        if sales_num_restarts is not None:
            # tf.stack requires a common dtype, so cast like the other two index inputs.
            restart_indices = tf.cast(sales_num_restarts, dtype=tf.int32)
            num_columns = tf.shape(sales_num_restarts)[1]
        else:
            restart_indices = None
            num_columns = 1
        wholesaler_indices = tf.tile(tf.cast(wholesaler_index[:, None], dtype=tf.int32), [1, num_columns])
        brand_indices = tf.tile(tf.cast(brand_index[:, None], dtype=tf.int32), [1, num_columns])
        if restart_indices is None:
            restart_indices = tf.zeros_like(brand_indices)
        return tf.cast(tf.stack([restart_indices, wholesaler_indices, brand_indices], axis=2), tf.int64)

    def __call__(
        self,
        batch: LinearBaselineInput,
        training=False,  # noqa: U100
        debug=False,
        skip_metrics=False,
    ) -> LinearBaselineIntermediaries:
        """Calculate baseline using slope-intercept form (y=mx+c).

        Args:
            batch (LinearBaselineInput): Input bundle. The fields read here are:
                ``hierarchy["wholesaler"]`` / ``hierarchy["brand"]`` (per-row lookup indices),
                ``sales_num_restarts`` (restart index per row and column),
                ``dates_since_start`` (only when slopes are allowed) and
                ``mask`` (only for the auxiliary loss; may be None).
            training (bool, optional): Unused. Defaults to False.
            debug (bool, optional): When True the intermediate tensors are returned as well.
                Defaults to False.
            skip_metrics (bool, optional): When True the ``base_under_0`` auxiliary loss is not added.
                Defaults to False.

        Returns:
            LinearBaselineIntermediaries: ``baseline`` is always set; ``intercept``, ``slope``,
            ``slope_impact`` and ``baseline_raw`` are None unless ``debug`` is True.
        """
        indices = self.get_indices(batch.hierarchy["wholesaler"], batch.hierarchy["brand"], batch.sales_num_restarts)
        # Gather, for every element of the batch, the intercept of the baseline it belongs to, so that
        # all data points of one baseline share the same intercept. Which baseline applies is given by
        # `sales_num_restarts` together with the wholesaler/brand indices.
        # NOTE: Issue in M1 Macbook version of tensorflow causes gather_nd to break when operating
        #       on a variable. tf.convert_to_tensor solves this. Open github issue can be found here:
        #       https://github.com/tensorflow/tensorflow/issues/57549
        broadcasted_intercept = tf.gather_nd(tf.convert_to_tensor(self.baseline_intercept), indices) * tf.constant(
            self.lr_scale, dtype=tf.float32
        )
        if self.allow_slope:
            broadcasted_slope = tf.gather_nd(tf.convert_to_tensor(self.baseline_slope), indices)
            # `dates_since_start` must already be float; to convert, run:
            # dates_since_start = tf.cast(dates_since_start, dtype=tf.float32, name="dates_since_start")
            # Dates are normalised by their maximum along axis 1, so the slope term spans [0, 2 * slope].
            slope_impact = (
                broadcasted_slope
                * batch.dates_since_start
                * 2.0
                / (tf.math.reduce_max(batch.dates_since_start, axis=1, keepdims=True, name="max_dates") + EPSILON)
            )

            baseline_raw = slope_impact + broadcasted_intercept
        else:
            broadcasted_slope = tf.zeros_like(broadcasted_intercept)
            slope_impact = tf.zeros_like(broadcasted_intercept)
            baseline_raw = broadcasted_intercept
        if self.allow_slope and not skip_metrics:
            mask_weekly = (
                tf.cast(batch.mask, dtype=tf.float32) if batch.mask is not None else tf.ones_like(baseline_raw)
            )
            # Masked-out entries are replaced by LARGE_EPSILON before taking the minimum.
            baseline_raw_masked_for_min = baseline_raw * mask_weekly + (1.0 - mask_weekly) * LARGE_EPSILON
            # NOTE(review): this reduces over axis 0 while the date normalisation above reduces over
            #               axis 1; a "per product" minimum would presumably use the same axis - confirm.
            min_base = tf.reduce_min(baseline_raw_masked_for_min, axis=0, keepdims=True)
            # If the minimum raw baseline is very negative (below about -10), add a loss: the softplus
            # of (-10 - min_base) grows as min_base drops further below -10.
            min_base_with_0_soft = softplus(-10.0 - min_base, AUX_SCALE, name="min_base_with_0_soft")
            # Sum of squares of that signal.
            base_under_0 = tf.reduce_sum(tf.square(min_base_with_0_soft), name="base_under_0")
            self.add_loss("base_under_0", base_under_0, "aux", self.base_under_0_lambda)
        baseline = softplus(baseline_raw, scale=1 / self.scalar_so_softplus_approaches_0)
        return LinearBaselineIntermediaries(
            intercept=broadcasted_intercept if debug else None,
            slope=broadcasted_slope if debug else None,
            slope_impact=slope_impact if debug else None,
            baseline_raw=baseline_raw if debug else None,
            baseline=baseline,
        )

`__call__(batch, training=False, debug=False, skip_metrics=False)`

Calculate baseline using slope-intercept form (y=mx+c).

Parameters:

Name	Type	Description	Default
`dates_since_start`	`TensorLike`	Number of timestamps since the last restart. shape = num_time x num_granular.	required
`sales_num_restarts`	`TensorLike`	Number of restarts that occurred before this point. shape = num_time x num_granular.	required
`hierarchy`	`dict[str, TensorLike]`	The lookup tables for categorical values.	required
`mask`	`TensorLike`	Filter for 0 sales or unrealistic sales.	required
`training`	`bool`	Whether training the layer parameters or not. Defaults to False.	`False`

Returns:

Name	Type	Description
`LinearBaselineIntermediaries`	`LinearBaselineIntermediaries`	Intermediate calculations for baseline like slope, intercept, etc.

Source code in wt_ml/layers/linear_baseline.py

def __call__(
    self,
    batch: LinearBaselineInput,
    training=False,  # noqa: U100
    debug=False,
    skip_metrics=False,  # noqa: U100
) -> LinearBaselineIntermediaries:
    """Calculate baseline using slope-intercept form (y=mx+c).

    Args:
        batch (LinearBaselineInput): Input bundle. Fields read here: ``hierarchy["wholesaler"]``,
            ``hierarchy["brand"]``, ``sales_num_restarts``, ``dates_since_start`` (slope only) and
            ``mask`` (auxiliary loss only; may be None).
        training (bool, optional): Unused. Defaults to False.
        debug (bool, optional): Return the intermediate tensors as well. Defaults to False.
        skip_metrics (bool, optional): Do not add the ``base_under_0`` auxiliary loss.
            Defaults to False.

    Returns:
        LinearBaselineIntermediaries: ``baseline`` is always populated; the other fields are None
        unless ``debug`` is True.
    """
    lookup = self.get_indices(batch.hierarchy["wholesaler"], batch.hierarchy["brand"], batch.sales_num_restarts)
    # Every element of the batch picks up the intercept of the baseline it belongs to, as addressed
    # by its restart / wholesaler / brand indices.
    # NOTE: gather_nd on a variable breaks on the M1 Macbook build of tensorflow, hence the
    #       tf.convert_to_tensor. See https://github.com/tensorflow/tensorflow/issues/57549
    gathered_intercept = tf.gather_nd(tf.convert_to_tensor(self.baseline_intercept), lookup)
    intercept = gathered_intercept * tf.constant(self.lr_scale, dtype=tf.float32)

    if not self.allow_slope:
        slope = tf.zeros_like(intercept)
        slope_term = tf.zeros_like(intercept)
        raw = intercept
    else:
        slope = tf.gather_nd(tf.convert_to_tensor(self.baseline_slope), lookup)
        # dates_since_start has to be float already; cast it upstream with
        # tf.cast(dates_since_start, dtype=tf.float32, name="dates_since_start") if it is not.
        scaled_dates = slope * batch.dates_since_start * 2.0
        longest_span = (
            tf.math.reduce_max(batch.dates_since_start, axis=1, keepdims=True, name="max_dates") + EPSILON
        )
        slope_term = scaled_dates / longest_span
        raw = slope_term + intercept

        if not skip_metrics:
            if batch.mask is None:
                weights = tf.ones_like(raw)
            else:
                weights = tf.cast(batch.mask, dtype=tf.float32)
            # Masked-out entries are swapped for LARGE_EPSILON before the minimum is taken.
            raw_for_min = raw * weights + (1.0 - weights) * LARGE_EPSILON
            lowest_raw = tf.reduce_min(raw_for_min, axis=0, keepdims=True)
            # Penalise a raw baseline that falls below roughly -10: the softplus of
            # (-10 - lowest_raw) grows the further the minimum drops.
            shortfall = softplus(-10.0 - lowest_raw, AUX_SCALE, name="min_base_with_0_soft")
            # The loss is the sum of squares of that shortfall.
            penalty = tf.reduce_sum(tf.square(shortfall), name="base_under_0")
            self.add_loss("base_under_0", penalty, "aux", self.base_under_0_lambda)

    baseline = softplus(raw, scale=1 / self.scalar_so_softplus_approaches_0)
    if debug:
        return LinearBaselineIntermediaries(
            intercept=intercept,
            slope=slope,
            slope_impact=slope_term,
            baseline_raw=raw,
            baseline=baseline,
        )
    return LinearBaselineIntermediaries(
        intercept=None,
        slope=None,
        slope_impact=None,
        baseline_raw=None,
        baseline=baseline,
    )

`__init__(starting_sales, num_starts, encodings, hyperparameters=None, name=None)`

Class initialization to create linear regression lines for calculating baseline, for each granularity.

Parameters:

Name	Type	Description	Default
`starting_sales`	`ndarray`	Sales at the start of each baseline. shape = num_starts x num_granularity.	required
`num_starts`	`int`	No. of starting points for each granularity.	required
`hyperparameters`	`Hyperparams`	All hyperparameters.	`None`
`name`	`str \| None`	Name of the layer. Defaults to None.	`None`

Source code in wt_ml/layers/linear_baseline.py

def __init__(
    self,
    starting_sales: np.ndarray,
    num_starts: int,
    encodings: dict[str, int],
    hyperparameters: Hyperparams | None = None,
    name: str | None = None,
):
    """Store the inputs needed later to build the per-granularity linear baseline.

    Args:
        starting_sales (np.ndarray): Sales at the start of each baseline.
        num_starts (int): No. of starting points for each granularity.
        encodings (dict[str, int]): Lookup keyed by hierarchy level name.
        hyperparameters (Hyperparams, optional): All hyperparameters.
        name (str | None, optional): Name of the layer. Defaults to None.
    """
    super().__init__(hyperparameters=hyperparameters, name=name)
    # Plain attribute storage, assigned left to right in the same order as the parameters.
    self.starting_sales, self.num_starts, self.encodings = starting_sales, num_starts, encodings

`build(input_shapes)`

Build the layer parameters needed for calculating linear baseline.

Parameters:

Name	Type	Description	Default
`input_shapes`	`Tuple[Tensor, ...]`	Tuple of tensor shapes of `*args`(without the defaults) passed to `__call__()`.	required

Source code in wt_ml/layers/linear_baseline.py

def build(self, input_shapes):
    """Read the hyperparameters and create the variables of the linear baseline.

    Args:
        input_shapes: Object exposing ``sales_num_restarts`` and ``dates_since_start`` attributes;
            an attribute equal to ``...`` (Ellipsis) switches off restarts or slopes respectively.

    Raises:
        ValueError: If perfect adjustment is enabled while more than one start is configured.
    """

    def raw_weight_for(target):
        # Inverse softplus at the configured temperature, divided by the learning-rate scale.
        temperature = self.scalar_so_softplus_approaches_0
        pre_activation = target + np.log(-np.expm1(-target * temperature) + EPSILON) / temperature
        return pre_activation / self.lr_scale

    if input_shapes.sales_num_restarts is ...:
        self.num_starts = 1
    num_wholesalers = len(self.encodings["wholesaler"])
    num_brands = len(self.encodings["brand"])
    shape = [self.num_starts, num_wholesalers, num_brands]

    self.use_perfect_adjustment = self.hyperparameters.get_bool(
        "use_perfect_adjustment",
        default=False,
        help="Instead of using gradient descent change directly to the optimal values.",
    )
    if not self.use_perfect_adjustment:
        self.lr_scale = self.hyperparameters.get_float(
            "lr_scale",
            default=10.0,
            min=1.0,
            max=1000.0,
            help="A factor to multiply the raw weights by so they get larger gradients.",
        )
        self.scalar_so_softplus_approaches_0 = self.hyperparameters.get_float(
            "scalar_so_softplus_approaches_0",
            default=1.0,
            min=1.0,
            max=100.0,
            help="The temperature to apply to softplus to max it better approximate the relu function.",
        )
        slope_requested = self.hyperparameters.get_bool(
            "allow_slope",
            default=False,
            help="Whether to allow slope in baseline",
        )
        self.allow_slope = slope_requested and input_shapes.dates_since_start is not ...
    else:
        # Values are written directly in this mode: no rescaling, unit temperature, no slope.
        self.lr_scale = 1.0
        if self.num_starts > 1:
            raise ValueError("Perfect adjustments does not support restarts at this time.")
        self.scalar_so_softplus_approaches_0 = 1.0
        self.allow_slope = False
        self.perfection_speed = self.hyperparameters.get_float(
            "perfection_speed",
            default=0.99,
            min=0.01,
            max=1.0,
            help="How close to make the step to the optimal value.",
        )
        self.relative_scale_cap = self.hyperparameters.get_float(
            "relative_scale_cap",
            default=10.0,
            min=1.0,
            max=1000.0,
            help="The maximum ratio it can have to the provided initial value.",
        )
        # Raw-weight ceiling corresponding to `relative_scale_cap` times the starting sales.
        capped_sales = self.relative_scale_cap * self.starting_sales
        self.upper_bound = np.tile(raw_weight_for(capped_sales), (self.num_starts, 1, 1))

    # Seed at 80% of the starting sales: a little lower, to allow roicurves to start higher.
    seed_weights = raw_weight_for(0.8 * self.starting_sales).astype(np.float32)
    self.baseline_intercept = self.create_var(
        "intercept",
        shape=shape,
        dtype=tf.float32,
        initializer=np.tile(seed_weights, (self.num_starts, 1, 1)),
        trainable=not self.use_perfect_adjustment,
    )
    if not self.allow_slope:
        return
    self.baseline_slope = self.create_var(
        "slope", shape=shape, dtype=tf.float32, trainable=not self.use_perfect_adjustment
    )
    self.base_under_0_lambda = self.hyperparameters.get_float(
        "base_under_0_lambda",
        default=1.0e-03,
        min=1.0e-08,
        max=1.0,
        help="The weight for the loss applied to the baseline being below -10 before the softplus.",
    )

LinearBaseline

__call__(batch, training=False, debug=False, skip_metrics=False)

__init__(starting_sales, num_starts, encodings, hyperparameters=None, name=None)

build(input_shapes)

`LinearBaseline`

`__call__(batch, training=False, debug=False, skip_metrics=False)`

`__init__(starting_sales, num_starts, encodings, hyperparameters=None, name=None)`

`build(input_shapes)`