Separable - WatchTower Documentation

`SeparableNetwork`

Bases: ModelBasedNetwork
A model based on separable convex programming methods. See https://www.semanticscholar.org/paper/PROBLEM-DECOMPOSITION-IN-BLOCK-SEPARABLE-CONVEX-%3A-Rockafellar/faa78bbd402ff79b1149948c39d14b806f9e7caf
Source code in wt_ml/optimizer/separable/separable_opt.py
class SeparableNetwork(ModelBasedNetwork):
    """A model based on separable convex programming methods.

    Reference:
    https://www.semanticscholar.org/paper/PROBLEM-DECOMPOSITION-IN-BLOCK-SEPARABLE-CONVEX-%3A-Rockafellar/faa78bbd402ff79b1149948c39d14b806f9e7caf

    For every active constraint ``i`` the network keeps three non-trainable variables created in
    ``build``: ``w_v_i``, ``y_v_i`` and ``eta_i``. ``train_step`` takes one optimizer step on the
    penalised objective computed in ``get_grads``; ``take_meta_step`` then refreshes ``x_v``,
    ``w_v_i`` and ``y_v_i`` from the accumulated ``eta_i`` and resets ``eta_i`` to zero.
    """  # noqa: E501

    # Position of each named axis in the `appeared` tensor, which is laid out as
    # [num_wholesaler, num_brand, num_vehicles].
    AXIS_MAPPING: dict[AxisType, int] = {
        "wholesaler": 0,
        "brand": 1,
        "vehicle": 2,
    }

    def __init__(
        self,
        network: EconomicNetwork,
        constraint_specs: list[Constraint],
        dataset_factory: DatasetFactory,
        optimization_target: OptimizationTarget = "net_revenue",
        exact_spend: float | None = None,
        use_full_date: bool = False,
        ignore_maco: bool = True,
        hyperparameters: Hyperparams | None = None,
        name: str | None = None,
    ):
        """Set up the per-constraint bookkeeping used by the separable optimizer.

        Args:
            network: Forwarded unchanged to ``ModelBasedNetwork.__init__``.
            constraint_specs: Forwarded unchanged to ``ModelBasedNetwork.__init__``; the constraints
                are subsequently read back from ``self.constraint_specs``.
            dataset_factory: Iterated once here to record which entries of the
                [wholesaler, brand, vehicle] grid appear in the data.
            optimization_target: Forwarded unchanged to ``ModelBasedNetwork.__init__``.
            exact_spend: Forwarded unchanged to ``ModelBasedNetwork.__init__``.
            use_full_date: Forwarded unchanged to ``ModelBasedNetwork.__init__``.
            ignore_maco: Forwarded unchanged to ``ModelBasedNetwork.__init__``.
            hyperparameters: Forwarded unchanged to ``ModelBasedNetwork.__init__``.
            name: Forwarded unchanged to ``ModelBasedNetwork.__init__``.

        Raises:
            ValueError: If none of the constraints matches any entry that appears in the data.
        """
        super().__init__(
            network=network,
            constraint_specs=constraint_specs,
            optimization_target=optimization_target,
            exact_spend=exact_spend,
            use_full_date=use_full_date,
            use_full_vehicle=False,
            ignore_maco=ignore_maco,
            hyperparameters=hyperparameters,
            name=name,
        )
        if any(con.signal_type != "spend" and not con.negate for con in self.constraint_specs):
            logger.warning(
                "Cannot guarantee finding an optimal solution in the case of maximum constraints on outputs."
            )

        # Build a 0/1 tensor marking which grid entries are present in the dataset.
        num_wholesaler = len(self.trained_net.data_encodings["wholesaler"])
        num_brand = len(self.trained_net.data_encodings["brand"])
        # Placeholder that is kept only if `dataset_factory` yields no batches.
        self.appeared = tf.zeros([1, 1, 1], dtype=tf.float32)
        for i, batch in enumerate(dataset_factory):
            if i == 0:
                # The vehicle count is only known once a batch has been seen.
                num_vehicles = (
                    batch.vehicle_spends.shape[2] if hasattr(batch, "vehicle_spends") else batch.spends.shape[2]
                )
                self.appeared = tf.zeros([num_wholesaler, num_brand, num_vehicles], dtype=tf.float32)
            indices = self.get_indices(batch, False)
            # Add one per index row; the update shape is the index batch shape followed by the trailing
            # `appeared` axes starting at position len(indices.shape).
            self.appeared = tf.tensor_scatter_nd_add(
                self.appeared,
                indices,
                tf.ones([*indices.shape[:-1], *self.appeared.shape[len(indices.shape) :]], dtype=tf.float32),
            )
        # Entries hit by several batches are clipped back to 1 so `appeared` is an indicator.
        self.appeared = tf.math.minimum(1.0, self.appeared)

        # The four lists below are parallel and hold one entry per *active* constraint, i.e. a
        # constraint that selects at least one entry that appeared in the data.
        self.constraint_params = []
        self.counts = []
        self.mask_constraints = []
        self.correct_sums = []
        self.volume_scale_factor = self.hyperparameters.get_float(
            "volume_scale_factor",
            default=300.0,
            min=1.0,
            max=1e04,
            help="The scaling factor for volume constraints to make them a similar magnitude to revenue/spend.",
        )
        for constraint in self.constraint_specs:
            # Count how many appeared entries the constraint selects.
            gathered = self.appeared
            for axis_name, indices in constraint.gathers:
                axis = self.AXIS_MAPPING[axis_name]
                gathered = tf.gather(gathered, indices, axis=axis)
            count = tf.reduce_sum(gathered).numpy()
            if count > 0:
                multiplier = -1 if constraint.negate else 1
                # Constraint footprint over the full grid, restricted to entries that appeared.
                broadcasted = (
                    constraint.broadcast(self.appeared.shape, axis_mapping=self.AXIS_MAPPING).numpy() * self.appeared
                )
                # Signed bound, scaled the same way the volume signals are scaled in `get_grads`.
                correct_sum = (
                    constraint.max_value
                    * multiplier
                    * (self.volume_scale_factor if "volume" in constraint.signal_type else 1.0)
                )
                self.correct_sums.append(correct_sum)
                # Spread the bound evenly over the selected entries.
                offset = correct_sum / count * broadcasted
                self.mask_constraints.append(broadcasted)
                self.counts.append(count)
                self.constraint_params.append(
                    (
                        constraint.signal_type,
                        constraint.negate,
                        offset,
                    )
                )
            else:
                logger.warning(f"{constraint} does not exist in the data")
        if not self.constraint_params:
            raise ValueError("Must supply at least one active constraint for separable optimization.")

    def build(self, input_shapes):
        """Create the auxiliary variables and read the ``r`` and ``c`` hyperparameters.

        Args:
            input_shapes: Forwarded unchanged to the parent ``build``.
        """
        super().build(input_shapes)
        # Same shape as `all_vehicle_spends` with axis 2 dropped (axes 0, 1 and 3 are kept).
        shape = [*self.all_vehicle_spends.shape[:2], self.all_vehicle_spends.shape[3]]
        # Spends summed over axis 2; refreshed at every meta step.
        self.x_v = self.create_var(
            "x_v",
            shape=shape,
            dtype=tf.float32,
            initializer=tf.reduce_sum(self.vehicle_spends, axis=2),
            trainable=False,
        )

        # NOTE(review): w_v starts at -offset, while `take_meta_step` rescales w_v so that it sums to
        # `correct_sum`; confirm that the initial sign is intended.
        self.w_vs = [
            self.create_var(
                f"w_v_{i}",
                shape=shape,
                dtype=tf.float32,
                initializer=-off,
                trainable=False,
            )
            for i, (_, _, off) in enumerate(self.constraint_params)
        ]
        self.y_vs = [
            self.create_var(
                f"y_v_{i}",
                shape=shape,
                dtype=tf.float32,
                initializer=tf.math.abs(off),
                trainable=False,
            )
            for i, (_, _, off) in enumerate(self.constraint_params)
        ]
        # Accumulators written in `get_grads` and consumed/reset in `take_meta_step`.
        self.etas = [
            self.create_var(
                f"eta_{i}",
                shape=shape,
                dtype=tf.float32,
                initializer=0.0,
                trainable=False,
            )
            for i, _ in enumerate(self.constraint_params)
        ]
        self.r = self.hyperparameters.get_float(
            "r", default=1.0, min=1e-02, max=1e06, help="The strength of the constraints within each meta step."
        )
        self.c = self.hyperparameters.get_float(
            "c", default=1.0, min=1e-04, max=1e02, help="The size of each meta step of spends."
        )

    def get_grads(self, batch: EconomicModelInput | ExtendedROICurveInput, all_grads: bool = False):
        """Evaluate the penalised objective on ``batch`` and differentiate it w.r.t. the spends.

        The objective is ``changes_weight * changes - target + total_constraint_penalty`` where
        ``changes`` is the squared distance between the current spends and ``x_v``, ``target`` is the
        summed optimization target and the penalty is accumulated over all active constraints.

        As a side effect every ``eta_i`` is overwritten at the batch's (wholesaler, brand) indices
        with ``max(0, y_v + r * constraint_term)``.

        Args:
            batch: The model input to evaluate.
            all_grads: When True the gradient tape is created as persistent.

        Returns:
            A tuple ``(grads, metrics)``. ``grads`` maps ``"objective"`` to the gradient of the
            objective with respect to ``self.all_vehicle_spends``. ``metrics`` holds the total spend,
            the loss, the negated worst (largest) summed constraint term, the norms of every
            ``y_v``/``w_v`` variable and the sum of every entry of the computed objectives.

        Raises:
            ValueError: If ``batch`` is neither an ``EconomicModelInput`` nor an
                ``ExtendedROICurveInput``.
        """
        self.clear()
        self.trained_net.clear()
        # (wholesaler, brand) index pairs used to scatter the per-batch eta values.
        if isinstance(batch, EconomicModelInput):
            gran_indices = tf.stack([batch.wholesaler_index, batch.brand_index], axis=1, name="granularity_indices")
        elif isinstance(batch, ExtendedROICurveInput):
            gran_indices = tf.stack(
                [batch.hierarchy["wholesaler"][:, 0], batch.hierarchy["brand"][:, 0]],
                axis=1,
                name="granularity_indices",
            )
        else:
            raise ValueError("Batch was not a valid type for a SeparableNetwork.")
        r = tf.constant(self.r, dtype=tf.float32, name="r")
        # Slices of the auxiliary variables that correspond to this batch.
        spends_v = self.do_gathers(self.x_v, batch, include_time=False, name="gathered_x_v")
        y_vs = [
            self.do_gathers(y_v, batch, include_time=False, name=f"gathered_y_v_{i}") for i, y_v in enumerate(self.y_vs)
        ]
        w_vs = [
            self.do_gathers(w_v, batch, include_time=False, name=f"gathered_w_v_{i}") for i, w_v in enumerate(self.w_vs)
        ]
        # Only `all_vehicle_spends` is watched, so gradients flow to the spends alone.
        with tf.GradientTape(watch_accessed_variables=False, persistent=all_grads) as tape:
            tape.watch(self.all_vehicle_spends)
            intermediaries = self(batch, training=True)
            objectives = self.calculate_objectives(intermediaries, batch)
            constraint_values = []
            total_constraint_penalty = tf.constant(0, dtype=tf.float32)
            for i, ((signal_type, negate, _), y_v, w_v, eta, mask_cont) in enumerate(
                zip(self.constraint_params, y_vs, w_vs, self.etas, self.mask_constraints)
            ):
                cont_mask = self.do_gathers(
                    tf.constant(mask_cont, dtype=tf.float32, name=f"cont_mask_{i}"), batch, include_time=False
                )
                # Restrict the signal to the entries this constraint applies to.
                signal = objectives[signal_type] * cont_mask
                if "volume" in signal_type:
                    # Same scaling that was applied to the bound in `__init__`.
                    signal = (
                        tf.constant(self.volume_scale_factor, dtype=tf.float32, name="volume_scale_factor") * signal
                    )
                if negate:
                    constraint_term = -signal - w_v
                else:
                    constraint_term = signal - w_v
                adjusted_constraint = y_v + r * constraint_term
                # Piecewise penalty: constant in the spends where the adjusted constraint is
                # non-positive, linear-plus-quadratic in the constraint term elsewhere.
                constraint_penalty = tf.where(
                    adjusted_constraint <= 0,
                    -tf.math.square(y_v) / 2 / r,
                    y_v * constraint_term + r * tf.math.square(constraint_term) / 2,
                )
                # Remember the clipped adjusted constraint for the next meta step.
                eta.scatter_nd_update(gran_indices, tf.maximum(tf.constant(0, dtype=tf.float32), adjusted_constraint))
                constraint_values.append(tf.math.reduce_sum(constraint_term))
                total_constraint_penalty = total_constraint_penalty + tf.math.reduce_sum(constraint_penalty)
            if self.optimization_target == "neg_spend":
                target = -tf.math.reduce_sum(objectives["spend"])
            else:
                target = tf.math.reduce_sum(objectives[self.optimization_target])
            # Squared distance of the current spends from the last meta-step spends `x_v`.
            changes = tf.math.reduce_sum(
                tf.math.squared_difference(objectives["spend"], spends_v, name="l2_spend_changes")
            )
            changes_weight = tf.constant(1 / 2 / self.c, dtype=tf.float32, name="changes_weight")
            # The objective is minimised, hence the target enters with a negative sign.
            objective = changes_weight * changes - target + total_constraint_penalty
        worst_constraint = tf.math.reduce_max(tf.stack(constraint_values, axis=-1), name="worst_constraint")
        grads = {"objective": tape.gradient(objective, self.all_vehicle_spends)}
        return (
            grads,
            {
                "total_spend": tf.math.reduce_sum(self.vehicle_spends),
                "loss": objective,
                "worst_constraint": -worst_constraint,
            }
            # The norms are taken over the full variables, not the per-batch gathered slices.
            | {f"y_v_{i}_norm": tf.norm(y_v, axis=None) for i, y_v in enumerate(self.y_vs)}
            | {f"w_v_{i}_norm": tf.norm(w_v, axis=None) for i, w_v in enumerate(self.w_vs)}
            | {k: tf.reduce_sum(v) for k, v in objectives.items()},
        )

    @tf.function
    def calculate_grad_magnitudes(self, batch: EconomicModelInput | ExtendedROICurveInput):
        """Return the norm of every gradient produced by ``get_grads`` for ``batch``.

        Args:
            batch: The model input to evaluate.

        Returns:
            A dict with the same keys as the gradients dict of ``get_grads``, mapping each key to
            the norm of the densified gradient.
        """
        grads, _ = self.get_grads(batch, all_grads=True)
        return {k: tf.norm(to_dense(grad)) for k, grad in grads.items()}

    @tf.function
    def train_step(self, batch: EconomicModelInput | ExtendedROICurveInput, return_grads: bool = False):
        """Apply one optimizer step on the spends for ``batch``.

        Args:
            batch: The model input to evaluate.
            return_grads: When True the densified objective gradient is returned as well.

        Returns:
            ``(targets, targets, step)`` where ``targets`` is the metrics dict of ``get_grads`` and
            ``step`` is the incremented step counter; with ``return_grads`` a fourth element maps the
            name of ``all_vehicle_spends`` to the densified objective gradient.
        """
        grads, targets = self.get_grads(batch)
        self.optimizer.apply_gradients(((grads["objective"], self.incremental_vehicle_spends),))
        if not self.exact_spend:
            # Without an exact spend target, clip the updated spends at zero.
            self.all_vehicle_spends.assign(tf.math.maximum(0.0, self.incremental_vehicle_spends))
        step = self._step_var.assign_add(1)
        if return_grads:
            gradients_tracker = {
                self.all_vehicle_spends.name: to_dense(grads["objective"]),
            }
            return (targets, targets, step, gradients_tracker)
        else:
            return (targets, targets, step)

    @tf.function
    def take_meta_step(self):
        """Refresh ``x_v``, every ``w_v`` and every ``y_v`` from the accumulated ``eta`` values.

        For each active constraint the masked ``eta`` is compared with its mean over the constraint's
        entries: ``w_v`` moves by ``(eta - mean) / (2 r)`` and is rescaled to sum to the constraint's
        ``correct_sum``, ``y_v`` becomes ``(eta + mean) / 2`` and ``eta`` is reset to zero.

        Returns:
            The ``mean_etas`` list.
        """
        self.x_v.assign(tf.math.reduce_sum(self.vehicle_spends, axis=2))
        # NOTE(review): nothing is ever appended to this list, so an empty list is returned;
        # confirm whether the per-constraint `mean_eta` was meant to be collected here.
        mean_etas = []
        for i, (w_v, y_v, eta, count, mask_cont, correct_sum) in enumerate(
            zip(self.w_vs, self.y_vs, self.etas, self.counts, self.mask_constraints, self.correct_sums)
        ):
            cont_mask = tf.constant(mask_cont, dtype=tf.float32, name=f"cont_mask_{i}")
            eta_masked = eta * cont_mask
            # Mean of eta over the constraint's entries, broadcast back onto those entries.
            mean_eta = (
                tf.reduce_sum(eta_masked, name="eta_sum")
                / tf.constant(count, dtype=tf.float32, name="counts")
                * cont_mask
            )
            w_v_inc = tf.constant(1 / 2 / self.r, dtype=tf.float32, name="half_r_recip") * (eta_masked - mean_eta)
            new_w_v = w_v + w_v_inc
            # Rescale so that w_v sums to the constraint's signed bound.
            # NOTE(review): this divides by reduce_sum(new_w_v); a zero sum would yield non-finite
            # values - confirm that it cannot occur.
            w_v.assign(tf.constant(correct_sum, dtype=tf.float32) / tf.math.reduce_sum(new_w_v) * new_w_v)
            y_v.assign(tf.constant(1 / 2, dtype=tf.float32, name="half") * (eta_masked + mean_eta))
            eta.assign(tf.zeros_like(eta))
        return mean_etas

    def train(
        self,
        dataset_factory: Callable[[], Iterable],
        num_steps: int,
        epochs: int = 1,
        verbosity: bool | int = 1,
        print_keys: str | Sequence[str] = "all",
        callbacks: Sequence[Callback] = (),
        track_grads: bool | int = False,
        smoothing: float = 0.0,
        min_interval: float = 0.25,
        unit_scale: bool = True,
        position: int | None = None,
        tqdm_args: dict[str, Any] | None = None,
        epochs_per_step: int = 8,
        **kwargs,
    ):
        """Run the parent training loop, making sure a ``MetaStepCallback`` is installed.

        Args:
            dataset_factory: Forwarded unchanged to the parent ``train``.
            num_steps: Forwarded unchanged to the parent ``train``.
            epochs: Forwarded unchanged to the parent ``train``.
            verbosity: Forwarded unchanged to the parent ``train``.
            print_keys: Forwarded unchanged to the parent ``train``.
            callbacks: Callbacks for the parent ``train``. If none of them is a ``MetaStepCallback``
                one is created and placed in front of the supplied callbacks.
            track_grads: Forwarded unchanged to the parent ``train``.
            smoothing: Forwarded unchanged to the parent ``train``.
            min_interval: Forwarded unchanged to the parent ``train``.
            unit_scale: Forwarded unchanged to the parent ``train``.
            position: Forwarded unchanged to the parent ``train``.
            tqdm_args: Forwarded to the parent ``train``; ``None`` (the default) is replaced by a
                fresh empty dict so that no dict object is shared between calls.
            epochs_per_step: First argument of the automatically created ``MetaStepCallback``; unused
                when the caller already supplies a ``MetaStepCallback``.
            **kwargs: Forwarded unchanged to the parent ``train``.

        Returns:
            Whatever the parent ``train`` returns.
        """
        if tqdm_args is None:
            tqdm_args = {}
        if not any(isinstance(callback, MetaStepCallback) for callback in callbacks):
            meta_step_callback = MetaStepCallback(epochs_per_step, verbosity=1)
            callbacks = (meta_step_callback, *callbacks)
        return super().train(
            dataset_factory=dataset_factory,
            num_steps=num_steps,
            epochs=epochs,
            verbosity=verbosity,
            print_keys=print_keys,
            callbacks=callbacks,
            track_grads=track_grads,
            smoothing=smoothing,
            min_interval=min_interval,
            unit_scale=unit_scale,
            position=position,
            tqdm_args=tqdm_args,
            **kwargs,
        )