class ConstrainedPLNetwork(ModelBasedNetwork):
    def build(self, input_shapes):
        super().build(input_shapes)
        self.barrier_strength = self.hyperparameters.get_float(
            "barrier_strength",
            default=1.0,
            min=1.0,
            max=1e08,
            help="The strength of the barriers on the borders of the constraints.",
        )

    def get_constraints(self) -> tf.Tensor:
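        """Return a 1-D tensor with one signed slack value per constraint spec.

        Positive entries mean the constraint is satisfied, negative entries mean it is
        violated (see the barrier term in ``get_grads``).
        """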
        # We use a custom gradient here to keep memory usage from exploding with the number of constraints.
        # The gradient is highly structured, and we exploit that structure to drastically reduce memory requirements.
        @tf.custom_gradient
        def inner(all_vehicle_spends):
            gathers = []
            for constraint in self.constraint_specs:
                gathered = all_vehicle_spends
                for axis_name, indices in constraint.gathers:
                    axis = self.AXIS_MAPPING[axis_name]
                    gathered = tf.gather(gathered, tf.constant(indices, dtype=tf.int32), axis=axis)
                constrained = tf.reduce_sum(gathered) - constraint.max_value
                if not constraint.negate:
                    constrained = constrained * -1.0
                gathers.append(constrained)

            def grad(upstream):
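                # ``upstream`` holds one incoming cotangent per constraint. Each forward value is a
                # (possibly negated) sum over a gathered slice of the spend tensor, so its gradient is
                # that scalar scattered back over the slice; ``constraint.broadcast`` performs the
                # scatter, and the sign mirrors the negation applied in the forward pass.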
                zeros = tf.zeros_like(all_vehicle_spends)
                upstreams = tf.unstack(upstream, axis=0, num=len(self.constraint_specs))
                for scalar, constraint in zip(upstreams, self.constraint_specs):
                    zeros = zeros + constraint.broadcast(
                        self.all_vehicle_spends.shape, scalar=(1.0 if constraint.negate else -1.0) * scalar
                    )
                return zeros

            return (
                tf.stack(
                    gathers,
                    axis=0,
                    name="constraints",
                ),
                grad,
            )

        return inner(self.vehicle_spends)

    def get_grads(self, batch: EconomicModelInput | ExtendedROICurveInput, all_grads: bool = False):
        """
        The algorithm is a first-order version of
        https://en.wikipedia.org/wiki/Interior-point_method#Primal-dual_interior-point_method_for_nonlinear_optimization
        where, instead of using Newton's method to find a zero of the gradient, we do gradient descent directly.
        """
        self.clear()
        self.trained_net.clear()
        mu = self.optimizer.learning_rate * tf.constant(self.barrier_strength, dtype=tf.float32)
        with tf.GradientTape(watch_accessed_variables=False, persistent=all_grads) as tape:
            tape.watch(self.all_vehicle_spends)
            intermediaries = self(batch, training=True)
            targets = self.calculate_objectives(intermediaries, batch)
            if self.optimization_target == "neg_spend":
                objective = -tf.math.reduce_sum(targets["spend"])
            else:
                objective = tf.math.reduce_sum(targets[self.optimization_target])
            for key, value in targets.items():
                self.add_metric(key, tf.math.reduce_sum(value, axis=1))
            self.add_metric("objective", tf.math.reduce_sum(targets[self.optimization_target], axis=1))
            barrier = tf.constant(0, dtype=tf.float32)
            worst_constraint = tf.constant(0, dtype=tf.float32)
            if len(self.constraint_specs) > 0:
                constraint_tensors = self.get_constraints()
                # Negative constraint values mean the constraint is violated, so this helps monitor that.
                worst_constraint = tf.math.reduce_min(constraint_tensors)
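                # Barrier term below: where a constraint is satisfied (c > 0) we add -mu * log(c);
                # where it is violated, c - stop_gradient(c) + 1e-05 evaluates to 1e-05 but still
                # carries d/dc = 1, so the log keeps a large, finite gradient pushing the solution
                # back toward feasibility instead of taking the log of a non-positive number.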
                barrier = -tf.math.reduce_sum(
                    tf.where(constraint_tensors > 0, mu, tf.math.maximum(1e-03, mu))
                    * tf.math.log(
                        tf.where(
                            constraint_tensors > 0,
                            constraint_tensors,
                            constraint_tensors - tf.stop_gradient(constraint_tensors) + 1e-05,
                        )
                    )
                )
            guarded_objective = objective + barrier
        grads = {"guarded_objective": tape.gradient(guarded_objective, self.all_vehicle_spends)}
        if all_grads:
            grads["objective"] = tape.gradient(objective, self.all_vehicle_spends)
            grads["barrier"] = tape.gradient(barrier, self.all_vehicle_spends)
        return (
            grads,
            {
                "total_spend": tf.math.reduce_sum(self.vehicle_spends),
                "loss": guarded_objective,
                "barrier": barrier,
                "worst_constraint": worst_constraint,
            }
            | {k: tf.reduce_sum(v) for k, v in targets.items()},
        )

    @tf.function
    def calculate_grad_magnitudes(self, batch: EconomicModelInput | ExtendedROICurveInput):
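        """Return the L2 norm of each gradient component (objective, barrier, and guarded objective) for diagnostics."""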
        grads, _ = self.get_grads(batch, all_grads=True)
        return {k: tf.norm(to_dense(grad)) for k, grad in grads.items()}

    @tf.function
    def train_step(self, batch: EconomicModelInput | ExtendedROICurveInput, return_grads: bool = False):
        grads, targets = self.get_grads(batch)
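        # Zero out gradient components that would push a spend already at zero further negative
        # (gradient >= 0 under descent); components that increase spend or act on positive spends
        # pass through unchanged.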
        grad = tf.where(
            (grads["guarded_objective"] < 0) | (self.all_vehicle_spends > 0),
            grads["guarded_objective"],
            tf.zeros_like(self.all_vehicle_spends),
        )
        self.optimizer.apply_gradients(((grad, self.all_vehicle_spends),))
        if not self.exact_spend:
            self.all_vehicle_spends.assign(tf.math.maximum(0.0, self.all_vehicle_spends))
        step = self._step_var.assign_add(1)
        if return_grads:
            gradients_tracker = {
                self.all_vehicle_spends.name: to_dense(grads["guarded_objective"]),
            }
            return (targets, targets, step, gradients_tracker)
        else:
            return (targets, targets, step)
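
# A minimal driver sketch (assumptions: the network is built elsewhere and `dataset` yields
# EconomicModelInput / ExtendedROICurveInput batches; `net` and `dataset` are illustrative names only):
#
#     for batch in dataset:
#         metrics, _, step = net.train_step(batch)
#         if step % 100 == 0:
#             print(int(step), {k: float(v) for k, v in metrics.items()})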