Overfit lambda evaluator - WatchTower Documentation

`NFOutput`

Output class to store the results of the overfit lambda feedback process.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py

class NFOutput:
    """Accumulates the results of one overfit lambda feedback pass.

    The two impact objects are stored exactly as passed in. Every other attribute is a
    dictionary keyed by factor name that starts empty and is filled through ``add_outputs``.
    """

    def __init__(self, additive_impacts: pd.DataFrame, current_impact_factors: pd.DataFrame):
        # Impact data supplied by the caller; serialised through their own ``to_dict`` later on.
        self.current_impact_factors = current_impact_factors
        self.additive_impacts = additive_impacts
        # Per-factor records, populated one key at a time by ``add_outputs``.
        self.current_hyperparams = {}
        self.new_params = {}
        self.change_ratios_additive = {}
        self.change_ratios_frac = {}
        self.change_ratios = {}

    def add_outputs(self, key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value):
        """Record one factor's feedback values under ``key`` in each per-factor dictionary."""
        # Pair every result dictionary with the value that belongs in it for this factor.
        targets = (
            (self.current_hyperparams, current_hyperparam),
            (self.change_ratios_additive, cr_additive),
            (self.change_ratios_frac, cr_frac),
            (self.change_ratios, change_ratio),
            (self.new_params, new_value),
        )
        for store, value in targets:
            store[key] = value

    def to_dict(self):
        """Return the collected results as an ordered dictionary.

        The impact objects are converted with their own ``to_dict``; the per-factor
        dictionaries are included as-is (not copied).
        """
        serialised = OrderedDict()
        serialised["impact_percent"] = self.current_impact_factors.to_dict()
        serialised["additive_impacts"] = self.additive_impacts.to_dict()
        serialised["current_hyperparam"] = self.current_hyperparams
        serialised["new_params"] = self.new_params
        serialised["change_ratios_additive"] = self.change_ratios_additive
        serialised["change_ratios_frac"] = self.change_ratios_frac
        serialised["change_ratios"] = self.change_ratios
        return serialised

`add_outputs(key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value)`

Updates the output class attribute dictionaries with respective key-value pairs.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py

def add_outputs(self, key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value):
    """Record one factor's feedback values under ``key`` in each per-factor dictionary."""
    # Pair every result dictionary with the value that belongs in it for this factor.
    targets = (
        (self.current_hyperparams, current_hyperparam),
        (self.change_ratios_additive, cr_additive),
        (self.change_ratios_frac, cr_frac),
        (self.change_ratios, change_ratio),
        (self.new_params, new_value),
    )
    for store, value in targets:
        store[key] = value

`to_dict()`

Converts the output class attributes to an ordered dictionary.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py

def to_dict(self):
    """Return the collected results as an ordered dictionary.

    The impact objects are converted with their own ``to_dict``; the per-factor
    dictionaries are included as-is (not copied).
    """
    serialised = OrderedDict()
    serialised["impact_percent"] = self.current_impact_factors.to_dict()
    serialised["additive_impacts"] = self.additive_impacts.to_dict()
    serialised["current_hyperparam"] = self.current_hyperparams
    serialised["new_params"] = self.new_params
    serialised["change_ratios_additive"] = self.change_ratios_additive
    serialised["change_ratios_frac"] = self.change_ratios_frac
    serialised["change_ratios"] = self.change_ratios
    return serialised

`get_new_impacts_and_fracs(model, dataset)`

Calculates the impacts and fractions of impacts for each factor in the model. Returns the factors, their additive impacts, and their impact fractions. Filters out the factors that are not relevant for the overfit lambda feedback.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py

def get_new_impacts_and_fracs(model: ModType, dataset: EconomicDataset) -> tuple[list[str], pd.Series, pd.Series]:
    """Calculates the additive impact and the impact fraction of each factor in the model.

    Runs the model on every batch of ``dataset``, builds the grouped impact table from the
    ``"temporalnet"`` intermediaries and sums the absolute impacts per factor group. Group labels are
    lower-cased and renamed through ``NAME_MAPPING``; groups listed in ``COLS_TO_SKIP`` are left out of
    all three return values because they are not relevant for the overfit lambda feedback.

    Args:
        model (ModType): Model that is called on each batch of ``dataset``.
        dataset (EconomicDataset): Iterable of batches; its ``encodings`` are used to build the impacts.

    Returns:
        tuple[list[str], pd.Series, pd.Series]: The retained factor keys, the summed absolute impact
        per factor, and each factor's share of the total impact sorted in descending order.
    """
    full_intermediaries = utils.concat_intermediaries([model(batch) for batch in dataset])
    full_impact_dfs = OutputImpact(
        encodings=dataset.encodings, intermediaries=full_intermediaries["temporalnet"], with_groups=True
    ).df
    # One label per impact column: the lower-cased group name, renamed when NAME_MAPPING has an entry.
    new_colvals = full_impact_dfs.columns.get_level_values("group").map(
        lambda x: NAME_MAPPING.get(x.lower(), x.lower())
    )
    keys = [key for key in new_colvals.unique().str.lower() if key not in COLS_TO_SKIP]
    # Total absolute impact per column, then summed over all columns sharing a label.
    raw_impacts = full_impact_dfs.abs().sum(0).groupby(new_colvals).sum()
    # The groupby above already yields one row per label, so filtering is all that is left to do.
    additive_impacts = raw_impacts.loc[~raw_impacts.index.isin(COLS_TO_SKIP)]
    # Divide by the total directly instead of going through ``Series.transform`` with a lambda,
    # which only produced this result via transform's "call on the whole object" fallback.
    impact_fracs = (additive_impacts / additive_impacts.sum()).sort_values(ascending=False)
    return keys, additive_impacts, impact_fracs

`overfit_lambda_feedback(model, dataset, feedback_dir, iter_num, use_abs_target=True)`

Evaluates the model's overfit lambda hyperparameters based on the pre-defined desired states (additive impacts or their fractions) and updates the hyperparameters accordingly. Appends the feedback data to a CSV file and saves the country-level impact plots. Returns the updates to the hyperparameters and the feedback data.

Parameters:

Name	Type	Description	Default
`model`	`ModType`	Trained model	required
`dataset`	`EconomicDataset`	Dataset used for training to calculate the impacts	required
`feedback_dir`	`Path`	Dir to save the feedback data	required
`iter_num`	`int`	NF iteration number	required
`use_abs_target`	`bool`	If True, updates hyperparameters based on desired additive impacts, else based on desired impact fractions. Defaults to True.	`True`

Returns: tuple[Mapping[Sequence[str], Any], Any]: Updates to hyperparameters and feedback data

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py

def overfit_lambda_feedback(
    model: ModType,
    dataset: EconomicDataset,
    feedback_dir: Path,
    iter_num: int,
    use_abs_target: bool = True,
) -> tuple[Mapping[Sequence[str], Any], Any]:
    """Evaluates the model's overfit lambda hyperparameters based on the pre-defined desired states (additive impacts
    or their fractions) and updates the hyperparameters accordingly. Appends the feedback data to a CSV file and
    saves the country-level impact plots. Returns the updates to the hyperparameters and the feedback data.

    Args:
        model (ModType): Trained model
        dataset (EconomicDataset): Dataset used for training to calculate the impacts
        feedback_dir (Path): Dir to save the feedback data
        iter_num (int): NF iteration number
        use_abs_target (bool, optional): If True, updates hyperparameters based on desired additive impacts,
                                        else based on desired impact fractions. Defaults to True.
    Returns:
        tuple[Mapping[Sequence[str], Any], Any]: Updates to hyperparameters and feedback data
    Raises:
        KeyError: If a factor has no matching ``less_overfit_<factor>_lambda`` entry in the model's
            ``hyperparameter_tree["impacts"]``.
    """
    nf_keys = {k: v for k, v in model.hyperparameter_tree["impacts"].items() if k.startswith("less_overfit_")}
    keys, additive_impacts, impact_fracs = get_new_impacts_and_fracs(model, dataset)

    new_hyperparams_to_edit = {}
    nf_output = NFOutput(additive_impacts=additive_impacts, current_impact_factors=impact_fracs)
    for key in keys:
        current_keyname = f"less_overfit_{key}_lambda"
        # Raise explicitly rather than ``assert``: asserts are stripped when Python runs with -O.
        if current_keyname not in nf_keys:
            raise KeyError(
                f"Hyperparameter {current_keyname!r} for factor {key!r} not found in "
                "model.hyperparameter_tree['impacts']"
            )
        current_hyperparam = nf_keys[current_keyname]
        # Both ratios are computed on every iteration because both are recorded in the feedback data.
        # NOTE(review): DESIRED_ADDITIVE is indexed by the factor key while DESIRED_FRAC is indexed by the
        # full hyperparameter name - confirm both tables are keyed as intended.
        cr_additive = np.sqrt(DESIRED_ADDITIVE[key] / additive_impacts[key])
        cr_frac = np.sqrt(DESIRED_FRAC[current_keyname] / impact_fracs[key])
        change_ratio = cr_additive if use_abs_target else cr_frac
        # Update the hyperparameter: dividing by sqrt(desired / observed) means an observed value below
        # the desired one (ratio > 1) shrinks the hyperparameter, and one above it grows the hyperparameter.
        new_value = current_hyperparam / change_ratio
        new_hyperparams_to_edit[("impacts", current_keyname)] = float(new_value)
        # Update the feedback data object with factor's attributes
        nf_output.add_outputs(key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value)
    # Save the feedback data
    _append_to_csv(feedback_dir / NF_DATAFILE, nf_output.to_dict())
    logger.info(f"Overfit lambda feedback data for iteration {iter_num} saved.")
    plot_impacts(model, dataset, f"nf_impact{iter_num}")
    # A fresh dict is built for the return value rather than reusing the one handed to _append_to_csv.
    return new_hyperparams_to_edit, nf_output.to_dict()