NFOutput

Output class to store the results of the overfit lambda feedback process.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
class NFOutput:
    """Output class to store the results of the overfit lambda feedback process.

    Two impact objects are stored as passed in; the remaining attributes are
    dictionaries that ``add_outputs`` fills one key at a time.
    """

    def __init__(self, additive_impacts: pd.DataFrame, current_impact_factors: pd.DataFrame):
        # Impact data supplied by the caller; this class only ever calls ``.to_dict()`` on them.
        self.additive_impacts = additive_impacts
        self.current_impact_factors = current_impact_factors
        # Per-key records, populated through ``add_outputs``.
        self.current_hyperparams = {}
        self.new_params = {}
        self.change_ratios = {}
        self.change_ratios_additive = {}
        self.change_ratios_frac = {}

    def add_outputs(self, key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value):
        """Store one value per attribute dictionary under ``key`` (existing entries are overwritten)."""
        records = (
            (self.current_hyperparams, current_hyperparam),
            (self.change_ratios_additive, cr_additive),
            (self.change_ratios_frac, cr_frac),
            (self.change_ratios, change_ratio),
            (self.new_params, new_value),
        )
        for store, value in records:
            store[key] = value

    def to_dict(self):
        """Return the stored results as an ``OrderedDict`` with a fixed key order.

        The two impact objects are converted with their own ``to_dict()``; the
        per-key dictionaries are inserted as-is (the same objects, not copies).
        """
        result = OrderedDict()
        result["impact_percent"] = self.current_impact_factors.to_dict()
        result["additive_impacts"] = self.additive_impacts.to_dict()
        result["current_hyperparam"] = self.current_hyperparams
        result["new_params"] = self.new_params
        result["change_ratios_additive"] = self.change_ratios_additive
        result["change_ratios_frac"] = self.change_ratios_frac
        result["change_ratios"] = self.change_ratios
        return result

add_outputs(key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value)

Updates the output class attribute dictionaries with respective key-value pairs.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py
117
118
119
120
121
122
123
def add_outputs(self, key, current_hyperparam, change_ratio, cr_additive, cr_frac, new_value):
    """Store one value per attribute dictionary under ``key`` (existing entries are overwritten)."""
    records = (
        (self.current_hyperparams, current_hyperparam),
        (self.change_ratios_additive, cr_additive),
        (self.change_ratios_frac, cr_frac),
        (self.change_ratios, change_ratio),
        (self.new_params, new_value),
    )
    for store, value in records:
        store[key] = value

to_dict()

Converts the output class attributes to an ordered dictionary.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py
125
126
127
128
129
130
131
132
133
134
135
136
137
def to_dict(self):
    """Return the stored results as an ``OrderedDict`` with a fixed key order.

    The two impact objects are converted with their own ``to_dict()``; the
    per-key dictionaries are inserted as-is (the same objects, not copies).
    """
    result = OrderedDict()
    result["impact_percent"] = self.current_impact_factors.to_dict()
    result["additive_impacts"] = self.additive_impacts.to_dict()
    result["current_hyperparam"] = self.current_hyperparams
    result["new_params"] = self.new_params
    result["change_ratios_additive"] = self.change_ratios_additive
    result["change_ratios_frac"] = self.change_ratios_frac
    result["change_ratios"] = self.change_ratios
    return result

get_new_impacts_and_fracs(model, dataset)

Calculates the impacts and fractions of impacts for each factor in the model. Returns the factors, their additive impacts, and their impact fractions. Filters out the factors that are not relevant for the overfit lambda feedback.

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def get_new_impacts_and_fracs(model: ModType, dataset: EconomicDataset) -> tuple[list[str], pd.Series, pd.Series]:
    """Calculates the impacts and fractions of impacts for each factor in the model.

    Runs the model over every batch of the dataset, sums the absolute impacts per
    (renamed) factor group and drops the groups listed in ``COLS_TO_SKIP``, i.e. the
    factors that are not relevant for the overfit lambda feedback.

    Args:
        model (ModType): Model that is called on each batch of ``dataset``.
        dataset (EconomicDataset): Iterable of batches; its ``encodings`` are passed to ``OutputImpact``.

    Returns:
        tuple[list[str], pd.Series, pd.Series]: The factor keys (in order of first appearance among
        the impact columns), the summed absolute impact per factor, and each factor's share of the
        total impact sorted in descending order.
    """
    full_intermediaries = utils.concat_intermediaries([model(batch) for batch in dataset])
    full_impact_dfs = OutputImpact(
        encodings=dataset.encodings, intermediaries=full_intermediaries["temporalnet"], with_groups=True
    ).df
    # Rename each column's group through NAME_MAPPING, falling back to the lower-cased group name.
    new_colvals = full_impact_dfs.columns.get_level_values("group").map(
        lambda x: NAME_MAPPING.get(x.lower(), x.lower())
    )
    # NOTE(review): keys are lower-cased once more here, while the impact index below keeps the
    # mapped names unchanged. The two only agree if NAME_MAPPING values and COLS_TO_SKIP entries
    # are already lower-case - TODO confirm.
    keys = new_colvals.unique().str.lower()
    keys = [key for key in keys if key not in COLS_TO_SKIP]
    # Total absolute impact per column, then summed within each renamed group.
    raw_impacts = full_impact_dfs.abs().sum(0).groupby(new_colvals).sum()
    # The grouped index is already unique, so filtering is all that is left to do.
    additive_impacts = raw_impacts.loc[~raw_impacts.index.isin(COLS_TO_SKIP)]
    # Plain division instead of ``Series.transform`` with a lambda, which only produced this
    # result through pandas' fallback after the element-wise attempt failed.
    impact_fracs = (additive_impacts / additive_impacts.sum()).sort_values(ascending=False)
    return keys, additive_impacts, impact_fracs

overfit_lambda_feedback(model, dataset, feedback_dir, iter_num, use_abs_target=True)

Evaluates the model's overfit lambda hyperparameters based on the pre-defined desired states (additive impacts or their fractions) and updates the hyperparameters accordingly. Appends the feedback data to a CSV file and saves the country-level impact plots. Returns the updates to the hyperparameters and the feedback data.

Parameters:

Name Type Description Default
model ModType

Trained model

required
dataset EconomicDataset

Dataset used for training to calculate the impacts

required
feedback_dir Path

Dir to save the feedback data

required
iter_num int

NF iteration number

required
use_abs_target bool

If True, updates hyperparameters based on desired additive impacts, else based on desired impact fractions. Defaults to True.

True

Returns: tuple[Mapping[Sequence[str], Any], Any]: Updates to hyperparameters and feedback data

Source code in wt_ml/negative_feedback/overfit_lambda_evaluator.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def overfit_lambda_feedback(
    model: ModType,
    dataset: EconomicDataset,
    feedback_dir: Path,
    iter_num: int,
    use_abs_target=True,
) -> tuple[Mapping[Sequence[str], Any], Any]:
    """Evaluates the model's overfit lambda hyperparameters against the pre-defined desired states
    (additive impacts or their fractions) and proposes updated values.

    For every factor, the current ``less_overfit_<factor>_lambda`` value is divided by
    ``sqrt(desired / actual)``. The feedback data is handed to ``_append_to_csv`` and
    ``plot_impacts`` is called for this iteration.

    Args:
        model (ModType): Trained model
        dataset (EconomicDataset): Dataset used for training to calculate the impacts
        feedback_dir (Path): Dir to save the feedback data
        iter_num (int): NF iteration number
        use_abs_target (bool, optional): If True, updates hyperparameters based on desired additive impacts,
                                        else based on desired impact fractions. Defaults to True.
    Returns:
        tuple[Mapping[Sequence[str], Any], Any]: Updates to hyperparameters, keyed by
        ``("impacts", <hyperparameter name>)``, and the feedback data as a dictionary
    """
    impact_hyperparams = model.hyperparameter_tree["impacts"]
    lambda_params = {name: value for name, value in impact_hyperparams.items() if name.startswith("less_overfit_")}
    factor_names, additive_impacts, impact_fracs = get_new_impacts_and_fracs(model, dataset)

    feedback = NFOutput(additive_impacts=additive_impacts, current_impact_factors=impact_fracs)
    hyperparam_updates = {}
    for factor in factor_names:
        lambda_name = f"less_overfit_{factor}_lambda"
        assert lambda_name in lambda_params
        current_lambda = lambda_params[lambda_name]
        # Both ratios are always computed because both are recorded in the feedback data.
        # NOTE(review): DESIRED_ADDITIVE is indexed by the factor name but DESIRED_FRAC by the
        # hyperparameter name - confirm this asymmetry matches how the constants are defined.
        additive_ratio = np.sqrt(DESIRED_ADDITIVE[factor] / additive_impacts[factor])
        frac_ratio = np.sqrt(DESIRED_FRAC[lambda_name] / impact_fracs[factor])
        chosen_ratio = additive_ratio if use_abs_target else frac_ratio
        # An impact above its target gives a ratio below 1 and therefore a larger lambda.
        updated_lambda = current_lambda / chosen_ratio
        hyperparam_updates[("impacts", lambda_name)] = float(updated_lambda)
        feedback.add_outputs(
            key=factor,
            current_hyperparam=current_lambda,
            change_ratio=chosen_ratio,
            cr_additive=additive_ratio,
            cr_frac=frac_ratio,
            new_value=updated_lambda,
        )
    # Persist the feedback data before plotting.
    _append_to_csv(feedback_dir / NF_DATAFILE, feedback.to_dict())
    logger.info(f"Overfit lambda feedback data for iteration {iter_num} saved.")
    plot_impacts(model, dataset, f"nf_impact{iter_num}")
    # A fresh dictionary is built for the caller rather than reusing the one given to _append_to_csv.
    return hyperparam_updates, feedback.to_dict()