Tabulation

Model post-processing class for generating two outputs: (1) weekly spends and incremental revenue, along with the ROIs, at wholesaler, product-pack and vehicle level; and (2) dataframes for visualizing diminishing return curves for each wholesaler, product-pack and vehicle.

Source code in wt_ml/tabulation/tabulation.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class Tabulation:
    """Post-process a trained model into reporting tables.

    Two outputs are produced:
    1. Weekly spends, incremental revenue and ROIs at wholesaler, product-pack and vehicle level
    2. Data for visualizing diminishing return curves for each wholesaler, product-pack and vehicle
    """

    def __init__(self, dataset_factory: "DataFactoryType", encodings: dict, gt_model: "TrainableModule"):
        """Store the inputs needed for model post-processing.

        Args:
            dataset_factory (DataFactoryType): generator function for iterating over batches of data
            encodings (dict): mapping from names to indices for wholesalers, brands, products, vehicles and so on
            gt_model (TrainableModule): trained ground truth model to be used for inference
        """
        self.dataset_factory = dataset_factory
        self.encodings = encodings
        self.gt_model = gt_model

    def get_weekly_roi_facts(self):
        """Build the weekly spend / incremental revenue / ROI table.

        Side effects:
            Stores the stacked spend table on ``self.all_investments`` and the stacked
            incremental revenue table on ``self.all_inc_revs``.

        Returns:
            pd.DataFrame: weekly roi facts for each wholesaler, product-pack and vehicle,
            with "spend", "inc_revenue" and "roi" columns.
        """
        # The sales mask is applied to the investments because the instantaneous impacts
        # learnt by the model are masked out in the betagamma layer. The impacts that drive
        # the ROIs are therefore masked, so the investments need to be masked too for the
        # ROI numbers to make sense.
        spend_wide = get_all_investments_df(
            self.dataset_factory,
            self.encodings,
            denormalize=True,
            apply_sales_mask=True,
            index_types=GRANULARITY_OPTIONS,
        )
        spend_long = stack_df_and_rename_col(spend_wide, "spend")
        self.all_investments = spend_long

        revenue_wide = get_all_inc_revenues_df(
            self.dataset_factory,
            self.encodings,
            self.gt_model,
            denormalize=True,
            total_impact_from_date=True,
        )
        # Rows are relabelled positionally with the investment index.
        # NOTE(review): this assumes both helpers return rows in the same order - confirm.
        revenue_wide.index = spend_wide.index
        revenue_long = stack_df_and_rename_col(revenue_wide, "inc_revenue")
        self.all_inc_revs = revenue_long

        # Join on every column of the spend table except its last one (the value column).
        join_keys = spend_long.columns[:-1].tolist()
        weekly_roi_facts = spend_long.merge(revenue_long, on=join_keys)

        # EPSILON is added to the spend; presumably it guards against division by zero.
        incremental_revenue = weekly_roi_facts["inc_revenue"]
        padded_spend = weekly_roi_facts["spend"] + EPSILON
        weekly_roi_facts["roi"] = incremental_revenue / padded_spend
        return weekly_roi_facts

    def get_roicurves(self):
        """Build the data for visualizing diminishing return curves for media vehicles.

        Returns:
            The result of ``get_all_roicurves_df`` for the vehicles found in
            ``self.encodings["vehicle"]`` (presumably a pd.DataFrame - confirm against the helper).
        """
        vehicle_lookups = get_lookups(self.encodings["vehicle"])
        return get_all_roicurves_df(
            self.dataset_factory,
            self.encodings,
            self.gt_model,
            vehicle_lookups,
            denormalize=True,
        )

__init__(dataset_factory, encodings, gt_model)

Class initialization needed for model post-processing

Parameters:

Name Type Description Default
dataset_factory DataFactoryType

generator function for iterating over batches of data

required
encodings dict

mapping from names to indices for wholesalers, brands, products, vehicles and so on

required
gt_model EconomicNetwork

trained ground truth model to be used for inference

required
Source code in wt_ml/tabulation/tabulation.py
25
26
27
28
29
30
31
32
33
34
35
36
def __init__(self, dataset_factory: "DataFactoryType", encodings: dict, gt_model: "TrainableModule"):
    """Store the inputs needed for model post-processing.

    Args:
        dataset_factory (DataFactoryType): generator function for iterating over batches of data
        encodings (dict): mapping from names to indices for wholesalers, brands, products, vehicles and so on
        gt_model (TrainableModule): trained ground truth model to be used for inference
    """
    # The arguments are kept as-is; nothing is copied or validated here.
    self.dataset_factory = dataset_factory
    self.encodings = encodings
    self.gt_model = gt_model

get_roicurves()

Generates dataframes for visualizing diminishing return curves for each wholesaler, product-pack and vehicle

Returns:

Type Description

pd.DataFrame, pd.DataFrame: diminishing return curves for media vehicles

Source code in wt_ml/tabulation/tabulation.py
67
68
69
70
71
72
73
74
75
76
77
def get_roicurves(self):
    """Build the data for visualizing diminishing return curves for media vehicles.

    Returns:
        The result of ``get_all_roicurves_df`` for the vehicles found in
        ``self.encodings["vehicle"]`` (presumably a pd.DataFrame - confirm against the helper).
    """
    vehicle_lookups = get_lookups(self.encodings["vehicle"])
    return get_all_roicurves_df(
        self.dataset_factory,
        self.encodings,
        self.gt_model,
        vehicle_lookups,
        denormalize=True,
    )

get_weekly_roi_facts()

Generates weekly wholesaler, product-pack and vehicle level spends, incremental revenues and ROIs

Returns:

Type Description

pd.DataFrame: weekly roi facts for each wholesaler, product-pack and vehicle

Source code in wt_ml/tabulation/tabulation.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def get_weekly_roi_facts(self):
    """Build the weekly spend / incremental revenue / ROI table.

    Side effects:
        Stores the stacked spend table on ``self.all_investments`` and the stacked
        incremental revenue table on ``self.all_inc_revs``.

    Returns:
        pd.DataFrame: weekly roi facts for each wholesaler, product-pack and vehicle,
        with "spend", "inc_revenue" and "roi" columns.
    """
    # The sales mask is applied to the investments because the instantaneous impacts
    # learnt by the model are masked out in the betagamma layer. The impacts that drive
    # the ROIs are therefore masked, so the investments need to be masked too for the
    # ROI numbers to make sense.
    spend_wide = get_all_investments_df(
        self.dataset_factory,
        self.encodings,
        denormalize=True,
        apply_sales_mask=True,
        index_types=GRANULARITY_OPTIONS,
    )
    spend_long = stack_df_and_rename_col(spend_wide, "spend")
    self.all_investments = spend_long

    revenue_wide = get_all_inc_revenues_df(
        self.dataset_factory,
        self.encodings,
        self.gt_model,
        denormalize=True,
        total_impact_from_date=True,
    )
    # Rows are relabelled positionally with the investment index.
    # NOTE(review): this assumes both helpers return rows in the same order - confirm.
    revenue_wide.index = spend_wide.index
    revenue_long = stack_df_and_rename_col(revenue_wide, "inc_revenue")
    self.all_inc_revs = revenue_long

    # Join on every column of the spend table except its last one (the value column).
    join_keys = spend_long.columns[:-1].tolist()
    weekly_roi_facts = spend_long.merge(revenue_long, on=join_keys)

    # EPSILON is added to the spend; presumably it guards against division by zero.
    incremental_revenue = weekly_roi_facts["inc_revenue"]
    padded_spend = weekly_roi_facts["spend"] + EPSILON
    weekly_roi_facts["roi"] = incremental_revenue / padded_spend
    return weekly_roi_facts