Tabulation

`Tabulation`

Model post-processing class for generating two outputs: (1) weekly wholesaler, product-pack and vehicle level spends and incremental revenues, along with the ROIs; (2) dataframes for visualizing diminishing return curves for each wholesaler, product-pack and vehicle.

Source code in wt_ml/tabulation/tabulation.py

class Tabulation:
    """Post-process a trained model into ROI tables and diminishing-return curve data.

    Two outputs are produced:
    1. Weekly wholesaler, product-pack and vehicle level spends and incremental revenues, along with the ROIs
    2. Dataframes for visualizing diminishing return curves for each wholesaler, product-pack and vehicle
    """

    def __init__(self, dataset_factory: "DataFactoryType", encodings: dict, gt_model: "TrainableModule"):
        """Keep references to everything the post-processing methods need.

        Args:
            dataset_factory (DataFactoryType): generator function for iterating over batches of data
            encodings (dict): mapping from names to indices for wholesalers, brands, products, vehicles and so on
            gt_model (TrainableModule): trained ground truth model to be used for inference
                (e.g. an EconomicNetwork, presumably a TrainableModule -- TODO confirm)
        """
        self.gt_model = gt_model
        self.encodings = encodings
        self.dataset_factory = dataset_factory

    def get_weekly_roi_facts(self):
        """Build the weekly spend / incremental revenue / ROI table.

        As a side effect the stacked spend and incremental revenue frames are stored on the instance as
        ``self.all_investments`` and ``self.all_inc_revs``.

        Returns:
            pd.DataFrame: weekly roi facts for each wholesaler, product-pack and vehicle
        """
        # Why apply_sales_mask=True: the instantaneous impacts learnt by the model are masked out in the
        # betagamma layer, and those impacts are what the ROIs are computed from. The investments have to be
        # masked as well for the ROI numbers to make sense.
        spend_wide = get_all_investments_df(
            self.dataset_factory,
            self.encodings,
            denormalize=True,
            apply_sales_mask=True,
            index_types=GRANULARITY_OPTIONS,
        )
        self.all_investments = stack_df_and_rename_col(spend_wide, "spend")

        inc_rev_wide = get_all_inc_revenues_df(
            self.dataset_factory,
            self.encodings,
            self.gt_model,
            denormalize=True,
            total_impact_from_date=True,
        )
        # Give the revenue frame the investments index before stacking.
        # NOTE(review): assumes both frames have their rows in the same order -- confirm.
        inc_rev_wide.index = spend_wide.index
        self.all_inc_revs = stack_df_and_rename_col(inc_rev_wide, "inc_revenue")

        # Join on every column of the stacked spend frame except its last one (presumably "spend").
        join_keys = self.all_investments.columns[:-1].tolist()
        facts = self.all_investments.merge(self.all_inc_revs, on=join_keys)

        # EPSILON is added to the spend before dividing (presumably to avoid dividing by a zero spend).
        padded_spend = facts["spend"] + EPSILON
        facts["roi"] = facts["inc_revenue"] / padded_spend
        return facts

    def get_roicurves(self):
        """Generate the data for visualizing diminishing return curves per wholesaler, product-pack and vehicle.

        Returns:
            The result of ``get_all_roicurves_df`` for the vehicles looked up from ``encodings["vehicle"]``:
            diminishing return curves for media vehicles. Presumably a single pd.DataFrame; the previous
            docstring listed two -- TODO confirm.
        """
        vehicle_lookups = get_lookups(self.encodings["vehicle"])
        return get_all_roicurves_df(
            self.dataset_factory,
            self.encodings,
            self.gt_model,
            vehicle_lookups,
            denormalize=True,
        )

`__init__(dataset_factory, encodings, gt_model)`

Class initialization needed for model post-processing

Parameters:

Name	Type	Description	Default
`dataset_factory`	`DataFactoryType`	generator function for iterating over batches of data	required
`encodings`	`dict`	mapping from names to indices for wholesalers, brands, products, vehicles and so on	required
`gt_model`	`EconomicNetwork`	trained ground truth model to be used for inference	required

Source code in wt_ml/tabulation/tabulation.py

def __init__(self, dataset_factory: "DataFactoryType", encodings: dict, gt_model: "TrainableModule"):
    """Keep references to everything the post-processing methods need.

    Args:
        dataset_factory (DataFactoryType): generator function for iterating over batches of data
        encodings (dict): mapping from names to indices for wholesalers, brands, products, vehicles and so on
        gt_model (TrainableModule): trained ground truth model to be used for inference
            (e.g. an EconomicNetwork, presumably a TrainableModule -- TODO confirm)
    """
    self.gt_model = gt_model
    self.encodings = encodings
    self.dataset_factory = dataset_factory

`get_roicurves()`

Generates dataframes for visualizing diminishing return curves for each wholesaler, product-pack and vehicle

Returns:

Type	Description
	pd.DataFrame, pd.DataFrame: diminishing return curves for media vehicles

Source code in wt_ml/tabulation/tabulation.py

def get_roicurves(self):
    """Generate the data for visualizing diminishing return curves per wholesaler, product-pack and vehicle.

    Returns:
        The result of ``get_all_roicurves_df`` for the vehicles looked up from ``encodings["vehicle"]``:
        diminishing return curves for media vehicles. Presumably a single pd.DataFrame; the previous
        docstring listed two -- TODO confirm.
    """
    vehicle_lookups = get_lookups(self.encodings["vehicle"])
    return get_all_roicurves_df(
        self.dataset_factory,
        self.encodings,
        self.gt_model,
        vehicle_lookups,
        denormalize=True,
    )

`get_weekly_roi_facts()`

Generates weekly wholesaler, product-pack and vehicle level spends, incremental revenues and ROIs

Returns:

Type	Description
	pd.DataFrame: weekly roi facts for each wholesaler, product-pack and vehicle

Source code in wt_ml/tabulation/tabulation.py

def get_weekly_roi_facts(self):
    """Build the weekly spend / incremental revenue / ROI table.

    As a side effect the stacked spend and incremental revenue frames are stored on the instance as
    ``self.all_investments`` and ``self.all_inc_revs``.

    Returns:
        pd.DataFrame: weekly roi facts for each wholesaler, product-pack and vehicle
    """
    # Why apply_sales_mask=True: the instantaneous impacts learnt by the model are masked out in the
    # betagamma layer, and those impacts are what the ROIs are computed from. The investments have to be
    # masked as well for the ROI numbers to make sense.
    spend_wide = get_all_investments_df(
        self.dataset_factory,
        self.encodings,
        denormalize=True,
        apply_sales_mask=True,
        index_types=GRANULARITY_OPTIONS,
    )
    self.all_investments = stack_df_and_rename_col(spend_wide, "spend")

    inc_rev_wide = get_all_inc_revenues_df(
        self.dataset_factory,
        self.encodings,
        self.gt_model,
        denormalize=True,
        total_impact_from_date=True,
    )
    # Give the revenue frame the investments index before stacking.
    # NOTE(review): assumes both frames have their rows in the same order -- confirm.
    inc_rev_wide.index = spend_wide.index
    self.all_inc_revs = stack_df_and_rename_col(inc_rev_wide, "inc_revenue")

    # Join on every column of the stacked spend frame except its last one (presumably "spend").
    join_keys = self.all_investments.columns[:-1].tolist()
    facts = self.all_investments.merge(self.all_inc_revs, on=join_keys)

    # EPSILON is added to the spend before dividing (presumably to avoid dividing by a zero spend).
    padded_spend = facts["spend"] + EPSILON
    facts["roi"] = facts["inc_revenue"] / padded_spend
    return facts

__init__(dataset_factory, encodings, gt_model)

get_roicurves()

get_weekly_roi_facts()

`Tabulation`

`__init__(dataset_factory, encodings, gt_model)`

`get_roicurves()`

`get_weekly_roi_facts()`