filter_by_observation_end(df, observation_end_column)

Filter out everything except the most recent observations

Source code in wt_ml/dataset/economics/fred_series_detection_by_keyword.py
36
37
38
39
40
def filter_by_observation_end(df, observation_end_column):
    """Keep only the rows with the latest observation end.

    Args:
        df: DataFrame of search results.
        observation_end_column: Name of the column holding each row's
            observation end value.

    Returns:
        The rows of ``df`` whose ``observation_end_column`` value equals the
        column's maximum. Ties are all kept.
    """
    end_values = df[observation_end_column]
    # Compare every row against the single largest value in the column.
    is_latest = end_values == end_values.max()
    return df.loc[is_latest]

filter_by_popularity(df, popularity_column)

Keep only the search results with the highest popularity

Source code in wt_ml/dataset/economics/fred_series_detection_by_keyword.py
43
44
45
46
47
48
def filter_by_popularity(df, popularity_column):
    """Keep only the search results with the highest popularity.

    The popularity values are cast to ``int`` before comparison, so values
    stored as numeric strings are compared numerically.

    Args:
        df: DataFrame of search results. It is not modified.
        popularity_column: Name of the column holding the popularity values;
            every value must be convertible to ``int``.

    Returns:
        A new DataFrame with the rows whose popularity equals the maximum
        (ties are all kept). Its ``popularity_column`` holds the int-cast
        values.
    """
    # Cast on a separate Series rather than assigning back into ``df``: the
    # caller's frame stays untouched and no SettingWithCopyWarning is raised
    # when ``df`` is itself a slice of another frame.
    popularity = df[popularity_column].astype(int)
    is_most_popular = popularity == popularity.max()
    most_popular = df.loc[is_most_popular].copy()
    # Positional assignment avoids index alignment (safe with duplicate labels).
    most_popular[popularity_column] = popularity[is_most_popular].to_numpy()
    return most_popular

filter_by_preferred_adj(df, adjustment_column)

Remove non-seasonally-adjusted series from the search results

Source code in wt_ml/dataset/economics/fred_series_detection_by_keyword.py
29
30
31
32
33
def filter_by_preferred_adj(df, adjustment_column):
    """Drop the rows whose adjustment label contains "not".

    The match is a case-insensitive substring test, so a label such as
    "Not Seasonally Adjusted" is removed while "Seasonally Adjusted" is kept.

    Args:
        df: DataFrame of search results.
        adjustment_column: Name of the column holding the seasonal-adjustment
            label of each row. Values must be strings; anything without a
            ``lower()`` method raises ``AttributeError``.

    Returns:
        The rows of ``df`` whose label does not contain "not".
    """
    labels = df[adjustment_column]
    # str.find returns -1 when the substring is absent.
    is_adjusted = [label.lower().find("not") == -1 for label in labels]
    return df.loc[is_adjusted]

filter_by_preferred_frequency(df, freq_column)

Filter search results for the most preferred frequency

Source code in wt_ml/dataset/economics/fred_series_detection_by_keyword.py
20
21
22
23
24
25
26
def filter_by_preferred_frequency(df, freq_column):
    """Keep only the rows with the most preferred frequency that is available.

    Frequencies are tried in the fixed order "D", "W", "M", "Q", "A"
    (presumably daily through annual short codes; confirm against the data
    source). The first code that occurs in ``freq_column`` wins.

    Args:
        df: DataFrame of search results.
        freq_column: Name of the column holding each row's frequency code.

    Returns:
        The rows of ``df`` whose frequency equals the winning code. If none
        of the preferred codes occurs (including when ``df`` is empty), an
        empty DataFrame with the same columns is returned instead of raising
        ``IndexError``.
    """
    preferred_freq_order = ["D", "W", "M", "Q", "A"]
    available_freqs = set(df[freq_column])
    preferred_freq = next(
        (freq for freq in preferred_freq_order if freq in available_freqs),
        None,
    )
    if preferred_freq is None:
        # Nothing matches a preferred frequency: filter everything out but
        # preserve the column layout for downstream filters.
        return df.iloc[0:0]
    return df.loc[df[freq_column] == preferred_freq]