Skip to content
import os
# Detect whether the notebook is running inside Google Colab by probing for
# the Colab-only package.
try:
    import google.colab  # noqa: F401  (imported purely as an availability probe)
    IN_COLAB = True
except ImportError:
    # Only a missing module means "not in Colab"; a bare ``except`` would also
    # swallow KeyboardInterrupt / SystemExit and hide unrelated failures.
    IN_COLAB = False
# Outside Colab, move the working directory one level up.
if not IN_COLAB:
    os.chdir("..")
import random
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode
%load_ext autoreload
%autoreload 2
from IPython.display import Math
from pytorch_tabular import TabularModel
from pytorch_tabular.models import (
    CategoryEmbeddingModelConfig,
    NodeConfig,
    TabNetModelConfig,
    CategoryEmbeddingMDNConfig,
    MixtureDensityHeadConfig,
    NODEMDNConfig,
)
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
    ExperimentConfig,
)
from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer

Utility Functions

def generate_linear_example(samples=int(1e5)):
    """Create a heteroscedastic linear toy dataset: y = 5x + x^2 * eps.

    ``x`` comes from ``np.random.sample`` and ``eps`` from
    ``np.random.standard_normal``; both use NumPy's global random state,
    so only the train/validation split (``random_state=42``) is seeded here.

    Args:
        samples: Number of (x, y) points to generate before splitting.

    Returns:
        Tuple ``(df_train, df_valid, df_test, ["target"])``. Train and
        validation frames each hold half of the points with columns
        ``col1`` and ``target``; the test frame holds 1000 evenly spaced
        ``col1`` values between 0 and 1 and no target column.
    """
    features = np.random.sample(samples)[:, np.newaxis].astype(np.float32)
    noise = np.random.standard_normal(features.shape)
    targets = 5 * features + (features ** 2) * noise

    feat_train, feat_valid, tgt_train, tgt_valid = train_test_split(
        features, targets, test_size=0.5, random_state=42
    )
    grid = np.linspace(0., 1., int(1e3))[:, np.newaxis].astype(np.float32)

    def _labelled_frame(col, tgt):
        # Flatten the (n, 1) column vectors into plain 1-D DataFrame columns.
        return pd.DataFrame({"col1": col.ravel(), "target": tgt.ravel()})

    df_train = _labelled_frame(feat_train, tgt_train)
    df_valid = _labelled_frame(feat_valid, tgt_valid)
    df_test = pd.DataFrame({"col1": grid.ravel()})
    return (df_train, df_valid, df_test, ["target"])

def generate_non_linear_example(samples=int(1e5)):
    """Create a two-branch parabola dataset: y = +/- x^2 + 2 * eps, eps ~ N(0, |x|).

    Two independent batches of ``samples`` points are drawn with x uniform
    in [-10, 10): one follows the upward parabola, the other the downward
    one. The batches are stacked and the target is rescaled with
    ``MinMaxScaler``. Draws use NumPy's global random state; only the
    train/validation split (``random_state=42``) is seeded here.

    Args:
        samples: Number of points drawn for EACH branch.

    Returns:
        Tuple ``(df_train, df_valid, df_test, ["target"])``. Train and
        validation frames each hold half of the stacked points with columns
        ``col1`` and ``target``; the test frame holds 1000 evenly spaced
        ``col1`` values between -10 and 10 and no target column.
    """
    def _draw_branch(flip):
        # One branch: uniform x, noise whose scale is |x|, parabola +/- x^2.
        xs = np.float32(np.random.uniform(-10, 10, (1, samples)))
        jitter = np.array([np.random.normal(scale=np.abs(row)) for row in xs])
        curve = np.square(xs)
        if flip:
            curve = -curve
        return xs, np.float32(curve + jitter * 2.0)

    # Call order matters: it fixes the order in which random numbers are drawn.
    x_up, y_up = _draw_branch(False)
    x_down, y_down = _draw_branch(True)

    # Each branch has shape (1, samples); join side by side, then transpose
    # into column vectors of shape (2 * samples, 1).
    features = np.concatenate((x_up, x_down), axis=1).T
    targets = np.concatenate((y_up, y_down), axis=1).T
    targets = MinMaxScaler().fit_transform(targets)

    feat_train, feat_valid, tgt_train, tgt_valid = train_test_split(
        features, targets, test_size=0.5, random_state=42, shuffle=True
    )
    grid = np.linspace(-10, 10, int(1e3))[:, np.newaxis].astype(np.float32)

    df_train = pd.DataFrame({"col1": feat_train.ravel(), "target": tgt_train.ravel()})
    df_valid = pd.DataFrame({"col1": feat_valid.ravel(), "target": tgt_valid.ravel()})
    df_test = pd.DataFrame({"col1": grid.ravel()})
    return (df_train, df_valid, df_test, ["target"])

def generate_step_linear_example(samples=int(1e5)):
    """Create a piecewise dataset with a jump at x = 0.5.

    For x < 0.5:   y = 5x + x^2 * eps
    otherwise:     y = 100x + x^2 + x^2 * eps
    with eps drawn from ``np.random.standard_normal``. The target is then
    rescaled with ``MinMaxScaler``. Draws use NumPy's global random state;
    only the train/validation split (``random_state=42``) is seeded here.

    Args:
        samples: Number of (x, y) points to generate before splitting.

    Returns:
        Tuple ``(df_train, df_valid, df_test, ["target"])``. Train and
        validation frames each hold half of the points with columns
        ``col1`` and ``target``; the test frame holds 1000 evenly spaced
        ``col1`` values between 0 and 1 and no target column.
    """
    features = np.random.sample(samples)[:, np.newaxis].astype(np.float32)
    targets = np.zeros(features.shape)

    below = features < 0.5
    x_low = features[below]
    x_high = features[~below]

    # Noise for the lower segment is drawn first, then for the upper segment.
    noise_low = np.random.standard_normal(x_low.shape)
    targets[below] = 5 * x_low + (x_low ** 2) * noise_low
    noise_high = np.random.standard_normal(x_high.shape)
    targets[~below] = (100 * x_high + x_high ** 2) + (x_high ** 2) * noise_high

    targets = MinMaxScaler().fit_transform(targets)

    feat_train, feat_valid, tgt_train, tgt_valid = train_test_split(
        features, targets, test_size=0.5, random_state=42, shuffle=True
    )
    grid = np.linspace(0., 1., int(1e3))[:, np.newaxis].astype(np.float32)

    df_train = pd.DataFrame({"col1": feat_train.ravel(), "target": tgt_train.ravel()})
    df_valid = pd.DataFrame({"col1": feat_valid.ravel(), "target": tgt_valid.ravel()})
    df_test = pd.DataFrame({"col1": grid.ravel()})
    return (df_train, df_valid, df_test, ["target"])

def generate_gaussian_mixture(samples=int(1e5)):
    """Create a two-regime dataset switched by a function of x.

    A weight ``w = (sin(x) + 3x*cos(x)) / max(...)`` is computed per point
    and rounded to the nearest integer. The target is
    ``w * g1 + (1 - w) * g2`` where ``g1 = 2x + 0.5*u`` and
    ``g2 = 8x + 0.5*u'`` with ``u``, ``u'`` independent draws from
    ``np.random.sample``. Draws use NumPy's global random state; only the
    train/validation split (``random_state=42``) is seeded here.

    Args:
        samples: Number of (x, y) points to generate before splitting.

    Returns:
        Tuple ``(df_train, df_valid, df_test, ["target"])``. Train and
        validation frames each hold half of the points with columns
        ``col1`` and ``target``; the test frame holds 1000 evenly spaced
        ``col1`` values between 0 and 1 and no target column.
    """
    features = np.random.sample(samples)[:, np.newaxis].astype(np.float32)

    weight = np.sin(features) + 3 * features * np.cos(features)
    weight = weight / weight.max()
    selector = weight.round().squeeze()

    flat_x = features.squeeze()
    # The lower line's jitter is drawn before the upper line's.
    line_low = 2 * flat_x + 0.5 * np.random.sample(samples)
    line_high = 8 * flat_x + 0.5 * np.random.sample(samples)

    targets = (selector * line_low + (1 - selector) * line_high).reshape(-1, 1)

    feat_train, feat_valid, tgt_train, tgt_valid = train_test_split(
        features, targets, test_size=0.5, random_state=42
    )
    grid = np.linspace(0., 1., int(1e3))[:, np.newaxis].astype(np.float32)

    df_train = pd.DataFrame({"col1": feat_train.ravel(), "target": tgt_train.ravel()})
    df_valid = pd.DataFrame({"col1": feat_valid.ravel(), "target": tgt_valid.ravel()})
    df_test = pd.DataFrame({"col1": grid.ravel()})
    return (df_train, df_valid, df_test, ["target"])

Linear Example

df_train, df_valid, df_test, target_col = generate_linear_example()

Plot

# display(Math(r"$y = 5x + (x^2 * \epsilon)$"+"\n"+r"$\epsilon \backsim \mathcal{N}(0,1)$"))
fig = px.scatter(df_train, x="col1", y="target", title=r"$y = 5x + (x^2 * \epsilon)$"+"\n"+r"$\epsilon \backsim \mathcal{N}(0,1)$")
fig.update_layout(
    title={
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig

px.histogram(df_train, x="target", title="Histogram")

Training the MDN

Define the Configs

epochs = 15
batch_size = 128
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
mdn_config = MixtureDensityHeadConfig(num_gaussian=1)
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="128-64",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layers
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

Training the Model

tabular_model.fit(train=df_train, validation=df_valid)

Predictions and Visualization

pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100)
pred_df.head()


col1 target_prediction target_q25 target_q50 target_q75
0 0.000000 0.261096 0.082106 0.261385 0.435355
1 0.001001 0.204703 0.036332 0.192244 0.387601
2 0.002002 0.240385 0.051463 0.240299 0.377365
3 0.003003 0.211230 0.044478 0.234646 0.374513
4 0.004004 0.234834 0.038737 0.246136 0.377387
fig = go.Figure([
    go.Scatter(
        name='Mean',
        x=pred_df['col1'],
        y=pred_df['target_prediction'],
        mode='lines',
        line=dict(color='rgba(28,53,94,1)'),
    ),
    go.Scatter(
        name='Upper Bound',
        x=pred_df['col1'],
        y=pred_df['target_q75'],
        mode='lines',
        marker=dict(color='rgba(0,147,201,0.3)'),
        line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound',
        x=pred_df['col1'],
        y=pred_df['target_q25'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(0,147,201,0.3)',
        fill='tonexty',
        showlegend=False
    )
])
fig.update_layout(
    yaxis_title='y',
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()

Non-Linear Example

df_train, df_valid, df_test, target_col = generate_non_linear_example()

Plot

fig = px.scatter(df_train, x="col1", y="target", title=r"$y = \pm x^2 + \epsilon$"+"\n"+r"$\epsilon\backsim\mathcal{N}(0,|x|)$")
fig.update_layout(
    title={
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig

px.histogram(df_train, x="target", title="Histogram")

Training a FeedForward

Define the Configs

epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config = CategoryEmbeddingModelConfig(
    task="regression",
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layers
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

Training the Model

tabular_model.fit(train=df_train, validation=df_valid)

Predictions and Visualization

pred_df = tabular_model.predict(df_valid.sample(1000).sort_values("col1"))
pred_df.head()


col1 target target_prediction
19134 -9.996758 0.907346 0.447962
97260 -9.987103 0.123242 0.448189
55069 -9.947652 0.298183 0.449120
88050 -9.938842 0.134552 0.449327
1366 -9.938368 0.231712 0.449339
fig = go.Figure([
    go.Scatter(
        name='Prediction',
        x=pred_df['col1'],
        y=pred_df['target_prediction'],
        mode='lines',
        line=dict(color='rgba(28,53,94,1)'),
    ),
    go.Scatter(
        name='Actual',
        x=pred_df['col1'],
        y=pred_df['target'],
        mode='markers',
        line=dict(color='rgba(60,180,229,1)'),
    ),
])
fig.update_layout(
    yaxis_title='y',
    title='Category Embedding Prediction',
    hovermode="x"
)
fig.show()

Training the MDN

Define the Configs

epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    early_stopping=None,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
mdn_config = MixtureDensityHeadConfig(num_gaussian=2, weight_regularization=2)#, mu_bias_init=[0.3, 0.7])
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layers
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

Training the Model

tabular_model.fit(train=df_train, validation=df_valid)

Predictions and Visualization

pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100, ret_logits=True)
pred_df.head()


col1 target_prediction target_q25 target_q50 target_q75 pi_0 pi_1 sigma_0 sigma_1 mu_0 mu_1 backbone_features_0 backbone_features_1 backbone_features_2 backbone_features_3 backbone_features_4 backbone_features_5 backbone_features_6 backbone_features_7
0 -10.000000 0.474035 0.215836 0.281732 0.817638 -0.000006 0.000005 0.060515 0.064350 0.228173 0.826169 0.053938 0.0 1.733542 0.0 0.0 3.974509 0.0 4.162086
1 -9.979980 0.386717 0.216200 0.250949 0.709359 -0.000006 0.000005 0.060404 0.064212 0.229011 0.824638 0.054902 0.0 1.732937 0.0 0.0 3.970430 0.0 4.151996
2 -9.959960 0.789517 0.770570 0.813235 0.857713 -0.000006 0.000005 0.060294 0.064074 0.229848 0.823107 0.055865 0.0 1.732333 0.0 0.0 3.966351 0.0 4.141907
3 -9.939939 0.325282 0.184520 0.236393 0.315427 -0.000006 0.000005 0.060183 0.063936 0.230686 0.821576 0.056829 0.0 1.731729 0.0 0.0 3.962272 0.0 4.131816
4 -9.919920 0.463595 0.233461 0.293257 0.788495 -0.000006 0.000005 0.060073 0.063798 0.231523 0.820045 0.057792 0.0 1.731125 0.0 0.0 3.958193 0.0 4.121726
df = df_valid.sample(10000)
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        line=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'],
        mode='lines',
        line=dict(color='rgba(36, 37, 130, 1)'),
    ),
    go.Scatter(
        name='Component 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'],
        mode='lines',
        line=dict(color='rgba(246, 76, 114, 1)'),
    ),
    go.Scatter(
        name='Upper Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0']+pred_df['sigma_0'],
        mode='lines',
        marker=dict(color='rgba(47, 47, 162, 0.5)'),
#         line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0']-pred_df['sigma_0'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(47, 47, 162, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
    go.Scatter(
        name='Upper Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1']+pred_df['sigma_1'],
        mode='lines',
        marker=dict(color='rgba(250, 152, 174, 0.5)'),
#         line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1']-pred_df['sigma_1'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(250, 152, 174, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()

Gaussian Mixture

df_train, df_valid, df_test, target_col = generate_gaussian_mixture()

Plot

from IPython.display import display, Math, Latex
eqn = r'$\pi = \frac{sin(x) + 3xcos(x)}{max \left (sin(x) + 3xcos(x) \right )} \\ \\ g1 = 2x + 0.5 \epsilon \rightarrow  \epsilon \backsim \mathcal{N}(0,1) \\ g2 = 8x + 0.5 \epsilon \rightarrow  \epsilon \backsim \mathcal{N}(0,1) \\ p = Bernoulli(pi) \rightarrow \text{Samples one of two outcomes based on the value of } \pi \\ y = p \times g1 + (1-p) \times g2$'
display(Math(eqn))
fig = px.scatter(df_train, x="col1", y="target")
fig.update_layout(
    title={
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig

px.histogram(df_train, x="target", title="Histogram")

Training a FeedForward

Define the Configs

epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
model_config = CategoryEmbeddingModelConfig(
    task="regression",
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layers
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

Training the Model

tabular_model.fit(train=df_train, validation=df_valid)

Predictions and Visualization

pred_df = tabular_model.predict(df_valid.sample(1000).sort_values("col1"))
pred_df.head()


col1 target target_prediction
18950 0.000725 0.494310 0.332354
17626 0.000729 0.233029 0.332382
994 0.000870 0.409967 0.333436
3131 0.001038 0.464035 0.334694
15884 0.001590 0.346892 0.338819
fig = go.Figure([
    go.Scatter(
        name='Prediction',
        x=pred_df['col1'],
        y=pred_df['target_prediction'],
        mode='lines',
        line=dict(color='rgba(28,53,94,1)'),
    ),
    go.Scatter(
        name='Actual',
        x=pred_df['col1'],
        y=pred_df['target'],
        mode='markers',
        line=dict(color='rgba(60,180,229,1)'),
    ),
])
fig.update_layout(
    yaxis_title='y',
    title='Category Embedding Network Prediction',
    hovermode="x"
)
fig.show()

Training the MDN

Define the Configs

epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
    early_stopping=None,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
mdn_config = MixtureDensityHeadConfig(num_gaussian=2, 
                                      weight_regularization=2, 
#                                       lambda_pi=10, 
#                                       lambda_sigma=1, 
#                                       mu_bias_init=[1,2]
                                     )#, mu_bias_init=[0.3, 0.7])
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layers
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

Training the Model

tabular_model.fit(train=df_train, validation=df_valid)

Predictions and Visualization

pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100, ret_logits=True)
pred_df.head()


col1 target_prediction target_q25 target_q50 target_q75 pi_0 pi_1 sigma_0 sigma_1 mu_0 mu_1 backbone_features_0 backbone_features_1 backbone_features_2 backbone_features_3 backbone_features_4 backbone_features_5 backbone_features_6 backbone_features_7
0 0.000000 0.215255 0.125259 0.235676 0.307787 -2.359976 3.190277 0.077286 0.131827 1.506372 0.241181 0.0 2.128445 1.751673 3.381084 0.0 0.0 0.0 3.131942
1 0.001001 0.241467 0.148994 0.254013 0.340765 -2.354158 3.183888 0.077483 0.132052 1.504977 0.249264 0.0 2.125253 1.750712 3.376083 0.0 0.0 0.0 3.123496
2 0.002002 0.267455 0.165220 0.246751 0.353674 -2.348339 3.177499 0.077680 0.132278 1.503582 0.257348 0.0 2.122060 1.749751 3.371081 0.0 0.0 0.0 3.115050
3 0.003003 0.337546 0.214442 0.281356 0.378789 -2.342521 3.171110 0.077878 0.132504 1.502187 0.265431 0.0 2.118868 1.748790 3.366080 0.0 0.0 0.0 3.106604
4 0.004004 0.273186 0.211099 0.271594 0.351111 -2.336703 3.164721 0.078077 0.132730 1.500791 0.273515 0.0 2.115676 1.747829 3.361079 0.0 0.0 0.0 3.098158
df = df_valid.sample(10000)
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        line=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'],
        mode='lines',
        line=dict(color='rgba(90, 92, 237, 1)'),
    ),
    go.Scatter(
        name='Component 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'],
        mode='lines',
        line=dict(color='rgba(246, 76, 114, 1)'),
    ),
    go.Scatter(
        name='Upper Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0']+pred_df['sigma_0'],
        mode='lines',
        marker=dict(color='rgba(47, 47, 162, 0.5)'),
#         line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0']-pred_df['sigma_0'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(47, 47, 162, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
    go.Scatter(
        name='Upper Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1']+pred_df['sigma_1'],
        mode='lines',
        marker=dict(color='rgba(250, 152, 174, 0.5)'),
#         line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1']-pred_df['sigma_1'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(250, 152, 174, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
#     yaxis_range=[0,1],
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()

# Figure for mixture component 1 only: ground-truth scatter, the component
# mean (mu_0), its mixing coefficient (pi_0) and a +/- one-sigma band.
# NOTE(review): pi_* appear to still be raw (pre-softmax) outputs at this
# point, since the predictions were requested with ret_logits=True - confirm.
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        line=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'],
        mode='lines',
        line=dict(color='rgba(90, 92, 237, 1)'),
    ),
    go.Scatter(
        name='Mixing Coefficient 1',
        x=pred_df['col1'],
        # Component 1 is index 0 everywhere else in this figure (mu_0,
        # sigma_0), so its mixing coefficient is pi_0, not pi_1.
        y=pred_df['pi_0'],
        mode='lines',
        line=dict(color='rgba(255, 216, 117, 1)'),
    ),

    go.Scatter(
        name='Upper Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0']+pred_df['sigma_0'],
        mode='lines',
        marker=dict(color='rgba(47, 47, 162, 0.5)'),
#         line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0']-pred_df['sigma_0'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(47, 47, 162, 0.5)',
        # 'tonexty' fills up to the previous trace (Upper Bound 1), so the
        # order of these two traces must be kept.
        fill='tonexty',
        showlegend=False
    ),

])
fig.update_layout(
    yaxis_title='y',
#     yaxis_range=[-0.2,1],
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()

fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        line=dict(color='rgba(153, 115, 142, 0.2)'),
    ),

    go.Scatter(
        name='Component 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'],
        mode='lines',
        line=dict(color='rgba(246, 76, 114, 1)'),
    ),

    go.Scatter(
        name='Mixing Coefficient 2',
        x=pred_df['col1'],
        y=pred_df['pi_1'],
        mode='lines',
        line=dict(color='rgba(255, 216, 117, 1)'),
    ),

    go.Scatter(
        name='Upper Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1']+pred_df['sigma_1'],
        mode='lines',
        marker=dict(color='rgba(250, 152, 174, 0.5)'),
#         line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1']-pred_df['sigma_1'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(250, 152, 174, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
#     yaxis_range=[-0.2,1],
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()

from scipy.special import softmax
# Apply a row-wise softmax across pi_0/pi_1 and plot both resulting columns
# against col1.
# NOTE(review): this overwrites pi_0/pi_1 in pred_df in place, so re-running
# this cell applies softmax to already-transformed values.
pred_df[['pi_0','pi_1']] = softmax(pred_df[['pi_0','pi_1']].values, axis=-1)
px.line(pred_df, x='col1', y=['pi_0','pi_1'])

Boston Housing Dataset

# from sklearn.datasets import load_diabetes
# target_col = "target"
# X, y = load_diabetes(as_frame=True, return_X_y=True)
# cont_cols = X.columns.tolist()
# cat_cols = []
# X[target_col] = y
# df_train, df_test = train_test_split(X, test_size=0.2, random_state=42)
# df_train, df_valid = train_test_split(df_train, test_size=0.2, random_state=42)
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
from sklearn.datasets import load_boston
target_col = "target"
data = load_boston(return_X_y=False)
X = pd.DataFrame(data['data'], columns=data['feature_names'])
# Capture the feature names before the target column is appended.
cont_cols = X.columns.tolist()
cat_cols = []
# Take the target from the loaded bunch. The previous `y` was only defined in
# the commented-out diabetes variant, so a fresh run raised NameError here.
X[target_col] = data['target']
# 80/20 train/test split, then a further 80/20 train/validation split.
df_train, df_test = train_test_split(X, test_size=0.2, random_state=42)
df_train, df_valid = train_test_split(df_train, test_size=0.2, random_state=42)

Plot

px.histogram(df_train, x="target", title="Histogram")

Training the MDN

Define the Configs

Let's use a nifty utility function in the package to figure out the centers of the possible Gaussian components. It internally runs K-Means and returns the cluster centroids; let's set those as the bias initialization.

from pytorch_tabular.utils import get_gaussian_centers

mu_init = get_gaussian_centers(df_train[target_col], n_components=4)
epochs = 1000
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=cont_cols,
    categorical_cols=cat_cols,
#         continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience = 5,
#     early_stopping=None,
    gpus=-1,  #index of the GPU to use. -1 means all available GPUs, None, means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr":0.005, "epochs": epochs, "steps_per_epoch":steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience":3})
mdn_config = MixtureDensityHeadConfig(num_gaussian=4, 
                                      weight_regularization=2, 
#                                       lambda_pi=10, 
#                                       lambda_sigma=1, 
                                      mu_bias_init=mu_init
                                     )#, mu_bias_init=[0.3, 0.7])
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="200-100",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layers
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    #         target_range=[(df_train[col].min(),df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)

Training the Model

tabular_model.fit(train=df_train, validation=df_valid)

Predictions and Visualization

pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100, ret_logits=True)
pred_df.head()


CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX ... backbone_features_90 backbone_features_91 backbone_features_92 backbone_features_93 backbone_features_94 backbone_features_95 backbone_features_96 backbone_features_97 backbone_features_98 backbone_features_99
173 0.09178 0.0 4.05 0.0 0.510 6.416 84.1 2.6463 5.0 296.0 ... 1.274902 0.000000 0.000000 0.199463 1.598198 1.412695 0.509436 0.0 0.890321 0.789249
274 0.05644 40.0 6.41 1.0 0.447 6.758 32.9 4.0776 4.0 254.0 ... 2.153439 0.581171 0.000000 2.336369 0.000000 0.000000 0.000000 0.0 1.657820 0.000000
491 0.10574 0.0 27.74 0.0 0.609 5.983 98.8 1.8681 4.0 711.0 ... 2.305889 0.494037 2.370322 0.000000 0.000000 2.163910 0.000000 0.0 0.000000 0.000000
72 0.09164 0.0 10.81 0.0 0.413 6.065 7.8 5.2873 4.0 305.0 ... 0.000000 0.564467 0.000000 0.000000 0.107337 0.000000 0.000000 0.0 0.000000 2.047639
452 5.09017 0.0 18.10 0.0 0.713 6.297 91.8 2.3682 24.0 666.0 ... 0.000000 0.333266 1.646652 0.000000 0.000000 1.867627 1.292073 0.0 0.021513 0.000000

5 rows × 130 columns

import scipy.stats as ss

def plot_normal(x_range, mu=0, sigma=1, cdf=False, **kwargs):
    """Evaluate a normal distribution over ``x_range``.

    Despite the name, nothing is drawn here: the function only computes the
    curve so the caller can plot it.

    Args:
        x_range: Points at which to evaluate the distribution.
        mu: Mean of the distribution (standard normal by default).
        sigma: Standard deviation of the distribution.
        cdf: If True evaluate the cumulative distribution function,
            otherwise the probability density function.
        **kwargs: Accepted but not used.

    Returns:
        Tuple ``(x_range, values)`` with ``values`` computed by
        ``scipy.stats.norm``.
    """
    evaluate = ss.norm.cdf if cdf else ss.norm.pdf
    curve = evaluate(x_range, mu, sigma)
    return x_range, curve
import torch
from torch import nn

from torch.autograd import Variable
from torch.distributions import Categorical
def get_pdf(idx):
    """Return an (x, density) curve for one row of the global ``pred_df``.

    The row is looked up by index label (``pred_df.loc[idx]``). One of the
    four mixture components is picked at random from the row's ``pi_*``
    values, and the normal density of that component alone is evaluated
    through ``plot_normal``.

    NOTE(review): the curve is the density of a single sampled component,
    not the pi-weighted mixture, and both ``gumbel_softmax`` and
    ``Categorical.sample`` are random, so repeated calls with the same
    ``idx`` can return different curves - confirm this is intended.
    """
    row = pred_df.loc[idx]
    # Build 1 x 4 tensors (one entry per mixture component) from the row.
    pi = torch.from_numpy(row[['pi_0','pi_1','pi_2','pi_3']].values).unsqueeze(0)
    mu = torch.from_numpy(row[['mu_0','mu_1','mu_2','mu_3']].values).unsqueeze(0)
    sigma = torch.from_numpy(row[['sigma_0','sigma_1','sigma_2','sigma_3']].values).unsqueeze(0)
    # Turn the pi values into component probabilities, then draw one
    # component index from them.
    softmax_pi = nn.functional.gumbel_softmax(pi, tau=1, dim=-1)
    categorical = Categorical(softmax_pi)
    pis = categorical.sample().unsqueeze(1)
    # Select the sigma / mu of the drawn component as Python scalars.
    sigma = sigma.gather(1, pis).item()
    mu = mu.gather(1, pis).item()
    # Evaluation grid: 5000 points from 0.1x to 1.9x the row's target_prediction.
    x = np.linspace(row['target_prediction'].item()*0.1, row['target_prediction'].item()*1.9, 5000)
    return plot_normal(x, mu=mu, sigma=sigma)
# idxs = pred_df[mask].sample(5).index

# Index labels of the rows to plot; get_pdf looks them up with pred_df.loc.
# NOTE(review): presumably all four labels are present in pred_df's index -
# verify, since a missing label would raise KeyError.
idxs = [2, 173, 412, 365]
traces = []
# One line trace per selected row, using the (x, y) curve from get_pdf.
for idx in idxs:
    x,y = get_pdf(idx)
    trace = go.Scatter(
            name=f'House_{idx}',
            x=x,
            y=y,
            mode='lines',
    #         line=dict(color='rgba(246, 76, 114, 1)'),
        )
    traces.append(trace)

# Combine all traces into a single figure and label the axes.
fig = go.Figure(traces)
fig.update_layout(
    yaxis_title='P(MEDV)',
    xaxis_title='MEDV',
#     yaxis_range=[-0.2,1],
    title='PDFs of different Houses',
    hovermode="x"
)
fig.show()