import os

try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False
if not IN_COLAB:
    os.chdir("..")
import random
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode
%load_ext autoreload
%autoreload 2
from IPython.display import Math
from pytorch_tabular import TabularModel
from pytorch_tabular.models import (
    CategoryEmbeddingModelConfig,
    NodeConfig,
    TabNetModelConfig,
    CategoryEmbeddingMDNConfig,
    MixtureDensityHeadConfig,
    NODEMDNConfig,
)
from pytorch_tabular.config import (
    DataConfig,
    OptimizerConfig,
    TrainerConfig,
    ExperimentConfig,
)
from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer
Utility Functions
def generate_linear_example(samples=int(1e5)):
    x_data = np.random.sample(samples)[:, np.newaxis].astype(np.float32)
    y_data = np.add(5 * x_data, np.multiply(x_data**2, np.random.standard_normal(x_data.shape)))
    x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=0.5, random_state=42)
    x_test = np.linspace(0.0, 1.0, int(1e3))[:, np.newaxis].astype(np.float32)
    df_train = pd.DataFrame({"col1": x_train.ravel(), "target": y_train.ravel()})
    df_valid = pd.DataFrame({"col1": x_valid.ravel(), "target": y_valid.ravel()})
    # test = sorted(df_valid.col1.round(3).unique())
    # df_test = pd.DataFrame({"col1": test})
    df_test = pd.DataFrame({"col1": x_test.ravel()})
    return df_train, df_valid, df_test, ["target"]
def generate_non_linear_example(samples=int(1e5)):
    x_data = np.float32(np.random.uniform(-10, 10, (1, samples)))
    r_data = np.array([np.random.normal(scale=np.abs(i)) for i in x_data])
    y_data = np.float32(np.square(x_data) + r_data * 2.0)
    x_data2 = np.float32(np.random.uniform(-10, 10, (1, samples)))
    r_data2 = np.array([np.random.normal(scale=np.abs(i)) for i in x_data2])
    y_data2 = np.float32(-np.square(x_data2) + r_data2 * 2.0)
    x_data = np.concatenate((x_data, x_data2), axis=1).T
    y_data = np.concatenate((y_data, y_data2), axis=1).T
    min_max_scaler = MinMaxScaler()
    y_data = min_max_scaler.fit_transform(y_data)
    x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=0.5, random_state=42, shuffle=True)
    x_test = np.linspace(-10, 10, int(1e3))[:, np.newaxis].astype(np.float32)
    df_train = pd.DataFrame({"col1": x_train.ravel(), "target": y_train.ravel()})
    df_valid = pd.DataFrame({"col1": x_valid.ravel(), "target": y_valid.ravel()})
    # test = sorted(df_valid.col1.round(3).unique())
    # df_test = pd.DataFrame({"col1": test})
    df_test = pd.DataFrame({"col1": x_test.ravel()})
    return df_train, df_valid, df_test, ["target"]
def generate_step_linear_example(samples=int(1e5)):
    x_data = np.random.sample(samples)[:, np.newaxis].astype(np.float32)
    y_data = np.zeros(x_data.shape)
    mask = x_data < 0.5
    y_data[mask] = np.add(5 * x_data[mask], np.multiply(x_data[mask]**2, np.random.standard_normal(x_data[mask].shape)))
    y_data[~mask] = np.add(100 * x_data[~mask] + x_data[~mask]**2, np.multiply(x_data[~mask]**2, np.random.standard_normal(x_data[~mask].shape)))
    min_max_scaler = MinMaxScaler()
    y_data = min_max_scaler.fit_transform(y_data)
    x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=0.5, random_state=42, shuffle=True)
    x_test = np.linspace(0.0, 1.0, int(1e3))[:, np.newaxis].astype(np.float32)
    df_train = pd.DataFrame({"col1": x_train.ravel(), "target": y_train.ravel()})
    df_valid = pd.DataFrame({"col1": x_valid.ravel(), "target": y_valid.ravel()})
    # test = sorted(df_valid.col1.round(3).unique())
    # df_test = pd.DataFrame({"col1": test})
    df_test = pd.DataFrame({"col1": x_test.ravel()})
    return df_train, df_valid, df_test, ["target"]
def generate_gaussian_mixture(samples=int(1e5)):
    x_data = np.random.sample(samples)[:, np.newaxis].astype(np.float32)
    pi = np.sin(x_data) + 3 * x_data * np.cos(x_data)
    pi = pi / pi.max()
    # g1 = np.random.sample(samples)*4*x_data.squeeze()
    # g2 = np.random.sample(samples)*15*x_data.squeeze()
    g1 = 2 * x_data.squeeze() + 0.5 * np.random.sample(samples)
    g2 = 8 * x_data.squeeze() + 0.5 * np.random.sample(samples)
    y_data = pi.round().squeeze() * g1 + (1 - pi.round().squeeze()) * g2
    y_data = y_data.reshape(-1, 1)
    x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=0.5, random_state=42)
    x_test = np.linspace(0.0, 1.0, int(1e3))[:, np.newaxis].astype(np.float32)
    df_train = pd.DataFrame({"col1": x_train.ravel(), "target": y_train.ravel()})
    df_valid = pd.DataFrame({"col1": x_valid.ravel(), "target": y_valid.ravel()})
    # test = sorted(df_valid.col1.round(3).unique())
    # df_test = pd.DataFrame({"col1": test})
    df_test = pd.DataFrame({"col1": x_test.ravel()})
    return df_train, df_valid, df_test, ["target"]
Linear Example
df_train, df_valid, df_test, target_col = generate_linear_example()
Plot
# display(Math(r"$y = 5x + (x^2 * \epsilon)$" + "\n" + r"$\epsilon \sim \mathcal{N}(0,1)$"))
fig = px.scatter(df_train, x="col1", y="target", title=r"$y = 5x + (x^2 * \epsilon)$" + "\n" + r"$\epsilon \sim \mathcal{N}(0,1)$")
fig.update_layout(
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)
fig
px.histogram(df_train, x="target", title="Histogram")
Training the MDN
Define the Configs
epochs = 15
batch_size = 128
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
    # continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LR Finder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience=5,
    gpus=-1,  # Index of the GPU to use. -1 means all available GPUs; None means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr": 0.005, "epochs": epochs, "steps_per_epoch": steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 3})
mdn_config = MixtureDensityHeadConfig(num_gaussian=1)
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="128-64",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layer
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    # target_range=[(df_train[col].min(), df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
Training the Model
tabular_model.fit(train=df_train, validation=df_valid)
Predictions and Visualization
pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100)
pred_df.head()
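The synthetic test grid has no ground-truth column, so a quick sanity check against the validation set is handy before reading the plot below. This is a minimal sketch that reuses the mean_squared_error imported above, assuming the point prediction lands in the target_prediction column (following the convention shown in pred_df.head()):
valid_pred = tabular_model.predict(df_valid)
# Root-mean-squared error of the point predictions on the validation set
rmse = np.sqrt(mean_squared_error(valid_pred["target"], valid_pred["target_prediction"]))
print(f"Validation RMSE: {rmse:.4f}")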
fig = go.Figure([
    go.Scatter(
        name='Mean',
        x=pred_df['col1'],
        y=pred_df['target_prediction'],
        mode='lines',
        line=dict(color='rgba(28,53,94,1)'),
    ),
    go.Scatter(
        name='Upper Bound',
        x=pred_df['col1'],
        y=pred_df['target_q75'],
        mode='lines',
        marker=dict(color='rgba(0,147,201,0.3)'),
        line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound',
        x=pred_df['col1'],
        y=pred_df['target_q25'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(0,147,201,0.3)',
        fill='tonexty',
        showlegend=False
    )
])
fig.update_layout(
    yaxis_title='y',
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()
Non-Linear Example
df_train, df_valid, df_test, target_col = generate_non_linear_example()
Plot
fig = px.scatter(df_train, x="col1", y="target", title=r"$y = \pm x^2 + 2\epsilon$" + "\n" + r"$\epsilon \sim \mathcal{N}(0,|x|)$")
fig.update_layout(
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)
fig
px.histogram(df_train, x="target", title="Histogram")
Training a FeedForward
Define the Configs
epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
    # continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LR Finder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience=5,
    gpus=-1,  # Index of the GPU to use. -1 means all available GPUs; None means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr": 0.005, "epochs": epochs, "steps_per_epoch": steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 3})
model_config = CategoryEmbeddingModelConfig(
    task="regression",
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layer
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    # target_range=[(df_train[col].min(), df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
Training the Model
tabular_model.fit(train=df_train, validation=df_valid)
Predictions and Visualization
pred_df = tabular_model.predict(df_valid.sample(1000).sort_values("col1"))
pred_df.head()
fig = go.Figure([
    go.Scatter(
        name='Prediction',
        x=pred_df['col1'],
        y=pred_df['target_prediction'],
        mode='lines',
        line=dict(color='rgba(28,53,94,1)'),
    ),
    go.Scatter(
        name='Actual',
        x=pred_df['col1'],
        y=pred_df['target'],
        mode='markers',
        marker=dict(color='rgba(60,180,229,1)'),
    ),
])
fig.update_layout(
    yaxis_title='y',
    title='Category Embedding Prediction',
    hovermode="x"
)
fig.show()
Training the MDN
Define the Configs
epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
    # continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LR Finder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience=5,
    early_stopping=None,
    gpus=-1,  # Index of the GPU to use. -1 means all available GPUs; None means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr": 0.005, "epochs": epochs, "steps_per_epoch": steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 3})
mdn_config = MixtureDensityHeadConfig(num_gaussian=2, weight_regularization=2)  # , mu_bias_init=[0.3, 0.7]
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layer
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    # target_range=[(df_train[col].min(), df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
Training the Model
tabular_model.fit(train=df_train, validation=df_valid)
Predictions and Visualization
pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100, ret_logits=True)
pred_df.head()
df = df_valid.sample(10000)
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        marker=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'],
        mode='lines',
        line=dict(color='rgba(36, 37, 130, 1)'),
    ),
    go.Scatter(
        name='Component 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'],
        mode='lines',
        line=dict(color='rgba(246, 76, 114, 1)'),
    ),
    go.Scatter(
        name='Upper Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'] + pred_df['sigma_0'],
        mode='lines',
        marker=dict(color='rgba(47, 47, 162, 0.5)'),
        # line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'] - pred_df['sigma_0'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(47, 47, 162, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
    go.Scatter(
        name='Upper Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'] + pred_df['sigma_1'],
        mode='lines',
        marker=dict(color='rgba(250, 152, 174, 0.5)'),
        # line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'] - pred_df['sigma_1'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(250, 152, 174, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()
Gaussian Mixture
df_train, df_valid, df_test, target_col = generate_gaussian_mixture()
Plot
from IPython.display import display, Math
eqn = r'$\pi = \frac{\sin(x) + 3x\cos(x)}{\max\left(\sin(x) + 3x\cos(x)\right)} \\ g_1 = 2x + 0.5\epsilon, \quad \epsilon \sim \mathcal{U}(0,1) \\ g_2 = 8x + 0.5\epsilon, \quad \epsilon \sim \mathcal{U}(0,1) \\ p = \operatorname{round}(\pi) \rightarrow \text{selects one of the two components based on the value of } \pi \\ y = p \cdot g_1 + (1 - p) \cdot g_2$'
display(Math(eqn))
fig = px.scatter(df_train, x="col1", y="target")
fig.update_layout(
    title={
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)
fig
px.histogram(df_train, x="target", title="Histogram")
Training a FeedForward
Define the Configs
epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
    # continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LR Finder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience=5,
    gpus=-1,  # Index of the GPU to use. -1 means all available GPUs; None means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr": 0.005, "epochs": epochs, "steps_per_epoch": steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 3})
model_config = CategoryEmbeddingModelConfig(
    task="regression",
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layer
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    # target_range=[(df_train[col].min(), df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
Training the Model
tabular_model.fit(train=df_train, validation=df_valid)
Predictions and Visualization
pred_df = tabular_model.predict(df_valid.sample(1000).sort_values("col1"))
pred_df.head()
fig = go.Figure([
    go.Scatter(
        name='Prediction',
        x=pred_df['col1'],
        y=pred_df['target_prediction'],
        mode='lines',
        line=dict(color='rgba(28,53,94,1)'),
    ),
    go.Scatter(
        name='Actual',
        x=pred_df['col1'],
        y=pred_df['target'],
        mode='markers',
        marker=dict(color='rgba(60,180,229,1)'),
    ),
])
fig.update_layout(
    yaxis_title='y',
    title='Category Embedding Network Prediction',
    hovermode="x"
)
fig.show()
Training the MDN
Define the Configs
epochs = 200
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=['col1'],
    categorical_cols=[],
    # continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LR Finder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience=5,
    early_stopping=None,
    gpus=-1,  # Index of the GPU to use. -1 means all available GPUs; None means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr": 0.005, "epochs": epochs, "steps_per_epoch": steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 3})
mdn_config = MixtureDensityHeadConfig(
    num_gaussian=2,
    weight_regularization=2,
    # lambda_pi=10,
    # lambda_sigma=1,
    # mu_bias_init=[1, 2]
)  # , mu_bias_init=[0.3, 0.7]
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="8",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layer
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    # target_range=[(df_train[col].min(), df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
Training the Model
tabular_model.fit(train=df_train, validation=df_valid)
Predictions and Visualization
pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100, ret_logits=True)
pred_df.head()
df = df_valid.sample(10000)
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        marker=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'],
        mode='lines',
        line=dict(color='rgba(90, 92, 237, 1)'),
    ),
    go.Scatter(
        name='Component 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'],
        mode='lines',
        line=dict(color='rgba(246, 76, 114, 1)'),
    ),
    go.Scatter(
        name='Upper Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'] + pred_df['sigma_0'],
        mode='lines',
        marker=dict(color='rgba(47, 47, 162, 0.5)'),
        # line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'] - pred_df['sigma_0'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(47, 47, 162, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
    go.Scatter(
        name='Upper Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'] + pred_df['sigma_1'],
        mode='lines',
        marker=dict(color='rgba(250, 152, 174, 0.5)'),
        # line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'] - pred_df['sigma_1'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(250, 152, 174, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
    # yaxis_range=[0, 1],
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        marker=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'],
        mode='lines',
        line=dict(color='rgba(90, 92, 237, 1)'),
    ),
    go.Scatter(
        name='Mixing Coefficient 1',
        x=pred_df['col1'],
        y=pred_df['pi_0'],
        mode='lines',
        line=dict(color='rgba(255, 216, 117, 1)'),
    ),
    go.Scatter(
        name='Upper Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'] + pred_df['sigma_0'],
        mode='lines',
        marker=dict(color='rgba(47, 47, 162, 0.5)'),
        # line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 1',
        x=pred_df['col1'],
        y=pred_df['mu_0'] - pred_df['sigma_0'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(47, 47, 162, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
    # yaxis_range=[-0.2, 1],
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()
fig = go.Figure([
    go.Scatter(
        name='Ground Truth',
        x=df['col1'],
        y=df['target'],
        mode='markers',
        marker=dict(color='rgba(153, 115, 142, 0.2)'),
    ),
    go.Scatter(
        name='Component 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'],
        mode='lines',
        line=dict(color='rgba(246, 76, 114, 1)'),
    ),
    go.Scatter(
        name='Mixing Coefficient 2',
        x=pred_df['col1'],
        y=pred_df['pi_1'],
        mode='lines',
        line=dict(color='rgba(255, 216, 117, 1)'),
    ),
    go.Scatter(
        name='Upper Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'] + pred_df['sigma_1'],
        mode='lines',
        marker=dict(color='rgba(250, 152, 174, 0.5)'),
        # line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        name='Lower Bound 2',
        x=pred_df['col1'],
        y=pred_df['mu_1'] - pred_df['sigma_1'],
        marker=dict(color="#444"),
        line=dict(width=0),
        mode='lines',
        fillcolor='rgba(250, 152, 174, 0.5)',
        fill='tonexty',
        showlegend=False
    ),
])
fig.update_layout(
    yaxis_title='y',
    # yaxis_range=[-0.2, 1],
    title='Mixture Density Network Prediction',
    hovermode="x"
)
fig.show()
from scipy.special import softmax
pred_df[['pi_0','pi_1']] = softmax(pred_df[['pi_0','pi_1']].values, axis=-1)
px.line(pred_df, x='col1', y=['pi_0','pi_1'])
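With the mixing coefficients softmaxed into proper probabilities, the mixture mean can be reconstructed from the components as $\hat{y} = \pi_0 \mu_0 + \pi_1 \mu_1$. A small sketch to compare it against the model's point prediction (target_prediction is sample-based, so the two curves should agree only approximately):
# Expected value of the mixture: component means weighted by the softmaxed pi
pred_df["mixture_mean"] = pred_df["pi_0"] * pred_df["mu_0"] + pred_df["pi_1"] * pred_df["mu_1"]
px.line(pred_df, x="col1", y=["target_prediction", "mixture_mean"])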
Boston Housing Dataset
# from sklearn.datasets import load_diabetes
# target_col = "target"
# X, y = load_diabetes(as_frame=True, return_X_y=True)
# cont_cols = X.columns.tolist()
# cat_cols = []
# X[target_col] = y
# df_train, df_test = train_test_split(X, test_size=0.2, random_state=42)
# df_train, df_valid = train_test_split(df_train, test_size=0.2, random_state=42)
from sklearn.datasets import load_boston
target_col = "target"
data = load_boston(return_X_y=False)
X = pd.DataFrame(data['data'], columns=data['feature_names'])
cont_cols = X.columns.tolist()
cat_cols = []
X[target_col] = data['target']
df_train, df_test = train_test_split(X, test_size=0.2, random_state=42)
df_train, df_valid = train_test_split(df_train, test_size=0.2, random_state=42)
Plot
px.histogram(df_train, x="target", title="Histogram")
Training the MDN
Define the Configs
Let's use a nifty utility function in the package to figure out the centers of the possible Gaussian components. Internally, it runs a KMeans on the target and returns the cluster centroids, which we can then set as the bias initialization for the mixture means.
from pytorch_tabular.utils import get_gaussian_centers
mu_init = get_gaussian_centers(df_train[target_col], n_components=4)
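For intuition, here is a rough sketch of what such a utility boils down to; this is a hypothetical re-implementation with sklearn's KMeans, not the package's exact code:
from sklearn.cluster import KMeans

def gaussian_centers_sketch(y, n_components=4):
    # Cluster the 1-D target and use the centroids as initial component means
    km = KMeans(n_clusters=n_components, random_state=42).fit(np.asarray(y).reshape(-1, 1))
    return km.cluster_centers_.ravel().tolist()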
epochs = 1000
batch_size = 2048
steps_per_epoch = int((len(df_train)//batch_size)*0.9)
data_config = DataConfig(
    target=['target'],
    continuous_cols=cont_cols,
    categorical_cols=cat_cols,
    # continuous_feature_transform="quantile_uniform"
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LR Finder to automatically derive a learning rate
    batch_size=batch_size,
    max_epochs=epochs,
    early_stopping_patience=5,
    # early_stopping=None,
    gpus=-1,  # Index of the GPU to use. -1 means all available GPUs; None means CPU
)
# optimizer_config = OptimizerConfig(lr_scheduler="OneCycleLR", lr_scheduler_params={"max_lr": 0.005, "epochs": epochs, "steps_per_epoch": steps_per_epoch})
optimizer_config = OptimizerConfig(lr_scheduler="ReduceLROnPlateau", lr_scheduler_params={"patience": 3})
mdn_config = MixtureDensityHeadConfig(
    num_gaussian=4,
    weight_regularization=2,
    # lambda_pi=10,
    # lambda_sigma=1,
    mu_bias_init=mu_init,
)
model_config = CategoryEmbeddingMDNConfig(
    task="regression",
    mdn_config=mdn_config,
    layers="200-100",  # Number of nodes in each layer
    activation="ReLU",  # Activation between each layer
    learning_rate=1e-3,
    batch_norm_continuous_input=True,
    use_batch_norm=True,
    dropout=0.0,
    embedding_dropout=0,
    initialization="kaiming",
    # target_range=[(df_train[col].min(), df_train[col].max()) for col in ['target']]
)
tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
Training the Model
tabular_model.fit(train=df_train, validation=df_valid)
Predictions and Visualization
pred_df = tabular_model.predict(df_test, quantiles=[0.25,0.5,0.75], n_samples=100, ret_logits=True)
pred_df.head()
import scipy.stats as ss
def plot_normal(x_range, mu=0, sigma=1, cdf=False):
    '''
    Computes the normal distribution curve over a given x range.
    If mu and sigma are not provided, the standard normal is used.
    If cdf=True, the cumulative distribution is computed instead of the PDF.
    Returns the x and y arrays, ready to be plotted.
    '''
    x = x_range
    if cdf:
        y = ss.norm.cdf(x, mu, sigma)
    else:
        y = ss.norm.pdf(x, mu, sigma)
    return x, y
import torch
from torch import nn
from torch.distributions import Categorical
def get_pdf(idx):
    # Sample one mixture component (Gumbel-Softmax over the pi logits)
    # and return the PDF of that component's Gaussian
    row = pred_df.loc[idx]
    pi = torch.from_numpy(row[['pi_0', 'pi_1', 'pi_2', 'pi_3']].values).unsqueeze(0)
    mu = torch.from_numpy(row[['mu_0', 'mu_1', 'mu_2', 'mu_3']].values).unsqueeze(0)
    sigma = torch.from_numpy(row[['sigma_0', 'sigma_1', 'sigma_2', 'sigma_3']].values).unsqueeze(0)
    softmax_pi = nn.functional.gumbel_softmax(pi, tau=1, dim=-1)
    categorical = Categorical(softmax_pi)
    pis = categorical.sample().unsqueeze(1)
    sigma = sigma.gather(1, pis).item()
    mu = mu.gather(1, pis).item()
    x = np.linspace(row['target_prediction'].item() * 0.1, row['target_prediction'].item() * 1.9, 5000)
    return plot_normal(x, mu=mu, sigma=sigma)
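get_pdf samples a single component per call; to visualize the full mixture density $\sum_k \pi_k \, \mathcal{N}(x; \mu_k, \sigma_k)$ instead, here is a sketch along the same lines (assuming, as above, that the pi columns hold pre-softmax logits because of ret_logits=True; softmax is the scipy.special.softmax imported earlier):
def get_mixture_pdf(idx):
    # Full mixture density: softmax the pi logits and weight each component PDF
    row = pred_df.loc[idx]
    pi = softmax(row[[f"pi_{k}" for k in range(4)]].values.astype(float))
    mu = row[[f"mu_{k}" for k in range(4)]].values.astype(float)
    sigma = row[[f"sigma_{k}" for k in range(4)]].values.astype(float)
    x = np.linspace(row["target_prediction"] * 0.1, row["target_prediction"] * 1.9, 5000)
    y = sum(p * ss.norm.pdf(x, m, s) for p, m, s in zip(pi, mu, sigma))
    return x, y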
# idxs = pred_df[mask].sample(5).index
idxs = [2, 173, 412, 365]
traces = []
for idx in idxs:
    x, y = get_pdf(idx)
    trace = go.Scatter(
        name=f'House_{idx}',
        x=x,
        y=y,
        mode='lines',
        # line=dict(color='rgba(246, 76, 114, 1)'),
    )
    traces.append(trace)
fig = go.Figure(traces)
fig.update_layout(
    yaxis_title='P(MEDV)',
    xaxis_title='MEDV',
    # yaxis_range=[-0.2, 1],
    title='PDFs of different Houses',
    hovermode="x"
)
fig.show()