import warnings

from rich import print
from rich.pretty import pprint
from sklearn.model_selection import train_test_split

from pytorch_tabular.utils import make_mixed_dataset
data, cat_col_names, num_col_names = make_mixed_dataset(
    task="classification", n_samples=10000, n_features=20, n_categories=4
)
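
make_mixed_dataset returns a pandas DataFrame together with the names of the categorical and continuous columns. A quick sanity check (plain pandas, nothing specific to PyTorch Tabular):

# Inspect the synthetic dataset: overall shape and which columns
# are categorical vs. continuous.
print(data.shape)
print(cat_col_names)
print(num_col_names)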

Importing the Library

from pytorch_tabular import TabularModel
from pytorch_tabular.models import (
    CategoryEmbeddingModelConfig,
)
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

Hyperparameter Tuning

train, test = train_test_split(data, random_state=42)
train, val = train_test_split(train, random_state=42)
data_config = DataConfig(
    target=[
        "target"
    ],  # target should always be a list; multi-target is only supported for regression, and multi-task classification is not implemented
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
)
trainer_config = TrainerConfig(
    batch_size=1024,
    max_epochs=100,
    early_stopping="valid_loss",  # Monitor valid_loss for early stopping
    early_stopping_mode="min",  # Set the mode to min because lower valid_loss is better
    early_stopping_patience=5,  # No. of epochs of degrading performance to wait before terminating
    checkpoints="valid_loss",  # Save the best checkpoint by monitoring valid_loss
    load_best=True,  # After training, load the best checkpoint
    progress_bar="none",  # Turning off Progress bar
    trainer_kwargs=dict(enable_model_summary=False),  # Turning off model summary
)
optimizer_config = OptimizerConfig()

head_config = LinearHeadConfig(
    layers="",  # No additional layers in the head; just a mapping layer to output_dim
    dropout=0.1,
    initialization="kaiming",
).__dict__  # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)
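
Because LinearHeadConfig is a dataclass, __dict__ yields a plain dict that can be passed into the model config. You can verify the conversion directly (the keys shown in the comment are illustrative):

pprint(head_config)  # a plain dict, e.g. {'layers': '', 'dropout': 0.1, 'initialization': 'kaiming', ...}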

model_config = CategoryEmbeddingModelConfig(
    task="classification",
    layers="1024-512-512",  # Number of nodes in each layer
    activation="LeakyReLU",  # Activation between each layer
    learning_rate=1e-3,
    head="LinearHead",  # Linear Head
    head_config=head_config,  # Linear Head Config
)

Note: For demonstration we are using the test split for tuning, but in real problems, please use a separate validation set for tuning. Otherwise, you will overfit to the test set and get falsely high performance estimates.

Grid Search

Define the Hyperparameter Space

The hyperparameter space is defined as a dictionary: the keys are the hyperparameter names and the values are the lists of values to try. The hyperparameter names follow this convention:

  • model_config__<hyperparameter_name> for model hyperparameters
  • model_config.head_config__<hyperparameter_name> for head hyperparameters
  • trainer_config__<hyperparameter_name> for trainer hyperparameters
  • optimizer_config__<hyperparameter_name> for optimizer hyperparameters

Data module hyperparameters cannot be tuned, because the datamodule is already fitted and its hyperparameters cannot be changed.

search_space = {
    "model_config__layers": ["1024-512-512", "1024-512-256", "1024-512-128"],
    "model_config.head_config__dropout": [0.1, 0.2, 0.3],
    "trainer_config__batch_size": [1024, 2048, 4096],
    "optimizer_config__optimizer": ["RAdam", "AdamW"],
}
# Any other parameter which is not part of the search_space will be kept constant during the search
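
Grid search trains one model for every combination in this space, so it is worth counting the grid before launching. A quick back-of-the-envelope count (plain Python, not part of the tuner API):

# 3 layer configs x 3 dropout values x 3 batch sizes x 2 optimizers
from math import prod
print(prod(len(v) for v in search_space.values()))  # 54 trials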
from pytorch_tabular.tabular_model_tuner import TabularModelTuner
tuner = TabularModelTuner(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    result = tuner.tune(
        train=train,
        validation=test,
        search_space=search_space,
        strategy="grid_search",
        # cv=5, # Uncomment this to do a 5-fold cross-validation
        metric="accuracy",
        mode="max",
        progress_bar=True,
        verbose=False,  # Set to True to log metrics and params at each iteration
    )
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision

(Lightning repeats this hardware summary for every trial in the grid; the repeated output is truncated here.)

The result is a namedtuple with trials_df, best_params, best_score, and best_model:

  • trials_df: A dataframe with all the hyperparameter combinations and their corresponding scores
  • best_params: The best hyperparameter combination
  • best_score: The best score
  • best_model: The best model if return_best_model is True; otherwise None
result.trials_df.head()
   trial_id  model_config.head_config__dropout  model_config__layers  optimizer_config__optimizer  trainer_config__batch_size      loss  accuracy
0         0                                 0.1          1024-512-512                        RAdam                        1024  0.208672    0.9212
1         1                                 0.1          1024-512-512                        RAdam                        2048  0.188086    0.9324
2         2                                 0.1          1024-512-512                        RAdam                        4096  0.198695    0.9272
3         3                                 0.1          1024-512-512                        AdamW                        1024  0.193950    0.9336
4         4                                 0.1          1024-512-512                        AdamW                        2048  0.206217    0.9272
print("Best Score: ", result.best_score)
pprint(result.best_params)
Best Score:  0.9368000030517578
{
    'model_config.head_config__dropout': 0.1,
    'model_config__layers': '1024-512-512',
    'optimizer_config__optimizer': 'AdamW',
    'trainer_config__batch_size': 4096,
    'loss': 0.19274771213531494
}
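
To retrain a final model with the winning configuration, one option is to write the best parameters back onto the config objects before fitting. This is a minimal sketch, not a tuner API: it assumes the <config>__<parameter> naming convention described above, and drops the extra loss entry that best_params carries.

# Sketch: push result.best_params back into the config objects.
for key, value in result.best_params.items():
    if key == "loss":  # metadata, not a hyperparameter
        continue
    config_name, param = key.split("__")
    if config_name == "model_config.head_config":
        model_config.head_config[param] = value  # head_config is a plain dict
    elif config_name == "model_config":
        setattr(model_config, param, value)
    elif config_name == "trainer_config":
        setattr(trainer_config, param, value)
    elif config_name == "optimizer_config":
        setattr(optimizer_config, param, value)

final_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
final_model.fit(train=train, validation=val)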

Random Search

Instead of exhaustively enumerating a grid, we can sample a fixed number of trials from the search space. The space is defined as before, except that the values can be either lists (for categorical hyperparameters) or scipy distributions (for continuous hyperparameters). The same naming convention and the same caution about tuning on a proper validation set apply here as well.

from scipy.stats import uniform, randint
search_space = {
    "model_config__layers": ["1024-512-512", "1024-512-256", "1024-512-128"],
    "model_config.head_config__dropout": uniform(0, 0.5),
    "trainer_config__batch_size": randint(128, 2048),
    "optimizer_config__optimizer": ["RAdam", "AdamW"],
}
# Any other parameter which is not part of the search_space will be kept constant during the search
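
The frozen scipy distributions expose an .rvs() method, and random search draws candidate values by sampling. This standalone snippet only illustrates the sampling; it is not the tuner's internal code:

# Each call draws one sample from the frozen distribution.
print(uniform(0, 0.5).rvs(random_state=0))     # a dropout in [0, 0.5)
print(randint(128, 2048).rvs(random_state=0))  # a batch size in [128, 2048)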
from pytorch_tabular.tabular_model_tuner import TabularModelTuner
tuner = TabularModelTuner(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config
)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    result = tuner.tune(
        train=train,
        validation=test,  # validation is not needed if we use CV
        search_space=search_space,
        n_trials=10,
        strategy="random_search",
        # cv=5, # Uncomment this to do a 5-fold cross-validation
        metric="accuracy",
        mode="max",
        progress_bar=True,
        verbose=False,  # Set to True to log metrics and params at each iteration
    )
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

(Lightning repeats this hardware summary for each of the 10 random trials; the repeated output is truncated here.)

As before, the result is a namedtuple with trials_df, best_params, best_score, and best_model.
result.trials_df.head()
   trial_id  model_config.head_config__dropout  model_config__layers  optimizer_config__optimizer  trainer_config__batch_size      loss  accuracy
0         0                            0.187270          1024-512-512                        RAdam                        1258  0.214784    0.9216
1         1                            0.389846          1024-512-512                        RAdam                         249  0.211062    0.9236
2         2                            0.077997          1024-512-128                        RAdam                         215  0.200881    0.9252
3         3                            0.166854          1024-512-128                        AdamW                        1460  0.202277    0.9300
4         4                            0.484955          1024-512-256                        AdamW                         513  0.197455    0.9320
print("Best Score: ", result.best_score)
pprint(result.best_params)
Best Score:  0.9319999814033508
{
    'model_config.head_config__dropout': 0.48495492608099716,
    'model_config__layers': '1024-512-256',
    'optimizer_config__optimizer': 'AdamW',
    'trainer_config__batch_size': 513,
    'loss': 0.1974552422761917
}
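
Since trials_df records every sampled configuration, you can also rank the trials yourself, for example by validation accuracy:

# Rank all sampled trials by accuracy, best first.
result.trials_df.sort_values("accuracy", ascending=False).head()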