Hyperparameter range determination: my main challenge is setting effective search ranges for hyperparameters such as `length_scale`, `noise_level`, and `sigma_0`. For `length_scale`, I currently use the minimum and maximum pairwise distances between my training points as bounds. Are there recommended statistical or empirical methods for choosing search spaces for Gaussian Process Classifier hyperparameters that suit the characteristics of a given dataset while avoiding convergence issues?
```python
import optuna
from sklearn.metrics import accuracy_score, balanced_accuracy_score, log_loss
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer, RobustScaler
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, WhiteKernel
import numpy as np
from scipy.spatial.distance import pdist
# Assuming X_train_selected_GPC holds the training features
pairwise_distances = pdist(X_train_selected_GPC)
max_spacing = np.max(pairwise_distances)  # maximum pairwise distance between points
min_spacing = np.min(pairwise_distances)  # minimum pairwise distance between points
# Set bounds for length_scale
lower_bound = max(min_spacing, 1e-3)
upper_bound = max_spacing
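# An empirical alternative (a sketch, not what I currently use) is the median
# heuristic: centre the search on the median pairwise distance and span a few
# orders of magnitude around it on a log scale, instead of the extreme bounds
# produced by np.min/np.max:
#   median_spacing = np.median(pairwise_distances)
#   lower_bound = max(median_spacing / 100, 1e-3)
#   upper_bound = median_spacing * 100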
# Custom no-op transformer for the "no preprocessing" option
class NoOpTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X
# Return the preprocessor matching the trial suggestion
def get_preprocessor(preprocessor_name):
    preprocessors = {
        'standardscaler': StandardScaler(),
        'maxabsscaler': MaxAbsScaler(),
        'minmaxscaler': MinMaxScaler(),
        'normalizer': Normalizer(),
        'powertransformer': PowerTransformer(),
        'robustscaler': RobustScaler(),
        'none': NoOpTransformer(),  # no preprocessing
    }
    return preprocessors.get(preprocessor_name, None)
# Objective function for the Optuna study
def objective(trial):
    # Select preprocessor (only a subset of the mapping above is searched here)
    preprocessor = get_preprocessor(
        trial.suggest_categorical('preprocessor', ['standardscaler', 'maxabsscaler', 'minmaxscaler'])
    )

    # GaussianProcessClassifier parameters; the scale parameters are sampled
    # log-uniformly (log=True), which explores wide positive ranges far more
    # evenly than linear sampling
    length_scale = trial.suggest_float('length_scale', lower_bound, upper_bound, log=True)
    noise_level = trial.suggest_float('noise_level', 1e-10, 1e-1, log=True)  # example range
    sigma_0 = trial.suggest_float('sigma_0', 1e-10, 1e-1, log=True)  # example range
    kernel_choice = trial.suggest_categorical('kernel', ['RBF', 'DotProduct', 'Matern'])
    if kernel_choice == 'RBF':
        kernel = RBF(length_scale) + WhiteKernel(noise_level=noise_level)
    elif kernel_choice == 'DotProduct':
        # pass the sampled sigma_0 through to the kernel
        kernel = DotProduct(sigma_0=sigma_0) + WhiteKernel(noise_level=noise_level)
    else:  # 'Matern'
        nu = trial.suggest_categorical('nu', [0.5, 1.5, 2.5])
        kernel = Matern(length_scale=length_scale, nu=nu) + WhiteKernel(noise_level=noise_level)
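    # Note (likely source of the bound warnings below): unless optimizer=None,
    # the internal L-BFGS step re-optimizes the kernel hyperparameters within
    # each kernel's own default bounds (e.g. WhiteKernel's noise_level_bounds
    # default to (1e-5, 1e5)), overriding the values Optuna suggested. One
    # option (an untested sketch) is to make those bounds match the search
    # space, or to fix them entirely, e.g.:
    #   kernel = RBF(length_scale, length_scale_bounds=(lower_bound, upper_bound)) \
    #       + WhiteKernel(noise_level=noise_level, noise_level_bounds=(1e-10, 1e-1))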
    param = {
        'kernel': kernel,
        'optimizer': trial.suggest_categorical('optimizer', ['fmin_l_bfgs_b', None]),
        'n_restarts_optimizer': trial.suggest_int('n_restarts_optimizer', 0, 10),
        'max_iter_predict': trial.suggest_int('max_iter_predict', 200, 500),
        'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        'random_state': 16061989,
    }
    if preprocessor:
        pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('classifier', GaussianProcessClassifier(**param)),
        ])
    else:
        pipeline = GaussianProcessClassifier(**param)
    # Perform cross-validation
    skf = StratifiedKFold(n_splits=10, shuffle=False)
    cv_scores = []
    for train_index, val_index in skf.split(X_train_selected_GPC, y_train):
        X_train_fold, y_train_fold = X_train_selected_GPC.iloc[train_index], y_train.iloc[train_index]
        X_val_fold, y_val_fold = X_train_selected_GPC.iloc[val_index], y_train.iloc[val_index]
        pipeline.fit(X_train_fold, y_train_fold)
        y_val_pred = pipeline.predict(X_val_fold)
        score = balanced_accuracy_score(y_val_fold, y_val_pred)
        cv_scores.append(score)
    cv_balanced_accuracy = np.mean(cv_scores)
    # Evaluate on the test set for both metrics (note: using a test-set score
    # as an objective leaks test information into the search)
    pipeline.fit(X_train_selected_GPC, y_train)
    y_pred = pipeline.predict(X_test_selected_GPC)
    y_pred_proba = pipeline.predict_proba(X_test_selected_GPC)
    test_balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    test_log_loss = log_loss(y_test, y_pred_proba)

    # Print the test-set scores for this trial
    print(f'Test Set Scores for Trial {trial.number}: '
          f'Balanced Accuracy: {test_balanced_accuracy}, Log Loss: {test_log_loss}')

    return cv_balanced_accuracy, test_log_loss
# Create a multi-objective Optuna study and optimize
study = optuna.create_study(directions=['maximize', 'minimize'], sampler=optuna.samplers.NSGAIISampler())

# Run the optimization
study.optimize(objective, n_trials=200)
# Output the top 10 Pareto-optimal trials
print("Pareto-optimal trials:")
pareto_trials = study.best_trials
# Sort by balanced accuracy (descending), breaking ties by log loss (ascending)
pareto_trials = sorted(pareto_trials, key=lambda t: (-t.values[0], t.values[1]))[:10]
for trial in pareto_trials:
    print(f"  Trial {trial.number}:")
    print(f"    Balanced Accuracy: {trial.values[0]}, Log Loss: {trial.values[1]}")
    print("    Params: ")
    for key, value in trial.params.items():
        print(f"      {key}: {value}")
```

Below is example trial output, together with the convergence warnings I mentioned:

```
[I 2024-01-29 20:32:40,110] A new study created in memory with name: no-name-5cfd3414-b60c-4696-a730-bb96b240202f
[I 2024-01-29 20:33:17,858] Trial 0 finished with values: [0.5844709411816231, 0.6273413690380433] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 48006216817.82564, 'noise_level': 0.017815581206219875, 'sigma_0': 0.0835211263249409, 'kernel': 'DotProduct', 'optimizer': None, 'n_restarts_optimizer': 7, 'max_iter_predict': 306, 'warm_start': True}.
Test Set Scores for Trial 0: Balanced Accuracy: 0.6465721040189125, Log Loss: 0.6273413690380433
[I 2024-01-29 20:42:18,682] Trial 1 finished with values: [0.5, 0.6887973622821696] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 120751788390.4435, 'noise_level': 0.029608216180959088, 'sigma_0': 0.03343849887746868, 'kernel': 'Matern', 'nu': 2.5, 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 3, 'max_iter_predict': 370, 'warm_start': False}.
Test Set Scores for Trial 1: Balanced Accuracy: 0.5, Log Loss: 0.6887973622821696
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py:477: ConvergenceWarning: lbfgs failed to converge (status=2):
ABNORMAL_TERMINATION_IN_LNSRCH.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
_check_optimize_result("lbfgs", opt_res)
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[I 2024-01-29 21:06:26,838] Trial 2 finished with values: [0.5783523361539313, 0.6372584505619883] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 208476269821.26263, 'noise_level': 1.1309097248553345e-05, 'sigma_0': 0.02855281577629066, 'kernel': 'Matern', 'nu': 1.5, 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 2, 'max_iter_predict': 493, 'warm_start': False}.
Test Set Scores for Trial 2: Balanced Accuracy: 0.6157028550645571, Log Loss: 0.6372584505619883
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[... the same k2__noise_level warning repeated for several more fits ...]
[I 2024-01-29 21:21:45,312] Trial 3 finished with values: [0.5966696362908889, 0.6324449645490696] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 4144161311.7087917, 'noise_level': 0.04605203545449352, 'sigma_0': 0.04982998930881419, 'kernel': 'RBF', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 6, 'max_iter_predict': 473, 'warm_start': True}.
Test Set Scores for Trial 3: Balanced Accuracy: 0.635933806146572, Log Loss: 0.6324449645490696
[I 2024-01-29 21:29:36,370] Trial 4 finished with values: [0.5718674606274597, 0.6291962990161305] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 169036967838.10867, 'noise_level': 0.032468694910234734, 'sigma_0': 0.09812343775766556, 'kernel': 'DotProduct', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 1, 'max_iter_predict': 493, 'warm_start': True}.
Test Set Scores for Trial 4: Balanced Accuracy: 0.6402073104200764, Log Loss: 0.6291962990161305
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k1__sigma_0 is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[I 2024-01-29 21:45:01,856] Trial 5 finished with values: [0.5889861621654615, 0.6291962603259411] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 291792013215.924, 'noise_level': 0.006619208977044221, 'sigma_0': 0.09635638303210495, 'kernel': 'DotProduct', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 2, 'max_iter_predict': 259, 'warm_start': True}.
Test Set Scores for Trial 5: Balanced Accuracy: 0.6402073104200764, Log Loss: 0.6291962603259411
[I 2024-01-29 21:45:19,967] Trial 6 finished with values: [0.5767175337434327, 0.6325990433241585] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 131193762519.232, 'noise_level': 0.024946412358797826, 'sigma_0': 0.03718758842316881, 'kernel': 'DotProduct', 'optimizer': None, 'n_restarts_optimizer': 3, 'max_iter_predict': 241, 'warm_start': True}.
Test Set Scores for Trial 6: Balanced Accuracy: 0.6381160210947445, Log Loss: 0.6325990433241585
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:430: ConvergenceWarning: The optimal value found for dimension 0 of parameter k1__length_scale is close to the specified upper bound 100000.0. Increasing the bound and calling fit again may find a better value.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[... the same k1__length_scale / k2__noise_level warning pair repeated for several more fits ...]
[I 2024-01-29 21:52:52,493] Trial 7 finished with values: [0.5, 0.6889674690713428] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 325781752428.19226, 'noise_level': 0.07205323391930595, 'sigma_0': 0.024985389180689932, 'kernel': 'RBF', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 3, 'max_iter_predict': 253, 'warm_start': True}.
Test Set Scores for Trial 7: Balanced Accuracy: 0.5, Log Loss: 0.6889674690713428
[I 2024-01-29 21:53:19,129] Trial 8 finished with values: [0.5, 0.6882635212941796] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 149276801723.58057, 'noise_level': 0.011586879886197287, 'sigma_0': 0.050370308741337994, 'kernel': 'RBF', 'optimizer': None, 'n_restarts_optimizer': 0, 'max_iter_predict': 204, 'warm_start': False}.
Test Set Scores for Trial 8: Balanced Accuracy: 0.5, Log Loss: 0.6882635212941796
[I 2024-01-29 21:53:45,161] Trial 9 finished with values: [0.5, 0.6881882926594941] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 253845618622.0915, 'noise_level': 0.05762401707538051, 'sigma_0': 0.04306879297808018, 'kernel': 'Matern', 'nu': 0.5, 'optimizer': None, 'n_restarts_optimizer': 8, 'max_iter_predict': 426, 'warm_start': False}.
Test Set Scores for Trial 9: Balanced Accuracy: 0.5, Log Loss: 0.6881882926594941
[I 2024-01-29 21:54:07,055] Trial 10 finished with values: [0.5, 0.6882704725970809] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 311303775135.55255, 'noise_level': 0.007553664772048748, 'sigma_0': 0.03384982235223868, 'kernel': 'Matern', 'nu': 0.5, 'optimizer': None, 'n_restarts_optimizer': 2, 'max_iter_predict': 283, 'warm_start': True}.
Test Set Scores for Trial 10: Balanced Accuracy: 0.5, Log Loss: 0.6882704725970809
[I 2024-01-29 21:54:35,434] Trial 11 finished with values: [0.5, 0.688233161713861] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 184478382360.91122, 'noise_level': 0.029625348382531086, 'sigma_0': 0.01349093189725387, 'kernel': 'Matern', 'nu': 1.5, 'optimizer': None, 'n_restarts_optimizer': 0, 'max_iter_predict': 209, 'warm_start': False}.
Test Set Scores for Trial 11: Balanced Accuracy: 0.5, Log Loss: 0.688233161713861
[I 2024-01-29 21:54:56,255] Trial 12 finished with values: [0.5, 0.6882300197133167] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 292446001224.192, 'noise_level': 0.031532639730960974, 'sigma_0': 0.07393056739169207, 'kernel': 'RBF', 'optimizer': None, 'n_restarts_optimizer': 3, 'max_iter_predict': 270, 'warm_start': True}.
Test Set Scores for Trial 12: Balanced Accuracy: 0.5, Log Loss: 0.6882300197133167
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
```
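
For reference, here is a minimal sketch of how the fitted kernel hyperparameters could be inspected (assuming a fitted `pipeline` from the code above and a binary target, where `GaussianProcessClassifier` exposes the fitted kernel as `kernel_`), to check whether the internal optimizer is pushing values onto their built-in bounds:

```python
# Minimal diagnostic sketch (assumes `pipeline` was fitted as in the
# objective above, and binary classification).
gpc = pipeline.named_steps['classifier']
print("Initial kernel:", gpc.kernel)    # kernel as constructed from the trial
print("Fitted kernel: ", gpc.kernel_)   # kernel after internal optimization
# Compare each tunable hyperparameter against its built-in bounds; a value
# sitting on a bound corresponds to the ConvergenceWarnings above.
fitted_params = gpc.kernel_.get_params()
for hp in gpc.kernel_.hyperparameters:
    if not hp.fixed:
        print(hp.name, "=", fitted_params[hp.name], "bounds:", hp.bounds)
```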