Hyperparameter range determination: my main challenge is setting effective search ranges for hyperparameters such as `length_scale`, `noise_level`, and `sigma_0`. For `length_scale`, I currently use the minimum and maximum pairwise distances between my training points as bounds. Are there recommended statistical or empirical methods for choosing search spaces for Gaussian Process Classifier hyperparameters that suit the characteristics of a given dataset while avoiding convergence issues?
```python
import optuna
from sklearn.metrics import accuracy_score, balanced_accuracy_score, log_loss
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler, Normalizer, PowerTransformer, RobustScaler
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct, Matern, WhiteKernel
import numpy as np
from scipy.spatial.distance import pdist
# Assuming X_train_selected_GPC holds the training features
pairwise_distances = pdist(X_train_selected_GPC)
max_spacing = np.max(pairwise_distances)  # maximum pairwise distance between points
min_spacing = np.min(pairwise_distances)  # minimum pairwise distance between points
# Set bounds for length_scale
lower_bound = max(min_spacing, 1e-3)
upper_bound = max_spacing
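# An empirical alternative (a sketch, not what I currently use) is the median
# heuristic: centre the search on the median pairwise distance and span a few
# orders of magnitude around it on a log scale, instead of the extreme bounds
# produced by np.min/np.max:
#   median_spacing = np.median(pairwise_distances)
#   lower_bound = max(median_spacing / 100, 1e-3)
#   upper_bound = median_spacing * 100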
# Custom no-op transformer for the "no preprocessing" option
class NoOpTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X
# Return the preprocessor matching the trial suggestion
def get_preprocessor(preprocessor_name):
    preprocessors = {
        'standardscaler': StandardScaler(),
        'maxabsscaler': MaxAbsScaler(),
        'minmaxscaler': MinMaxScaler(),
        'normalizer': Normalizer(),
        'powertransformer': PowerTransformer(),
        'robustscaler': RobustScaler(),
        'none': NoOpTransformer(),  # no preprocessing
    }
    return preprocessors.get(preprocessor_name, None)
# Objective function for the Optuna study
def objective(trial):
    # Select preprocessor (only a subset of the mapping above is searched here)
    preprocessor = get_preprocessor(
        trial.suggest_categorical('preprocessor', ['standardscaler', 'maxabsscaler', 'minmaxscaler'])
    )

    # GaussianProcessClassifier parameters; the scale parameters are sampled
    # log-uniformly (log=True), which explores wide positive ranges far more
    # evenly than linear sampling
    length_scale = trial.suggest_float('length_scale', lower_bound, upper_bound, log=True)
    noise_level = trial.suggest_float('noise_level', 1e-10, 1e-1, log=True)  # example range
    sigma_0 = trial.suggest_float('sigma_0', 1e-10, 1e-1, log=True)  # example range
    kernel_choice = trial.suggest_categorical('kernel', ['RBF', 'DotProduct', 'Matern'])
    if kernel_choice == 'RBF':
        kernel = RBF(length_scale) + WhiteKernel(noise_level=noise_level)
    elif kernel_choice == 'DotProduct':
        # pass the sampled sigma_0 through to the kernel
        kernel = DotProduct(sigma_0=sigma_0) + WhiteKernel(noise_level=noise_level)
    else:  # 'Matern'
        nu = trial.suggest_categorical('nu', [0.5, 1.5, 2.5])
        kernel = Matern(length_scale=length_scale, nu=nu) + WhiteKernel(noise_level=noise_level)
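    # Note (likely source of the bound warnings below): unless optimizer=None,
    # the internal L-BFGS step re-optimizes the kernel hyperparameters within
    # each kernel's own default bounds (e.g. WhiteKernel's noise_level_bounds
    # default to (1e-5, 1e5)), overriding the values Optuna suggested. One
    # option (an untested sketch) is to make those bounds match the search
    # space, or to fix them entirely, e.g.:
    #   kernel = RBF(length_scale, length_scale_bounds=(lower_bound, upper_bound)) \
    #       + WhiteKernel(noise_level=noise_level, noise_level_bounds=(1e-10, 1e-1))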
    param = {
        'kernel': kernel,
        'optimizer': trial.suggest_categorical('optimizer', ['fmin_l_bfgs_b', None]),
        'n_restarts_optimizer': trial.suggest_int('n_restarts_optimizer', 0, 10),
        'max_iter_predict': trial.suggest_int('max_iter_predict', 200, 500),
        'warm_start': trial.suggest_categorical('warm_start', [True, False]),
        'random_state': 16061989,
    }
    if preprocessor:
        pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('classifier', GaussianProcessClassifier(**param)),
        ])
    else:
        pipeline = GaussianProcessClassifier(**param)
    # Perform cross-validation
    skf = StratifiedKFold(n_splits=10, shuffle=False)
    cv_scores = []
    for train_index, val_index in skf.split(X_train_selected_GPC, y_train):
        X_train_fold, y_train_fold = X_train_selected_GPC.iloc[train_index], y_train.iloc[train_index]
        X_val_fold, y_val_fold = X_train_selected_GPC.iloc[val_index], y_train.iloc[val_index]
        pipeline.fit(X_train_fold, y_train_fold)
        y_val_pred = pipeline.predict(X_val_fold)
        score = balanced_accuracy_score(y_val_fold, y_val_pred)
        cv_scores.append(score)
    cv_balanced_accuracy = np.mean(cv_scores)
    # Evaluate on the test set for both metrics (note: using a test-set score
    # as an objective leaks test information into the search)
    pipeline.fit(X_train_selected_GPC, y_train)
    y_pred = pipeline.predict(X_test_selected_GPC)
    y_pred_proba = pipeline.predict_proba(X_test_selected_GPC)
    test_balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
    test_log_loss = log_loss(y_test, y_pred_proba)

    # Print the test-set scores for this trial
    print(f'Test Set Scores for Trial {trial.number}: '
          f'Balanced Accuracy: {test_balanced_accuracy}, Log Loss: {test_log_loss}')

    return cv_balanced_accuracy, test_log_loss
# Create a multi-objective Optuna study and optimize
study = optuna.create_study(directions=['maximize', 'minimize'], sampler=optuna.samplers.NSGAIISampler())

# Run the optimization
study.optimize(objective, n_trials=200)
# Output the top 10 Pareto-optimal trials
print("Pareto-optimal trials:")
pareto_trials = study.best_trials
# Sort by balanced accuracy (descending), breaking ties by log loss (ascending)
pareto_trials = sorted(pareto_trials, key=lambda t: (-t.values[0], t.values[1]))[:10]
for trial in pareto_trials:
    print(f"  Trial {trial.number}:")
    print(f"    Balanced Accuracy: {trial.values[0]}, Log Loss: {trial.values[1]}")
    print("    Params: ")
    for key, value in trial.params.items():
        print(f"      {key}: {value}")
```

Below is example trial output, together with the convergence warnings I mentioned:

```
[I 2024-01-29 20:32:40,110] A new study created in memory with name: no-name-5cfd3414-b60c-4696-a730-bb96b240202f
[I 2024-01-29 20:33:17,858] Trial 0 finished with values: [0.5844709411816231, 0.6273413690380433] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 48006216817.82564, 'noise_level': 0.017815581206219875, 'sigma_0': 0.0835211263249409, 'kernel': 'DotProduct', 'optimizer': None, 'n_restarts_optimizer': 7, 'max_iter_predict': 306, 'warm_start': True}.
Test Set Scores for Trial 0: Balanced Accuracy: 0.6465721040189125, Log Loss: 0.6273413690380433
[I 2024-01-29 20:42:18,682] Trial 1 finished with values: [0.5, 0.6887973622821696] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 120751788390.4435, 'noise_level': 0.029608216180959088, 'sigma_0': 0.03343849887746868, 'kernel': 'Matern', 'nu': 2.5, 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 3, 'max_iter_predict': 370, 'warm_start': False}.
Test Set Scores for Trial 1: Balanced Accuracy: 0.5, Log Loss: 0.6887973622821696
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/_gpc.py:477: ConvergenceWarning: lbfgs failed to converge (status=2):
ABNORMAL_TERMINATION_IN_LNSRCH.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
_check_optimize_result("lbfgs", opt_res)
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[I 2024-01-29 21:06:26,838] Trial 2 finished with values: [0.5783523361539313, 0.6372584505619883] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 208476269821.26263, 'noise_level': 1.1309097248553345e-05, 'sigma_0': 0.02855281577629066, 'kernel': 'Matern', 'nu': 1.5, 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 2, 'max_iter_predict': 493, 'warm_start': False}.
Test Set Scores for Trial 2: Balanced Accuracy: 0.6157028550645571, Log Loss: 0.6372584505619883
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[... the same k2__noise_level warning repeated for several more fits ...]
[I 2024-01-29 21:21:45,312] Trial 3 finished with values: [0.5966696362908889, 0.6324449645490696] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 4144161311.7087917, 'noise_level': 0.04605203545449352, 'sigma_0': 0.04982998930881419, 'kernel': 'RBF', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 6, 'max_iter_predict': 473, 'warm_start': True}.
Test Set Scores for Trial 3: Balanced Accuracy: 0.635933806146572, Log Loss: 0.6324449645490696
[I 2024-01-29 21:29:36,370] Trial 4 finished with values: [0.5718674606274597, 0.6291962990161305] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 169036967838.10867, 'noise_level': 0.032468694910234734, 'sigma_0': 0.09812343775766556, 'kernel': 'DotProduct', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 1, 'max_iter_predict': 493, 'warm_start': True}.
Test Set Scores for Trial 4: Balanced Accuracy: 0.6402073104200764, Log Loss: 0.6291962990161305
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k1__sigma_0 is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[I 2024-01-29 21:45:01,856] Trial 5 finished with values: [0.5889861621654615, 0.6291962603259411] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 291792013215.924, 'noise_level': 0.006619208977044221, 'sigma_0': 0.09635638303210495, 'kernel': 'DotProduct', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 2, 'max_iter_predict': 259, 'warm_start': True}.
Test Set Scores for Trial 5: Balanced Accuracy: 0.6402073104200764, Log Loss: 0.6291962603259411
[I 2024-01-29 21:45:19,967] Trial 6 finished with values: [0.5767175337434327, 0.6325990433241585] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 131193762519.232, 'noise_level': 0.024946412358797826, 'sigma_0': 0.03718758842316881, 'kernel': 'DotProduct', 'optimizer': None, 'n_restarts_optimizer': 3, 'max_iter_predict': 241, 'warm_start': True}.
Test Set Scores for Trial 6: Balanced Accuracy: 0.6381160210947445, Log Loss: 0.6325990433241585
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:430: ConvergenceWarning: The optimal value found for dimension 0 of parameter k1__length_scale is close to the specified upper bound 100000.0. Increasing the bound and calling fit again may find a better value.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
[... the same k1__length_scale / k2__noise_level warning pair repeated for several more fits ...]
[I 2024-01-29 21:52:52,493] Trial 7 finished with values: [0.5, 0.6889674690713428] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 325781752428.19226, 'noise_level': 0.07205323391930595, 'sigma_0': 0.024985389180689932, 'kernel': 'RBF', 'optimizer': 'fmin_l_bfgs_b', 'n_restarts_optimizer': 3, 'max_iter_predict': 253, 'warm_start': True}.
Test Set Scores for Trial 7: Balanced Accuracy: 0.5, Log Loss: 0.6889674690713428
[I 2024-01-29 21:53:19,129] Trial 8 finished with values: [0.5, 0.6882635212941796] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 149276801723.58057, 'noise_level': 0.011586879886197287, 'sigma_0': 0.050370308741337994, 'kernel': 'RBF', 'optimizer': None, 'n_restarts_optimizer': 0, 'max_iter_predict': 204, 'warm_start': False}.
Test Set Scores for Trial 8: Balanced Accuracy: 0.5, Log Loss: 0.6882635212941796
[I 2024-01-29 21:53:45,161] Trial 9 finished with values: [0.5, 0.6881882926594941] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 253845618622.0915, 'noise_level': 0.05762401707538051, 'sigma_0': 0.04306879297808018, 'kernel': 'Matern', 'nu': 0.5, 'optimizer': None, 'n_restarts_optimizer': 8, 'max_iter_predict': 426, 'warm_start': False}.
Test Set Scores for Trial 9: Balanced Accuracy: 0.5, Log Loss: 0.6881882926594941
[I 2024-01-29 21:54:07,055] Trial 10 finished with values: [0.5, 0.6882704725970809] and parameters: {'preprocessor': 'minmaxscaler', 'length_scale': 311303775135.55255, 'noise_level': 0.007553664772048748, 'sigma_0': 0.03384982235223868, 'kernel': 'Matern', 'nu': 0.5, 'optimizer': None, 'n_restarts_optimizer': 2, 'max_iter_predict': 283, 'warm_start': True}.
Test Set Scores for Trial 10: Balanced Accuracy: 0.5, Log Loss: 0.6882704725970809
[I 2024-01-29 21:54:35,434] Trial 11 finished with values: [0.5, 0.688233161713861] and parameters: {'preprocessor': 'standardscaler', 'length_scale': 184478382360.91122, 'noise_level': 0.029625348382531086, 'sigma_0': 0.01349093189725387, 'kernel': 'Matern', 'nu': 1.5, 'optimizer': None, 'n_restarts_optimizer': 0, 'max_iter_predict': 209, 'warm_start': False}.
Test Set Scores for Trial 11: Balanced Accuracy: 0.5, Log Loss: 0.688233161713861
[I 2024-01-29 21:54:56,255] Trial 12 finished with values: [0.5, 0.6882300197133167] and parameters: {'preprocessor': 'maxabsscaler', 'length_scale': 292446001224.192, 'noise_level': 0.031532639730960974, 'sigma_0': 0.07393056739169207, 'kernel': 'RBF', 'optimizer': None, 'n_restarts_optimizer': 3, 'max_iter_predict': 270, 'warm_start': True}.
Test Set Scores for Trial 12: Balanced Accuracy: 0.5, Log Loss: 0.6882300197133167
/usr/local/lib/python3.10/dist-packages/sklearn/gaussian_process/kernels.py:420: ConvergenceWarning: The optimal value found for dimension 0 of parameter k2__noise_level is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.
warnings.warn(
```
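
For reference, here is a minimal sketch of how the fitted kernel hyperparameters could be inspected (assuming a fitted `pipeline` from the code above and a binary target, where `GaussianProcessClassifier` exposes the fitted kernel as `kernel_`), to check whether the internal optimizer is pushing values onto their built-in bounds:

```python
# Minimal diagnostic sketch (assumes `pipeline` was fitted as in the
# objective above, and binary classification).
gpc = pipeline.named_steps['classifier']
print("Initial kernel:", gpc.kernel)    # kernel as constructed from the trial
print("Fitted kernel: ", gpc.kernel_)   # kernel after internal optimization
# Compare each tunable hyperparameter against its built-in bounds; a value
# sitting on a bound corresponds to the ConvergenceWarnings above.
fitted_params = gpc.kernel_.get_params()
for hp in gpc.kernel_.hyperparameters:
    if not hp.fixed:
        print(hp.name, "=", fitted_params[hp.name], "bounds:", hp.bounds)
```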