I am trying to run cross-validation on a dataset with Keras and scikit-learn. The issue is that I cannot use multiprocessing for this task: when I pass any value other than 1 for the n_jobs parameter of RandomizedSearchCV, it throws an error:
PicklingError: Could not pickle the task to send it to the workers.
Fun fact: everything worked just fine yesterday. My code is below:
import keras
from keras import metrics
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
def create_model(units=16,
                 dropout=0.3,
                 activation='relu',
                 optimizer='adam',
                 regularization=None):
    """Build and compile the dense softmax classifier used by the search.

    The network is an input layer, two identical Dense + Dropout stages,
    and a softmax output layer.

    Args:
        units: Number of units in each of the two hidden Dense layers.
        dropout: Rate passed to each Dropout layer.
        activation: Activation used by the hidden Dense layers.
        optimizer: Optimizer handed to ``model.compile``.
        regularization: Kernel regularizer for the hidden Dense layers.

    Returns:
        The compiled ``keras.Sequential`` model.

    Note:
        The input width comes from ``x_df.shape[1]`` and the output width
        from ``len(names)``. Both names are read from the enclosing scope
        and are not defined in this snippet.
    """
    stack = [layers.Input(shape=(x_df.shape[1], ))]

    # Two identical hidden stages, created in the same order as before.
    for _ in range(2):
        stack.append(
            layers.Dense(units=units,
                         activation=activation,
                         kernel_regularizer=regularization)
        )
        stack.append(layers.Dropout(dropout))

    stack.append(layers.Dense(len(names), activation='softmax'))

    network = keras.Sequential(stack, name='Sequential_model_vol_1')

    tracked_metrics = [
        metrics.SparseCategoricalAccuracy(name='accuracy'),
        metrics.SparseTopKCategoricalAccuracy(k=3, name='top-3-accuracy'),
    ]
    network.compile(loss='sparse_categorical_crossentropy',
                    optimizer=optimizer,
                    metrics=tracked_metrics)
    return network
# Placeholder search space: each key matches a keyword argument of
# create_model, and each list holds the candidate values to sample from.
param_grid = {
    'units': [32, 64, 128],
    'dropout': [0.2, 0.3, 0.5],
    'activation': ['relu', 'sigmoid'],
}

# Randomized hyper-parameter search over the wrapped Keras model with
# 10-fold cross-validation, scored on accuracy.
# NOTE(review): n_jobs=-1 is the setting reported to raise PicklingError;
# n_jobs=1 is reported to work but is slow.
cv_clf = RandomizedSearchCV(
    KerasClassifier(build_fn=create_model),
    param_grid,
    cv=10,
    n_jobs=-1,
    scoring='accuracy',
    verbose=1,
)

# Run the search; verbose=0 is forwarded as a fit parameter.
cv_clf.fit(x_train, y_train, verbose=0)
As I said, when n_jobs=1 everything works fine, but it takes too long to compute.