I'm trying to implement a Neural Collaborative Filtering recommender system using Keras; the dataset I'm using is MovieLens (small). Whatever I do to the hyperparameters or the network, during training the training loss (MAE) decreases nicely, but the validation loss — which always starts lower than the training loss — stays in place or rises slightly.
In a few example implementations of this recommender system, the validation loss behaves similarly:
https://keras.io/examples/structured_data/collaborative_filtering_movielens/

What I'm asking is: how is it possible, and does it make any sense, that the validation loss is lower than the training loss at epoch 1 and doesn't decrease any further after that?
Here is the code of my network:
# https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Embedding, Flatten, Input, Dot, Dropout, Dense, BatchNormalization, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
# Load the ratings table; column names are normalized to snake_case ids.
ratings_df = pd.read_csv('./ml-latest-small/ratings.csv', header=0,
                         names=['user_id', 'movie_id', 'rating', 'timestamp'])

# Changing id of movies to 0...n-1: raw MovieLens movie ids are sparse, but the
# Embedding layer needs a dense, contiguous index range. Keep the raw ids in a
# separate column for reference.
ratings_df["old_movie_id"] = ratings_df["movie_id"]
# pd.factorize assigns codes in order of first appearance, which reproduces the
# original "first seen movie gets the next index" mapping exactly.
ratings_df["movie_id"] = pd.factorize(ratings_df["movie_id"])[0]

# MovieLens user ids are 1-based; shift them to 0-based for the embedding.
ratings_df["user_id"] = ratings_df["user_id"] - 1
ratings_df = ratings_df.reset_index(drop=True)

# Scale ratings (0.5..5.0) into [0, 1] so they can be matched by a bounded
# output activation.
ratings_df["rating"] = MinMaxScaler(feature_range=(0, 1)).fit_transform(ratings_df[["rating"]])
# Hold out 20% for validation, stratified by user so every user contributes
# ratings to both splits; fixed seed keeps the split reproducible.
train, test = train_test_split(ratings_df, test_size=0.2,
                               stratify=ratings_df['user_id'], random_state=1)

# Vocabulary sizes for the embedding tables and the latent dimensionality.
users_len = ratings_df.user_id.nunique()
movies_len = ratings_df.movie_id.nunique()
movie_embedding = 50
user_embedding = 50
# ---------------------------------------------------------------------------
# NeuMF architecture: a matrix-factorization (GMF) branch and an MLP branch
# share the same two id inputs and are fused by a final Dense layer.
# ---------------------------------------------------------------------------
input_movie = Input(shape=[1], name='input-movie')
input_user = Input(shape=[1], name='input-user')

# MF branch: one embedding per id, combined with a per-sample dot product.
# input_dim is padded by one so the largest index (len - 1) is safely in range.
mf_movie_embedding = Embedding(input_dim = movies_len + 1, output_dim = movie_embedding, name='mf_movie_embedding')(input_movie)
mf_user_embedding = Embedding(input_dim = users_len + 1, output_dim = user_embedding, name='mf_user_embedding')(input_user)
mf_movie_flatten = Flatten(name='mf_movie_flatten')(mf_movie_embedding)
mf_user_flatten = Flatten(name='mf_user_flatten')(mf_user_embedding)
mf_output = Dot(axes=1)([mf_movie_flatten, mf_user_flatten])

# MLP branch: its own embeddings (independent of the MF ones), concatenated and
# passed through a small tower regularized with Dropout + BatchNormalization.
mlp_movie_embedding = Embedding(input_dim = movies_len + 1, output_dim = movie_embedding, name='mlp_movie_embedding')(input_movie)
mlp_user_embedding = Embedding(input_dim = users_len + 1, output_dim = user_embedding, name='mlp_user_embedding')(input_user)
mlp_movie_flatten = Flatten(name='mlp_movie_flatten')(mlp_movie_embedding)
mlp_user_flatten = Flatten(name='mlp_user_flatten')(mlp_user_embedding)
mlp_concatenate = Concatenate(axis=1)([mlp_movie_flatten, mlp_user_flatten])
mlp_concatenate_dropout = Dropout(0.2)(mlp_concatenate)
mlp_dense_1 = Dense(32, activation='relu', name='mlp_dense_1')(mlp_concatenate_dropout)
mlp_batch_norm_1 = BatchNormalization(name='mlp_batch_norm_1')(mlp_dense_1)
mlp_dropout_1 = Dropout(0.2)(mlp_batch_norm_1)
mlp_dense_2 = Dense(16, activation='relu', name='mlp_dense_2')(mlp_dropout_1)
mlp_batch_norm_2 = BatchNormalization(name='mlp_batch_norm_2')(mlp_dense_2)
mlp_dropout_2 = Dropout(0.2)(mlp_batch_norm_2)
mlp_output = Dense(8, activation='relu', name='mlp_output')(mlp_dropout_2)

# Fuse both branches and regress the scaled rating.
mf_mlp_concat = Concatenate(axis=1)([mf_output, mlp_output])
# Fix: the targets were scaled into [0, 1], so use a sigmoid output (as in the
# NeuMF paper) rather than relu — relu is unbounded above and has a dead zone
# at 0 that stalls learning for low ratings.
output = Dense(1, name='output', activation='sigmoid')(mf_mlp_concat)

NeuCF_model = Model([input_user, input_movie], output)
NeuCF_model.compile(optimizer=Adam(), loss='mean_absolute_error')
# NOTE(review): Dropout and BatchNormalization behave differently at train vs
# inference time; the training MAE is computed with dropout active while the
# validation MAE is not, which is why val_loss can start below the training
# loss — that pattern alone is not a bug.
# Train for 10 epochs; Keras documents validation_data as a TUPLE
# (x_val, y_val) — passing a list is rejected by newer TF versions.
history = NeuCF_model.fit(
    [train.user_id, train.movie_id], train.rating,
    epochs=10,
    validation_data=([test.user_id, test.movie_id], test.rating),
)
import matplotlib.pyplot as plt

# Learning curves: per-epoch training vs. validation MAE recorded by fit().
for series in ('loss', 'val_loss'):
    plt.plot(history.history[series])
plt.title('NeuCF_model MAE loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


