I am new to coding and am trying to use machine learning for my master's research project to help predict price, return and volatility in stock markets. When I try to compare four different neural networks, I keep getting an error saying that the truth value of my DataFrame is ambiguous, and I am not sure where the issue is coming from. I have tried a few suggestions from other people's questions, but none of them have resolved the issue I have been having.
I am trying to compare four different neural networks to determine which one provides the best prediction for my dataset, but I keep getting the error mentioned above. The full traceback is:
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_30680/3175689273.py in <module>
11 parameters_LSTM = [[1,2,3,4,5,6,7,8,9,10,11,12,13], [3,4,5,6], [1], [300], [20], [future_steps]]
12
---> 13 RMSE_info = compare_ANN_methods(price_data, test_price_data, scaler, parameters_FNN, parameters_TLNN, parameters_SANN, parameters_LSTM, future_steps)
~\AppData\Local\Temp/ipykernel_30680/2096097431.py in compare_ANN_methods(price_data, test_price_data, scaler, parameters_FNN, parameters_TLNN, parameters_SANN, parameters_LSTM, future_steps)
2
3 information_FNN_df = get_accuracies_FNN(price_data, test_price_data, parameters_FNN, scaler)
----> 4 optimized_params_FNN = analyze_results(information_FNN_df, test_price_data, 'FNN').all()
5
6 information_TLNN_df = get_accuracies_TLNN(price_data, test_price_data, parameters_TLNN, scaler)
~\AppData\Local\Temp/ipykernel_30680/2506428711.py in analyze_results(data_frame, test_rainfall_data, name, flag)
1 def analyze_results(data_frame, test_rainfall_data, name, flag=False):
----> 2 optimized_params = data_frame.iloc[data_frame.RMSE.argmin]
3 future_steps = optimized_params.future_steps
4 forecast_values = optimized_params[-1*int(future_steps):]
5 y_true = test_rainfall_data.iloc[:int(future_steps)]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
928 axis = self.axis or 0
929
--> 930 maybe_callable = com.apply_if_callable(key, self.obj)
931 return self._getitem_axis(maybe_callable, axis=axis)
932
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\common.py in apply_if_callable(maybe_callable, obj, **kwargs)
356 """
357 if callable(maybe_callable):
--> 358 return maybe_callable(obj, **kwargs)
359
360 return maybe_callable
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\base.py in argmin(self, axis, skipna, *args, **kwargs)
696 def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int:
697 delegate = self._values
--> 698 nv.validate_minmax_axis(axis)
699 skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
700
C:\ProgramData\Anaconda3\lib\site-packages\pandas\compat\numpy\function.py in validate_minmax_axis(axis, ndim)
406 if axis is None:
407 return
--> 408 if axis >= ndim or (axis < 0 and ndim + axis < 0):
409 raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1535 @final
1536 def __nonzero__(self):
-> 1537 raise ValueError(
1538 f"The truth value of a {type(self).__name__} is ambiguous. "
1539 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
The code is:
# Each parameters_* value is a list of lists; the comment above each one names
# the slot that every inner list fills, in order.

# look_back, hidden_nodes, output_nodes, epochs, batch_size, future_steps
parameters_FNN = [
    [1, 2, 3, 6, 8, 10, 12],
    [3, 4, 5, 6],
    [1],
    [500],
    [20],
    [future_steps],
]

# time_lagged_points, hidden_nodes, output_nodes, epochs, batch_size, future_steps
parameters_TLNN = [
    [
        [1, 2, 3, 11, 12],
        [1, 2, 3, 4, 11, 12],
        [1, 2, 3, 11, 12, 13],
        [1, 2, 3, 4, 5, 6, 10, 11, 12],
    ],
    [3, 4, 5, 6],
    [1],
    [300],
    [20],
    [future_steps],
]

# seasonal_period, hidden_nodes, epochs, batch_size, future_steps
parameters_SANN = [
    [12],
    [3, 4, 5, 6, 7, 8, 9, 10],
    [500],
    [20],
    [future_steps],
]

# look_back, hidden_nodes, output_nodes, epochs, batch_size, future_steps
parameters_LSTM = [
    list(range(1, 14)),  # look_back values 1 through 13 inclusive
    [3, 4, 5, 6],
    [1],
    [300],
    [20],
    [future_steps],
]

# Hand all four parameter sets to the comparison routine and keep its result.
RMSE_info = compare_ANN_methods(
    price_data,
    test_price_data,
    scaler,
    parameters_FNN,
    parameters_TLNN,
    parameters_SANN,
    parameters_LSTM,
    future_steps,
)
Here is where compare_ANN_methods is defined:
def compare_ANN_methods(price_data, test_price_data, scaler, parameters_FNN,
                        parameters_TLNN, parameters_SANN, parameters_LSTM,
                        future_steps):
    """Evaluate the FNN, TLNN, SANN and LSTM pipelines and return the RMSE info.

    For each of the four methods, in the order FNN, TLNN, SANN, LSTM, the
    matching ``get_accuracies_*`` function is called with
    ``(price_data, test_price_data, parameters, scaler)`` and its result is
    passed to ``analyze_results`` together with ``test_price_data`` and the
    method's label. The four ``analyze_results`` outputs are handed to
    ``best_of_all``, which yields ``(index, name, RMSE_info)``.
    ``analyze_results`` is then called once more on the information object at
    ``index``, with ``True`` as its fourth positional argument.

    Parameters:
        price_data, test_price_data, scaler: forwarded unchanged to every
            ``get_accuracies_*`` call; ``test_price_data`` is also forwarded
            to every ``analyze_results`` call.
        parameters_FNN, parameters_TLNN, parameters_SANN, parameters_LSTM:
            forwarded to the ``get_accuracies_*`` function of the same suffix.
        future_steps: accepted for the caller's signature; this function does
            not read it.

    Returns:
        The third element of the tuple returned by ``best_of_all``.
    """

    def evaluate(get_accuracies, parameters, label):
        # One pipeline step: build the information object, then analyse it.
        info_df = get_accuracies(price_data, test_price_data, parameters, scaler)
        return info_df, analyze_results(info_df, test_price_data, label)

    # The four pipelines run strictly in this order, one after the other.
    fnn_info, fnn_best = evaluate(get_accuracies_FNN, parameters_FNN, 'FNN')
    tlnn_info, tlnn_best = evaluate(get_accuracies_TLNN, parameters_TLNN, 'TLNN')
    sann_info, sann_best = evaluate(get_accuracies_SANN, parameters_SANN, 'SANN')
    lstm_info, lstm_best = evaluate(get_accuracies_LSTM, parameters_LSTM, 'LSTM')

    # Both lists share the same method order, so `index` from best_of_all
    # selects the information object belonging to the winning method.
    information = [fnn_info, tlnn_info, sann_info, lstm_info]
    index, name, RMSE_info = best_of_all([fnn_best, tlnn_best, sann_best, lstm_best])

    # Re-run the analysis for the selected method with the fourth argument set
    # to True; the return value of this call is not used.
    analyze_results(information[index], test_price_data, name, True)
    return RMSE_info
The dataframe looks like this: Dataframe
Thanks for any help