I get the following error from the last model.fit for my NLP learning after applying the neural network code from this question.
# https://stackoverflow.com/questions/36786722/how-to-display-full-output-in-jupyter-not-only-last-result
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense
#from tensorflow.keras.estimator import Estimator
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop
pd.options.display.max_rows
pd.set_option('display.max_rows', None)
pd.options.display.max_columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
!pip install opendatasets
import opendatasets as od
od.download("https://www.kaggle.com/datasets/nicapotato/womens-ecommerce-clothing-reviews/download")
raw=pd.read_csv("womens-ecommerce-clothing-reviews/Womens Clothing E-Commerce Reviews.csv",index_col=0)
complete_df=raw.dropna()
# https://mccormickml.com/2021/06/29/combining-categorical-numerical-features-with-bert/#33-tokenize--encode
# This will hold all of the dataset samples, as strings.
text_feats = []
# The labels for the samples.
labels = []
numeric_feats=[]
print("Combining features into strings...excluding 'Positive Feedback Count'")
# For each of the samples...
for index, row in complete_df.iterrows():
# Piece it together...
combined = ""
#combined += "The ID of this item is {:}, ".format(row["Clothing ID"])
combined += "This item comes from the {:} department and {:} division, " \
"and is classified under {:}. ".format(row["Department Name"],
row["Division Name"],
row["Class Name"])
combined += "I am {:} years old. ".format(row["Age"])
combined += "I rate this item {:} out of 5 stars. ".format(row["Rating"])
# Not all samples have titles.
if not row["Title"] == "":
combined += row["Title"] + ". "
# Finally, append the review the text!
combined += row["Review Text"]
# Add the combined text to the list.
text_feats.append(combined)
# Also record the sample's label.
labels.append(row["Recommended IND"])
numeric_feats.append(row["Positive Feedback Count"])
print(' DONE.')
print('Dataset contains {:,} samples.'.format(len(text_feats)))
text_df = pd.DataFrame(text_feats, columns =['text'])
numeric_df = pd.DataFrame(numeric_feats, columns =['Positive Feedback Count'])
target_df = pd.DataFrame(labels, columns =['Recommended'])
combine_df=pd.concat((text_df, numeric_df, target_df), axis=1)
# https://stackoverflow.com/questions/69818148/attempting-to-combine-numeric-and-text-features-in-tensorflow-valueerror-layer
X = combine_df.drop('Recommended', axis=1)
y = combine_df['Recommended']
# Split up the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42,stratify=y)
text_train = X_train['text']
text_test = X_test['text']
# subsetting the numeric variables
numeric_train = X_train[['Positive Feedback Count']].to_numpy()
numeric_test = X_test[['Positive Feedback Count']].to_numpy()
tokenizer = Tokenizer(num_words=60000)
tokenizer.fit_on_texts(text_train)
sequences_train = tokenizer.texts_to_sequences(text_train)
sequences_test = tokenizer.texts_to_sequences(text_test)
word_index = tokenizer.word_index
print("\nWord Index = ", word_index)
word2idx = tokenizer.word_index
V = len(word2idx)
print('Found %s unique tokens.' % V) #Found 13489 unique tokens.
nlp_train = pad_sequences(sequences_train, padding = 'post')
print('Shape of data train tensor:', nlp_train.shape) #Shape of data train tensor: (15729, 148)
# get sequence length
T = nlp_train.shape[1]
nlp_test = pad_sequences(sequences_test, maxlen=T,padding = 'post')
print('Shape of data test tensor:', nlp_test.shape) #Shape of data test tensor: (3933, 148)
data_train = np.concatenate((nlp_train,numeric_train), axis=1)
data_test = np.concatenate((nlp_test,numeric_test), axis=1)
# Choosing embedding dimensionality
D = 20
# Hidden state dimensionality
M = 40
nlp_input = Input(shape=(T,),name= 'nlp_input')
meta_input = Input(shape=(2,), name='meta_input')
emb = Embedding(V + 1, D)(nlp_input)
emb = Bidirectional(LSTM(64, return_sequences=True))(emb)
emb = Dropout(0.40)(emb)
emb = Bidirectional(LSTM(128))(emb)
nlp_out = Dropout(0.40)(emb)
x = tf.concat([nlp_out, meta_input], 1)
x = Dense(64, activation='swish')(x)
x = Dropout(0.40)(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[nlp_input, meta_input], outputs=[x])
#next, create a custom optimizer
optimizer1 = RMSprop(learning_rate=0.0001)
# Compile and fit
model.compile(loss='binary_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
'''
print('Training model...')
r = model.fit(data_train,
y_train,
epochs=5,
validation_data=(data_test, y_test)
)
# ValueError: Layer "model_1" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 149) dtype=int64>]
'''
nlp_train.shape, numeric_train.shape,y_train.shape #((15729, 148), (15729, 1), (15729,))
nlp_test.shape, numeric_test.shape,y_test.shape #((3933, 148), (3933, 1), (3933,))
print('Training model...')
r = model.fit([nlp_train, numeric_train],
y_train,
epochs=5,
validation_data=([nlp_test, numeric_test], y_test)
)
Training model...
Epoch 1/5
WARNING:tensorflow:Model was constructed with shape (None, 2) for input KerasTensor(type_spec=TensorSpec(shape=(None, 2), dtype=tf.float32, name='meta_input'), name='meta_input', description="created by layer 'meta_input'"), but it was called on an input with incompatible shape (None, 1).
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\KCWONG~1\AppData\Local\Temp/ipykernel_29744/2451003185.py in <module>
25
26 print('Training model...')
---> 27 r = model.fit([nlp_train, numeric_train],
28 y_train,
29 epochs=5,
C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
ValueError: in user code:
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1021, in train_function *
return step_function(self, iterator)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 859, in train_step
y_pred = self(x, training=True)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\input_spec.py", line 248, in assert_input_compatibility
raise ValueError(
ValueError: Exception encountered when calling layer "model" (type Functional).
Input 0 of layer "dense_2" is incompatible with the layer: expected axis -1 of input shape to have value 258, but received input with shape (None, 257)
Call arguments received:
• inputs=('tf.Tensor(shape=(None, 148), dtype=int32)', 'tf.Tensor(shape=(None, 1), dtype=int64)')
• training=True
• mask=None
How do I fix this?