Error from model.fit with text and numerical features

Question

I get the following error from the last model.fit for my NLP learning after applying the neural network code from this question.

# https://stackoverflow.com/questions/36786722/how-to-display-full-output-in-jupyter-not-only-last-result
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, Dense 
#from tensorflow.keras.estimator import Estimator
from tensorflow.keras.models import Model

from tensorflow.keras.optimizers import RMSprop

pd.options.display.max_rows
pd.set_option('display.max_rows', None)
pd.options.display.max_columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

!pip install opendatasets
import opendatasets as od
od.download("https://www.kaggle.com/datasets/nicapotato/womens-ecommerce-clothing-reviews/download")

raw=pd.read_csv("womens-ecommerce-clothing-reviews/Womens Clothing E-Commerce Reviews.csv",index_col=0)

complete_df=raw.dropna()

# https://mccormickml.com/2021/06/29/combining-categorical-numerical-features-with-bert/#33-tokenize--encode

# This will hold all of the dataset samples, as strings. 
text_feats = []

# The labels for the samples.
labels = []

numeric_feats=[]

print("Combining features into strings...excluding 'Positive Feedback Count'")

# For each of the samples...
for index, row in complete_df.iterrows():

    # Piece it together...    
    combined = ""
    
    #combined += "The ID of this item is {:}, ".format(row["Clothing ID"])
    combined += "This item comes from the {:} department and {:} division, " \
                "and is classified under {:}. ".format(row["Department Name"], 
                                                       row["Division Name"], 
                                                       row["Class Name"])
    
    combined += "I am {:} years old. ".format(row["Age"])
    
    combined += "I rate this item {:} out of 5 stars. ".format(row["Rating"])
    
    # Not all samples have titles.
    if not row["Title"] == "":
        combined += row["Title"] + ". "
    
    # Finally, append the review the text!
    combined += row["Review Text"]
    
    # Add the combined text to the list.
    text_feats.append(combined)

    # Also record the sample's label.
    labels.append(row["Recommended IND"])

    numeric_feats.append(row["Positive Feedback Count"])

print('  DONE.')

print('Dataset contains {:,} samples.'.format(len(text_feats)))

text_df = pd.DataFrame(text_feats, columns =['text']) 
numeric_df = pd.DataFrame(numeric_feats, columns =['Positive Feedback Count'])
target_df = pd.DataFrame(labels, columns =['Recommended']) 
combine_df=pd.concat((text_df, numeric_df, target_df), axis=1)

# https://stackoverflow.com/questions/69818148/attempting-to-combine-numeric-and-text-features-in-tensorflow-valueerror-layer
X = combine_df.drop('Recommended', axis=1) 
y = combine_df['Recommended']

# Split up the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42,stratify=y) 

text_train = X_train['text']
text_test = X_test['text']

# subsetting the numeric variables
numeric_train = X_train[['Positive Feedback Count']].to_numpy()
numeric_test = X_test[['Positive Feedback Count']].to_numpy()

tokenizer = Tokenizer(num_words=60000)
tokenizer.fit_on_texts(text_train)
sequences_train = tokenizer.texts_to_sequences(text_train)
sequences_test = tokenizer.texts_to_sequences(text_test)

word_index = tokenizer.word_index
print("\nWord Index = ", word_index)

word2idx = tokenizer.word_index
V = len(word2idx)
print('Found %s unique tokens.' % V) #Found 13489 unique tokens.

nlp_train = pad_sequences(sequences_train, padding = 'post')
print('Shape of data train tensor:', nlp_train.shape) #Shape of data train tensor: (15729, 148)

# get sequence length
T = nlp_train.shape[1]

nlp_test = pad_sequences(sequences_test, maxlen=T,padding = 'post')
print('Shape of data test tensor:', nlp_test.shape) #Shape of data test tensor: (3933, 148)

data_train = np.concatenate((nlp_train,numeric_train), axis=1)
data_test = np.concatenate((nlp_test,numeric_test), axis=1)

# Choosing embedding dimensionality
D = 20

# Hidden state dimensionality
M = 40

nlp_input = Input(shape=(T,),name= 'nlp_input')
meta_input = Input(shape=(2,), name='meta_input')
emb = Embedding(V + 1, D)(nlp_input)
emb = Bidirectional(LSTM(64, return_sequences=True))(emb)
emb = Dropout(0.40)(emb)
emb = Bidirectional(LSTM(128))(emb)
nlp_out = Dropout(0.40)(emb)
x = tf.concat([nlp_out, meta_input], 1)
x = Dense(64, activation='swish')(x)
x = Dropout(0.40)(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[nlp_input, meta_input], outputs=[x])

#next, create a custom optimizer
optimizer1 = RMSprop(learning_rate=0.0001)

# Compile and fit
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy']
             )

'''
print('Training model...')
r = model.fit(data_train,
              y_train,
              epochs=5, 
              validation_data=(data_test, y_test)
             )
# ValueError: Layer "model_1" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 149) dtype=int64>]
'''

nlp_train.shape, numeric_train.shape,y_train.shape #((15729, 148), (15729, 1), (15729,))

nlp_test.shape, numeric_test.shape,y_test.shape #((3933, 148), (3933, 1), (3933,))

print('Training model...')
r = model.fit([nlp_train, numeric_train],
              y_train,
              epochs=5, 
              validation_data=([nlp_test, numeric_test], y_test)
             )

Training model...
Epoch 1/5
WARNING:tensorflow:Model was constructed with shape (None, 2) for input KerasTensor(type_spec=TensorSpec(shape=(None, 2), dtype=tf.float32, name='meta_input'), name='meta_input', description="created by layer 'meta_input'"), but it was called on an input with incompatible shape (None, 1).
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
C:\Users\KCWONG~1\AppData\Local\Temp/ipykernel_29744/2451003185.py in <module>
     25 
     26 print('Training model...')
---> 27 r = model.fit([nlp_train, numeric_train],
     28               y_train,
     29               epochs=5,

C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
     65     except Exception as e:  # pylint: disable=broad-except
     66       filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67       raise e.with_traceback(filtered_tb) from None
     68     finally:
     69       del filtered_tb

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py in autograph_handler(*args, **kwargs)
   1145           except Exception as e:  # pylint:disable=broad-except
   1146             if hasattr(e, "ag_error_metadata"):
-> 1147               raise e.ag_error_metadata.to_exception(e)
   1148             else:
   1149               raise

ValueError: in user code:

    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\input_spec.py", line 248, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "model" (type Functional).
    
    Input 0 of layer "dense_2" is incompatible with the layer: expected axis -1 of input shape to have value 258, but received input with shape (None, 257)
    
    Call arguments received:
      • inputs=('tf.Tensor(shape=(None, 148), dtype=int32)', 'tf.Tensor(shape=(None, 1), dtype=int64)')
      • training=True
      • mask=None

How do I fix this?

It finally ran after replacing 2 to 1 in this statement: meta_input = Input(shape=(2,), name='meta_input') — gracenz, May 25 '22 at 18:27

Error from model.fit with text and numerical features

0 Answers0