I'm new to ML and I'm trying to get a simple MLP working with text vectorization. I'm using a 2-layer MLP with a binary outcome. Can someone explain what I'm doing wrong?
The data format is shown below. Basically I'm trying to figure out whether an address is gibberish.
(['10@¨260 :?Kings .]~H.wy ','3109 n drake' '(1`72¿0" |3¥4®th St SE'],['something else','something else 2'],[1,0])
Error received:
Error: ValueError: `logits` and `labels` must have the same shape, received ((None, 250, 1) vs (None,)).
Code:

def train_ngram_model(data,
                      learning_rate=0.002,
                      epochs=10,
                      batch_size=3000,
                      layers=2,
                      units=64,
                      dropout_rate=0.5,
                      num_classes=2,
                      vectorize=Vectorize()):
    encoder = vectorize.charVectorize_tfid(data[0])
    # encoder.adapt(data[1])
    # encoder.adapt(data[2])
    # encoder.adapt(data[3])
    # encoder.adapt(data[4])

    model = Sequential()
    model.add(encoder)
    model.add(Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        mask_zero=True))
    model.add(Dense(units))
    model.add(Activation('relu'))
    model.add(Dropout(0.45))
    model.add(Dense(units))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()

    model.compile(loss=BinaryCrossentropy(from_logits=False),
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(data[1], data[5].astype(np.int), epochs=epochs, batch_size=batch_size)
You need to look at the input dimensions. From your model and code I read that you are feeding sequence input built from a simple vocabulary and trying to predict whether those input sequences match a pattern, or how similar they are. For a words-only model you could use an input generator, but since you also need predictions, a prediction method is added in the sample. A minimal sketch of the shape problem comes first, then the full sample.
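To make the shape issue concrete, here is a minimal sketch. The toy addresses, the TextVectorization layer (standing in for your Vectorize() helper) and the GlobalAveragePooling1D step are my own choices, not taken from your code; the point is only that the Embedding output keeps the time axis, so a final Dense(1, activation='sigmoid') applied to it emits one value per character position, shape (None, 250, 1), while your labels have shape (None,). Collapsing the time axis before the final Dense is one way to make the shapes compatible:

import tensorflow as tf

# Toy stand-ins for the address data (hypothetical examples).
texts = tf.constant(["10 Kings Hwy", "3109 n drake", "172 34th St SE"])
labels = tf.constant([1, 0, 0])

# Pads/truncates every address to 250 tokens, matching the 250 in the error.
encoder = tf.keras.layers.TextVectorization(output_sequence_length=250)
encoder.adapt(texts)

model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(len(encoder.get_vocabulary()), 64, mask_zero=True),
    # Without this pooling step the following Dense layers run per time step and
    # the final output keeps shape (None, 250, 1), which cannot match (None,) labels.
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid'),   # shape (None, 1)
])
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer='adam', metrics=['accuracy'])
model.fit(texts, labels, epochs=2, batch_size=3)

The full sample below takes a different route and encodes the characters explicitly with StringLookup.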
[Sample]:
import tensorflow as tf
import tensorflow_addons as tfa
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
epochs = 50
learning_rate = 0.002
batch_size = 4
layers = 2
units = 64
dropout_rate = 0.5
num_classes = 2
input_vocab_size = 128
output_vocab_size = 64
embedding_size = 48
hidden_size = 32
max_time = 7
batch_size = 1
n_blocks = 7
n_sizes = 4
vocab = ["a", "b", "c", "d", "e", "f", "g"]
model = tf.keras.models.Sequential([ ])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def train_ngram_model(data, learning_rate=0.002, epochs=10, batch_size=32, layers=2, units=64, dropout_rate=0.5, num_classes=2, vocab=vocab):
    embedding_layer = tf.keras.layers.Embedding(input_vocab_size, embedding_size)
    ###
    decoder_cell = tf.keras.layers.LSTMCell(hidden_size)
    sampler = tfa.seq2seq.TrainingSampler()
    output_layer = tf.keras.layers.Dense(output_vocab_size)
    decoder = tfa.seq2seq.BasicDecoder(decoder_cell, sampler, output_layer)

    ##########################
    # Placeholder ids; immediately replaced by the lookup of the actual data.
    input_ids = tf.random.uniform(
        [n_blocks, n_sizes], maxval=input_vocab_size, dtype=tf.int64)
    # Map each character to an integer id from the fixed vocabulary.
    layer = tf.keras.layers.StringLookup(vocabulary=vocab)
    input_ids = layer(data)
    ##########################

    input_lengths = tf.fill([batch_size], max_time)
    input_tensors = embedding_layer(input_ids)
    initial_state = decoder_cell.get_initial_state(input_tensors)
    output, state, lengths = decoder(input_tensors, sequence_length=input_lengths, initial_state=initial_state)
    logits = output.rnn_output

    label = tf.constant(0, shape=(1, 1, 1), dtype=tf.float32)
    input_ids = tf.cast(input_ids, dtype=tf.float32)
    input_ids = tf.constant(input_ids, shape=(1, 1, n_blocks, n_sizes), dtype=tf.float32)
    # The lookup result is used as both features and labels in the dataset.
    dataset = tf.data.Dataset.from_tensor_slices((input_ids, input_ids))

    return dataset
def model_initialize( n_blocks=7, n_sizes=4 ):
    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(n_blocks, n_sizes)),
        tf.keras.layers.Dense(32),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(192, activation='relu'),
        tf.keras.layers.Dense(1),
    ])
    model.summary()
    model.compile( loss=tf.keras.losses.BinaryCrossentropy(from_logits=False), optimizer='adam', metrics=['accuracy'] )
    return model
def target_prediction( data, model, n_blocks=7, n_sizes=4, input_vocab_size=128, vocab=vocab ):
    ##########################
    input_ids = tf.random.uniform(
        [n_blocks, n_sizes], maxval=input_vocab_size, dtype=tf.int64)
    layer = tf.keras.layers.StringLookup(vocabulary=vocab)
    input_ids = layer(data)
    ##########################
    # Convert the character ids to a float batch of shape (1, n_blocks, n_sizes) and predict.
    prediction_input = tf.cast( input_ids, dtype=tf.float32 )
    prediction_input = tf.constant( prediction_input, shape=( 1, n_blocks, n_sizes ), dtype=tf.float32 )
    predictions = model.predict( prediction_input )
    result = tf.math.argmax(predictions[0]).numpy()
    return result
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Working logicals
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
data = tf.constant([["a", "c", "d", "e", "d", "z", "b"], ["a", "c", "d", "e", "d", "z", "b"], ["a", "c", "d", "e", "d", "z", "b"], ["a", "c", "d", "e", "d", "z", "b"]])
dataset = train_ngram_model( data, learning_rate, epochs, batch_size, layers, units, dropout_rate, num_classes )
model = model_initialize( n_blocks=7, n_sizes=4 )
model.fit( dataset, epochs=epochs, batch_size=1)
##########################
data = tf.constant([["a", "c", "d", "e", "d", "z", "b"], ["a", "c", "d", "e", "d", "z", "b"], ["a", "c", "d", "e", "d", "z", "b"], ["a", "c", "d", "e", "d", "z", "b"]])
result = target_prediction( data, model, n_blocks=7, n_sizes=4, input_vocab_size=128, vocab=vocab )
print( "result = " + str(result) )
input('...')
[Output]:
Sample
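A note on the sample's design: StringLookup turns each character into an integer id, the resulting (n_blocks, n_sizes) grid of ids is treated as a fixed-size numeric input, and the Dense stack in model_initialize produces a single output per example. The tfa.seq2seq decoder section inside train_ngram_model computes rnn_output logits that are not used further, and the returned dataset reuses input_ids as both features and labels; for your gibberish/not-gibberish task you would pair the encoded addresses with your 0/1 labels instead.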