下面的代码片段给出了一些tensorType错误
TypeError: Cannot convert Type TensorType(float32, 3D) (of Variable Subtensor{:int64:}.0) into Type TensorType(float32, (False, False, True))。您可以尝试手动转换子张量{:int64:}。0 into a TensorType(float32, (False, False, True)).
这是我在一个教程网站上看到的一个基本例子。你能帮我理解这个错误吗?我是机器学习和keras的新手
import itertools
import numpy as np
# put together a model to predict
from keras.layers import Input, Embedding, merge, Flatten, SimpleRNN
from keras.models import Model
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('n')
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T
lemma = lambda x: x.strip().lower().split(' ')
sentences_lemmatized = [lemma(sentence) for sentence in sentences]
words = set(itertools.chain(*sentences_lemmatized))
# set(['boy', 'fed', 'ate', 'cat', 'kicked', 'hat'])
# dictionaries for converting words to integers and vice versa
word2idx = dict((v, i) for i, v in enumerate(words))
idx2word = list(words)
# convert the sentences a numpy array
to_idx = lambda x: [word2idx[word] for word in x]
sentences_idx = [to_idx(sentence) for sentence in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')
# parameters for the model
sentence_maxlen = 3
n_words = len(words)
n_embed_dims = 5
input_sentence = Input(shape=(sentence_maxlen,), dtype='int32')
input_embedding = Embedding(n_words, n_embed_dims)(input_sentence)
#color_prediction = SimpleRNN(init='uniform',output_dim=1,input_dim=3)(input_embedding)
#color_prediction = SimpleRNN(output_dim=1,input_dim=5,
# init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None, return_sequences=False)(input_embedding);
color_prediction = SimpleRNN(1, return_sequences=False, batch_input_shape=(10, 2, 3))(input_embedding);
predict_green = Model(input=[input_sentence], output=[color_prediction])
predict_green.compile(optimizer='sgd', loss='binary_crossentropy')
# fit the model to predict what color each person is
predict_green.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)
embeddings = predict_green.layers[1].W.get_value()
# print out the embedding vector associated with each word
for i in range(n_words):
print('{}: {}'.format(idx2word[i], embeddings[i]))
我是机器学习的新手,也遇到了你的问题。我把代码修改如下,它运行了,但我不确定它是否正确。
import itertools
import os
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Input, Embedding, merge, Flatten, SimpleRNN
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('n')
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T
lemma = lambda x: x.strip().lower().split(' ')
sentences_lemmatized = [lemma(sentence) for sentence in sentences]
words = set(itertools.chain(*sentences_lemmatized))
# set(['boy', 'fed', 'ate', 'cat', 'kicked', 'hat'])
# dictionaries for converting words to integers and vice versa
word2idx = dict((v, i) for i, v in enumerate(words))
idx2word = list(words)
# convert the sentences a numpy array
to_idx = lambda x: [word2idx[word] for word in x]
sentences_idx = [to_idx(sentence) for sentence in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')
# parameters for the model
sentence_maxlen = 3
n_words = len(words)
n_embed_dims = 3
model = Sequential()
model.add(Embedding(n_words, n_embed_dims,input_length=sentence_maxlen))
model.add(SimpleRNN(3))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='sgd',
metrics=['accuracy'])
model.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)
predictions = model.predict(sentences_array)
print predictions.shape
embeddings = model.layers[0].W.get_value()
# print out the embedding vector associated with each word
for i in range(n_words):
print('{}: {}'.format(idx2word[i], embeddings[i]))
输出:sarah: [-0.51089537 -0.30958903 -0.17312947] sam: [-0.47487321
-0.33426151 -0.18260512] hannah: [ 0.51548952 0.33343625 0.18121554] is: [ 0.02989657 -0.02573686 0.01081978] green: [ 0.0155487
-0.02551323 0.00846179] not: [ 0.01339869 -0.02586824 0.01932905] bob: [ 0.47654441 0.37283263 0.17969941] red: [-0.02136148
0.04420395 -0.03119873]
作为参考,这个示例代码来自http://benjaminbolte.com/blog/2016/keras-language-modeling.html,尽管从2016年4月开始,它不适用于当前版本的Keras==1.1.1
, Theano==0.8.2
我认为您需要的主要东西,特别是使用新的Keras-1功能API是SimpleRNN
之后的Dense
层。您还需要确保SimpleRNN
的output_dim
为3。
我修改了最小值以使您的代码工作,下面是运行并给出以下输出的更新代码:
import itertools
import numpy as np
# put together a model to predict
from keras.layers import Input, Embedding, merge, Flatten, Dense, SimpleRNN
from keras.models import Model
sentences = '''
sam is red
hannah not red
hannah is green
bob is green
bob not red
sam not green
sarah is red
sarah not green'''.strip().split('n')
is_green = np.asarray([[0, 1, 1, 1, 1, 0, 0, 0]], dtype='int32').T
lemma = lambda x: x.strip().lower().split(' ')
sentences_lemmatized = [lemma(sentence) for sentence in sentences]
words = set(itertools.chain(*sentences_lemmatized))
# set(['boy', 'fed', 'ate', 'cat', 'kicked', 'hat'])
# dictionaries for converting words to integers and vice versa
word2idx = dict((v, i) for i, v in enumerate(words))
idx2word = list(words)
# convert the sentences a numpy array
to_idx = lambda x: [word2idx[word] for word in x]
sentences_idx = [to_idx(sentence) for sentence in sentences_lemmatized]
sentences_array = np.asarray(sentences_idx, dtype='int32')
# parameters for the model
sentence_maxlen = 3
n_words = len(words)
n_embed_dims = 5
input_sentence = Input(shape=(sentence_maxlen,), dtype='int32')
input_embedding = Embedding(n_words, n_embed_dims)(input_sentence)
#color_prediction = SimpleRNN(init='uniform',output_dim=1,input_dim=3)(input_embedding)
#color_prediction = SimpleRNN(output_dim=1,input_dim=5,
# init='glorot_uniform', inner_init='orthogonal', activation='sigmoid', weights=None, return_sequences=False)(input_embedding);
color_prediction = SimpleRNN(3, return_sequences=False, batch_input_shape=(10, 2, 3))(input_embedding);
output = Dense(1, activation='sigmoid')(color_prediction)
predict_green = Model(input=[input_sentence], output=[output])
predict_green.compile(optimizer='sgd', loss='binary_crossentropy')
# fit the model to predict what color each person is
predict_green.fit([sentences_array], [is_green], nb_epoch=5000, verbose=1)
embeddings = predict_green.layers[1].W.get_value()
# print out the embedding vector associated with each word
for i in range(n_words):
print('{}: {}'.format(idx2word[i], embeddings[i]))
输出:red: [-0.03866547 -0.04583547 -0.0314577 -0.04576075 0.00064603]
is: [-0.02093433 -0.00811194 -0.01167872 -0.02813761 -0.02160992]
bob: [-0.46798751 -0.01344876 0.36456427 -0.04407313 0.22842836]
sarah: [ 0.45210958 0.02854088 -0.34124625 0.0184452 -0.23039177]
green: [-0.00628944 0.04167764 -0.02662347 -0.01051781 0.03500782]
not: [-0.01113868 -0.01490347 -0.00235422 -0.03276222 -0.0263596 ]
sam: [ 0.45785579 0.04527023 -0.34266111 -0.01125967 -0.25254542]
hannah: [-0.45697126 -0.00369712 0.37587604 -0.04988953 0.23814654]