How to use a TensorFlow model to predict encoded sentences



Using spaCy, I convert the training set into a matrix of word vectors; this is done in the encode_sentences() function. In addition, I encode the corresponding labels with the label_encoding() function. This data is then used as the training data for my model. Whenever I want to predict a sentence in order to get the matching label, all I get as output is an nparray. How can I turn that into a proper prediction?

import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import sqlite3
from sklearn.preprocessing import LabelEncoder
import spacy
from sklearn.model_selection import train_test_split

nlp = spacy.load('en_core_web_lg')
embedding_dim = nlp.vocab.vectors_length

def read_database(path):
    # Loading Data from database
    connection = sqlite3.connect(path)
    db_rows = pd.read_sql('''select intents.intent, patterns.pattern
                             from intents, patterns where intents.id = patterns.intentid''', connection)
    labels = []
    sentences = []
    intents = []
    for i in range(len(db_rows)):
        labels.append(db_rows["intent"][i])
        if db_rows["intent"][i] not in intents:
            intents.append(db_rows["intent"][i])
        sentences.append(db_rows["pattern"][i])
    return sentences, labels, intents

def label_encoding(labels):
    # Calculate the length of labels
    n_labels = len(labels)
    print('Number of labels :-', n_labels)
    le = LabelEncoder()
    y = le.fit_transform(labels)
    print('Length of y :- ', y.shape)
    return y

def encode_sentences(sentences):
    # Calculate number of sentences
    n_sentences = len(sentences)
    X = np.zeros((n_sentences, embedding_dim))
    # Iterate over the sentences
    for idx, sentence in enumerate(sentences):
        doc = nlp(sentence)
        # Save the document's .vector attribute to the corresponding row in X
        X[idx, :] = doc.vector
    return X

sentences_train, labels_train, all_intents = read_database('./database_x.db')
sentences_train = encode_sentences(sentences_train)
labels_train = label_encoding(labels_train)
x_train, x_test, y_train, y_test = train_test_split(sentences_train, labels_train, test_size=0.2)
model = keras.Sequential([keras.layers.Dense(16, activation='relu'),
                          keras.layers.Dense(16, activation='relu'),
                          keras.layers.Dense(len(all_intents), activation='softmax')])
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01),
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=16, epochs=100)
prediction = model.predict(encode_sentences(["how can i test rf heating"]))  # encode_sentences expects a list of sentences
print("\n\n\n")
print(prediction)

Taking the argmax of the prediction gives you the index of the best candidate label:

pred_labels_idx = np.argmax(prediction, -1)

Best practice is to make the LabelEncoder le accessible from outside the function scope, so that you can inverse_transform the predictions back to the actual labels:

pred_labels = le.inverse_transform(pred_labels_idx)
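
For example, assuming le is the fitted LabelEncoder kept around from training and the other names are as in the question (the intermediate variable names here are just illustrative), the full prediction step for a single sentence could look like this:

x_new = encode_sentences(["how can i test rf heating"])  # shape (1, embedding_dim)
probs = model.predict(x_new)                             # shape (1, number of intents)
pred_idx = np.argmax(probs, -1)                          # index of the most likely intent
pred_label = le.inverse_transform(pred_idx)              # back to the original intent string
print(pred_label)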

Alternatively, you could modify label_encoding so that, when it encodes the labels of the training data, it also returns the fitted encoder to use as a label decoder; see the sketch below.
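
A minimal sketch of that change, keeping the rest of the question's code as-is (the returned le acts as the decoder):

def label_encoding(labels):
    # Encode the string labels as integers and also return the fitted
    # encoder so predictions can be decoded back to label names later.
    le = LabelEncoder()
    y = le.fit_transform(labels)
    return y, le

labels_train, le = label_encoding(labels_train)
# ... train as before, then decode a prediction with:
# pred_labels = le.inverse_transform(np.argmax(prediction, -1))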
