TensorFlow CSV input



我正在尝试使用 tf.TextLineReader() 读取 CSV 文件(以高性能的方式,如 Google 推荐的那样)。

如您所见,我可以创建批处理,稍后将使用这些批处理来训练我的非常基本的回归模型。问题是 tf.train.shuffle_batch 返回的是一个张量。这意味着我不能在模型 Y_pred = X*W+b 中为 X 使用占位符。在训练阶段,我现在只是直接用批处理张量代替占位符,但我需要一个占位符才能使用不同的数据进行测试。

我做错了什么?

编辑:我按照尼古拉斯的建议修改了代码(非常感谢!),但现在我得到了 0.0 的准确度……这有点奇怪。我正在使用鲍鱼数据集(8 个特征和 3 个类),其中我用单热向量 (1,0,0)、(0,1,0)…… 来表示类别。

 def getPartitionedDatasets(filenames):
    """Build a queue-based CSV input pipeline that yields shuffled batches.

    Every CSV line is decoded into ``N_FEATURES + 1`` float columns. Column 0
    is taken as the class label and columns 1..N_FEATURES as the features.

    Args:
        filenames: List of CSV file paths fed to the filename queue.

    Returns:
        A ``(data_batch, label_batch_hot)`` pair of tensors: the batched
        feature vectors and the matching one-hot encoded labels, both
        produced by ``tf.train.shuffle_batch`` with ``BATCH_SIZE`` rows.
    """
    filename_queue = tf.train.string_input_producer(filenames)
    reader = tf.TextLineReader()
    # read() returns a (key, value) pair; the key is only useful for debugging.
    _, value = reader.read(filename_queue)
    record_defaults = [[1.0] for _ in range(N_FEATURES + 1)]
    cont = tf.decode_csv(value, record_defaults=record_defaults)
    # Slice rather than spelling out cont[1]..cont[8] so the feature vector
    # always has exactly N_FEATURES entries, matching record_defaults.
    features = tf.stack(cont[1:N_FEATURES + 1])

    label = tf.to_int32(cont[0])
    min_after_dequeue = 10 * BATCH_SIZE
    capacity = 20 * BATCH_SIZE
    data_batch, label_batch_raw = tf.train.shuffle_batch(
        [features, label],
        batch_size=BATCH_SIZE,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    # One-hot depth follows CLASSES so the labels line up with the logits
    # produced from the [N_FEATURES, CLASSES] weight matrix.
    label_batch_hot = tf.one_hot(label_batch_raw, on_value=1, off_value=0,
                                 depth=CLASSES)
    return data_batch, label_batch_hot
def get_model_params():
    """Create the trainable parameters of the linear model.

    Returns:
        A ``(weights, bias)`` pair of zero-initialised ``tf.Variable``s with
        shapes ``[N_FEATURES, CLASSES]`` and ``[CLASSES]`` respectively.
    """
    weight_shape = [N_FEATURES, CLASSES]
    bias_shape = [CLASSES]
    weights = tf.Variable(tf.zeros(weight_shape), name='weights')
    bias = tf.Variable(tf.zeros(bias_shape), name="bias")
    return weights, bias
def build_model(data_batch, label_batch_hot, w, b):
    """Wire `w` and `b` into a linear softmax model for the given batch.

    Args:
        data_batch: Input features, multiplied by `w`.
        label_batch_hot: One-hot labels compared against the logits.
        w: Weight variable.
        b: Bias variable.

    Returns:
        A ``(logits, mean_cross_entropy)`` pair of tensors.
    """
    logits = tf.matmul(data_batch, w) + b
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=label_batch_hot, logits=logits)
    mean_loss = tf.reduce_mean(per_example_loss)
    return logits, mean_loss
def get_optimizer_op(cross_entropy_op):
    """Create the training op that minimises `cross_entropy_op`.

    Uses plain gradient descent with a learning rate of 0.5.
    """
    sgd = tf.train.GradientDescentOptimizer(0.5)
    train_op = sgd.minimize(cross_entropy_op)
    return train_op

def run():
    """Train the model on the training CSV and print accuracy on a test batch.

    Builds separate input pipelines for ``ABA_Train.csv`` and
    ``ABA_Test.csv``, shares one set of parameters between the training and
    test graphs, runs 300 optimisation steps and prints the accuracy measured
    on a single batch drawn from the test pipeline.
    """
    train_data, train_label = getPartitionedDatasets(["ABA_Train.csv"])
    test_data, test_label = getPartitionedDatasets(["ABA_Test.csv"])
    W, b = get_model_params()
    # Only the loss is needed from the training graph.
    _, train_cross_entropy = build_model(train_data, train_label, W, b)
    optimizer = get_optimizer_op(train_cross_entropy)
    # Only the logits are needed from the test graph; W and b are shared.
    test_predicted, _ = build_model(test_data, test_label, W, b)
    correct_prediction = tf.equal(tf.argmax(test_predicted, 1),
                                  tf.argmax(test_label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            sess.run(tf.global_variables_initializer())
            for _ in range(300):  # 300 training steps, one batch per step
                sess.run([optimizer])
            print(sess.run(accuracy))
        finally:
            # Stop and join the queue-runner threads so they do not outlive
            # the session, even if training raised.
            coord.request_stop()
            coord.join(threads)

def main():
    """Script entry point: build the graph, train and evaluate via `run`."""
    run()
# Only start training when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
你可以做的是实现各种方法来

  • 定义模型参数,
  • 根据这些参数和一些数据构建模型,
  • 根据您的模型构建优化器。

例如,你可以有这样的东西(假设你的训练和测试数据在 2 组不同的文件中):

def get_model_params():
    """Create the trainable parameters of the linear model.

    Returns:
        A ``(weights, bias)`` pair of zero-initialised ``tf.Variable``s with
        shapes ``[N_FEATURES, CLASSES]`` and ``[CLASSES]`` respectively.
    """
    weight_shape = [N_FEATURES, CLASSES]
    bias_shape = [CLASSES]
    weights = tf.Variable(tf.zeros(weight_shape), name='weights')
    bias = tf.Variable(tf.zeros(bias_shape), name="bias")
    return weights, bias
def build_model(data_batch, label_batch_hot, w, b):
    """Use `w` and `b` to build a model based on some data and labels.

    Args:
        data_batch: Input features, multiplied by `w`.
        label_batch_hot: One-hot labels compared against the logits.
        w: Weight variable.
        b: Bias variable.

    Returns:
        A ``(Y_predicted, cross_entropy)`` pair: the logits tensor and the
        mean softmax cross-entropy between the logits and the labels.
    """
    # Use the `w` argument; the original referred to an undefined `W` here.
    Y_predicted = tf.matmul(data_batch, w) + b
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label_batch_hot,
                                                logits=Y_predicted))
    # Return the local `Y_predicted`; the original returned the undefined
    # name `y_predicted`.
    return Y_predicted, cross_entropy
def get_optimizer_op(cross_entropy_op):
    """Create the training op that minimises `cross_entropy_op`.

    Uses plain gradient descent with a learning rate of 0.5.
    """
    sgd = tf.train.GradientDescentOptimizer(0.5)
    train_op = sgd.minimize(cross_entropy_op)
    return train_op
def run():
    """Train on the training batches, then evaluate the test cross entropy.

    Builds one input pipeline per file set via `batch_generator`, shares a
    single set of parameters between the training and test graphs, runs 300
    optimisation steps and finally evaluates the test cross entropy once.
    """
    data_batch, label_batch_hot = batch_generator(train_filenames)
    test_data_batch, test_label_batch_hot = batch_generator(test_filenames)
    W, b = get_model_params()
    # Only the loss is needed from the training graph.
    _, train_cross_entropy = build_model(data_batch, label_batch_hot, W, b)
    optimizer = get_optimizer_op(train_cross_entropy)
    # The test graph reuses the same W and b, so it sees the trained values.
    test_predicted, test_cross_entropy = build_model(
        test_data_batch, test_label_batch_hot, W, b)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            sess.run(tf.global_variables_initializer())
            for _ in range(300):  # 300 optimisation steps
                sess.run([optimizer])
            # The value is evaluated but not used, as in the original example.
            sess.run(test_cross_entropy)
        finally:
            # Stop and join the queue-runner threads so they do not outlive
            # the session, even if training raised.
            coord.request_stop()
            coord.join(threads)

相关内容

最新更新