How to find the maximum value over a specific set of columns in a TensorFlow dataset



Suppose the following code:

import tensorflow as tf
import numpy as np

simple_data_samples = np.array([
[1, 1, 1, 7, -1],
[2, -2, 2, -2, -2],
[3, 3, 3, -3, -3],
[-4, 4, 4, -4, -4],
[5, 5, 5, -5, -5],
[6, 6, 6, -4, -6],
[7, 7, 8, -7, -7],
[8, 8, 8, -8, -8],
[9, 4, 9, -9, -9],
[10, 10, 10, -10, -10],
[11, 5, 11, -11, -11],
[12, 12, 12, -12, -12],
])

def print_dataset(ds):
    for inputs, targets in ds:
        print("---Batch---")
        print("Feature:", inputs.numpy())
        print("Label:", targets.numpy())
        print("")


def timeseries_dataset_multistep_combined(features, label_slice, input_sequence_length, output_sequence_length, sequence_stride, batch_size):
    feature_ds = tf.keras.preprocessing.timeseries_dataset_from_array(features, None, sequence_length=input_sequence_length + output_sequence_length, sequence_stride=sequence_stride, batch_size=batch_size, shuffle=False)

    def split_feature_label(x):
        return x[:, :input_sequence_length, :] + tf.reduce_max(x[:, :, :], axis=1), x[:, input_sequence_length:, label_slice] + tf.reduce_max(x[:, :, :], axis=1)

    feature_ds = feature_ds.map(split_feature_label)

    return feature_ds

ds = timeseries_dataset_multistep_combined(simple_data_samples, slice(None, None, None), input_sequence_length=4, output_sequence_length=2, sequence_stride=2, batch_size=1)
print_dataset(ds)

Let me explain what the code above does. It creates a number of features and labels. It then takes the maximum of each column, computed over the whole window (the feature rows together with the label rows), and adds that maximum to every value in the column. For example, take this feature and its corresponding label:

Feature: [[[ 1  1  1  7 -1]
[ 2 -2  2 -2 -2]
[ 3  3  3 -3 -3]
[-4  4  4 -4 -4]]]
Label: [[[ 5  5  5 -5 -5]
[ 6  6  6 -4 -6]]]

They have the following maximum values in each column:

6,6,6,7,-1

These maxima are then added to the corresponding columns, which gives the final output:

Feature: [[[ 7  7  7 14 -2]
[ 8  4  8  5 -3]
[ 9  9  9  4 -4]
[ 2 10 10  3 -5]]]
Label: [[[11 11 11  2 -6]
[12 12 12  3 -7]]]
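
For reference, this per-column behaviour can be reproduced outside the dataset pipeline. The following is a minimal sketch of that step on the first window only (my own illustration, not part of the question's code):

import tensorflow as tf
import numpy as np

# First window: 4 feature rows followed by 2 label rows.
window = tf.constant(np.array([
    [1, 1, 1, 7, -1],
    [2, -2, 2, -2, -2],
    [3, 3, 3, -3, -3],
    [-4, 4, 4, -4, -4],
    [5, 5, 5, -5, -5],
    [6, 6, 6, -4, -6],
]))

# Column-wise maximum over the whole window: [6, 6, 6, 7, -1].
col_max = tf.reduce_max(window, axis=0)

# Broadcasting adds the maxima to every row of the feature and the label.
print((window[:4] + col_max).numpy())  # feature part
print((window[4:] + col_max).numpy())  # label part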

Instead of taking the maximum of every single column, I want to take the maximum over the first three columns and over the last two columns of each feature and its corresponding label. Once extracted, those maxima should be added to every value in the corresponding columns. In the example above, the maximum of the first three columns is 6 and the maximum of the last two columns is 7. Then 6 is added to every value in the first three columns and 7 to every value in the last two columns. The final output of the first batch would then be as follows (the arithmetic is spelled out in the short sketch after this example):

Feature: [[[ 7  7  7 14  6]
[ 8  4  8  5  5]
[ 9  9  9  4  4]
[ 2 10 10  3  3]]]
Label: [[[11 11 11  2  2]
[12 12 12  3  1]]]
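
The arithmetic behind this expected output can be checked with another small sketch (again my own illustration, outside the dataset pipeline): take one maximum over the first three columns, one over the last two, and add each back to its columns:

import tensorflow as tf
import numpy as np

# Same first window as above: 4 feature rows plus 2 label rows.
window = tf.constant(np.array([
    [1, 1, 1, 7, -1],
    [2, -2, 2, -2, -2],
    [3, 3, 3, -3, -3],
    [-4, 4, 4, -4, -4],
    [5, 5, 5, -5, -5],
    [6, 6, 6, -4, -6],
]))

first_max = tf.reduce_max(window[:, :3])  # single maximum of the first three columns -> 6
last_max = tf.reduce_max(window[:, 3:])   # single maximum of the last two columns -> 7

shifted = tf.concat([window[:, :3] + first_max, window[:, 3:] + last_max], axis=1)
print(shifted.numpy()[:4])  # expected feature part of the first batch
print(shifted.numpy()[4:])  # expected label part of the first batch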

Does anyone know how to extract the maximum over the first three columns and over the last two columns of each batch?

Would using tf.tile together with tf.reduce_max like this work for you:

import tensorflow as tf
import numpy as np

simple_data_samples = np.array([
[1, 1, 1, 7, -1],
[2, -2, 2, -2, -2],
[3, 3, 3, -3, -3],
[-4, 4, 4, -4, -4],
[5, 5, 5, -5, -5],
[6, 6, 6, -4, -6],
[7, 7, 8, -7, -7],
[8, 8, 8, -8, -8],
[9, 4, 9, -9, -9],
[10, 10, 10, -10, -10],
[11, 5, 11, -11, -11],
[12, 12, 12, -12, -12],
])

def print_dataset(ds):
    for inputs, targets in ds:
        print("---Batch---")
        print("Feature:", inputs.numpy())
        print("Label:", targets.numpy())
        print("")


def timeseries_dataset_multistep_combined(features, label_slice, input_sequence_length, output_sequence_length, sequence_stride, batch_size):
    feature_ds = tf.keras.preprocessing.timeseries_dataset_from_array(features, None, sequence_length=input_sequence_length + output_sequence_length, sequence_stride=sequence_stride, batch_size=batch_size, shuffle=False)

    def split_feature_label(x):
        # Per-column maxima over the time axis, separately for the first three and the last two columns.
        reduced_first_max_columns = tf.reduce_max(x[:, :, :3], axis=1, keepdims=True)
        reduced_last_max_columns = tf.reduce_max(x[:, :, 3:], axis=1, keepdims=True)
        # Collapse each group to a single maximum and repeat it for every column in that group.
        reduced_first_max_columns = tf.tile(tf.reduce_max(reduced_first_max_columns, axis=-1), [1, 3])
        reduced_last_max_columns = tf.tile(tf.reduce_max(reduced_last_max_columns, axis=-1), [1, 2])
        # Concatenate back to one offset per column and add a leading axis so it broadcasts over the time steps.
        reduced_x = tf.expand_dims(tf.concat([reduced_first_max_columns, reduced_last_max_columns], axis=1), axis=0)

        return x[:, :input_sequence_length, :] + reduced_x, x[:, input_sequence_length:, label_slice] + reduced_x

    feature_ds = feature_ds.map(split_feature_label)

    return feature_ds

ds = timeseries_dataset_multistep_combined(simple_data_samples, slice(None, None, None), input_sequence_length=4, output_sequence_length=2, sequence_stride=2, batch_size=1)
print_dataset(ds)

---Batch---
Feature: [[[ 7  7  7 14  6]
[ 8  4  8  5  5]
[ 9  9  9  4  4]
[ 2 10 10  3  3]]]
Label: [[[11 11 11  2  2]
[12 12 12  3  1]]]
---Batch---
Feature: [[[11 11 11 -6 -6]
[ 4 12 12 -7 -7]
[13 13 13 -8 -8]
[14 14 14 -7 -9]]]
Label: [[[ 15  15  16 -10 -10]
[ 16  16  16 -11 -11]]]
...
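
The same idea generalizes to other column groupings. Below is a sketch of a variant of the mapped function (my own adaptation, with the hypothetical name split_feature_label_grouped, not part of the answer) that takes the group sizes as a parameter and uses tf.split and tf.repeat instead of hard-coded slices and tf.tile; like the answer, it assumes label_slice selects all columns:

import tensorflow as tf

def split_feature_label_grouped(x, input_sequence_length, label_slice, group_sizes=(3, 2)):
    # x has shape (batch, input_sequence_length + output_sequence_length, num_columns).
    groups = tf.split(x, list(group_sizes), axis=-1)
    # One scalar maximum per group, reduced over the time and column axes -> (batch, num_groups).
    group_max = tf.stack([tf.reduce_max(g, axis=[1, 2]) for g in groups], axis=-1)
    # Repeat each group maximum once per column of its group -> (batch, 1, num_columns).
    offsets = tf.expand_dims(tf.repeat(group_max, repeats=list(group_sizes), axis=-1), axis=1)
    return (x[:, :input_sequence_length, :] + offsets,
            x[:, input_sequence_length:, label_slice] + offsets)

# Usage, mirroring the answer's pipeline:
# feature_ds = feature_ds.map(lambda x: split_feature_label_grouped(x, 4, slice(None), (3, 2)))

Shaping the offsets as (batch, 1, num_columns) keeps the broadcast valid for any batch size, whereas the expand_dims(..., axis=0) in the answer only lines up for batch_size=1 as used in the example.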
