如何在张量切片上有效地应用张量流函数?



我有一个张量,比如说,一批灰度图像(所以是BxHxW张量(。

我想将沿 H 和 W 的张量分别切成 N 和 M,均匀分布(或多或少均匀(的窗口,并将tf.math.bincount应用于每个窗口。 <--对不起,这个稍微复杂的英语。

下面是我目前所做的,但它构建起来很慢,并且创建了一个相当臃肿的张量流图。有没有更好的方法可以做到这一点?

def makeHistBins(inPut, N, M=None):
if M is None:
M = N
LEN = int(inPut.shape[0])
ImgH = int(inPut.shape[1])
ImgW = int(inPut.shape[2])
HistH = math.ceil(ImgH/N)
HistW = math.ceil(ImgW/M)
for k in range(LEN):
for i in range(N):
for j in range(M):
hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
if j == 0:
row = tf.expand_dims(hist, 0)
else:
row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
if i == 0:
col = tf.expand_dims(row, 0)
else:
col = tf.concat([col, tf.expand_dims(row, 0)], 0)
if k == 0:
out = tf.expand_dims(col, 0)
else:
out = tf.concat([out, tf.expand_dims(col, 0)], 0)
return out

# Images is 10x100x100
Out = makeHistBins(Images, 5)
# Out should be 10x5x5x256, where bincount was applied to 20x20 windows along each image.

你应该使用tf.extract_image_patches.此操作从输入图像中收集补丁,就像应用卷积一样。它支持窗口滑动和重叠。

def imagemakeHistBins(inPut, N, M=None):
if M is None:
M = N
B, H, W = inPut.get_shape().as_list()
HistH = math.ceil(H / N)
HistW = math.ceil(W / M)
inPut = tf.pad(inPut+1,[[0,0],[math.ceil((HistH*N-H)/2),math.floor((HistH*N-H)/2)],[math.ceil((HistW*M-W)/2),math.floor((HistW*M-W)/2)]])
# extract inPut to (-1,N,M,HistH*HistW)
# the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
# the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
# ...
inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
ksizes=[1, HistH, HistW, 1],
strides=[1, HistH, HistW, 1],
rates=[1, 1, 1, 1],
padding='VALID')
# following codes are referenced from https://stackoverflow.com/a/50883668
max_arr_plus = tf.reduce_max(inPut_new) + 1
new_shape = tf.shape(inPut_new)
group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
[new_shape[0], new_shape[1],new_shape[2]])
ids = inPut_new + group[:, :, :, None]
out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
return out[:,:,:,1:]

实验比较

您可以同时比较三种方法的时间消耗。

import tensorflow as tf
import math
import time
import numpy as np
def makeHistBins(inPut, N, M=None):
if M is None:
M = N
LEN = int(inPut.shape[0])
ImgH = int(inPut.shape[1])
ImgW = int(inPut.shape[2])
HistH = math.ceil(ImgH/N)
HistW = math.ceil(ImgW/M)
for k in range(LEN):
for i in range(N):
for j in range(M):
hist = tf.reshape(tf.math.bincount(inPut[k, math.floor(ImgH*i/N):math.ceil(ImgH*(i+1)/N), math.floor(ImgW*j/M):math.ceil(ImgW*(j+1)/M)], minlength=256), [256])
if j == 0:
row = tf.expand_dims(hist, 0)
else:
row = tf.concat([row, tf.expand_dims(hist, 0)], 0)
if i == 0:
col = tf.expand_dims(row, 0)
else:
col = tf.concat([col, tf.expand_dims(row, 0)], 0)
if k == 0:
out = tf.expand_dims(col, 0)
else:
out = tf.concat([out, tf.expand_dims(col, 0)], 0)
return out
def makeHistBins2(inPut, N, M=None):
if M is None:
M = N
LEN = int(inPut.shape[0])
ImgH = int(inPut.shape[1])
ImgW = int(inPut.shape[2])
HistH = math.ceil(ImgH/N)
HistW = math.ceil(ImgW/M)
splits_h = [int(np.floor((ImgH*(i+1))/N)- np.floor((ImgH*i)/N)) for i in range(N)]
splits_w = [int(np.floor((ImgW*(i+1))/M)- np.floor((ImgW*i)/M)) for i in range(M)]
# splits_h = np.array([ImgH // N for _ in range(N)])
# if ImgH - sum(splits_h) > 0:
#     splits_h[:(ImgH - sum(splits_h))] += 1
# 
# splits_w = np.array([ImgW // M for _ in range(M)])
# if ImgW - sum(splits_w) > 0:
#     splits_w[:ImgW - sum(splits_w)] += 1
# print(splits_w)
out = tf.map_fn(
lambda x: tf.stack(
[tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256) for y in tf.split(x, splits_h, axis=0) for z in tf.split(y, splits_w, axis=1) ]
), inPut)
return tf.reshape(out, [-1, len(splits_h), len(splits_w), 256])
def imagemakeHistBins(inPut, N, M=None):
if M is None:
M = N
B, H, W = inPut.get_shape().as_list()
HistH = math.ceil(H / N)
HistW = math.ceil(W / M)
inPut = tf.pad(inPut+1,[[0,0],[0,HistH*N-H],[0,HistW*M-H]])
# extract inPut to (-1,N,M,HistH*HistW)
# the shape of inPut_new is (10,5,5,400) if inPut is (10,100,100) and M=N=5
# the shape of inPut_new is (10,6,6,289) if inPut is (10,100,100) and M=N=6
# ...
inPut_new = tf.extract_image_patches(images=tf.expand_dims(inPut, axis=-1),
ksizes=[1, HistH, HistW, 1],
strides=[1, HistH, HistW, 1],
rates=[1, 1, 1, 1],
padding='VALID')
# following codes are referenced from https://stackoverflow.com/a/50883668
max_arr_plus = tf.reduce_max(inPut_new) + 1
new_shape = tf.shape(inPut_new)
group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
[new_shape[0], new_shape[1],new_shape[2]])
ids = inPut_new + group[:, :, :, None]
out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
return out[:,:,:,1:]
Images =tf.placeholder(shape=(10,100,100),dtype=tf.int32)
out1 = makeHistBins(Images, 5)
out2 = makeHistBins2(Images, 5)
out3 = imagemakeHistBins(Images, 5)
with tf.Session() as sess:
example = np.random.randint(0, 256, size=(10, 100, 100))
value1,value2,value3 = sess.run([out1,out2,out3], feed_dict={Images: example})
print(np.all(value1 == value2))
print(np.all(value1 == value3))
time1 = time.clock()
for _ in range(10):
sess.run(out1,feed_dict={Images:example})
time2 = time.clock()
print('makeHistBins cost time(ms) : %.2f' % ((time2 - time1)*1000))
time1 = time.clock()
for _ in range(10):
sess.run(out2, feed_dict={Images: example})
time2 = time.clock()
print('makeHistBins2 cost time(ms) : %.2f' % ((time2 - time1) * 1000))
time1 = time.clock()
for _ in range(10):
sess.run(out3, feed_dict={Images: example})
time2 = time.clock()
print('imagemakeHistBins cost time(ms) : %.2f' % ((time2 - time1) * 1000))

结果:

# True
# True
# makeHistBins cost time(ms) : 1187.79
# makeHistBins2 cost time(ms) : 568.32
# imagemakeHistBins cost time(ms) : 74.57

imagemakeHistBinsmakeHistBins2相比,imagemakeHistBins的运行时间减少了一个数量级。

笔记

还请注意您的提取策略。

根据您的逻辑,阻止策略如下:

ImgH = 100
N=6
start_h = [math.floor(ImgH*i/N) for i in range(N)] # [0, 16, 33, 50, 66, 83]
end_h = [math.ceil(ImgH*(i+1)/N) for i in range(N)] # [17, 34, 50, 67, 84, 100]
# [0:17, 16:34, 33:50, 50:67, 66:84, 83:100]

tf.splittf.extract_image_patches都不支持这样的块。

splits_h = np.array([ImgH // N for _ in range(N)])
if ImgH - sum(splits_h) > 0:
splits_h[:(ImgH - sum(splits_h))] += 1
# [17 17 17 17 16 16]

所以我的策略是用 0 填充最后一个块。因为我通过out[:,:,:,1:]1MAX(256) + 1的结果,不会影响最终结果的正确性。

# tf.extract_image_patches
# [17 17 17 17 17 15]

均匀的解决方案

def imagemakeHistBins(inPut, N, M=None):
if M is None:
M = N
B, H, W = inPut.get_shape().as_list()
splits_h = np.array([H // N for _ in range(N)])
if H - sum(splits_h) > 0:
splits_h[:(H - sum(splits_h))] += 1
splits_h = [0] + splits_h.tolist()
splits_h = np.cumsum(splits_h)
splits_w = np.array([W // M for _ in range(M)])
if W - sum(splits_w) > 0:
splits_w[:(W - sum(splits_w))] += 1
splits_w = [0] + splits_w.tolist()
splits_w = np.cumsum(splits_w)
# splits_w=[  0  17  34  51  68  84 100] if inPut is (10,100,100) and M=N=6
print(splits_w)
inPut_new = []
for i in range(N):
for j in range(M):
hist = inPut[:, splits_h[i]:splits_h[i + 1], splits_w[j]:splits_w[j + 1]]
hist = tf.pad(hist+1, [[0, 0],
[0, splits_h[1] - splits_h[i + 1] + splits_h[i]],
[0, splits_w[1] - splits_w[j + 1] + splits_w[j]]])
inPut_new.append(hist)
inPut_new = tf.reshape(tf.transpose(tf.stack(inPut_new), [1, 0, 2, 3]), [-1, N, M, splits_h[1] * splits_w[1]])
# following codes are referenced from https://stackoverflow.com/a/50883668
max_arr_plus = tf.reduce_max(inPut_new) + 1
new_shape = tf.shape(inPut_new)
group = max_arr_plus * tf.reshape(tf.range(new_shape[0] * new_shape[1]*new_shape[2]),
[new_shape[0], new_shape[1],new_shape[2]])
ids = inPut_new + group[:, :, :, None]
out = tf.bincount(ids, minlength=max_arr_plus*new_shape[0]*new_shape[1]*new_shape[2])
out = tf.reshape(out,[new_shape[0], new_shape[1],new_shape[2],max_arr_plus])
return out[:,:,:,1:]

以下方法有效。但请确保也指定minlength参数,因为如果返回的 bin 计数不同,则无法堆叠它们。所以基本上我们正在做两个 for 循环(一个用于拆分高度,另一个用于拆分宽度(,然后使用map_fn遍历行,这为您提供了一个10, 20, 20, 256矩阵(正如@giser_yugang指出的那样,输出末尾有一个 256 维(。它需要一些工作才能使其适用于不均匀的分裂。

def makeHistBins(inPut, N, M=None):
if M is None:
M = N
LEN = int(inPut.shape[0])
ImgH = int(inPut.shape[1])
ImgW = int(inPut.shape[2])
#HistH = math.ceil(ImgH/N)
#HistW = math.ceil(ImgW/M)
splits_h = np.array([ImgH//N for _ in range(N)])
if  ImgH - sum(splits_h) > 0:
splits_h[:(ImgH - sum(splits_h))] += 1
splits_w = np.array([ImgW//M for _ in range(M)])
if ImgW - sum(splits_w) > 0:
splits_w[:ImgW - sum(splits_w)] += 1
print(splits_w)
out = tf.map_fn(
lambda x: tf.stack(
[tf.math.bincount(tf.reshape(z, [-1]), minlength=256, maxlength=256) for y in tf.split(x, splits_h, axis=0) for z in tf.split(y, splits_w, axis=1) ]
), inPut)
return tf.reshape(out, [LEN, N, M, 256])

最新更新