How do the forward and backward passes of a transposed convolution layer work



Before I ask my question, I want to apologize to anyone who feels this is more of a math post than a programming post. Neural networks are heavy on both the math and the programming, and I believe my problem is on the programming side. I created a CNN from scratch in C++ (it works). Because of that, I am fairly confident that the functions I use for convolution and full convolution are correct. Below is how I do the basic forward and backward passes of my CNN when the layer in question is a convolution layer:

Matrix<float> cnn_forward(Matrix<float> weight, Matrix<float> prev){
    Matrix<float> output = prev.convolute(weight);
    return output;
}

And the backward pass (in this case I am not using a bias or an activation function):

cnn_back cnn_backward(Matrix<float> a_prev, Matrix<float> dz, Matrix<float> kernel){
    Matrix<float> rotated = kernel.rotate_180();
    Matrix<float> dx = dz.convolute_full(rotated);
    Matrix<float> dw = a_prev.convolute(dz);
    cnn_back output;
    output.dw = std::move(dw);
    output.dx = std::move(dx);
    return output;
}

Everything I have seen online suggests that a transposed convolution layer is simply the reverse of a convolution layer. So I tried implementing the following as the forward and backward passes of a transposed convolution layer:

//forward
Matrix<float> fcn_forward(Matrix<float> weight, Matrix<float> prev){
    Matrix<float> output = prev.convolute_full(weight.rotate_180());
    return output;
}
//backward
fcn_back fcn_backward(Matrix<float> a_prev, Matrix<float> dz, Matrix<float> kernel){
    Matrix<float> dx = dz.convolute(kernel);
    Matrix<float> dw = dz.convolute(a_prev);
    fcn_back output;
    output.dw = std::move(dw);
    output.dx = std::move(dx);
    return output;
}
//again, not using a bias or activation function

My goal is essentially to implement torch.nn.ConvTranspose2d from PyTorch for plain 2D matrices. I want to be able to compare it against the basic convolution functions above.
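For reference, here is a minimal sanity check one could run (a sketch only; it assumes PyTorch is installed, a single channel, stride 1, and no padding). With those settings, torch.nn.ConvTranspose2d reduces to a plain "full" convolution that scatters each input pixel over the kernel footprint, which is exactly what my convolute_full below computes:

# sanity-check sketch: single channel, stride 1, no padding
# the (1, 1, H, W) / (1, 1, kH, kW) shapes are just the single-channel layout conv_transpose2d expects
import numpy as np
import torch
import torch.nn.functional as F

def full_conv_scatter(X, W):
    # scatter each input pixel over the kernel footprint ("full" convolution)
    out = np.zeros((X.shape[0] + W.shape[0] - 1, X.shape[1] + W.shape[1] - 1))
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            out[i:i + W.shape[0], j:j + W.shape[1]] += X[i, j] * W
    return out

X = np.random.randn(10, 10)
W = np.random.randn(3, 3)
ref = full_conv_scatter(X, W)
out = F.conv_transpose2d(torch.from_numpy(X).view(1, 1, 10, 10),
                         torch.from_numpy(W).view(1, 1, 3, 3))
print(np.allclose(ref, out.numpy()[0, 0]))  # expected: True

Note that fcn_forward above rotates the kernel by 180° before the full convolution, so lining it up exactly with ConvTranspose2d means dropping the rotation (or handing ConvTranspose2d the rotated kernel); the two differ only by that flipping convention.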

~Edit~ Below is the same thing translated to Python with numpy arrays; it is an almost exact replica of my C++ code.

import numpy as np

def convolute(X, W, strides=(1,1)):
    new_row = (int)((X.shape[0] - W.shape[0])/strides[0] +1)
    new_col = (int)((X.shape[1] - W.shape[1])/strides[1] +1)
    out = np.zeros((new_row, new_col), dtype=float)
    x_last = 0
    y_last = 0
    for x in range(0, X.shape[0]-(W.shape[0] - 1), strides[0]):
        for y in range(0, X.shape[1]-(W.shape[1] - 1), strides[1]):
            amt = 0.0
            for i in range(0, W.shape[0]):
                for j in range(0, W.shape[1]):
                    amt += W[i][j] * X[x+i][y+j]
            out[x_last][y_last] = amt
            y_last += 1
        x_last += 1
        y_last = 0
    return out

def convolute_full(X, W, strides=(1, 1)):
    row_num = (X.shape[0] - 1) * strides[0] + W.shape[0]
    col_num = (X.shape[1] - 1) * strides[1] + W.shape[1]
    output = np.zeros([row_num, col_num])
    for i in range(0, X.shape[0]):
        i_prime = i * strides[0]
        for j in range(0, X.shape[1]):
            j_prime = j * strides[1]
            for k_row in range(W.shape[0]):
                for k_col in range(W.shape[1]):
                    output[i_prime+k_row, j_prime+k_col] += W[k_row, k_col] * X[i, j]
    return output

def get_errors(predicted, label):
    return label - predicted

def fcn_forward(weight, prev):
    rotated = np.rot90(np.rot90(weight))
    output = convolute_full(prev, rotated)
    return output

def fcn_backward(a_prev, dz, kernel):
    dx = convolute(dz, kernel)
    dw = convolute(dz, a_prev)
    dx = np.clip(dx, -10, 10)
    return dx, dw

def forward(weights, X_init):
    values = []
    values.append(X_init)
    predicted = fcn_forward(weights[0], X_init)
    values.append(predicted)
    predicted = fcn_forward(weights[1], predicted)
    values.append(predicted)
    return values

def backward(weights, values, label, learningRate=0.001):
    dz = get_errors(values[-1], label)
    dx, dw = fcn_backward(values[-2], dz, weights[-1])
    weights[-1] = weights[-1] - learningRate*dw
    dz = dx
    dx, dw = fcn_backward(values[-3], dz, weights[-2])
    weights[-2] = weights[-2] - learningRate*dw
    return weights

def train_example():
    epoch = int(input("enter epoch: "))
    # creating a random input
    inp = np.random.randn(10,10)
    # creating the weight matrices
    weights = [np.random.randn(3,3), np.random.randn(3,3)]
    # creating the wanted output
    label = np.random.randn(14,14)
    for i in range(0, epoch):
        values = forward(weights, inp)
        if(i == 0 or i == 1):
            errors = get_errors(values[-1], label)
            print("errors:")
            print(errors)
            print("error sum: ", np.sum(errors))
        weights = backward(weights, values, label)
    print("current prediction:")
    print(values[-1])
    print("label: ")
    print(label)
    errors = get_errors(values[-1], label)
    print("errors:")
    print(errors)
    print("error sum at end of training: ", np.sum(errors))

Basically, this does not work. The weights are not being corrected in the right direction, and the errors just keep growing (the opposite of what is wanted). What is the correct way to do the forward and backward passes of a transposed convolution layer?

Edit

Here, for anyone wondering how it relates to my code above, is what @Bob's answer (below) looks like applied to my code:

import numpy as np

def convolute(X, W, strides=(1,1)):
    new_row = (int)((X.shape[0] - W.shape[0])/strides[0] +1)
    new_col = (int)((X.shape[1] - W.shape[1])/strides[1] +1)
    out = np.zeros((new_row, new_col), dtype=float)
    x_last = 0
    y_last = 0
    for x in range(0, X.shape[0]-(W.shape[0] - 1), strides[0]):
        for y in range(0, X.shape[1]-(W.shape[1] - 1), strides[1]):
            amt = 0.0
            for i in range(0, W.shape[0]):
                for j in range(0, W.shape[1]):
                    amt += W[i][j] * X[x+i][y+j]
            out[x_last][y_last] = amt
            y_last += 1
        x_last += 1
        y_last = 0
    return out

# this gives the same result as scipy.signal.convolve2d
def convolute_full(X, W, strides=(1, 1)):
    row_num = (X.shape[0] - 1) * strides[0] + W.shape[0]
    col_num = (X.shape[1] - 1) * strides[1] + W.shape[1]
    output = np.zeros([row_num, col_num])
    for i in range(0, X.shape[0]):
        i_prime = i * strides[0]
        for j in range(0, X.shape[1]):
            j_prime = j * strides[1]
            for k_row in range(W.shape[0]):
                for k_col in range(W.shape[1]):
                    output[i_prime+k_row, j_prime+k_col] += W[k_row, k_col] * X[i, j]
    return output

def convolute_full_backward(X, dZ, dW, strides=(1, 1)):
    for i in range(0, X.shape[0]):
        i_prime = i * strides[0]
        for j in range(0, X.shape[1]):
            j_prime = j * strides[1]
            for k_row in range(dW.shape[0]):
                for k_col in range(dW.shape[1]):
                    dW[k_row, k_col] += dZ[i_prime+k_row, j_prime+k_col] * X[i, j]
    return dW

def get_errors(predicted, label):
    return label - predicted

def fcn_forward(W, X):
    rotated = np.rot90(np.rot90(W))
    output = convolute_full(X, rotated)
    return output

def fcn_backward(X, dZ, kernel):
    dw = np.zeros(kernel.shape)
    dw = convolute_full_backward(X, dZ, dw)
    dw = np.rot90(np.rot90(dw))
    dx = convolute(dZ, np.rot90(np.rot90(kernel)))
    dx = np.clip(dx, -10, 10)
    return dx, dw

def forward(weights, X):
    values = []
    values.append(X)
    predicted = fcn_forward(weights[0], X)
    values.append(predicted)
    predicted = fcn_forward(weights[1], predicted)
    values.append(predicted)
    return values

def backward(weights, values, label, learningRate=0.001):
    dz = get_errors(values[-1], label)
    dx, dw = fcn_backward(values[-2], dz, weights[-1])
    weights[-1] = weights[-1] + learningRate*dw
    dz = dx
    dx, dw = fcn_backward(values[-3], dz, weights[-2])
    # now apply dw:
    weights[-2] = weights[-2] + learningRate*dw
    return weights

def train_example():
    epoch = int(input("please enter epoch: "))
    inp = np.random.randn(10,10)
    weights = [np.random.randn(3,3), np.random.randn(3,3)]
    label = np.random.randn(14,14)
    for i in range(0, epoch):
        values = forward(weights, inp)
        errors = get_errors(values[-1], label)
        print("error sum at {} is: {}".format(i, np.sum(errors)))
        weights = backward(weights, values, label)
    errors = get_errors(values[-1], label)
    print("error sum at end of training: ", np.sum(errors))

Since your implementation performs every scalar multiplication explicitly, the backward step can be written down very cleanly: keep all the loops exactly as they are, and wherever you see an update to the accumulator, compute its gradient.
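Concretely, restating that reasoning in the index names used below: every forward update has the form

output[i_prime + k_row, j_prime + k_col] += W[k_row, k_col] * X[i, j]

so by the chain rule the matching backward updates are

dW[k_row, k_col] += dZ[i_prime + k_row, j_prime + k_col] * X[i, j]
dX[i, j] += dZ[i_prime + k_row, j_prime + k_col] * W[k_row, k_col]

(the code below only implements the dW update; the dX update follows the same pattern).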

import numpy as np

def convolute_full(X, W, strides=(1, 1)):
    row_num = (X.shape[0] - 1) * strides[0] + W.shape[0]
    col_num = (X.shape[1] - 1) * strides[1] + W.shape[1]
    output = np.zeros([row_num, col_num])
    for i in range(0, X.shape[0]):
        i_prime = i * strides[0]
        for j in range(0, X.shape[1]):
            j_prime = j * strides[1]
            for k_row in range(W.shape[0]):
                for k_col in range(W.shape[1]):
                    output[i_prime+k_row, j_prime+k_col] += W[k_row, k_col] * X[i, j]
    return output
def convolute_full_backward(X, dZ, dW, strides=(1, 1)):
    for i in range(0, X.shape[0]):
        i_prime = i * strides[0]
        for j in range(0, X.shape[1]):
            j_prime = j * strides[1]
            for k_row in range(dW.shape[0]):
                for k_col in range(dW.shape[1]):
                    # only this line changed compared to the forward pass
                    dW[k_row, k_col] += dZ[i_prime+k_row, j_prime+k_col] * X[i, j]

def fcn_forward(X, W):
    output = convolute_full(X, W[::-1,::-1])
    return output

def fcn_backward(X, dZ, kernel_shape):
    dW = np.zeros(kernel_shape)
    convolute_full_backward(X, dZ, dW[::-1,::-1])
    return dW

To verify it, I created a simple example with a linear loss function:

X = np.random.randn(20, 20)
W = np.random.randn(5, 5)
Z = fcn_forward(X, W)
# pick a random loss with known gradient
dZ = np.random.randn(*Z.shape)
F = np.sum(Z * dZ)
dW = fcn_backward(X, dZ, W.shape)
# random perturbation
W_ = W + np.random.randn(*W.shape)
# expected change to the loss function
dF = np.sum(dW * (W_ - W))
Z_ = fcn_forward(X, W_)
F_ = np.sum(Z_ * dZ)
print('Predicted loss change: %f' % dF)
print('Actual loss change: %f' % (F_ - F))

Run it and see.
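(Because Z is linear in W and F = np.sum(Z * dZ) is linear in Z, the predicted change dF should match the actual change F_ - F essentially exactly, up to floating-point round-off, even for a large perturbation.)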
