PyTorch and TensorFlow give different results



I am implementing a "perceptual loss" function, but PyTorch and TensorFlow give different results even though I use the same images. Please tell me why.

TensorFlow

class FeatureExtractor(tf.keras.Model):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = tf.keras.applications.VGG16(weights="imagenet",
                                                include_top=False, input_shape=(256, 256, 3))
        extractor.trainable = True
        # features = [extractor.layers[i].output for i in n_layers]
        features = [extractor.get_layer(i).output for i in n_layers]
        self.extractor = tf.keras.models.Model(extractor.inputs, features)

    def call(self, x):
        return self.extractor(x)
def loss_function(generated_image, target_image, feature_extractor):
    MSE = tf.keras.losses.MeanSquaredError()
    mse_loss = MSE(generated_image, target_image)
    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    perceptual_loss = 0
    for i in range(len(real_features)):
        loss = MSE(real_features[i], generated_features[i])
        print(loss)
        perceptual_loss += loss
    return mse_loss, perceptual_loss

Run:

feature_extractor = FeatureExtractor(n_layers=["block1_conv1", "block1_conv2",
                                               "block3_conv2", "block4_conv2"])
mse_loss, perceptual_loss = loss_function(image1, image2, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss+perceptual_loss}")

It gives:

output:
tf.Tensor(0.0014001362, shape=(), dtype=float32)
tf.Tensor(0.030578917, shape=(), dtype=float32)
tf.Tensor(2.6163354, shape=(), dtype=float32)
tf.Tensor(0.842701, shape=(), dtype=float32)
0.002584027126431465 3.4910154342651367 3.4935994148254395

PyTorch

class FeatureExtractor(torch.nn.Module):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = models.vgg16(pretrained=True).features
        index = 0
        self.layers = nn.ModuleList([])
        for i in range(len(n_layers)):
            self.layers.append(torch.nn.Sequential())
            for j in range(index, n_layers[i] + 1):
                self.layers[i].add_module(str(j), extractor[j])
            index = n_layers[i] + 1
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        result = []
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            result.append(x)
        return result
def loss_function(generated_image, target_image, feature_extractor):
    MSE = nn.MSELoss(reduction='mean')
    mse_loss = MSE(generated_image, target_image)
    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    perceptual_loss = 0
    for i in range(len(real_features)):
        loss = MSE(real_features[i], generated_features[i])
        perceptual_loss += loss
        print(loss)
    return mse_loss, perceptual_loss

Run:

feature_extractor = FeatureExtractor(n_layers=[1, 3, 13, 20]).to(device)
mse_loss, perceptual_loss = loss_function(image1, image2, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss+perceptual_loss}")

It gives:

output:
tensor(0.0003)
tensor(0.0029)
tensor(0.2467)
tensor(0.2311)
0.002584027359262109 0.4810013473033905 0.483585387468338

Although both are nominally the same VGG16 model, the pretrained weights shipped by the two libraries come from different training/conversion pipelines, so the final model parameters can differ (a direct weight check is sketched after the example below). The input preprocessing also differs between frameworks such as Keras and PyTorch: Keras's preprocess_input for VGG16 applies Caffe-style preprocessing (channels flipped from RGB to BGR, ImageNet channel means subtracted), while the usual torchvision pipeline scales pixels to [0, 1] and normalizes with the ImageNet mean and std. So even for the same image, the pixel values that actually enter the two networks are different. The following code is an example that helps to see both effects.

from abc import ABC
import torch
import numpy as np
import tensorflow as tf
from torch import nn
from PIL import Image
from torch.autograd import Variable
import torchvision.models as models
import torchvision.transforms as transforms
from keras.preprocessing.image import load_img
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import img_to_array
# 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg'
IMG_URL1 = 'the local path of 1200px-Cat03.jpg'
# 'https://upload.wikimedia.org/wikipedia/commons/b/bb/Kittyply_edit1.jpg'
IMG_URL2 = 'the local path of Kittyply_edit1.jpg'
# preprocess in keras
image1_tf = load_img(IMG_URL1, target_size=(224, 224))
image1_tf = img_to_array(image1_tf)
image1_tf = image1_tf.reshape((1, image1_tf.shape[0], image1_tf.shape[1], image1_tf.shape[2]))
image1_tf = preprocess_input(image1_tf)
image2_tf = load_img(IMG_URL2, target_size=(224, 224))
image2_tf = img_to_array(image2_tf)
image2_tf = image2_tf.reshape((1, image2_tf.shape[0], image2_tf.shape[1], image2_tf.shape[2]))
image2_tf = preprocess_input(image2_tf)

# preprocess in pytorch
image1_torch = Image.open(IMG_URL1)
image2_torch = Image.open(IMG_URL2)
image1_torch = image1_torch.resize((224, 224))
image2_torch = image2_torch.resize((224, 224))
min_img_size = 224
transform_pipeline = transforms.Compose([transforms.Resize(min_img_size),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                              std=[0.229, 0.224, 0.225])])
image1_torch = transform_pipeline(image1_torch)
image2_torch = transform_pipeline(image2_torch)
image1_torch = image1_torch.unsqueeze(0)
image2_torch = image2_torch.unsqueeze(0)
image1_torch = Variable(image1_torch)
image2_torch = Variable(image2_torch)
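
# A quick sanity check (added for illustration, not part of the original
# answer): preprocess_input for VGG16 is Caffe-style -- it flips RGB to BGR
# and subtracts the ImageNet channel means, leaving values roughly in
# [-124, 152] -- while the torchvision pipeline above scales to [0, 1] and
# normalizes with the ImageNet mean/std, leaving values roughly in
# [-2.1, 2.7]. The same image therefore enters the two networks as very
# different tensors.
print("tf input range:   ", image1_tf.min(), image1_tf.max())
print("torch input range:", image1_torch.min().item(), image1_torch.max().item())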

class FeatureExtractor(tf.keras.Model, ABC):
    def __init__(self, n_layers):
        super(FeatureExtractor, self).__init__()
        extractor = tf.keras.applications.VGG16(weights="imagenet", input_shape=(224, 224, 3))
        extractor.trainable = True
        features = [extractor.get_layer(i).output for i in n_layers]
        self.extractor = tf.keras.models.Model(extractor.inputs, features)

    def call(self, x):
        return self.extractor(x)

def loss_function(generated_image, target_image, feature_extractor):
    MSE = tf.keras.losses.MeanSquaredError()
    mse_loss = MSE(generated_image, target_image)
    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)

    # the last output is the "predictions" layer, used only to compare classes
    print("tf prediction:", np.argmax(generated_features[-1].numpy()[0]))
    print("tf prediction:", np.argmax(real_features[-1].numpy()[0]))
    perceptual_loss = 0
    for i in range(len(real_features[:-1])):
        loss = MSE(real_features[i], generated_features[i])
        print(loss)
        perceptual_loss += loss
    return mse_loss, perceptual_loss

feature_extractor = FeatureExtractor(n_layers=["block1_conv1", "block1_conv2", "block3_conv2",
                                               "block4_conv2", "predictions"])
print("tensorflow: ")
mse_loss, perceptual_loss = loss_function(image1_tf, image2_tf, feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss + perceptual_loss}")

class FeatureExtractor1(torch.nn.Module):
    def __init__(self, n_layers):
        super(FeatureExtractor1, self).__init__()
        self.vgg = models.vgg16(pretrained=True)
        extractor = self.vgg.features
        index = 0
        self.layers = nn.ModuleList([])
        for i in range(len(n_layers)):
            self.layers.append(torch.nn.Sequential())
            for j in range(index, n_layers[i] + 1):
                self.layers[i].add_module(str(j), extractor[j])
            index = n_layers[i] + 1
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        result = []
        predict = self.vgg(x)
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            result.append(x)
        result.append(predict)
        return result

def loss_function1(generated_image, target_image, feature_extractor):
    MSE = nn.MSELoss(reduction='mean')
    mse_loss = MSE(generated_image, target_image)
    real_features = feature_extractor(target_image)
    generated_features = feature_extractor(generated_image)
    # move the logits back to the CPU before converting to numpy,
    # otherwise this crashes when the model runs on CUDA
    print("torch prediction:", np.argmax(generated_features[-1].detach().cpu().numpy()[0]))
    print("torch prediction:", np.argmax(real_features[-1].detach().cpu().numpy()[0]))
    perceptual_loss = 0
    for i in range(len(real_features[:-1])):
        loss = MSE(real_features[i], generated_features[i])
        perceptual_loss += loss
        print(loss)
    return mse_loss, perceptual_loss

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
feature_extractor = FeatureExtractor1(n_layers=[1, 3, 13, 20]).to(device)
print("pytorch: ")
mse_loss, perceptual_loss = loss_function1(image1_torch.to(device), image2_torch.to(device),
                                           feature_extractor)
print(f"{mse_loss} {perceptual_loss} {mse_loss + perceptual_loss}")

Moreover, these models were trained for classification accuracy, not to reproduce each other's activations. As long as both networks predict the same class (the argmax prints in the example above), the remaining differences between their intermediate feature maps are expected rather than a sign of a bug in your loss function.
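
One source of error worth ruling out is the layer mapping itself. In this case the indices and names do correspond; assuming the standard torchvision VGG16 layout, you can confirm it by printing the feature stack (again a sketch added for illustration):

import torchvision.models as models

# List every layer of torchvision's VGG16 feature stack with its index.
features = models.vgg16(pretrained=True).features
for idx, layer in enumerate(features):
    print(idx, layer)

# Indices 1, 3, 13 and 20 are the ReLUs after conv1_1, conv1_2, conv3_2 and
# conv4_2 -- the post-activation outputs that Keras exposes as block1_conv1,
# block1_conv2, block3_conv2 and block4_conv2 (Keras fuses the ReLU into the
# Conv2D layer). So both extractors tap equivalent layers, and the remaining
# differences come from the weights and the preprocessing.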
