Convolutional autoencoder does not learn from the training data



I am trying to build a convolutional autoencoder that can process seismic waveforms. The problem I am running into is that my model does not seem to learn anything from the data: when I test it by comparing a waveform against its reconstruction, the reconstruction comes out as a flat line. It should produce a waveform similar to the original:

original_vs_reconstructed_image (blue: original, orange: reconstructed)

On top of that, in my per-epoch loss plot, the training and validation losses do not change at all over time.

Honestly, I am not sure where to focus my debugging, but I suspect the problem is in the autoencoder itself, in how the training function is structured, or in the loss function I am using (MSE).

I am also new to machine learning, so I may be missing something obvious or doing something completely wrong.
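To make "does not learn anything" concrete, the simplest check I can think of is whether the model can overfit even a single waveform. A rough sketch of that check (it reuses the AutoEncoder class, device, and train_waves from the full code below; the 200 steps are arbitrary, and the float() cast is only there to match the layers' float32 weights):

# Sketch: try to overfit one waveform. If even this stays flat,
# the problem is likely in the model/optimizer rather than the data.
single = train_waves[0:1].detach().float().to(device)  # one waveform, shape (1, 1, 3126)
sanity_model = AutoEncoder().to(device)
sanity_opt = torch.optim.Adam(sanity_model.parameters(), lr=1e-3)
mse = nn.MSELoss()
for step in range(200):  # step count is arbitrary
    out = sanity_model(single)
    loss = mse(out, single)
    sanity_opt.zero_grad()
    loss.backward()
    sanity_opt.step()
print('single-sample loss:', loss.item())  # should head toward 0 if the model can learn at all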

Here is the code relevant to the problem:

import glob
import numpy as np
import obspy as obs
import sklearn.model_selection
import torch
import torch.nn as nn
import torch.nn.functional
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import random
import sys
files = glob.glob('/loggerhead/coke/wf_Tony/trim/15_62.5/1108/DH1' + '/*.mseed')
#  empty list to store the properly read waveforms
waves = []
#  read all the files
for f in files:
    temp_wave = obs.read(f)
    A = temp_wave[0].data
    # normalization
    B = A/np.max(np.abs(A))
    # ensures every wave is size 3126
    waves.append(np.pad(B, (0, 3126 - B.size), 'constant'))
wave_arr = np.vstack(waves)
train_arr, test_arr = sklearn.model_selection.train_test_split(wave_arr, train_size=0.95)
train_torch = torch.tensor(train_arr, requires_grad=True).clone()
test_torch = torch.tensor(test_arr, requires_grad=True).clone()
train_waves = train_torch.unsqueeze_(1)
test_waves = test_torch.unsqueeze_(1)
k = 7
p = k//2

class AutoEncoder(nn.Module):
    def __init__(self):
        #  make sure to always initialize the super class when using outside methods
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=k, padding=p), nn.LeakyReLU(),
            nn.Conv1d(64, 64, kernel_size=k, padding=p), nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(64, 128, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(128, 128, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(128, 256, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(256, 256, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(256, 512, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(512, 512, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(512, 1024, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(1024, 1024, kernel_size=k, padding=p),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(1024, 512, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(512, 256, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(256, 128, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(128, 64, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.Conv1d(64, 1, kernel_size=1, padding=p), nn.Tanh()
        )
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = AutoEncoder()
loss_function_MSE = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')
model.to(device)
# Training function
def train_epoch(model, device, loss_fn, optimizer):
    # Set train mode for both the encoder and the decoder
    model.train()
    train_loss = []
    train_tester = train_waves.clone().detach()
    # shuffle the training dataset
    train_tester = train_tester[torch.randperm(train_tester.size()[0])]
    for wave in train_tester:
        wave = wave.to(device)
        output_thing = model(wave)
        loss = loss_fn(output_thing, wave)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #   Print batch loss
        print('\t partial train loss (single batch): %f' % (loss.data))
        train_loss.append(loss.detach().cpu().numpy())
    return np.mean(train_loss)
# Testing function
def test_epoch(model, device, loss_fn):
    # Set evaluation mode for model
    model.eval()
    with torch.no_grad(): # No need to track the gradients
        # Define the lists to store the outputs for each batch
        conc_out = []
        conc_label = []
        for wave in test_waves:
            # Move tensor to the proper device
            wave = wave.to(device)
            # model data
            output_thing = model(wave)
            # Append the network output and the original image to the lists
            conc_out.append(output_thing.cpu())
            conc_label.append(wave.cpu())
        # Create a single tensor with all the values in the lists
        conc_out = torch.cat(conc_out)
        conc_label = torch.cat(conc_label)
        # Evaluate global loss
        val_loss = loss_fn(conc_out, conc_label)
    return val_loss.data
def plot_outputs(model):
    rand_num = random.randint(0, 4000)
    reconstructed = wave_torch_best[rand_num].to(device)
    reconstructed = model(reconstructed)
    new_numpy = reconstructed.detach().cpu().numpy()
    og = wave_torch_best[rand_num].detach().cpu().numpy()
    plt.plot(og[0, :])
    plt.plot(new_numpy[0, :])
    plt.savefig('/loggerhead/lwrigh89/Plots/Comparing Plots/reconstructed.png')
num_epochs = 4
diz_loss = {'train_loss':[],'val_loss':[]}
for epoch in range(num_epochs):
    train_loss = train_epoch(model, device, loss_function_MSE, optimizer)
    val_loss = test_epoch(model, device, loss_function_MSE)
    print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs, train_loss, val_loss))
    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(val_loss)
    if epoch == num_epochs + 1:
        torch.save(model.state_dict(), '/loggerhead/lwrigh89/Model/newmodel.pt')
        # plot og vs reconstructed
        plot_outputs(model)
        plt.figure(figsize=(10, 8))
        plt.semilogy(diz_loss['train_loss'], label='Train')
        plt.semilogy(diz_loss['val_loss'], label='Valid')
        plt.xlabel('Epoch')
        plt.ylabel('Average Loss')
        plt.legend()
        plt.savefig('/loggerhead/lwrigh89/Plots/Epochs/epochgraph.png')
# exit program
sys.exit()

I am running this on a GPU server with CUDA 11.4, Python 3.10.5, and PyTorch 1.12.0.

Any help or guidance would be greatly appreciated.

I tried your code (with some changes) and it seems to run fine. I noticed a couple of things in your post:

(1) I'm afraid the condition if epoch == num_epochs + 1: is never satisfied, because when you write for epoch in range(num_epochs):, the largest value epoch takes inside the loop is num_epochs - 1. As a result, your output .png files are probably never being updated; please check the timestamps of the saved png files.
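You can see the off-by-one directly:

num_epochs = 4
print(list(range(num_epochs)))  # [0, 1, 2, 3] -- num_epochs itself never appears,
# so "epoch == num_epochs + 1" can never be True, while
# "epoch == num_epochs - 1" fires exactly on the last iteration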

(2) I tried your code; the result is shown below. I used random data to train the model, so this says nothing about whether the accuracy is high or not, but at least the output is not "all zeros". I have attached the code below; I added a ### changed comment everywhere I modified your original code.

import glob
import numpy as np
import obspy as obs
import sklearn.model_selection
import torch
import torch.nn as nn
import torch.nn.functional
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import random
import sys
# files = glob.glob('/loggerhead/coke/wf_Tony/trim/15_62.5/1108/DH1' + '/*.mseed')  ### changed
#  empty list to store the properly read waveforms
waves = []
#  read all the files
for f in range(4):#files:  ### changed
    #temp_wave = obs.read(f)  ### changed
    #A = temp_wave[0].data  ### changed
    A = np.random.random([1, 3126])  ### changed
    # normalization
    B = A/np.max(np.abs(A))
    # ensures every wave is size 3126
    waves.append(np.pad(B, (0, 3126 - B.size), 'constant'))
wave_arr = np.vstack(waves)
train_arr, test_arr = sklearn.model_selection.train_test_split(wave_arr, train_size=0.95)
train_torch = torch.tensor(train_arr, requires_grad=True).clone().float()
test_torch = torch.tensor(test_arr, requires_grad=True).clone().float()
train_waves = train_torch.unsqueeze_(1)
test_waves = test_torch.unsqueeze_(1)
k = 7
p = k//2

class AutoEncoder(nn.Module):
    def __init__(self):
        #  make sure to always initialize the super class when using outside methods
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=k, padding=p), nn.LeakyReLU(),
            nn.Conv1d(64, 64, kernel_size=k, padding=p), nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(64, 128, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(128, 128, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(128, 256, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(256, 256, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(256, 512, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(512, 512, kernel_size=k, padding=p),
            nn.LeakyReLU(), nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(512, 1024, kernel_size=k, padding=p), nn.LeakyReLU(), nn.Conv1d(1024, 1024, kernel_size=k, padding=p),
            nn.LeakyReLU()
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(1024, 512, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(512, 256, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(256, 128, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose1d(128, 64, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.Conv1d(64, 1, kernel_size=1, padding=p), nn.Tanh()
        )
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = AutoEncoder()
loss_function_MSE = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')
model.to(device)
# Training function
def train_epoch(model, device, loss_fn, optimizer):
    # Set train mode for both the encoder and the decoder
    model.train()
    train_loss = []
    train_tester = train_waves.clone().detach()
    # shuffle the training dataset
    train_tester = train_tester[torch.randperm(train_tester.size()[0])]
    for wave in train_tester:
        wave = wave.to(device)
        output_thing = model(wave)
        loss = loss_fn(output_thing, wave)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #   Print batch loss
        print('\t partial train loss (single batch): %f' % (loss.data))
        train_loss.append(loss.detach().cpu().numpy())
    return np.mean(train_loss)
# Testing function
def test_epoch(model, device, loss_fn):
    # Set evaluation mode for model
    model.eval()
    with torch.no_grad(): # No need to track the gradients
        # Define the lists to store the outputs for each batch
        conc_out = []
        conc_label = []
        for wave in test_waves:
            # Move tensor to the proper device
            wave = wave.to(device)
            # model data
            output_thing = model(wave)
            # Append the network output and the original image to the lists
            conc_out.append(output_thing.cpu())
            conc_label.append(wave.cpu())
        # Create a single tensor with all the values in the lists
        conc_out = torch.cat(conc_out)
        conc_label = torch.cat(conc_label)
        # Evaluate global loss
        val_loss = loss_fn(conc_out, conc_label)
    return val_loss.data
def plot_outputs(model):
    rand_num = 0#random.randint(0, 4000)  ### changed
    wave_torch_best = torch.from_numpy(B).float().unsqueeze_(0)  ### changed
    reconstructed = wave_torch_best[rand_num].to(device)
    reconstructed = model(reconstructed)
    new_numpy = reconstructed.detach().cpu().numpy()
    og = wave_torch_best[rand_num].detach().cpu().numpy()
    plt.plot(og[0, :])
    plt.plot(new_numpy[0, :])
    plt.savefig('reconstructed.png')
num_epochs = 100  ### changed
diz_loss = {'train_loss':[],'val_loss':[]}
if(True):
    for epoch in range(num_epochs):
        train_loss = train_epoch(model, device, loss_function_MSE, optimizer)
        val_loss = test_epoch(model, device, loss_function_MSE)
        print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs, train_loss, val_loss))
        diz_loss['train_loss'].append(train_loss)
        diz_loss['val_loss'].append(val_loss)
        if epoch == num_epochs - 1:  ### changed
            print('********')
            torch.save(model.state_dict(), 'newmodel.pt')
            # plot og vs reconstructed
            plot_outputs(model)
            plt.figure(figsize=(10, 8))
            plt.semilogy(diz_loss['train_loss'], label='Train')
            plt.semilogy(diz_loss['val_loss'], label='Valid')
            plt.xlabel('Epoch')
            plt.ylabel('Average Loss')
            plt.legend()
            plt.savefig('epochgraph.png')
# exit program
#sys.exit()  ### changed
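One more remark on the .float() casts marked with ### changed: in my random-data version they are necessary, because np.random.random returns float64 and torch.tensor keeps that dtype, while the nn.Conv1d weights are float32; mixing the two raises a dtype error. A minimal standalone illustration:

import numpy as np
import torch
import torch.nn as nn

x = torch.tensor(np.random.random([1, 1, 8]))  # dtype: torch.float64
conv = nn.Conv1d(1, 4, kernel_size=3)          # weight dtype: torch.float32
try:
    conv(x)                                    # mismatched dtypes
except RuntimeError as err:
    print(err)                                 # complains about Double vs Float input/weight
print(conv(x.float()).shape)                   # works after casting to float32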
