We're having a problem with code we run in an academic course, and I'd appreciate some help from people on the forum. We are training a CNN model to classify EEG sleep recordings as male or female. This has been done in some papers, but we're using a different dataset in our course. The problem is that no matter what we do, the network does not learn: we've tried changing the number of layers, the size of each layer, the learning rate, the number of epochs, the batch size, and the optimizer, and adding more data. We also tried an RNN with GRUs (gated recurrent units) instead of the CNN, but it didn't help. Below are some examples of the network not learning:
[Example 1] [Example 2] (screenshots of loss/accuracy curves from runs where the network does not learn)
Note that the "test" dataset is actually the validation set.
We can't find any problem in the machine-learning part of the code. We think the problem may be in the data-processing part, but we're not sure, so we'd appreciate it if someone could check whether they can spot a problem in the machine-learning part. Before that, some context: we have 200 eight-hour sleep EEG recordings from 200 patients, sampled at 100 Hz. Each sample has shape 4x2x1000 (a batch of 4 × 2 EEG channels per recording × 1000 voltage values, representing 10 seconds of recording).
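For reference, this is what a single batch should look like (a minimal sketch with dummy data; the double dtype matches the .double() cast used in the training loop below):

import torch

batch_size, n_channels, n_samples = 4, 2, 1000  # 10 s of 2-channel EEG at 100 Hz
X_dummy = torch.randn(batch_size, n_channels, n_samples, dtype=torch.double)  # voltage values
y_dummy = torch.tensor([1., 0., 1., 0.], dtype=torch.double)                  # one sex label per sub-recording
print(X_dummy.shape)  # torch.Size([4, 2, 1000])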
Here is the machine-learning part (I'm pasting a lot of code in case anyone says more is needed…):
import _pickle
import contextlib
import io
import os
import numpy as np
import torch
import torch.nn as nn
from aux_eegproj_funcs_simplified import *
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from sklearn.metrics import accuracy_score, f1_score, ConfusionMatrixDisplay
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import torchviz
import mne
# DATASET
table_name = 'sleep_1_10sec.csv'  # table holding the start and end time of each sample (sub-recording)
eeg_ds = EEGDataset(EEGTransform, exp_table=build_experiment_tbl(table_name), filtered=0)  # EEGDataset and EEGTransform are classes
batch_sz = 4  # batch size
# Split the data into train, validation and test sets
dl_train, dl_val, dl_test = tt_split_by_pid_mf(dataset=eeg_ds, batch_size=batch_sz, train_rt=.8, num_workers=0, verbose=1)
nF = sum(eeg_ds.table['sex (F=1)'][dl_train.sampler.indices] == 1)  # number of females (coded 1)
nM = sum(eeg_ds.table['sex (F=1)'][dl_train.sampler.indices] == 2)  # number of males (coded 2)
wF = nM / (nF + nM)  # weight for the female class: the fraction of males (inverse-frequency weighting)
wM = nF / (nF + nM)  # weight for the male class: the fraction of females
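These counts are computed here, but in the code shown below only wM is ever handed to the training function. As a hedged sketch only (not what the code currently does), one standard way to use such counts is BCEWithLogitsLoss with pos_weight, which weights the positive class (female = 1) by the imbalance ratio; it also requires removing the final nn.Sigmoid() from the model, since it expects raw logits:

# Sketch: class-imbalance weighting via pos_weight (assumes the model outputs raw logits)
pos_weight = torch.tensor([nM / nF], dtype=torch.double)
weighted_bce = nn.BCEWithLogitsLoss(pos_weight=pos_weight)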
The model:
class eegSexNet(nn.Module):  # network to classify sex
    def __init__(self, input_shape):
        """
        :param input_shape: input tensor shape - any batch size works, as it is only used to compute the FC input size.
        """
        super().__init__()
        # Define the CNN layers in an nn.Sequential.
        # The first layer's in_channels must match the number of input channels.
        self.CNN = nn.Sequential(
            nn.Conv1d(in_channels=input_shape[1], out_channels=8, kernel_size=5, stride=1, padding=0, dilation=2),
            # TODO try changing the kernel sizes (they were 3)
            nn.ReLU(),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=5, stride=1, padding=0, dilation=2),
            nn.ReLU(),
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0, dilation=2),
            nn.ReLU(),
            Residual(in_channels=32)
        )
        # Run a dummy forward pass to compute the CNN output size, used as the input size of the fully-connected part.
        CNN_forward = self.CNN(torch.zeros(input_shape))
        self.FCs = nn.Sequential(
            nn.Linear(CNN_forward.shape[1] * CNN_forward.shape[2], 10),
            nn.ReLU(),
            nn.Linear(10, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Forward through the CNN, flatten, then forward through the linear layers.
        features = self.CNN(x)
        features = features.view(features.size(0), -1)  # flatten per sample
        scores = self.FCs(features)
        return torch.squeeze(scores)
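As a quick sanity check, once the Residual block below is defined, the shape flow can be verified end to end (a sketch with a dummy batch):

net = eegSexNet(torch.Size([4, 2, 1000]))
out = net(torch.zeros(4, 2, 1000))
print(out.shape)  # torch.Size([4]) - one score in (0, 1) per sample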
The residual block used in the model class:
class Residual(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        # The direct path: two Conv1d layers with a ReLU in between.
        # Padding keeps the sequence length L unchanged, so the skip connection can be added.
        self.direct_path = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=16, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=16, out_channels=32, kernel_size=7, padding=3)
        )
        # A convolution with kernel size 1 handles the case where the input and output channel counts mismatch.
        skip_layers = []
        if in_channels != 32:  # HOW DOES THIS PART WORK? When are you adding the layers
            skip_layers.append(
                nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=1, stride=1, padding=0, dilation=1,
                          bias=False)
            )
        self.skip_path = nn.Sequential(*skip_layers)  # an empty Sequential acts as the identity

    def forward(self, x):
        # Compute both paths, add the results, then apply ReLU (torch.relu) to activate the output.
        direct_output = self.direct_path(x)
        skip_output = self.skip_path(x)
        activated_output = torch.relu(direct_output + skip_output)
        return activated_output
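With in_channels=32 the skip path is an empty nn.Sequential, i.e. the identity; with any other channel count the kernel-size-1 convolution projects the input to 32 channels so the addition is valid. A quick shape check (a sketch):

blk32 = Residual(in_channels=32)
blk16 = Residual(in_channels=16)
print(blk32(torch.zeros(4, 32, 976)).shape)  # torch.Size([4, 32, 976]) - identity skip
print(blk16(torch.zeros(4, 16, 976)).shape)  # torch.Size([4, 32, 976]) - 1x1-conv skip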
The training loop:
def Train_Sex_Net(epochs=n_epochs, fn='None', optimizer=opt_sex, loss_function=bce):  # training SexNet
    # Note: the default arguments (n_epochs, opt_sex, bce) must already exist when this def runs.
    global sex_net
    gpu_0 = torch.device(1)  # second CUDA device
    label = 0  # select the sex label
    train_loss_vec = []
    test_loss_vec = []
    train_acc_vec = []
    test_acc_vec = []
    for i_epoch in range(epochs):
        train_loss = 0
        test_loss = 0
        # Train set
        train_loss, y_true_train, y_pred_train = forward_epoch(sex_net, dl_train, loss_function, optimizer,
                                                               wM, train_loss,
                                                               to_train=True, desc='Train', device=gpu_0, label=label)
        # Test set (actually the validation set, see the note above)
        test_loss, y_true_test, y_pred_test = forward_epoch(sex_net, dl_test, loss_function, optimizer, wM, test_loss,
                                                            to_train=False, desc='Test', device=gpu_0, label=label)
        # Metrics:
        train_loss = train_loss / len(dl_train)  # we want the mean over batches
        test_loss = test_loss / len(dl_test)
        train_loss_vec.append(train_loss)
        test_loss_vec.append(test_loss)
        train_accuracy = accuracy_score(y_true_train.cpu(),
                                        (y_pred_train.cpu().detach() > 0.5) * 1)
        test_accuracy = accuracy_score(y_true_test.cpu(),
                                       (y_pred_test.cpu().detach() > 0.5) * 1)
        train_acc_vec.append(train_accuracy)
        test_acc_vec.append(test_accuracy)
        print(f'\ntrain_loss={round(train_loss, 3)}; train_accuracy={round(train_accuracy, 3)}\n'
              f'test_loss={round(test_loss, 3)}; test_accuracy={round(test_accuracy, 3)}')
    return (train_loss_vec, train_acc_vec), (test_loss_vec, test_acc_vec)  # (val_loss_vec, val_acc_vec)
Called by the training loop:
def forward_epoch(model, dl, loss_function, optimizer, weight, total_loss=0,
                  to_train=False, desc=None, device=torch.device('cpu'), label=0):  # one full pass over the loader
    # label = 0 selects the sex label, label = 1 the age label.
    # total_loss accumulates over the entire epoch.
    # y_trues / y_preds collect per-sample values for the whole epoch; the last batch is at [-batch_size:].
    # NOTE: the `weight` argument (wM above) is accepted but never applied to the loss in this version.
    with tqdm(total=len(dl), desc=desc, ncols=100) as pbar:
        model = model.double().to(device)  # double precision, to work around a runtime memory issue
        y_trues = torch.empty(0).type(torch.int).to(device)
        y_preds = torch.empty(0).type(torch.int).to(device)
        for i_batch, (X, y) in enumerate(dl):
            X = X.to(device)
            X = X.type(torch.double)
            y = y[label].to(device)  # indexed because get_label returns (sex, age)
            y_pred = model(X)  # forward pass
            y_true = y.type(torch.double)
            loss = loss_function(y_pred, y_true)  # loss of one batch
            total_loss += loss.item()
            y_trues = torch.cat((y_trues, y_true))
            y_preds = torch.cat((y_preds, y_pred.detach()))  # detach so the whole epoch's graph isn't kept alive
            if to_train:
                optimizer.zero_grad()  # zero the gradients so they don't accumulate across batches
                loss.backward()        # backward pass: compute gradients
                optimizer.step()       # optimization step: apply the gradients
            pbar.update(1)  # progress bar
    return total_loss, y_trues, y_preds
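Since `weight` arrives here but is never used, a hedged sketch of one way it could be applied inside the loop, assuming females are labeled 1.0, males 0.0, and that both class weights are passed in as `weight = (wF, wM)` (an assumption; the calls above pass only wM):

# Sketch only: per-sample class weighting for one batch
wF_, wM_ = weight
sample_w = torch.where(y_true == 1,
                       torch.full_like(y_true, wF_),
                       torch.full_like(y_true, wM_))
loss = nn.BCELoss(weight=sample_w)(y_pred, y_true)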
Calling everything:
sex_net = eegSexNet(torch.Size([4, 2, 1000])) # Instantiate the network
learning_rate = 0.0001
opt_sex = torch.optim.Adam(params=sex_net.parameters(), lr=learning_rate) # Optimizer for eegnet
bce = nn.BCELoss()
n_epochs = 6
f0 = 'sex_k7' # output file name; the '.pickle' extension is added automatically
train_res, test_res = Train_Sex_Net(epochs=n_epochs, fn=f0)
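To see whether the curves move at all, the returned vectors can be plotted (a minimal sketch using the matplotlib import from above):

train_loss_vec, train_acc_vec = train_res
val_loss_vec, val_acc_vec = test_res  # "test" here is really the validation set

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(train_loss_vec, label='train'); ax1.plot(val_loss_vec, label='val')
ax1.set_title('mean BCE loss per epoch'); ax1.legend()
ax2.plot(train_acc_vec, label='train'); ax2.plot(val_acc_vec, label='val')
ax2.set_title('accuracy per epoch'); ax2.legend()
plt.show()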
Does anyone see anything in the code that could be the mistake? Or is the problem in the data-processing and selection parts I haven't shown?
I checked the code. In the class where you create the model with the CNN, adding a softmax layer in place of the sigmoid function might help you:
tf.nn.softmax(
logits, axis=None, name=None
)
The main purpose of the softmax function is to turn the (unnormalized) outputs of the K units of the fully-connected layer (e.g., represented as a K-element vector) into a probability distribution (a normalized output).
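In PyTorch terms, a minimal sketch of this suggestion: softmax only makes sense with two output units, and nn.CrossEntropyLoss already applies log-softmax internally, so the final Sigmoid and the BCELoss would both be dropped:

# Sketch: two-logit head instead of the single sigmoid unit
fc_head = nn.Linear(10, 2)             # would replace nn.Linear(10, 1) + nn.Sigmoid()
criterion = nn.CrossEntropyLoss()      # applies log-softmax internally

logits = fc_head(torch.randn(4, 10))   # dummy batch of 4 feature vectors
targets = torch.tensor([0, 1, 1, 0])   # class indices, e.g. 0 = male, 1 = female
loss = criterion(logits, targets)
probs = torch.softmax(logits, dim=1)   # explicit softmax only needed to report probabilities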
You also asked how one part of the Residual class works:
if in_channels != 32:  # HOW DOES THIS PART WORK? When are you adding the layers
    skip_layers.append(
        nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=1, stride=1, padding=0, dilation=1,
                  bias=False)
    )
If in_channels is not equal to 32, a kernel-size-1 convolution is appended to the skip path so the skipped input is projected to 32 channels before being added to the direct path. In the model you created, this branch never runs, because you only ever instantiate the residual with in_channels of 32:
nn.ReLU(),
Residual(in_channels=32)
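To make that concrete, a small sketch of what the kernel-size-1 convolution on the skip path does (shapes only, dummy values):

x = torch.zeros(4, 16, 976)  # skip input with 16 channels
proj = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=1, bias=False)
print(proj(x).shape)         # torch.Size([4, 32, 976]) - channels matched, length untouched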