Can't improve AlexNet accuracy on Oxford-102 (TensorFlow)



Hi, I'm trying to implement AlexNet with pretrained weights and train the network on the Oxford-102 dataset, but I keep getting 0.9% accuracy the whole time, and changing the parameters doesn't help. The code is below. Can anyone help me?

I am following this tutorial.

I swapped the given splits: the official test set (which is larger) is used as my training set, and the official training set is used as my test set. I use gradient descent as the optimizer.

I built AlexNet almost exactly the way the article does. Could the way I compute accuracy be wrong?

Here is how I load the data:

import os
import sys
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from skimage.io import imread
from skimage.transform import resize
from scipy.io import loadmat
import tensorflow as tf
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')
set_ids = loadmat('setid.mat')
set_ids
test_ids = set_ids['trnid'].tolist()[0]   # official train split (smaller), used here for testing
train_ids = set_ids['tstid'].tolist()[0]  # official test split (larger), used here for training
def indexes_processing(int_list):
    # Zero-pad every integer id to five digits so it matches the image
    # file names (e.g. 42 -> '00042' -> 'jpg/image_00042.jpg').
    return [str(element).zfill(5) for element in int_list]
raw_train_ids = indexes_processing(train_ids)
raw_test_ids = indexes_processing(test_ids)
train_images = []
test_images = []
train_labels = []
test_labels = []
image_labels = (loadmat('imagelabels.mat')['labels'] - 1).tolist()[0]
labels = ['pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold', 'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle', 'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris', 'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily', 'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth', 'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william', 'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly', 'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose', 'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue', 'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion', 'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium', 'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?', 'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy', 'osteospermum', 'spring crocus', 'bearded iris', 'windflower', 'tree poppy', 'gazania', 'azalea', 'water lily', 'rose', 'thorn apple', 'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium', 'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose', 'tree mallow', 'magnolia', 'cyclamen ', 'watercress', 'canna lily', 'hippeastrum ', 'bee balm', 'ball moss', 'foxglove', 'bougainvillea', 'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower', 'trumpet creeper', 'blackberry lily']
labels[16]
def one_hot_encode(labels):
    '''
    One hot encode the output labels to be numpy arrays of 0s and 1s
    '''
    out = np.zeros((len(labels), 102))
    for index, element in enumerate(labels):
        out[index, element] = 1
    return out
class ProcessImage():
    def __init__(self):           
        self.i = 0
        self.training_images = np.zeros((6149, 227, 227, 3))
        self.training_labels = None
        self.testing_images = np.zeros((1020, 227, 227, 3))
        self.testing_labels = None
    def set_up_images(self):
        print('Processing Training Images...')
        i = 0
        for element in raw_train_ids:
            img = imread('jpg/image_{}.jpg'.format(element))
            img = resize(img, (227, 227))
            self.training_images[i] = img
            i += 1
        print('Done!')
        i = 0
        print('Processing Testing Images...')
        for element in raw_test_ids:
            img = imread('jpg/image_{}.jpg'.format(element))
            img = resize(img, (227, 227))
            self.testing_images[i] = img
            i += 1
        print('Done!')
        print('Processing Training and Testing Labels...')
        encoded_labels = one_hot_encode(image_labels)
        for train_id in train_ids:
            train_labels.append(encoded_labels[train_id - 1])
        for test_id in test_ids:
            test_labels.append(encoded_labels[test_id - 1])
        self.training_labels = train_labels
        self.testing_labels = test_labels
        print('Done!')
    def next_batch(self, batch_size):
        # Return the next slice of training data, wrapping around at the end
        # of the set (the last batch of a pass may be shorter than batch_size).
        x = self.training_images[self.i:self.i + batch_size]
        y = self.training_labels[self.i:self.i + batch_size]
        self.i = (self.i + batch_size) % len(self.training_images)
        return x, y
image_processor = ProcessImage()
image_processor.set_up_images()

My graph:

# Helper Functions for AlexNet
def init_weights(filter_height, filter_width, num_channels, num_filters):
    init_random_dist = tf.truncated_normal([filter_height, filter_width, num_channels, num_filters], stddev=0.1)
    return tf.Variable(init_random_dist)
def init_bias(shape):
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)
def conv2d(x, W, stride_y, stride_x, padding='SAME'):
    return tf.nn.conv2d(x, W, strides=[1,stride_y,stride_x,1], padding=padding)
def max_pool(x, filter_height, filter_width, stride_y, stride_x, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1,filter_height,filter_width,1], strides=[1,stride_y,stride_x,1], padding=padding)
def conv_layer(input_x, filter_height, filter_width, num_channels, num_filters, stride_y, stride_x, padding='SAME', groups=1):
    # Grouped convolution as in the original AlexNet: for groups > 1 the input
    # and the filters are split along the channel axis, convolved separately,
    # and the results are concatenated again.
    W = init_weights(filter_height, filter_width, int(num_channels/groups), num_filters)
    b = init_bias([num_filters])
    convolve = lambda i, k: tf.nn.conv2d(i, k, strides=[1,stride_y,stride_x,1], padding=padding)
    if groups == 1:
        conv = convolve(input_x, W)
    else:
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=input_x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=W)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]
        conv = tf.concat(axis=3, values=output_groups)
    bias = tf.reshape(tf.nn.bias_add(conv, b), tf.shape(conv))
    return tf.nn.relu(bias)
def lrn(x, radius, alpha, beta, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=radius, alpha=alpha, beta=beta, bias=bias)
def fully_connected(input_layer, num_in, num_out, relu=True):
    W = tf.truncated_normal([num_in, num_out], stddev=0.1)
    W = tf.Variable(W)
    b = init_bias([num_out])
    out = tf.nn.xw_plus_b(input_layer, W, b)
    if relu:
        return tf.nn.relu(out)
    else:
        return out
def drop_out(x, keep_prob):
    return tf.nn.dropout(x, keep_prob=keep_prob)
x = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
y_true = tf.placeholder(tf.float32, shape=[None, 102])
keep_prob = tf.placeholder(tf.float32)
# Create the graph
# 1st Layer: Conv (w ReLu) -> Lrn -> Pool
conv_1 = conv_layer(x, filter_height=11, filter_width=11, num_channels=3, num_filters=96, stride_y=4, stride_x=4, padding='VALID')
norm_1 = lrn(conv_1, radius=2, alpha=1e-05, beta=0.75)
pool_1 = max_pool(norm_1, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')
pool_1.get_shape()
# 2nd Layer: Conv (w ReLu) -> Lrn -> Pool
conv_2 = conv_layer(pool_1, filter_height=5, filter_width=5, num_channels=96, num_filters=256, stride_y=1, stride_x=1, groups=2)
norm_2 = lrn(conv_2, radius=2, alpha=1e-05, beta=0.75)
pool_2 = max_pool(norm_2, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')
# 3rd Layer: Conv (w ReLu)
conv_3 = conv_layer(pool_2, filter_height=3, filter_width=3, num_channels=256, num_filters=384, stride_y=1, stride_x=1)
# 4th Layer: Conv (w ReLu)
conv_4 = conv_layer(conv_3, filter_height=3, filter_width=3, num_channels=384, num_filters=384, stride_y=1, stride_x=1, groups=2)
# 5th Layer: Conv (w ReLu) -> Pool
conv_5 = conv_layer(conv_4, filter_height=3, filter_width=3, num_channels=384, num_filters=256, stride_y=1, stride_x=1, groups=2)
pool_5 = max_pool(conv_5, filter_height=3, filter_width=3, stride_y=2, stride_x=2, padding='VALID')
# 6th Layer: Flatten -> FC (w ReLu) -> Dropout
pool_6_flat = tf.reshape(pool_5, [-1, 6*6*256])
full_6 = fully_connected(pool_6_flat, 6*6*256, 4096)
full_6_dropout = drop_out(full_6, keep_prob)
# 7th Layer: FC (w ReLu) -> Dropout
full_7 = fully_connected(full_6_dropout, 4096, 4096)
full_7_dropout = drop_out(full_7, keep_prob)
# 8th Layer: FC and return unscaled activations
y_pred = fully_connected(full_7_dropout, 4096, 102, relu=False)

Loss function and optimizer:

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
saver = tf.train.Saver()

Running the session:

with tf.Session() as sess:
    sess.run(init)
    for i in range(15000):
        batches = image_processor.next_batch(128)
        sess.run(train, feed_dict={x:batches[0], y_true:batches[1], keep_prob:0.5})
        if (i%1000 == 0):
            print('On Step {}'.format(i))
            print('Accuracy is: ')
            matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
            acc = tf.reduce_mean(tf.cast(matches, tf.float32))
            print(sess.run(acc, feed_dict={x:image_processor.testing_images, y_true:image_processor.testing_labels, keep_prob:1.0}))
            print('Saving model...')
            saver.save(sess, 'models/model_iter.ckpt', global_step=i)
            print('Saved at step: {}'.format(i))
            print('\n')
    print('Saving final model...')
    saver.save(sess, 'models/model_final.ckpt')
    print('Saved')

I keep getting an accuracy of 0.00903922 over and over again (across all 15000 training iterations), no matter how much I change the parameters. I even tried changing the image size from 224 to 227, but it still gives me the same accuracy of 0.00903922.
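For reference, 0.00903922 is roughly chance level for 102 classes (1/102 ≈ 0.0098), which usually means the network predicts almost the same class for every image. A minimal diagnostic sketch, assuming the final checkpoint saved above, to check whether the predictions collapse onto a single class:

# Diagnostic sketch: if one class dominates these counts, the network has
# collapsed to a (near-)constant prediction.
predicted_class = tf.argmax(y_pred, 1)
with tf.Session() as sess:
    saver.restore(sess, 'models/model_final.ckpt')  # checkpoint written by the session above
    preds = sess.run(predicted_class, feed_dict={x:image_processor.testing_images, keep_prob:1.0})
    classes, counts = np.unique(preds, return_counts=True)
    print(dict(zip(classes.tolist(), counts.tolist())))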

Your accuracy computation looks fine to me, although defining it inside the loop every time is somewhat odd.
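Building the matches and acc tensors inside the loop adds new nodes to the graph on every iteration. A minimal sketch of the usual pattern, reusing the names from the question: define the ops once, next to the rest of the graph, and only evaluate them inside the loop.

# Define the accuracy ops once, together with the rest of the graph ...
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))
# ... and only run them during training:
with tf.Session() as sess:
    sess.run(init)
    for i in range(15000):
        batch_x, batch_y = image_processor.next_batch(128)
        sess.run(train, feed_dict={x:batch_x, y_true:batch_y, keep_prob:0.5})
        if i % 1000 == 0:
            print(sess.run(acc, feed_dict={x:image_processor.testing_images, y_true:image_processor.testing_labels, keep_prob:1.0}))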

What bothers me is that you only train for ten steps. Your training set seems to consist of 6149 images, and you train in batches of 128. After ten such steps you have looked at only 1280 of the roughly 6000 images, which is far too few to see any effect on the accuracy.

Instead, train for several full passes over the training set, i.e. for several epochs. The exact number depends on several factors such as the data and the network, but you should train for at least 10 epochs; at 6149 images in batches of 128 that is about 48 steps per epoch, so roughly 480 training steps.
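A minimal sketch of such an epoch-based loop, reusing image_processor and train from the question and the acc op defined in the sketch above (num_epochs = 10 is just the suggested lower bound):

num_epochs = 10
batch_size = 128
steps_per_epoch = len(image_processor.training_images) // batch_size  # 6149 // 128 = 48
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        for step in range(steps_per_epoch):
            batch_x, batch_y = image_processor.next_batch(batch_size)
            sess.run(train, feed_dict={x:batch_x, y_true:batch_y, keep_prob:0.5})
        # Evaluate on the held-out set once per epoch.
        val_acc = sess.run(acc, feed_dict={x:image_processor.testing_images, y_true:image_processor.testing_labels, keep_prob:1.0})
        print('Epoch {}: test accuracy {:.4f}'.format(epoch + 1, val_acc))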
