This question is a continuation of this question, but uses a TensorFlow dataset.
So, if we use:

import tensorflow as tf
import numpy as np
from multiprocessing import Pool
from keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
# importing various types of hidden layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
# Adam optimizer for better LR and less loss
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
def model_arch():
    models = Sequential()
    # We are learning 64 filters
    # with a kernel size of 5x5
    models.add(Conv2D(64, (5, 5),
                      padding="same",
                      activation="relu",
                      input_shape=(28, 28, 1)))
    # Max pooling will reduce the
    # size with a kernel size of 2x2
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(128, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(256, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    # Once the convolutional and pooling
    # operations are done, the layer is
    # flattened and fully connected layers
    # are added
    models.add(Flatten())
    models.add(Dense(256, activation="relu"))
    # Finally, as there are 10 classes in
    # total, a fully connected layer of 10
    # units is created with a softmax
    # activation function
    models.add(Dense(10, activation="softmax"))
    return models
def _apply_df(data):
    model = model_arch()
    model.load_weights("/home/ggous/model_mnist.h5")
    return model.predict(data)

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    result = pool.map(_apply_df, np.array_split(data, workers))
    pool.close()
    return list(result)

def resize_and_rescale(data):
    data = tf.cast(data, tf.float32)
    data /= 255.0
    return data

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds.batch(1)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    fashion_mnist = tf.keras.datasets.fashion_mnist
    _, (test_images, test_labels) = fashion_mnist.load_data()
    test_images = after_prepare(test_images)
    results = apply_by_multiprocessing(test_images, workers=3)
    print(test_images.shape)           # (10000, 28, 28)
    print(len(results))                # 3
    print([x.shape for x in results])  # [(3334, 10), (3333, 10), (3333, 10)]

if __name__ == "__main__":
    main()
we get an error:
axis1: axis 0 is out of bounds for array of dimension 0
I just added:
def resize_and_rescale(data):
    data = tf.cast(data, tf.float32)
    data /= 255.0
    return data

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds.batch(1)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data
So, I create the TensorFlow dataset in after_prepare.

The saved model can be found here.

--- UPDATE ---

Now, it gives me the message:
F tensorflow/stream_executor/cuda/cuda_driver.cc:146] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
I saw this, so I tried:

multiprocessing.set_start_method('spawn', force=True)

at the beginning of the code, which now gives me a lot of messages:
2022-11-08 09:12:35.984897: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-08 09:12:35.984909: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-11-08 09:12:35.985087: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-08 09:12:35.985118: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
...
2022-11-08 09:12:36.618099: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.618274: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 230.40M (241592064 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.618437: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 207.36M (217433088 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.618447: W tensorflow/core/common_runtime/bfc_allocator.cc:360] Garbage collection: deallocate free memory regions (i.e., allocations) so that we can re-allocate a larger region to avoid OOM due to memory fragmentation. If you see this message frequently, you are running near the threshold of the available device memory and re-allocation may incur great performance overhead. You may try smaller batch sizes to observe the performance impact. Set TF_ENABLE_GPU_GARBAGE_COLLECTION=false if you'd like to disable this feature.
2022-11-08 09:12:36.629520: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.629542: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 203.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2022-11-08 09:12:36.629618: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.629987: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.630001: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 203.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2022-11-08 09:12:36.630110: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 230.40M (241592064 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
....
2022-11-08 09:12:37.256468: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.256640: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.256810: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.256988: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.257166: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.257224: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at conv_ops_fused_impl.h:601 : NOT_FOUND: No algorithm worked! Error messages:
Profiling failure on CUDNN engine 1#TC: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
Profiling failure on CUDNN engine 1: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
The problem comes from the data preparation step. The initial code takes data of shape (10000, 28, 28) and uses np.array_split to split it into a list of numpy arrays, one per worker (here a list of 3 numpy arrays, since workers=3), for each worker to process. After your after_prepare function returns, however, your input is a list of 10000 tensors, because you are using batch(1), and this data raises the error when it reaches the np.array_split call.
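As a minimal sketch of what goes wrong (assuming TF 2.x, with a dummy array standing in for the Fashion-MNIST images): np.array_split internally converts its argument with numpy's asanyarray, and a tf.data.Dataset is not array-like, so it ends up wrapped in a 0-d object array that cannot be split along axis 0.

import numpy as np
import tensorflow as tf

# stand-in for the prepared dataset: batch(1) over dummy images
ds = tf.data.Dataset.from_tensor_slices(np.zeros((10, 28, 28), np.float32)).batch(1)

try:
    np.array_split(ds, 3)
except Exception as e:
    # the Dataset becomes a 0-d object array, so axis 0 does not exist
    print(e)  # axis1: axis 0 is out of bounds for array of dimension 0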
You have two options to solve this problem:
Option 1. Don't batch the data in the prepare function, just return ds. Then, in the apply_by_multiprocessing function, change

result = pool.map(_apply_df, np.array_split(data, workers))

to

result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
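Applied in context, apply_by_multiprocessing would then look like this (a sketch, relying on the Pool and np imports from the code above):

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # materialize the unbatched dataset into a list of numpy arrays,
    # so np.array_split has a real sequence to divide
    arrays = list(data.as_numpy_iterator())
    result = pool.map(_apply_df, np.array_split(arrays, workers))
    pool.close()
    return list(result)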
Option 2. Again, don't batch the data in the prepare function, just return ds. Then, in the apply_by_multiprocessing function, change

result = pool.map(_apply_df, np.array_split(data, workers))

to

result = pool.map(_apply_df, data.batch(int(np.ceil(len(data) / workers))))

(np.ceil returns a float, so it is cast with int() before being passed to batch()). Note that this produces slightly different output shapes, because of the way the batch sizes are computed.
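For concreteness, here is the arithmetic behind that note, for the 10000 test images and workers=3:

import numpy as np

batch_size = int(np.ceil(10000 / 3))  # 3334
# Option 2 batches: 3334 + 3334 + 3332 = 10000 -> shapes (3334, 10), (3334, 10), (3332, 10)
# np.array_split:   3334 + 3333 + 3333 = 10000 -> shapes (3334, 10), (3333, 10), (3333, 10)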
Here is a working code example using Option 2:

import os
import tensorflow as tf
import numpy as np
import multiprocessing
from multiprocessing import Pool
from itertools import chain
from keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
# importing various types of hidden layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
# Adam optimizer for better LR and less loss
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
def model_arch():
    models = Sequential()
    # We are learning 64 filters
    # with a kernel size of 5x5
    models.add(Conv2D(64, (5, 5),
                      padding="same",
                      activation="relu",
                      input_shape=(28, 28, 1)))
    # Max pooling will reduce the
    # size with a kernel size of 2x2
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(128, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(256, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    # Once the convolutional and pooling
    # operations are done, the layer is
    # flattened and fully connected layers
    # are added
    models.add(Flatten())
    models.add(Dense(256, activation="relu"))
    # Finally, as there are 10 classes in
    # total, a fully connected layer of 10
    # units is created with a softmax
    # activation function
    models.add(Dense(10, activation="softmax"))
    return models
def _apply_df(data):
    model = model_arch()
    model.load_weights("model_mnist.h5")
    return model.predict(data)

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
    result = pool.map(_apply_df, data.batch(int(np.ceil(len(data) / workers))))
    pool.close()
    return list(result)

def resize_and_rescale(data):
    data = tf.cast(data, tf.float32)
    data /= 255.0
    return data

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    # 'spawn' avoids forking a parent whose CUDA context is already
    # initialized (the CUDA_ERROR_NOT_INITIALIZED issue above)
    multiprocessing.set_start_method('spawn')
    # hide the GPU so the parent and the spawned workers all run on
    # CPU instead of competing for GPU memory
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    fashion_mnist = tf.keras.datasets.fashion_mnist
    _, (test_images, test_labels) = fashion_mnist.load_data()
    test_images = after_prepare(test_images)
    results = apply_by_multiprocessing(test_images, workers=3)
    print(test_images)                 # <MapDataset with shape=(28, 28)>
    print(len(results))                # 3
    print([x.shape for x in results])  # [(3334, 10), (3334, 10), (3332, 10)]

    results_flatten = list(chain.from_iterable(results))
    print(len(results_flatten), results_flatten[0].shape)  # 10000 (10,)

if __name__ == "__main__":
    main()
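If a single (10000, 10) array is more convenient than the flattened list, the per-worker outputs can also be stacked (a small variation on the last step above):

predictions = np.vstack(results)                # shape (10000, 10)
predicted_classes = predictions.argmax(axis=1)  # one class index per image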