This question is a continuation of this question, but uses a TensorFlow dataset.
So, if we use:

import tensorflow as tf
import numpy as np
from multiprocessing import Pool
from keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
# importing various types of hidden layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
# Adam optimizer for better LR and less loss
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
def model_arch():
    models = Sequential()
    # We are learning 64 filters
    # with a kernel size of 5x5
    models.add(Conv2D(64, (5, 5),
                      padding="same",
                      activation="relu",
                      input_shape=(28, 28, 1)))
    # Max pooling will reduce the
    # size with a kernel size of 2x2
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(128, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(256, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    # Once the convolutional and pooling
    # operations are done, the layer is
    # flattened and fully connected layers
    # are added
    models.add(Flatten())
    models.add(Dense(256, activation="relu"))
    # Finally, as there are 10 classes in
    # total, a fully connected layer of 10
    # units is created with a softmax
    # activation function
    models.add(Dense(10, activation="softmax"))
    return models
def _apply_df(data):
    model = model_arch()
    model.load_weights("/home/ggous/model_mnist.h5")
    return model.predict(data)

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    result = pool.map(_apply_df, np.array_split(data, workers))
    pool.close()
    return list(result)

def resize_and_rescale(data):
    data = tf.cast(data, tf.float32)
    data /= 255.0
    return data

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds.batch(1)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    fashion_mnist = tf.keras.datasets.fashion_mnist
    _, (test_images, test_labels) = fashion_mnist.load_data()
    test_images = after_prepare(test_images)
    results = apply_by_multiprocessing(test_images, workers=3)
    print(test_images.shape)           # (10000, 28, 28)
    print(len(results))                # 3
    print([x.shape for x in results])  # [(3334, 10), (3333, 10), (3333, 10)]

if __name__ == "__main__":
    main()
we get an error:
axis1: axis 0 is out of bounds for array of dimension 0
I just added:
def resize_and_rescale(data):
    data = tf.cast(data, tf.float32)
    data /= 255.0
    return data

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds.batch(1)

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data
So, I create the TensorFlow dataset in after_prepare.

The saved model can be found here.

--- UPDATE ---

Now, it gives me the message:
F tensorflow/stream_executor/cuda/cuda_driver.cc:146] Failed setting context: CUDA_ERROR_NOT_INITIALIZED: initialization error
I saw this, so I tried:

multiprocessing.set_start_method('spawn', force=True)

at the beginning of the code, which now gives me a lot of messages:
2022-11-08 09:12:35.984897: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-08 09:12:35.984909: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-11-08 09:12:35.985087: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-08 09:12:35.985118: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
...
2022-11-08 09:12:36.618099: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.618274: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 230.40M (241592064 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.618437: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 207.36M (217433088 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.618447: W tensorflow/core/common_runtime/bfc_allocator.cc:360] Garbage collection: deallocate free memory regions (i.e., allocations) so that we can re-allocate a larger region to avoid OOM due to memory fragmentation. If you see this message frequently, you are running near the threshold of the available device memory and re-allocation may incur great performance overhead. You may try smaller batch sizes to observe the performance impact. Set TF_ENABLE_GPU_GARBAGE_COLLECTION=false if you'd like to disable this feature.
2022-11-08 09:12:36.629520: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.629542: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 203.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2022-11-08 09:12:36.629618: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.629987: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:36.630001: W tensorflow/core/common_runtime/bfc_allocator.cc:290] Allocator (GPU_0_bfc) ran out of memory trying to allocate 203.00MiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2022-11-08 09:12:36.630110: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 230.40M (241592064 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
....
2022-11-08 09:12:37.256468: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.256640: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.256810: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.256988: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.257166: I tensorflow/stream_executor/cuda/cuda_driver.cc:733] failed to allocate 256.00M (268435456 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2022-11-08 09:12:37.257224: W tensorflow/core/framework/op_kernel.cc:1780] OP_REQUIRES failed at conv_ops_fused_impl.h:601 : NOT_FOUND: No algorithm worked! Error messages:
Profiling failure on CUDNN engine 1#TC: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
Profiling failure on CUDNN engine 1: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 16777216 bytes.
The problem comes from the data preparation step. The initial code takes data of shape (10000, 28, 28) and uses np.array_split to split it into a list of numpy arrays, one per worker (here a list of 3 numpy arrays, since workers=3), for each worker to process. After your after_prepare function returns, however, your input is a list of 10000 tensors, because you are using batch(1), and this data raises the error when it reaches the np.array_split call.
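As a minimal sketch of what goes wrong (assuming TF 2.x, with a dummy array standing in for the Fashion-MNIST images): np.array_split internally converts its argument with numpy's asanyarray, and a tf.data.Dataset is not array-like, so it ends up wrapped in a 0-d object array that cannot be split along axis 0.

import numpy as np
import tensorflow as tf

# stand-in for the prepared dataset: batch(1) over dummy images
ds = tf.data.Dataset.from_tensor_slices(np.zeros((10, 28, 28), np.float32)).batch(1)

try:
    np.array_split(ds, 3)
except Exception as e:
    # the Dataset becomes a 0-d object array, so axis 0 does not exist
    print(e)  # axis1: axis 0 is out of bounds for array of dimension 0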
You have two options to solve this problem:
Option 1. Don't batch the data in the prepare function, just return ds. Then, in the apply_by_multiprocessing function, change

result = pool.map(_apply_df, np.array_split(data, workers))

to

result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
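Applied in context, apply_by_multiprocessing would then look like this (a sketch, relying on the Pool and np imports from the code above):

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # materialize the unbatched dataset into a list of numpy arrays,
    # so np.array_split has a real sequence to divide
    arrays = list(data.as_numpy_iterator())
    result = pool.map(_apply_df, np.array_split(arrays, workers))
    pool.close()
    return list(result)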
Option 2. Again, don't batch the data in the prepare function, just return ds. Then, in the apply_by_multiprocessing function, change

result = pool.map(_apply_df, np.array_split(data, workers))

to

result = pool.map(_apply_df, data.batch(int(np.ceil(len(data) / workers))))

(np.ceil returns a float, so it is cast with int() before being passed to batch()). Note that this produces slightly different output shapes, because of the way the batch sizes are computed.
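For concreteness, here is the arithmetic behind that note, for the 10000 test images and workers=3:

import numpy as np

batch_size = int(np.ceil(10000 / 3))  # 3334
# Option 2 batches: 3334 + 3334 + 3332 = 10000 -> shapes (3334, 10), (3334, 10), (3332, 10)
# np.array_split:   3334 + 3333 + 3333 = 10000 -> shapes (3334, 10), (3333, 10), (3333, 10)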
Here is a working code example using Option 2:

import os
import tensorflow as tf
import numpy as np
import multiprocessing
from multiprocessing import Pool
from itertools import chain
from keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
# importing various types of hidden layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
# Adam optimizer for better LR and less loss
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
def model_arch():
    models = Sequential()
    # We are learning 64 filters
    # with a kernel size of 5x5
    models.add(Conv2D(64, (5, 5),
                      padding="same",
                      activation="relu",
                      input_shape=(28, 28, 1)))
    # Max pooling will reduce the
    # size with a kernel size of 2x2
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(128, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    models.add(Conv2D(256, (5, 5), padding="same",
                      activation="relu"))
    models.add(MaxPooling2D(pool_size=(2, 2)))
    # Once the convolutional and pooling
    # operations are done, the layer is
    # flattened and fully connected layers
    # are added
    models.add(Flatten())
    models.add(Dense(256, activation="relu"))
    # Finally, as there are 10 classes in
    # total, a fully connected layer of 10
    # units is created with a softmax
    # activation function
    models.add(Dense(10, activation="softmax"))
    return models
def _apply_df(data):
    model = model_arch()
    model.load_weights("model_mnist.h5")
    return model.predict(data)

def apply_by_multiprocessing(data, workers):
    pool = Pool(processes=workers)
    # result = pool.map(_apply_df, np.array_split(list(data.as_numpy_iterator()), workers))
    result = pool.map(_apply_df, data.batch(int(np.ceil(len(data) / workers))))
    pool.close()
    return list(result)

def resize_and_rescale(data):
    data = tf.cast(data, tf.float32)
    data /= 255.0
    return data

def prepare(ds):
    ds = ds.map(resize_and_rescale)
    return ds

def after_prepare(data):
    tens_data = tf.data.Dataset.from_tensor_slices(data)
    tens_data = prepare(tens_data)
    return tens_data

def main():
    # 'spawn' avoids forking a parent whose CUDA context is already
    # initialized (the CUDA_ERROR_NOT_INITIALIZED issue above)
    multiprocessing.set_start_method('spawn')
    # hide the GPU so the parent and the spawned workers all run on
    # CPU instead of competing for GPU memory
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    fashion_mnist = tf.keras.datasets.fashion_mnist
    _, (test_images, test_labels) = fashion_mnist.load_data()
    test_images = after_prepare(test_images)
    results = apply_by_multiprocessing(test_images, workers=3)
    print(test_images)                 # <MapDataset with shape=(28, 28)>
    print(len(results))                # 3
    print([x.shape for x in results])  # [(3334, 10), (3334, 10), (3332, 10)]

    results_flatten = list(chain.from_iterable(results))
    print(len(results_flatten), results_flatten[0].shape)  # 10000 (10,)

if __name__ == "__main__":
    main()
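If a single (10000, 10) array is more convenient than the flattened list, the per-worker outputs can also be stacked (a small variation on the last step above):

predictions = np.vstack(results)                # shape (10000, 10)
predicted_classes = predictions.argmax(axis=1)  # one class index per image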