Vanilla Vulkan计算着色器未写入输出缓冲区



编辑:修复了重复取消映射(unmap)的问题(但这并没有解决本问题)。
编辑2:修正了API版本,并从代码中移除了验证层;改为在运行时设置环境变量 VK_INSTANCE_LAYERS=VK_LAYER_KHRONOS_validation。问题仍然存在。
编辑3:之前忘记了描述符集(它用于把缓冲区绑定到着色器的输入)。补上之后问题仍然没有解决:

<TL;DR>我已经用一个基本的计算着色器编写了一个简单的Vulkan仅计算示例代码。没有Vulkan或着色器错误,但计算着色器未写入输出缓冲区:(

为了学习Vulkan API,我开始用一个基本的计算着色器编写一个简单的纯计算示例。它将一个int缓冲区上传到GPU,运行一个递增每个int的计算着色器,并将结果写入第二个缓冲区。

我的问题是,一切都运行得很好,但我的输出缓冲区中没有得到预期的结果,我也不知道为什么。它看起来像是调度了计算着色器,但从未写入输出缓冲区。

为了观察这一点,我首先将随机数上传到我的输入缓冲区,并用值2填充我的输出缓冲区。然后调度计算着色器,该着色器应该从输入中读取每个值X,并将X+1写入输出缓冲区
等待完成后,我映射输出缓冲区并显示其数据。结果里全是2 :'(

注意:绑定到缓冲区的内存是用VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT创建的。

因此,Vulkan中肯定有一个概念我错了,或者标志/设置中的微妙之处我看不到。。。

计算着色器代码:

#version 450 core

// Number of uvec4 elements held by each of the two storage buffers.
const uint ELEMENT_COUNT = 25u;

// Source values, read by the shader (set 0, binding 0).
layout (set = 0, binding = 0) buffer InputBuffer {
    uvec4 inputData[ELEMENT_COUNT];
};

// Destination values, written by the shader (set 0, binding 1).
layout (set = 0, binding = 1) buffer OutputBuffer {
    uvec4 outputData[ELEMENT_COUNT];
};

// One-dimensional workgroups of 8 invocations.
layout (local_size_x = 8, local_size_y = 1, local_size_z = 1) in;

// Each invocation copies one element from the input to the output,
// adding 1 to every component.
void main()
{
    uint index = gl_GlobalInvocationID.x;

    // Invocations past the end of the arrays have nothing to do.
    if (index >= ELEMENT_COUNT)
    {
        return;
    }

    outputData[index] = inputData[index] + uvec4(1u);
}

完整的示例代码(因为我不知道哪里可能出错,抱歉,我把全部代码都贴了上来):

#include <vulkan/vulkan.h>
#include <iostream>
#include <vector>
#include <assert.h>
#include <fstream>
// Short aliases for the fixed-width unsigned integer types used throughout the sample.
using u32 = uint32_t;
using u64 = uint64_t;
// Vulkan two-step enumeration helper for functions of the form
//     func(arg1, u32* pCount, T* pItems)
// First call: pItems == nullptr, so only the item count is queried.
// Second call: the items are written into `vec`, which is resized to fit.
// `vec` is always cleared first and stays empty when the count is zero.
// The value returned by `func` (if any) is ignored.
// The macro expands to a braced block, so it may be used with or without a
// trailing semicolon. The internal counter has a deliberately unusual name so
// that it cannot capture an identifier used in the caller's arguments.
#define COUNT_AND_GET1(func, vec, arg1) {                                   \
    u32 countAndGetSize_ = 0;                                               \
    (vec).clear();                                                          \
    func((arg1), &countAndGetSize_, nullptr);                               \
    if (countAndGetSize_ > 0) {                                             \
        (vec).resize(countAndGetSize_);                                     \
        func((arg1), &countAndGetSize_, (vec).data());                      \
        /* The second call may report fewer items than the first. */        \
        (vec).resize(countAndGetSize_);                                     \
    }                                                                       \
}
// Two-argument variant of the Vulkan two-step enumeration helper, for
// functions of the form
//     func(arg1, arg2, u32* pCount, T* pItems)
// First call: pItems == nullptr, so only the item count is queried.
// Second call: the items are written into `vec`, which is resized to fit.
// `vec` is always cleared first and stays empty when the count is zero.
// The value returned by `func` (if any) is ignored.
// The macro expands to a braced block, so it may be used with or without a
// trailing semicolon.
#define COUNT_AND_GET2(func, vec, arg1, arg2) {                             \
    u32 countAndGetSize_ = 0;                                               \
    (vec).clear();                                                          \
    func((arg1), (arg2), &countAndGetSize_, nullptr);                       \
    if (countAndGetSize_ > 0) {                                             \
        (vec).resize(countAndGetSize_);                                     \
        func((arg1), (arg2), &countAndGetSize_, (vec).data());              \
        /* The second call may report fewer items than the first. */        \
        (vec).resize(countAndGetSize_);                                     \
    }                                                                       \
}
// Host-side element type of the data buffers: four unsigned 32-bit components.
struct vec4
{
    u32 x;
    u32 y;
    u32 z;
    u32 w;
};
// Bundle of everything SampleCompute queries about one physical device.
struct PhysicalDeviceProps
{
// Filled by vkGetPhysicalDeviceProperties (device type, limits, ...).
VkPhysicalDeviceProperties              m_Properties;
// NOTE(review): nothing in this file writes m_Features - confirm whether it is needed.
VkPhysicalDeviceFeatures                m_Features;
// Filled by vkGetPhysicalDeviceMemoryProperties; consumed by SelectMemoryHeapFrom.
VkPhysicalDeviceMemoryProperties        m_MemoryProperties;
// One entry per queue family (vkGetPhysicalDeviceQueueFamilyProperties).
std::vector<VkQueueFamilyProperties>    m_QueueFamilyProperties;
// Result of vkEnumerateDeviceLayerProperties.
std::vector<VkLayerProperties>          m_LayerProperties;
// Result of vkEnumerateDeviceExtensionProperties (queried with a null layer name).
std::vector<VkExtensionProperties>      m_ExtensionProperties;
};
// Return the index of the first device memory type that matches the requested properties.
//
// memoryTypeBits      - bitmask of acceptable memory type indices (bit i set means
//                       memory type i may be used), e.g. VkMemoryRequirements::memoryTypeBits.
// memoryProperties    - memory types reported for the physical device.
// preferredProperties - flags tried first.
// requiredProperties  - fallback flags, tried only when no type offers the preferred ones.
//
// A memory type matches when it is allowed by memoryTypeBits and its propertyFlags
// contain ALL of the requested flags (it may have additional flags).
// Returns u32(-1) when no memory type matches either set; callers must check for it.
u32 SelectMemoryHeapFrom(u32 memoryTypeBits, const VkPhysicalDeviceMemoryProperties& memoryProperties, VkMemoryPropertyFlags preferredProperties, VkMemoryPropertyFlags requiredProperties)
{
    assert((preferredProperties & requiredProperties) > 0);

    // Only the first memoryTypeCount entries of memoryTypes are meaningful.
    const u32 typeCount = memoryProperties.memoryTypeCount < VK_MAX_MEMORY_TYPES
        ? memoryProperties.memoryTypeCount
        : u32(VK_MAX_MEMORY_TYPES);

    // First allowed memory type whose flags include every bit of `wanted`, or u32(-1).
    const auto firstTypeWith = [&](VkMemoryPropertyFlags wanted) -> u32
    {
        for (u32 index = 0; index < typeCount; ++index)
        {
            // Unsigned shift: index can reach 31, which would overflow a signed int.
            const bool allowed = (memoryTypeBits & (u32(1) << index)) != 0;
            const bool hasAllFlags = (memoryProperties.memoryTypes[index].propertyFlags & wanted) == wanted;
            if (allowed && hasAllFlags)
            {
                return index;
            }
        }
        return u32(-1);
    };

    const u32 preferredType = firstTypeWith(preferredProperties);
    if (preferredType != u32(-1))
    {
        return preferredType;
    }
    return firstTypeWith(requiredProperties);
}
// **** MAIN FUNCTION ****
void SampleCompute()
{
// -------------------------------------
// 1. Create Instance
// -------------------------------------
VkApplicationInfo appInfo = { VK_STRUCTURE_TYPE_APPLICATION_INFO, nullptr, "SampleCompute", 0, "MyEngine", 0, VK_API_VERSION_1_2 };
VkInstanceCreateInfo instCreateInfo = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, nullptr, 0, &appInfo, 0, nullptr, 0, nullptr };
VkInstance instance = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateInstance(&instCreateInfo, nullptr, &instance))
std::cout << "Instance creation failed!n";

// ---------------------------------------------------
// 2. Enumerate physical devices and select 'best' one 
// ---------------------------------------------------
VkPhysicalDevice bestDevice = VK_NULL_HANDLE;
PhysicalDeviceProps bestDeviceProps;
{
std::vector<VkPhysicalDevice> physicalDevices;
COUNT_AND_GET1(vkEnumeratePhysicalDevices, physicalDevices, instance)
assert(!physicalDevices.empty());
std::vector< PhysicalDeviceProps> physicalDeviceProps(physicalDevices.size());
for (u64 i = 0; i < physicalDevices.size(); ++i)
{
vkGetPhysicalDeviceProperties(physicalDevices[i], &physicalDeviceProps[i].m_Properties);
vkGetPhysicalDeviceMemoryProperties(physicalDevices[i], &physicalDeviceProps[i].m_MemoryProperties);
COUNT_AND_GET1(vkGetPhysicalDeviceQueueFamilyProperties, physicalDeviceProps[i].m_QueueFamilyProperties, physicalDevices[i])
COUNT_AND_GET1(vkEnumerateDeviceLayerProperties, physicalDeviceProps[i].m_LayerProperties, physicalDevices[i])
COUNT_AND_GET2(vkEnumerateDeviceExtensionProperties, physicalDeviceProps[i].m_ExtensionProperties, physicalDevices[i], nullptr)
}
u64 bestDeviceIndex = 0;
for (u64 i = 1; i < physicalDevices.size(); ++i)
{
const bool isDiscrete = physicalDeviceProps[bestDeviceIndex].m_Properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
const bool otherIsDiscrete = physicalDeviceProps[i].m_Properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
if (isDiscrete && !otherIsDiscrete)
continue;
else if ((!isDiscrete && otherIsDiscrete)
|| (physicalDeviceProps[bestDeviceIndex].m_Properties.limits.maxFramebufferWidth < physicalDeviceProps[i].m_Properties.limits.maxFramebufferWidth))
bestDeviceIndex = i;
}
bestDevice = physicalDevices[bestDeviceIndex];
bestDeviceProps = physicalDeviceProps[bestDeviceIndex];
}

// ---------------------------------------------------
// 3. Find queue family which support compute pipeline
// ---------------------------------------------------
u32 computeQueue = 0;
while (computeQueue < bestDeviceProps.m_QueueFamilyProperties.size()
&& ((bestDeviceProps.m_QueueFamilyProperties[computeQueue].queueFlags & VK_QUEUE_COMPUTE_BIT) != VK_QUEUE_COMPUTE_BIT))
{
++computeQueue;
}
assert(computeQueue < bestDeviceProps.m_QueueFamilyProperties.size());

// -------------------------------
// 4. Create logical device
// -------------------------------
VkDeviceQueueCreateInfo queueInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, nullptr, 0, computeQueue, 1, nullptr };
VkPhysicalDeviceFeatures features = {};
VkDeviceCreateInfo createInfo = {
VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, nullptr, 0,
1, &queueInfo,
0, nullptr,
0, nullptr,
&features
};
VkDevice device = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateDevice(bestDevice, &createInfo, nullptr, &device))
std::cout << "Logical Device creation failedn";

// -------------------------------
// 5. Create data buffers
// -------------------------------
constexpr u64 elemCount = 25;
constexpr u64 bufferSize = elemCount * sizeof(vec4);
VkBufferCreateInfo bufferCreateInfo = {
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0,
bufferSize,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_SHARING_MODE_EXCLUSIVE, 0, nullptr
};
VkBuffer inputBuffer = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateBuffer(device, &bufferCreateInfo, nullptr, &inputBuffer))
std::cout << "Creating input buffer failed!n";
VkMemoryRequirements inputBufferMemory;
vkGetBufferMemoryRequirements(device, inputBuffer, &inputBufferMemory);
VkBuffer outputBuffer = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateBuffer(device, &bufferCreateInfo, nullptr, &outputBuffer))
std::cout << "Creating output buffer failed!n";
VkMemoryRequirements outputBufferMemory;
vkGetBufferMemoryRequirements(device, outputBuffer, &outputBufferMemory);

// -------------------------------
// 6. Allocate memory for buffers
// -------------------------------
u32 inputMemoryIndex = SelectMemoryHeapFrom(inputBufferMemory.memoryTypeBits, bestDeviceProps.m_MemoryProperties, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VkMemoryAllocateInfo inputAllocationInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, inputBufferMemory.size, inputMemoryIndex };
VkDeviceMemory inputMemory = VK_NULL_HANDLE;
if (VK_SUCCESS != vkAllocateMemory(device, &inputAllocationInfo, nullptr, &inputMemory))
std::cout << "Memory allocation of " << inputBufferMemory.size << " failed!n";
u32 outputMemoryIndex = SelectMemoryHeapFrom(outputBufferMemory.memoryTypeBits, bestDeviceProps.m_MemoryProperties, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VkMemoryAllocateInfo outputAllocationInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, outputBufferMemory.size, outputMemoryIndex };
VkDeviceMemory outputMemory = VK_NULL_HANDLE;
if (VK_SUCCESS != vkAllocateMemory(device, &outputAllocationInfo, nullptr, &outputMemory))
std::cout << "Memory allocation of " << outputBufferMemory.size << " failed!n";

// -------------------------------
// 7. Bind buffers to memory
// -------------------------------
if (vkBindBufferMemory(device, inputBuffer, inputMemory, 0) != VK_SUCCESS)
std::cout << "Input buffer binding failed!n";
if (vkBindBufferMemory(device, outputBuffer, outputMemory, 0) != VK_SUCCESS)
std::cout << "Output buffer binding failed!n";

// ----------------------------------
// 8. Map buffers and upload data
// ----------------------------------
vec4* inputData = nullptr;
if (VK_SUCCESS != vkMapMemory(device, inputMemory, 0, VK_WHOLE_SIZE, 0, (void**)(&inputData)))
std::cout << "Input memory mapping failed!n";

for (u32 i = 0; i < elemCount; ++i)
{
inputData[i].x = static_cast<u32>(rand() / (float)RAND_MAX * 100);
inputData[i].y = static_cast<u32>(rand() / (float)RAND_MAX * 100);
inputData[i].z = static_cast<u32>(rand() / (float)RAND_MAX * 100);
inputData[i].w = static_cast<u32>(rand() / (float)RAND_MAX * 100);
std::cout << inputData[i].x << ", " << inputData[i].y << ", " << inputData[i].z << ", " << inputData[i].w << ", ";
}
std::cout << "nnn";
vkUnmapMemory(device, inputMemory);
vec4* initialOutputData = nullptr;
if (VK_SUCCESS != vkMapMemory(device, outputMemory, 0, VK_WHOLE_SIZE, 0, (void**)(&initialOutputData)))
std::cout << "Output memory mapping failed!n";
for (u32 i = 0; i < elemCount; ++i)
{
initialOutputData[i].x = 2; initialOutputData[i].z = 2; initialOutputData[i].y = 2; initialOutputData[i].w = 2;
}
vkUnmapMemory(device, outputMemory);

// ----------------------------------
// 9. Create shader/pipeline layout
// ----------------------------------
std::vector<VkDescriptorSetLayoutBinding> bindings = {
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr },
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr }
};
VkDescriptorSetLayoutCreateInfo layoutInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, 2, bindings.data() };
VkDescriptorSetLayout descriptorLayout = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, &descriptorLayout))
std::cout << "Descriptor Layout creation failed!n";
// Create pipeline layout
VkPipelineLayoutCreateInfo pipelineCreateInfo = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, 1, &descriptorLayout, 0, nullptr };
VkPipelineLayout layout = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreatePipelineLayout(device, &pipelineCreateInfo, nullptr, &layout))
std::cout << "Pipeline Layout creation failedn";

// --------------------------------------------------
// 10. Load shader source and create shader module
// --------------------------------------------------
std::ifstream file("ComputeShader.spv", std::ifstream::binary);
u64 size = 0;
if (!file.is_open())
std::cout << "Can't open shader!n";

file.seekg(0, file.end);
size = file.tellg();
file.seekg(0);
char* shaderSrc = new char[size];
file.read(shaderSrc, size);
VkShaderModuleCreateInfo shaderCreateInfo = { VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, nullptr, 0, size, reinterpret_cast<u32*>(shaderSrc) };
VkShaderModule shader = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateShaderModule(device, &shaderCreateInfo, nullptr, &shader))
std::cout << "Shader Module creation failedn";
delete[] shaderSrc;

// ----------------------------------
// 10.5. Create descriptor sets
// ----------------------------------
VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2 };
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0,
1, 1, &descriptorPoolSize };
VkDescriptorPool descriptorPool = VK_NULL_HANDLE;
vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, 0, &descriptorPool);
VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = {
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 0,
descriptorPool, 1, &descriptorLayout
};
VkDescriptorSet descriptorSet;
vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet);
VkDescriptorBufferInfo inputBufferDescriptorInfo = { inputBuffer, 0, VK_WHOLE_SIZE };
VkDescriptorBufferInfo outputBufferDescriptorInfo = { outputBuffer, 0, VK_WHOLE_SIZE };
VkWriteDescriptorSet writeDescriptorSet[2] = {
{
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 0, descriptorSet,
0, 0, 1,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
0, &inputBufferDescriptorInfo, 0
},
{
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 0, descriptorSet, 
1, 0, 1,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
0, &outputBufferDescriptorInfo, 0
}
};
vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, nullptr);

// -------------------------------
// 11. Create compute pipeline
// -------------------------------
const char* entryPointName = "main";
VkComputePipelineCreateInfo computeCreateInfo = {
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, nullptr, 0,
{
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0,
VK_SHADER_STAGE_COMPUTE_BIT, shader,
entryPointName, nullptr
},
layout, VK_NULL_HANDLE, 0
};
VkPipeline pipeline = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &computeCreateInfo, nullptr, &pipeline))
std::cout << "Compute Pipeline creation failed!n";

// ------------------------------------------------
// 12. Create Command Pool and Command Buffer
// --------------------------------------------------
VkCommandPoolCreateInfo poolInfo = { VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, computeQueue };
VkCommandPool cmdPool = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateCommandPool(device, &poolInfo, nullptr, &cmdPool))
std::cout << "Command Pool creation failed!n";
VkCommandBufferAllocateInfo cmdBufferInfo = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1
};
VkCommandBuffer cmdBuffer = VK_NULL_HANDLE;
if (VK_SUCCESS != vkAllocateCommandBuffers(device, &cmdBufferInfo, &cmdBuffer))
std::cout << "Command buffer allocation failed!n";
// ---------------------------
// 13. Run compute shader
// ---------------------------
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkCommandBufferBeginInfo beginInfo = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr };
vkBeginCommandBuffer(cmdBuffer, &beginInfo);
vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 1, &descriptorSet, 0, 0);
vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
vkCmdDispatch(cmdBuffer, 8, 1, 1);

vkEndCommandBuffer(cmdBuffer);
// -----------------------------------------
// 14. Submit command buffer (with fence)
// -----------------------------------------
VkFenceCreateInfo fenceCreateInfo = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, (VkFenceCreateFlags)0 };
VkFence fence = VK_NULL_HANDLE;
if (VK_SUCCESS != vkCreateFence(device, &fenceCreateInfo, nullptr, &fence))
std::cout << "Fence creation failed!n";
VkQueue queue = VK_NULL_HANDLE;
vkGetDeviceQueue(device, computeQueue, 0, &queue);
VkSubmitInfo submitInfo = { 
VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, 0,
1, &cmdBuffer, 0, nullptr
};
VkResult result = vkQueueSubmit(queue, 1, &submitInfo, fence);
// Wait for everything finished
if (result == VK_SUCCESS)
{
result = vkQueueWaitIdle(queue);
}
vkWaitForFences(device, 1, &fence, VK_TRUE, u64(-1));
// ---------------------------------
// 15. Grab and display results
// ---------------------------------
vec4* resultData = nullptr;
if (VK_SUCCESS != vkMapMemory(device, outputMemory, 0, VK_WHOLE_SIZE, 0, (void**)(&resultData)))
std::cout << "Output memory mapping failed!n";
for (u32 i = 0; i < elemCount; ++i)
{
std::cout << resultData[i].x << ", " << resultData[i].y << ", " << resultData[i].z << ", " << resultData[i].w << ", ";
}
std::cout << "nnn";
vkUnmapMemory(device, outputMemory);
// ------------------------
// 16. Resources Cleanup
// ------------------------
vkFreeCommandBuffers(device, cmdPool, 1, &cmdBuffer);
vkDestroyCommandPool(device, cmdPool, nullptr);
vkDestroyFence(device, fence, nullptr);
vkDestroyPipeline(device, pipeline, nullptr);
vkDestroyPipelineLayout(device, layout, nullptr);
vkDestroyShaderModule(device, shader, nullptr);
vkDestroyDescriptorSetLayout(device, descriptorLayout, nullptr);
vkDestroyBuffer(device, inputBuffer, nullptr);
vkDestroyBuffer(device, outputBuffer, nullptr);
vkFreeMemory(device, inputMemory, nullptr);
vkFreeMemory(device, outputMemory, nullptr);
if (VK_SUCCESS != vkDeviceWaitIdle(device))
std::cout << "Can't wait for device to idlen";
vkDestroyDevice(device, nullptr);
vkDestroyInstance(instance, nullptr);
}

我认为问题可能是同步错误,特别是缺少内存域操作。某些平台可能不喜欢它…

在命令缓冲区的末尾,您需要这个特殊的管道屏障,它可以将写入从设备域转换到主机域:

VkBufferMemoryBarrier outbuffDependency = {};
outbuffDependency.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
outbuffDependency.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
outbuffDependency.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
outbuffDependency.buffer = outputBuffer;
outbuffDependency.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(
cmdBuffer,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
(VkDependencyFlags)0,
0, nullptr,
1, &outbuffDependency,
0, nullptr
);


Vulkan有一个独特的"内存域"概念:存在主机域和设备域,同一块内存在每个域中可以处于不同的状态。例如,一次内存写入在设备域中可见,并不意味着它在主机域中也可见。

栅栏(fence,或vk*WaitIdle)并不包含内存域操作,规范中对此有明确警告:

注意

发出围栏信号并在主机上等待并不保证内存访问的结果对主机可见,因为围栏定义的内存依赖关系的访问范围仅包括设备访问。必须使用内存屏障或其他内存依赖项来保证这一点。有关详细信息,请参阅主机访问类型的描述。

唯一包含域操作的是:带有VK_PIPELINE_STAGE_HOST_BIT的内存依赖关系,以及vkQueueSubmit(您确实通过它把inputBuffer从主机域转移到了设备域)。

验证层无法合理地捕捉到这个错误,因为(如果不借助某些侵入性的操作系统调试功能)它们无法知道您是否真的通过映射指针读取了缓冲区。

好了,它终于能正常工作了 :)
我在尝试的过程中做了太多改动,以至于在某个时候我的输入缓冲区被绑定成了统一缓冲区(uniform buffer)。
现在它作为存储缓冲区返回,并且描述符集已正确创建和更新,我得到了预期的输出
内存屏障不是强制性的,但我想这是一个很好的做法,当我有一个更复杂的例子,有多个不同缓冲区用途的通道时
感谢大家的帮助,它真的帮助我弄清楚了所有可能使Vulkan实现成功或失败的小细节。

相关内容

  • 没有找到相关文章

最新更新