mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	Merge pull request #3808 from ReinUsesLisp/wait-for-idle
{maxwell_3d,buffer_cache}: Implement memory barriers using 3D registers
			
			
This commit is contained in:
		
						commit
						2aff0b4733
					
				@ -88,10 +88,6 @@ public:
 | 
			
		||||
                map->MarkAsWritten(true);
 | 
			
		||||
                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            if (map->IsWritten()) {
 | 
			
		||||
                WriteBarrier();
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
 | 
			
		||||
@ -253,8 +249,6 @@ protected:
 | 
			
		||||
 | 
			
		||||
    virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
 | 
			
		||||
 | 
			
		||||
    virtual void WriteBarrier() = 0;
 | 
			
		||||
 | 
			
		||||
    virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 | 
			
		||||
 | 
			
		||||
    virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
 | 
			
		||||
 | 
			
		||||
@ -184,6 +184,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    switch (method) {
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(wait_for_idle): {
 | 
			
		||||
        rasterizer.WaitForIdle();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(shadow_ram_control): {
 | 
			
		||||
        shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument);
 | 
			
		||||
        break;
 | 
			
		||||
 | 
			
		||||
@ -709,7 +709,9 @@ public:
 | 
			
		||||
 | 
			
		||||
        union {
 | 
			
		||||
            struct {
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x45);
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x44);
 | 
			
		||||
 | 
			
		||||
                u32 wait_for_idle;
 | 
			
		||||
 | 
			
		||||
                struct {
 | 
			
		||||
                    u32 upload_address;
 | 
			
		||||
@ -1536,6 +1538,7 @@ private:
 | 
			
		||||
    static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4,                           \
 | 
			
		||||
                  "Field " #field_name " has invalid position")
 | 
			
		||||
 | 
			
		||||
ASSERT_REG_POSITION(wait_for_idle, 0x44);
 | 
			
		||||
ASSERT_REG_POSITION(macros, 0x45);
 | 
			
		||||
ASSERT_REG_POSITION(shadow_ram_control, 0x49);
 | 
			
		||||
ASSERT_REG_POSITION(upload, 0x60);
 | 
			
		||||
 | 
			
		||||
@ -80,6 +80,9 @@ public:
 | 
			
		||||
    /// and invalidated
 | 
			
		||||
    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 | 
			
		||||
 | 
			
		||||
    /// Notify the host renderer to wait for previous primitive and compute operations.
 | 
			
		||||
    virtual void WaitForIdle() = 0;
 | 
			
		||||
 | 
			
		||||
    /// Notify the rasterizer to send all written commands to the host GPU.
 | 
			
		||||
    virtual void FlushCommands() = 0;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -51,10 +51,6 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
 | 
			
		||||
    return std::make_shared<CachedBufferBlock>(cpu_addr, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Issues an OpenGL memory barrier with GL_SHADER_STORAGE_BARRIER_BIT.
void OGLBufferCache::WriteBarrier() {
 | 
			
		||||
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Returns the GLuint handle reported by the given buffer's GetHandle().
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
 | 
			
		||||
    return buffer->GetHandle();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -59,8 +59,6 @@ protected:
 | 
			
		||||
 | 
			
		||||
    GLuint ToHandle(const Buffer& buffer) override;
 | 
			
		||||
 | 
			
		||||
    void WriteBarrier() override;
 | 
			
		||||
 | 
			
		||||
    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
 | 
			
		||||
                         const u8* data) override;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -746,6 +746,17 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
			
		||||
    InvalidateRegion(addr, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// OpenGL implementation of WaitForIdle: issues a single glMemoryBarrier whose mask is the
/// union of the barrier bits listed below. GL_FRAMEBUFFER_BARRIER_BIT is not part of the mask.
void RasterizerOpenGL::WaitForIdle() {
 | 
			
		||||
    // Place a barrier on everything that is not framebuffer related.
 | 
			
		||||
    // This is related to another flag that is not currently implemented.
 | 
			
		||||
    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
 | 
			
		||||
                    GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
 | 
			
		||||
                    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
 | 
			
		||||
                    GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
 | 
			
		||||
                    GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
 | 
			
		||||
                    GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::FlushCommands() {
 | 
			
		||||
    // Only flush when we have commands queued to OpenGL.
 | 
			
		||||
    if (num_queued_commands == 0) {
 | 
			
		||||
 | 
			
		||||
@ -75,6 +75,7 @@ public:
 | 
			
		||||
    void SignalSyncPoint(u32 value) override;
 | 
			
		||||
    void ReleaseFences() override;
 | 
			
		||||
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
			
		||||
    void WaitForIdle() override;
 | 
			
		||||
    void FlushCommands() override;
 | 
			
		||||
    void TickFrame() override;
 | 
			
		||||
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
 | 
			
		||||
 | 
			
		||||
@ -52,8 +52,6 @@ public:
 | 
			
		||||
protected:
 | 
			
		||||
    VkBuffer ToHandle(const Buffer& buffer) override;
 | 
			
		||||
 | 
			
		||||
    void WriteBarrier() override {}
 | 
			
		||||
 | 
			
		||||
    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
 | 
			
		||||
 | 
			
		||||
    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
 | 
			
		||||
 | 
			
		||||
@ -299,7 +299,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 | 
			
		||||
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
 | 
			
		||||
      sampler_cache(device),
 | 
			
		||||
      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
 | 
			
		||||
      query_cache(system, *this, device, scheduler) {
 | 
			
		||||
      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
 | 
			
		||||
    scheduler.SetQueryCache(query_cache);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -573,6 +573,26 @@ void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 | 
			
		||||
    InvalidateRegion(addr, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Vulkan implementation of WaitForIdle: records a set-then-wait on `wfi_event` into the
/// scheduler's command stream, using a stage mask built from the pipeline stages listed below.
void RasterizerVulkan::WaitForIdle() {
 | 
			
		||||
    // Every stage except pixel operations. FRAGMENT_SHADER_BIT is intentionally included because
 | 
			
		||||
    // fragment shaders can still write storage buffers.
 | 
			
		||||
    VkPipelineStageFlags flags =
 | 
			
		||||
        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
 | 
			
		||||
        VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
 | 
			
		||||
        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
 | 
			
		||||
        VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
 | 
			
		||||
        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
 | 
			
		||||
    // The transform feedback stage bit comes from an extension, so it is only added when the
    // device reports support for it.
    if (device.IsExtTransformFeedbackSupported()) {
 | 
			
		||||
        flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // NOTE(review): presumably required because event commands with these stage masks cannot be
    // recorded inside a render pass - confirm against the scheduler and the Vulkan spec.
    scheduler.RequestOutsideRenderPassOperationContext();
 | 
			
		||||
    // The event handle and the stage mask are captured by value; the lambda does not capture
    // `this`.
    scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) {
 | 
			
		||||
        // Signal the event for the collected stages...
        cmdbuf.SetEvent(event, flags);
 | 
			
		||||
        // ...then wait on it with the same source mask and TOP_OF_PIPE as destination. The
        // memory, buffer and image barrier lists are all passed empty.
        cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerVulkan::FlushCommands() {
 | 
			
		||||
    if (draw_counter > 0) {
 | 
			
		||||
        draw_counter = 0;
 | 
			
		||||
 | 
			
		||||
@ -126,6 +126,7 @@ public:
 | 
			
		||||
    void SignalSyncPoint(u32 value) override;
 | 
			
		||||
    void ReleaseFences() override;
 | 
			
		||||
    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
 | 
			
		||||
    void WaitForIdle() override;
 | 
			
		||||
    void FlushCommands() override;
 | 
			
		||||
    void TickFrame() override;
 | 
			
		||||
    bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
 | 
			
		||||
@ -275,6 +276,7 @@ private:
 | 
			
		||||
 | 
			
		||||
    vk::Buffer default_buffer;
 | 
			
		||||
    VKMemoryCommit default_buffer_commit;
 | 
			
		||||
    vk::Event wfi_event;
 | 
			
		||||
 | 
			
		||||
    std::array<View, Maxwell::NumRenderTargets> color_attachments;
 | 
			
		||||
    View zeta_attachment;
 | 
			
		||||
 | 
			
		||||
@ -87,6 +87,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
 | 
			
		||||
    X(vkCmdSetStencilReference);
 | 
			
		||||
    X(vkCmdSetStencilWriteMask);
 | 
			
		||||
    X(vkCmdSetViewport);
 | 
			
		||||
    X(vkCmdWaitEvents);
 | 
			
		||||
    X(vkCreateBuffer);
 | 
			
		||||
    X(vkCreateBufferView);
 | 
			
		||||
    X(vkCreateCommandPool);
 | 
			
		||||
 | 
			
		||||
@ -205,6 +205,7 @@ struct DeviceDispatch : public InstanceDispatch {
 | 
			
		||||
    PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
 | 
			
		||||
    PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
 | 
			
		||||
    PFN_vkCmdSetViewport vkCmdSetViewport;
 | 
			
		||||
    PFN_vkCmdWaitEvents vkCmdWaitEvents;
 | 
			
		||||
    PFN_vkCreateBuffer vkCreateBuffer;
 | 
			
		||||
    PFN_vkCreateBufferView vkCreateBufferView;
 | 
			
		||||
    PFN_vkCreateCommandPool vkCreateCommandPool;
 | 
			
		||||
@ -958,6 +959,15 @@ public:
 | 
			
		||||
        dld->vkCmdSetEvent(handle, event, stage_flags);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Forwards to vkCmdWaitEvents through the dispatch table `dld`, passing `handle` as the
    /// first argument and each span as a (size, data pointer) pair.
    /// @param events          Events to wait on.
    /// @param src_stage_mask  Forwarded as the source stage mask.
    /// @param dst_stage_mask  Forwarded as the destination stage mask.
    /// @param memory_barriers Global memory barriers to pass along (may be empty).
    /// @param buffer_barriers Buffer memory barriers to pass along (may be empty).
    /// @param image_barriers  Image memory barriers to pass along (may be empty).
    void WaitEvents(Span<VkEvent> events, VkPipelineStageFlags src_stage_mask,
 | 
			
		||||
                    VkPipelineStageFlags dst_stage_mask, Span<VkMemoryBarrier> memory_barriers,
 | 
			
		||||
                    Span<VkBufferMemoryBarrier> buffer_barriers,
 | 
			
		||||
                    Span<VkImageMemoryBarrier> image_barriers) const noexcept {
 | 
			
		||||
        dld->vkCmdWaitEvents(handle, events.size(), events.data(), src_stage_mask, dst_stage_mask,
 | 
			
		||||
                             memory_barriers.size(), memory_barriers.data(), buffer_barriers.size(),
 | 
			
		||||
                             buffer_barriers.data(), image_barriers.size(), image_barriers.data());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
 | 
			
		||||
                                         const VkDeviceSize* offsets,
 | 
			
		||||
                                         const VkDeviceSize* sizes) const noexcept {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user