mirror of
				https://git.zaroz.cloud/nintendo-back-up/yuzu/yuzu-mainline.git
				synced 2025-03-21 01:53:15 +00:00 
			
		
		
		
	Merge pull request #2705 from FernandoS27/tex-cache-fixes
GPU: Fixes to Texture Cache and Include Microprofiles for GL State/BufferCopy/Macro Interpreter
This commit is contained in:
		
						commit
						b77a1ed67a
					
				@ -4,14 +4,18 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include "common/assert.h"
 | 
					#include "common/assert.h"
 | 
				
			||||||
#include "common/logging/log.h"
 | 
					#include "common/logging/log.h"
 | 
				
			||||||
 | 
					#include "common/microprofile.h"
 | 
				
			||||||
#include "video_core/engines/maxwell_3d.h"
 | 
					#include "video_core/engines/maxwell_3d.h"
 | 
				
			||||||
#include "video_core/macro_interpreter.h"
 | 
					#include "video_core/macro_interpreter.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Tegra {
 | 
					namespace Tegra {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
 | 
					MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
 | 
					void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
 | 
				
			||||||
 | 
					    MICROPROFILE_SCOPE(MacroInterp);
 | 
				
			||||||
    Reset();
 | 
					    Reset();
 | 
				
			||||||
    registers[1] = parameters[0];
 | 
					    registers[1] = parameters[0];
 | 
				
			||||||
    this->parameters = std::move(parameters);
 | 
					    this->parameters = std::move(parameters);
 | 
				
			||||||
 | 
				
			|||||||
@ -6,8 +6,11 @@
 | 
				
			|||||||
#include <glad/glad.h>
 | 
					#include <glad/glad.h>
 | 
				
			||||||
#include "common/assert.h"
 | 
					#include "common/assert.h"
 | 
				
			||||||
#include "common/logging/log.h"
 | 
					#include "common/logging/log.h"
 | 
				
			||||||
 | 
					#include "common/microprofile.h"
 | 
				
			||||||
#include "video_core/renderer_opengl/gl_state.h"
 | 
					#include "video_core/renderer_opengl/gl_state.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace OpenGL {
 | 
					namespace OpenGL {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
					using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 | 
				
			||||||
@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void OpenGLState::Apply() const {
 | 
					void OpenGLState::Apply() const {
 | 
				
			||||||
 | 
					    MICROPROFILE_SCOPE(OpenGL_State);
 | 
				
			||||||
    ApplyFramebufferState();
 | 
					    ApplyFramebufferState();
 | 
				
			||||||
    ApplyVertexArrayState();
 | 
					    ApplyVertexArrayState();
 | 
				
			||||||
    ApplyShaderProgram();
 | 
					    ApplyShaderProgram();
 | 
				
			||||||
 | 
				
			|||||||
@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
 | 
					MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
 | 
				
			||||||
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
 | 
					MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
 | 
				
			||||||
 | 
					MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
 | 
				
			||||||
 | 
					                    MP_RGB(128, 192, 128));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace {
 | 
					namespace {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
 | 
					void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
 | 
				
			||||||
 | 
					    MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
 | 
				
			||||||
    const auto& src_params = src_surface->GetSurfaceParams();
 | 
					    const auto& src_params = src_surface->GetSurfaceParams();
 | 
				
			||||||
    const auto& dst_params = dst_surface->GetSurfaceParams();
 | 
					    const auto& dst_params = dst_surface->GetSurfaceParams();
 | 
				
			||||||
    UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
 | 
					    UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
 | 
				
			||||||
 | 
				
			|||||||
@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // Linear Surface check
 | 
					    // Linear Surface check
 | 
				
			||||||
    if (!params.is_tiled) {
 | 
					    if (!params.is_tiled) {
 | 
				
			||||||
        if (std::tie(params.width, params.height, params.pitch) ==
 | 
					        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
 | 
				
			||||||
            std::tie(rhs.width, rhs.height, rhs.pitch)) {
 | 
					            if (params.width == rhs.width) {
 | 
				
			||||||
            return MatchStructureResult::FullMatch;
 | 
					                return MatchStructureResult::FullMatch;
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                return MatchStructureResult::SemiMatch;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return MatchStructureResult::None;
 | 
					        return MatchStructureResult::None;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -200,8 +200,9 @@ public:
 | 
				
			|||||||
        modification_tick = tick;
 | 
					        modification_tick = tick;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void MarkAsRenderTarget(const bool is_target) {
 | 
					    void MarkAsRenderTarget(const bool is_target, const u32 index) {
 | 
				
			||||||
        this->is_target = is_target;
 | 
					        this->is_target = is_target;
 | 
				
			||||||
 | 
					        this->index = index;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void MarkAsPicked(const bool is_picked) {
 | 
					    void MarkAsPicked(const bool is_picked) {
 | 
				
			||||||
@ -221,6 +222,10 @@ public:
 | 
				
			|||||||
        return is_target;
 | 
					        return is_target;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    u32 GetRenderTarget() const {
 | 
				
			||||||
 | 
					        return index;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool IsRegistered() const {
 | 
					    bool IsRegistered() const {
 | 
				
			||||||
        return is_registered;
 | 
					        return is_registered;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -307,10 +312,13 @@ private:
 | 
				
			|||||||
        return view;
 | 
					        return view;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static constexpr u32 NO_RT = 0xFFFFFFFF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    bool is_modified{};
 | 
					    bool is_modified{};
 | 
				
			||||||
    bool is_target{};
 | 
					    bool is_target{};
 | 
				
			||||||
    bool is_registered{};
 | 
					    bool is_registered{};
 | 
				
			||||||
    bool is_picked{};
 | 
					    bool is_picked{};
 | 
				
			||||||
 | 
					    u32 index{NO_RT};
 | 
				
			||||||
    u64 modification_tick{};
 | 
					    u64 modification_tick{};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
 | 
					std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
 | 
				
			||||||
                                                    bool uncompressed) const {
 | 
					                                                    bool uncompressed) const {
 | 
				
			||||||
    const bool tiled{as_host_size ? false : is_tiled};
 | 
					 | 
				
			||||||
    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
 | 
					    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
 | 
				
			||||||
    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
 | 
					    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
 | 
				
			||||||
    const u32 depth{is_layered ? 1U : GetMipDepth(level)};
 | 
					    const u32 depth{is_layered ? 1U : GetMipDepth(level)};
 | 
				
			||||||
    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
 | 
					    if (is_tiled) {
 | 
				
			||||||
                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
 | 
					        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
 | 
				
			||||||
 | 
					                                             depth, GetMipBlockHeight(level),
 | 
				
			||||||
 | 
					                                             GetMipBlockDepth(level));
 | 
				
			||||||
 | 
					    } else if (as_host_size || IsBuffer()) {
 | 
				
			||||||
 | 
					        return GetBytesPerPixel() * width * height * depth;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        // Linear Texture Case
 | 
				
			||||||
 | 
					        return pitch * height * depth;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
 | 
					bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
 | 
				
			||||||
 | 
				
			|||||||
@ -133,11 +133,11 @@ public:
 | 
				
			|||||||
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
 | 
					            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
 | 
				
			||||||
        auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
 | 
					        auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
 | 
				
			||||||
        if (depth_buffer.target)
 | 
					        if (depth_buffer.target)
 | 
				
			||||||
            depth_buffer.target->MarkAsRenderTarget(false);
 | 
					            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
        depth_buffer.target = surface_view.first;
 | 
					        depth_buffer.target = surface_view.first;
 | 
				
			||||||
        depth_buffer.view = surface_view.second;
 | 
					        depth_buffer.view = surface_view.second;
 | 
				
			||||||
        if (depth_buffer.target)
 | 
					        if (depth_buffer.target)
 | 
				
			||||||
            depth_buffer.target->MarkAsRenderTarget(true);
 | 
					            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
 | 
				
			||||||
        return surface_view.second;
 | 
					        return surface_view.second;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -167,11 +167,11 @@ public:
 | 
				
			|||||||
        auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
 | 
					        auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
 | 
				
			||||||
                                       preserve_contents, true);
 | 
					                                       preserve_contents, true);
 | 
				
			||||||
        if (render_targets[index].target)
 | 
					        if (render_targets[index].target)
 | 
				
			||||||
            render_targets[index].target->MarkAsRenderTarget(false);
 | 
					            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
        render_targets[index].target = surface_view.first;
 | 
					        render_targets[index].target = surface_view.first;
 | 
				
			||||||
        render_targets[index].view = surface_view.second;
 | 
					        render_targets[index].view = surface_view.second;
 | 
				
			||||||
        if (render_targets[index].target)
 | 
					        if (render_targets[index].target)
 | 
				
			||||||
            render_targets[index].target->MarkAsRenderTarget(true);
 | 
					            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
 | 
				
			||||||
        return surface_view.second;
 | 
					        return surface_view.second;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -191,7 +191,7 @@ public:
 | 
				
			|||||||
        if (depth_buffer.target == nullptr) {
 | 
					        if (depth_buffer.target == nullptr) {
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        depth_buffer.target->MarkAsRenderTarget(false);
 | 
					        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
        depth_buffer.target = nullptr;
 | 
					        depth_buffer.target = nullptr;
 | 
				
			||||||
        depth_buffer.view = nullptr;
 | 
					        depth_buffer.view = nullptr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -200,7 +200,7 @@ public:
 | 
				
			|||||||
        if (render_targets[index].target == nullptr) {
 | 
					        if (render_targets[index].target == nullptr) {
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        render_targets[index].target->MarkAsRenderTarget(false);
 | 
					        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
 | 
				
			||||||
        render_targets[index].target = nullptr;
 | 
					        render_targets[index].target = nullptr;
 | 
				
			||||||
        render_targets[index].view = nullptr;
 | 
					        render_targets[index].view = nullptr;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -270,6 +270,16 @@ protected:
 | 
				
			|||||||
    // and reading it from a sepparate buffer.
 | 
					    // and reading it from a sepparate buffer.
 | 
				
			||||||
    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 | 
					    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void ManageRenderTargetUnregister(TSurface& surface) {
 | 
				
			||||||
 | 
					        auto& maxwell3d = system.GPU().Maxwell3D();
 | 
				
			||||||
 | 
					        const u32 index = surface->GetRenderTarget();
 | 
				
			||||||
 | 
					        if (index == DEPTH_RT) {
 | 
				
			||||||
 | 
					            maxwell3d.dirty_flags.zeta_buffer = true;
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            maxwell3d.dirty_flags.color_buffer.set(index, true);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Register(TSurface surface) {
 | 
					    void Register(TSurface surface) {
 | 
				
			||||||
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
 | 
					        const GPUVAddr gpu_addr = surface->GetGpuAddr();
 | 
				
			||||||
        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
 | 
					        const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
 | 
				
			||||||
@ -294,6 +304,9 @@ protected:
 | 
				
			|||||||
        if (guard_render_targets && surface->IsProtected()) {
 | 
					        if (guard_render_targets && surface->IsProtected()) {
 | 
				
			||||||
            return;
 | 
					            return;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					        if (!guard_render_targets && surface->IsRenderTarget()) {
 | 
				
			||||||
 | 
					            ManageRenderTargetUnregister(surface);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        const GPUVAddr gpu_addr = surface->GetGpuAddr();
 | 
					        const GPUVAddr gpu_addr = surface->GetGpuAddr();
 | 
				
			||||||
        const CacheAddr cache_ptr = surface->GetCacheAddr();
 | 
					        const CacheAddr cache_ptr = surface->GetCacheAddr();
 | 
				
			||||||
        const std::size_t size = surface->GetSizeInBytes();
 | 
					        const std::size_t size = surface->GetSizeInBytes();
 | 
				
			||||||
@ -649,15 +662,6 @@ private:
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
                return {current_surface, *view};
 | 
					                return {current_surface, *view};
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            // The next case is unsafe, so if we r in accurate GPU, just skip it
 | 
					 | 
				
			||||||
            if (Settings::values.use_accurate_gpu_emulation) {
 | 
					 | 
				
			||||||
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
 | 
					 | 
				
			||||||
                                      MatchTopologyResult::FullMatch);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            // This is the case the texture is a part of the parent.
 | 
					 | 
				
			||||||
            if (current_surface->MatchesSubTexture(params, gpu_addr)) {
 | 
					 | 
				
			||||||
                return RebuildSurface(current_surface, params, is_render);
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            // If there are many overlaps, odds are they are subtextures of the candidate
 | 
					            // If there are many overlaps, odds are they are subtextures of the candidate
 | 
				
			||||||
            // surface. We try to construct a new surface based on the candidate parameters,
 | 
					            // surface. We try to construct a new surface based on the candidate parameters,
 | 
				
			||||||
@ -793,6 +797,9 @@ private:
 | 
				
			|||||||
    static constexpr u64 registry_page_size{1 << registry_page_bits};
 | 
					    static constexpr u64 registry_page_size{1 << registry_page_bits};
 | 
				
			||||||
    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
 | 
					    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static constexpr u32 DEPTH_RT = 8;
 | 
				
			||||||
 | 
					    static constexpr u32 NO_RT = 0xFFFFFFFF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // The L1 Cache is used for fast texture lookup before checking the overlaps
 | 
					    // The L1 Cache is used for fast texture lookup before checking the overlaps
 | 
				
			||||||
    // This avoids calculating size and other stuffs.
 | 
					    // This avoids calculating size and other stuffs.
 | 
				
			||||||
    std::unordered_map<CacheAddr, TSurface> l1_cache;
 | 
					    std::unordered_map<CacheAddr, TSurface> l1_cache;
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user